From 11ca7edb44714c32bea536fab94da17b050d6a8d Mon Sep 17 00:00:00 2001
From: Araozu
Date: Wed, 30 Nov 2022 08:38:43 -0500
Subject: [PATCH] Refactor and scan grouping signs

---
 src/lexic/mod.rs              | 91 ++++++++++++++++++++++++-----------
 src/lexic/scanner/mod.rs      | 51 ++++++++++++++++++--
 src/lexic/scanner/operator.rs | 49 +++++++++++--------
 src/lexic/utils.rs            |  4 ++
 src/token.rs                  |  5 +-
 5 files changed, 146 insertions(+), 54 deletions(-)

diff --git a/src/lexic/mod.rs b/src/lexic/mod.rs
index 71311ec..5a823c8 100644
--- a/src/lexic/mod.rs
+++ b/src/lexic/mod.rs
@@ -4,6 +4,15 @@ use super::token::{self, Token};
 
 type Chars = Vec<char>;
 
+pub enum LexResult {
+    // A token was scanned
+    Some(Token, usize),
+    // No token was found, but there was no error (EOF)
+    None(usize),
+    Err(String),
+}
+
+
 /// Scans and returns all the tokens in the input String
 pub fn get_tokens(input: &String) -> Result<Vec<Token>, String> {
     let chars: Vec<char> = input.chars().into_iter().collect();
@@ -12,14 +21,14 @@ pub fn get_tokens(input: &String) -> Result<Vec<Token>, String> {
 
     while has_input(&chars, current_pos) {
         match next_token(&chars, current_pos) {
-            Ok((Some(token), next_pos)) => {
+            LexResult::Some(token, next_pos) => {
                 results.push(token);
                 current_pos = next_pos;
             },
-            Ok((None, next_pos)) => {
+            LexResult::None(next_pos) => {
                 current_pos = next_pos;
             },
-            Err(reason) => return Err(reason),
+            LexResult::Err(reason) => return Err(reason),
         }
     }
 
@@ -27,12 +36,12 @@ pub fn get_tokens(input: &String) -> Result<Vec<Token>, String> {
     Ok(results)
 }
 
-fn next_token(chars: &Chars, current_pos: usize) -> Result<(Option<Token>, usize), String> {
+fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
     let next_char = peek(chars, current_pos);
 
-    // If EOF is reached return nothing
+    // If EOF is reached return nothing but the current position
     if next_char == '\0' {
-        return Ok((None, current_pos))
+        return LexResult::None(current_pos)
     }
 
     // Handle whitespace recursively
@@ -41,20 +50,19 @@ fn next_token(chars: &Chars, current_pos: usize) -> Result<(Option<Token>, usize
     }
 
     // Test number
-    if utils::is_digit(next_char) {
-        match scanner::number(chars, current_pos) {
-            Ok((token, next_pos)) => Ok((Some(token), next_pos)),
-            Err(reason) => Err(reason),
-        }
-    }
-    // Test operator
-    else if utils::is_operator(next_char) {
-        let (token, next_pos) = scanner::operator(chars, current_pos);
-        Ok((Some(token), next_pos))
-    }
-    else {
-        Err(format!("Unrecognized character: {}", next_char))
-    }
+    None
+        .or_else(|| {
+            scanner::number(next_char, chars, current_pos)
+        })
+        .or_else(|| {
+            scanner::operator(next_char, chars, current_pos)
+        })
+        .or_else(|| {
+            scanner::grouping_sign(next_char, chars, current_pos)
+        })
+        .unwrap_or_else(|| {
+            LexResult::Err(format!("Unrecognized character: {}", next_char))
+        })
 }
 
 fn peek(input: &Chars, pos: usize) -> char {
@@ -103,11 +111,11 @@ mod tests {
         assert_eq!(4, chars.len());
         assert!(has_input(&chars, 0));
 
-        match next_token(&chars, 0).unwrap() {
-            (Some(t), _) => {
+        match next_token(&chars, 0) {
+            LexResult::Some(t, _) => {
                 assert_eq!("126", t.value)
             },
-            (None, _) => {
+            _ => {
                 panic!()
             }
         }
@@ -116,7 +124,7 @@ mod tests {
     /// Should scan numbers
     #[test]
    fn number_test() {
-        let input = String::from("126 278.98 0.282398");
+        let input = String::from("126 278.98 0.282398 1789e+1 239.3298e-103");
         let tokens = get_tokens(&input).unwrap();
 
         let t1 = tokens.get(0).unwrap();
@@ -130,10 +138,39 @@ mod tests {
         let t3 = tokens.get(2).unwrap();
         assert_eq!(TokenType::Number, t3.token_type);
         assert_eq!("0.282398", t3.value);
-        /*
-        assert_eq!("1798e+1", tokens.get(3).unwrap().value);
-        assert_eq!("239.3298e-103", tokens.get(4).unwrap().value);
-        assert_eq!(TokenType::EOF, tokens.get(5).unwrap().token_type);
-        */
+
+        assert_eq!("1789e+1", tokens.get(3).unwrap().value);
+        assert_eq!("239.3298e-103", tokens.get(4).unwrap().value);
+        assert_eq!(TokenType::EOF, tokens.get(5).unwrap().token_type);
+    }
+
+    #[test]
+    fn grouping_sign_test() {
+        let input = String::from("( ) { } [ ]");
+        let tokens = get_tokens(&input).unwrap();
+
+        let t = tokens.get(0).unwrap();
+        assert_eq!(TokenType::LeftParen, t.token_type);
+        assert_eq!("(", t.value);
+
+        let t = tokens.get(1).unwrap();
+        assert_eq!(TokenType::RightParen, t.token_type);
+        assert_eq!(")", t.value);
+
+        let t = tokens.get(2).unwrap();
+        assert_eq!(TokenType::LeftBrace, t.token_type);
+        assert_eq!("{", t.value);
+
+        let t = tokens.get(3).unwrap();
+        assert_eq!(TokenType::RightBrace, t.token_type);
+        assert_eq!("}", t.value);
+
+        let t = tokens.get(4).unwrap();
+        assert_eq!(TokenType::LeftBracket, t.token_type);
+        assert_eq!("[", t.value);
+
+        let t = tokens.get(5).unwrap();
+        assert_eq!(TokenType::RightBracket, t.token_type);
+        assert_eq!("]", t.value);
     }
 }
diff --git a/src/lexic/scanner/mod.rs b/src/lexic/scanner/mod.rs
index cc90d1c..3656d7f 100644
--- a/src/lexic/scanner/mod.rs
+++ b/src/lexic/scanner/mod.rs
@@ -1,12 +1,53 @@
-use super::token::Token;
+use super::{token::{TokenType, self}, utils, LexResult};
 
 mod number;
 mod operator;
 
-pub fn number(chars: &Vec<char>, start_pos: usize) -> Result<(Token, usize), String> {
-    number::scan(chars, start_pos)
+/// Attempts to scan a number. Returns None to be able to chain other scanners
+pub fn number(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
+    if utils::is_digit(c) {
+        match number::scan(chars, start_pos) {
+            Ok((token, next_pos)) => {
+                Some(LexResult::Some(token, next_pos))
+            },
+            Err(reason) => {
+                Some(LexResult::Err(reason))
+            },
+        }
+    }
+    else {
+        None
+    }
 }
 
-pub fn operator(chars: &Vec<char>, start_pos: usize) -> (Token, usize) {
-    operator::scan(chars, start_pos)
+
+/// Attempts to scan an operator. Returns None to be able to chain other scanners
+pub fn operator(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
+    if utils::is_operator(c) {
+        Some(operator::scan(chars, start_pos))
+    }
+    else {
+        None
+    }
+}
+
+
+/// Attempts to scan a grouping sign. Returns None to be able to chain other scanners
+pub fn grouping_sign(c: char, _: &Vec<char>, start_pos: usize) -> Option<LexResult> {
+    let token_type = match c {
+        '(' => TokenType::LeftParen,
+        ')' => TokenType::RightParen,
+        '[' => TokenType::LeftBracket,
+        ']' => TokenType::RightBracket,
+        '{' => TokenType::LeftBrace,
+        '}' => TokenType::RightBrace,
+        _ => return None,
+    };
+
+    let token = token::new_grouping_sign(
+        c.to_string(),
+        start_pos as i32,
+        token_type,
+    );
+    Some(LexResult::Some(token, start_pos + 1))
 }
diff --git a/src/lexic/scanner/operator.rs b/src/lexic/scanner/operator.rs
index b9d3052..1ae57c9 100644
--- a/src/lexic/scanner/operator.rs
+++ b/src/lexic/scanner/operator.rs
@@ -1,26 +1,27 @@
-use crate::lexic::{token::{Token, self}, utils};
+use crate::lexic::{token::{Token, self}, utils, LexResult};
 
 /// Function to scan an operator
 ///
 /// This function assumes the character at `start_pos` is an operator
-pub fn scan(chars: &Vec<char>, start_pos: usize) -> (Token, usize) {
+pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
     scan_impl(chars, start_pos, String::from(""))
 }
 
-pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> (Token, usize) {
-    let next_char = chars.get(start_pos);
-
-    if let Some(c) = next_char {
-        if utils::is_operator(*c) {
-            return scan_impl(chars, start_pos + 1, utils::str_append(current, *c))
+pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
+    match chars.get(start_pos) {
+        Some(c) if utils::is_operator(*c) => {
+            scan_impl(chars, start_pos + 1, utils::str_append(current, *c))
+        },
+        _ => {
+            LexResult::Some(token::new_operator(current, start_pos as i32), start_pos)
         }
     }
-
-    // Return current value
-    (token::new_operator(current, start_pos as i32), start_pos)
 }
+
+
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -59,11 +60,14 @@ mod tests {
         for op in operators {
             let input = str_to_vec(op);
             let start_pos = 0;
-            let (token, next) = scan(&input, start_pos);
-
-            assert_eq!(1, next);
-            assert_eq!(TokenType::Operator, token.token_type);
-            assert_eq!(op, token.value);
+            match scan(&input, start_pos) {
+                LexResult::Some(token, next) => {
+                    assert_eq!(1, next);
+                    assert_eq!(TokenType::Operator, token.token_type);
+                    assert_eq!(op, token.value);
+                },
+                _ => panic!()
+            }
         }
     }
 
@@ -91,11 +95,14 @@ mod tests {
         for op in operators {
             let input = str_to_vec(op);
             let start_pos = 0;
-            let (token, next) = scan(&input, start_pos);
-
-            assert_eq!(2, next);
-            assert_eq!(TokenType::Operator, token.token_type);
-            assert_eq!(op, token.value);
+            match scan(&input, start_pos) {
+                LexResult::Some(token, next) => {
+                    assert_eq!(2, next);
+                    assert_eq!(TokenType::Operator, token.token_type);
+                    assert_eq!(op, token.value);
+                },
+                _ => panic!()
+            }
         }
     }
 }
diff --git a/src/lexic/utils.rs b/src/lexic/utils.rs
index bd74331..9a03711 100644
--- a/src/lexic/utils.rs
+++ b/src/lexic/utils.rs
@@ -18,3 +18,7 @@ pub fn is_operator(c: char) -> bool {
         || c == '&' || c == '?' || c == '<' || c == '>' || c == '^' || c == '.'
         || c == ':'
 }
+
+pub fn is_grouping_sign(c: char) -> bool {
+    c == '(' || c == ')' || c == '{' || c == '}' || c == '[' || c == ']'
+}
diff --git a/src/token.rs b/src/token.rs
index ee08d85..2f29fec 100644
--- a/src/token.rs
+++ b/src/token.rs
@@ -5,7 +5,6 @@ pub enum TokenType {
     Comment,
     Number,
     String,
-    Unit,
     Operator,
     LeftParen,
     RightParen,
@@ -52,3 +51,7 @@ pub fn new_operator(value: String, position: i32) -> Token {
         position
     }
 }
+
+pub fn new_grouping_sign(value: String, position: i32, token_type: TokenType) -> Token {
+    Token {token_type, value, position}
+}
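
A note on the dispatch pattern this patch introduces in `next_token`: every scanner now takes the current character plus the input and returns `Option<LexResult>`, so scanners compose with `Option::or_else`, the first scanner that recognizes the character wins, and `unwrap_or_else` turns "no scanner matched" into a `LexResult::Err`. Below is a minimal, self-contained sketch of that chaining, not code from this repository; the names `Scan`, `Tok`, `digit_scanner` and `sign_scanner` are hypothetical stand-ins for `LexResult`, `Token` and the `scanner::*` functions.

// Illustrative sketch only: mirrors the Option-based scanner chaining used by
// next_token in this patch. All names below are hypothetical, not repo code.

#[derive(Debug)]
struct Tok {
    value: String,
}

#[derive(Debug)]
enum Scan {
    // A token was produced, together with the position after it
    Some(Tok, usize),
    // The scanner claimed the character but failed while scanning
    Err(String),
}

// A scanner returns None when the character is not for it, so the caller can
// fall through to the next scanner with or_else.
fn digit_scanner(c: char, pos: usize) -> Option<Scan> {
    if c.is_ascii_digit() {
        Some(Scan::Some(Tok { value: c.to_string() }, pos + 1))
    } else {
        None
    }
}

fn sign_scanner(c: char, pos: usize) -> Option<Scan> {
    match c {
        '(' | ')' | '{' | '}' | '[' | ']' => {
            Some(Scan::Some(Tok { value: c.to_string() }, pos + 1))
        }
        _ => None,
    }
}

fn next(c: char, pos: usize) -> Scan {
    // First Some wins; otherwise report the unrecognized character,
    // mirroring the or_else / unwrap_or_else chain in next_token.
    None
        .or_else(|| digit_scanner(c, pos))
        .or_else(|| sign_scanner(c, pos))
        .unwrap_or_else(|| Scan::Err(format!("Unrecognized character: {}", c)))
}

fn main() {
    println!("{:?}", next('7', 0)); // Some(Tok { value: "7" }, 1)
    println!("{:?}", next('[', 3)); // Some(Tok { value: "[" }, 4)
    println!("{:?}", next('@', 5)); // Err("Unrecognized character: @")
}

Adding another scanner (the way grouping_sign is added in this patch) then only needs one more `.or_else` link; existing scanners stay untouched as long as each returns None for characters it does not handle.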