From def93715c74b4944bebe628e4488b842be4f6335 Mon Sep 17 00:00:00 2001
From: Araozu
Date: Thu, 16 Mar 2023 13:31:24 -0500
Subject: [PATCH] Refactor. Fix invalid position field of Number tokens

---
 src/error_handling/syntax_error.rs |  27 +++++-
 src/lexic/scanner/identifier.rs    |  15 +++-
 src/lexic/scanner/number.rs        |  61 ++++++++++++-
 src/main.rs                        |   1 +
 src/syntax/binding.rs              | 132 +++++++++++++++++++----------
 src/utils/mod.rs                   |  14 +++
 6 files changed, 197 insertions(+), 53 deletions(-)
 create mode 100644 src/utils/mod.rs

diff --git a/src/error_handling/syntax_error.rs b/src/error_handling/syntax_error.rs
index a68d4b2..f109cb7 100644
--- a/src/error_handling/syntax_error.rs
+++ b/src/error_handling/syntax_error.rs
@@ -11,8 +11,7 @@ impl PrintableError for SyntaxError {
         format!(
             "\n{}\n{}{}\n\n{}{}{}\n{}",
-            line, whitespace, indicator, "Syntax error at pos ", self.error_start, ":",
-            self.reason
+            line, whitespace, indicator, "Syntax error at pos ", self.error_start, ":", self.reason
         )
     }
 }

@@ -122,7 +121,29 @@ mod tests {
         let (chars, error) = get_error_data(String::from("val"));
         let actual_err = error.get_error_str(&chars);
         // TODO: Write a better error message (something that explains why it failed)
-        let expected_str = format!("\n{}\n{}\n\n{}\n{}", "val", "^^^", "Syntax error at pos 0:", "There should be an identifier after a `val` token");
+        let expected_str = format!(
+            "\n{}\n{}\n\n{}\n{}",
+            "val",
+            "^^^",
+            "Syntax error at pos 0:",
+            "There should be an identifier after a `val` token"
+        );
+
+        assert_eq!(expected_str, actual_err);
+    }
+
+    #[test]
+    fn should_show_an_error_for_missing_equal_operator() {
+        let (chars, error) = get_error_data(String::from("val name"));
+        let actual_err = error.get_error_str(&chars);
+        // TODO: Write a better error message (something that explains why it failed)
+        let expected_str = format!(
+            "\n{}\n{}\n\n{}\n{}",
+            "val name",
+            "    ^^^^",
+            "Syntax error at pos 4:",
+            "There should be an equal sign `=` after the identifier"
+        );

         assert_eq!(expected_str, actual_err);
     }
diff --git a/src/lexic/scanner/identifier.rs b/src/lexic/scanner/identifier.rs
index be209dd..9110793 100755
--- a/src/lexic/scanner/identifier.rs
+++ b/src/lexic/scanner/identifier.rs
@@ -38,11 +38,20 @@ fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String, is_datatype:
             // so this is used to retrieve the original START position of the token
             let current_len = current.len();
             if let Some(token_type) = str_is_keyword(&current) {
-                LexResult::Some(token::new(current, start_pos - current_len, token_type), start_pos)
+                LexResult::Some(
+                    token::new(current, start_pos - current_len, token_type),
+                    start_pos,
+                )
             } else if is_datatype {
-                LexResult::Some(token::new_datatype(current, start_pos - current_len), start_pos)
+                LexResult::Some(
+                    token::new_datatype(current, start_pos - current_len),
+                    start_pos,
+                )
             } else {
-                LexResult::Some(token::new_identifier(current, start_pos - current_len), start_pos)
+                LexResult::Some(
+                    token::new_identifier(current, start_pos - current_len),
+                    start_pos,
+                )
             }
         }
     }
diff --git a/src/lexic/scanner/number.rs b/src/lexic/scanner/number.rs
index 11728fb..2261144 100755
--- a/src/lexic/scanner/number.rs
+++ b/src/lexic/scanner/number.rs
@@ -33,7 +33,13 @@ fn scan_decimal(chars: &Vec<char>, start_pos: usize, current: String) -> LexResu
         Some(c) if utils::is_digit(*c) => {
             scan_decimal(chars, start_pos + 1, utils::str_append(current, *c))
         }
-        _ => LexResult::Some(token::new_number(current, start_pos), start_pos),
+        _ => {
+            // start_pos is the position where the token ENDS, not where it STARTS,
+            // so this is used to retrieve the original START position of the token
+            let current_len = current.len();
+
+            LexResult::Some(token::new_number(current, start_pos - current_len), start_pos)
+        },
     }
 }

@@ -86,7 +92,13 @@ fn scan_double_impl(chars: &Vec<char>, start_pos: usize, current: String) -> Lex
         Some(c) if *c == 'e' => {
             scan_scientific(chars, start_pos + 1, utils::str_append(current, *c))
         }
-        _ => LexResult::Some(token::new_number(current, start_pos), start_pos),
+        _ => {
+            // start_pos is the position where the token ENDS, not where it STARTS,
+            // so this is used to retrieve the original START position of the token
+            let current_len = current.len();
+
+            LexResult::Some(token::new_number(current, start_pos - current_len), start_pos)
+        }
     }
 }

@@ -123,7 +135,13 @@ fn scan_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Token,
         Some(c) if utils::is_digit(*c) => {
             scan_digits(chars, start_pos + 1, utils::str_append(current, *c))
         }
-        _ => (token::new_number(current, start_pos), start_pos),
+        _ => {
+            // start_pos is the position where the token ENDS, not where it STARTS,
+            // so this is used to retrieve the original START position of the token
+            let current_len = current.len();
+
+            (token::new_number(current, start_pos - current_len), start_pos)
+        }
     }
 }

@@ -133,7 +151,13 @@ fn scan_hex_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Tok
         Some(c) if utils::is_hex_digit(*c) => {
             scan_hex_digits(chars, start_pos + 1, utils::str_append(current, *c))
         }
-        _ => (token::new_number(current, start_pos), start_pos),
+        _ => {
+            // start_pos is the position where the token ENDS, not where it STARTS,
+            // so this is used to retrieve the original START position of the token
+            let current_len = current.len();
+
+            (token::new_number(current, start_pos - current_len), start_pos)
+        }
     }
 }

@@ -156,6 +180,7 @@ mod tests {
             assert_eq!(3, next);
             assert_eq!(TokenType::Number, token.token_type);
             assert_eq!("123", token.value);
+            assert_eq!(0, token.position);
         } else {
             panic!()
         }
@@ -167,6 +192,7 @@ mod tests {
             assert_eq!(4, next);
             assert_eq!(TokenType::Number, token.token_type);
             assert_eq!("0123", token.value);
+            assert_eq!(0, token.position);
         } else {
             panic!()
         }
@@ -178,6 +204,7 @@ mod tests {
             assert_eq!(8, next);
            assert_eq!(TokenType::Number, token.token_type);
             assert_eq!("123456", token.value);
+            assert_eq!(2, token.position);
         } else {
             panic!()
         }
@@ -207,6 +234,7 @@ mod tests {
             assert_eq!(4, next);
             assert_eq!(TokenType::Number, token.token_type);
             assert_eq!("0x20", token.value);
+            assert_eq!(0, token.position);
         } else {
             panic!()
         }
@@ -218,6 +246,7 @@ mod tests {
             assert_eq!(12, next);
             assert_eq!(TokenType::Number, token.token_type);
             assert_eq!("0xff23DA", token.value);
+            assert_eq!(4, token.position);
         } else {
             panic!()
         }
@@ -268,6 +297,7 @@ mod tests {
             assert_eq!(4, next);
             assert_eq!(TokenType::Number, token.token_type);
             assert_eq!("3.22", token.value);
+            assert_eq!(0, token.position);
         } else {
             panic!()
         }
@@ -278,6 +308,7 @@ mod tests {
             assert_eq!(11, next);
             assert_eq!(TokenType::Number, token.token_type);
             assert_eq!("123456.7890", token.value);
+            assert_eq!(0, token.position);
         } else {
             panic!()
         }
@@ -317,6 +348,7 @@ mod tests {
             assert_eq!("1e+0", token.value);
             assert_eq!(4, next);
             assert_eq!(TokenType::Number, token.token_type);
+            assert_eq!(0, token.position);
         } else {
             panic!()
         }
@@ -327,6 +359,7 @@ mod tests {
             assert_eq!(4, next);
             assert_eq!(TokenType::Number, token.token_type);
             assert_eq!("1e-0", token.value);
+            assert_eq!(0, token.position);
         } else {
             panic!()
         }
@@ -337,6 +370,7 @@ mod tests {
             assert_eq!(4, next);
             assert_eq!(TokenType::Number, token.token_type);
             assert_eq!("0e+0", token.value);
+            assert_eq!(0, token.position);
         } else {
             panic!()
         }
@@ -347,6 +381,7 @@ mod tests {
             assert_eq!(19, next);
             assert_eq!(TokenType::Number, token.token_type);
             assert_eq!("123498790e+12349870", token.value);
+            assert_eq!(0, token.position);
         } else {
             panic!()
         }
@@ -361,6 +396,7 @@ mod tests {
             assert_eq!("1.24e+1", token.value);
             assert_eq!(7, next);
             assert_eq!(TokenType::Number, token.token_type);
+            assert_eq!(0, token.position);
         } else {
             panic!()
         }
@@ -371,8 +407,25 @@ mod tests {
             assert_eq!("0.00000000000001e+1", token.value);
             assert_eq!(19, next);
             assert_eq!(TokenType::Number, token.token_type);
+            assert_eq!(0, token.position);
         } else {
             panic!()
         }
     }
+
+    #[test]
+    fn position_should_be_valid() {
+        let input = str_to_vec("  123  ");
+        let start_pos = 2;
+
+        if let LexResult::Some(token, next) = scan(&input, start_pos) {
+            assert_eq!(5, next);
+            assert_eq!(TokenType::Number, token.token_type);
+            assert_eq!("123", token.value);
+            assert_eq!(2, token.position);
+        } else {
+            panic!("Expected some value")
+        };
+
+    }
 }
diff --git a/src/main.rs b/src/main.rs
index ad6058c..215f3db 100755
--- a/src/main.rs
+++ b/src/main.rs
@@ -17,6 +17,7 @@ mod ast_types;
 mod symbol_table;
 // Transforms an AST to JS
 mod codegen;
+mod utils;

 mod error_handling;

diff --git a/src/syntax/binding.rs b/src/syntax/binding.rs
index 9260464..cc738f5 100644
--- a/src/syntax/binding.rs
+++ b/src/syntax/binding.rs
@@ -2,6 +2,7 @@ use super::ast_types::{Binding, ValBinding, VarBinding};
 use super::{expression, SyntaxResult};
 use crate::error_handling::SyntaxError;
 use crate::token::{Token, TokenType};
+use crate::utils::Result3;

 // TODO: Should return a 3 state value:
 // - Success: binding parsed successfully
@@ -12,16 +13,13 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option

     // Optional datatype annotation
     let datatype_annotation = {
-        match tokens.get(pos) {
-            Some(t) if t.token_type == TokenType::Datatype => {
+        match try_token_type(tokens, pos, TokenType::Datatype) {
+            Result3::Ok(t) => {
                 pos += 1;
                 Some(String::from(&t.value))
             }
-            // If the first token is anything else, ignore
-            Some(_) => None,
-            // This should never match, as there should always be at least a
-            // TokenType::Semicolon or TokenType::EOF
-            None => panic!(
+            Result3::Err(_) => None,
+            Result3::None => panic!(
                 "Internal compiler error: Illegal token stream at src/syntax/binding.rs#try_parse"
             ),
         }
@@ -31,40 +29,67 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option
     let (is_val, binding_token) = {
         let res1 = try_token_type(tokens, pos, TokenType::VAL);
         match res1 {
-            Some(val_token) => (true, val_token),
-            None => {
+            Result3::Ok(val_token) => (true, val_token),
+            _ => {
                 let res2 = try_token_type(tokens, pos, TokenType::VAR);
                 match res2 {
-                    Some(var_token) => (false, var_token),
+                    Result3::Ok(var_token) => (false, var_token),
                     // Neither VAL nor VAR were matched, the parser should try
                     // other constructs
-                    None => return None,
+                    _ => return None,
                 }
             }
         }
     };

-    let identifier = try_token_type(tokens, pos + 1, TokenType::Identifier);
-    if identifier.is_none() {
-        // TODO: Differentiate between no token found and incorrect token found.
-        // TODO:
-        // The parser didn't find an Identifier after VAL/VAR
-        return Some(SyntaxResult::Err(SyntaxError {
-            reason: format!(
-                "There should be an identifier after a `{}` token",
-                if is_val {"val"} else {"var"}
-            ),
-            error_start: binding_token.position,
-            error_end: binding_token.position + binding_token.value.len(),
-        }));
-    }
-    let identifier = identifier.unwrap();
+    let identifier = match try_token_type(tokens, pos + 1, TokenType::Identifier) {
+        Result3::Ok(t) => t,
+        Result3::Err(t) => {
+            // The parser found a token, but it's not an identifier
+            return Some(SyntaxResult::Err(SyntaxError {
+                reason: format!(
+                    "There should be an identifier after a `{}` token",
+                    if is_val { "val" } else { "var" }
+                ),
+                error_start: binding_token.position,
+                error_end: binding_token.position + binding_token.value.len(),
+            }));
+        }
+        Result3::None => {
+            // TODO: Differentiate between no token found and incorrect token found.
+            // The parser didn't find an Identifier after VAL/VAR
+            return Some(SyntaxResult::Err(SyntaxError {
+                reason: format!(
+                    "There should be an identifier after a `{}` token",
+                    if is_val { "val" } else { "var" }
+                ),
+                error_start: binding_token.position,
+                error_end: binding_token.position + binding_token.value.len(),
+            }));
+        }
+    };

-    let equal_operator = try_operator(tokens, pos + 2, String::from("="));
-    if equal_operator.is_none() {
-        // TODO: return Error
-        return None;
-    }
+    let _equal_operator: &Token = match try_operator(tokens, pos + 2, String::from("=")) {
+        Result3::Ok(t) => t,
+        Result3::Err(t) => {
+            // TODO: Differentiate between no token found and incorrect token found.
+            // The parser didn't find the `=` operator after the identifier
+            return Some(SyntaxResult::Err(SyntaxError {
+                reason: format!("There should be an equal sign `=` after the identifier",),
+                error_start: identifier.position,
+                error_end: identifier.position + identifier.value.len(),
+            }));
+        }
+        Result3::None => {
+            // TODO: Differentiate between no token found and incorrect token found.
+            // The parser didn't find the `=` operator after the identifier
+            return Some(SyntaxResult::Err(SyntaxError {
+                reason: format!("There should be an equal sign `=` after the identifier",),
+                error_start: identifier.position,
+                error_end: identifier.position + identifier.value.len(),
+            }));
+        }
+    };

     let expression = expression::try_parse(tokens, pos + 3);
     if expression.is_none() {
@@ -90,16 +115,21 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option
     Some(SyntaxResult::Ok(binding))
 }

-fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Option<&Token> {
-    tokens
-        .get(pos)
-        .and_then(|token| (token.token_type == token_type).then(|| token))
+/// Expects the token at `pos` to be of type `token_type`
+fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Result3<&Token> {
+    match tokens.get(pos) {
+        Some(t) if t.token_type == token_type => Result3::Ok(t),
+        Some(t) => Result3::Err(t),
+        None => Result3::None,
+    }
 }

-fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Option<&Token> {
-    tokens.get(pos).and_then(|token| {
-        (token.token_type == TokenType::Operator && token.value == operator).then(|| token)
-    })
+fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Result3<&Token> {
+    match tokens.get(pos) {
+        Some(t) if t.token_type == TokenType::Operator && t.value == operator => Result3::Ok(t),
+        Some(t) => Result3::Err(t),
+        None => Result3::None,
+    }
 }

 #[cfg(test)]
@@ -123,7 +153,7 @@ mod tests {
     #[test]
     fn should_parse_val() {
         let tokens = get_tokens(&String::from("val")).unwrap();
-        let token = try_token_type(&tokens, 0, TokenType::VAL).unwrap();
+        let token = *try_token_type(&tokens, 0, TokenType::VAL).unwrap();

         assert_eq!(TokenType::VAL, token.token_type);
         assert_eq!("val", token.value);
@@ -132,7 +162,7 @@ mod tests {
     #[test]
     fn should_parse_identifier() {
         let tokens = get_tokens(&String::from("identifier")).unwrap();
-        let token = try_token_type(&tokens, 0, TokenType::Identifier).unwrap();
+        let token = *try_token_type(&tokens, 0, TokenType::Identifier).unwrap();

         assert_eq!("identifier", token.value);
     }
@@ -140,7 +170,7 @@ mod tests {
     #[test]
     fn should_parse_operator() {
         let tokens = get_tokens(&String::from("=")).unwrap();
-        let token = *try_operator(&tokens, 0, String::from("=")).unwrap();
+        let token = *try_operator(&tokens, 0, String::from("=")).unwrap();

         assert_eq!("=", token.value);
     }
@@ -182,7 +212,23 @@ mod tests {
                 assert_eq!(0, error.error_start);
                 assert_eq!(3, error.error_end);
             }
-            _ => panic!(),
+            _ => panic!("Error expected"),
         }
     }
+
+    #[test]
+    fn should_return_error_when_identifier_is_wrong() {
+        let tokens = get_tokens(&String::from("val 322")).unwrap();
+        assert_eq!(TokenType::VAL, tokens[0].token_type);
+        assert_eq!(0, tokens[0].position);
+        let binding = try_parse(&tokens, 0).unwrap();
+
+        match binding {
+            SyntaxResult::Err(error) => {
+                // assert_eq!(4, error.error_start);
+                // assert_eq!(7, error.error_end);
+            }
+            _ => panic!("Error expected")
+        }
+    }
 }
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
new file mode 100644
index 0000000..75fbe2a
--- /dev/null
+++ b/src/utils/mod.rs
@@ -0,0 +1,14 @@
+pub enum Result3<T> {
+    Ok(T),
+    Err(T),
+    None,
+}
+
+impl<T> Result3<T> {
+    pub fn unwrap(&self) -> &T {
+        match self {
+            Result3::Ok(t) => t,
+            _ => panic!("")
+        }
+    }
+}
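
Note (editor's comment, not part of the patch): the three-state Result3 added in src/utils/mod.rs is what lets binding.rs tell apart "a token is present but it is the wrong one" (Result3::Err) from "the token stream ended" (Result3::None), which the previous Option-based helpers could not express. The following is a minimal, self-contained sketch of that pattern; the function name `expect` and the plain &str tokens are illustrative stand-ins, not the project's API.

// Standalone sketch of the three-state lookup used by try_token_type / try_operator.
// `expect` and the &str "tokens" are hypothetical; the real code works on Token values.
enum Result3<T> {
    Ok(T),   // the expected token is at `pos`
    Err(T),  // some token is at `pos`, but not the expected one
    None,    // there is no token at `pos` (end of the stream)
}

fn expect<'a>(tokens: &'a [&'a str], pos: usize, expected: &str) -> Result3<&'a str> {
    match tokens.get(pos) {
        Some(t) if *t == expected => Result3::Ok(*t),
        Some(t) => Result3::Err(*t),
        None => Result3::None,
    }
}

fn main() {
    let tokens = ["val", "name"];
    // pos 1 holds "name", not "=", so this takes the Err branch and can report
    // "wrong token" instead of "unexpected end of input".
    match expect(&tokens, 1, "=") {
        Result3::Ok(_) => println!("found `=`"),
        Result3::Err(t) => println!("expected `=`, found `{}`", t),
        Result3::None => println!("expected `=`, found end of input"),
    }
}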