Refactor. Fix invalid position field of Number tokens

This commit is contained in:
Araozu 2023-03-16 13:31:24 -05:00
parent f882942f3f
commit def93715c7
6 changed files with 197 additions and 53 deletions

View File

@ -11,8 +11,7 @@ impl PrintableError for SyntaxError {
format!( format!(
"\n{}\n{}{}\n\n{}{}{}\n{}", "\n{}\n{}{}\n\n{}{}{}\n{}",
line, whitespace, indicator, "Syntax error at pos ", self.error_start, ":", line, whitespace, indicator, "Syntax error at pos ", self.error_start, ":", self.reason
self.reason
) )
} }
} }
@ -122,7 +121,29 @@ mod tests {
let (chars, error) = get_error_data(String::from("val")); let (chars, error) = get_error_data(String::from("val"));
let actual_err = error.get_error_str(&chars); let actual_err = error.get_error_str(&chars);
// TODO: Write a better error message (something that explains why it failed) // TODO: Write a better error message (something that explains why it failed)
let expected_str = format!("\n{}\n{}\n\n{}\n{}", "val", "^^^", "Syntax error at pos 0:", "There should be an identifier after a `val` token"); let expected_str = format!(
"\n{}\n{}\n\n{}\n{}",
"val",
"^^^",
"Syntax error at pos 0:",
"There should be an identifier after a `val` token"
);
assert_eq!(expected_str, actual_err);
}
#[test]
fn should_show_an_error_for_missing_equal_operator() {
let (chars, error) = get_error_data(String::from("val name"));
let actual_err = error.get_error_str(&chars);
// TODO: Write a better error message (something that explains why it failed)
let expected_str = format!(
"\n{}\n{}\n\n{}\n{}",
"val name",
" ^^^^",
"Syntax error at pos 4:",
"There should be an equal sign `=` after the identifier"
);
assert_eq!(expected_str, actual_err); assert_eq!(expected_str, actual_err);
} }

View File

@ -38,11 +38,20 @@ fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String, is_datatype:
// so this is used to retrieve the original START position of the token // so this is used to retrieve the original START position of the token
let current_len = current.len(); let current_len = current.len();
if let Some(token_type) = str_is_keyword(&current) { if let Some(token_type) = str_is_keyword(&current) {
LexResult::Some(token::new(current, start_pos - current_len, token_type), start_pos) LexResult::Some(
token::new(current, start_pos - current_len, token_type),
start_pos,
)
} else if is_datatype { } else if is_datatype {
LexResult::Some(token::new_datatype(current, start_pos - current_len), start_pos) LexResult::Some(
token::new_datatype(current, start_pos - current_len),
start_pos,
)
} else { } else {
LexResult::Some(token::new_identifier(current, start_pos - current_len), start_pos) LexResult::Some(
token::new_identifier(current, start_pos - current_len),
start_pos,
)
} }
} }
} }

View File

@ -33,7 +33,13 @@ fn scan_decimal(chars: &Vec<char>, start_pos: usize, current: String) -> LexResu
Some(c) if utils::is_digit(*c) => { Some(c) if utils::is_digit(*c) => {
scan_decimal(chars, start_pos + 1, utils::str_append(current, *c)) scan_decimal(chars, start_pos + 1, utils::str_append(current, *c))
} }
_ => LexResult::Some(token::new_number(current, start_pos), start_pos), _ => {
// start_pos is the position where the token ENDS, not where it STARTS,
// so this is used to retrieve the original START position of the token
let current_len = current.len();
LexResult::Some(token::new_number(current, start_pos - current_len), start_pos)
},
} }
} }
@ -86,7 +92,13 @@ fn scan_double_impl(chars: &Vec<char>, start_pos: usize, current: String) -> Lex
Some(c) if *c == 'e' => { Some(c) if *c == 'e' => {
scan_scientific(chars, start_pos + 1, utils::str_append(current, *c)) scan_scientific(chars, start_pos + 1, utils::str_append(current, *c))
} }
_ => LexResult::Some(token::new_number(current, start_pos), start_pos), _ => {
// start_pos is the position where the token ENDS, not where it STARTS,
// so this is used to retrieve the original START position of the token
let current_len = current.len();
LexResult::Some(token::new_number(current, start_pos - current_len), start_pos)
}
} }
} }
@ -123,7 +135,13 @@ fn scan_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Token,
Some(c) if utils::is_digit(*c) => { Some(c) if utils::is_digit(*c) => {
scan_digits(chars, start_pos + 1, utils::str_append(current, *c)) scan_digits(chars, start_pos + 1, utils::str_append(current, *c))
} }
_ => (token::new_number(current, start_pos), start_pos), _ => {
// start_pos is the position where the token ENDS, not where it STARTS,
// so this is used to retrieve the original START position of the token
let current_len = current.len();
(token::new_number(current, start_pos - current_len), start_pos)
}
} }
} }
@ -133,7 +151,13 @@ fn scan_hex_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Tok
Some(c) if utils::is_hex_digit(*c) => { Some(c) if utils::is_hex_digit(*c) => {
scan_hex_digits(chars, start_pos + 1, utils::str_append(current, *c)) scan_hex_digits(chars, start_pos + 1, utils::str_append(current, *c))
} }
_ => (token::new_number(current, start_pos), start_pos), _ => {
// start_pos is the position where the token ENDS, not where it STARTS,
// so this is used to retrieve the original START position of the token
let current_len = current.len();
(token::new_number(current, start_pos - current_len), start_pos)
}
} }
} }
@ -156,6 +180,7 @@ mod tests {
assert_eq!(3, next); assert_eq!(3, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!("123", token.value); assert_eq!("123", token.value);
assert_eq!(0, token.position);
} else { } else {
panic!() panic!()
} }
@ -167,6 +192,7 @@ mod tests {
assert_eq!(4, next); assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!("0123", token.value); assert_eq!("0123", token.value);
assert_eq!(0, token.position);
} else { } else {
panic!() panic!()
} }
@ -178,6 +204,7 @@ mod tests {
assert_eq!(8, next); assert_eq!(8, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!("123456", token.value); assert_eq!("123456", token.value);
assert_eq!(2, token.position);
} else { } else {
panic!() panic!()
} }
@ -207,6 +234,7 @@ mod tests {
assert_eq!(4, next); assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!("0x20", token.value); assert_eq!("0x20", token.value);
assert_eq!(0, token.position);
} else { } else {
panic!() panic!()
} }
@ -218,6 +246,7 @@ mod tests {
assert_eq!(12, next); assert_eq!(12, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!("0xff23DA", token.value); assert_eq!("0xff23DA", token.value);
assert_eq!(4, token.position);
} else { } else {
panic!() panic!()
} }
@ -268,6 +297,7 @@ mod tests {
assert_eq!(4, next); assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!("3.22", token.value); assert_eq!("3.22", token.value);
assert_eq!(0, token.position);
} else { } else {
panic!() panic!()
} }
@ -278,6 +308,7 @@ mod tests {
assert_eq!(11, next); assert_eq!(11, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!("123456.7890", token.value); assert_eq!("123456.7890", token.value);
assert_eq!(0, token.position);
} else { } else {
panic!() panic!()
} }
@ -317,6 +348,7 @@ mod tests {
assert_eq!("1e+0", token.value); assert_eq!("1e+0", token.value);
assert_eq!(4, next); assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!(0, token.position);
} else { } else {
panic!() panic!()
} }
@ -327,6 +359,7 @@ mod tests {
assert_eq!(4, next); assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!("1e-0", token.value); assert_eq!("1e-0", token.value);
assert_eq!(0, token.position);
} else { } else {
panic!() panic!()
} }
@ -337,6 +370,7 @@ mod tests {
assert_eq!(4, next); assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!("0e+0", token.value); assert_eq!("0e+0", token.value);
assert_eq!(0, token.position);
} else { } else {
panic!() panic!()
} }
@ -347,6 +381,7 @@ mod tests {
assert_eq!(19, next); assert_eq!(19, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!("123498790e+12349870", token.value); assert_eq!("123498790e+12349870", token.value);
assert_eq!(0, token.position);
} else { } else {
panic!() panic!()
} }
@ -361,6 +396,7 @@ mod tests {
assert_eq!("1.24e+1", token.value); assert_eq!("1.24e+1", token.value);
assert_eq!(7, next); assert_eq!(7, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!(0, token.position);
} else { } else {
panic!() panic!()
} }
@ -371,8 +407,25 @@ mod tests {
assert_eq!("0.00000000000001e+1", token.value); assert_eq!("0.00000000000001e+1", token.value);
assert_eq!(19, next); assert_eq!(19, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!(0, token.position);
} else { } else {
panic!() panic!()
} }
} }
#[test]
fn position_should_be_valid() {
    // Leading whitespace: the token starts at index 2, not 0.
    let chars = str_to_vec("  123  ");
    let scan_from = 2;
    match scan(&chars, scan_from) {
        LexResult::Some(token, next) => {
            assert_eq!(5, next);
            assert_eq!(TokenType::Number, token.token_type);
            assert_eq!("123", token.value);
            // `position` must point at the START of the token, not its end.
            assert_eq!(2, token.position);
        }
        _ => panic!("Expected some value"),
    }
}
} }

View File

@ -17,6 +17,7 @@ mod ast_types;
mod symbol_table; mod symbol_table;
// Transforms an AST to JS // Transforms an AST to JS
mod codegen; mod codegen;
mod utils;
mod error_handling; mod error_handling;

View File

@ -2,6 +2,7 @@ use super::ast_types::{Binding, ValBinding, VarBinding};
use super::{expression, SyntaxResult}; use super::{expression, SyntaxResult};
use crate::error_handling::SyntaxError; use crate::error_handling::SyntaxError;
use crate::token::{Token, TokenType}; use crate::token::{Token, TokenType};
use crate::utils::Result3;
// TODO: Should return a 3 state value: // TODO: Should return a 3 state value:
// - Success: binding parsed successfully // - Success: binding parsed successfully
@ -12,16 +13,13 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
// Optional datatype annotation // Optional datatype annotation
let datatype_annotation = { let datatype_annotation = {
match tokens.get(pos) { match try_token_type(tokens, pos, TokenType::Datatype) {
Some(t) if t.token_type == TokenType::Datatype => { Result3::Ok(t) => {
pos += 1; pos += 1;
Some(String::from(&t.value)) Some(String::from(&t.value))
} }
// If the first token is anything else, ignore Result3::Err(_) => None,
Some(_) => None, Result3::None => panic!(
// This should never match, as there should always be at least a
// TokenType::Semicolon or TokenType::EOF
None => panic!(
"Internal compiler error: Illegal token stream at src/syntax/binding.rs#try_parse" "Internal compiler error: Illegal token stream at src/syntax/binding.rs#try_parse"
), ),
} }
@ -31,40 +29,67 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
let (is_val, binding_token) = { let (is_val, binding_token) = {
let res1 = try_token_type(tokens, pos, TokenType::VAL); let res1 = try_token_type(tokens, pos, TokenType::VAL);
match res1 { match res1 {
Some(val_token) => (true, val_token), Result3::Ok(val_token) => (true, val_token),
None => { _ => {
let res2 = try_token_type(tokens, pos, TokenType::VAR); let res2 = try_token_type(tokens, pos, TokenType::VAR);
match res2 { match res2 {
Some(var_token) => (false, var_token), Result3::Ok(var_token) => (false, var_token),
// Neither VAL nor VAR were matched, the parser should try // Neither VAL nor VAR were matched, the parser should try
// other constructs // other constructs
None => return None, _ => return None,
} }
} }
} }
}; };
let identifier = try_token_type(tokens, pos + 1, TokenType::Identifier); let identifier = match try_token_type(tokens, pos + 1, TokenType::Identifier) {
if identifier.is_none() { Result3::Ok(t) => t,
// TODO: Differentiate between no token found and incorrect token found. Result3::Err(t) => {
// TODO: // The parser found a token, but it's not an identifier
// The parser didn't find an Identifier after VAL/VAR return Some(SyntaxResult::Err(SyntaxError {
return Some(SyntaxResult::Err(SyntaxError { reason: format!(
reason: format!( "There should be an identifier after a `{}` token",
"There should be an identifier after a `{}` token", if is_val { "val" } else { "var" }
if is_val {"val"} else {"var"} ),
), error_start: binding_token.position,
error_start: binding_token.position, error_end: binding_token.position + binding_token.value.len(),
error_end: binding_token.position + binding_token.value.len(), }));
})); }
} Result3::None => {
let identifier = identifier.unwrap(); // TODO: Differentiate between no token found and incorrect token found.
// The parser didn't find an Identifier after VAL/VAR
return Some(SyntaxResult::Err(SyntaxError {
reason: format!(
"There should be an identifier after a `{}` token",
if is_val { "val" } else { "var" }
),
error_start: binding_token.position,
error_end: binding_token.position + binding_token.value.len(),
}));
}
};
let equal_operator = try_operator(tokens, pos + 2, String::from("=")); let _equal_operator: &Token = match try_operator(tokens, pos + 2, String::from("=")) {
if equal_operator.is_none() { Result3::Ok(t) => t,
// TODO: return Error Result3::Err(t) => {
return None; // TODO: Differentiate between no token found and incorrect token found.
} // The parser didn't find the `=` operator after the identifier
return Some(SyntaxResult::Err(SyntaxError {
reason: format!("There should be an equal sign `=` after the identifier",),
error_start: identifier.position,
error_end: identifier.position + identifier.value.len(),
}));
}
Result3::None => {
// TODO: Differentiate between no token found and incorrect token found.
// The parser didn't find the `=` operator after the identifier
return Some(SyntaxResult::Err(SyntaxError {
reason: format!("There should be an equal sign `=` after the identifier",),
error_start: identifier.position,
error_end: identifier.position + identifier.value.len(),
}));
}
};
let expression = expression::try_parse(tokens, pos + 3); let expression = expression::try_parse(tokens, pos + 3);
if expression.is_none() { if expression.is_none() {
@ -90,16 +115,21 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
Some(SyntaxResult::Ok(binding)) Some(SyntaxResult::Ok(binding))
} }
fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Option<&Token> { /// Expects the token at `pos` to be of type `token_type`
tokens fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Result3<&Token> {
.get(pos) match tokens.get(pos) {
.and_then(|token| (token.token_type == token_type).then(|| token)) Some(t) if t.token_type == token_type => Result3::Ok(t),
Some(t) => Result3::Err(t),
None => Result3::None,
}
} }
fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Option<&Token> { fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Result3<&Token> {
tokens.get(pos).and_then(|token| { match tokens.get(pos) {
(token.token_type == TokenType::Operator && token.value == operator).then(|| token) Some(t) if t.token_type == TokenType::Operator && t.value == operator => Result3::Ok(t),
}) Some(t) => Result3::Err(t),
None => Result3::None,
}
} }
#[cfg(test)] #[cfg(test)]
@ -123,7 +153,7 @@ mod tests {
#[test] #[test]
fn should_parse_val() { fn should_parse_val() {
let tokens = get_tokens(&String::from("val")).unwrap(); let tokens = get_tokens(&String::from("val")).unwrap();
let token = try_token_type(&tokens, 0, TokenType::VAL).unwrap(); let token = *try_token_type(&tokens, 0, TokenType::VAL).unwrap();
assert_eq!(TokenType::VAL, token.token_type); assert_eq!(TokenType::VAL, token.token_type);
assert_eq!("val", token.value); assert_eq!("val", token.value);
@ -132,7 +162,7 @@ mod tests {
#[test] #[test]
fn should_parse_identifier() { fn should_parse_identifier() {
let tokens = get_tokens(&String::from("identifier")).unwrap(); let tokens = get_tokens(&String::from("identifier")).unwrap();
let token = try_token_type(&tokens, 0, TokenType::Identifier).unwrap(); let token = *try_token_type(&tokens, 0, TokenType::Identifier).unwrap();
assert_eq!("identifier", token.value); assert_eq!("identifier", token.value);
} }
@ -140,7 +170,7 @@ mod tests {
#[test] #[test]
fn should_parse_operator() { fn should_parse_operator() {
let tokens = get_tokens(&String::from("=")).unwrap(); let tokens = get_tokens(&String::from("=")).unwrap();
let token = try_operator(&tokens, 0, String::from("=")).unwrap(); let token = *try_operator(&tokens, 0, String::from("=")).unwrap();
assert_eq!("=", token.value); assert_eq!("=", token.value);
} }
@ -182,7 +212,23 @@ mod tests {
assert_eq!(0, error.error_start); assert_eq!(0, error.error_start);
assert_eq!(3, error.error_end); assert_eq!(3, error.error_end);
} }
_ => panic!(), _ => panic!("Error expected"),
}
}
#[test]
fn should_return_error_when_identifier_is_wrong() {
let tokens = get_tokens(&String::from("val 322")).unwrap();
assert_eq!(TokenType::VAL, tokens[0].token_type);
assert_eq!(0, tokens[0].position);
let binding = try_parse(&tokens, 0).unwrap();
match binding {
SyntaxResult::Err(error) => {
// assert_eq!(4, error.error_start);
// assert_eq!(7, error.error_end);
}
_ => panic!("Error expected")
} }
} }
} }

14
src/utils/mod.rs Normal file
View File

@ -0,0 +1,14 @@
/// A three-state result: success (`Ok`), failure carrying a value (`Err`),
/// or the complete absence of a value (`None`).
#[derive(Debug)]
pub enum Result3<T> {
    Ok(T),
    Err(T),
    None,
}

impl<T> Result3<T> {
    /// Returns a reference to the contained `Ok` value.
    ///
    /// # Panics
    ///
    /// Panics if `self` is `Err` or `None`.
    pub fn unwrap(&self) -> &T {
        match self {
            Result3::Ok(t) => t,
            // An empty panic message (`panic!("")`) gives no diagnostic at all;
            // follow the std `Option::unwrap` message convention instead.
            _ => panic!("called `Result3::unwrap()` on an `Err` or `None` value"),
        }
    }
}