From def93715c74b4944bebe628e4488b842be4f6335 Mon Sep 17 00:00:00 2001
From: Araozu
Date: Thu, 16 Mar 2023 13:31:24 -0500
Subject: [PATCH] Refactor. Fix invalid position field of Number tokens

---
 src/error_handling/syntax_error.rs |  27 +++++-
 src/lexic/scanner/identifier.rs    |  15 +++-
 src/lexic/scanner/number.rs        |  61 ++++++++++++-
 src/main.rs                        |   1 +
 src/syntax/binding.rs              | 132 +++++++++++++++++++----------
 src/utils/mod.rs                   |  14 +++
 6 files changed, 197 insertions(+), 53 deletions(-)
 create mode 100644 src/utils/mod.rs

diff --git a/src/error_handling/syntax_error.rs b/src/error_handling/syntax_error.rs
index a68d4b2..f109cb7 100644
--- a/src/error_handling/syntax_error.rs
+++ b/src/error_handling/syntax_error.rs
@@ -11,8 +11,7 @@ impl PrintableError for SyntaxError {
         format!(
             "\n{}\n{}{}\n\n{}{}{}\n{}",
-            line, whitespace, indicator, "Syntax error at pos ", self.error_start, ":",
-            self.reason
+            line, whitespace, indicator, "Syntax error at pos ", self.error_start, ":", self.reason
         )
     }
 }

@@ -122,7 +121,29 @@ mod tests {
         let (chars, error) = get_error_data(String::from("val"));
         let actual_err = error.get_error_str(&chars);
         // TODO: Write a better error message (something that explains why it failed)
-        let expected_str = format!("\n{}\n{}\n\n{}\n{}", "val", "^^^", "Syntax error at pos 0:", "There should be an identifier after a `val` token");
+        let expected_str = format!(
+            "\n{}\n{}\n\n{}\n{}",
+            "val",
+            "^^^",
+            "Syntax error at pos 0:",
+            "There should be an identifier after a `val` token"
+        );
+
+        assert_eq!(expected_str, actual_err);
+    }
+
+    #[test]
+    fn should_show_an_error_for_missing_equal_operator() {
+        let (chars, error) = get_error_data(String::from("val name"));
+        let actual_err = error.get_error_str(&chars);
+        // TODO: Write a better error message (something that explains why it failed)
+        let expected_str = format!(
+            "\n{}\n{}\n\n{}\n{}",
+            "val name",
+            "    ^^^^",
+            "Syntax error at pos 4:",
+            "There should be an equal sign `=` after the identifier"
+        );

         assert_eq!(expected_str, actual_err);
     }
diff --git a/src/lexic/scanner/identifier.rs b/src/lexic/scanner/identifier.rs
index be209dd..9110793 100755
--- a/src/lexic/scanner/identifier.rs
+++ b/src/lexic/scanner/identifier.rs
@@ -38,11 +38,20 @@ fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String, is_datatype:
             // so this is used to retrieve the original START position of the token
             let current_len = current.len();
             if let Some(token_type) = str_is_keyword(&current) {
-                LexResult::Some(token::new(current, start_pos - current_len, token_type), start_pos)
+                LexResult::Some(
+                    token::new(current, start_pos - current_len, token_type),
+                    start_pos,
+                )
             } else if is_datatype {
-                LexResult::Some(token::new_datatype(current, start_pos - current_len), start_pos)
+                LexResult::Some(
+                    token::new_datatype(current, start_pos - current_len),
+                    start_pos,
+                )
             } else {
-                LexResult::Some(token::new_identifier(current, start_pos - current_len), start_pos)
+                LexResult::Some(
+                    token::new_identifier(current, start_pos - current_len),
+                    start_pos,
+                )
             }
         }
     }
diff --git a/src/lexic/scanner/number.rs b/src/lexic/scanner/number.rs
index 11728fb..2261144 100755
--- a/src/lexic/scanner/number.rs
+++ b/src/lexic/scanner/number.rs
@@ -33,7 +33,13 @@ fn scan_decimal(chars: &Vec<char>, start_pos: usize, current: String) -> LexResu
         Some(c) if utils::is_digit(*c) => {
             scan_decimal(chars, start_pos + 1, utils::str_append(current, *c))
         }
-        _ => LexResult::Some(token::new_number(current, start_pos), start_pos),
+        _ => {
+            // start_pos is the position where the token ENDS, not where it STARTS,
+            // so this is used to retrieve the original START position of the token
+            let current_len = current.len();
+
+            LexResult::Some(token::new_number(current, start_pos - current_len), start_pos)
+        },
     }
 }

@@ -86,7 +92,13 @@ fn scan_double_impl(chars: &Vec<char>, start_pos: usize, current: String) -> Lex
         Some(c) if *c == 'e' => {
             scan_scientific(chars, start_pos + 1, utils::str_append(current, *c))
         }
-        _ => LexResult::Some(token::new_number(current, start_pos), start_pos),
+        _ => {
+            // start_pos is the position where the token ENDS, not where it STARTS,
+            // so this is used to retrieve the original START position of the token
+            let current_len = current.len();
+
+            LexResult::Some(token::new_number(current, start_pos - current_len), start_pos)
+        }
     }
 }

@@ -123,7 +135,13 @@ fn scan_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Token,
         Some(c) if utils::is_digit(*c) => {
             scan_digits(chars, start_pos + 1, utils::str_append(current, *c))
         }
-        _ => (token::new_number(current, start_pos), start_pos),
+        _ => {
+            // start_pos is the position where the token ENDS, not where it STARTS,
+            // so this is used to retrieve the original START position of the token
+            let current_len = current.len();
+
+            (token::new_number(current, start_pos - current_len), start_pos)
+        }
     }
 }

@@ -133,7 +151,13 @@ fn scan_hex_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Tok
         Some(c) if utils::is_hex_digit(*c) => {
             scan_hex_digits(chars, start_pos + 1, utils::str_append(current, *c))
         }
-        _ => (token::new_number(current, start_pos), start_pos),
+        _ => {
+            // start_pos is the position where the token ENDS, not where it STARTS,
+            // so this is used to retrieve the original START position of the token
+            let current_len = current.len();
+
+            (token::new_number(current, start_pos - current_len), start_pos)
+        }
     }
 }

@@ -156,6 +180,7 @@ mod tests {
             assert_eq!(3, next);
             assert_eq!(TokenType::Number, token.token_type);
             assert_eq!("123", token.value);
+            assert_eq!(0, token.position);
         } else {
             panic!()
         }
@@ -167,6 +192,7 @@ mod tests {
             assert_eq!(4, next);
             assert_eq!(TokenType::Number, token.token_type);
             assert_eq!("0123", token.value);
+            assert_eq!(0, token.position);
         } else {
             panic!()
         }
@@ -178,6 +204,7 @@ mod tests {
             assert_eq!(8, next);
            assert_eq!(TokenType::Number, token.token_type);
             assert_eq!("123456", token.value);
+            assert_eq!(2, token.position);
         } else {
             panic!()
         }
@@ -207,6 +234,7 @@ mod tests {
             assert_eq!(4, next);
             assert_eq!(TokenType::Number, token.token_type);
             assert_eq!("0x20", token.value);
+            assert_eq!(0, token.position);
         } else {
             panic!()
         }
@@ -218,6 +246,7 @@ mod tests {
             assert_eq!(12, next);
             assert_eq!(TokenType::Number, token.token_type);
             assert_eq!("0xff23DA", token.value);
+            assert_eq!(4, token.position);
         } else {
             panic!()
         }
@@ -268,6 +297,7 @@ mod tests {
             assert_eq!(4, next);
             assert_eq!(TokenType::Number, token.token_type);
             assert_eq!("3.22", token.value);
+            assert_eq!(0, token.position);
         } else {
             panic!()
         }
@@ -278,6 +308,7 @@ mod tests {
             assert_eq!(11, next);
             assert_eq!(TokenType::Number, token.token_type);
             assert_eq!("123456.7890", token.value);
+            assert_eq!(0, token.position);
         } else {
             panic!()
         }
@@ -317,6 +348,7 @@ mod tests {
             assert_eq!("1e+0", token.value);
             assert_eq!(4, next);
             assert_eq!(TokenType::Number, token.token_type);
+            assert_eq!(0, token.position);
         } else {
             panic!()
         }
@@ -327,6 +359,7 @@ mod tests {
             assert_eq!(4, next);
             assert_eq!(TokenType::Number, token.token_type);
             assert_eq!("1e-0", token.value);
+            assert_eq!(0, token.position);
         } else {
             panic!()
         }
@@ -337,6 +370,7 @@ mod tests {
             assert_eq!(4, next);
             assert_eq!(TokenType::Number, token.token_type);
             assert_eq!("0e+0", token.value);
+            assert_eq!(0, token.position);
         } else {
             panic!()
         }
@@ -347,6 +381,7 @@ mod tests {
             assert_eq!(19, next);
             assert_eq!(TokenType::Number, token.token_type);
             assert_eq!("123498790e+12349870", token.value);
+            assert_eq!(0, token.position);
         } else {
             panic!()
         }
@@ -361,6 +396,7 @@ mod tests {
             assert_eq!("1.24e+1", token.value);
             assert_eq!(7, next);
             assert_eq!(TokenType::Number, token.token_type);
+            assert_eq!(0, token.position);
         } else {
             panic!()
         }
@@ -371,8 +407,25 @@ mod tests {
             assert_eq!("0.00000000000001e+1", token.value);
             assert_eq!(19, next);
             assert_eq!(TokenType::Number, token.token_type);
+            assert_eq!(0, token.position);
         } else {
             panic!()
         }
     }
+
+    #[test]
+    fn position_should_be_valid() {
+        let input = str_to_vec("  123  ");
+        let start_pos = 2;
+
+        if let LexResult::Some(token, next) = scan(&input, start_pos) {
+            assert_eq!(5, next);
+            assert_eq!(TokenType::Number, token.token_type);
+            assert_eq!("123", token.value);
+            assert_eq!(2, token.position);
+        } else {
+            panic!("Expected some value")
+        };
+
+    }
 }
diff --git a/src/main.rs b/src/main.rs
index ad6058c..215f3db 100755
--- a/src/main.rs
+++ b/src/main.rs
@@ -17,6 +17,7 @@ mod ast_types;
 mod symbol_table;
 // Transforms an AST to JS
 mod codegen;
+mod utils;

 mod error_handling;

diff --git a/src/syntax/binding.rs b/src/syntax/binding.rs
index 9260464..cc738f5 100644
--- a/src/syntax/binding.rs
+++ b/src/syntax/binding.rs
@@ -2,6 +2,7 @@ use super::ast_types::{Binding, ValBinding, VarBinding};
 use super::{expression, SyntaxResult};
 use crate::error_handling::SyntaxError;
 use crate::token::{Token, TokenType};
+use crate::utils::Result3;

 // TODO: Should return a 3 state value:
 // - Success: binding parsed successfully
@@ -12,16 +13,13 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option

     // Optional datatype annotation
     let datatype_annotation = {
-        match tokens.get(pos) {
-            Some(t) if t.token_type == TokenType::Datatype => {
+        match try_token_type(tokens, pos, TokenType::Datatype) {
+            Result3::Ok(t) => {
                 pos += 1;
                 Some(String::from(&t.value))
             }
-            // If the first token is anything else, ignore
-            Some(_) => None,
-            // This should never match, as there should always be at least a
-            // TokenType::Semicolon or TokenType::EOF
-            None => panic!(
+            Result3::Err(_) => None,
+            Result3::None => panic!(
                 "Internal compiler error: Illegal token stream at src/syntax/binding.rs#try_parse"
             ),
         }
@@ -31,40 +29,67 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option
     let (is_val, binding_token) = {
         let res1 = try_token_type(tokens, pos, TokenType::VAL);
         match res1 {
-            Some(val_token) => (true, val_token),
-            None => {
+            Result3::Ok(val_token) => (true, val_token),
+            _ => {
                 let res2 = try_token_type(tokens, pos, TokenType::VAR);
                 match res2 {
-                    Some(var_token) => (false, var_token),
+                    Result3::Ok(var_token) => (false, var_token),
                     // Neither VAL nor VAR were matched, the parser should try
                     // other constructs
-                    None => return None,
+                    _ => return None,
                 }
             }
         }
     };

-    let identifier = try_token_type(tokens, pos + 1, TokenType::Identifier);
-    if identifier.is_none() {
-        // TODO: Differentiate between no token found and incorrect token found.
-        // TODO:
-        // The parser didn't find an Identifier after VAL/VAR
-        return Some(SyntaxResult::Err(SyntaxError {
-            reason: format!(
-                "There should be an identifier after a `{}` token",
-                if is_val {"val"} else {"var"}
-            ),
-            error_start: binding_token.position,
-            error_end: binding_token.position + binding_token.value.len(),
-        }));
-    }
-    let identifier = identifier.unwrap();
+    let identifier = match try_token_type(tokens, pos + 1, TokenType::Identifier) {
+        Result3::Ok(t) => t,
+        Result3::Err(t) => {
+            // The parser found a token, but it's not an identifier
+            return Some(SyntaxResult::Err(SyntaxError {
+                reason: format!(
+                    "There should be an identifier after a `{}` token",
+                    if is_val { "val" } else { "var" }
+                ),
+                error_start: binding_token.position,
+                error_end: binding_token.position + binding_token.value.len(),
+            }));
+        }
+        Result3::None => {
+            // TODO: Differentiate between no token found and incorrect token found.
+            // The parser didn't find an Identifier after VAL/VAR
+            return Some(SyntaxResult::Err(SyntaxError {
+                reason: format!(
+                    "There should be an identifier after a `{}` token",
+                    if is_val { "val" } else { "var" }
+                ),
+                error_start: binding_token.position,
+                error_end: binding_token.position + binding_token.value.len(),
+            }));
+        }
+    };

-    let equal_operator = try_operator(tokens, pos + 2, String::from("="));
-    if equal_operator.is_none() {
-        // TODO: return Error
-        return None;
-    }
+    let _equal_operator: &Token = match try_operator(tokens, pos + 2, String::from("=")) {
+        Result3::Ok(t) => t,
+        Result3::Err(t) => {
+            // TODO: Differentiate between no token found and incorrect token found.
+            // The parser didn't find the `=` operator after the identifier
+            return Some(SyntaxResult::Err(SyntaxError {
+                reason: format!("There should be an equal sign `=` after the identifier",),
+                error_start: identifier.position,
+                error_end: identifier.position + identifier.value.len(),
+            }));
+        }
+        Result3::None => {
+            // TODO: Differentiate between no token found and incorrect token found.
+            // The parser didn't find the `=` operator after the identifier
+            return Some(SyntaxResult::Err(SyntaxError {
+                reason: format!("There should be an equal sign `=` after the identifier",),
+                error_start: identifier.position,
+                error_end: identifier.position + identifier.value.len(),
+            }));
+        }
+    };

     let expression = expression::try_parse(tokens, pos + 3);
     if expression.is_none() {
@@ -90,16 +115,21 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option
     Some(SyntaxResult::Ok(binding))
 }

-fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Option<&Token> {
-    tokens
-        .get(pos)
-        .and_then(|token| (token.token_type == token_type).then(|| token))
+/// Expects the token at `pos` to be of type `token_type`
+fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Result3<&Token> {
+    match tokens.get(pos) {
+        Some(t) if t.token_type == token_type => Result3::Ok(t),
+        Some(t) => Result3::Err(t),
+        None => Result3::None,
+    }
 }

-fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Option<&Token> {
-    tokens.get(pos).and_then(|token| {
-        (token.token_type == TokenType::Operator && token.value == operator).then(|| token)
-    })
+fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Result3<&Token> {
+    match tokens.get(pos) {
+        Some(t) if t.token_type == TokenType::Operator && t.value == operator => Result3::Ok(t),
+        Some(t) => Result3::Err(t),
+        None => Result3::None,
+    }
 }

 #[cfg(test)]
@@ -123,7 +153,7 @@ mod tests {
     #[test]
     fn should_parse_val() {
         let tokens = get_tokens(&String::from("val")).unwrap();
-        let token = try_token_type(&tokens, 0, TokenType::VAL).unwrap();
+        let token = *try_token_type(&tokens, 0, TokenType::VAL).unwrap();

         assert_eq!(TokenType::VAL, token.token_type);
         assert_eq!("val", token.value);
@@ -132,7 +162,7 @@ mod tests {
     #[test]
     fn should_parse_identifier() {
         let tokens = get_tokens(&String::from("identifier")).unwrap();
-        let token = try_token_type(&tokens, 0, TokenType::Identifier).unwrap();
+        let token = *try_token_type(&tokens, 0, TokenType::Identifier).unwrap();

         assert_eq!("identifier", token.value);
     }
@@ -140,7 +170,7 @@ mod tests {
     #[test]
     fn should_parse_operator() {
         let tokens = get_tokens(&String::from("=")).unwrap();
-        let token = *try_operator(&tokens, 0, String::from("=")).unwrap();
+        let token = *try_operator(&tokens, 0, String::from("=")).unwrap();

         assert_eq!("=", token.value);
     }
@@ -182,7 +212,23 @@ mod tests {
                 assert_eq!(0, error.error_start);
                 assert_eq!(3, error.error_end);
             }
-            _ => panic!(),
+            _ => panic!("Error expected"),
         }
     }
+
+    #[test]
+    fn should_return_error_when_identifier_is_wrong() {
+        let tokens = get_tokens(&String::from("val 322")).unwrap();
+        assert_eq!(TokenType::VAL, tokens[0].token_type);
+        assert_eq!(0, tokens[0].position);
+        let binding = try_parse(&tokens, 0).unwrap();
+
+        match binding {
+            SyntaxResult::Err(error) => {
+                // assert_eq!(4, error.error_start);
+                // assert_eq!(7, error.error_end);
+            }
+            _ => panic!("Error expected")
+        }
+    }
 }
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
new file mode 100644
index 0000000..75fbe2a
--- /dev/null
+++ b/src/utils/mod.rs
@@ -0,0 +1,14 @@
+pub enum Result3<T> {
+    Ok(T),
+    Err(T),
+    None,
+}
+
+impl<T> Result3<T> {
+    pub fn unwrap(&self) -> &T {
+        match self {
+            Result3::Ok(t) => t,
+            _ => panic!("")
+        }
+    }
+}
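
Note (editor's comment, not part of the patch): the three-state Result3 added in src/utils/mod.rs is what lets binding.rs tell apart "a token is present but it is the wrong one" (Result3::Err) from "the token stream ended" (Result3::None), which the previous Option-based helpers could not express. The following is a minimal, self-contained sketch of that pattern; the function name `expect` and the plain &str tokens are illustrative stand-ins, not the project's API.

// Standalone sketch of the three-state lookup used by try_token_type / try_operator.
// `expect` and the &str "tokens" are hypothetical; the real code works on Token values.
enum Result3<T> {
    Ok(T),   // the expected token is at `pos`
    Err(T),  // some token is at `pos`, but not the expected one
    None,    // there is no token at `pos` (end of the stream)
}

fn expect<'a>(tokens: &'a [&'a str], pos: usize, expected: &str) -> Result3<&'a str> {
    match tokens.get(pos) {
        Some(t) if *t == expected => Result3::Ok(*t),
        Some(t) => Result3::Err(*t),
        None => Result3::None,
    }
}

fn main() {
    let tokens = ["val", "name"];
    // pos 1 holds "name", not "=", so this takes the Err branch and can report
    // "wrong token" instead of "unexpected end of input".
    match expect(&tokens, 1, "=") {
        Result3::Ok(_) => println!("found `=`"),
        Result3::Err(t) => println!("expected `=`, found `{}`", t),
        Result3::None => println!("expected `=`, found end of input"),
    }
}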