refactor: Don't store quotes inside string tokens

2024-07-31 10:54:17 -05:00 · 2024-07-31 10:54:17 -05:00 · c0e20ad283
commit c0e20ad283
parent a62d08455b
7 changed files with 48 additions and 28 deletions
--- a/src/error_handling/mod.rs
+++ b/src/error_handling/mod.rs
@ -22,6 +22,7 @@ pub enum MistiError {
 pub struct LexError {
    pub position: usize,
    // TODO: Add and end position
+    pub end_position: usize,
    pub reason: String,
 }

--- a/src/lexic/mod.rs
+++ b/src/lexic/mod.rs
@ -152,6 +152,7 @@ fn next_token(
        .unwrap_or_else(|| {
            let error = LexError {
                position: current_pos,
+                end_position: current_pos + 1,
                reason: format!(
                    "Illegal character `{}` (escaped: {})",
                    next_char,
@ -196,6 +197,7 @@ fn handle_indentation(
                // Illegal state: Indentation error
                let error = LexError {
                    position: current_pos,
+                    end_position: current_pos + 1,
                    reason: format!(
                        "Indentation error: expected {} spaces, found {}",
                        new_top, spaces
--- a/src/lexic/scanner/new_comment.rs
+++ b/src/lexic/scanner/new_comment.rs
@ -34,22 +34,27 @@ fn scan_any_except_new_line(
 /// and the character at `start_pos + 1` is '*'
 pub fn scan_multiline(chars: &Vec<char>, start_pos: usize) -> LexResult {
    match multiline_impl(chars, start_pos + 2) {
-        Some((value, next_position)) => LexResult::Some(
+        Ok((value, next_position)) => LexResult::Some(
            Token::new_multiline_comment(value.iter().collect(), start_pos),
            next_position,
        ),
-        None => {
+        Err(last_position) => {
            // Throw an error: Incomplete multiline comment
            LexResult::Err(LexError {
                position: start_pos,
                // TODO: add an end_position
+                end_position: last_position,
                reason: "Unfinished multiline commend".into(),
            })
        }
    }
 }

-fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(Vec<char>, usize)> {
+/// Implementation that scans the multiline comment.
+/// 
+/// May only error if EOF is found before the comment is finished.
+/// If Err, returns the last position where a char was available.
+fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Result<(Vec<char>, usize), usize> {
    let mut current_position = start_pos;
    let mut result = Vec::<char>::new();

@ -61,10 +66,10 @@ fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(Vec<char>, usi
                        // Scan nested comment
                        let (mut nested, next_position) =
                            match multiline_impl(chars, current_position + 2) {
-                                Some(v) => v,
-                                None => {
+                                Ok(v) => v,
+                                Err(pos) => {
                                    // The nested comment is not closed.
-                                    return None;
+                                    return Err(pos);
                                }
                            };
                        result.push('/');
@ -79,7 +84,7 @@ fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(Vec<char>, usi
                        result.push('/');
                        result.push(*c);
                    }
-                    None => return None,
+                    None => return Err(current_position),
                }
            }
            Some('*') => {
@ -88,7 +93,7 @@ fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(Vec<char>, usi
                    Some('/') => {
                        // Create and return the token,
                        // ignoring the `*/`
-                        return Some((result, current_position + 2));
+                        return Ok((result, current_position + 2));
                    }
                    Some(c) => {
                        // Append both and continue
@ -98,7 +103,7 @@ fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(Vec<char>, usi
                    }
                    None => {
                        // Throw an error
-                        return None;
+                        return Err(current_position);
                    }
                }
            }
@ -108,10 +113,7 @@ fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(Vec<char>, usi
                current_position += 1;
            }
            None => {
-                // TODO: Also return the position where this token ends,
-                // to display better error messages.
-                // Requires LexError to implement an end_position field
-                return None;
+                return Err(current_position);
            }
        }
    }
--- a/src/lexic/scanner/number.rs
+++ b/src/lexic/scanner/number.rs
@ -53,6 +53,7 @@ fn scan_hex(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
        }
        _ => LexResult::Err(LexError {
            position: start_pos,
+            end_position: start_pos + 1,
            reason: String::from("Tried to scan an incomplete hex value"),
        }),
    }
@ -69,12 +70,14 @@ fn scan_double(chars: &Vec<char>, start_pos: usize, current: String) -> LexResul
        Some(c) if utils::is_digit(*c) => scan_double_impl(chars, start_pos, current),
        Some(_) => LexResult::Err(LexError {
            position: start_pos,
+            end_position: start_pos + 1,
            reason: String::from(
                "The character after the dot when scanning a double is not a number.",
            ),
        }),
        _ => LexResult::Err(LexError {
            position: start_pos,
+            end_position: start_pos + 1,
            reason: String::from("EOF when scanning a double number."),
        }),
    }
@ -122,6 +125,7 @@ fn scan_scientific(chars: &Vec<char>, start_pos: usize, current: String) -> LexR
        }
        _ => LexResult::Err(LexError {
            position: start_pos,
+            end_position: start_pos + 1,
            reason: String::from(
                "The characters after 'e' are not + or -, or are not followed by a number",
            ),
--- a/src/lexic/scanner/string.rs
+++ b/src/lexic/scanner/string.rs
@ -7,9 +7,11 @@ use crate::lexic::{utils, LexResult};
 /// This function assumes that `start_pos` is after the first double quote,
 /// e.g. if the input is `"hello"`, `start_pos == 1`
 pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
-    scan_impl(chars, start_pos, String::from("\""))
+    scan_impl(chars, start_pos, String::from(""))
 }

+// TODO: This can be iterative instead of recursive
+
 /// Recursive function that does the scanning
 pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
    match chars.get(start_pos) {
@ -17,16 +19,16 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
            // start_pos is the position where the token ENDS, not where it STARTS,
            // so this is used to retrieve the original START position of the token
            // 1 is added to account for the opening `"`
-            let current_len = current.len();
+            let current_len = current.len() + 1;

-            let final_str = format!("{}\"", current);
            LexResult::Some(
-                Token::new_string(final_str, start_pos - current_len),
+                Token::new_string(current, start_pos - current_len),
                start_pos + 1,
            )
        }
        Some(c) if *c == '\n' => LexResult::Err(LexError {
            position: start_pos,
+            end_position: start_pos + 1,
            reason: String::from("Unexpected new line inside a string."),
        }),
        Some(c) if *c == '\\' => {
@ -40,6 +42,7 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
        Some(c) => scan_impl(chars, start_pos + 1, utils::str_append(current, *c)),
        None => LexResult::Err(LexError {
            position: start_pos,
+            end_position: start_pos + 1,
            reason: String::from("Incomplete string found"),
        }),
    }
@ -79,7 +82,7 @@ mod tests {
        if let LexResult::Some(token, next) = scan(&input, start_pos) {
            assert_eq!(2, next);
            assert_eq!(TokenType::String, token.token_type);
-            assert_eq!("\"\"", token.value);
+            assert_eq!("", token.value);
            assert_eq!(0, token.position);
        } else {
            panic!()
@ -93,7 +96,7 @@ mod tests {
        if let LexResult::Some(token, next) = scan(&input, start_pos) {
            assert_eq!(15, next);
            assert_eq!(TokenType::String, token.token_type);
-            assert_eq!("\"Hello, world!\"", token.value);
+            assert_eq!("Hello, world!", token.value);
            assert_eq!(0, token.position);
        } else {
            panic!()
@ -118,7 +121,7 @@ mod tests {
        if let LexResult::Some(token, next) = scan(&input, start_pos) {
            assert_eq!(14, next);
            assert_eq!(TokenType::String, token.token_type);
-            assert_eq!("\"Sample\\ntext\"", token.value);
+            assert_eq!("Sample\\ntext", token.value);
            assert_eq!(0, token.position);
        } else {
            panic!()
@ -129,7 +132,7 @@ mod tests {
        if let LexResult::Some(token, next) = scan(&input, start_pos) {
            assert_eq!(14, next);
            assert_eq!(TokenType::String, token.token_type);
-            assert_eq!("\"Sample\\\"text\"", token.value);
+            assert_eq!("Sample\\\"text", token.value);
            assert_eq!(0, token.position);
        } else {
            panic!()
@ -140,7 +143,7 @@ mod tests {
        if let LexResult::Some(token, next) = scan(&input, start_pos) {
            assert_eq!(14, next);
            assert_eq!(TokenType::String, token.token_type);
-            assert_eq!("\"Sample\\rtext\"", token.value);
+            assert_eq!("Sample\\rtext", token.value);
            assert_eq!(0, token.position);
        } else {
            panic!()
@ -151,7 +154,7 @@ mod tests {
        if let LexResult::Some(token, next) = scan(&input, start_pos) {
            assert_eq!(14, next);
            assert_eq!(TokenType::String, token.token_type);
-            assert_eq!("\"Sample\\\\text\"", token.value);
+            assert_eq!("Sample\\\\text", token.value);
            assert_eq!(0, token.position);
        } else {
            panic!()
@ -162,7 +165,7 @@ mod tests {
        if let LexResult::Some(token, next) = scan(&input, start_pos) {
            assert_eq!(14, next);
            assert_eq!(TokenType::String, token.token_type);
-            assert_eq!("\"Sample\\ttext\"", token.value);
+            assert_eq!("Sample\\ttext", token.value);
            assert_eq!(0, token.position);
        } else {
            panic!()
@ -173,7 +176,7 @@ mod tests {
        if let LexResult::Some(token, next) = scan(&input, start_pos) {
            assert_eq!(14, next);
            assert_eq!(TokenType::String, token.token_type);
-            assert_eq!("\"Sample\\ text\"", token.value);
+            assert_eq!("Sample\\ text", token.value);
            assert_eq!(0, token.position);
        } else {
            panic!()
@ -187,7 +190,7 @@ mod tests {
        if let LexResult::Some(token, next) = scan(&input, start_pos) {
            assert_eq!(14, next);
            assert_eq!(TokenType::String, token.token_type);
-            assert_eq!("\"Sample\\atext\"", token.value);
+            assert_eq!("Sample\\atext", token.value);
            assert_eq!(0, token.position);
        } else {
            panic!()
--- a/src/lexic/token.rs
+++ b/src/lexic/token.rs
@ -38,7 +38,15 @@ pub struct Token {

 impl Token {
    pub fn get_end_position(&self) -> usize {
-        self.position + self.value.len()
+        match self.token_type {
+            // 4 extra characters for /* and */
+            TokenType::MultilineComment => self.position + self.value.len() + 4,
+            // 2 extra characters for //
+            TokenType::Comment => self.position + self.value.len() + 2,
+            // 2 extra characters for ""
+            TokenType::String => self.position + self.value.len() + 2,
+            _ => self.position + self.value.len()
+        }
    }
 }

--- a/src/syntax/parsers/expression/primary.rs
+++ b/src/syntax/parsers/expression/primary.rs
@ -66,7 +66,7 @@ mod tests {

        match expression {
            Ok((Expression::String(value), _)) => {
-                assert_eq!("\"Hello\"", format!("{}", value))
+                assert_eq!("Hello", format!("{}", value))
            }
            _ => panic!(),
        }