[Compiler] Scan single line comments

master
Araozu 2023-04-05 10:31:12 -05:00
parent 5c60943fab
commit 74e4d16105
12 changed files with 179 additions and 50 deletions

View File

@ -7,6 +7,7 @@ edition = "2021"
[lib] [lib]
name = "misti" name = "misti"
path = "src/lib.rs" path = "src/lib.rs"
test = false
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@ -16,31 +16,31 @@ impl PrintableError for SyntaxError {
} }
} }
/// Extracts a line of code // Extracts a line of code
/// //
/// - `chars`: Input where to extract the line from // - `chars`: Input where to extract the line from
/// - `start_position`: Position where the erroneous code starts // - `start_position`: Position where the erroneous code starts
/// - `end_position`: Position where the erroneous code ends // - `end_position`: Position where the erroneous code ends
/// //
/// Returns a tuple of: // Returns a tuple of:
/// //
/// - `String`: The faulty line // - `String`: The faulty line
/// - `usize`: The amount of chars *before* the faulty code // - `usize`: The amount of chars *before* the faulty code
/// - `usize`: The length of the faulty code // - `usize`: The length of the faulty code
/// //
/// ## Example // ## Example
/// //
/// ``` // ```
/// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect(); // let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
/// let start_position = 13; // let start_position = 13;
/// let end_position = 15; // let end_position = 15;
/// //
/// let (line, before, length) = get_line(&input, start_position, end_position); // let (line, before, length) = get_line(&input, start_position, end_position);
/// //
/// assert_eq!("val number == 50", line); // assert_eq!("val number == 50", line);
/// assert_eq!(11, before); // assert_eq!(11, before);
/// assert_eq!(2, length); // assert_eq!(2, length);
/// ``` // ```
fn get_line( fn get_line(
chars: &Vec<char>, chars: &Vec<char>,
start_position: usize, start_position: usize,

View File

@ -77,6 +77,7 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
.or_else(|| scanner::identifier(next_char, chars, current_pos)) .or_else(|| scanner::identifier(next_char, chars, current_pos))
.or_else(|| scanner::datatype(next_char, chars, current_pos)) .or_else(|| scanner::datatype(next_char, chars, current_pos))
.or_else(|| scanner::string(next_char, chars, current_pos)) .or_else(|| scanner::string(next_char, chars, current_pos))
.or_else(|| scanner::new_comment(next_char, chars, current_pos))
.or_else(|| scanner::operator(next_char, chars, current_pos)) .or_else(|| scanner::operator(next_char, chars, current_pos))
.or_else(|| scanner::grouping_sign(next_char, chars, current_pos)) .or_else(|| scanner::grouping_sign(next_char, chars, current_pos))
.or_else(|| scanner::new_line(next_char, chars, current_pos)) .or_else(|| scanner::new_line(next_char, chars, current_pos))

View File

@ -4,6 +4,7 @@ use super::{
}; };
mod identifier; mod identifier;
mod new_comment;
mod new_line; mod new_line;
mod number; mod number;
mod operator; mod operator;
@ -58,3 +59,12 @@ pub fn string(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult>
pub fn new_line(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> { pub fn new_line(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
(c == '\n').then(|| new_line::scan(chars, start_pos)) (c == '\n').then(|| new_line::scan(chars, start_pos))
} }
/// Attempts to scan a single line comment.
///
/// Only dispatches to the comment scanner when the current char and the
/// next one are both `/`; otherwise returns `None` so the lexer can try
/// the remaining scanners.
pub fn new_comment(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
    if c == '/' && chars.get(start_pos + 1) == Some(&'/') {
        Some(new_comment::scan(chars, start_pos))
    } else {
        None
    }
}

View File

@ -0,0 +1,78 @@
use crate::{
lexic::{utils, LexResult},
token::new_comment,
};
/// Scans a single line comment (`// ...`).
///
/// Assumes that `start_pos` and `start_pos + 1` point to a slash `/`.
///
/// This method always succeeds: the comment ends at the next newline
/// or at the end of input, whichever comes first.
pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
    // Skip the two leading slashes and collect everything up to
    // (but not including) the next newline.
    let (comment_content, next_pos) =
        scan_any_except_new_line(chars, start_pos + 2, String::from(""));
    // Re-attach the `//` so the token value holds the full comment text.
    let token = new_comment(format!("//{}", comment_content), start_pos);
    LexResult::Some(token, next_pos)
}
/// Appends characters from `chars` (starting at `start_pos`) onto `current`
/// until a newline or the end of input is reached.
///
/// Returns the accumulated string and the position of the terminating
/// newline, or one past the last character when the input ends first.
fn scan_any_except_new_line(
    chars: &Vec<char>,
    start_pos: usize,
    current: String,
) -> (String, usize) {
    let mut content = current;
    let mut pos = start_pos;
    // Walk forward one char at a time; a line break terminates the scan
    // without being consumed.
    while let Some(&c) = chars.get(pos) {
        if c == '\n' {
            break;
        }
        content.push(c);
        pos += 1;
    }
    (content, pos)
}
#[cfg(test)]
mod tests {
    use crate::lexic::scanner::TokenType;
    use super::*;
    // Helper: turn a &str into the Vec<char> form the scanner consumes.
    fn str_to_vec(s: &str) -> Vec<char> {
        s.chars().collect()
    }
    #[test]
    fn should_scan_empty_comment() {
        // A bare `//` with nothing after it should still produce a Comment token.
        let input = str_to_vec("//");
        let start_pos = 0;
        let result = scan(&input, start_pos);
        match result {
            LexResult::Some(t, next) => {
                // Next position is right after the two slashes.
                assert_eq!(2, next);
                assert_eq!("//", t.value);
                assert_eq!(0, t.position);
                assert_eq!(TokenType::Comment, t.token_type);
            }
            _ => {
                panic!()
            }
        }
    }
    #[test]
    fn should_scan_until_new_line() {
        // The scan must stop at the `\n` and not swallow the second comment.
        let input = str_to_vec("  // some comment\n// other comment");
        let start_pos = 2;
        let result = scan(&input, start_pos);
        match result {
            LexResult::Some(t, next) => {
                // `next` points at the newline, which is left for the lexer.
                assert_eq!(17, next);
                assert_eq!("// some comment", t.value);
                // Token position is where the comment started, not 0.
                assert_eq!(start_pos, t.position);
                assert_eq!(TokenType::Comment, t.token_type);
            }
            _ => {
                panic!()
            }
        }
    }
}

View File

@ -38,8 +38,11 @@ fn scan_decimal(chars: &Vec<char>, start_pos: usize, current: String) -> LexResu
// so this is used to retrieve the original START position of the token // so this is used to retrieve the original START position of the token
let current_len = current.len(); let current_len = current.len();
LexResult::Some(token::new_number(current, start_pos - current_len), start_pos) LexResult::Some(
}, token::new_number(current, start_pos - current_len),
start_pos,
)
}
} }
} }
@ -97,7 +100,10 @@ fn scan_double_impl(chars: &Vec<char>, start_pos: usize, current: String) -> Lex
// so this is used to retrieve the original START position of the token // so this is used to retrieve the original START position of the token
let current_len = current.len(); let current_len = current.len();
LexResult::Some(token::new_number(current, start_pos - current_len), start_pos) LexResult::Some(
token::new_number(current, start_pos - current_len),
start_pos,
)
} }
} }
} }
@ -140,7 +146,10 @@ fn scan_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Token,
// so this is used to retrieve the original START position of the token // so this is used to retrieve the original START position of the token
let current_len = current.len(); let current_len = current.len();
(token::new_number(current, start_pos - current_len), start_pos) (
token::new_number(current, start_pos - current_len),
start_pos,
)
} }
} }
} }
@ -156,7 +165,10 @@ fn scan_hex_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Tok
// so this is used to retrieve the original START position of the token // so this is used to retrieve the original START position of the token
let current_len = current.len(); let current_len = current.len();
(token::new_number(current, start_pos - current_len), start_pos) (
token::new_number(current, start_pos - current_len),
start_pos,
)
} }
} }
} }
@ -426,6 +438,5 @@ mod tests {
} else { } else {
panic!("Expected some value") panic!("Expected some value")
}; };
} }
} }

View File

@ -17,7 +17,10 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
// so this is used to retrieve the original START position of the token // so this is used to retrieve the original START position of the token
let current_len = current.len(); let current_len = current.len();
LexResult::Some(token::new_operator(current, start_pos - current_len), start_pos) LexResult::Some(
token::new_operator(current, start_pos - current_len),
start_pos,
)
} }
} }
} }

View File

@ -18,7 +18,10 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
// 1 is added to account for the opening `"` // 1 is added to account for the opening `"`
let current_len = current.len() + 1; let current_len = current.len() + 1;
LexResult::Some(token::new_string(current, start_pos - current_len), start_pos + 1) LexResult::Some(
token::new_string(current, start_pos - current_len),
start_pos + 1,
)
} }
Some(c) if *c == '\n' => LexResult::Err(LexError { Some(c) if *c == '\n' => LexResult::Err(LexError {
position: start_pos, position: start_pos,

View File

@ -1,10 +1,25 @@
// Module to handle the repl and its compilation
mod lexic; mod repl;
// Defines the types of tokens and provides functions to create them
mod token; mod token;
// Module to handle lexical analysis
mod syntax;
// Module to handle syntactic analysis
mod lexic;
// Module to handle semantic analysis
mod semantic;
// Defines the AST
mod ast_types;
// Defines the Symbol table and operations within
mod symbol_table;
// Transforms an AST to JS
mod codegen;
mod utils;
mod error_handling; mod error_handling;
use token::Token;
use error_handling::MistiError; use error_handling::MistiError;
use token::Token;
pub use token::TokenType; pub use token::TokenType;

View File

@ -127,7 +127,9 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Result3<&Token> { fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Result3<&Token> {
match tokens.get(pos) { match tokens.get(pos) {
Some(t) if t.token_type == token_type => Result3::Ok(t), Some(t) if t.token_type == token_type => Result3::Ok(t),
Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => Result3::None, Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => {
Result3::None
}
Some(t) => Result3::Err(t), Some(t) => Result3::Err(t),
None => Result3::None, None => Result3::None,
} }
@ -136,7 +138,9 @@ fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Res
fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Result3<&Token> { fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Result3<&Token> {
match tokens.get(pos) { match tokens.get(pos) {
Some(t) if t.token_type == TokenType::Operator && t.value == operator => Result3::Ok(t), Some(t) if t.token_type == TokenType::Operator && t.value == operator => Result3::Ok(t),
Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => Result3::None, Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => {
Result3::None
}
Some(t) => Result3::Err(t), Some(t) => Result3::Err(t),
None => Result3::None, None => Result3::None,
} }
@ -238,10 +242,9 @@ mod tests {
assert_eq!(4, error.error_start); assert_eq!(4, error.error_start);
assert_eq!(7, error.error_end); assert_eq!(7, error.error_end);
} }
_ => panic!("Error expected") _ => panic!("Error expected"),
} }
let tokens = get_tokens(&String::from("val \"hello\"")).unwrap(); let tokens = get_tokens(&String::from("val \"hello\"")).unwrap();
let binding = try_parse(&tokens, 0).unwrap(); let binding = try_parse(&tokens, 0).unwrap();
@ -250,7 +253,7 @@ mod tests {
assert_eq!(4, error.error_start); assert_eq!(4, error.error_start);
assert_eq!(11, error.error_end); assert_eq!(11, error.error_end);
} }
_ => panic!("Error expected") _ => panic!("Error expected"),
} }
} }
@ -264,7 +267,7 @@ mod tests {
assert_eq!(7, error.error_start); assert_eq!(7, error.error_start);
assert_eq!(14, error.error_end); assert_eq!(14, error.error_end);
} }
_ => panic!("Error expected") _ => panic!("Error expected"),
} }
} }
} }

View File

@ -12,12 +12,12 @@ pub enum TokenType {
LeftBrace, LeftBrace,
RightBrace, RightBrace,
Semicolon, Semicolon,
Comment,
VAR, VAR,
VAL, VAL,
EOF, EOF,
} }
#[derive(Debug)] #[derive(Debug)]
pub struct Token { pub struct Token {
pub token_type: TokenType, pub token_type: TokenType,
@ -31,12 +31,8 @@ pub struct Token {
impl Token { impl Token {
pub fn get_end_position(&self) -> usize { pub fn get_end_position(&self) -> usize {
match self.token_type { match self.token_type {
TokenType::String => { TokenType::String => self.position + self.value.len() + 2,
self.position + self.value.len() + 2 _ => self.position + self.value.len(),
}
_ => {
self.position + self.value.len()
}
} }
} }
} }
@ -104,3 +100,11 @@ pub fn new_datatype(value: String, position: usize) -> Token {
position, position,
} }
} }
/// Creates a `Comment` token from its full source text (including the
/// leading `//`) and its starting position in the input.
pub fn new_comment(value: String, position: usize) -> Token {
    Token {
        token_type: TokenType::Comment,
        value,
        position,
    }
}

View File

@ -8,7 +8,7 @@ impl<T> Result3<T> {
pub fn unwrap(&self) -> &T { pub fn unwrap(&self) -> &T {
match self { match self {
Result3::Ok(t) => t, Result3::Ok(t) => t,
_ => panic!("") _ => panic!(""),
} }
} }
} }