[Compiler] Scan single line comments

master
Araozu 2023-04-05 10:31:12 -05:00
parent 5c60943fab
commit 74e4d16105
12 changed files with 179 additions and 50 deletions

View File

@ -7,6 +7,7 @@ edition = "2021"
[lib]
name = "misti"
path = "src/lib.rs"
test = false
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@ -16,31 +16,31 @@ impl PrintableError for SyntaxError {
}
}
/// Extracts a line of code
///
/// - `chars`: Input where to extract the line from
/// - `start_position`: Position where the erroneous code starts
/// - `end_position`: Position where the erroneous code ends
///
/// Returns a tuple of:
///
/// - `String`: The faulty line
/// - `usize`: The amount of chars *before* the faulty code
/// - `usize`: The length of the faulty code
///
/// ## Example
///
/// ```
/// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
/// let start_position = 13;
/// let end_position = 15;
///
/// let (line, before, length) = get_line(&input, start_position, end_position);
///
/// assert_eq!("val number == 50", line);
/// assert_eq!(11, before);
/// assert_eq!(2, length);
/// ```
// Extracts a line of code
//
// - `chars`: Input where to extract the line from
// - `start_position`: Position where the erroneous code starts
// - `end_position`: Position where the erroneous code ends
//
// Returns a tuple of:
//
// - `String`: The faulty line
// - `usize`: The amount of chars *before* the faulty code
// - `usize`: The length of the faulty code
//
// ## Example
//
// ```
// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
// let start_position = 13;
// let end_position = 15;
//
// let (line, before, length) = get_line(&input, start_position, end_position);
//
// assert_eq!("val number == 50", line);
// assert_eq!(11, before);
// assert_eq!(2, length);
// ```
fn get_line(
chars: &Vec<char>,
start_position: usize,

View File

@ -77,6 +77,7 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
.or_else(|| scanner::identifier(next_char, chars, current_pos))
.or_else(|| scanner::datatype(next_char, chars, current_pos))
.or_else(|| scanner::string(next_char, chars, current_pos))
.or_else(|| scanner::new_comment(next_char, chars, current_pos))
.or_else(|| scanner::operator(next_char, chars, current_pos))
.or_else(|| scanner::grouping_sign(next_char, chars, current_pos))
.or_else(|| scanner::new_line(next_char, chars, current_pos))

View File

@ -4,6 +4,7 @@ use super::{
};
mod identifier;
mod new_comment;
mod new_line;
mod number;
mod operator;
@ -58,3 +59,12 @@ pub fn string(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult>
/// Attempts to scan a new line.
pub fn new_line(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
    // Only delegates to the new_line scanner when the current char is a line feed.
    (c == '\n').then(|| new_line::scan(chars, start_pos))
}
/// Attempts to scan a single line comment.
///
/// Succeeds only when the current char and the one after it are both a slash `/`.
pub fn new_comment(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
    if c == '/' && chars.get(start_pos + 1) == Some(&'/') {
        Some(new_comment::scan(chars, start_pos))
    } else {
        None
    }
}

View File

@ -0,0 +1,78 @@
use crate::{
lexic::{utils, LexResult},
token::new_comment,
};
/// Scans a single line comment.
///
/// Assumes that the chars at `start_pos` and `start_pos + 1` are both a slash `/`.
///
/// This method always succeeds.
pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
    // Skip the two leading slashes and collect everything up to (not including)
    // the next `\n` or the end of the input.
    let (comment_content, next_pos) =
        scan_any_except_new_line(chars, start_pos + 2, String::from(""));
    // The token value keeps the `//` prefix, so the original source text is preserved.
    let token = new_comment(format!("//{}", comment_content), start_pos);
    LexResult::Some(token, next_pos)
}
/// Advances over `chars` starting at `start_pos`, accumulating each char into
/// `current` via `utils::str_append`, until a new line `\n` or the end of the
/// input is reached.
///
/// Returns the accumulated string and the position where scanning stopped
/// (the position of the `\n` itself, or one past the last char).
fn scan_any_except_new_line(
    chars: &Vec<char>,
    start_pos: usize,
    current: String,
) -> (String, usize) {
    let mut accumulated = current;
    let mut pos = start_pos;
    while let Some(c) = chars.get(pos) {
        if *c == '\n' {
            break;
        }
        accumulated = utils::str_append(accumulated, *c);
        pos += 1;
    }
    (accumulated, pos)
}
#[cfg(test)]
mod tests {
    use crate::lexic::scanner::TokenType;

    use super::*;

    /// Converts a string slice into the char vector the scanner operates on.
    fn str_to_vec(s: &str) -> Vec<char> {
        s.chars().collect()
    }

    #[test]
    fn should_scan_empty_comment() {
        let input = str_to_vec("//");
        let start_pos = 0;

        let result = scan(&input, start_pos);
        match result {
            LexResult::Some(t, next) => {
                // Both slashes were consumed and scanning stopped at end of input.
                assert_eq!(2, next);
                assert_eq!("//", t.value);
                assert_eq!(0, t.position);
                assert_eq!(TokenType::Comment, t.token_type);
            }
            _ => {
                panic!()
            }
        }
    }

    #[test]
    fn should_scan_until_new_line() {
        // Two leading spaces so that the comment's first `/` is at index 2;
        // with a single leading space, `start_pos = 2` would point at the
        // second slash and the assertions below could not hold.
        let input = str_to_vec("  // some comment\n// other comment");
        let start_pos = 2;

        let result = scan(&input, start_pos);
        match result {
            LexResult::Some(t, next) => {
                // The scanner stops at the `\n` (index 17) without consuming it.
                assert_eq!(17, next);
                assert_eq!("// some comment", t.value);
                assert_eq!(start_pos, t.position);
                assert_eq!(TokenType::Comment, t.token_type);
            }
            _ => {
                panic!()
            }
        }
    }
}

View File

@ -38,8 +38,11 @@ fn scan_decimal(chars: &Vec<char>, start_pos: usize, current: String) -> LexResu
// so this is used to retrieve the original START position of the token
let current_len = current.len();
LexResult::Some(token::new_number(current, start_pos - current_len), start_pos)
},
LexResult::Some(
token::new_number(current, start_pos - current_len),
start_pos,
)
}
}
}
@ -97,7 +100,10 @@ fn scan_double_impl(chars: &Vec<char>, start_pos: usize, current: String) -> Lex
// so this is used to retrieve the original START position of the token
let current_len = current.len();
LexResult::Some(token::new_number(current, start_pos - current_len), start_pos)
LexResult::Some(
token::new_number(current, start_pos - current_len),
start_pos,
)
}
}
}
@ -140,7 +146,10 @@ fn scan_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Token,
// so this is used to retrieve the original START position of the token
let current_len = current.len();
(token::new_number(current, start_pos - current_len), start_pos)
(
token::new_number(current, start_pos - current_len),
start_pos,
)
}
}
}
@ -156,7 +165,10 @@ fn scan_hex_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Tok
// so this is used to retrieve the original START position of the token
let current_len = current.len();
(token::new_number(current, start_pos - current_len), start_pos)
(
token::new_number(current, start_pos - current_len),
start_pos,
)
}
}
}
@ -426,6 +438,5 @@ mod tests {
} else {
panic!("Expected some value")
};
}
}

View File

@ -17,7 +17,10 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
// so this is used to retrieve the original START position of the token
let current_len = current.len();
LexResult::Some(token::new_operator(current, start_pos - current_len), start_pos)
LexResult::Some(
token::new_operator(current, start_pos - current_len),
start_pos,
)
}
}
}

View File

@ -18,7 +18,10 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
// 1 is added to account for the opening `"`
let current_len = current.len() + 1;
LexResult::Some(token::new_string(current, start_pos - current_len), start_pos + 1)
LexResult::Some(
token::new_string(current, start_pos - current_len),
start_pos + 1,
)
}
Some(c) if *c == '\n' => LexResult::Err(LexError {
position: start_pos,

View File

@ -1,10 +1,25 @@
mod lexic;
// Module to handle the repl and its compilation
mod repl;
// Defines the types of tokens and provides functions to create them
mod token;
// Module to handle syntactic analysis
mod syntax;
// Module to handle lexical analysis
mod lexic;
// Module to handle semantic analysis
mod semantic;
// Defines the AST
mod ast_types;
// Defines the Symbol table and operations within
mod symbol_table;
// Transforms an AST to JS
mod codegen;
mod utils;
mod error_handling;
use token::Token;
use error_handling::MistiError;
use token::Token;
pub use token::TokenType;

View File

@ -127,7 +127,9 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Result3<&Token> {
match tokens.get(pos) {
Some(t) if t.token_type == token_type => Result3::Ok(t),
Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => Result3::None,
Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => {
Result3::None
}
Some(t) => Result3::Err(t),
None => Result3::None,
}
@ -136,7 +138,9 @@ fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Res
fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Result3<&Token> {
match tokens.get(pos) {
Some(t) if t.token_type == TokenType::Operator && t.value == operator => Result3::Ok(t),
Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => Result3::None,
Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => {
Result3::None
}
Some(t) => Result3::Err(t),
None => Result3::None,
}
@ -238,10 +242,9 @@ mod tests {
assert_eq!(4, error.error_start);
assert_eq!(7, error.error_end);
}
_ => panic!("Error expected")
_ => panic!("Error expected"),
}
let tokens = get_tokens(&String::from("val \"hello\"")).unwrap();
let binding = try_parse(&tokens, 0).unwrap();
@ -250,7 +253,7 @@ mod tests {
assert_eq!(4, error.error_start);
assert_eq!(11, error.error_end);
}
_ => panic!("Error expected")
_ => panic!("Error expected"),
}
}
@ -264,7 +267,7 @@ mod tests {
assert_eq!(7, error.error_start);
assert_eq!(14, error.error_end);
}
_ => panic!("Error expected")
_ => panic!("Error expected"),
}
}
}

View File

@ -12,12 +12,12 @@ pub enum TokenType {
LeftBrace,
RightBrace,
Semicolon,
Comment,
VAR,
VAL,
EOF,
}
#[derive(Debug)]
pub struct Token {
pub token_type: TokenType,
@ -31,12 +31,8 @@ pub struct Token {
impl Token {
pub fn get_end_position(&self) -> usize {
match self.token_type {
TokenType::String => {
self.position + self.value.len() + 2
}
_ => {
self.position + self.value.len()
}
TokenType::String => self.position + self.value.len() + 2,
_ => self.position + self.value.len(),
}
}
}
@ -104,3 +100,11 @@ pub fn new_datatype(value: String, position: usize) -> Token {
position,
}
}
/// Creates a token of type `Comment`.
///
/// - `value`: The comment's source text
/// - `position`: Position of the first char of the comment in the input
pub fn new_comment(value: String, position: usize) -> Token {
    Token {
        token_type: TokenType::Comment,
        value,
        position,
    }
}

View File

@ -8,7 +8,7 @@ impl<T> Result3<T> {
pub fn unwrap(&self) -> &T {
match self {
Result3::Ok(t) => t,
_ => panic!("")
_ => panic!(""),
}
}
}