From 74e4d161054ff0c4b60dacfe47b61cac3d757ba7 Mon Sep 17 00:00:00 2001 From: Araozu Date: Wed, 5 Apr 2023 10:31:12 -0500 Subject: [PATCH] [Compiler] Scan single line comments --- compiler/Cargo.toml | 1 + compiler/src/error_handling/syntax_error.rs | 50 ++++++------- compiler/src/lexic/mod.rs | 1 + compiler/src/lexic/scanner/mod.rs | 10 +++ compiler/src/lexic/scanner/new_comment.rs | 78 +++++++++++++++++++++ compiler/src/lexic/scanner/number.rs | 23 ++++-- compiler/src/lexic/scanner/operator.rs | 5 +- compiler/src/lexic/scanner/string.rs | 5 +- compiler/src/lib.rs | 21 +++++- compiler/src/syntax/binding.rs | 15 ++-- compiler/src/token.rs | 18 +++-- compiler/src/utils/mod.rs | 2 +- 12 files changed, 179 insertions(+), 50 deletions(-) create mode 100644 compiler/src/lexic/scanner/new_comment.rs diff --git a/compiler/Cargo.toml b/compiler/Cargo.toml index c3976d8..da83978 100644 --- a/compiler/Cargo.toml +++ b/compiler/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" [lib] name = "misti" path = "src/lib.rs" +test = false # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/compiler/src/error_handling/syntax_error.rs b/compiler/src/error_handling/syntax_error.rs index f109cb7..aae486e 100644 --- a/compiler/src/error_handling/syntax_error.rs +++ b/compiler/src/error_handling/syntax_error.rs @@ -16,31 +16,31 @@ impl PrintableError for SyntaxError { } } -/// Extracts a line of code -/// -/// - `chars`: Input where to extract the line from -/// - `start_position`: Position where the erroneous code starts -/// - `end_position`: Position where the erroneous code ends -/// -/// Returns a tuple of: -/// -/// - `String`: The faulty line -/// - `usize`: The amount of chars *before* the faulty code -/// - `usize`: The lenght of the faulty code -/// -/// ## Example -/// -/// ``` -/// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect(); -/// let start_position = 13; -/// let end_position = 15; -/// 
-/// let (line, before, length) = get_line(&input, start_position, end_position); -/// -/// assert_eq!("val number == 50", line); -/// assert_eq!(11, before); -/// assert_eq!(2, length); -/// ``` +// Extracts a line of code +// +// - `chars`: Input where to extract the line from +// - `start_position`: Position where the erroneous code starts +// - `end_position`: Position where the erroneous code ends +// +// Returns a tuple of: +// +// - `String`: The faulty line +// - `usize`: The amount of chars *before* the faulty code +// - `usize`: The length of the faulty code +// +// ## Example +// +// ``` +// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect(); +// let start_position = 13; +// let end_position = 15; +// +// let (line, before, length) = get_line(&input, start_position, end_position); +// +// assert_eq!("val number == 50", line); +// assert_eq!(11, before); +// assert_eq!(2, length); +// ``` fn get_line( chars: &Vec, start_position: usize, diff --git a/compiler/src/lexic/mod.rs b/compiler/src/lexic/mod.rs index 59ef9c6..ed940b7 100755 --- a/compiler/src/lexic/mod.rs +++ b/compiler/src/lexic/mod.rs @@ -77,6 +77,7 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult { .or_else(|| scanner::identifier(next_char, chars, current_pos)) .or_else(|| scanner::datatype(next_char, chars, current_pos)) .or_else(|| scanner::string(next_char, chars, current_pos)) + .or_else(|| scanner::new_comment(next_char, chars, current_pos)) .or_else(|| scanner::operator(next_char, chars, current_pos)) .or_else(|| scanner::grouping_sign(next_char, chars, current_pos)) .or_else(|| scanner::new_line(next_char, chars, current_pos)) diff --git a/compiler/src/lexic/scanner/mod.rs b/compiler/src/lexic/scanner/mod.rs index f6d08df..28b7153 100755 --- a/compiler/src/lexic/scanner/mod.rs +++ b/compiler/src/lexic/scanner/mod.rs @@ -4,6 +4,7 @@ use super::{ }; mod identifier; +mod new_comment; mod new_line; mod number; mod operator; @@ -58,3 +59,12 @@ 
pub fn string(c: char, chars: &Vec, start_pos: usize) -> Option pub fn new_line(c: char, chars: &Vec, start_pos: usize) -> Option { (c == '\n').then(|| new_line::scan(chars, start_pos)) } + +/// Attempts to scan a single line comment. +pub fn new_comment(c: char, chars: &Vec, start_pos: usize) -> Option { + let next_char = chars.get(start_pos + 1); + match (c, next_char) { + ('/', Some('/')) => Some(new_comment::scan(chars, start_pos)), + _ => None, + } +} diff --git a/compiler/src/lexic/scanner/new_comment.rs b/compiler/src/lexic/scanner/new_comment.rs new file mode 100644 index 0000000..c60a74d --- /dev/null +++ b/compiler/src/lexic/scanner/new_comment.rs @@ -0,0 +1,78 @@ +use crate::{ + lexic::{utils, LexResult}, + token::new_comment, +}; + +/// Scans a single line comment. +/// +/// Assumes that `start_pos` and `start_pos + 1` point to a slash `/` +/// +/// This method always succeeds +pub fn scan(chars: &Vec, start_pos: usize) -> LexResult { + let (comment_content, next_pos) = + scan_any_except_new_line(chars, start_pos + 2, String::from("")); + let token = new_comment(format!("//{}", comment_content), start_pos); + + LexResult::Some(token, next_pos) +} + +fn scan_any_except_new_line( + chars: &Vec, + start_pos: usize, + current: String, +) -> (String, usize) { + match chars.get(start_pos) { + Some(c) if *c == '\n' => (current, start_pos), + Some(c) => scan_any_except_new_line(chars, start_pos + 1, utils::str_append(current, *c)), + None => (current, start_pos), + } +} + +#[cfg(test)] +mod tests { + use crate::lexic::scanner::TokenType; + + use super::*; + + fn str_to_vec(s: &str) -> Vec { + s.chars().collect() + } + + #[test] + fn should_scan_empty_comment() { + let input = str_to_vec("//"); + let start_pos = 0; + + let result = scan(&input, start_pos); + match result { + LexResult::Some(t, next) => { + assert_eq!(2, next); + assert_eq!("//", t.value); + assert_eq!(0, t.position); + assert_eq!(TokenType::Comment, t.token_type); + } + _ => { + panic!() + } + } + } + + 
#[test] + fn should_scan_until_new_line() { + let input = str_to_vec(" // some comment\n// other comment"); + let start_pos = 2; + + let result = scan(&input, start_pos); + match result { + LexResult::Some(t, next) => { + assert_eq!(17, next); + assert_eq!("// some comment", t.value); + assert_eq!(start_pos, t.position); + assert_eq!(TokenType::Comment, t.token_type); + } + _ => { + panic!() + } + } + } +} diff --git a/compiler/src/lexic/scanner/number.rs b/compiler/src/lexic/scanner/number.rs index 2261144..d269a38 100755 --- a/compiler/src/lexic/scanner/number.rs +++ b/compiler/src/lexic/scanner/number.rs @@ -38,8 +38,11 @@ fn scan_decimal(chars: &Vec, start_pos: usize, current: String) -> LexResu // so this is used to retrieve the original START position of the token let current_len = current.len(); - LexResult::Some(token::new_number(current, start_pos - current_len), start_pos) - }, + LexResult::Some( + token::new_number(current, start_pos - current_len), + start_pos, + ) + } } } @@ -97,7 +100,10 @@ fn scan_double_impl(chars: &Vec, start_pos: usize, current: String) -> Lex // so this is used to retrieve the original START position of the token let current_len = current.len(); - LexResult::Some(token::new_number(current, start_pos - current_len), start_pos) + LexResult::Some( + token::new_number(current, start_pos - current_len), + start_pos, + ) } } } @@ -140,7 +146,10 @@ fn scan_digits(chars: &Vec, start_pos: usize, current: String) -> (Token, // so this is used to retrieve the original START position of the token let current_len = current.len(); - (token::new_number(current, start_pos - current_len), start_pos) + ( + token::new_number(current, start_pos - current_len), + start_pos, + ) } } } @@ -156,7 +165,10 @@ fn scan_hex_digits(chars: &Vec, start_pos: usize, current: String) -> (Tok // so this is used to retrieve the original START position of the token let current_len = current.len(); - (token::new_number(current, start_pos - current_len), start_pos) + ( 
+ token::new_number(current, start_pos - current_len), + start_pos, + ) } } } @@ -426,6 +438,5 @@ mod tests { } else { panic!("Expected some value") }; - } } diff --git a/compiler/src/lexic/scanner/operator.rs b/compiler/src/lexic/scanner/operator.rs index 4124434..31f8092 100755 --- a/compiler/src/lexic/scanner/operator.rs +++ b/compiler/src/lexic/scanner/operator.rs @@ -17,7 +17,10 @@ pub fn scan_impl(chars: &Vec, start_pos: usize, current: String) -> LexRes // so this is used to retrieve the original START position of the token let current_len = current.len(); - LexResult::Some(token::new_operator(current, start_pos - current_len), start_pos) + LexResult::Some( + token::new_operator(current, start_pos - current_len), + start_pos, + ) } } } diff --git a/compiler/src/lexic/scanner/string.rs b/compiler/src/lexic/scanner/string.rs index e25363b..f46e24b 100755 --- a/compiler/src/lexic/scanner/string.rs +++ b/compiler/src/lexic/scanner/string.rs @@ -18,7 +18,10 @@ pub fn scan_impl(chars: &Vec, start_pos: usize, current: String) -> LexRes // 1 is added to account for the opening `"` let current_len = current.len() + 1; - LexResult::Some(token::new_string(current, start_pos - current_len), start_pos + 1) + LexResult::Some( + token::new_string(current, start_pos - current_len), + start_pos + 1, + ) } Some(c) if *c == '\n' => LexResult::Err(LexError { position: start_pos, diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index c335b9f..3ab13f7 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -1,10 +1,25 @@ - -mod lexic; +// Module to handle the repl and its compilation +mod repl; +// Defines the types of tokens and provides functions to create them mod token; +// Module to handle syntactic analysis +mod syntax; +// Module to handle lexical analysis +mod lexic; +// Module to handle semantic analysis +mod semantic; +// Defines the AST +mod ast_types; +// Defines the Symbol table and operations within +mod symbol_table; +// Transforms an AST to JS +mod 
codegen; +mod utils; + mod error_handling; -use token::Token; use error_handling::MistiError; +use token::Token; pub use token::TokenType; diff --git a/compiler/src/syntax/binding.rs b/compiler/src/syntax/binding.rs index d573884..2eec9ab 100644 --- a/compiler/src/syntax/binding.rs +++ b/compiler/src/syntax/binding.rs @@ -127,7 +127,9 @@ pub fn try_parse<'a>(tokens: &'a Vec, pos: usize) -> Option fn try_token_type(tokens: &Vec, pos: usize, token_type: TokenType) -> Result3<&Token> { match tokens.get(pos) { Some(t) if t.token_type == token_type => Result3::Ok(t), - Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => Result3::None, + Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => { + Result3::None + } Some(t) => Result3::Err(t), None => Result3::None, } @@ -136,7 +138,9 @@ fn try_token_type(tokens: &Vec, pos: usize, token_type: TokenType) -> Res fn try_operator(tokens: &Vec, pos: usize, operator: String) -> Result3<&Token> { match tokens.get(pos) { Some(t) if t.token_type == TokenType::Operator && t.value == operator => Result3::Ok(t), - Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => Result3::None, + Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => { + Result3::None + } Some(t) => Result3::Err(t), None => Result3::None, } @@ -238,10 +242,9 @@ mod tests { assert_eq!(4, error.error_start); assert_eq!(7, error.error_end); } - _ => panic!("Error expected") + _ => panic!("Error expected"), } - let tokens = get_tokens(&String::from("val \"hello\"")).unwrap(); let binding = try_parse(&tokens, 0).unwrap(); @@ -250,7 +253,7 @@ mod tests { assert_eq!(4, error.error_start); assert_eq!(11, error.error_end); } - _ => panic!("Error expected") + _ => panic!("Error expected"), } } @@ -264,7 +267,7 @@ mod tests { assert_eq!(7, error.error_start); assert_eq!(14, error.error_end); } - _ => panic!("Error expected") + _ => panic!("Error 
expected"), } } } diff --git a/compiler/src/token.rs b/compiler/src/token.rs index a4fd259..7ad5c81 100755 --- a/compiler/src/token.rs +++ b/compiler/src/token.rs @@ -12,12 +12,12 @@ pub enum TokenType { LeftBrace, RightBrace, Semicolon, + Comment, VAR, VAL, EOF, } - #[derive(Debug)] pub struct Token { pub token_type: TokenType, @@ -31,12 +31,8 @@ pub struct Token { impl Token { pub fn get_end_position(&self) -> usize { match self.token_type { - TokenType::String => { - self.position + self.value.len() + 2 - } - _ => { - self.position + self.value.len() - } + TokenType::String => self.position + self.value.len() + 2, + _ => self.position + self.value.len(), } } } @@ -104,3 +100,11 @@ pub fn new_datatype(value: String, position: usize) -> Token { position, } } + +pub fn new_comment(value: String, position: usize) -> Token { + Token { + token_type: TokenType::Comment, + value, + position, + } +} diff --git a/compiler/src/utils/mod.rs b/compiler/src/utils/mod.rs index 75fbe2a..2148f98 100644 --- a/compiler/src/utils/mod.rs +++ b/compiler/src/utils/mod.rs @@ -8,7 +8,7 @@ impl Result3 { pub fn unwrap(&self) -> &T { match self { Result3::Ok(t) => t, - _ => panic!("") + _ => panic!(""), } } }