From 74e4d161054ff0c4b60dacfe47b61cac3d757ba7 Mon Sep 17 00:00:00 2001
From: Araozu <faraoz@unsa.edu.pe>
Date: Wed, 5 Apr 2023 10:31:12 -0500
Subject: [PATCH] [Compiler] Scan single line comments

---
 compiler/Cargo.toml                         |  1 +
 compiler/src/error_handling/syntax_error.rs | 50 ++++++-------
 compiler/src/lexic/mod.rs                   |  1 +
 compiler/src/lexic/scanner/mod.rs           | 10 +++
 compiler/src/lexic/scanner/new_comment.rs   | 78 +++++++++++++++++++++
 compiler/src/lexic/scanner/number.rs        | 23 ++++--
 compiler/src/lexic/scanner/operator.rs      |  5 +-
 compiler/src/lexic/scanner/string.rs        |  5 +-
 compiler/src/lib.rs                         | 21 +++++-
 compiler/src/syntax/binding.rs              | 15 ++--
 compiler/src/token.rs                       | 18 +++--
 compiler/src/utils/mod.rs                   |  2 +-
 12 files changed, 179 insertions(+), 50 deletions(-)
 create mode 100644 compiler/src/lexic/scanner/new_comment.rs
diff --git a/compiler/Cargo.toml b/compiler/Cargo.toml
index c3976d8..da83978 100644
--- a/compiler/Cargo.toml
+++ b/compiler/Cargo.toml
@@ -7,6 +7,7 @@ edition = "2021"
 [lib]
 name = "misti"
 path = "src/lib.rs"
+test = false
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
diff --git a/compiler/src/error_handling/syntax_error.rs b/compiler/src/error_handling/syntax_error.rs
index f109cb7..aae486e 100644
--- a/compiler/src/error_handling/syntax_error.rs
+++ b/compiler/src/error_handling/syntax_error.rs
@@ -16,31 +16,31 @@ impl PrintableError for SyntaxError {
     }
 }
 
-/// Extracts a line of code
-///
-/// - `chars`: Input where to extract the line from
-/// - `start_position`: Position where the erroneous code starts
-/// - `end_position`: Position where the erroneous code ends
-///
-/// Returns a tuple of:
-///
-/// - `String`: The faulty line
-/// - `usize`: The amount of chars *before* the faulty code
-/// - `usize`: The lenght of the faulty code
-///
-/// ## Example
-///
-/// ```
-/// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
-/// let start_position = 13;
-/// let end_position = 15;
-///
-/// let (line, before, length) = get_line(&input, start_position, end_position);
-///
-/// assert_eq!("val number == 50", line);
-/// assert_eq!(11, before);
-/// assert_eq!(2, length);
-/// ```
+// Extracts a line of code
+//
+// - `chars`: Input where to extract the line from
+// - `start_position`: Position where the erroneous code starts
+// - `end_position`: Position where the erroneous code ends
+//
+// Returns a tuple of:
+//
+// - `String`: The faulty line
+// - `usize`: The amount of chars *before* the faulty code
+// - `usize`: The length of the faulty code
+//
+// ## Example
+//
+// ```
+// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
+// let start_position = 13;
+// let end_position = 15;
+//
+// let (line, before, length) = get_line(&input, start_position, end_position);
+//
+// assert_eq!("val number == 50", line);
+// assert_eq!(11, before);
+// assert_eq!(2, length);
+// ```
 fn get_line(
     chars: &Vec<char>,
     start_position: usize,
diff --git a/compiler/src/lexic/mod.rs b/compiler/src/lexic/mod.rs
index 59ef9c6..ed940b7 100755
--- a/compiler/src/lexic/mod.rs
+++ b/compiler/src/lexic/mod.rs
@@ -77,6 +77,7 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
         .or_else(|| scanner::identifier(next_char, chars, current_pos))
         .or_else(|| scanner::datatype(next_char, chars, current_pos))
         .or_else(|| scanner::string(next_char, chars, current_pos))
+        .or_else(|| scanner::new_comment(next_char, chars, current_pos))
         .or_else(|| scanner::operator(next_char, chars, current_pos))
         .or_else(|| scanner::grouping_sign(next_char, chars, current_pos))
         .or_else(|| scanner::new_line(next_char, chars, current_pos))
diff --git a/compiler/src/lexic/scanner/mod.rs b/compiler/src/lexic/scanner/mod.rs
index f6d08df..28b7153 100755
--- a/compiler/src/lexic/scanner/mod.rs
+++ b/compiler/src/lexic/scanner/mod.rs
@@ -4,6 +4,7 @@ use super::{
 };
 
 mod identifier;
+mod new_comment;
 mod new_line;
 mod number;
 mod operator;
@@ -58,3 +59,12 @@ pub fn string(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult>
 pub fn new_line(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
     (c == '\n').then(|| new_line::scan(chars, start_pos))
 }
+
+/// Attempts to scan a single line comment.
+pub fn new_comment(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
+    let next_char = chars.get(start_pos + 1);
+    match (c, next_char) {
+        ('/', Some('/')) => Some(new_comment::scan(chars, start_pos)),
+        _ => None,
+    }
+}
diff --git a/compiler/src/lexic/scanner/new_comment.rs b/compiler/src/lexic/scanner/new_comment.rs
new file mode 100644
index 0000000..c60a74d
--- /dev/null
+++ b/compiler/src/lexic/scanner/new_comment.rs
@@ -0,0 +1,78 @@
+use crate::{
+    lexic::{utils, LexResult},
+    token::new_comment,
+};
+
+/// Scans a single line comment.
+///
+/// Assumes that `start_pos` and `start_pos + 1` point to a slash `/`
+///
+/// This method always succeeds
+pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
+    let (comment_content, next_pos) =
+        scan_any_except_new_line(chars, start_pos + 2, String::from(""));
+    let token = new_comment(format!("//{}", comment_content), start_pos);
+
+    LexResult::Some(token, next_pos)
+}
+
+fn scan_any_except_new_line(
+    chars: &Vec<char>,
+    start_pos: usize,
+    current: String,
+) -> (String, usize) {
+    match chars.get(start_pos) {
+        Some(c) if *c == '\n' => (current, start_pos),
+        Some(c) => scan_any_except_new_line(chars, start_pos + 1, utils::str_append(current, *c)),
+        None => (current, start_pos),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::lexic::scanner::TokenType;
+
+    use super::*;
+
+    fn str_to_vec(s: &str) -> Vec<char> {
+        s.chars().collect()
+    }
+
+    #[test]
+    fn should_scan_empty_comment() {
+        let input = str_to_vec("//");
+        let start_pos = 0;
+
+        let result = scan(&input, start_pos);
+        match result {
+            LexResult::Some(t, next) => {
+                assert_eq!(2, next);
+                assert_eq!("//", t.value);
+                assert_eq!(0, t.position);
+                assert_eq!(TokenType::Comment, t.token_type);
+            }
+            _ => {
+                panic!()
+            }
+        }
+    }
+
+    #[test]
+    fn should_scan_until_new_line() {
+        let input = str_to_vec("  // some comment\n// other comment");
+        let start_pos = 2;
+
+        let result = scan(&input, start_pos);
+        match result {
+            LexResult::Some(t, next) => {
+                assert_eq!(17, next);
+                assert_eq!("// some comment", t.value);
+                assert_eq!(start_pos, t.position);
+                assert_eq!(TokenType::Comment, t.token_type);
+            }
+            _ => {
+                panic!()
+            }
+        }
+    }
+}
diff --git a/compiler/src/lexic/scanner/number.rs b/compiler/src/lexic/scanner/number.rs
index 2261144..d269a38 100755
--- a/compiler/src/lexic/scanner/number.rs
+++ b/compiler/src/lexic/scanner/number.rs
@@ -38,8 +38,11 @@ fn scan_decimal(chars: &Vec<char>, start_pos: usize, current: String) -> LexResu
             // so this is used to retrieve the original START position of the token
             let current_len = current.len();
 
-            LexResult::Some(token::new_number(current, start_pos - current_len), start_pos)
-        },
+            LexResult::Some(
+                token::new_number(current, start_pos - current_len),
+                start_pos,
+            )
+        }
     }
 }
 
@@ -97,7 +100,10 @@ fn scan_double_impl(chars: &Vec<char>, start_pos: usize, current: String) -> Lex
             // so this is used to retrieve the original START position of the token
             let current_len = current.len();
 
-            LexResult::Some(token::new_number(current, start_pos - current_len), start_pos)
+            LexResult::Some(
+                token::new_number(current, start_pos - current_len),
+                start_pos,
+            )
         }
     }
 }
@@ -140,7 +146,10 @@ fn scan_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Token,
             // so this is used to retrieve the original START position of the token
             let current_len = current.len();
 
-            (token::new_number(current, start_pos - current_len), start_pos)
+            (
+                token::new_number(current, start_pos - current_len),
+                start_pos,
+            )
         }
     }
 }
@@ -156,7 +165,10 @@ fn scan_hex_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Tok
             // so this is used to retrieve the original START position of the token
             let current_len = current.len();
 
-            (token::new_number(current, start_pos - current_len), start_pos)
+            (
+                token::new_number(current, start_pos - current_len),
+                start_pos,
+            )
         }
     }
 }
@@ -426,6 +438,5 @@ mod tests {
         } else {
             panic!("Expected some value")
         };
-
     }
 }
diff --git a/compiler/src/lexic/scanner/operator.rs b/compiler/src/lexic/scanner/operator.rs
index 4124434..31f8092 100755
--- a/compiler/src/lexic/scanner/operator.rs
+++ b/compiler/src/lexic/scanner/operator.rs
@@ -17,7 +17,10 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
             // so this is used to retrieve the original START position of the token
             let current_len = current.len();
 
-            LexResult::Some(token::new_operator(current, start_pos - current_len), start_pos)
+            LexResult::Some(
+                token::new_operator(current, start_pos - current_len),
+                start_pos,
+            )
         }
     }
 }
diff --git a/compiler/src/lexic/scanner/string.rs b/compiler/src/lexic/scanner/string.rs
index e25363b..f46e24b 100755
--- a/compiler/src/lexic/scanner/string.rs
+++ b/compiler/src/lexic/scanner/string.rs
@@ -18,7 +18,10 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
             // 1 is added to account for the opening `"`
             let current_len = current.len() + 1;
 
-            LexResult::Some(token::new_string(current, start_pos - current_len), start_pos + 1)
+            LexResult::Some(
+                token::new_string(current, start_pos - current_len),
+                start_pos + 1,
+            )
         }
         Some(c) if *c == '\n' => LexResult::Err(LexError {
             position: start_pos,
diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs
index c335b9f..3ab13f7 100644
--- a/compiler/src/lib.rs
+++ b/compiler/src/lib.rs
@@ -1,10 +1,25 @@
-
-mod lexic;
+// Module to handle the repl and its compilation
+mod repl;
+// Defines the types of tokens and provides functions to create them
 mod token;
+// Module to handle syntactic analysis
+mod syntax;
+// Module to handle lexical analysis
+mod lexic;
+// Module to handle semantic analysis
+mod semantic;
+// Defines the AST
+mod ast_types;
+// Defines the Symbol table and operations within
+mod symbol_table;
+// Transforms an AST to JS
+mod codegen;
+mod utils;
+
 mod error_handling;
 
-use token::Token;
 use error_handling::MistiError;
+use token::Token;
 
 pub use token::TokenType;
 
diff --git a/compiler/src/syntax/binding.rs b/compiler/src/syntax/binding.rs
index d573884..2eec9ab 100644
--- a/compiler/src/syntax/binding.rs
+++ b/compiler/src/syntax/binding.rs
@@ -127,7 +127,9 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
 fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Result3<&Token> {
     match tokens.get(pos) {
         Some(t) if t.token_type == token_type => Result3::Ok(t),
-        Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => Result3::None,
+        Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => {
+            Result3::None
+        }
         Some(t) => Result3::Err(t),
         None => Result3::None,
     }
@@ -136,7 +138,9 @@ fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Res
 fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Result3<&Token> {
     match tokens.get(pos) {
         Some(t) if t.token_type == TokenType::Operator && t.value == operator => Result3::Ok(t),
-        Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => Result3::None,
+        Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => {
+            Result3::None
+        }
         Some(t) => Result3::Err(t),
         None => Result3::None,
     }
@@ -238,10 +242,9 @@ mod tests {
                 assert_eq!(4, error.error_start);
                 assert_eq!(7, error.error_end);
             }
-            _ => panic!("Error expected")
+            _ => panic!("Error expected"),
         }
 
-
         let tokens = get_tokens(&String::from("val \"hello\"")).unwrap();
         let binding = try_parse(&tokens, 0).unwrap();
 
@@ -250,7 +253,7 @@ mod tests {
                 assert_eq!(4, error.error_start);
                 assert_eq!(11, error.error_end);
             }
-            _ => panic!("Error expected")
+            _ => panic!("Error expected"),
         }
     }
 
@@ -264,7 +267,7 @@ mod tests {
                 assert_eq!(7, error.error_start);
                 assert_eq!(14, error.error_end);
             }
-            _ => panic!("Error expected")
+            _ => panic!("Error expected"),
         }
     }
 }
diff --git a/compiler/src/token.rs b/compiler/src/token.rs
index a4fd259..7ad5c81 100755
--- a/compiler/src/token.rs
+++ b/compiler/src/token.rs
@@ -12,12 +12,12 @@ pub enum TokenType {
     LeftBrace,
     RightBrace,
     Semicolon,
+    Comment,
     VAR,
     VAL,
     EOF,
 }
 
-
 #[derive(Debug)]
 pub struct Token {
     pub token_type: TokenType,
@@ -31,12 +31,8 @@ pub struct Token {
 impl Token {
     pub fn get_end_position(&self) -> usize {
         match self.token_type {
-            TokenType::String => {
-                self.position + self.value.len() + 2
-            }
-            _ => {
-                self.position + self.value.len()
-            }
+            TokenType::String => self.position + self.value.len() + 2,
+            _ => self.position + self.value.len(),
         }
     }
 }
@@ -104,3 +100,11 @@ pub fn new_datatype(value: String, position: usize) -> Token {
         position,
     }
 }
+
+pub fn new_comment(value: String, position: usize) -> Token {
+    Token {
+        token_type: TokenType::Comment,
+        value,
+        position,
+    }
+}
diff --git a/compiler/src/utils/mod.rs b/compiler/src/utils/mod.rs
index 75fbe2a..2148f98 100644
--- a/compiler/src/utils/mod.rs
+++ b/compiler/src/utils/mod.rs
@@ -8,7 +8,7 @@ impl<T> Result3<T> {
     pub fn unwrap(&self) -> &T {
         match self {
             Result3::Ok(t) => t,
-            _ => panic!("")
+            _ => panic!(""),
         }
     }
 }