[Compiler] Scan single line comments

This commit is contained in:
Araozu 2023-04-05 10:31:12 -05:00
parent 5c60943fab
commit 74e4d16105
12 changed files with 179 additions and 50 deletions

View File

@ -7,6 +7,7 @@ edition = "2021"
[lib] [lib]
name = "misti" name = "misti"
path = "src/lib.rs" path = "src/lib.rs"
test = false
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@ -16,31 +16,31 @@ impl PrintableError for SyntaxError {
} }
} }
/// Extracts a line of code // Extracts a line of code
/// //
/// - `chars`: Input where to extract the line from // - `chars`: Input where to extract the line from
/// - `start_position`: Position where the erroneous code starts // - `start_position`: Position where the erroneous code starts
/// - `end_position`: Position where the erroneous code ends // - `end_position`: Position where the erroneous code ends
/// //
/// Returns a tuple of: // Returns a tuple of:
/// //
/// - `String`: The faulty line // - `String`: The faulty line
/// - `usize`: The amount of chars *before* the faulty code // - `usize`: The amount of chars *before* the faulty code
/// - `usize`: The length of the faulty code // - `usize`: The length of the faulty code
/// //
/// ## Example // ## Example
/// //
/// ``` // ```
/// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect(); // let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
/// let start_position = 13; // let start_position = 13;
/// let end_position = 15; // let end_position = 15;
/// //
/// let (line, before, length) = get_line(&input, start_position, end_position); // let (line, before, length) = get_line(&input, start_position, end_position);
/// //
/// assert_eq!("val number == 50", line); // assert_eq!("val number == 50", line);
/// assert_eq!(11, before); // assert_eq!(11, before);
/// assert_eq!(2, length); // assert_eq!(2, length);
/// ``` // ```
fn get_line( fn get_line(
chars: &Vec<char>, chars: &Vec<char>,
start_position: usize, start_position: usize,

View File

@ -77,6 +77,7 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
.or_else(|| scanner::identifier(next_char, chars, current_pos)) .or_else(|| scanner::identifier(next_char, chars, current_pos))
.or_else(|| scanner::datatype(next_char, chars, current_pos)) .or_else(|| scanner::datatype(next_char, chars, current_pos))
.or_else(|| scanner::string(next_char, chars, current_pos)) .or_else(|| scanner::string(next_char, chars, current_pos))
.or_else(|| scanner::new_comment(next_char, chars, current_pos))
.or_else(|| scanner::operator(next_char, chars, current_pos)) .or_else(|| scanner::operator(next_char, chars, current_pos))
.or_else(|| scanner::grouping_sign(next_char, chars, current_pos)) .or_else(|| scanner::grouping_sign(next_char, chars, current_pos))
.or_else(|| scanner::new_line(next_char, chars, current_pos)) .or_else(|| scanner::new_line(next_char, chars, current_pos))

View File

@ -4,6 +4,7 @@ use super::{
}; };
mod identifier; mod identifier;
mod new_comment;
mod new_line; mod new_line;
mod number; mod number;
mod operator; mod operator;
@ -58,3 +59,12 @@ pub fn string(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult>
pub fn new_line(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> { pub fn new_line(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
(c == '\n').then(|| new_line::scan(chars, start_pos)) (c == '\n').then(|| new_line::scan(chars, start_pos))
} }
/// Attempts to scan a single line comment.
///
/// A comment is recognized only when `c` is a slash and the character at
/// `start_pos + 1` is a slash as well. In any other case `None` is returned
/// and nothing is scanned.
pub fn new_comment(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
    let opens_comment = c == '/' && chars.get(start_pos + 1) == Some(&'/');

    opens_comment.then(|| new_comment::scan(chars, start_pos))
}

View File

@ -0,0 +1,78 @@
use crate::{
lexic::{utils, LexResult},
token::new_comment,
};
/// Scans a single line comment.
///
/// Assumes that `start_pos` and `start_pos + 1` both point to a slash `/`.
/// Every character after those two slashes, up to but not including the
/// next `\n` (or until the input runs out), is part of the comment.
///
/// This function always succeeds: it always returns `LexResult::Some` with
/// the comment token and the position where scanning stopped.
pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
    // Skip the two slashes that open the comment
    let content_start = start_pos + 2;
    let (content, end_pos) = scan_any_except_new_line(chars, content_start, String::new());

    // The token value keeps the leading `//`, and the token is positioned
    // at the first slash
    let comment = new_comment(format!("//{}", content), start_pos);

    LexResult::Some(comment, end_pos)
}
/// Collects characters starting at `start_pos` until a `\n` is found or the
/// input has no more characters.
///
/// - `chars`: Input being scanned
/// - `start_pos`: Position of the first character to collect
/// - `current`: Text accumulated so far; collected characters are added to it
///
/// Returns a tuple of:
///
/// - `String`: The accumulated text. The `\n` itself is never included
/// - `usize`: The position where scanning stopped. This is the position of
///   the `\n`, or the first position for which the input has no character
fn scan_any_except_new_line(
    chars: &Vec<char>,
    start_pos: usize,
    current: String,
) -> (String, usize) {
    let mut content = current;
    let mut pos = start_pos;

    // A loop is used instead of one recursive call per character: Rust does
    // not guarantee tail call elimination, so a very long comment line could
    // otherwise exhaust the stack.
    while let Some(&c) = chars.get(pos) {
        if c == '\n' {
            break;
        }

        // Same helper as before, so the text is built exactly the same way
        content = utils::str_append(content, c);
        pos += 1;
    }

    (content, pos)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lexic::scanner::TokenType;

    /// Turns a string slice into the vector of chars the scanner works on.
    fn str_to_vec(s: &str) -> Vec<char> {
        s.chars().collect()
    }

    /// A lone `//` with nothing after it yields a comment whose value is just `//`.
    #[test]
    fn should_scan_empty_comment() {
        let input = str_to_vec("//");
        let start_pos = 0;

        if let LexResult::Some(t, next) = scan(&input, start_pos) {
            assert_eq!(2, next);
            assert_eq!("//", t.value);
            assert_eq!(0, t.position);
            assert_eq!(TokenType::Comment, t.token_type);
        } else {
            panic!()
        }
    }

    /// Scanning stops at the first `\n`, so the second comment is left untouched.
    #[test]
    fn should_scan_until_new_line() {
        let input = str_to_vec("  // some comment\n// other comment");
        let start_pos = 2;

        if let LexResult::Some(t, next) = scan(&input, start_pos) {
            // 17 is the position of the `\n` that ends the first comment
            assert_eq!(17, next);
            assert_eq!("// some comment", t.value);
            assert_eq!(start_pos, t.position);
            assert_eq!(TokenType::Comment, t.token_type);
        } else {
            panic!()
        }
    }
}

View File

@ -38,8 +38,11 @@ fn scan_decimal(chars: &Vec<char>, start_pos: usize, current: String) -> LexResu
// so this is used to retrieve the original START position of the token // so this is used to retrieve the original START position of the token
let current_len = current.len(); let current_len = current.len();
LexResult::Some(token::new_number(current, start_pos - current_len), start_pos) LexResult::Some(
}, token::new_number(current, start_pos - current_len),
start_pos,
)
}
} }
} }
@ -97,7 +100,10 @@ fn scan_double_impl(chars: &Vec<char>, start_pos: usize, current: String) -> Lex
// so this is used to retrieve the original START position of the token // so this is used to retrieve the original START position of the token
let current_len = current.len(); let current_len = current.len();
LexResult::Some(token::new_number(current, start_pos - current_len), start_pos) LexResult::Some(
token::new_number(current, start_pos - current_len),
start_pos,
)
} }
} }
} }
@ -140,7 +146,10 @@ fn scan_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Token,
// so this is used to retrieve the original START position of the token // so this is used to retrieve the original START position of the token
let current_len = current.len(); let current_len = current.len();
(token::new_number(current, start_pos - current_len), start_pos) (
token::new_number(current, start_pos - current_len),
start_pos,
)
} }
} }
} }
@ -156,7 +165,10 @@ fn scan_hex_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Tok
// so this is used to retrieve the original START position of the token // so this is used to retrieve the original START position of the token
let current_len = current.len(); let current_len = current.len();
(token::new_number(current, start_pos - current_len), start_pos) (
token::new_number(current, start_pos - current_len),
start_pos,
)
} }
} }
} }
@ -426,6 +438,5 @@ mod tests {
} else { } else {
panic!("Expected some value") panic!("Expected some value")
}; };
} }
} }

View File

@ -17,7 +17,10 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
// so this is used to retrieve the original START position of the token // so this is used to retrieve the original START position of the token
let current_len = current.len(); let current_len = current.len();
LexResult::Some(token::new_operator(current, start_pos - current_len), start_pos) LexResult::Some(
token::new_operator(current, start_pos - current_len),
start_pos,
)
} }
} }
} }

View File

@ -18,7 +18,10 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
// 1 is added to account for the opening `"` // 1 is added to account for the opening `"`
let current_len = current.len() + 1; let current_len = current.len() + 1;
LexResult::Some(token::new_string(current, start_pos - current_len), start_pos + 1) LexResult::Some(
token::new_string(current, start_pos - current_len),
start_pos + 1,
)
} }
Some(c) if *c == '\n' => LexResult::Err(LexError { Some(c) if *c == '\n' => LexResult::Err(LexError {
position: start_pos, position: start_pos,

View File

@ -1,10 +1,25 @@
// Module to handle the repl and its compilation
mod lexic; mod repl;
// Defines the types of tokens and provides functions to create them
mod token; mod token;
// Module to handle syntactic analysis
mod syntax;
// Module to handle lexical analysis
mod lexic;
// Module to handle semantic analysis
mod semantic;
// Defines the AST
mod ast_types;
// Defines the Symbol table and operations within
mod symbol_table;
// Transforms an AST to JS
mod codegen;
mod utils;
mod error_handling; mod error_handling;
use token::Token;
use error_handling::MistiError; use error_handling::MistiError;
use token::Token;
pub use token::TokenType; pub use token::TokenType;

View File

@ -127,7 +127,9 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Result3<&Token> { fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Result3<&Token> {
match tokens.get(pos) { match tokens.get(pos) {
Some(t) if t.token_type == token_type => Result3::Ok(t), Some(t) if t.token_type == token_type => Result3::Ok(t),
Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => Result3::None, Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => {
Result3::None
}
Some(t) => Result3::Err(t), Some(t) => Result3::Err(t),
None => Result3::None, None => Result3::None,
} }
@ -136,7 +138,9 @@ fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Res
fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Result3<&Token> { fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Result3<&Token> {
match tokens.get(pos) { match tokens.get(pos) {
Some(t) if t.token_type == TokenType::Operator && t.value == operator => Result3::Ok(t), Some(t) if t.token_type == TokenType::Operator && t.value == operator => Result3::Ok(t),
Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => Result3::None, Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => {
Result3::None
}
Some(t) => Result3::Err(t), Some(t) => Result3::Err(t),
None => Result3::None, None => Result3::None,
} }
@ -238,10 +242,9 @@ mod tests {
assert_eq!(4, error.error_start); assert_eq!(4, error.error_start);
assert_eq!(7, error.error_end); assert_eq!(7, error.error_end);
} }
_ => panic!("Error expected") _ => panic!("Error expected"),
} }
let tokens = get_tokens(&String::from("val \"hello\"")).unwrap(); let tokens = get_tokens(&String::from("val \"hello\"")).unwrap();
let binding = try_parse(&tokens, 0).unwrap(); let binding = try_parse(&tokens, 0).unwrap();
@ -250,7 +253,7 @@ mod tests {
assert_eq!(4, error.error_start); assert_eq!(4, error.error_start);
assert_eq!(11, error.error_end); assert_eq!(11, error.error_end);
} }
_ => panic!("Error expected") _ => panic!("Error expected"),
} }
} }
@ -264,7 +267,7 @@ mod tests {
assert_eq!(7, error.error_start); assert_eq!(7, error.error_start);
assert_eq!(14, error.error_end); assert_eq!(14, error.error_end);
} }
_ => panic!("Error expected") _ => panic!("Error expected"),
} }
} }
} }

View File

@ -12,12 +12,12 @@ pub enum TokenType {
LeftBrace, LeftBrace,
RightBrace, RightBrace,
Semicolon, Semicolon,
Comment,
VAR, VAR,
VAL, VAL,
EOF, EOF,
} }
#[derive(Debug)] #[derive(Debug)]
pub struct Token { pub struct Token {
pub token_type: TokenType, pub token_type: TokenType,
@ -31,12 +31,8 @@ pub struct Token {
impl Token { impl Token {
pub fn get_end_position(&self) -> usize { pub fn get_end_position(&self) -> usize {
match self.token_type { match self.token_type {
TokenType::String => { TokenType::String => self.position + self.value.len() + 2,
self.position + self.value.len() + 2 _ => self.position + self.value.len(),
}
_ => {
self.position + self.value.len()
}
} }
} }
} }
@ -104,3 +100,11 @@ pub fn new_datatype(value: String, position: usize) -> Token {
position, position,
} }
} }
/// Creates a token of type `TokenType::Comment`.
///
/// - `value`: Text stored as the token's value
/// - `position`: Position stored as the token's position
pub fn new_comment(value: String, position: usize) -> Token {
    let token_type = TokenType::Comment;

    Token {
        position,
        value,
        token_type,
    }
}

View File

@ -8,7 +8,7 @@ impl<T> Result3<T> {
pub fn unwrap(&self) -> &T { pub fn unwrap(&self) -> &T {
match self { match self {
Result3::Ok(t) => t, Result3::Ok(t) => t,
_ => panic!("") _ => panic!(""),
} }
} }
} }