[Compiler] Scan single line comments

master
Araozu 2023-04-05 10:31:12 -05:00
parent 5c60943fab
commit 74e4d16105
12 changed files with 179 additions and 50 deletions

View File

@ -7,6 +7,7 @@ edition = "2021"
[lib]
name = "misti"
path = "src/lib.rs"
test = false
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@ -16,31 +16,31 @@ impl PrintableError for SyntaxError {
}
}
/// Extracts a line of code
///
/// - `chars`: Input where to extract the line from
/// - `start_position`: Position where the erroneous code starts
/// - `end_position`: Position where the erroneous code ends
///
/// Returns a tuple of:
///
/// - `String`: The faulty line
/// - `usize`: The amount of chars *before* the faulty code
/// - `usize`: The length of the faulty code
///
/// ## Example
///
/// ```
/// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
/// let start_position = 13;
/// let end_position = 15;
///
/// let (line, before, length) = get_line(&input, start_position, end_position);
///
/// assert_eq!("val number == 50", line);
/// assert_eq!(11, before);
/// assert_eq!(2, length);
/// ```
// Extracts a line of code
//
// - `chars`: Input where to extract the line from
// - `start_position`: Position where the erroneous code starts
// - `end_position`: Position where the erroneous code ends
//
// Returns a tuple of:
//
// - `String`: The faulty line
// - `usize`: The amount of chars *before* the faulty code
// - `usize`: The length of the faulty code
//
// ## Example
//
// ```
// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
// let start_position = 13;
// let end_position = 15;
//
// let (line, before, length) = get_line(&input, start_position, end_position);
//
// assert_eq!("val number == 50", line);
// assert_eq!(11, before);
// assert_eq!(2, length);
// ```
fn get_line(
chars: &Vec<char>,
start_position: usize,

View File

@ -77,6 +77,7 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
.or_else(|| scanner::identifier(next_char, chars, current_pos))
.or_else(|| scanner::datatype(next_char, chars, current_pos))
.or_else(|| scanner::string(next_char, chars, current_pos))
.or_else(|| scanner::new_comment(next_char, chars, current_pos))
.or_else(|| scanner::operator(next_char, chars, current_pos))
.or_else(|| scanner::grouping_sign(next_char, chars, current_pos))
.or_else(|| scanner::new_line(next_char, chars, current_pos))

View File

@ -4,6 +4,7 @@ use super::{
};
mod identifier;
mod new_comment;
mod new_line;
mod number;
mod operator;
@ -58,3 +59,12 @@ pub fn string(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult>
/// Attempts to scan a new line.
pub fn new_line(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
    // Only delegates to the new_line scanner when the current char is a line feed.
    (c == '\n').then(|| new_line::scan(chars, start_pos))
}
/// Attempts to scan a single line comment.
///
/// Succeeds only when the current char and the one after it are both a slash `/`.
pub fn new_comment(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
    if c == '/' && chars.get(start_pos + 1) == Some(&'/') {
        Some(new_comment::scan(chars, start_pos))
    } else {
        None
    }
}

View File

@ -0,0 +1,78 @@
use crate::{
lexic::{utils, LexResult},
token::new_comment,
};
/// Scans a single line comment.
///
/// Assumes that the chars at `start_pos` and `start_pos + 1` are both a slash `/`.
///
/// This method always succeeds.
pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
    // Skip the two leading slashes and collect everything up to (not including)
    // the next `\n` or the end of the input.
    let (comment_content, next_pos) =
        scan_any_except_new_line(chars, start_pos + 2, String::from(""));
    // The token value keeps the `//` prefix, so the original source text is preserved.
    let token = new_comment(format!("//{}", comment_content), start_pos);
    LexResult::Some(token, next_pos)
}
/// Advances over `chars` starting at `start_pos`, accumulating each char into
/// `current` via `utils::str_append`, until a new line `\n` or the end of the
/// input is reached.
///
/// Returns the accumulated string and the position where scanning stopped
/// (the position of the `\n` itself, or one past the last char).
fn scan_any_except_new_line(
    chars: &Vec<char>,
    start_pos: usize,
    current: String,
) -> (String, usize) {
    let mut accumulated = current;
    let mut pos = start_pos;
    while let Some(c) = chars.get(pos) {
        if *c == '\n' {
            break;
        }
        accumulated = utils::str_append(accumulated, *c);
        pos += 1;
    }
    (accumulated, pos)
}
#[cfg(test)]
mod tests {
    use crate::lexic::scanner::TokenType;

    use super::*;

    /// Converts a string slice into the char vector the scanner operates on.
    fn str_to_vec(s: &str) -> Vec<char> {
        s.chars().collect()
    }

    #[test]
    fn should_scan_empty_comment() {
        let input = str_to_vec("//");
        let start_pos = 0;

        let result = scan(&input, start_pos);
        match result {
            LexResult::Some(t, next) => {
                // Both slashes were consumed and scanning stopped at end of input.
                assert_eq!(2, next);
                assert_eq!("//", t.value);
                assert_eq!(0, t.position);
                assert_eq!(TokenType::Comment, t.token_type);
            }
            _ => {
                panic!()
            }
        }
    }

    #[test]
    fn should_scan_until_new_line() {
        // Two leading spaces so that the comment's first `/` is at index 2;
        // with a single leading space, `start_pos = 2` would point at the
        // second slash and the assertions below could not hold.
        let input = str_to_vec("  // some comment\n// other comment");
        let start_pos = 2;

        let result = scan(&input, start_pos);
        match result {
            LexResult::Some(t, next) => {
                // The scanner stops at the `\n` (index 17) without consuming it.
                assert_eq!(17, next);
                assert_eq!("// some comment", t.value);
                assert_eq!(start_pos, t.position);
                assert_eq!(TokenType::Comment, t.token_type);
            }
            _ => {
                panic!()
            }
        }
    }
}

View File

@ -38,8 +38,11 @@ fn scan_decimal(chars: &Vec<char>, start_pos: usize, current: String) -> LexResu
// so this is used to retrieve the original START position of the token
let current_len = current.len();
LexResult::Some(token::new_number(current, start_pos - current_len), start_pos)
},
LexResult::Some(
token::new_number(current, start_pos - current_len),
start_pos,
)
}
}
}
@ -97,7 +100,10 @@ fn scan_double_impl(chars: &Vec<char>, start_pos: usize, current: String) -> Lex
// so this is used to retrieve the original START position of the token
let current_len = current.len();
LexResult::Some(token::new_number(current, start_pos - current_len), start_pos)
LexResult::Some(
token::new_number(current, start_pos - current_len),
start_pos,
)
}
}
}
@ -140,7 +146,10 @@ fn scan_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Token,
// so this is used to retrieve the original START position of the token
let current_len = current.len();
(token::new_number(current, start_pos - current_len), start_pos)
(
token::new_number(current, start_pos - current_len),
start_pos,
)
}
}
}
@ -156,7 +165,10 @@ fn scan_hex_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Tok
// so this is used to retrieve the original START position of the token
let current_len = current.len();
(token::new_number(current, start_pos - current_len), start_pos)
(
token::new_number(current, start_pos - current_len),
start_pos,
)
}
}
}
@ -426,6 +438,5 @@ mod tests {
} else {
panic!("Expected some value")
};
}
}

View File

@ -17,7 +17,10 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
// so this is used to retrieve the original START position of the token
let current_len = current.len();
LexResult::Some(token::new_operator(current, start_pos - current_len), start_pos)
LexResult::Some(
token::new_operator(current, start_pos - current_len),
start_pos,
)
}
}
}

View File

@ -18,7 +18,10 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
// 1 is added to account for the opening `"`
let current_len = current.len() + 1;
LexResult::Some(token::new_string(current, start_pos - current_len), start_pos + 1)
LexResult::Some(
token::new_string(current, start_pos - current_len),
start_pos + 1,
)
}
Some(c) if *c == '\n' => LexResult::Err(LexError {
position: start_pos,

View File

@ -1,10 +1,25 @@
mod lexic;
// Module to handle the repl and its compilation
mod repl;
// Defines the types of tokens and provides functions to create them
mod token;
// Module to handle syntactic analysis
mod syntax;
// Module to handle lexical analysis
mod lexic;
// Module to handle semantic analysis
mod semantic;
// Defines the AST
mod ast_types;
// Defines the Symbol table and operations within
mod symbol_table;
// Transforms an AST to JS
mod codegen;
mod utils;
mod error_handling;
use token::Token;
use error_handling::MistiError;
use token::Token;
pub use token::TokenType;

View File

@ -127,7 +127,9 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Result3<&Token> {
match tokens.get(pos) {
Some(t) if t.token_type == token_type => Result3::Ok(t),
Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => Result3::None,
Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => {
Result3::None
}
Some(t) => Result3::Err(t),
None => Result3::None,
}
@ -136,7 +138,9 @@ fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Res
fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Result3<&Token> {
match tokens.get(pos) {
Some(t) if t.token_type == TokenType::Operator && t.value == operator => Result3::Ok(t),
Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => Result3::None,
Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => {
Result3::None
}
Some(t) => Result3::Err(t),
None => Result3::None,
}
@ -238,10 +242,9 @@ mod tests {
assert_eq!(4, error.error_start);
assert_eq!(7, error.error_end);
}
_ => panic!("Error expected")
_ => panic!("Error expected"),
}
let tokens = get_tokens(&String::from("val \"hello\"")).unwrap();
let binding = try_parse(&tokens, 0).unwrap();
@ -250,7 +253,7 @@ mod tests {
assert_eq!(4, error.error_start);
assert_eq!(11, error.error_end);
}
_ => panic!("Error expected")
_ => panic!("Error expected"),
}
}
@ -264,7 +267,7 @@ mod tests {
assert_eq!(7, error.error_start);
assert_eq!(14, error.error_end);
}
_ => panic!("Error expected")
_ => panic!("Error expected"),
}
}
}

View File

@ -12,12 +12,12 @@ pub enum TokenType {
LeftBrace,
RightBrace,
Semicolon,
Comment,
VAR,
VAL,
EOF,
}
#[derive(Debug)]
pub struct Token {
pub token_type: TokenType,
@ -31,12 +31,8 @@ pub struct Token {
impl Token {
pub fn get_end_position(&self) -> usize {
match self.token_type {
TokenType::String => {
self.position + self.value.len() + 2
}
_ => {
self.position + self.value.len()
}
TokenType::String => self.position + self.value.len() + 2,
_ => self.position + self.value.len(),
}
}
}
@ -104,3 +100,11 @@ pub fn new_datatype(value: String, position: usize) -> Token {
position,
}
}
/// Creates a token of type `Comment`.
///
/// - `value`: The comment's source text
/// - `position`: Position of the first char of the comment in the input
pub fn new_comment(value: String, position: usize) -> Token {
    Token {
        token_type: TokenType::Comment,
        value,
        position,
    }
}

View File

@ -8,7 +8,7 @@ impl<T> Result3<T> {
pub fn unwrap(&self) -> &T {
match self {
Result3::Ok(t) => t,
_ => panic!("")
_ => panic!(""),
}
}
}