[Compiler] Scan single line comments
This commit is contained in:
parent
5c60943fab
commit
74e4d16105
@ -7,6 +7,7 @@ edition = "2021"
|
||||
[lib]
|
||||
name = "misti"
|
||||
path = "src/lib.rs"
|
||||
test = false
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
|
@ -16,31 +16,31 @@ impl PrintableError for SyntaxError {
|
||||
}
|
||||
}
|
||||
|
||||
/// Extracts a line of code
|
||||
///
|
||||
/// - `chars`: Input where to extract the line from
|
||||
/// - `start_position`: Position where the erroneous code starts
|
||||
/// - `end_position`: Position where the erroneous code ends
|
||||
///
|
||||
/// Returns a tuple of:
|
||||
///
|
||||
/// - `String`: The faulty line
|
||||
/// - `usize`: The amount of chars *before* the faulty code
|
||||
/// - `usize`: The length of the faulty code
|
||||
///
|
||||
/// ## Example
|
||||
///
|
||||
/// ```
|
||||
/// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
|
||||
/// let start_position = 13;
|
||||
/// let end_position = 15;
|
||||
///
|
||||
/// let (line, before, length) = get_line(&input, start_position, end_position);
|
||||
///
|
||||
/// assert_eq!("val number == 50", line);
|
||||
/// assert_eq!(11, before);
|
||||
/// assert_eq!(2, length);
|
||||
/// ```
|
||||
// Extracts a line of code
|
||||
//
|
||||
// - `chars`: Input where to extract the line from
|
||||
// - `start_position`: Position where the erroneous code starts
|
||||
// - `end_position`: Position where the erroneous code ends
|
||||
//
|
||||
// Returns a tuple of:
|
||||
//
|
||||
// - `String`: The faulty line
|
||||
// - `usize`: The amount of chars *before* the faulty code
|
||||
// - `usize`: The length of the faulty code
|
||||
//
|
||||
// ## Example
|
||||
//
|
||||
// ```
|
||||
// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
|
||||
// let start_position = 13;
|
||||
// let end_position = 15;
|
||||
//
|
||||
// let (line, before, length) = get_line(&input, start_position, end_position);
|
||||
//
|
||||
// assert_eq!("val number == 50", line);
|
||||
// assert_eq!(11, before);
|
||||
// assert_eq!(2, length);
|
||||
// ```
|
||||
fn get_line(
|
||||
chars: &Vec<char>,
|
||||
start_position: usize,
|
||||
|
@ -77,6 +77,7 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
|
||||
.or_else(|| scanner::identifier(next_char, chars, current_pos))
|
||||
.or_else(|| scanner::datatype(next_char, chars, current_pos))
|
||||
.or_else(|| scanner::string(next_char, chars, current_pos))
|
||||
.or_else(|| scanner::new_comment(next_char, chars, current_pos))
|
||||
.or_else(|| scanner::operator(next_char, chars, current_pos))
|
||||
.or_else(|| scanner::grouping_sign(next_char, chars, current_pos))
|
||||
.or_else(|| scanner::new_line(next_char, chars, current_pos))
|
||||
|
@ -4,6 +4,7 @@ use super::{
|
||||
};
|
||||
|
||||
mod identifier;
|
||||
mod new_comment;
|
||||
mod new_line;
|
||||
mod number;
|
||||
mod operator;
|
||||
@ -58,3 +59,12 @@ pub fn string(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult>
|
||||
pub fn new_line(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||
(c == '\n').then(|| new_line::scan(chars, start_pos))
|
||||
}
|
||||
|
||||
/// Attempts to scan a single line comment.
|
||||
pub fn new_comment(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||
let next_char = chars.get(start_pos + 1);
|
||||
match (c, next_char) {
|
||||
('/', Some('/')) => Some(new_comment::scan(chars, start_pos)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
78
compiler/src/lexic/scanner/new_comment.rs
Normal file
78
compiler/src/lexic/scanner/new_comment.rs
Normal file
@ -0,0 +1,78 @@
|
||||
use crate::{
|
||||
lexic::{utils, LexResult},
|
||||
token::new_comment,
|
||||
};
|
||||
|
||||
/// Scans a new line.
|
||||
///
|
||||
/// Assummes that `start_pos` and `start_pos + 1` point to a slash `/`
|
||||
///
|
||||
/// This methods always succeedes
|
||||
pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
|
||||
let (comment_content, next_pos) =
|
||||
scan_any_except_new_line(chars, start_pos + 2, String::from(""));
|
||||
let token = new_comment(format!("//{}", comment_content), start_pos);
|
||||
|
||||
LexResult::Some(token, next_pos)
|
||||
}
|
||||
|
||||
/// Collects chars from `chars`, starting at `start_pos`, until a `\n` or the
/// end of the input is found.
///
/// - `chars`: Input to read from
/// - `start_pos`: Position of the first char to collect
/// - `current`: Text collected so far; the scanned chars are appended to it
///
/// Returns a tuple of:
///
/// - `String`: `current` followed by every collected char
/// - `usize`: The position where scanning stopped. This is the position of
///   the `\n` (which is not consumed), or the first position for which the
///   input has no char
///
/// Implemented as a loop instead of one recursive call per char, so the stack
/// usage does not grow with the length of the line.
fn scan_any_except_new_line(
    chars: &Vec<char>,
    start_pos: usize,
    current: String,
) -> (String, usize) {
    let mut content = current;
    let mut pos = start_pos;

    while let Some(&c) = chars.get(pos) {
        if c == '\n' {
            break;
        }

        content.push(c);
        pos += 1;
    }

    (content, pos)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::lexic::scanner::TokenType;

    use super::*;

    /// Converts a string slice into the `Vec<char>` the scanner operates on.
    fn str_to_vec(s: &str) -> Vec<char> {
        s.chars().collect()
    }

    #[test]
    fn should_scan_empty_comment() {
        let input = str_to_vec("//");
        let start_pos = 0;

        let result = scan(&input, start_pos);
        match result {
            LexResult::Some(t, next) => {
                assert_eq!(2, next);
                assert_eq!("//", t.value);
                assert_eq!(0, t.position);
                assert_eq!(TokenType::Comment, t.token_type);
            }
            _ => panic!("Expected a comment token"),
        }
    }

    #[test]
    fn should_scan_until_new_line() {
        // Two leading spaces: `start_pos = 2` is then the first slash and the
        // `\n` sits at position 17, which is what the assertions below expect
        let input = str_to_vec("  // some comment\n// other comment");
        let start_pos = 2;

        let result = scan(&input, start_pos);
        match result {
            LexResult::Some(t, next) => {
                assert_eq!(17, next);
                assert_eq!("// some comment", t.value);
                assert_eq!(start_pos, t.position);
                assert_eq!(TokenType::Comment, t.token_type);
            }
            _ => panic!("Expected a comment token"),
        }
    }

    #[test]
    fn should_scan_until_end_of_input() {
        let input = str_to_vec("// abc");
        let start_pos = 0;

        let result = scan(&input, start_pos);
        match result {
            LexResult::Some(t, next) => {
                assert_eq!(6, next);
                assert_eq!("// abc", t.value);
                assert_eq!(start_pos, t.position);
                assert_eq!(TokenType::Comment, t.token_type);
            }
            _ => panic!("Expected a comment token"),
        }
    }
}
|
@ -38,8 +38,11 @@ fn scan_decimal(chars: &Vec<char>, start_pos: usize, current: String) -> LexResu
|
||||
// so this is used to retrieve the original START position of the token
|
||||
let current_len = current.len();
|
||||
|
||||
LexResult::Some(token::new_number(current, start_pos - current_len), start_pos)
|
||||
},
|
||||
LexResult::Some(
|
||||
token::new_number(current, start_pos - current_len),
|
||||
start_pos,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -97,7 +100,10 @@ fn scan_double_impl(chars: &Vec<char>, start_pos: usize, current: String) -> Lex
|
||||
// so this is used to retrieve the original START position of the token
|
||||
let current_len = current.len();
|
||||
|
||||
LexResult::Some(token::new_number(current, start_pos - current_len), start_pos)
|
||||
LexResult::Some(
|
||||
token::new_number(current, start_pos - current_len),
|
||||
start_pos,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -140,7 +146,10 @@ fn scan_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Token,
|
||||
// so this is used to retrieve the original START position of the token
|
||||
let current_len = current.len();
|
||||
|
||||
(token::new_number(current, start_pos - current_len), start_pos)
|
||||
(
|
||||
token::new_number(current, start_pos - current_len),
|
||||
start_pos,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -156,7 +165,10 @@ fn scan_hex_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Tok
|
||||
// so this is used to retrieve the original START position of the token
|
||||
let current_len = current.len();
|
||||
|
||||
(token::new_number(current, start_pos - current_len), start_pos)
|
||||
(
|
||||
token::new_number(current, start_pos - current_len),
|
||||
start_pos,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -426,6 +438,5 @@ mod tests {
|
||||
} else {
|
||||
panic!("Expected some value")
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -17,7 +17,10 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
|
||||
// so this is used to retrieve the original START position of the token
|
||||
let current_len = current.len();
|
||||
|
||||
LexResult::Some(token::new_operator(current, start_pos - current_len), start_pos)
|
||||
LexResult::Some(
|
||||
token::new_operator(current, start_pos - current_len),
|
||||
start_pos,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -18,7 +18,10 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
|
||||
// 1 is added to account for the opening `"`
|
||||
let current_len = current.len() + 1;
|
||||
|
||||
LexResult::Some(token::new_string(current, start_pos - current_len), start_pos + 1)
|
||||
LexResult::Some(
|
||||
token::new_string(current, start_pos - current_len),
|
||||
start_pos + 1,
|
||||
)
|
||||
}
|
||||
Some(c) if *c == '\n' => LexResult::Err(LexError {
|
||||
position: start_pos,
|
||||
|
@ -1,10 +1,25 @@
|
||||
|
||||
mod lexic;
|
||||
// Module to handle the repl and its compilation
|
||||
mod repl;
|
||||
// Defines the types of tokens and provides functions to create them
|
||||
mod token;
|
||||
// Module to handle syntactic analysis
|
||||
mod syntax;
|
||||
// Module to handle lexical analysis
|
||||
mod lexic;
|
||||
// Module to handle semantic analysis
|
||||
mod semantic;
|
||||
// Defines the AST
|
||||
mod ast_types;
|
||||
// Defines the Symbol table and operations within
|
||||
mod symbol_table;
|
||||
// Transforms an AST to JS
|
||||
mod codegen;
|
||||
mod utils;
|
||||
|
||||
mod error_handling;
|
||||
|
||||
use token::Token;
|
||||
use error_handling::MistiError;
|
||||
use token::Token;
|
||||
|
||||
pub use token::TokenType;
|
||||
|
||||
|
@ -127,7 +127,9 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
|
||||
fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Result3<&Token> {
|
||||
match tokens.get(pos) {
|
||||
Some(t) if t.token_type == token_type => Result3::Ok(t),
|
||||
Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => Result3::None,
|
||||
Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => {
|
||||
Result3::None
|
||||
}
|
||||
Some(t) => Result3::Err(t),
|
||||
None => Result3::None,
|
||||
}
|
||||
@ -136,7 +138,9 @@ fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Res
|
||||
fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Result3<&Token> {
|
||||
match tokens.get(pos) {
|
||||
Some(t) if t.token_type == TokenType::Operator && t.value == operator => Result3::Ok(t),
|
||||
Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => Result3::None,
|
||||
Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => {
|
||||
Result3::None
|
||||
}
|
||||
Some(t) => Result3::Err(t),
|
||||
None => Result3::None,
|
||||
}
|
||||
@ -238,10 +242,9 @@ mod tests {
|
||||
assert_eq!(4, error.error_start);
|
||||
assert_eq!(7, error.error_end);
|
||||
}
|
||||
_ => panic!("Error expected")
|
||||
_ => panic!("Error expected"),
|
||||
}
|
||||
|
||||
|
||||
let tokens = get_tokens(&String::from("val \"hello\"")).unwrap();
|
||||
let binding = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
@ -250,7 +253,7 @@ mod tests {
|
||||
assert_eq!(4, error.error_start);
|
||||
assert_eq!(11, error.error_end);
|
||||
}
|
||||
_ => panic!("Error expected")
|
||||
_ => panic!("Error expected"),
|
||||
}
|
||||
}
|
||||
|
||||
@ -264,7 +267,7 @@ mod tests {
|
||||
assert_eq!(7, error.error_start);
|
||||
assert_eq!(14, error.error_end);
|
||||
}
|
||||
_ => panic!("Error expected")
|
||||
_ => panic!("Error expected"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -12,12 +12,12 @@ pub enum TokenType {
|
||||
LeftBrace,
|
||||
RightBrace,
|
||||
Semicolon,
|
||||
Comment,
|
||||
VAR,
|
||||
VAL,
|
||||
EOF,
|
||||
}
|
||||
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Token {
|
||||
pub token_type: TokenType,
|
||||
@ -31,12 +31,8 @@ pub struct Token {
|
||||
impl Token {
|
||||
pub fn get_end_position(&self) -> usize {
|
||||
match self.token_type {
|
||||
TokenType::String => {
|
||||
self.position + self.value.len() + 2
|
||||
}
|
||||
_ => {
|
||||
self.position + self.value.len()
|
||||
}
|
||||
TokenType::String => self.position + self.value.len() + 2,
|
||||
_ => self.position + self.value.len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -104,3 +100,11 @@ pub fn new_datatype(value: String, position: usize) -> Token {
|
||||
position,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_comment(value: String, position: usize) -> Token {
|
||||
Token {
|
||||
token_type: TokenType::Comment,
|
||||
value,
|
||||
position,
|
||||
}
|
||||
}
|
||||
|
@ -8,7 +8,7 @@ impl<T> Result3<T> {
|
||||
pub fn unwrap(&self) -> &T {
|
||||
match self {
|
||||
Result3::Ok(t) => t,
|
||||
_ => panic!("")
|
||||
_ => panic!(""),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user