[Compiler] Scan single line comments
This commit is contained in:
parent
5c60943fab
commit
74e4d16105
@ -7,6 +7,7 @@ edition = "2021"
|
|||||||
[lib]
|
[lib]
|
||||||
name = "misti"
|
name = "misti"
|
||||||
path = "src/lib.rs"
|
path = "src/lib.rs"
|
||||||
|
test = false
|
||||||
|
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
@ -16,31 +16,31 @@ impl PrintableError for SyntaxError {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extracts a line of code
|
// Extracts a line of code
|
||||||
///
|
//
|
||||||
/// - `chars`: Input where to extract the line from
|
// - `chars`: Input where to extract the line from
|
||||||
/// - `start_position`: Position where the erroneous code starts
|
// - `start_position`: Position where the erroneous code starts
|
||||||
/// - `end_position`: Position where the erroneous code ends
|
// - `end_position`: Position where the erroneous code ends
|
||||||
///
|
//
|
||||||
/// Returns a tuple of:
|
// Returns a tuple of:
|
||||||
///
|
//
|
||||||
/// - `String`: The faulty line
|
// - `String`: The faulty line
|
||||||
/// - `usize`: The amount of chars *before* the faulty code
|
// - `usize`: The amount of chars *before* the faulty code
|
||||||
/// - `usize`: The length of the faulty code
|
// - `usize`: The length of the faulty code
|
||||||
///
|
//
|
||||||
/// ## Example
|
// ## Example
|
||||||
///
|
//
|
||||||
/// ```
|
// ```
|
||||||
/// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
|
// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
|
||||||
/// let start_position = 13;
|
// let start_position = 13;
|
||||||
/// let end_position = 15;
|
// let end_position = 15;
|
||||||
///
|
//
|
||||||
/// let (line, before, length) = get_line(&input, start_position, end_position);
|
// let (line, before, length) = get_line(&input, start_position, end_position);
|
||||||
///
|
//
|
||||||
/// assert_eq!("val number == 50", line);
|
// assert_eq!("val number == 50", line);
|
||||||
/// assert_eq!(11, before);
|
// assert_eq!(11, before);
|
||||||
/// assert_eq!(2, length);
|
// assert_eq!(2, length);
|
||||||
/// ```
|
// ```
|
||||||
fn get_line(
|
fn get_line(
|
||||||
chars: &Vec<char>,
|
chars: &Vec<char>,
|
||||||
start_position: usize,
|
start_position: usize,
|
||||||
|
@ -77,6 +77,7 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
|
|||||||
.or_else(|| scanner::identifier(next_char, chars, current_pos))
|
.or_else(|| scanner::identifier(next_char, chars, current_pos))
|
||||||
.or_else(|| scanner::datatype(next_char, chars, current_pos))
|
.or_else(|| scanner::datatype(next_char, chars, current_pos))
|
||||||
.or_else(|| scanner::string(next_char, chars, current_pos))
|
.or_else(|| scanner::string(next_char, chars, current_pos))
|
||||||
|
.or_else(|| scanner::new_comment(next_char, chars, current_pos))
|
||||||
.or_else(|| scanner::operator(next_char, chars, current_pos))
|
.or_else(|| scanner::operator(next_char, chars, current_pos))
|
||||||
.or_else(|| scanner::grouping_sign(next_char, chars, current_pos))
|
.or_else(|| scanner::grouping_sign(next_char, chars, current_pos))
|
||||||
.or_else(|| scanner::new_line(next_char, chars, current_pos))
|
.or_else(|| scanner::new_line(next_char, chars, current_pos))
|
||||||
|
@ -4,6 +4,7 @@ use super::{
|
|||||||
};
|
};
|
||||||
|
|
||||||
mod identifier;
|
mod identifier;
|
||||||
|
mod new_comment;
|
||||||
mod new_line;
|
mod new_line;
|
||||||
mod number;
|
mod number;
|
||||||
mod operator;
|
mod operator;
|
||||||
@ -58,3 +59,12 @@ pub fn string(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult>
|
|||||||
pub fn new_line(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
pub fn new_line(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||||
(c == '\n').then(|| new_line::scan(chars, start_pos))
|
(c == '\n').then(|| new_line::scan(chars, start_pos))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Attempts to scan a single line comment.
|
||||||
|
pub fn new_comment(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||||
|
let next_char = chars.get(start_pos + 1);
|
||||||
|
match (c, next_char) {
|
||||||
|
('/', Some('/')) => Some(new_comment::scan(chars, start_pos)),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
78
compiler/src/lexic/scanner/new_comment.rs
Normal file
78
compiler/src/lexic/scanner/new_comment.rs
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
use crate::{
|
||||||
|
lexic::{utils, LexResult},
|
||||||
|
token::new_comment,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Scans a new line.
|
||||||
|
///
|
||||||
|
/// Assummes that `start_pos` and `start_pos + 1` point to a slash `/`
|
||||||
|
///
|
||||||
|
/// This methods always succeedes
|
||||||
|
pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
|
||||||
|
let (comment_content, next_pos) =
|
||||||
|
scan_any_except_new_line(chars, start_pos + 2, String::from(""));
|
||||||
|
let token = new_comment(format!("//{}", comment_content), start_pos);
|
||||||
|
|
||||||
|
LexResult::Some(token, next_pos)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Collects every character from `start_pos` up to (not including) the next
/// `\n`, or up to the end of `chars` when no new line follows.
///
/// - `chars`: Input to read from
/// - `start_pos`: Position of the first character to collect
/// - `current`: Text accumulated so far; collected characters are appended to it
///
/// Returns a tuple of:
///
/// - `String`: `current` followed by the collected characters
/// - `usize`: The position where the scan stopped (the `\n`, or `chars.len()`
///   when the input ended; `start_pos` itself if it was already out of bounds)
fn scan_any_except_new_line(
    chars: &Vec<char>,
    start_pos: usize,
    current: String,
) -> (String, usize) {
    // Iterative on purpose: one stack frame per character would overflow the
    // stack on very long comment lines, since tail calls are not guaranteed
    // to be optimized away.
    let mut content = current;
    let mut pos = start_pos;

    while let Some(&c) = chars.get(pos) {
        if c == '\n' {
            break;
        }
        content.push(c);
        pos += 1;
    }

    (content, pos)
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use crate::lexic::scanner::TokenType;

    use super::*;

    /// Turns a string slice into the `Vec<char>` the scanner works on.
    fn str_to_vec(s: &str) -> Vec<char> {
        s.chars().collect()
    }

    /// A comment made only of the two slashes yields a `//` token and stops
    /// at the end of the input.
    #[test]
    fn should_scan_empty_comment() {
        let input = str_to_vec("//");
        let start_pos = 0;

        let result = scan(&input, start_pos);
        match result {
            LexResult::Some(t, next) => {
                assert_eq!(2, next);
                assert_eq!("//", t.value);
                assert_eq!(0, t.position);
                assert_eq!(TokenType::Comment, t.token_type);
            }
            _ => {
                panic!()
            }
        }
    }

    /// The scan stops at the first new line and does not consume it.
    #[test]
    fn should_scan_until_new_line() {
        // Two leading spaces: the slashes sit at positions 2 and 3, and the
        // new line at position 17.
        let input = str_to_vec("  // some comment\n// other comment");
        let start_pos = 2;

        let result = scan(&input, start_pos);
        match result {
            LexResult::Some(t, next) => {
                assert_eq!(17, next);
                assert_eq!("// some comment", t.value);
                assert_eq!(start_pos, t.position);
                assert_eq!(TokenType::Comment, t.token_type);
            }
            _ => {
                panic!()
            }
        }
    }
}
|
@ -38,8 +38,11 @@ fn scan_decimal(chars: &Vec<char>, start_pos: usize, current: String) -> LexResu
|
|||||||
// so this is used to retrieve the original START position of the token
|
// so this is used to retrieve the original START position of the token
|
||||||
let current_len = current.len();
|
let current_len = current.len();
|
||||||
|
|
||||||
LexResult::Some(token::new_number(current, start_pos - current_len), start_pos)
|
LexResult::Some(
|
||||||
},
|
token::new_number(current, start_pos - current_len),
|
||||||
|
start_pos,
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -97,7 +100,10 @@ fn scan_double_impl(chars: &Vec<char>, start_pos: usize, current: String) -> Lex
|
|||||||
// so this is used to retrieve the original START position of the token
|
// so this is used to retrieve the original START position of the token
|
||||||
let current_len = current.len();
|
let current_len = current.len();
|
||||||
|
|
||||||
LexResult::Some(token::new_number(current, start_pos - current_len), start_pos)
|
LexResult::Some(
|
||||||
|
token::new_number(current, start_pos - current_len),
|
||||||
|
start_pos,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -140,7 +146,10 @@ fn scan_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Token,
|
|||||||
// so this is used to retrieve the original START position of the token
|
// so this is used to retrieve the original START position of the token
|
||||||
let current_len = current.len();
|
let current_len = current.len();
|
||||||
|
|
||||||
(token::new_number(current, start_pos - current_len), start_pos)
|
(
|
||||||
|
token::new_number(current, start_pos - current_len),
|
||||||
|
start_pos,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -156,7 +165,10 @@ fn scan_hex_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Tok
|
|||||||
// so this is used to retrieve the original START position of the token
|
// so this is used to retrieve the original START position of the token
|
||||||
let current_len = current.len();
|
let current_len = current.len();
|
||||||
|
|
||||||
(token::new_number(current, start_pos - current_len), start_pos)
|
(
|
||||||
|
token::new_number(current, start_pos - current_len),
|
||||||
|
start_pos,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -426,6 +438,5 @@ mod tests {
|
|||||||
} else {
|
} else {
|
||||||
panic!("Expected some value")
|
panic!("Expected some value")
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -17,7 +17,10 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
|
|||||||
// so this is used to retrieve the original START position of the token
|
// so this is used to retrieve the original START position of the token
|
||||||
let current_len = current.len();
|
let current_len = current.len();
|
||||||
|
|
||||||
LexResult::Some(token::new_operator(current, start_pos - current_len), start_pos)
|
LexResult::Some(
|
||||||
|
token::new_operator(current, start_pos - current_len),
|
||||||
|
start_pos,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -18,7 +18,10 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
|
|||||||
// 1 is added to account for the opening `"`
|
// 1 is added to account for the opening `"`
|
||||||
let current_len = current.len() + 1;
|
let current_len = current.len() + 1;
|
||||||
|
|
||||||
LexResult::Some(token::new_string(current, start_pos - current_len), start_pos + 1)
|
LexResult::Some(
|
||||||
|
token::new_string(current, start_pos - current_len),
|
||||||
|
start_pos + 1,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
Some(c) if *c == '\n' => LexResult::Err(LexError {
|
Some(c) if *c == '\n' => LexResult::Err(LexError {
|
||||||
position: start_pos,
|
position: start_pos,
|
||||||
|
@ -1,10 +1,25 @@
|
|||||||
|
// Module to handle the repl and its compilation
|
||||||
mod lexic;
|
mod repl;
|
||||||
|
// Defines the types of tokens and provides functions to create them
|
||||||
mod token;
|
mod token;
|
||||||
|
// Module to handle syntactic analysis
|
||||||
|
mod syntax;
|
||||||
|
// Module to handle lexical analysis
|
||||||
|
mod lexic;
|
||||||
|
// Module to handle semantic analysis
|
||||||
|
mod semantic;
|
||||||
|
// Defines the AST
|
||||||
|
mod ast_types;
|
||||||
|
// Defines the Symbol table and operations within
|
||||||
|
mod symbol_table;
|
||||||
|
// Transforms an AST to JS
|
||||||
|
mod codegen;
|
||||||
|
mod utils;
|
||||||
|
|
||||||
mod error_handling;
|
mod error_handling;
|
||||||
|
|
||||||
use token::Token;
|
|
||||||
use error_handling::MistiError;
|
use error_handling::MistiError;
|
||||||
|
use token::Token;
|
||||||
|
|
||||||
pub use token::TokenType;
|
pub use token::TokenType;
|
||||||
|
|
||||||
|
@ -127,7 +127,9 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
|
|||||||
fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Result3<&Token> {
|
fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Result3<&Token> {
|
||||||
match tokens.get(pos) {
|
match tokens.get(pos) {
|
||||||
Some(t) if t.token_type == token_type => Result3::Ok(t),
|
Some(t) if t.token_type == token_type => Result3::Ok(t),
|
||||||
Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => Result3::None,
|
Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => {
|
||||||
|
Result3::None
|
||||||
|
}
|
||||||
Some(t) => Result3::Err(t),
|
Some(t) => Result3::Err(t),
|
||||||
None => Result3::None,
|
None => Result3::None,
|
||||||
}
|
}
|
||||||
@ -136,7 +138,9 @@ fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Res
|
|||||||
fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Result3<&Token> {
|
fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Result3<&Token> {
|
||||||
match tokens.get(pos) {
|
match tokens.get(pos) {
|
||||||
Some(t) if t.token_type == TokenType::Operator && t.value == operator => Result3::Ok(t),
|
Some(t) if t.token_type == TokenType::Operator && t.value == operator => Result3::Ok(t),
|
||||||
Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => Result3::None,
|
Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => {
|
||||||
|
Result3::None
|
||||||
|
}
|
||||||
Some(t) => Result3::Err(t),
|
Some(t) => Result3::Err(t),
|
||||||
None => Result3::None,
|
None => Result3::None,
|
||||||
}
|
}
|
||||||
@ -238,10 +242,9 @@ mod tests {
|
|||||||
assert_eq!(4, error.error_start);
|
assert_eq!(4, error.error_start);
|
||||||
assert_eq!(7, error.error_end);
|
assert_eq!(7, error.error_end);
|
||||||
}
|
}
|
||||||
_ => panic!("Error expected")
|
_ => panic!("Error expected"),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
let tokens = get_tokens(&String::from("val \"hello\"")).unwrap();
|
let tokens = get_tokens(&String::from("val \"hello\"")).unwrap();
|
||||||
let binding = try_parse(&tokens, 0).unwrap();
|
let binding = try_parse(&tokens, 0).unwrap();
|
||||||
|
|
||||||
@ -250,7 +253,7 @@ mod tests {
|
|||||||
assert_eq!(4, error.error_start);
|
assert_eq!(4, error.error_start);
|
||||||
assert_eq!(11, error.error_end);
|
assert_eq!(11, error.error_end);
|
||||||
}
|
}
|
||||||
_ => panic!("Error expected")
|
_ => panic!("Error expected"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -264,7 +267,7 @@ mod tests {
|
|||||||
assert_eq!(7, error.error_start);
|
assert_eq!(7, error.error_start);
|
||||||
assert_eq!(14, error.error_end);
|
assert_eq!(14, error.error_end);
|
||||||
}
|
}
|
||||||
_ => panic!("Error expected")
|
_ => panic!("Error expected"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -12,12 +12,12 @@ pub enum TokenType {
|
|||||||
LeftBrace,
|
LeftBrace,
|
||||||
RightBrace,
|
RightBrace,
|
||||||
Semicolon,
|
Semicolon,
|
||||||
|
Comment,
|
||||||
VAR,
|
VAR,
|
||||||
VAL,
|
VAL,
|
||||||
EOF,
|
EOF,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Token {
|
pub struct Token {
|
||||||
pub token_type: TokenType,
|
pub token_type: TokenType,
|
||||||
@ -31,12 +31,8 @@ pub struct Token {
|
|||||||
impl Token {
|
impl Token {
|
||||||
pub fn get_end_position(&self) -> usize {
|
pub fn get_end_position(&self) -> usize {
|
||||||
match self.token_type {
|
match self.token_type {
|
||||||
TokenType::String => {
|
TokenType::String => self.position + self.value.len() + 2,
|
||||||
self.position + self.value.len() + 2
|
_ => self.position + self.value.len(),
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
self.position + self.value.len()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -104,3 +100,11 @@ pub fn new_datatype(value: String, position: usize) -> Token {
|
|||||||
position,
|
position,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn new_comment(value: String, position: usize) -> Token {
|
||||||
|
Token {
|
||||||
|
token_type: TokenType::Comment,
|
||||||
|
value,
|
||||||
|
position,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -8,7 +8,7 @@ impl<T> Result3<T> {
|
|||||||
pub fn unwrap(&self) -> &T {
|
pub fn unwrap(&self) -> &T {
|
||||||
match self {
|
match self {
|
||||||
Result3::Ok(t) => t,
|
Result3::Ok(t) => t,
|
||||||
_ => panic!("")
|
_ => panic!(""),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user