From 0a22391bae8a2ab7cabdad0242d45a00033166df Mon Sep 17 00:00:00 2001 From: Araozu Date: Thu, 5 Jan 2023 12:48:34 -0500 Subject: [PATCH] Fix bugs and improve error messages --- src/lexic/lex_error.rs | 6 ++++++ src/lexic/mod.rs | 17 +++++++++++++--- src/lexic/scanner/identifier.rs | 0 src/lexic/scanner/mod.rs | 3 ++- src/lexic/scanner/number.rs | 36 ++++++++++++++++++++++----------- src/lexic/scanner/operator.rs | 2 +- src/lexic/scanner/string.rs | 16 ++++++++++----- src/lexic/utils.rs | 0 src/main.rs | 0 src/repl/mod.rs | 14 ++++++++++++- src/syntax/mod.rs | 0 src/token.rs | 0 12 files changed, 71 insertions(+), 23 deletions(-) create mode 100755 src/lexic/lex_error.rs mode change 100644 => 100755 src/lexic/mod.rs mode change 100644 => 100755 src/lexic/scanner/identifier.rs mode change 100644 => 100755 src/lexic/scanner/mod.rs mode change 100644 => 100755 src/lexic/scanner/number.rs mode change 100644 => 100755 src/lexic/scanner/operator.rs mode change 100644 => 100755 src/lexic/scanner/string.rs mode change 100644 => 100755 src/lexic/utils.rs mode change 100644 => 100755 src/main.rs mode change 100644 => 100755 src/repl/mod.rs mode change 100644 => 100755 src/syntax/mod.rs mode change 100644 => 100755 src/token.rs diff --git a/src/lexic/lex_error.rs b/src/lexic/lex_error.rs new file mode 100755 index 0000000..3e43059 --- /dev/null +++ b/src/lexic/lex_error.rs @@ -0,0 +1,6 @@ + +#[derive(Debug)] +pub struct LexError { + pub position: usize, + pub reason: String, +} diff --git a/src/lexic/mod.rs b/src/lexic/mod.rs old mode 100644 new mode 100755 index 423d01e..102a571 --- a/src/lexic/mod.rs +++ b/src/lexic/mod.rs @@ -1,6 +1,8 @@ mod utils; mod scanner; +mod lex_error; use super::token::{self, Token}; +use lex_error::LexError; type Chars = Vec; @@ -9,12 +11,12 @@ pub enum LexResult { Some(Token, usize), // No token was found, but there was no error (EOF) None(usize), - Err(String), + Err(LexError), } /// Scans and returns all the tokens in the input String -pub fn get_tokens(input: &String) -> Result, String> { +pub fn get_tokens(input: &String) -> Result, LexError> { let chars: Vec = input.chars().into_iter().collect(); let mut results = Vec::new(); let mut current_pos: usize = 0; @@ -44,6 +46,11 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult { return LexResult::None(current_pos) } + // Ignore new lines for now... + if next_char == '\n' { + return next_token(chars, current_pos + 1) + } + // Handle whitespace recursively if next_char == ' ' { return next_token(chars, current_pos + 1) @@ -57,7 +64,11 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult { .or_else(|| scanner::operator(next_char, chars, current_pos)) .or_else(|| scanner::grouping_sign(next_char, chars, current_pos)) .unwrap_or_else(|| { - LexResult::Err(format!("Unrecognized character: {}", next_char)) + let error = LexError { + position: current_pos, + reason: format!("Unrecognized character: {}", next_char), + }; + LexResult::Err(error) }) } diff --git a/src/lexic/scanner/identifier.rs b/src/lexic/scanner/identifier.rs old mode 100644 new mode 100755 diff --git a/src/lexic/scanner/mod.rs b/src/lexic/scanner/mod.rs old mode 100644 new mode 100755 index d9cc455..43ba880 --- a/src/lexic/scanner/mod.rs +++ b/src/lexic/scanner/mod.rs @@ -40,7 +40,8 @@ pub fn grouping_sign(c: char, _: &Vec, start_pos: usize) -> Option, start_pos: usize) -> Option { - utils::is_lowercase(c).then(|| identifier::scan(c, chars, start_pos)) + (utils::is_lowercase(c) || c == '_') + .then(|| identifier::scan(c, chars, start_pos)) } diff --git a/src/lexic/scanner/number.rs b/src/lexic/scanner/number.rs old mode 100644 new mode 100755 index 40e9e70..8203e0e --- a/src/lexic/scanner/number.rs +++ b/src/lexic/scanner/number.rs @@ -1,6 +1,6 @@ use crate::lexic::{ token::{self, Token}, - utils, LexResult, + utils, LexResult, lex_error::LexError, }; /// Function to scan a number @@ -47,7 +47,10 @@ fn scan_hex(chars: &Vec, start_pos: usize, current: String) -> LexResult { let (t, next) = scan_hex_digits(chars, start_pos + 1, utils::str_append(current, *c)); LexResult::Some(t, next) } - _ => LexResult::Err(String::from("Tried to scan an incomplete hex value")), + _ => LexResult::Err(LexError { + position: start_pos, + reason: String::from("Tried to scan an incomplete hex value"), + }), } } @@ -60,10 +63,16 @@ fn scan_hex(chars: &Vec, start_pos: usize, current: String) -> LexResult { fn scan_double(chars: &Vec, start_pos: usize, current: String) -> LexResult { match chars.get(start_pos) { Some(c) if utils::is_digit(*c) => scan_double_impl(chars, start_pos, current), - Some(_) => LexResult::Err(String::from( - "The character after the dot when scanning a double is not a number.", - )), - _ => LexResult::Err(String::from("EOF when scanning a double number.")), + Some(_) => LexResult::Err(LexError { + position: start_pos, + reason : String::from( + "The character after the dot when scanning a double is not a number.", + ) + }), + _ => LexResult::Err(LexError { + position: start_pos, + reason: String::from("EOF when scanning a double number."), + }), } } @@ -98,9 +107,12 @@ fn scan_scientific(chars: &Vec, start_pos: usize, current: String) -> LexR let (t, next) = scan_digits(chars, start_pos + 2, new_value); LexResult::Some(t, next) } - _ => LexResult::Err(String::from( - "The characters after 'e' are not + or -, or are not followed by a number", - )), + _ => LexResult::Err(LexError { + position: start_pos, + reason: String::from( + "The characters after 'e' are not + or -, or are not followed by a number", + ) + }), } } @@ -217,7 +229,7 @@ mod tests { let start_pos = 0; match scan(&input, start_pos) { - LexResult::Err(reason) => assert_eq!("Tried to scan an incomplete hex value", reason), + LexResult::Err(reason) => assert_eq!("Tried to scan an incomplete hex value", reason.reason), _ => panic!(), } @@ -277,7 +289,7 @@ mod tests { match scan(&input, start_pos) { LexResult::Err(reason) => assert_eq!( "The character after the dot when scanning a double is not a number.", - reason + reason.reason ), _ => panic!(), } @@ -286,7 +298,7 @@ mod tests { let start_pos = 0; match scan(&input, start_pos) { - LexResult::Err(reason) => assert_eq!("EOF when scanning a double number.", reason), + LexResult::Err(reason) => assert_eq!("EOF when scanning a double number.", reason.reason), _ => panic!(), } } diff --git a/src/lexic/scanner/operator.rs b/src/lexic/scanner/operator.rs old mode 100644 new mode 100755 index 1ae57c9..639bbb3 --- a/src/lexic/scanner/operator.rs +++ b/src/lexic/scanner/operator.rs @@ -1,4 +1,4 @@ -use crate::lexic::{token::{Token, self}, utils, LexResult}; +use crate::lexic::{token, utils, LexResult}; /// Function to scan an operator diff --git a/src/lexic/scanner/string.rs b/src/lexic/scanner/string.rs old mode 100644 new mode 100755 index 6b54a64..ec1b764 --- a/src/lexic/scanner/string.rs +++ b/src/lexic/scanner/string.rs @@ -1,6 +1,6 @@ use crate::lexic::{ - token::{self, Token}, - utils, LexResult, + token, + utils, LexResult, lex_error::LexError, }; /// Function to scan a string @@ -17,7 +17,10 @@ pub fn scan_impl(chars: &Vec, start_pos: usize, current: String) -> LexRes LexResult::Some(token::new_string(current, start_pos as i32), start_pos + 1) } Some(c) if *c == '\n' => { - LexResult::Err(String::from("Unexpected new line inside a string.")) + LexResult::Err(LexError { + position: start_pos, + reason: String::from("Unexpected new line inside a string.") + }) } Some(c) if *c == '\\' => { if let Some(escape) = test_escape_char(chars, start_pos + 1) { @@ -44,7 +47,10 @@ pub fn scan_impl(chars: &Vec, start_pos: usize, current: String) -> LexRes ) } None => { - LexResult::Err(String::from("Incomplete string found")) + LexResult::Err(LexError { + position: start_pos, + reason: String::from("Incomplete string found") + }) } } } @@ -108,7 +114,7 @@ mod tests { let input = str_to_vec("\"Hello,\nworld!\""); let start_pos = 1; if let LexResult::Err(reason) = scan(&input, start_pos) { - assert_eq!("Unexpected new line inside a string.", reason) + assert_eq!("Unexpected new line inside a string.", reason.reason) } else {panic!()} } diff --git a/src/lexic/utils.rs b/src/lexic/utils.rs old mode 100644 new mode 100755 diff --git a/src/main.rs b/src/main.rs old mode 100644 new mode 100755 diff --git a/src/repl/mod.rs b/src/repl/mod.rs old mode 100644 new mode 100755 index 244b771..0cbf20c --- a/src/repl/mod.rs +++ b/src/repl/mod.rs @@ -4,6 +4,19 @@ use super::lexic; fn compile(input: &String) { let _tokens = lexic::get_tokens(input); + + match _tokens { + Ok(tokens) => { + for token in tokens { + print!("[{}] ", token.value); + } + println!(""); + }, + Err(error) => { + eprintln!("Error scanning.\n{} at pos {}", error.reason, error.position) + } + } + } pub fn run() -> io::Result<()> { @@ -23,7 +36,6 @@ pub fn run() -> io::Result<()> { break Ok(()) }, Ok(_) => { - println!("{}", buffer); compile(&buffer); }, Err(error) => { diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs old mode 100644 new mode 100755 diff --git a/src/token.rs b/src/token.rs old mode 100644 new mode 100755