Show offending line and token when a syntax error is found

master
Araozu 2023-03-15 16:33:00 -05:00
parent cdafc40ff7
commit e383d300f2
15 changed files with 220 additions and 53 deletions

View File

@ -10,10 +10,12 @@
- [ ] Namespace identifiers in the symbol table
- [ ] Stdlib
- [ ] Document code
- [ ] Test that the field `position` of the tokens actually points to the start of the token, and not its length
## v0.0.4
- Explicit datatype of variables
- Improve error messages when a syntax error is found (show offending line and offending token)
## v0.0.3

View File

@ -6,11 +6,7 @@ impl PrintableError for LexError {
fn get_error_str(&self, chars: &Vec<char>) -> String {
let (erroneous_code, back_count) = get_line(chars, self.position);
let mut whitespace = Vec::<char>::new();
for _ in 0..back_count {
whitespace.push(' ');
}
let whitespace = whitespace.iter().collect::<String>();
let whitespace = vec![' '; back_count].iter().collect::<String>();
format!(
"\n{}\n{}^\n\n{}{}\n{}",
@ -41,6 +37,8 @@ fn get_line(chars: &Vec<char>, pos: usize) -> (String, usize) {
let current_char = chars[before_pos];
if current_char == '\n' {
// This is important because before_pos will be used to calculate
// the number of chars before pos
before_pos += 1;
break;
}

View File

@ -19,6 +19,8 @@ pub struct LexError {
/// Represents an error found during parsing (syntax analysis).
#[derive(Debug)]
pub struct SyntaxError {
    /// Char position where the offending code starts
    /// (used to index the input `Vec<char>` when printing the error)
    pub error_start: usize,
    /// Char position where the offending code ends
    pub error_end: usize,
    /// Human readable explanation of the error
    pub reason: String,
}

View File

@ -1,7 +1,145 @@
use std::collections::VecDeque;
use super::{PrintableError, SyntaxError};
impl PrintableError for SyntaxError {
fn get_error_str(&self, chars: &Vec<char>) -> String {
String::from("Syntax error: NOT IMPLEMENTED")
let (line, before, length) = get_line(chars, self.error_start, self.error_end);
let whitespace = vec![' '; before].iter().collect::<String>();
let indicator = vec!['^'; length].iter().collect::<String>();
format!(
"\n{}\n{}{}\n\n{}{}{}",
line, whitespace, indicator, "Syntax error at pos ", self.error_start, ": "
)
}
}
/// Extracts a line of code
///
/// - `chars`: Input where to extract the line from
/// - `start_position`: Position where the erroneous code starts
/// - `end_position`: Position where the erroneous code ends
///
/// Returns a tuple of:
///
/// - `String`: The faulty line
/// - `usize`: The amount of chars *before* the faulty code
/// - `usize`: The length of the faulty code
///
/// Out-of-range positions are clamped instead of panicking: an empty input
/// yields `("", 0, 0)`, and a `start_position` past the end of `chars` is
/// treated as pointing to the last char.
///
/// ## Example
///
/// ```
/// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
/// let start_position = 13;
/// let end_position = 15;
///
/// let (line, before, length) = get_line(&input, start_position, end_position);
///
/// assert_eq!("val number == 50", line);
/// assert_eq!(11, before);
/// assert_eq!(2, length);
/// ```
fn get_line(
    chars: &Vec<char>,
    start_position: usize,
    end_position: usize,
) -> (String, usize, usize) {
    let char_count = chars.len();
    // Guard: nothing to extract from an empty input.
    if char_count == 0 {
        return (String::new(), 0, 0);
    }
    // Clamp so the backwards scan below can never index out of bounds
    // (e.g. an error reported at EOF).
    let start_position = start_position.min(char_count - 1);

    let mut result_chars = VecDeque::<char>::new();

    // Push chars to the front until a new line is found
    let mut before_pos = start_position;
    loop {
        let current_char = chars[before_pos];

        if current_char == '\n' {
            // This is important because before_pos will be used to calculate
            // the number of chars before start_position
            before_pos += 1;
            break;
        }

        result_chars.push_front(current_char);

        if before_pos == 0 {
            break;
        }
        before_pos -= 1;
    }

    // Push chars to the end until a new line is found
    let mut after_pos = start_position + 1;
    while after_pos < char_count {
        let current_char = chars[after_pos];

        if current_char == '\n' {
            break;
        }

        result_chars.push_back(current_char);
        after_pos += 1;
    }

    (
        result_chars.iter().collect::<String>(),
        // saturating: if the char at start_position is itself a newline,
        // before_pos ends up one PAST start_position and a plain
        // subtraction would underflow usize and panic.
        start_position.saturating_sub(before_pos),
        end_position.saturating_sub(start_position),
    )
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        error_handling::{PrintableError, SyntaxError},
        lexic::get_tokens,
        syntax::construct_ast,
    };

    /// Tokenizes and parses `input`, which is expected to contain a
    /// syntax error, and returns the input's chars together with the
    /// `SyntaxError` the parser produced.
    ///
    /// Panics if the input parses without error.
    fn get_error_data(input: String) -> (Vec<char>, SyntaxError) {
        let tokens = get_tokens(&input).unwrap();
        let error_holder = construct_ast(&tokens);

        match error_holder {
            Ok(_) => panic!(
                "syntax_error test: Input expected to throw error didn't:\n\n{}",
                input
            ),
            Err(error) => {
                let chars: Vec<char> = input.chars().into_iter().collect();
                (chars, error)
            }
        }
    }

    // "val" without a binding name is a syntax error; the printed message
    // must show the offending line and underline the whole `val` keyword.
    #[test]
    fn should_show_an_error_for_missing_binding_name() {
        let (chars, error) = get_error_data(String::from("val"));
        let actual_err = error.get_error_str(&chars);
        // TODO: Write a better error message (something that explains why it failed)
        let expected_str = format!("\n{}\n{}\n\n{}", "val", "^^^", "Syntax error at pos 0: ");

        assert_eq!(expected_str, actual_err);
    }

    // Mirrors the doc example of get_line: extract the line containing the
    // error span, the char count before it, and the span length.
    #[test]
    fn should_get_line() {
        let input: Vec<char> = String::from("\n\nval number == 50\n\n")
            .chars()
            .into_iter()
            .collect();
        let start_position = 13;
        let end_position = 15;

        let (line, before, length) = get_line(&input, start_position, end_position);

        assert_eq!("val number == 50", line);
        assert_eq!(11, before);
        assert_eq!(2, length);
    }
}

View File

@ -1,9 +0,0 @@
/// Represents an error in the scanning process
#[derive(Debug)]
pub struct LexError {
    /// Position where the offending char was found
    pub position: usize,
    /// Reason of the error
    pub reason: String,
}

View File

@ -34,12 +34,15 @@ fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String, is_datatype:
is_datatype,
),
_ => {
// start_pos is the position where the token ENDS, not where it STARTS,
// so this is used to retrieve the original START position of the token
let current_len = current.len();
if let Some(token_type) = str_is_keyword(&current) {
LexResult::Some(token::new(current, start_pos as i32, token_type), start_pos)
LexResult::Some(token::new(current, start_pos - current_len, token_type), start_pos)
} else if is_datatype {
LexResult::Some(token::new_datatype(current, start_pos as i32), start_pos)
LexResult::Some(token::new_datatype(current, start_pos - current_len), start_pos)
} else {
LexResult::Some(token::new_identifier(current, start_pos as i32), start_pos)
LexResult::Some(token::new_identifier(current, start_pos - current_len), start_pos)
}
}
}

View File

@ -33,7 +33,7 @@ pub fn grouping_sign(c: char, _: &Vec<char>, start_pos: usize) -> Option<LexResu
_ => return None,
};
let token = token::new(c.to_string(), start_pos as i32, token_type);
let token = token::new(c.to_string(), start_pos, token_type);
Some(LexResult::Some(token, start_pos + 1))
}

View File

@ -17,12 +17,12 @@ pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
Some(c) if *c == ' ' => match look_ahead_for_new_line(chars, start_pos + 1) {
Some(next_pos) => scan(chars, next_pos),
None => {
let token = token::new(String::from(";"), start_pos as i32, TokenType::Semicolon);
let token = token::new(String::from(";"), start_pos, TokenType::Semicolon);
LexResult::Some(token, start_pos)
}
},
Some(_) | None => {
let token = token::new(String::from(";"), start_pos as i32, TokenType::Semicolon);
let token = token::new(String::from(";"), start_pos, TokenType::Semicolon);
LexResult::Some(token, start_pos)
}
}

View File

@ -33,7 +33,7 @@ fn scan_decimal(chars: &Vec<char>, start_pos: usize, current: String) -> LexResu
Some(c) if utils::is_digit(*c) => {
scan_decimal(chars, start_pos + 1, utils::str_append(current, *c))
}
_ => LexResult::Some(token::new_number(current, start_pos as i32), start_pos),
_ => LexResult::Some(token::new_number(current, start_pos), start_pos),
}
}
@ -86,7 +86,7 @@ fn scan_double_impl(chars: &Vec<char>, start_pos: usize, current: String) -> Lex
Some(c) if *c == 'e' => {
scan_scientific(chars, start_pos + 1, utils::str_append(current, *c))
}
_ => LexResult::Some(token::new_number(current, start_pos as i32), start_pos),
_ => LexResult::Some(token::new_number(current, start_pos), start_pos),
}
}
@ -123,7 +123,7 @@ fn scan_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Token,
Some(c) if utils::is_digit(*c) => {
scan_digits(chars, start_pos + 1, utils::str_append(current, *c))
}
_ => (token::new_number(current, start_pos as i32), start_pos),
_ => (token::new_number(current, start_pos), start_pos),
}
}
@ -133,7 +133,7 @@ fn scan_hex_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Tok
Some(c) if utils::is_hex_digit(*c) => {
scan_hex_digits(chars, start_pos + 1, utils::str_append(current, *c))
}
_ => (token::new_number(current, start_pos as i32), start_pos),
_ => (token::new_number(current, start_pos), start_pos),
}
}

View File

@ -12,7 +12,7 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
Some(c) if utils::is_operator(*c) => {
scan_impl(chars, start_pos + 1, utils::str_append(current, *c))
}
_ => LexResult::Some(token::new_operator(current, start_pos as i32), start_pos),
_ => LexResult::Some(token::new_operator(current, start_pos), start_pos),
}
}

View File

@ -13,7 +13,7 @@ pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
match chars.get(start_pos) {
Some(c) if *c == '"' => {
LexResult::Some(token::new_string(current, start_pos as i32), start_pos + 1)
LexResult::Some(token::new_string(current, start_pos), start_pos + 1)
}
Some(c) if *c == '\n' => LexResult::Err(LexError {
position: start_pos,

View File

@ -39,7 +39,7 @@ fn build_ast(input: &String, tokens: Vec<Token>) {
}
Err(reason) => {
let chars: Vec<char> = input.chars().into_iter().collect();
eprintln!("Syntax error.\n{}", reason.get_error_str(&chars))
eprintln!("{}", reason.get_error_str(&chars))
}
}
}

View File

@ -1,5 +1,6 @@
use super::ast_types::{Binding, ValBinding, VarBinding};
use super::{expression, SyntaxResult};
use crate::error_handling::SyntaxError;
use crate::token::{Token, TokenType};
// TODO: Should return a 3 state value:
@ -16,22 +17,27 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
pos += 1;
Some(String::from(&t.value))
}
// If the first token is anything else, ignore
Some(_) => None,
// TODO: return Error
None => return None,
// This should never match, as there should always be at least a
// TokenType::Semicolon or TokenType::EOF
None => panic!(
"Internal compiler error: Illegal token stream at src/syntax/binding.rs#try_parse"
),
}
};
// var/val keyword
let is_val = {
let (is_val, binding_token) = {
let res1 = try_token_type(tokens, pos, TokenType::VAL);
match res1 {
Some(_) => true,
Some(val_token) => (true, val_token),
None => {
let res2 = try_token_type(tokens, pos, TokenType::VAR);
match res2 {
Some(_) => false,
// TODO: return Error
Some(var_token) => (false, var_token),
// Neither VAL nor VAR were matched, the parser should try
// other constructs
None => return None,
}
}
@ -41,7 +47,12 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
let identifier = try_token_type(tokens, pos + 1, TokenType::Identifier);
if identifier.is_none() {
// TODO: return Error
return None;
// The parser didn't find an Identifier after VAL/VAR
return Some(SyntaxResult::Err(SyntaxError {
reason: String::from("D:"),
error_start: binding_token.position,
error_end: binding_token.position + binding_token.value.len(),
}));
}
let identifier = identifier.unwrap();
@ -154,4 +165,20 @@ mod tests {
_ => panic!(),
}
}
#[test]
fn should_return_correct_error() {
let tokens = get_tokens(&String::from("val")).unwrap();
assert_eq!(TokenType::VAL, tokens[0].token_type);
assert_eq!(0, tokens[0].position);
let binding = try_parse(&tokens, 0).unwrap();
match binding {
SyntaxResult::Err(error) => {
assert_eq!(0, error.error_start);
assert_eq!(3, error.error_end);
}
_ => panic!(),
}
}
}

View File

@ -31,7 +31,10 @@ pub fn construct_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST<'a>, Syntax
bindings: vec![module],
}),
SyntaxResult::None => Err(SyntaxError {
reason: String::from("D:"),
reason: String::from("PARSER couldn't parse any construction"),
// FIXME: This should get the position of the _token_ that current_pos points to
error_start: current_pos,
error_end: current_pos,
}),
SyntaxResult::Err(err) => Err(err),
}
@ -42,6 +45,9 @@ fn next_construct<'a>(tokens: &'a Vec<Token>, current_pos: usize) -> SyntaxResul
.unwrap_or_else(|| {
SyntaxResult::Err(SyntaxError {
reason: String::from("Unrecognized token"),
// FIXME: This should get the position of the _token_ that current_pos points to
error_start: current_pos,
error_end: current_pos,
})
})
}

View File

@ -23,69 +23,69 @@ pub struct Token {
pub value: String,
/// The absolute position of this token, from the
/// start of the file
_position: i32,
pub position: usize,
}
pub fn new_eof(position: i32) -> Token {
pub fn new_eof(position: usize) -> Token {
Token {
token_type: TokenType::EOF,
value: String::from(""),
_position: position,
position,
}
}
pub fn new_number(value: String, position: i32) -> Token {
pub fn new_number(value: String, position: usize) -> Token {
Token {
token_type: TokenType::Number,
value,
_position: position,
position,
}
}
pub fn new_operator(value: String, position: i32) -> Token {
pub fn new_operator(value: String, position: usize) -> Token {
Token {
token_type: TokenType::Operator,
value,
_position: position,
position,
}
}
pub fn new(value: String, position: i32, token_type: TokenType) -> Token {
pub fn new(value: String, position: usize, token_type: TokenType) -> Token {
Token {
token_type,
value,
_position: position,
position,
}
}
pub fn new_identifier(value: String, position: i32) -> Token {
pub fn new_identifier(value: String, position: usize) -> Token {
Token {
token_type: TokenType::Identifier,
value,
_position: position,
position,
}
}
pub fn new_string(value: String, position: i32) -> Token {
pub fn new_string(value: String, position: usize) -> Token {
Token {
token_type: TokenType::String,
value,
_position: position,
position,
}
}
pub fn new_semicolon(position: i32) -> Token {
pub fn new_semicolon(position: usize) -> Token {
Token {
token_type: TokenType::Semicolon,
value: String::from(";"),
_position: position,
position,
}
}
pub fn new_datatype(value: String, position: i32) -> Token {
pub fn new_datatype(value: String, position: usize) -> Token {
Token {
token_type: TokenType::Datatype,
value,
_position: position,
position,
}
}