Show offending line and token when a syntax error is found
This commit is contained in:
parent
cdafc40ff7
commit
e383d300f2
@ -10,10 +10,12 @@
|
||||
- [ ] Namespace identifiers in the symbol table
|
||||
- [ ] Stdlib
|
||||
- [ ] Document code
|
||||
- [ ] Test that the field `position` of the tokens actually points to the start of the token, and not to its end (start + length)
|
||||
|
||||
## v0.0.4
|
||||
|
||||
- Explicit datatype of variables
|
||||
- Improve error messages when a syntax error is found (show offending line and offending token)
|
||||
|
||||
## v0.0.3
|
||||
|
||||
|
@ -6,11 +6,7 @@ impl PrintableError for LexError {
|
||||
fn get_error_str(&self, chars: &Vec<char>) -> String {
|
||||
let (erroneous_code, back_count) = get_line(chars, self.position);
|
||||
|
||||
let mut whitespace = Vec::<char>::new();
|
||||
for _ in 0..back_count {
|
||||
whitespace.push(' ');
|
||||
}
|
||||
let whitespace = whitespace.iter().collect::<String>();
|
||||
let whitespace = vec![' '; back_count].iter().collect::<String>();
|
||||
|
||||
format!(
|
||||
"\n{}\n{}^\n\n{}{}\n{}",
|
||||
@ -41,6 +37,8 @@ fn get_line(chars: &Vec<char>, pos: usize) -> (String, usize) {
|
||||
let current_char = chars[before_pos];
|
||||
|
||||
if current_char == '\n' {
|
||||
// This is important because before_pos will be used to calculate
|
||||
// the number of chars before pos
|
||||
before_pos += 1;
|
||||
break;
|
||||
}
|
||||
|
@ -19,6 +19,8 @@ pub struct LexError {
|
||||
|
||||
/// Represents an error found while parsing the token stream
#[derive(Debug)]
|
||||
pub struct SyntaxError {
|
||||
/// Position (index into the source chars) where the erroneous code starts
pub error_start: usize,
|
||||
/// Position where the erroneous code ends. The binding parser sets it to
/// the token position plus the token length, i.e. one past the last
/// offending char.
pub error_end: usize,
|
||||
/// Short description of why the error was raised
pub reason: String,
|
||||
}
|
||||
|
||||
|
@ -1,7 +1,145 @@
|
||||
use std::collections::VecDeque;
|
||||
|
||||
use super::{PrintableError, SyntaxError};
|
||||
|
||||
impl PrintableError for SyntaxError {
|
||||
fn get_error_str(&self, chars: &Vec<char>) -> String {
|
||||
String::from("Syntax error: NOT IMPLEMENTED")
|
||||
let (line, before, length) = get_line(chars, self.error_start, self.error_end);
|
||||
|
||||
let whitespace = vec![' '; before].iter().collect::<String>();
|
||||
let indicator = vec!['^'; length].iter().collect::<String>();
|
||||
|
||||
format!(
|
||||
"\n{}\n{}{}\n\n{}{}{}",
|
||||
line, whitespace, indicator, "Syntax error at pos ", self.error_start, ": "
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Extracts the line of code that contains an erroneous span
///
/// - `chars`: Input where to extract the line from
/// - `start_position`: Position where the erroneous code starts
/// - `end_position`: Position where the erroneous code ends (exclusive)
///
/// Returns a tuple of:
///
/// - `String`: The faulty line, without its line break
/// - `usize`: The amount of chars *before* the faulty code
/// - `usize`: The length of the faulty code
///
/// Edge cases:
///
/// - If `start_position` points at a line break, the line terminated by that
///   line break is returned.
/// - If `start_position` is past the end of the input (e.g. an error at EOF),
///   it is clamped to the end, so the last line is returned.
/// - If `end_position` is smaller than `start_position` the length is `0`.
///
/// ## Example
///
/// ```ignore
/// let input: Vec<char> = "\n\nval number == 50\n\n".chars().collect();
/// let start_position = 13;
/// let end_position = 15;
///
/// let (line, before, length) = get_line(&input, start_position, end_position);
///
/// assert_eq!("val number == 50", line);
/// assert_eq!(11, before);
/// assert_eq!(2, length);
/// ```
fn get_line(
    chars: &Vec<char>,
    start_position: usize,
    end_position: usize,
) -> (String, usize, usize) {
    // Never index past the input: an error reported at EOF is still valid
    let start = start_position.min(chars.len());

    // The line begins right after the closest line break *before* `start`,
    // or at the beginning of the input if there is none
    let line_start = chars[..start]
        .iter()
        .rposition(|c| *c == '\n')
        .map_or(0, |newline_pos| newline_pos + 1);

    // The line ends at the first line break at or after `start`,
    // or at the end of the input if there is none
    let line_end = chars[start..]
        .iter()
        .position(|c| *c == '\n')
        .map_or(chars.len(), |offset| start + offset);

    (
        chars[line_start..line_end].iter().collect(),
        // line_start <= start always holds, so this cannot underflow
        start - line_start,
        end_position.saturating_sub(start_position),
    )
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        error_handling::{PrintableError, SyntaxError},
        lexic::get_tokens,
        syntax::construct_ast,
    };

    /// Lexes and parses `input`, which is expected to be syntactically
    /// invalid, and returns its chars along with the resulting syntax error.
    ///
    /// Panics if lexing fails or if parsing unexpectedly succeeds.
    fn get_error_data(input: String) -> (Vec<char>, SyntaxError) {
        let tokens = get_tokens(&input).unwrap();

        match construct_ast(&tokens) {
            Err(syntax_error) => (input.chars().collect(), syntax_error),
            Ok(_) => panic!(
                "syntax_error test: Input expected to throw error didn't:\n\n{}",
                input
            ),
        }
    }

    #[test]
    fn should_show_an_error_for_missing_binding_name() {
        let (source_chars, syntax_error) = get_error_data(String::from("val"));

        // TODO: Write a better error message (something that explains why it failed)
        let expected = format!("\n{}\n{}\n\n{}", "val", "^^^", "Syntax error at pos 0: ");
        let rendered = syntax_error.get_error_str(&source_chars);

        assert_eq!(expected, rendered);
    }

    #[test]
    fn should_get_line() {
        let source: Vec<char> = "\n\nval number == 50\n\n".chars().collect();

        // The `==` operator occupies positions 13 and 14
        let (line, before, length) = get_line(&source, 13, 15);

        assert_eq!("val number == 50", line);
        assert_eq!(11, before);
        assert_eq!(2, length);
    }
}
|
||||
|
@ -1,9 +0,0 @@
|
||||
|
||||
/// Represents an error in the scanning process
|
||||
#[derive(Debug)]
|
||||
pub struct LexError {
|
||||
/// Position where the offending char was found
|
||||
pub position: usize,
|
||||
/// Reason of the error
|
||||
pub reason: String,
|
||||
}
|
@ -34,12 +34,15 @@ fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String, is_datatype:
|
||||
is_datatype,
|
||||
),
|
||||
_ => {
|
||||
// start_pos is the position where the token ENDS, not where it STARTS,
|
||||
// so this is used to retrieve the original START position of the token
|
||||
let current_len = current.len();
|
||||
if let Some(token_type) = str_is_keyword(¤t) {
|
||||
LexResult::Some(token::new(current, start_pos as i32, token_type), start_pos)
|
||||
LexResult::Some(token::new(current, start_pos - current_len, token_type), start_pos)
|
||||
} else if is_datatype {
|
||||
LexResult::Some(token::new_datatype(current, start_pos as i32), start_pos)
|
||||
LexResult::Some(token::new_datatype(current, start_pos - current_len), start_pos)
|
||||
} else {
|
||||
LexResult::Some(token::new_identifier(current, start_pos as i32), start_pos)
|
||||
LexResult::Some(token::new_identifier(current, start_pos - current_len), start_pos)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -33,7 +33,7 @@ pub fn grouping_sign(c: char, _: &Vec<char>, start_pos: usize) -> Option<LexResu
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
let token = token::new(c.to_string(), start_pos as i32, token_type);
|
||||
let token = token::new(c.to_string(), start_pos, token_type);
|
||||
Some(LexResult::Some(token, start_pos + 1))
|
||||
}
|
||||
|
||||
|
@ -17,12 +17,12 @@ pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
|
||||
Some(c) if *c == ' ' => match look_ahead_for_new_line(chars, start_pos + 1) {
|
||||
Some(next_pos) => scan(chars, next_pos),
|
||||
None => {
|
||||
let token = token::new(String::from(";"), start_pos as i32, TokenType::Semicolon);
|
||||
let token = token::new(String::from(";"), start_pos, TokenType::Semicolon);
|
||||
LexResult::Some(token, start_pos)
|
||||
}
|
||||
},
|
||||
Some(_) | None => {
|
||||
let token = token::new(String::from(";"), start_pos as i32, TokenType::Semicolon);
|
||||
let token = token::new(String::from(";"), start_pos, TokenType::Semicolon);
|
||||
LexResult::Some(token, start_pos)
|
||||
}
|
||||
}
|
||||
|
@ -33,7 +33,7 @@ fn scan_decimal(chars: &Vec<char>, start_pos: usize, current: String) -> LexResu
|
||||
Some(c) if utils::is_digit(*c) => {
|
||||
scan_decimal(chars, start_pos + 1, utils::str_append(current, *c))
|
||||
}
|
||||
_ => LexResult::Some(token::new_number(current, start_pos as i32), start_pos),
|
||||
_ => LexResult::Some(token::new_number(current, start_pos), start_pos),
|
||||
}
|
||||
}
|
||||
|
||||
@ -86,7 +86,7 @@ fn scan_double_impl(chars: &Vec<char>, start_pos: usize, current: String) -> Lex
|
||||
Some(c) if *c == 'e' => {
|
||||
scan_scientific(chars, start_pos + 1, utils::str_append(current, *c))
|
||||
}
|
||||
_ => LexResult::Some(token::new_number(current, start_pos as i32), start_pos),
|
||||
_ => LexResult::Some(token::new_number(current, start_pos), start_pos),
|
||||
}
|
||||
}
|
||||
|
||||
@ -123,7 +123,7 @@ fn scan_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Token,
|
||||
Some(c) if utils::is_digit(*c) => {
|
||||
scan_digits(chars, start_pos + 1, utils::str_append(current, *c))
|
||||
}
|
||||
_ => (token::new_number(current, start_pos as i32), start_pos),
|
||||
_ => (token::new_number(current, start_pos), start_pos),
|
||||
}
|
||||
}
|
||||
|
||||
@ -133,7 +133,7 @@ fn scan_hex_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Tok
|
||||
Some(c) if utils::is_hex_digit(*c) => {
|
||||
scan_hex_digits(chars, start_pos + 1, utils::str_append(current, *c))
|
||||
}
|
||||
_ => (token::new_number(current, start_pos as i32), start_pos),
|
||||
_ => (token::new_number(current, start_pos), start_pos),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -12,7 +12,7 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
|
||||
Some(c) if utils::is_operator(*c) => {
|
||||
scan_impl(chars, start_pos + 1, utils::str_append(current, *c))
|
||||
}
|
||||
_ => LexResult::Some(token::new_operator(current, start_pos as i32), start_pos),
|
||||
_ => LexResult::Some(token::new_operator(current, start_pos), start_pos),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -13,7 +13,7 @@ pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
|
||||
pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
|
||||
match chars.get(start_pos) {
|
||||
Some(c) if *c == '"' => {
|
||||
LexResult::Some(token::new_string(current, start_pos as i32), start_pos + 1)
|
||||
LexResult::Some(token::new_string(current, start_pos), start_pos + 1)
|
||||
}
|
||||
Some(c) if *c == '\n' => LexResult::Err(LexError {
|
||||
position: start_pos,
|
||||
|
@ -39,7 +39,7 @@ fn build_ast(input: &String, tokens: Vec<Token>) {
|
||||
}
|
||||
Err(reason) => {
|
||||
let chars: Vec<char> = input.chars().into_iter().collect();
|
||||
eprintln!("Syntax error.\n{}", reason.get_error_str(&chars))
|
||||
eprintln!("{}", reason.get_error_str(&chars))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
use super::ast_types::{Binding, ValBinding, VarBinding};
|
||||
use super::{expression, SyntaxResult};
|
||||
use crate::error_handling::SyntaxError;
|
||||
use crate::token::{Token, TokenType};
|
||||
|
||||
// TODO: Should return a 3 state value:
|
||||
@ -16,22 +17,27 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
|
||||
pos += 1;
|
||||
Some(String::from(&t.value))
|
||||
}
|
||||
// If the first token is anything else, ignore
|
||||
Some(_) => None,
|
||||
// TODO: return Error
|
||||
None => return None,
|
||||
// This should never match, as there should always be at least a
|
||||
// TokenType::Semicolon or TokenType::EOF
|
||||
None => panic!(
|
||||
"Internal compiler error: Illegal token stream at src/syntax/binding.rs#try_parse"
|
||||
),
|
||||
}
|
||||
};
|
||||
|
||||
// var/val keyword
|
||||
let is_val = {
|
||||
let (is_val, binding_token) = {
|
||||
let res1 = try_token_type(tokens, pos, TokenType::VAL);
|
||||
match res1 {
|
||||
Some(_) => true,
|
||||
Some(val_token) => (true, val_token),
|
||||
None => {
|
||||
let res2 = try_token_type(tokens, pos, TokenType::VAR);
|
||||
match res2 {
|
||||
Some(_) => false,
|
||||
// TODO: return Error
|
||||
Some(var_token) => (false, var_token),
|
||||
// Neither VAL nor VAR were matched, the parser should try
|
||||
// other constructs
|
||||
None => return None,
|
||||
}
|
||||
}
|
||||
@ -41,7 +47,12 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
|
||||
let identifier = try_token_type(tokens, pos + 1, TokenType::Identifier);
|
||||
if identifier.is_none() {
|
||||
// TODO: return Error
|
||||
return None;
|
||||
// The parser didn't find an Identifier after VAL/VAR
|
||||
return Some(SyntaxResult::Err(SyntaxError {
|
||||
reason: String::from("D:"),
|
||||
error_start: binding_token.position,
|
||||
error_end: binding_token.position + binding_token.value.len(),
|
||||
}));
|
||||
}
|
||||
let identifier = identifier.unwrap();
|
||||
|
||||
@ -154,4 +165,20 @@ mod tests {
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_return_correct_error() {
|
||||
let tokens = get_tokens(&String::from("val")).unwrap();
|
||||
assert_eq!(TokenType::VAL, tokens[0].token_type);
|
||||
assert_eq!(0, tokens[0].position);
|
||||
let binding = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
match binding {
|
||||
SyntaxResult::Err(error) => {
|
||||
assert_eq!(0, error.error_start);
|
||||
assert_eq!(3, error.error_end);
|
||||
}
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -31,7 +31,10 @@ pub fn construct_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST<'a>, Syntax
|
||||
bindings: vec![module],
|
||||
}),
|
||||
SyntaxResult::None => Err(SyntaxError {
|
||||
reason: String::from("D:"),
|
||||
reason: String::from("PARSER couldn't parse any construction"),
|
||||
// FIXME: This should get the position of the _token_ that current_pos points to
|
||||
error_start: current_pos,
|
||||
error_end: current_pos,
|
||||
}),
|
||||
SyntaxResult::Err(err) => Err(err),
|
||||
}
|
||||
@ -42,6 +45,9 @@ fn next_construct<'a>(tokens: &'a Vec<Token>, current_pos: usize) -> SyntaxResul
|
||||
.unwrap_or_else(|| {
|
||||
SyntaxResult::Err(SyntaxError {
|
||||
reason: String::from("Unrecognized token"),
|
||||
// FIXME: This should get the position of the _token_ that current_pos points to
|
||||
error_start: current_pos,
|
||||
error_end: current_pos,
|
||||
})
|
||||
})
|
||||
}
|
||||
|
34
src/token.rs
34
src/token.rs
@ -23,69 +23,69 @@ pub struct Token {
|
||||
pub value: String,
|
||||
/// The absolute position of this token, from the
|
||||
/// start of the file
|
||||
_position: i32,
|
||||
pub position: usize,
|
||||
}
|
||||
|
||||
pub fn new_eof(position: i32) -> Token {
|
||||
pub fn new_eof(position: usize) -> Token {
|
||||
Token {
|
||||
token_type: TokenType::EOF,
|
||||
value: String::from(""),
|
||||
_position: position,
|
||||
position,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_number(value: String, position: i32) -> Token {
|
||||
pub fn new_number(value: String, position: usize) -> Token {
|
||||
Token {
|
||||
token_type: TokenType::Number,
|
||||
value,
|
||||
_position: position,
|
||||
position,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_operator(value: String, position: i32) -> Token {
|
||||
pub fn new_operator(value: String, position: usize) -> Token {
|
||||
Token {
|
||||
token_type: TokenType::Operator,
|
||||
value,
|
||||
_position: position,
|
||||
position,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new(value: String, position: i32, token_type: TokenType) -> Token {
|
||||
pub fn new(value: String, position: usize, token_type: TokenType) -> Token {
|
||||
Token {
|
||||
token_type,
|
||||
value,
|
||||
_position: position,
|
||||
position,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_identifier(value: String, position: i32) -> Token {
|
||||
pub fn new_identifier(value: String, position: usize) -> Token {
|
||||
Token {
|
||||
token_type: TokenType::Identifier,
|
||||
value,
|
||||
_position: position,
|
||||
position,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_string(value: String, position: i32) -> Token {
|
||||
pub fn new_string(value: String, position: usize) -> Token {
|
||||
Token {
|
||||
token_type: TokenType::String,
|
||||
value,
|
||||
_position: position,
|
||||
position,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_semicolon(position: i32) -> Token {
|
||||
pub fn new_semicolon(position: usize) -> Token {
|
||||
Token {
|
||||
token_type: TokenType::Semicolon,
|
||||
value: String::from(";"),
|
||||
_position: position,
|
||||
position,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_datatype(value: String, position: i32) -> Token {
|
||||
pub fn new_datatype(value: String, position: usize) -> Token {
|
||||
Token {
|
||||
token_type: TokenType::Datatype,
|
||||
value,
|
||||
_position: position,
|
||||
position,
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user