Refactor. Fix invalid position field of Number tokens

This commit is contained in:
Araozu 2023-03-16 13:31:24 -05:00
parent f882942f3f
commit def93715c7
6 changed files with 197 additions and 53 deletions

View File

@ -11,8 +11,7 @@ impl PrintableError for SyntaxError {
format!(
"\n{}\n{}{}\n\n{}{}{}\n{}",
line, whitespace, indicator, "Syntax error at pos ", self.error_start, ":",
self.reason
line, whitespace, indicator, "Syntax error at pos ", self.error_start, ":", self.reason
)
}
}
@ -122,7 +121,29 @@ mod tests {
let (chars, error) = get_error_data(String::from("val"));
let actual_err = error.get_error_str(&chars);
// TODO: Write a better error message (something that explains why it failed)
let expected_str = format!("\n{}\n{}\n\n{}\n{}", "val", "^^^", "Syntax error at pos 0:", "There should be an identifier after a `val` token");
let expected_str = format!(
"\n{}\n{}\n\n{}\n{}",
"val",
"^^^",
"Syntax error at pos 0:",
"There should be an identifier after a `val` token"
);
assert_eq!(expected_str, actual_err);
}
#[test]
fn should_show_an_error_for_missing_equal_operator() {
    let (chars, error) = get_error_data(String::from("val name"));
    let actual_err = error.get_error_str(&chars);
    // TODO: Write a better error message (something that explains why it failed)
    // The error is reported at position 4 (the start of `name`), so the
    // indicator row is padded with four spaces to line the carets up under
    // the identifier.
    let expected_str = format!(
        "\n{}\n{}\n\n{}\n{}",
        "val name",
        "    ^^^^",
        "Syntax error at pos 4:",
        "There should be an equal sign `=` after the identifier"
    );
    assert_eq!(expected_str, actual_err);
}

View File

@ -38,11 +38,20 @@ fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String, is_datatype:
// so this is used to retrieve the original START position of the token
let current_len = current.len();
if let Some(token_type) = str_is_keyword(&current) {
LexResult::Some(token::new(current, start_pos - current_len, token_type), start_pos)
LexResult::Some(
token::new(current, start_pos - current_len, token_type),
start_pos,
)
} else if is_datatype {
LexResult::Some(token::new_datatype(current, start_pos - current_len), start_pos)
LexResult::Some(
token::new_datatype(current, start_pos - current_len),
start_pos,
)
} else {
LexResult::Some(token::new_identifier(current, start_pos - current_len), start_pos)
LexResult::Some(
token::new_identifier(current, start_pos - current_len),
start_pos,
)
}
}
}

View File

@ -33,7 +33,13 @@ fn scan_decimal(chars: &Vec<char>, start_pos: usize, current: String) -> LexResu
Some(c) if utils::is_digit(*c) => {
scan_decimal(chars, start_pos + 1, utils::str_append(current, *c))
}
_ => LexResult::Some(token::new_number(current, start_pos), start_pos),
_ => {
// start_pos is the position where the token ENDS, not where it STARTS,
// so this is used to retrieve the original START position of the token
let current_len = current.len();
LexResult::Some(token::new_number(current, start_pos - current_len), start_pos)
},
}
}
@ -86,7 +92,13 @@ fn scan_double_impl(chars: &Vec<char>, start_pos: usize, current: String) -> Lex
Some(c) if *c == 'e' => {
scan_scientific(chars, start_pos + 1, utils::str_append(current, *c))
}
_ => LexResult::Some(token::new_number(current, start_pos), start_pos),
_ => {
// start_pos is the position where the token ENDS, not where it STARTS,
// so this is used to retrieve the original START position of the token
let current_len = current.len();
LexResult::Some(token::new_number(current, start_pos - current_len), start_pos)
}
}
}
@ -123,7 +135,13 @@ fn scan_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Token,
Some(c) if utils::is_digit(*c) => {
scan_digits(chars, start_pos + 1, utils::str_append(current, *c))
}
_ => (token::new_number(current, start_pos), start_pos),
_ => {
// start_pos is the position where the token ENDS, not where it STARTS,
// so this is used to retrieve the original START position of the token
let current_len = current.len();
(token::new_number(current, start_pos - current_len), start_pos)
}
}
}
@ -133,7 +151,13 @@ fn scan_hex_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Tok
Some(c) if utils::is_hex_digit(*c) => {
scan_hex_digits(chars, start_pos + 1, utils::str_append(current, *c))
}
_ => (token::new_number(current, start_pos), start_pos),
_ => {
// start_pos is the position where the token ENDS, not where it STARTS,
// so this is used to retrieve the original START position of the token
let current_len = current.len();
(token::new_number(current, start_pos - current_len), start_pos)
}
}
}
@ -156,6 +180,7 @@ mod tests {
assert_eq!(3, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("123", token.value);
assert_eq!(0, token.position);
} else {
panic!()
}
@ -167,6 +192,7 @@ mod tests {
assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("0123", token.value);
assert_eq!(0, token.position);
} else {
panic!()
}
@ -178,6 +204,7 @@ mod tests {
assert_eq!(8, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("123456", token.value);
assert_eq!(2, token.position);
} else {
panic!()
}
@ -207,6 +234,7 @@ mod tests {
assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("0x20", token.value);
assert_eq!(0, token.position);
} else {
panic!()
}
@ -218,6 +246,7 @@ mod tests {
assert_eq!(12, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("0xff23DA", token.value);
assert_eq!(4, token.position);
} else {
panic!()
}
@ -268,6 +297,7 @@ mod tests {
assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("3.22", token.value);
assert_eq!(0, token.position);
} else {
panic!()
}
@ -278,6 +308,7 @@ mod tests {
assert_eq!(11, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("123456.7890", token.value);
assert_eq!(0, token.position);
} else {
panic!()
}
@ -317,6 +348,7 @@ mod tests {
assert_eq!("1e+0", token.value);
assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!(0, token.position);
} else {
panic!()
}
@ -327,6 +359,7 @@ mod tests {
assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("1e-0", token.value);
assert_eq!(0, token.position);
} else {
panic!()
}
@ -337,6 +370,7 @@ mod tests {
assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("0e+0", token.value);
assert_eq!(0, token.position);
} else {
panic!()
}
@ -347,6 +381,7 @@ mod tests {
assert_eq!(19, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("123498790e+12349870", token.value);
assert_eq!(0, token.position);
} else {
panic!()
}
@ -361,6 +396,7 @@ mod tests {
assert_eq!("1.24e+1", token.value);
assert_eq!(7, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!(0, token.position);
} else {
panic!()
}
@ -371,8 +407,25 @@ mod tests {
assert_eq!("0.00000000000001e+1", token.value);
assert_eq!(19, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!(0, token.position);
} else {
panic!()
}
}
/// A number that does not start at the beginning of the input must report
/// the index of its first digit as its position, not the index where
/// scanning stopped.
#[test]
fn position_should_be_valid() {
    // Two leading spaces so that the digits occupy indices 2..5, matching
    // `start_pos` and the expected `next` value below.
    let input = str_to_vec("  123  ");
    let start_pos = 2;

    if let LexResult::Some(token, next) = scan(&input, start_pos) {
        assert_eq!(5, next);
        assert_eq!(TokenType::Number, token.token_type);
        assert_eq!("123", token.value);
        assert_eq!(2, token.position);
    } else {
        panic!("Expected some value")
    }
}
}

View File

@ -17,6 +17,7 @@ mod ast_types;
mod symbol_table;
// Transforms an AST to JS
mod codegen;
mod utils;
mod error_handling;

View File

@ -2,6 +2,7 @@ use super::ast_types::{Binding, ValBinding, VarBinding};
use super::{expression, SyntaxResult};
use crate::error_handling::SyntaxError;
use crate::token::{Token, TokenType};
use crate::utils::Result3;
// TODO: Should return a 3 state value:
// - Success: binding parsed successfully
@ -12,16 +13,13 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
// Optional datatype annotation
let datatype_annotation = {
match tokens.get(pos) {
Some(t) if t.token_type == TokenType::Datatype => {
match try_token_type(tokens, pos, TokenType::Datatype) {
Result3::Ok(t) => {
pos += 1;
Some(String::from(&t.value))
}
// If the first token is anything else, ignore
Some(_) => None,
// This should never match, as there should always be at least a
// TokenType::Semicolon or TokenType::EOF
None => panic!(
Result3::Err(_) => None,
Result3::None => panic!(
"Internal compiler error: Illegal token stream at src/syntax/binding.rs#try_parse"
),
}
@ -31,23 +29,34 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
let (is_val, binding_token) = {
let res1 = try_token_type(tokens, pos, TokenType::VAL);
match res1 {
Some(val_token) => (true, val_token),
None => {
Result3::Ok(val_token) => (true, val_token),
_ => {
let res2 = try_token_type(tokens, pos, TokenType::VAR);
match res2 {
Some(var_token) => (false, var_token),
Result3::Ok(var_token) => (false, var_token),
// Neither VAL nor VAR were matched, the parser should try
// other constructs
None => return None,
_ => return None,
}
}
}
};
let identifier = try_token_type(tokens, pos + 1, TokenType::Identifier);
if identifier.is_none() {
let identifier = match try_token_type(tokens, pos + 1, TokenType::Identifier) {
Result3::Ok(t) => t,
Result3::Err(t) => {
// The parser found a token, but it's not an identifier
return Some(SyntaxResult::Err(SyntaxError {
reason: format!(
"There should be an identifier after a `{}` token",
if is_val { "val" } else { "var" }
),
error_start: binding_token.position,
error_end: binding_token.position + binding_token.value.len(),
}));
}
Result3::None => {
// TODO: Differentiate between no token found and incorrect token found.
// TODO:
// The parser didn't find an Identifier after VAL/VAR
return Some(SyntaxResult::Err(SyntaxError {
reason: format!(
@ -58,13 +67,29 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
error_end: binding_token.position + binding_token.value.len(),
}));
}
let identifier = identifier.unwrap();
};
let equal_operator = try_operator(tokens, pos + 2, String::from("="));
if equal_operator.is_none() {
// TODO: return Error
return None;
let _equal_operator: &Token = match try_operator(tokens, pos + 2, String::from("=")) {
Result3::Ok(t) => t,
Result3::Err(t) => {
// TODO: Differentiate between no token found and incorrect token found.
// The parser didn't find the `=` operator after the identifier
return Some(SyntaxResult::Err(SyntaxError {
reason: format!("There should be an equal sign `=` after the identifier",),
error_start: identifier.position,
error_end: identifier.position + identifier.value.len(),
}));
}
Result3::None => {
// TODO: Differentiate between no token found and incorrect token found.
// The parser didn't find the `=` operator after the identifier
return Some(SyntaxResult::Err(SyntaxError {
reason: format!("There should be an equal sign `=` after the identifier",),
error_start: identifier.position,
error_end: identifier.position + identifier.value.len(),
}));
}
};
let expression = expression::try_parse(tokens, pos + 3);
if expression.is_none() {
@ -90,16 +115,21 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
Some(SyntaxResult::Ok(binding))
}
fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Option<&Token> {
tokens
.get(pos)
.and_then(|token| (token.token_type == token_type).then(|| token))
/// Expects the token at `pos` to be of type `token_type`.
///
/// Returns:
/// - `Result3::Ok(token)` when a token exists at `pos` and its type matches,
/// - `Result3::Err(token)` when a token exists at `pos` but has another type
///   (the offending token is handed back so the caller can report it),
/// - `Result3::None` when `pos` is past the end of `tokens`.
fn try_token_type(tokens: &[Token], pos: usize, token_type: TokenType) -> Result3<&Token> {
    match tokens.get(pos) {
        Some(t) if t.token_type == token_type => Result3::Ok(t),
        Some(t) => Result3::Err(t),
        None => Result3::None,
    }
}
fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Option<&Token> {
tokens.get(pos).and_then(|token| {
(token.token_type == TokenType::Operator && token.value == operator).then(|| token)
})
/// Expects the token at `pos` to be an operator whose text equals `operator`.
///
/// Returns:
/// - `Result3::Ok(token)` when the token at `pos` is a `TokenType::Operator`
///   with a value equal to `operator`,
/// - `Result3::Err(token)` when a token exists at `pos` but is either not an
///   operator or a different operator,
/// - `Result3::None` when `pos` is past the end of `tokens`.
fn try_operator(tokens: &[Token], pos: usize, operator: String) -> Result3<&Token> {
    match tokens.get(pos) {
        Some(t) if t.token_type == TokenType::Operator && t.value == operator => Result3::Ok(t),
        Some(t) => Result3::Err(t),
        None => Result3::None,
    }
}
#[cfg(test)]
@ -123,7 +153,7 @@ mod tests {
#[test]
fn should_parse_val() {
let tokens = get_tokens(&String::from("val")).unwrap();
let token = try_token_type(&tokens, 0, TokenType::VAL).unwrap();
let token = *try_token_type(&tokens, 0, TokenType::VAL).unwrap();
assert_eq!(TokenType::VAL, token.token_type);
assert_eq!("val", token.value);
@ -132,7 +162,7 @@ mod tests {
#[test]
fn should_parse_identifier() {
let tokens = get_tokens(&String::from("identifier")).unwrap();
let token = try_token_type(&tokens, 0, TokenType::Identifier).unwrap();
let token = *try_token_type(&tokens, 0, TokenType::Identifier).unwrap();
assert_eq!("identifier", token.value);
}
@ -140,7 +170,7 @@ mod tests {
#[test]
fn should_parse_operator() {
let tokens = get_tokens(&String::from("=")).unwrap();
let token = try_operator(&tokens, 0, String::from("=")).unwrap();
let token = *try_operator(&tokens, 0, String::from("=")).unwrap();
assert_eq!("=", token.value);
}
@ -182,7 +212,23 @@ mod tests {
assert_eq!(0, error.error_start);
assert_eq!(3, error.error_end);
}
_ => panic!(),
_ => panic!("Error expected"),
}
}
/// A `val` followed by a token that is not an identifier must produce a
/// syntax error rather than a successful binding.
#[test]
fn should_return_error_when_identifier_is_wrong() {
    let tokens = get_tokens(&String::from("val 322")).unwrap();
    assert_eq!(TokenType::VAL, tokens[0].token_type);
    assert_eq!(0, tokens[0].position);

    let binding = try_parse(&tokens, 0).unwrap();

    match binding {
        SyntaxResult::Err(error) => {
            // The parser reports the span of the `val` keyword (0..3), not
            // the span of the offending `322` token.
            assert_eq!(0, error.error_start);
            assert_eq!(3, error.error_end);
        }
        _ => panic!("Error expected"),
    }
}
}

14
src/utils/mod.rs Normal file
View File

@ -0,0 +1,14 @@
/// A three-state result used by the parser helpers.
///
/// Unlike `std::result::Result`, both the success and the failure variant
/// carry a value of the same type `T`, and a third variant signals that
/// there was no value at all.
#[derive(Debug)]
pub enum Result3<T> {
    /// A value was found and it is the expected one.
    Ok(T),
    /// A value was found but it is not the expected one.
    Err(T),
    /// No value was found.
    None,
}

impl<T> Result3<T> {
    /// Returns a reference to the value held by `Result3::Ok`.
    ///
    /// # Panics
    ///
    /// Panics if `self` is `Result3::Err` or `Result3::None`.
    pub fn unwrap(&self) -> &T {
        match self {
            Result3::Ok(t) => t,
            // Listed explicitly so that adding a variant forces a decision here.
            Result3::Err(_) | Result3::None => {
                panic!("called `Result3::unwrap()` on a value that is not `Result3::Ok`")
            }
        }
    }
}