fix(lex): don't emit NewLine tokens when they are followed by EOF

This commit is contained in:
Araozu 2024-06-02 18:18:32 -05:00
parent ade1a809aa
commit b650447e06
4 changed files with 137 additions and 15 deletions

View File

@ -56,7 +56,11 @@ pub fn get_tokens(input: &String) -> Result<Vec<Token>, MistiError> {
current_pos = next_pos;
}
LexResult::Multiple(tokens, next_pos) => {
at_new_line = tokens.last().unwrap().token_type == TokenType::NewLine;
at_new_line = match tokens.last() {
Some(t) if t.token_type == TokenType::NewLine => true,
// This may be None if there are newlines followed by EOF.
_ => false,
};
results.extend(tokens);
current_pos = next_pos;
@ -423,6 +427,16 @@ mod tests {
assert_eq!(TokenType::DEDENT, tokens[8].token_type);
assert_eq!(TokenType::DEDENT, tokens[9].token_type);
}
/// A newline that is immediately followed by EOF must not surface as a
/// `NewLine` token: only the identifier and the final `EOF` are expected.
#[test]
fn shouldnt_emit_trailing_newlines() {
    let source = String::from("token\n");
    let lexed = get_tokens(&source).unwrap();

    // Exactly two tokens: the identifier and EOF, with no NewLine in between.
    assert_eq!(2, lexed.len());
    assert_eq!(TokenType::Identifier, lexed[0].token_type);
    assert_eq!(TokenType::EOF, lexed[1].token_type);
}
}
#[cfg(test)]

View File

@ -16,6 +16,11 @@ pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
LexResult::Some(token, start_pos)
}
},
None => {
// Here EOF is found. Don't emit a newline, but consume the tokens.
// To consume the tokens without returning a newline, we use LexResult::Multiple
LexResult::Multiple(vec![], start_pos)
}
_ => {
let token = Token::new(String::from(""), start_pos, TokenType::NewLine);
LexResult::Some(token, start_pos)
@ -43,8 +48,8 @@ mod tests {
}
#[test]
fn should_emit_semicolon_instead_of_new_line() {
let input = str_to_vec("\n");
fn should_emit_newline() {
let input = str_to_vec("\n_");
let start_pos = 0;
if let LexResult::Some(token, next_pos) = scan(&input, start_pos) {
@ -56,8 +61,8 @@ mod tests {
}
#[test]
fn should_emit_a_single_semicolon_with_multiple_new_lines() {
let input = str_to_vec("\n\n\n");
fn should_emit_a_single_newline_with_multiple_new_lines() {
let input = str_to_vec("\n\n\n_");
let start_pos = 0;
if let LexResult::Some(token, next_pos) = scan(&input, start_pos) {
@ -80,7 +85,7 @@ mod tests {
#[test]
fn should_emit_a_single_semicolon_with_multiple_new_lines_and_whitespace() {
let input = str_to_vec("\n \n \n");
let input = str_to_vec("\n \n \n_");
let start_pos = 0;
if let LexResult::Some(token, next_pos) = scan(&input, start_pos) {
@ -100,7 +105,7 @@ mod tests {
panic!()
}
let input = str_to_vec("\n \n \n ");
let input = str_to_vec("\n \n \n _");
let start_pos = 0;
if let LexResult::Some(token, next_pos) = scan(&input, start_pos) {
@ -110,4 +115,19 @@ mod tests {
panic!()
}
}
/// When the input consists solely of newlines, `scan` must report an empty
/// `LexResult::Multiple` instead of a `NewLine` token.
#[test]
fn shouldnt_emit_newline_if_eof_is_found() {
    let chars = str_to_vec("\n\n");

    if let LexResult::Multiple(emitted, end_pos) = scan(&chars, 0) {
        // No tokens are produced, yet the reported position is past both
        // newline characters.
        assert_eq!(emitted.len(), 0);
        assert_eq!(end_pos, 2);
    } else {
        panic!("Expected a multiple result")
    }
}
}

View File

@ -16,7 +16,7 @@ impl<'a> Parseable<'a> for ModuleAST<'a> {
/// always starts from token 0.
///
/// Its grammar is defined in the spec, at the webpage
fn try_parse(tokens: &'a Vec<Token>, current_pos: usize) -> ParsingResult<'a, Self::Item> {
fn try_parse(tokens: &'a Vec<Token>, _current_pos: usize) -> ParsingResult<'a, Self::Item> {
let mut productions = Vec::<ModuleMembers>::new();
let tokens_len = tokens.len();
let mut current_pos = 0;
@ -24,6 +24,10 @@ impl<'a> Parseable<'a> for ModuleAST<'a> {
// Minus one because last token is EOF
// TODO: Does that EOF do anything?
while current_pos < tokens_len - 1 {
println!(
"len: {} pos: {}, value: `{}`, type: {:?}",
tokens_len, current_pos, tokens[current_pos].value, tokens[current_pos].token_type
);
// Attempt to parse an statement
match Statement::try_parse(tokens, current_pos) {
Ok((prod, next_pos)) => {
@ -64,3 +68,33 @@ impl<'a> Parseable<'a> for ModuleAST<'a> {
Ok((ModuleAST { productions }, current_pos))
}
}
#[cfg(test)]
mod test {
    use crate::lexic::get_tokens;

    use super::*;

    /// A module containing a single empty function declaration should parse
    /// into exactly one production, which is that function declaration.
    #[test]
    fn should_parse_fn_decl_1() {
        let tokens = get_tokens(&String::from("fun id() {}")).unwrap();

        // Bail out early on anything other than a successful parse.
        let (module, next) = match ModuleAST::try_parse(&tokens, 0) {
            Ok(parsed) => parsed,
            _ => panic!("Expected a function declaration"),
        };

        assert_eq!(6, next);
        assert_eq!(1, module.productions.len());

        match &module.productions[0] {
            ModuleMembers::Stmt(Statement::FnDecl(fn_decl)) => {
                assert_eq!("id", fn_decl.identifier.value)
            }
            _ => panic!("Expected a function declaration"),
        }
    }
}

View File

@ -1,5 +1,8 @@
use crate::syntax::{
ast::Statement, binding, functions::function_declaration, parseable::Parseable,
ast::Statement,
binding,
functions::function_declaration,
parseable::{Parseable, ParsingError},
};
impl<'a> Parseable<'a> for Statement<'a> {
@ -15,9 +18,11 @@ impl<'a> Parseable<'a> for Statement<'a> {
Ok((prod, next)) => {
return Ok((Statement::Binding(prod), next));
}
Err(_) => {
// TODO
Err(ParsingError::Err(error)) => {
// TODO: Better error handling, write a better error message
return Err(ParsingError::Err(error));
}
_ => {}
}
// Try to parse a function declaration
@ -26,12 +31,61 @@ impl<'a> Parseable<'a> for Statement<'a> {
Ok((prod, next)) => {
return Ok((Statement::FnDecl(prod), next));
}
Err(_) => {
// TODO
Err(ParsingError::Err(error)) => {
// TODO: Better error handling, write a better error message
return Err(ParsingError::Err(error));
}
_ => {}
}
// Here nothing was parsed.
Err(ParsingError::Unmatched)
}
}
// Here nothing was parsed. Should fail
todo!("Nothing was parsed. Should fail")
#[cfg(test)]
mod test {
    use crate::lexic::get_tokens;

    use super::*;

    /// Lexes `source`, parses a statement starting at token 0 and checks that
    /// the result is a function declaration named `id` whose reported next
    /// position equals `expected_next`.
    fn assert_parses_fn_decl_named_id(source: &str, expected_next: usize) {
        let tokens = get_tokens(&String::from(source)).unwrap();

        // Anything other than a successful parse is a test failure.
        let (statement, next) = match Statement::try_parse(&tokens, 0) {
            Ok(parsed) => parsed,
            _ => panic!("Expected a function declaration"),
        };

        assert_eq!(expected_next, next);

        match statement {
            Statement::FnDecl(fn_decl) => {
                assert_eq!("id", fn_decl.identifier.value)
            }
            _ => panic!("Expected a function declaration"),
        }
    }

    /// An empty function declaration is parsed as a statement.
    #[test]
    fn should_parse_fn_decl_1() {
        assert_parses_fn_decl_named_id("fun id() {}", 6);
    }

    /// The same declaration preceded by a newline still parses; the expected
    /// next position is 7 instead of 6.
    #[test]
    fn should_parse_fn_decl_w_whitespace() {
        assert_parses_fn_decl_named_id("\nfun id() {}", 7);
    }
}