fix(lex): don't emit NewLine when it is followed by EOF
This commit is contained in:
parent
ade1a809aa
commit
b650447e06
@ -56,7 +56,11 @@ pub fn get_tokens(input: &String) -> Result<Vec<Token>, MistiError> {
|
||||
current_pos = next_pos;
|
||||
}
|
||||
LexResult::Multiple(tokens, next_pos) => {
|
||||
at_new_line = tokens.last().unwrap().token_type == TokenType::NewLine;
|
||||
at_new_line = match tokens.last() {
|
||||
Some(t) if t.token_type == TokenType::NewLine => true,
|
||||
// This may be None if there are newlines followed by EOF.
|
||||
_ => false,
|
||||
};
|
||||
|
||||
results.extend(tokens);
|
||||
current_pos = next_pos;
|
||||
@ -423,6 +427,16 @@ mod tests {
|
||||
assert_eq!(TokenType::DEDENT, tokens[8].token_type);
|
||||
assert_eq!(TokenType::DEDENT, tokens[9].token_type);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn shouldnt_emit_trailing_newlines() {
|
||||
let input = String::from("token\n");
|
||||
let tokens = get_tokens(&input).unwrap();
|
||||
|
||||
assert_eq!(2, tokens.len());
|
||||
assert_eq!(TokenType::Identifier, tokens[0].token_type);
|
||||
assert_eq!(TokenType::EOF, tokens[1].token_type);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -16,6 +16,11 @@ pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
|
||||
LexResult::Some(token, start_pos)
|
||||
}
|
||||
},
|
||||
None => {
|
||||
// EOF was reached. Don't emit a NewLine token, but still consume the newline characters.
|
||||
// To consume the characters without returning a NewLine token, we use an empty LexResult::Multiple
|
||||
LexResult::Multiple(vec![], start_pos)
|
||||
}
|
||||
_ => {
|
||||
let token = Token::new(String::from(""), start_pos, TokenType::NewLine);
|
||||
LexResult::Some(token, start_pos)
|
||||
@ -43,8 +48,8 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_emit_semicolon_instead_of_new_line() {
|
||||
let input = str_to_vec("\n");
|
||||
fn should_emit_newline() {
|
||||
let input = str_to_vec("\n_");
|
||||
let start_pos = 0;
|
||||
|
||||
if let LexResult::Some(token, next_pos) = scan(&input, start_pos) {
|
||||
@ -56,8 +61,8 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_emit_a_single_semicolon_with_multiple_new_lines() {
|
||||
let input = str_to_vec("\n\n\n");
|
||||
fn should_emit_a_single_newline_with_multiple_new_lines() {
|
||||
let input = str_to_vec("\n\n\n_");
|
||||
let start_pos = 0;
|
||||
|
||||
if let LexResult::Some(token, next_pos) = scan(&input, start_pos) {
|
||||
@ -80,7 +85,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn should_emit_a_single_semicolon_with_multiple_new_lines_and_whitespace() {
|
||||
let input = str_to_vec("\n \n \n");
|
||||
let input = str_to_vec("\n \n \n_");
|
||||
let start_pos = 0;
|
||||
|
||||
if let LexResult::Some(token, next_pos) = scan(&input, start_pos) {
|
||||
@ -100,7 +105,7 @@ mod tests {
|
||||
panic!()
|
||||
}
|
||||
|
||||
let input = str_to_vec("\n \n \n ");
|
||||
let input = str_to_vec("\n \n \n _");
|
||||
let start_pos = 0;
|
||||
|
||||
if let LexResult::Some(token, next_pos) = scan(&input, start_pos) {
|
||||
@ -110,4 +115,19 @@ mod tests {
|
||||
panic!()
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn shouldnt_emit_newline_if_eof_is_found() {
|
||||
let input = str_to_vec("\n\n");
|
||||
|
||||
match scan(&input, 0) {
|
||||
LexResult::Multiple(vec, next_pos) => {
|
||||
assert_eq!(vec.len(), 0);
|
||||
assert_eq!(next_pos, 2);
|
||||
}
|
||||
_ => {
|
||||
panic!("Expected a multiple result")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -16,7 +16,7 @@ impl<'a> Parseable<'a> for ModuleAST<'a> {
|
||||
/// always starts from token 0.
|
||||
///
|
||||
/// Its grammar is defined in the spec, at the webpage
|
||||
fn try_parse(tokens: &'a Vec<Token>, current_pos: usize) -> ParsingResult<'a, Self::Item> {
|
||||
fn try_parse(tokens: &'a Vec<Token>, _current_pos: usize) -> ParsingResult<'a, Self::Item> {
|
||||
let mut productions = Vec::<ModuleMembers>::new();
|
||||
let tokens_len = tokens.len();
|
||||
let mut current_pos = 0;
|
||||
@ -24,6 +24,10 @@ impl<'a> Parseable<'a> for ModuleAST<'a> {
|
||||
// Minus one because last token is EOF
|
||||
// TODO: Does that EOF do anything?
|
||||
while current_pos < tokens_len - 1 {
|
||||
println!(
|
||||
"len: {} pos: {}, value: `{}`, type: {:?}",
|
||||
tokens_len, current_pos, tokens[current_pos].value, tokens[current_pos].token_type
|
||||
);
|
||||
// Attempt to parse a statement
|
||||
match Statement::try_parse(tokens, current_pos) {
|
||||
Ok((prod, next_pos)) => {
|
||||
@ -64,3 +68,33 @@ impl<'a> Parseable<'a> for ModuleAST<'a> {
|
||||
Ok((ModuleAST { productions }, current_pos))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::lexic::get_tokens;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn should_parse_fn_decl_1() {
|
||||
let tokens = get_tokens(&String::from("fun id() {}")).unwrap();
|
||||
|
||||
match ModuleAST::try_parse(&tokens, 0) {
|
||||
Ok((prods, next)) => {
|
||||
assert_eq!(6, next);
|
||||
assert_eq!(1, prods.productions.len());
|
||||
|
||||
let prod = &prods.productions[0];
|
||||
match prod {
|
||||
ModuleMembers::Stmt(Statement::FnDecl(fn_decl)) => {
|
||||
assert_eq!("id", fn_decl.identifier.value)
|
||||
}
|
||||
_ => panic!("Expected a function declaration"),
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
panic!("Expected a function declaration");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,8 @@
|
||||
use crate::syntax::{
|
||||
ast::Statement, binding, functions::function_declaration, parseable::Parseable,
|
||||
ast::Statement,
|
||||
binding,
|
||||
functions::function_declaration,
|
||||
parseable::{Parseable, ParsingError},
|
||||
};
|
||||
|
||||
impl<'a> Parseable<'a> for Statement<'a> {
|
||||
@ -15,9 +18,11 @@ impl<'a> Parseable<'a> for Statement<'a> {
|
||||
Ok((prod, next)) => {
|
||||
return Ok((Statement::Binding(prod), next));
|
||||
}
|
||||
Err(_) => {
|
||||
// TODO
|
||||
Err(ParsingError::Err(error)) => {
|
||||
// TODO: Better error handling, write a better error message
|
||||
return Err(ParsingError::Err(error));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Try to parse a function declaration
|
||||
@ -26,12 +31,61 @@ impl<'a> Parseable<'a> for Statement<'a> {
|
||||
Ok((prod, next)) => {
|
||||
return Ok((Statement::FnDecl(prod), next));
|
||||
}
|
||||
Err(_) => {
|
||||
// TODO
|
||||
Err(ParsingError::Err(error)) => {
|
||||
// TODO: Better error handling, write a better error message
|
||||
return Err(ParsingError::Err(error));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Here nothing was parsed. Should fail
|
||||
todo!("Nothing was parsed. Should fail")
|
||||
// Here nothing was parsed.
|
||||
Err(ParsingError::Unmatched)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::lexic::get_tokens;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn should_parse_fn_decl_1() {
|
||||
let tokens = get_tokens(&String::from("fun id() {}")).unwrap();
|
||||
|
||||
match Statement::try_parse(&tokens, 0) {
|
||||
Ok((prod, next)) => {
|
||||
assert_eq!(6, next);
|
||||
match prod {
|
||||
Statement::FnDecl(fn_decl) => {
|
||||
assert_eq!("id", fn_decl.identifier.value)
|
||||
}
|
||||
_ => panic!("Expected a function declaration"),
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
panic!("Expected a function declaration");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_parse_fn_decl_w_whitespace() {
|
||||
let tokens = get_tokens(&String::from("\nfun id() {}")).unwrap();
|
||||
|
||||
match Statement::try_parse(&tokens, 0) {
|
||||
Ok((prod, next)) => {
|
||||
assert_eq!(7, next);
|
||||
match prod {
|
||||
Statement::FnDecl(fn_decl) => {
|
||||
assert_eq!("id", fn_decl.identifier.value)
|
||||
}
|
||||
_ => panic!("Expected a function declaration"),
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
panic!("Expected a function declaration");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user