diff --git a/src/lexic/mod.rs b/src/lexic/mod.rs
index 377382e..084dfe3 100755
--- a/src/lexic/mod.rs
+++ b/src/lexic/mod.rs
@@ -56,7 +56,11 @@ pub fn get_tokens(input: &String) -> Result<Vec<Token>, MistiError> {
                 current_pos = next_pos;
             }
             LexResult::Multiple(tokens, next_pos) => {
-                at_new_line = tokens.last().unwrap().token_type == TokenType::NewLine;
+                at_new_line = match tokens.last() {
+                    Some(t) if t.token_type == TokenType::NewLine => true,
+                    // This may be None if there are newlines followed by EOF.
+                    _ => false,
+                };
 
                 results.extend(tokens);
                 current_pos = next_pos;
@@ -423,6 +427,16 @@ mod tests {
         assert_eq!(TokenType::DEDENT, tokens[8].token_type);
         assert_eq!(TokenType::DEDENT, tokens[9].token_type);
     }
+
+    #[test]
+    fn shouldnt_emit_trailing_newlines() {
+        let input = String::from("token\n");
+        let tokens = get_tokens(&input).unwrap();
+
+        assert_eq!(2, tokens.len());
+        assert_eq!(TokenType::Identifier, tokens[0].token_type);
+        assert_eq!(TokenType::EOF, tokens[1].token_type);
+    }
 }
 
 #[cfg(test)]
diff --git a/src/lexic/scanner/new_line.rs b/src/lexic/scanner/new_line.rs
index 1e14396..7602242 100644
--- a/src/lexic/scanner/new_line.rs
+++ b/src/lexic/scanner/new_line.rs
@@ -16,6 +16,11 @@ pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
                 LexResult::Some(token, start_pos)
             }
         },
+        None => {
+            // Here EOF is found. Don't emit a newline, but consume the tokens.
+            // To consume the tokens without returning a newline, we use LexResult::Multiple
+            LexResult::Multiple(vec![], start_pos)
+        }
         _ => {
             let token = Token::new(String::from(""), start_pos, TokenType::NewLine);
             LexResult::Some(token, start_pos)
@@ -43,8 +48,8 @@ mod tests {
     }
 
     #[test]
-    fn should_emit_semicolon_instead_of_new_line() {
-        let input = str_to_vec("\n");
+    fn should_emit_newline() {
+        let input = str_to_vec("\n_");
         let start_pos = 0;
 
         if let LexResult::Some(token, next_pos) = scan(&input, start_pos) {
@@ -56,8 +61,8 @@ mod tests {
     }
 
     #[test]
-    fn should_emit_a_single_semicolon_with_multiple_new_lines() {
-        let input = str_to_vec("\n\n\n");
+    fn should_emit_a_single_newline_with_multiple_new_lines() {
+        let input = str_to_vec("\n\n\n_");
         let start_pos = 0;
 
         if let LexResult::Some(token, next_pos) = scan(&input, start_pos) {
@@ -80,7 +85,7 @@ mod tests {
 
     #[test]
-    fn should_emit_a_single_semicolon_with_multiple_new_lines_and_whitespace() {
-        let input = str_to_vec("\n \n \n");
+    fn should_emit_a_single_newline_with_multiple_new_lines_and_whitespace() {
+        let input = str_to_vec("\n \n \n_");
         let start_pos = 0;
 
         if let LexResult::Some(token, next_pos) = scan(&input, start_pos) {
@@ -100,7 +105,7 @@ mod tests {
             panic!()
         }
 
-        let input = str_to_vec("\n \n \n ");
+        let input = str_to_vec("\n \n \n _");
         let start_pos = 0;
 
         if let LexResult::Some(token, next_pos) = scan(&input, start_pos) {
@@ -110,4 +115,19 @@ mod tests {
             panic!()
         }
     }
+
+    #[test]
+    fn shouldnt_emit_newline_if_eof_is_found() {
+        let input = str_to_vec("\n\n");
+
+        match scan(&input, 0) {
+            LexResult::Multiple(vec, next_pos) => {
+                assert_eq!(vec.len(), 0);
+                assert_eq!(next_pos, 2);
+            }
+            _ => {
+                panic!("Expected a multiple result")
+            }
+        }
+    }
 }
diff --git a/src/syntax/parsers/module.rs b/src/syntax/parsers/module.rs
index d6c1570..316e642 100644
--- a/src/syntax/parsers/module.rs
+++ b/src/syntax/parsers/module.rs
@@ -16,7 +16,7 @@ impl<'a> Parseable<'a> for ModuleAST<'a> {
     /// always starts from token 0.
     ///
     /// Its grammar is defined it the spec, at the webpage
-    fn try_parse(tokens: &'a Vec<Token>, current_pos: usize) -> ParsingResult<'a, Self::Item> {
+    fn try_parse(tokens: &'a Vec<Token>, _current_pos: usize) -> ParsingResult<'a, Self::Item> {
         let mut productions = Vec::<ModuleMembers>::new();
         let tokens_len = tokens.len();
         let mut current_pos = 0;
@@ -64,3 +64,33 @@ impl<'a> Parseable<'a> for ModuleAST<'a> {
         Ok((ModuleAST { productions }, current_pos))
     }
 }
+
+#[cfg(test)]
+mod test {
+    use crate::lexic::get_tokens;
+
+    use super::*;
+
+    #[test]
+    fn should_parse_fn_decl_1() {
+        let tokens = get_tokens(&String::from("fun id() {}")).unwrap();
+
+        match ModuleAST::try_parse(&tokens, 0) {
+            Ok((prods, next)) => {
+                assert_eq!(6, next);
+                assert_eq!(1, prods.productions.len());
+
+                let prod = &prods.productions[0];
+                match prod {
+                    ModuleMembers::Stmt(Statement::FnDecl(fn_decl)) => {
+                        assert_eq!("id", fn_decl.identifier.value)
+                    }
+                    _ => panic!("Expected a function declaration"),
+                }
+            }
+            _ => {
+                panic!("Expected a function declaration");
+            }
+        }
+    }
+}
diff --git a/src/syntax/parsers/statement.rs b/src/syntax/parsers/statement.rs
index 00d5c9c..73db84d 100644
--- a/src/syntax/parsers/statement.rs
+++ b/src/syntax/parsers/statement.rs
@@ -1,5 +1,8 @@
 use crate::syntax::{
-    ast::Statement, binding, functions::function_declaration, parseable::Parseable,
+    ast::Statement,
+    binding,
+    functions::function_declaration,
+    parseable::{Parseable, ParsingError},
 };
 
 impl<'a> Parseable<'a> for Statement<'a> {
@@ -15,9 +18,11 @@
             Ok((prod, next)) => {
                 return Ok((Statement::Binding(prod), next));
             }
-            Err(_) => {
-                // TODO
+            Err(ParsingError::Err(error)) => {
+                // TODO: Better error handling, write a better error message
+                return Err(ParsingError::Err(error));
             }
+            _ => {}
         }
 
         // Try to parse a function declaration
@@ -26,12 +31,61 @@
             Ok((prod, next)) => {
                 return Ok((Statement::FnDecl(prod), next));
             }
-            Err(_) => {
-                // TODO
+            Err(ParsingError::Err(error)) => {
+                // TODO: Better error handling, write a better error message
+                return Err(ParsingError::Err(error));
             }
+            _ => {}
         }
 
-        // Here nothing was parsed. Should fail
-        todo!("Nothing was parsed. Should fail")
+        // Here nothing was parsed.
+        Err(ParsingError::Unmatched)
     }
 }
+
+#[cfg(test)]
+mod test {
+    use crate::lexic::get_tokens;
+
+    use super::*;
+
+    #[test]
+    fn should_parse_fn_decl_1() {
+        let tokens = get_tokens(&String::from("fun id() {}")).unwrap();
+
+        match Statement::try_parse(&tokens, 0) {
+            Ok((prod, next)) => {
+                assert_eq!(6, next);
+                match prod {
+                    Statement::FnDecl(fn_decl) => {
+                        assert_eq!("id", fn_decl.identifier.value)
+                    }
+                    _ => panic!("Expected a function declaration"),
+                }
+            }
+            _ => {
+                panic!("Expected a function declaration");
+            }
+        }
+    }
+
+    #[test]
+    fn should_parse_fn_decl_w_whitespace() {
+        let tokens = get_tokens(&String::from("\nfun id() {}")).unwrap();
+
+        match Statement::try_parse(&tokens, 0) {
+            Ok((prod, next)) => {
+                assert_eq!(7, next);
+                match prod {
+                    Statement::FnDecl(fn_decl) => {
+                        assert_eq!("id", fn_decl.identifier.value)
+                    }
+                    _ => panic!("Expected a function declaration"),
+                }
+            }
+            _ => {
+                panic!("Expected a function declaration");
+            }
+        }
+    }
+}