refactor: simplify statement termination parsing

master
Araozu 2024-08-27 10:49:25 -05:00
parent 46758f1ddf
commit 6ff782a20e
3 changed files with 46 additions and 27 deletions

View File

@ -4,7 +4,7 @@ use crate::{
syntax::{
ast::{var_binding::VariableBinding, Expression},
parseable::{Parseable, ParsingError, ParsingResult},
utils::{parse_token_type, try_operator},
utils::{parse_terminator, parse_token_type, try_operator},
},
};
@ -127,23 +127,17 @@ impl<'a> Parseable<'a> for VariableBinding<'a> {
// After the expression there should be a new line
// to terminate the statement
match tokens.get(next_pos) {
Some(t) if t.token_type == TokenType::NewLine || t.token_type == TokenType::EOF => {
// continue
}
Some(t) => {
let next_pos = match parse_terminator(tokens, next_pos) {
Ok((_, next)) => next,
Err(ParsingError::Mismatch(t)) => {
return Err(ParsingError::Err(SyntaxError {
error_start: t.position,
error_end: t.get_end_position(),
reason: format!("Unexpected token `{}`, expected a new line", t.value),
}))
}
_ => {
// this should never happen, the lexer always appends
// an EOF
unreachable!("got to the final of a token stream without finding EOF")
}
}
_ => unreachable!(),
};
let binding = VariableBinding {
datatype,

View File

@ -4,6 +4,7 @@ use crate::{
syntax::{
ast::{Expression, ModuleAST, ModuleMembers, Statement},
parseable::{Parseable, ParsingError, ParsingResult},
utils::parse_terminator,
},
};
@ -42,14 +43,9 @@ impl<'a> Parseable<'a> for ModuleAST<'a> {
Ok((prod, next_pos)) => {
// After an expression is parsed as a statement
// there should be a delimiter (new line or EOF)
match tokens.get(next_pos) {
Some(t)
if t.token_type == TokenType::NewLine
|| t.token_type == TokenType::EOF =>
{
// continue
}
Some(t) => {
let next_pos = match parse_terminator(tokens, next_pos) {
Ok((_, next)) => next,
Err(ParsingError::Mismatch(t)) => {
return Err(ParsingError::Err(SyntaxError {
error_start: t.position,
error_end: t.get_end_position(),
@ -59,12 +55,8 @@ impl<'a> Parseable<'a> for ModuleAST<'a> {
),
}))
}
_ => {
// this should never happen, the lexer always appends
// an EOF
unreachable!("got to the final of a token stream without finding EOF")
}
}
_ => unreachable!(),
};
productions.push(ModuleMembers::Expr(prod));
current_pos = next_pos;
@ -135,7 +127,7 @@ mod test {
let (_, next) = ModuleAST::try_parse(&tokens, 0).unwrap();
assert_eq!(next, 1);
assert_eq!(next, 2);
}
#[test]

View File

@ -80,6 +80,39 @@ pub fn parse_token_type(
}
}
/// Checks that a statement terminator follows at `pos`.
///
/// Starting at `pos`, any `INDENT`, `DEDENT`, `Comment` and
/// `MultilineComment` tokens are skipped. The first token that is not
/// one of those decides the outcome:
///
/// - `NewLine` or `EOF`: returns `Ok(((), next))`, where `next` is the
///   index right after that terminator token.
/// - anything else: returns `Err(ParsingError::Mismatch(token))` carrying
///   the offending token.
///
/// # Panics
///
/// Panics if the tokens run out before a terminator or a mismatching
/// token is found. The token stream is expected to always end with an
/// `EOF` token, so this signals a broken invariant rather than a
/// recoverable error.
pub fn parse_terminator(
    tokens: &Vec<Token>,
    pos: usize,
) -> ParsingResult<()> {
    let mut cursor = pos;

    loop {
        let token = match tokens.get(cursor) {
            Some(token) => token,
            None => unreachable!("Stream of tokens finished before getting an EOF"),
        };

        match token.token_type {
            // Layout and comment tokens carry no meaning here: step over them.
            TokenType::INDENT
            | TokenType::DEDENT
            | TokenType::Comment
            | TokenType::MultilineComment => cursor += 1,
            // Found the terminator: consume it and report the next position.
            TokenType::EOF | TokenType::NewLine => return Ok(((), cursor + 1)),
            // Any other token means the statement was not terminated.
            _ => return Err(ParsingError::Mismatch(token)),
        }
    }
}
#[cfg(test)]
mod tests {
use crate::{