refactor: yet another strategy to parse indentation around binary op

This commit is contained in:
Araozu 2024-06-15 19:32:04 -05:00
parent d08019c010
commit 8429ca8d8f
2 changed files with 103 additions and 26 deletions

View File

@ -1,5 +1,5 @@
use crate::lexic::token::TokenType; use crate::lexic::token::TokenType;
use crate::syntax::parsers::expression::utils::try_binary_op; use crate::syntax::parsers::expression::utils::{try_binary_op, try_binary_op_2};
use crate::{ use crate::{
handle_dedentation, handle_indentation, handle_dedentation, handle_indentation,
lexic::token::Token, lexic::token::Token,
@ -28,31 +28,28 @@ fn parse_many<'a>(
) -> ParsingResult<'a, Expression<'a>> { ) -> ParsingResult<'a, Expression<'a>> {
// term = factor, (("-" | "+"), factor)*; // term = factor, (("-" | "+"), factor)*;
let (token, next_pos, indent_count) = try_binary_op_2(
match try_binary_op(tokens, pos, vec!["+", "-"], indentation_level) { tokens,
Some(t) => t, pos,
None => return Ok((prev_expr, pos)), prev_expr,
}; vec!["+", "-"],
indentation_level,
|tokens, pos, prev_expr, token, indent_count: u32| {
// Parse the next factor
match super::factor::try_parse(tokens, pos) {
Ok((expr, next_pos)) => {
let expr = Expression::BinaryOperator(
Box::new(prev_expr),
Box::new(expr),
&token.value,
);
// Parse the next factor parse_many(tokens, next_pos, expr, indentation_level + indent_count)
let result = match super::factor::try_parse(tokens, next_pos) { }
Ok((expr, next_pos)) => { _ => return Err(ParsingError::Unmatched),
let expr = }
Expression::BinaryOperator(Box::new(prev_expr), Box::new(expr), &token.value); },
)
parse_many(tokens, next_pos, expr, indentation_level + indent_count)
}
_ => return Err(ParsingError::Unmatched),
};
let (new_expr, mut next_pos) = match result {
Ok((e, n)) => (e, n),
_ => return result,
};
handle_dedentation!(tokens, next_pos, indent_count);
Ok((new_expr, next_pos))
} }
#[cfg(test)] #[cfg(test)]

View File

@ -1,5 +1,7 @@
use crate::lexic::token::Token; use crate::lexic::token::Token;
use crate::lexic::token::TokenType::{NewLine, INDENT}; use crate::lexic::token::TokenType::{NewLine, DEDENT, INDENT};
use crate::syntax::ast::Expression;
use crate::syntax::parseable::ParsingResult;
/// Attempts to parse a binary operator and handles indentation /// Attempts to parse a binary operator and handles indentation
/// ///
@ -52,6 +54,84 @@ pub fn try_binary_op<'a>(
Some((matched_token, pos, indent_count)) Some((matched_token, pos, indent_count))
} }
/// Skips the whitespace tokens that may appear right before or right after
/// a binary operator.
///
/// Returns `(new_pos, opened_indents)`:
/// - `NewLine` followed by `INDENT`: both are skipped and `opened_indents` is 1.
/// - A lone `NewLine` while `indentation_level > 0`: it is skipped and
///   `opened_indents` is 0.
/// - Anything else: `pos` is returned unchanged and `opened_indents` is 0.
fn skip_operator_whitespace(tokens: &[Token], pos: usize, indentation_level: u32) -> (usize, u32) {
    match (tokens.get(pos), tokens.get(pos + 1)) {
        // New indentation level
        (Some(t1), Some(t2)) if t1.token_type == NewLine && t2.token_type == INDENT => (pos + 2, 1),
        // when indented, ignore newlines
        (Some(t), _) if t.token_type == NewLine && indentation_level > 0 => (pos + 1, 0),
        // let other handlers handle this
        _ => (pos, 0),
    }
}

/// Attempts to parse one binary operator starting at `original_pos`,
/// tolerating newlines/indentation around it, and delegates the parsing of
/// whatever follows the operator to `fun`.
///
/// Steps:
/// 1. Optional whitespace before the operator is skipped (`NewLine` + `INDENT`,
///    or a lone `NewLine` when `indentation_level > 0`).
/// 2. The token at that position is compared, by its `value`, against
///    `operators`. If it is not one of them, `Ok((prev_expr, original_pos))`
///    is returned, so none of the whitespace skipped in step 1 is consumed.
/// 3. Optional whitespace after the operator is skipped with the same rules.
/// 4. `fun(tokens, pos, prev_expr, operator_token, indent_count)` is called,
///    where `pos` is the position after step 3 and `indent_count` is the
///    number of `INDENT` tokens matched in steps 1 and 3 (0, 1 or 2).
///    An `Err` returned by `fun` is propagated unchanged.
/// 5. One `DEDENT` token is consumed for every `INDENT` matched, starting at
///    the position returned by `fun`.
///
/// # Returns
/// The expression produced by `fun` together with the position after the
/// consumed `DEDENT` tokens, or `prev_expr` and `original_pos` when no
/// operator was found.
///
/// # Panics
/// Panics (via `unreachable!`) if, after `fun` returns, the tokens at the
/// returned position are not one `DEDENT` per matched `INDENT`.
///
/// NOTE(review): that panic looks reachable from malformed input (e.g. an
/// indented operand followed by an unrelated line before the dedent) -
/// confirm whether it should become a syntax error instead.
///
/// NOTE(review): the "ignore newlines when indented" rule only looks at the
/// caller's `indentation_level`; an `INDENT` opened in step 1 of this same
/// call does not enable it in step 3 - confirm this is intended.
pub fn try_binary_op_2<'a, F>(
    tokens: &'a Vec<Token>,
    original_pos: usize,
    prev_expr: Expression<'a>,
    operators: Vec<&str>,
    indentation_level: u32,
    fun: F,
) -> ParsingResult<'a, Expression<'a>>
where
    F: FnOnce(
        &'a Vec<Token>,
        usize,
        Expression<'a>,
        &'a Token,
        u32,
    ) -> ParsingResult<'a, Expression<'a>>,
{
    // handle possible indentation before the operator
    let (pos, indents_before) = skip_operator_whitespace(tokens, original_pos, indentation_level);

    // try to parse any of the binary operators
    let matched_token = match tokens.get(pos) {
        Some(token) if operators.contains(&token.value.as_str()) => token,
        // If not matched, return the existing expression and rewind to the
        // original position so the skipped whitespace stays unconsumed
        _ => return Ok((prev_expr, original_pos)),
    };

    // handle possible indentation after the operator
    let (pos, indents_after) = skip_operator_whitespace(tokens, pos + 1, indentation_level);
    let indent_count = indents_before + indents_after;

    // run the rest of the logic, bubbling up any error as-is
    let (new_expr, mut next_pos) = fun(tokens, pos, prev_expr, matched_token, indent_count)?;

    // expect a DEDENT for each INDENT matched before/after the operator
    for _ in 0..indent_count {
        match tokens.get(next_pos) {
            Some(t) if t.token_type == DEDENT => next_pos += 1,
            _ => unreachable!(
                "Illegal parser state: Expected DEDENT (count: {})",
                indent_count
            ),
        }
    }

    Ok((new_expr, next_pos))
}
/// macro for handling indentation in expressions /// macro for handling indentation in expressions
#[macro_export] #[macro_export]
macro_rules! handle_indentation { macro_rules! handle_indentation {