From 8429ca8d8f30d00fb7a6b3dc3dc328d22de0ebd4 Mon Sep 17 00:00:00 2001 From: Araozu Date: Sat, 15 Jun 2024 19:32:04 -0500 Subject: [PATCH] refactor: yet another strategy to parse indentation around binary op --- src/syntax/parsers/expression/term.rs | 47 +++++++-------- src/syntax/parsers/expression/utils.rs | 82 +++++++++++++++++++++++++- 2 files changed, 103 insertions(+), 26 deletions(-) diff --git a/src/syntax/parsers/expression/term.rs b/src/syntax/parsers/expression/term.rs index 075ca28..869123f 100644 --- a/src/syntax/parsers/expression/term.rs +++ b/src/syntax/parsers/expression/term.rs @@ -1,5 +1,5 @@ use crate::lexic::token::TokenType; -use crate::syntax::parsers::expression::utils::try_binary_op; +use crate::syntax::parsers::expression::utils::{try_binary_op, try_binary_op_2}; use crate::{ handle_dedentation, handle_indentation, lexic::token::Token, @@ -28,31 +28,28 @@ fn parse_many<'a>( ) -> ParsingResult<'a, Expression<'a>> { // term = factor, (("-" | "+"), factor)*; - let (token, next_pos, indent_count) = - match try_binary_op(tokens, pos, vec!["+", "-"], indentation_level) { - Some(t) => t, - None => return Ok((prev_expr, pos)), - }; + try_binary_op_2( + tokens, + pos, + prev_expr, + vec!["+", "-"], + indentation_level, + |tokens, pos, prev_expr, token, indent_count: u32| { + // Parse the next factor + match super::factor::try_parse(tokens, pos) { + Ok((expr, next_pos)) => { + let expr = Expression::BinaryOperator( + Box::new(prev_expr), + Box::new(expr), + &token.value, + ); - // Parse the next factor - let result = match super::factor::try_parse(tokens, next_pos) { - Ok((expr, next_pos)) => { - let expr = - Expression::BinaryOperator(Box::new(prev_expr), Box::new(expr), &token.value); - - parse_many(tokens, next_pos, expr, indentation_level + indent_count) - } - _ => return Err(ParsingError::Unmatched), - }; - - let (new_expr, mut next_pos) = match result { - Ok((e, n)) => (e, n), - _ => return result, - }; - - handle_dedentation!(tokens, 
next_pos, indent_count); - - Ok((new_expr, next_pos)) + parse_many(tokens, next_pos, expr, indentation_level + indent_count) + } + _ => return Err(ParsingError::Unmatched), + } + }, + ) } #[cfg(test)] diff --git a/src/syntax/parsers/expression/utils.rs b/src/syntax/parsers/expression/utils.rs index 141d7c9..b25c1b4 100644 --- a/src/syntax/parsers/expression/utils.rs +++ b/src/syntax/parsers/expression/utils.rs @@ -1,5 +1,7 @@ use crate::lexic::token::Token; -use crate::lexic::token::TokenType::{NewLine, INDENT}; +use crate::lexic::token::TokenType::{NewLine, DEDENT, INDENT}; +use crate::syntax::ast::Expression; +use crate::syntax::parseable::ParsingResult; /// Attempts to parse a binary operator and handles indentation /// @@ -52,6 +54,84 @@ pub fn try_binary_op<'a>( Some((matched_token, pos, indent_count)) } +/// Matches one of `operators` at `original_pos` (optionally surrounded by a NewLine and INDENT, or by a bare NewLine when `indentation_level > 0`), hands the rest to `fun`, then consumes one DEDENT per INDENT matched (panics if one is missing); returns `prev_expr` at `original_pos` if no operator matches. +pub fn try_binary_op_2<'a, F>( + tokens: &'a Vec<Token>, + original_pos: usize, + prev_expr: Expression<'a>, + operators: Vec<&str>, + indentation_level: u32, + fun: F, +) -> ParsingResult<'a, Expression<'a>> +where + F: FnOnce( + &'a Vec<Token>, + usize, + Expression<'a>, + &'a Token, + u32, + ) -> ParsingResult<'a, Expression<'a>>, +{ + let mut indent_count = 0; + let pos = original_pos; + + // handle possible opening indentation + let pos = match (tokens.get(pos), tokens.get(pos + 1)) { + // New indentation level + (Some(t1), Some(t2)) if t1.token_type == NewLine && t2.token_type == INDENT => { + indent_count += 1; + pos + 2 + } + // when indented, ignore newlines + (Some(t), _) if t.token_type == NewLine && indentation_level > 0 => pos + 1, + // let other handlers handle this + _ => pos, + }; + + // try to parse any of the binary operators + let (matched_token, pos) = match tokens.get(pos) { + Some(token) if operators.contains(&token.value.as_str()) => (token, pos + 1), + // If not matched, return the existing expression + _ => return Ok((prev_expr, original_pos)), + }; + + // handle possible indentation after the operator + let pos = match (tokens.get(pos), 
tokens.get(pos + 1)) { + // New indentation level + (Some(t1), Some(t2)) if t1.token_type == NewLine && t2.token_type == INDENT => { + indent_count += 1; + pos + 2 + } + // when indented, ignore newlines + (Some(t), _) if t.token_type == NewLine && indentation_level > 0 => pos + 1, + // let other handlers handle this + _ => pos, + }; + + // run the rest of the logic + let (new_expr, mut next_pos) = match fun(tokens, pos, prev_expr, matched_token, indent_count) { + Ok((e, n)) => (e, n), + x => return x, + }; + + // handle the possible dedentation before/after the operator + for _ in 0..indent_count { + // expect a DEDENT for each INDENT matched + match tokens.get(next_pos) { + // continue + Some(t) if t.token_type == DEDENT => {} + _ => unreachable!( + "Illegal parser state: Expected DEDENT (count: {})", + indent_count + ), + }; + + next_pos += 1; + } + + Ok((new_expr, next_pos)) +} + /// macro for handling indentation in expressions #[macro_export] macro_rules! handle_indentation {