refactor: yet another strategy to parse indentation around binary op

2024-06-15 19:32:04 -05:00 · 2024-06-15 19:32:04 -05:00 · 8429ca8d8f
commit 8429ca8d8f
parent d08019c010
2 changed files with 103 additions and 26 deletions
--- a/src/syntax/parsers/expression/term.rs
+++ b/src/syntax/parsers/expression/term.rs
@ -1,5 +1,5 @@
 use crate::lexic::token::TokenType;
-use crate::syntax::parsers::expression::utils::try_binary_op;
+use crate::syntax::parsers::expression::utils::{try_binary_op, try_binary_op_2};
 use crate::{
    handle_dedentation, handle_indentation,
    lexic::token::Token,
@ -28,31 +28,28 @@ fn parse_many<'a>(
 ) -> ParsingResult<'a, Expression<'a>> {
    // term = factor, (("-" | "+"), factor)*;
-    let (token, next_pos, indent_count) =
+    try_binary_op_2(
-        match try_binary_op(tokens, pos, vec!["+", "-"], indentation_level) {
+        tokens,
-            Some(t) => t,
+        pos,
-            None => return Ok((prev_expr, pos)),
+        prev_expr,
-        };
+        vec!["+", "-"],
        indentation_level,
        |tokens, pos, prev_expr, token, indent_count: u32| {
            // Parse the next factor
            match super::factor::try_parse(tokens, pos) {
                Ok((expr, next_pos)) => {
                    let expr = Expression::BinaryOperator(
                        Box::new(prev_expr),
                        Box::new(expr),
                        &token.value,
                    );
-    // Parse the next factor
+                    parse_many(tokens, next_pos, expr, indentation_level + indent_count)
-    let result = match super::factor::try_parse(tokens, next_pos) {
+                }
-        Ok((expr, next_pos)) => {
+                _ => return Err(ParsingError::Unmatched),
-            let expr =
+            }
-                Expression::BinaryOperator(Box::new(prev_expr), Box::new(expr), &token.value);
+        },
-
+    )
            parse_many(tokens, next_pos, expr, indentation_level + indent_count)
        }
        _ => return Err(ParsingError::Unmatched),
    };
    let (new_expr, mut next_pos) = match result {
        Ok((e, n)) => (e, n),
        _ => return result,
    };
    handle_dedentation!(tokens, next_pos, indent_count);
    Ok((new_expr, next_pos))
 }
 #[cfg(test)]
--- a/src/syntax/parsers/expression/utils.rs
+++ b/src/syntax/parsers/expression/utils.rs
@ -1,5 +1,7 @@
 use crate::lexic::token::Token;
-use crate::lexic::token::TokenType::{NewLine, INDENT};
+use crate::lexic::token::TokenType::{NewLine, DEDENT, INDENT};
 use crate::syntax::ast::Expression;
 use crate::syntax::parseable::ParsingResult;
 /// Attempts to parse a binary operator and handles indentation
 ///
@ -52,6 +54,84 @@ pub fn try_binary_op<'a>(
    Some((matched_token, pos, indent_count))
 }
 /// Tries to parse one of `operators` at `original_pos`, tolerating line
 /// breaks and indentation around it, and hands the rest of the parsing
 /// over to `fun`.
 ///
 /// Layout tokens are skipped both before and after the operator:
 /// - a `NewLine` immediately followed by an `INDENT` is consumed and counted
 ///   as a newly opened indentation level;
 /// - a lone `NewLine` is consumed only when `indentation_level > 0`;
 /// - anything else is left untouched.
 ///
 /// If the token found after the leading layout is not one of `operators`,
 /// nothing is consumed and `Ok((prev_expr, original_pos))` is returned.
 ///
 /// Otherwise `fun` is called with the token list, the position right after
 /// the operator (and its trailing layout), `prev_expr`, the operator token
 /// and the number of indentation levels opened here. An error returned by
 /// `fun` is propagated unchanged. On success, one `DEDENT` is consumed for
 /// every indentation level opened here and the resulting expression is
 /// returned together with the position after those `DEDENT`s.
 ///
 /// # Panics
 ///
 /// Panics if, after `fun` succeeds, a `DEDENT` is missing for any of the
 /// indentation levels opened by this call.
 pub fn try_binary_op_2<'a, F>(
    tokens: &'a Vec<Token>,
    original_pos: usize,
    prev_expr: Expression<'a>,
    operators: Vec<&str>,
    indentation_level: u32,
    fun: F,
 ) -> ParsingResult<'a, Expression<'a>>
 where
    F: FnOnce(
        &'a Vec<Token>,
        usize,
        Expression<'a>,
        &'a Token,
        u32,
    ) -> ParsingResult<'a, Expression<'a>>,
 {
    /// Skips the layout tokens found at `at`.
    ///
    /// Returns the position to continue from and how many indentation
    /// levels (0 or 1) were opened while skipping.
    fn skip_layout(tokens: &[Token], at: usize, inside_indentation: bool) -> (usize, u32) {
        let on_newline = tokens.get(at).map_or(false, |t| t.token_type == NewLine);
        if !on_newline {
            // let other handlers handle this
            return (at, 0);
        }
        let indent_follows = tokens
            .get(at + 1)
            .map_or(false, |t| t.token_type == INDENT);
        if indent_follows {
            // a new indentation level starts here
            (at + 2, 1)
        } else if inside_indentation {
            // already indented: the line break carries no meaning
            (at + 1, 0)
        } else {
            (at, 0)
        }
    }
    let inside_indentation = indentation_level > 0;
    // layout that may precede the operator
    let (operator_pos, opened_before) = skip_layout(tokens, original_pos, inside_indentation);
    // the operator itself; without it the previous expression is handed back
    // and no token (not even the skipped layout) is consumed
    let matched_token = match tokens.get(operator_pos) {
        Some(candidate)
            if operators
                .iter()
                .any(|operator| *operator == candidate.value.as_str()) =>
        {
            candidate
        }
        _ => return Ok((prev_expr, original_pos)),
    };
    // layout that may follow the operator
    let (operand_pos, opened_after) = skip_layout(tokens, operator_pos + 1, inside_indentation);
    let indent_count = opened_before + opened_after;
    // run the rest of the logic; errors bubble up as they are
    let (new_expr, after_operand) =
        fun(tokens, operand_pos, prev_expr, matched_token, indent_count)?;
    // every INDENT matched around the operator must be closed by a DEDENT
    let mut cursor = after_operand;
    for _ in 0..indent_count {
        let on_dedent = tokens
            .get(cursor)
            .map_or(false, |t| t.token_type == DEDENT);
        if !on_dedent {
            unreachable!(
                "Illegal parser state: Expected DEDENT (count: {})",
                indent_count
            );
        }
        cursor += 1;
    }
    Ok((new_expr, cursor))
 }
 /// macro for handling indentation in expressions
 #[macro_export]
 macro_rules! handle_indentation {