refactor: yet another strategy to parse indentation around binary op

2024-06-15 19:32:04 -05:00 · 2024-06-15 19:32:04 -05:00 · 8429ca8d8f
commit 8429ca8d8f
parent d08019c010
2 changed files with 103 additions and 26 deletions
--- a/src/syntax/parsers/expression/term.rs
+++ b/src/syntax/parsers/expression/term.rs
@ -1,5 +1,5 @@
 use crate::lexic::token::TokenType;
-use crate::syntax::parsers::expression::utils::try_binary_op;
+use crate::syntax::parsers::expression::utils::{try_binary_op, try_binary_op_2};
 use crate::{
    handle_dedentation, handle_indentation,
    lexic::token::Token,
@ -28,31 +28,28 @@ fn parse_many<'a>(
 ) -> ParsingResult<'a, Expression<'a>> {
    // term = factor, (("-" | "+"), factor)*;

-    let (token, next_pos, indent_count) =
-        match try_binary_op(tokens, pos, vec!["+", "-"], indentation_level) {
-            Some(t) => t,
-            None => return Ok((prev_expr, pos)),
-        };
+    try_binary_op_2(
+        tokens,
+        pos,
+        prev_expr,
+        vec!["+", "-"],
+        indentation_level,
+        |tokens, pos, prev_expr, token, indent_count: u32| {
+            // Parse the next factor
+            match super::factor::try_parse(tokens, pos) {
+                Ok((expr, next_pos)) => {
+                    let expr = Expression::BinaryOperator(
+                        Box::new(prev_expr),
+                        Box::new(expr),
+                        &token.value,
+                    );

-    // Parse the next factor
-    let result = match super::factor::try_parse(tokens, next_pos) {
-        Ok((expr, next_pos)) => {
-            let expr =
-                Expression::BinaryOperator(Box::new(prev_expr), Box::new(expr), &token.value);
-
-            parse_many(tokens, next_pos, expr, indentation_level + indent_count)
-        }
-        _ => return Err(ParsingError::Unmatched),
-    };
-
-    let (new_expr, mut next_pos) = match result {
-        Ok((e, n)) => (e, n),
-        _ => return result,
-    };
-
-    handle_dedentation!(tokens, next_pos, indent_count);
-
-    Ok((new_expr, next_pos))
+                    parse_many(tokens, next_pos, expr, indentation_level + indent_count)
+                }
+                _ => return Err(ParsingError::Unmatched),
+            }
+        },
+    )
 }

 #[cfg(test)]
--- a/src/syntax/parsers/expression/utils.rs
+++ b/src/syntax/parsers/expression/utils.rs
@ -1,5 +1,7 @@
 use crate::lexic::token::Token;
-use crate::lexic::token::TokenType::{NewLine, INDENT};
+use crate::lexic::token::TokenType::{NewLine, DEDENT, INDENT};
+use crate::syntax::ast::Expression;
+use crate::syntax::parseable::ParsingResult;

 /// Attempts to parse a binary operator and handles indentation
 ///
@ -52,6 +54,84 @@ pub fn try_binary_op<'a>(
    Some((matched_token, pos, indent_count))
 }

+/// Matches one of `operators` at `original_pos` (allowing a NewLine/INDENT before and after it), runs `fun` on what follows, then consumes one DEDENT per INDENT matched.
+pub fn try_binary_op_2<'a, F>(
+    tokens: &'a Vec<Token>, // token stream being parsed
+    original_pos: usize, // position where the operator (or a NewLine preceding it) is expected
+    prev_expr: Expression<'a>, // left-hand operand; returned unchanged when no operator matches
+    operators: Vec<&str>, // accepted operator lexemes, compared against `Token::value`
+    indentation_level: u32, // when > 0, a bare NewLine around the operator is skipped
+    fun: F, // called after a match; builds and returns the resulting expression
+) -> ParsingResult<'a, Expression<'a>>
+where
+    F: FnOnce(
+        &'a Vec<Token>, // same token stream
+        usize, // position right after the operator and any indentation following it
+        Expression<'a>, // `prev_expr`, moved into the closure
+        &'a Token, // the matched operator token
+        u32, // number of INDENT tokens consumed around the operator
+    ) -> ParsingResult<'a, Expression<'a>>,
+{
+    let mut indent_count = 0; // INDENTs consumed here: at most one before and one after the operator
+    let pos = original_pos; // working cursor; `original_pos` is kept for the no-match return
+
+    // handle a possible indentation before the operator
+    let pos = match (tokens.get(pos), tokens.get(pos + 1)) {
+        // NewLine followed by INDENT: a new indentation level opens, skip both tokens
+        (Some(t1), Some(t2)) if t1.token_type == NewLine && t2.token_type == INDENT => {
+            indent_count += 1;
+            pos + 2
+        }
+        // already indented (indentation_level > 0): skip a bare NewLine
+        (Some(t), _) if t.token_type == NewLine && indentation_level > 0 => pos + 1,
+        // not an indentation: leave the position untouched
+        _ => pos,
+    };
+
+    // try to match any of the binary operators by token value
+    let (matched_token, pos) = match tokens.get(pos) {
+        Some(token) if operators.contains(&token.value.as_str()) => (token, pos + 1),
+        // no operator: return the existing expression at `original_pos`, discarding anything skipped above
+        _ => return Ok((prev_expr, original_pos)),
+    };
+
+    // handle a possible indentation after the operator (same rules as before it)
+    let pos = match (tokens.get(pos), tokens.get(pos + 1)) {
+        // NewLine followed by INDENT: a new indentation level opens, skip both tokens
+        (Some(t1), Some(t2)) if t1.token_type == NewLine && t2.token_type == INDENT => {
+            indent_count += 1;
+            pos + 2
+        }
+        // already indented (indentation_level > 0): skip a bare NewLine
+        (Some(t), _) if t.token_type == NewLine && indentation_level > 0 => pos + 1,
+        // not an indentation: leave the position untouched
+        _ => pos,
+    };
+
+    // delegate to the caller's closure; any non-Ok result is returned unchanged
+    let (new_expr, mut next_pos) = match fun(tokens, pos, prev_expr, matched_token, indent_count) {
+        Ok((e, n)) => (e, n),
+        x => return x,
+    };
+
+    // consume the DEDENTs that close the indentation opened before/after the operator
+    for _ in 0..indent_count {
+        // expect a DEDENT for each INDENT matched
+        match tokens.get(next_pos) {
+            // found it: fall through and advance below
+            Some(t) if t.token_type == DEDENT => {}
+            _ => unreachable!( // NOTE(review): panics when no DEDENT is at next_pos; confirm the token stream guarantees one here
+                "Illegal parser state: Expected DEDENT (count: {})",
+                indent_count
+            ),
+        };
+
+        next_pos += 1;
+    }
+
+    Ok((new_expr, next_pos))
+}
+
 /// macro for handling indentation in expressions
 #[macro_export]
 macro_rules! handle_indentation {