From d08019c010fafbb342b2f3b8dd6cd19bfdb5b849 Mon Sep 17 00:00:00 2001 From: Araozu Date: Sat, 15 Jun 2024 19:03:31 -0500 Subject: [PATCH] refactor: new strategy to handle indentation around binary op --- src/syntax/parsers/expression/term.rs | 37 +++++++----------- src/syntax/parsers/expression/utils.rs | 54 ++++++++++++++++++++++++++ src/syntax/utils.rs | 1 + 3 files changed, 68 insertions(+), 24 deletions(-) diff --git a/src/syntax/parsers/expression/term.rs b/src/syntax/parsers/expression/term.rs index f85d5e4..075ca28 100644 --- a/src/syntax/parsers/expression/term.rs +++ b/src/syntax/parsers/expression/term.rs @@ -1,4 +1,5 @@ use crate::lexic::token::TokenType; +use crate::syntax::parsers::expression::utils::try_binary_op; use crate::{ handle_dedentation, handle_indentation, lexic::token::Token, @@ -27,33 +28,21 @@ fn parse_many<'a>( ) -> ParsingResult<'a, Expression<'a>> { // term = factor, (("-" | "+"), factor)*; - let mut indent_count: u32 = 0; - let mut next_pos = pos; + let (token, next_pos, indent_count) = + match try_binary_op(tokens, pos, vec!["+", "-"], indentation_level) { + Some(t) => t, + None => return Ok((prev_expr, pos)), + }; - // Handle possible indentation before binary operator - handle_indentation!(tokens, next_pos, indent_count, indentation_level); + // Parse the next factor + let result = match super::factor::try_parse(tokens, next_pos) { + Ok((expr, next_pos)) => { + let expr = + Expression::BinaryOperator(Box::new(prev_expr), Box::new(expr), &token.value); - let result = match tokens.get(next_pos) { - Some(token) if token.value == "+" || token.value == "-" => { - next_pos += 1; - - // Handle possible indentation after binary operator - handle_indentation!(tokens, next_pos, indent_count, indentation_level); - - match super::factor::try_parse(tokens, next_pos) { - Ok((expr, next_pos)) => { - let expr = Expression::BinaryOperator( - Box::new(prev_expr), - Box::new(expr), - &token.value, - ); - - parse_many(tokens, next_pos, expr, indentation_level + indent_count) - } - _ => return Err(ParsingError::Unmatched), - } + parse_many(tokens, next_pos, expr, indentation_level + indent_count) } - _ => return Ok((prev_expr, pos)), + _ => return Err(ParsingError::Unmatched), }; let (new_expr, mut next_pos) = match result { diff --git a/src/syntax/parsers/expression/utils.rs b/src/syntax/parsers/expression/utils.rs index 38b99d8..141d7c9 100644 --- a/src/syntax/parsers/expression/utils.rs +++ b/src/syntax/parsers/expression/utils.rs @@ -1,3 +1,57 @@ +use crate::lexic::token::Token; +use crate::lexic::token::TokenType::{NewLine, INDENT}; + +/// Attempts to parse a binary operator and handles indentation +/// +/// Binary operators may be in a new line as long as they are indented. +/// The new line may be before or after the operator. +/// +/// Once an operator is indented, all following operators completely disregard newline/indentation +/// until a matching dedent is found. +pub fn try_binary_op<'a>( + tokens: &'a Vec, + pos: usize, + operators: Vec<&str>, + indentation_level: u32, +) -> Option<(&'a Token, usize, u32)> { + let mut indent_count = 0; + + // handle possible opening indentation + let pos = match (tokens.get(pos), tokens.get(pos + 1)) { + // New indentation level + (Some(t1), Some(t2)) if t1.token_type == NewLine && t2.token_type == INDENT => { + indent_count += 1; + pos + 2 + } + // when indented, ignore newlines + (Some(t), _) if t.token_type == NewLine && indentation_level > 0 => pos + 1, + // let other handlers handle this + _ => pos, + }; + + // try to parse binary operator + let (matched_token, pos) = match tokens.get(pos) { + Some(token) if operators.contains(&token.value.as_str()) => (token, pos + 1), + _ => return None, + }; + + // handle possible closing indentation + let pos = match (tokens.get(pos), tokens.get(pos + 1)) { + // New indentation level + (Some(t1), Some(t2)) if t1.token_type == NewLine && t2.token_type == INDENT => { + indent_count += 1; + pos + 2 + } + // when indented, ignore newlines + (Some(t), _) if t.token_type == NewLine && indentation_level > 0 => pos + 1, + // let other handlers handle this + _ => pos, + }; + + // return the matched token, next position and new indentation level + Some((matched_token, pos, indent_count)) +} + /// macro for handling indentation in expressions #[macro_export] macro_rules! handle_indentation { diff --git a/src/syntax/utils.rs b/src/syntax/utils.rs index 9e72027..502f2ca 100644 --- a/src/syntax/utils.rs +++ b/src/syntax/utils.rs @@ -31,6 +31,7 @@ impl Tokenizer for Vec { } } + // unused? remove? fn get_indented<'a>(&'a self, index: usize, indented: bool) -> (Option<&'a Token>, usize) { if !indented { return (self.get(index), index + 1);