From d5681143496b12072bb568b48f7d05b07d17af50 Mon Sep 17 00:00:00 2001 From: Araozu Date: Wed, 5 Jun 2024 11:04:01 -0500 Subject: [PATCH] feat: parse indent/dedent inside expressions --- CHANGELOG.md | 8 +-- src/syntax/parsers/expression/equality.rs | 80 ++++++++++++++++++++++- src/syntax/parsers/expression/mod.rs | 16 ++++- 3 files changed, 98 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fb263ba..70399d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,12 +28,12 @@ ## v0.0.13 -- [ ] Begin work on a formal grammar -- [ ] Simplify/rewrite AST -- [ ] Define the top level constructs +- [x] Begin work on a formal grammar +- [x] Simplify/rewrite AST +- [x] Define the top level constructs - [ ] Include the original tokens in the AST - [ ] Finish the workflow for a hello world -- [ ] Refactor code +- [x] Refactor code - [x] Remove `PARSER couldn't parse any construction` error & replace with an actual error message diff --git a/src/syntax/parsers/expression/equality.rs b/src/syntax/parsers/expression/equality.rs index 37f293e..e1d60b0 100644 --- a/src/syntax/parsers/expression/equality.rs +++ b/src/syntax/parsers/expression/equality.rs @@ -1,5 +1,5 @@ use crate::{ - lexic::token::Token, + lexic::token::{Token, TokenType}, syntax::{ast::Expression, ParsingError, ParsingResult}, }; @@ -9,6 +9,7 @@ use crate::{ /// equality = comparison, (("==" | "!="), comparison )*; /// ``` pub fn try_parse(tokens: &Vec, pos: usize) -> ParsingResult { + // TODO: This must be newline/indentation aware let (comparison, next_pos) = match super::comparison::try_parse(tokens, pos) { Ok((expr, next_pos)) => (expr, next_pos), _ => return Err(ParsingError::Unmatched), @@ -39,6 +40,33 @@ fn parse_many<'a>( _ => Err(ParsingError::Unmatched), } } + // If token is a newline: check if the following token is INDENT. + // If so, ignore those 2 and continue parsing + // Then, we should find a DEDENT token to finish this expression? 
+ Some(token) if token.token_type == TokenType::NewLine => { + match tokens.get(pos + 1) { + Some(t) if t.token_type == TokenType::INDENT => { + // Ignore indentation and continue parsing + let result = parse_many(tokens, pos + 2, prev_expr); + // Expect a DEDENT token + match result { + Ok((expr, next)) => { + match tokens.get(next) { + Some(t) if t.token_type == TokenType::DEDENT => { + Ok((expr, next + 1)) + } + _ => unreachable!("Invalid parser state: expected a DEDENT after parsing an indented expression") + } + } + _ => result + } + } + _ => { + // Return current parsed value + return Ok((prev_expr, pos)); + } + } + } _ => Ok((prev_expr, pos)), } } @@ -83,4 +111,54 @@ mod tests { _ => panic!("Expected an Unmatched error"), } } + + #[test] + fn should_parse_indented_1() { + let tokens = get_tokens(&String::from("a\n == b")).unwrap(); + let (result, next) = try_parse(&tokens, 0).unwrap(); + + assert_eq!(tokens[5].token_type, TokenType::DEDENT); + assert_eq!(next, 6); + + match result { + Expression::BinaryOperator(_, _, op) => { + assert_eq!(op, "==") + }, + _ => panic!("Expected a binary operator") + } + } + + #[test] + fn should_parse_indented_2() { + let tokens = get_tokens(&String::from("a\n == b\n == c")).unwrap(); + let (result, next) = try_parse(&tokens, 0).unwrap(); + + assert_eq!(tokens[9].token_type, TokenType::DEDENT); + assert_eq!(tokens[10].token_type, TokenType::DEDENT); + assert_eq!(next, 11); + + match result { + Expression::BinaryOperator(_, _, op) => { + assert_eq!(op, "==") + }, + _ => panic!("Expected a binary operator") + } + } + + #[test] + fn should_parse_indented_3() { + let tokens = get_tokens(&String::from("a\n == b == c")).unwrap(); + let (result, next) = try_parse(&tokens, 0).unwrap(); + + assert_eq!(tokens[7].token_type, TokenType::DEDENT); + assert_eq!(next, 8); + + match result { + Expression::BinaryOperator(_, _, op) => { + assert_eq!(op, "==") + }, + _ => panic!("Expected a binary operator") + } + } + } diff --git 
a/src/syntax/parsers/expression/mod.rs b/src/syntax/parsers/expression/mod.rs
index dd529d5..1f48ad9 100644
--- a/src/syntax/parsers/expression/mod.rs
+++ b/src/syntax/parsers/expression/mod.rs
@@ -19,4 +19,20 @@ impl<'a> Parseable<'a> for Expression<'a> {
 }
 
 #[cfg(test)]
-mod tests {}
+mod tests {
+    use crate::lexic::get_tokens;
+
+    use super::*;
+
+    // An operator that continues on an indented line must still be parsed
+    // as part of the same binary expression, and the operator must be kept.
+    #[test]
+    fn should_parse_expression_w_indentation_1() {
+        let tokens = get_tokens(&String::from("a\n == b")).unwrap();
+        let (expr, _) = Expression::try_parse(&tokens, 0).unwrap();
+        match expr {
+            Expression::BinaryOperator(_, _, op) => assert_eq!(op, "=="),
+            _ => panic!("Expected a binary operation"),
+        }
+    }
+}