From fa4d79dbe4b65493ade8aaf74b4a0c68a14bb1b3 Mon Sep 17 00:00:00 2001 From: Araozu Date: Tue, 27 Aug 2024 18:34:30 -0500 Subject: [PATCH] feat: parse arrays --- CHANGELOG.md | 1 + src/lexic/scanner/identifier.rs | 6 +++ src/lexic/token.rs | 6 +++ src/php_ast/transformers/expression.rs | 1 + src/semantic/checks/expression.rs | 1 + src/semantic/types/expression.rs | 1 + src/syntax/ast/mod.rs | 13 +++++ src/syntax/parsers/expression/array.rs | 67 ++++++++++++++++++++++++ src/syntax/parsers/expression/mod.rs | 1 + src/syntax/parsers/expression/primary.rs | 18 ++++++- 10 files changed, 113 insertions(+), 2 deletions(-) create mode 100644 src/syntax/parsers/expression/array.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 8915bb9..edf3264 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ ## v0.1.2 - [x] Parse conditionals +- [x] Parse arrays ## v0.1.1 diff --git a/src/lexic/scanner/identifier.rs b/src/lexic/scanner/identifier.rs index 371078e..f5a25d1 100755 --- a/src/lexic/scanner/identifier.rs +++ b/src/lexic/scanner/identifier.rs @@ -9,6 +9,12 @@ fn str_is_keyword(s: &String) -> Option { "fun" => Some(TokenType::FUN), "if" => Some(TokenType::IF), "else" => Some(TokenType::ELSE), + "for" => Some(TokenType::FOR), + "in" => Some(TokenType::IN), + "while" => Some(TokenType::WHILE), + "loop" => Some(TokenType::LOOP), + "match" => Some(TokenType::MATCH), + "case" => Some(TokenType::CASE), _ => None, } } diff --git a/src/lexic/token.rs b/src/lexic/token.rs index 65939b3..0254ff3 100755 --- a/src/lexic/token.rs +++ b/src/lexic/token.rs @@ -26,6 +26,12 @@ pub enum TokenType { FUN, IF, ELSE, + FOR, + IN, + WHILE, + LOOP, + MATCH, + CASE, } #[derive(Serialize, Debug, Clone, PartialEq)] diff --git a/src/php_ast/transformers/expression.rs b/src/php_ast/transformers/expression.rs index cc2a792..f377f8b 100644 --- a/src/php_ast/transformers/expression.rs +++ b/src/php_ast/transformers/expression.rs @@ -37,6 +37,7 @@ impl<'a> PHPTransformable<'a> for Expression<'_> { } Expression::UnaryOperator(_, _) => unimplemented!("transform unary op into php"), Expression::BinaryOperator(_, _, _) => unimplemented!("transform binary op into php"), + Expression::Array(_) => unimplemented!("transform array into php"), } } } diff --git a/src/semantic/checks/expression.rs b/src/semantic/checks/expression.rs index c402951..59e6212 100644 --- a/src/semantic/checks/expression.rs +++ b/src/semantic/checks/expression.rs @@ -193,6 +193,7 @@ impl SemanticCheck for Expression<'_> { // After all these checks, we are ok Ok(()) } + Expression::Array(_) => unimplemented!("check for array"), } } } diff --git a/src/semantic/types/expression.rs b/src/semantic/types/expression.rs index c36b8e5..167009b 100644 --- a/src/semantic/types/expression.rs +++ b/src/semantic/types/expression.rs @@ -122,6 +122,7 @@ impl Typed for Expression<'_> { ), })); } + Expression::Array(_) => unimplemented!("get type of array"), } } } diff --git a/src/syntax/ast/mod.rs b/src/syntax/ast/mod.rs index 93e5cee..57a2914 100644 --- a/src/syntax/ast/mod.rs +++ b/src/syntax/ast/mod.rs @@ -92,6 +92,16 @@ pub enum Expression<'a> { UnaryOperator(&'a Token, Box>), /// left expression, right expression, operator BinaryOperator(Box>, Box>, &'a Token), + Array(Array<'a>), +} + +#[derive(Debug)] +pub struct Array<'a> { + pub exps: Vec>, + /// The position of the open bracket [ + pub start: usize, + /// The position of the closed bracket ] + pub end: usize, } impl Positionable for Expression<'_> { @@ -115,6 +125,9 @@ impl Positionable for Expression<'_> { let (_, end) = right_expr.get_position(); (start, end) } + Expression::Array(Array {start, end, exps: _}) => { + (*start, *end) + } } } } diff --git a/src/syntax/parsers/expression/array.rs b/src/syntax/parsers/expression/array.rs new file mode 100644 index 0000000..f963712 --- /dev/null +++ b/src/syntax/parsers/expression/array.rs @@ -0,0 +1,67 @@ +use crate::{ + lexic::token::{Token, TokenType}, + syntax::{ + ast::{Array, Expression}, + parseable::{Parseable, ParsingError, ParsingResult}, + utils::parse_token_type, + }, +}; + +impl<'a> Parseable<'a> for Array<'a> { + type Item = Array<'a>; + + fn try_parse(tokens: &'a Vec, current_pos: usize) -> ParsingResult<'a, Self::Item> { + // parse open bracket + let (open_bracket, next) = + match parse_token_type(tokens, current_pos, TokenType::LeftBracket) { + Ok(t) => t, + Err(_) => return Err(ParsingError::Unmatched), + }; + + // parse expressions + let mut exps = Vec::new(); + let mut current_pos = next; + let tokens_len = tokens.len(); + while current_pos < tokens_len { + // parse expression + let (exp, after_exp) = match Expression::try_parse(tokens, current_pos) { + Ok(t) => t, + Err(ParsingError::Mismatch(_)) => break, + Err(ParsingError::Unmatched) => break, + // If an error is found parsing an exp, bubble up + Err(e) => return Err(e), + }; + + // add exp to vec + exps.push(exp); + + // parse comma + let (_, after_comma) = match parse_token_type(tokens, after_exp, TokenType::Comma) { + Ok(t) => t, + // If a comma is not found then the expressions are over + Err(_) => { + current_pos = after_exp; + break; + } + }; + + // update position tracker + current_pos = after_comma; + } + + // parse closed bracket + let (closed_bracket, next) = + match parse_token_type(tokens, current_pos, TokenType::RightBracket) { + Ok(t) => t, + Err(e) => return Err(e), + }; + + // return + let arr = Array { + exps, + start: open_bracket.position, + end: closed_bracket.position, + }; + Ok((arr, next)) + } +} diff --git a/src/syntax/parsers/expression/mod.rs b/src/syntax/parsers/expression/mod.rs index e961fb1..d6be261 100644 --- a/src/syntax/parsers/expression/mod.rs +++ b/src/syntax/parsers/expression/mod.rs @@ -9,6 +9,7 @@ mod primary; mod term; mod unary; mod utils; +mod array; impl<'a> Parseable<'a> for Expression<'a> { type Item = Expression<'a>; diff --git a/src/syntax/parsers/expression/primary.rs b/src/syntax/parsers/expression/primary.rs index 8e23ec5..71883d2 100644 --- a/src/syntax/parsers/expression/primary.rs +++ b/src/syntax/parsers/expression/primary.rs @@ -1,16 +1,30 @@ use crate::{ lexic::token::{Token, TokenType}, syntax::{ - ast::Expression, parseable::Parseable, utils::Tokenizer, ParsingError, ParsingResult, + ast::{Array, Expression}, parseable::Parseable, utils::Tokenizer, ParsingError, ParsingResult, }, }; /// This grammar may not be up to date. Refer to the spec for the latest grammar. /// /// ```ebnf -/// primary = number | string | boolean | identifier | ("(", expression, ")"); +/// primary = array +/// | number +/// | string +/// | boolean +/// | identifier +/// | ("(", expression, ")"); /// ``` pub fn try_parse(tokens: &Vec, pos: usize) -> ParsingResult { + // array + match Array::try_parse(tokens, pos) { + Ok((exp, next)) => { + return Ok((Expression::Array(exp), next)) + }, + Err(ParsingError::Err(e)) => return Err(ParsingError::Err(e)), + Err(_) => {}, + } + match tokens.get_significant(pos) { Some((token, token_pos)) => match token.token_type { TokenType::Int => Ok((Expression::Int(&token), token_pos + 1)),