From 86166c2105f7fabd19ed51aaf5f26b3196ac5833 Mon Sep 17 00:00:00 2001 From: Araozu Date: Wed, 28 Aug 2024 08:52:35 -0500 Subject: [PATCH] feat: parse loops --- CHANGELOG.md | 1 + src/semantic/checks/block.rs | 17 +++ src/semantic/checks/function_declaration.rs | 24 +-- src/semantic/checks/mod.rs | 1 + src/semantic/checks/top_level_declaration.rs | 1 + src/syntax/ast/loops.rs | 15 ++ src/syntax/ast/mod.rs | 22 ++- src/syntax/parsers/block.rs | 19 ++- src/syntax/parsers/conditional.rs | 8 +- src/syntax/parsers/expression/mod.rs | 2 +- src/syntax/parsers/expression/primary.rs | 19 +-- src/syntax/parsers/for_loop.rs | 152 +++++++++++++++++++ src/syntax/parsers/mod.rs | 3 +- src/syntax/parsers/statement.rs | 12 +- src/syntax/utils.rs | 5 +- 15 files changed, 244 insertions(+), 57 deletions(-) create mode 100644 src/semantic/checks/block.rs create mode 100644 src/syntax/ast/loops.rs create mode 100644 src/syntax/parsers/for_loop.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index edf3264..224fec6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ - [x] Parse conditionals - [x] Parse arrays +- [x] Parse for loops ## v0.1.1 diff --git a/src/semantic/checks/block.rs b/src/semantic/checks/block.rs new file mode 100644 index 0000000..5141963 --- /dev/null +++ b/src/semantic/checks/block.rs @@ -0,0 +1,17 @@ +use crate::{ + error_handling::MistiError, + semantic::{impls::SemanticCheck, symbol_table::SymbolTable}, + syntax::ast::BlockMember, +}; + +impl<'a> SemanticCheck for BlockMember<'a> { + // TODO: A block may contain a function declaration statement, + // but (afaik) those are not allowed inside conditionals/loops + // somehow detect those? + fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError> { + match self { + BlockMember::Stmt(s) => s.check_semantics(scope), + BlockMember::Expr(e) => e.check_semantics(scope), + } + } +} diff --git a/src/semantic/checks/function_declaration.rs b/src/semantic/checks/function_declaration.rs index b1e68fc..1f3735d 100644 --- a/src/semantic/checks/function_declaration.rs +++ b/src/semantic/checks/function_declaration.rs @@ -1,7 +1,7 @@ use crate::{ error_handling::{semantic_error::SemanticError, MistiError}, semantic::{impls::SemanticCheck, symbol_table::SymbolTable, types::Type}, - syntax::ast::{BlockMember, FunctionDeclaration, Statement}, + syntax::ast::FunctionDeclaration, }; impl SemanticCheck for FunctionDeclaration<'_> { @@ -31,27 +31,7 @@ impl SemanticCheck for FunctionDeclaration<'_> { // TODO: Check the return type of the function body // This should be the last expression in the block for stmt in self.block.members.iter() { - match stmt { - BlockMember::Stmt(Statement::Binding(b)) => { - if let Err(err) = b.check_semantics(&function_scope) { - return Err(err); - } - } - BlockMember::Stmt(Statement::FnDecl(f)) => { - // TODO: (for now) a function cannot be declared inside another function. - let error = SemanticError { - error_start: f.identifier.position, - error_end: f.identifier.get_end_position(), - reason: format!( - "A function cannot be defined inside another function." - ), - }; - - return Err(MistiError::Semantic(error)); - } - BlockMember::Stmt(Statement::Conditional(_)) => unimplemented!("check conditional"), - BlockMember::Expr(e) => e.check_semantics(scope)?, - } + stmt.check_semantics(&function_scope)?; } // TODO: Check that the return type of the function diff --git a/src/semantic/checks/mod.rs b/src/semantic/checks/mod.rs index 58ef15b..f9d6579 100644 --- a/src/semantic/checks/mod.rs +++ b/src/semantic/checks/mod.rs @@ -1,4 +1,5 @@ pub mod binding; +pub mod block; pub mod expression; pub mod function_declaration; pub mod top_level_declaration; diff --git a/src/semantic/checks/top_level_declaration.rs b/src/semantic/checks/top_level_declaration.rs index fa419f7..0bcfc02 100644 --- a/src/semantic/checks/top_level_declaration.rs +++ b/src/semantic/checks/top_level_declaration.rs @@ -23,6 +23,7 @@ impl SemanticCheck for Statement<'_> { Statement::Binding(b) => b.check_semantics(scope), Statement::FnDecl(f) => f.check_semantics(scope), Statement::Conditional(_) => unimplemented!("check conditional"), + Statement::ForLoop(_) => unimplemented!("check for loop"), } } } diff --git a/src/syntax/ast/loops.rs b/src/syntax/ast/loops.rs new file mode 100644 index 0000000..05656e3 --- /dev/null +++ b/src/syntax/ast/loops.rs @@ -0,0 +1,15 @@ +use crate::lexic::token::Token; + +use super::Block; + +#[derive(Debug)] +pub struct ForLoop<'a> { + /// the start position of the + /// `for` keyword + pub loop_start: usize, + /// the position of the closing bracket + pub loop_end: usize, + pub key: Option<&'a Token>, + pub value: &'a Token, + pub body: Block<'a>, +} diff --git a/src/syntax/ast/mod.rs b/src/syntax/ast/mod.rs index 57a2914..2e45ba3 100644 --- a/src/syntax/ast/mod.rs +++ b/src/syntax/ast/mod.rs @@ -1,9 +1,11 @@ use crate::lexic::token::Token; use self::functions::FunctionCall; +use loops::ForLoop; use var_binding::VariableBinding; pub mod functions; +pub mod loops; pub mod var_binding; /// Trait that allows nodes to inform @@ -33,13 +35,14 @@ pub enum Statement<'a> { FnDecl(FunctionDeclaration<'a>), // TODO: Implement conditionals as expressions Conditional(Conditional<'a>), + ForLoop(ForLoop<'a>), } #[derive(Debug)] pub struct Conditional<'a> { pub if_member: Condition<'a>, pub else_if_members: Vec>, - pub else_block: Option> + pub else_block: Option>, } #[derive(Debug)] @@ -48,7 +51,6 @@ pub struct Condition<'a> { pub body: Block<'a>, } - #[derive(Debug)] pub struct FunctionDeclaration<'a> { pub identifier: &'a Token, @@ -59,9 +61,17 @@ pub struct FunctionDeclaration<'a> { #[derive(Debug)] pub struct Block<'a> { + pub start: usize, + pub end: usize, pub members: Vec>, } +impl Positionable for Block<'_> { + fn get_position(&self) -> (usize, usize) { + (self.start, self.end) + } +} + /// Enum for productions available at the block level #[derive(Debug)] pub enum BlockMember<'a> { @@ -125,9 +135,11 @@ impl Positionable for Expression<'_> { let (_, end) = right_expr.get_position(); (start, end) } - Expression::Array(Array {start, end, exps: _}) => { - (*start, *end) - } + Expression::Array(Array { + start, + end, + exps: _, + }) => (*start, *end), } } } diff --git a/src/syntax/parsers/block.rs b/src/syntax/parsers/block.rs index f35d6f1..6b4aae2 100644 --- a/src/syntax/parsers/block.rs +++ b/src/syntax/parsers/block.rs @@ -44,6 +44,7 @@ impl<'a> Parseable<'a> for Block<'a> { Ok((prod, next_pos)) => { members.push(BlockMember::Expr(prod)); current_pos = next_pos; + continue; } Err(ParsingError::Err(error)) => { // TODO: Better error handling, write a better error message @@ -59,7 +60,7 @@ impl<'a> Parseable<'a> for Block<'a> { } // Parse closing brace - let (_closing_brace, next_pos) = + let (closing_brace, next_pos) = match parse_token_type(tokens, current_pos, TokenType::RightBrace) { Ok((t, next)) => (t, next), Err(ParsingError::Err(err)) => return Err(ParsingError::Err(err)), @@ -82,7 +83,11 @@ impl<'a> Parseable<'a> for Block<'a> { }; current_pos = next_pos; - let block = Block { members }; + let block = Block { + members, + start: opening_brace.position, + end: closing_brace.position, + }; Ok((block, current_pos)) } } @@ -152,24 +157,23 @@ mod tests { assert_eq!(block.members.len(), 1); } - /* #[test] fn test_parse_block_2() { let tokens = get_tokens(&String::from("{f()\ng()}")).unwrap(); - let block = parse_block(&tokens, 0); + let block = Block::try_parse(&tokens, 0); let block = match block { ParsingResult::Ok((p, _)) => p, _ => panic!("Expected a block, got: {:?}", block), }; - assert_eq!(block.statements.len(), 2); + assert_eq!(block.members.len(), 2); } #[test] fn test_parse_block_3() { let tokens = get_tokens(&String::from("{\n f()\n}")).unwrap(); - let block = parse_block(&tokens, 0); + let block = Block::try_parse(&tokens, 0); let block = match block { ParsingResult::Ok((p, _)) => p, @@ -178,7 +182,6 @@ mod tests { } }; - assert_eq!(block.statements.len(), 1); + assert_eq!(block.members.len(), 1); } - */ } diff --git a/src/syntax/parsers/conditional.rs b/src/syntax/parsers/conditional.rs index 9ff8218..078bcd6 100644 --- a/src/syntax/parsers/conditional.rs +++ b/src/syntax/parsers/conditional.rs @@ -141,18 +141,14 @@ impl<'a> Parseable<'a> for Conditional<'a> { Err(ParsingError::Err(err)) => return Err(ParsingError::Err(err)), Err(ParsingError::Mismatch(wrong_token)) => { return Err(ParsingError::Err(SyntaxError { - reason: String::from( - "Expected a block after the else keyword", - ), + reason: String::from("Expected a block after the else keyword"), error_start: wrong_token.position, error_end: wrong_token.get_end_position(), })); } Err(ParsingError::Unmatched) => { return Err(ParsingError::Err(SyntaxError { - reason: String::from( - "Expected a block after the else keyword", - ), + reason: String::from("Expected a block after the else keyword"), error_start: else_token.position, error_end: else_token.get_end_position(), })); diff --git a/src/syntax/parsers/expression/mod.rs b/src/syntax/parsers/expression/mod.rs index d6be261..1e807b5 100644 --- a/src/syntax/parsers/expression/mod.rs +++ b/src/syntax/parsers/expression/mod.rs @@ -1,6 +1,7 @@ use super::super::{ast::Expression, ParsingResult}; use crate::{lexic::token::Token, syntax::parseable::Parseable}; +mod array; mod comparison; mod equality; mod factor; @@ -9,7 +10,6 @@ mod primary; mod term; mod unary; mod utils; -mod array; impl<'a> Parseable<'a> for Expression<'a> { type Item = Expression<'a>; diff --git a/src/syntax/parsers/expression/primary.rs b/src/syntax/parsers/expression/primary.rs index 71883d2..fdc8219 100644 --- a/src/syntax/parsers/expression/primary.rs +++ b/src/syntax/parsers/expression/primary.rs @@ -1,7 +1,10 @@ use crate::{ lexic::token::{Token, TokenType}, syntax::{ - ast::{Array, Expression}, parseable::Parseable, utils::Tokenizer, ParsingError, ParsingResult, + ast::{Array, Expression}, + parseable::Parseable, + utils::Tokenizer, + ParsingError, ParsingResult, }, }; @@ -9,20 +12,18 @@ use crate::{ /// /// ```ebnf /// primary = array -/// | number -/// | string -/// | boolean -/// | identifier +/// | number +/// | string +/// | boolean +/// | identifier /// | ("(", expression, ")"); /// ``` pub fn try_parse(tokens: &Vec, pos: usize) -> ParsingResult { // array match Array::try_parse(tokens, pos) { - Ok((exp, next)) => { - return Ok((Expression::Array(exp), next)) - }, + Ok((exp, next)) => return Ok((Expression::Array(exp), next)), Err(ParsingError::Err(e)) => return Err(ParsingError::Err(e)), - Err(_) => {}, + Err(_) => {} } match tokens.get_significant(pos) { diff --git a/src/syntax/parsers/for_loop.rs b/src/syntax/parsers/for_loop.rs new file mode 100644 index 0000000..b7bd600 --- /dev/null +++ b/src/syntax/parsers/for_loop.rs @@ -0,0 +1,152 @@ +use crate::{ + error_handling::SyntaxError, + lexic::token::{Token, TokenType}, + syntax::{ + ast::{loops::ForLoop, Block, Expression, Positionable}, + parseable::{Parseable, ParsingError, ParsingResult}, + utils::parse_token_type, + }, +}; + +impl<'a> Parseable<'a> for ForLoop<'a> { + type Item = ForLoop<'a>; + + fn try_parse(tokens: &'a Vec, current_pos: usize) -> ParsingResult<'a, Self::Item> { + // for keyword + let (for_keyword, next) = match parse_token_type(tokens, current_pos, TokenType::FOR) { + Ok(tuple) => tuple, + _ => return Err(ParsingError::Unmatched), + }; + + // first identifier + let (first_id, next) = match parse_token_type(tokens, next, TokenType::Identifier) { + Ok(t) => t, + Err(ParsingError::Err(e)) => return Err(ParsingError::Err(e)), + Err(ParsingError::Mismatch(e)) => { + return Err(ParsingError::Err(SyntaxError { + error_start: e.position, + error_end: e.get_end_position(), + reason: format!( + "Expected an identifier after the `for` keyword, found {}", + e.value + ), + })) + } + Err(ParsingError::Unmatched) => { + return Err(ParsingError::Err(SyntaxError { + error_start: for_keyword.position, + error_end: for_keyword.get_end_position(), + reason: format!("Expected an identifier after the `for` keyword"), + })) + } + }; + + // comma and possible second identifier + let (second_id, next) = 'block: { + // attempt to parse comma + let (comma, next) = match parse_token_type(tokens, next, TokenType::Comma) { + Ok(t) => t, + _ => break 'block (None, next), + }; + + // parse second id + // if this fails then its a syntax error, because a comma was already commited + match parse_token_type(&tokens, next, TokenType::Identifier) { + Ok((second_id, next)) => (Some(second_id), next), + Err(ParsingError::Err(e)) => return Err(ParsingError::Err(e)), + Err(ParsingError::Mismatch(t)) => { + return Err(ParsingError::Err(SyntaxError { + error_start: t.position, + error_end: t.get_end_position(), + reason: format!( + "Expected an identifier after the comma, found `{}`", + t.value + ), + })) + } + Err(ParsingError::Unmatched) => { + return Err(ParsingError::Err(SyntaxError { + error_start: comma.position, + error_end: comma.get_end_position(), + reason: format!("Expected an identifier after the comma"), + })); + } + } + }; + + // in keyword + let (in_keyword, next) = match parse_token_type(tokens, next, TokenType::IN) { + Ok(tuple) => tuple, + Err(ParsingError::Err(e)) => return Err(ParsingError::Err(e)), + Err(ParsingError::Mismatch(t)) => { + return Err(ParsingError::Err(SyntaxError { + error_start: t.position, + error_end: t.get_end_position(), + reason: format!("Expected the `in` keyword, found `{}`", t.value), + })) + } + Err(ParsingError::Unmatched) => { + let previous_token = if second_id.is_none() { + first_id + } else { + second_id.unwrap() + }; + return Err(ParsingError::Err(SyntaxError { + error_start: previous_token.position, + error_end: previous_token.get_end_position(), + reason: format!("Expected the `in` keyword"), + })); + } + }; + + // expression + let (expr, next) = match Expression::try_parse(tokens, next) { + Ok(t) => t, + Err(ParsingError::Err(e)) => return Err(ParsingError::Err(e)), + Err(_) => { + return Err(ParsingError::Err(SyntaxError { + error_start: in_keyword.position, + error_end: in_keyword.get_end_position(), + reason: format!("Expected an expression after the `in` keyword"), + })) + } + }; + + // block + let (block, next) = match Block::try_parse(tokens, next) { + Ok(t) => t, + Err(ParsingError::Err(err)) => return Err(ParsingError::Err(err)), + Err(ParsingError::Mismatch(wrong_token)) => { + return Err(ParsingError::Err(SyntaxError { + reason: String::from("Expected a block after the collection"), + error_start: wrong_token.position, + error_end: wrong_token.get_end_position(), + })); + } + Err(ParsingError::Unmatched) => { + let (error_start, error_end) = expr.get_position(); + return Err(ParsingError::Err(SyntaxError { + reason: String::from("Expected a block after the collection"), + error_start, + error_end, + })); + } + }; + + // return + let (key, value) = match second_id { + Some(id) => (Some(first_id), id), + None => (None, first_id), + }; + + let (_, loop_end) = block.get_position(); + let for_loop = ForLoop { + loop_start: for_keyword.position, + loop_end, + key, + value, + body: block, + }; + Ok((for_loop, next)) + } +} diff --git a/src/syntax/parsers/mod.rs b/src/syntax/parsers/mod.rs index 8187861..8aa291f 100644 --- a/src/syntax/parsers/mod.rs +++ b/src/syntax/parsers/mod.rs @@ -1,7 +1,8 @@ pub mod binding; pub mod block; +pub mod conditional; pub mod expression; +pub mod for_loop; pub mod function_declaration; pub mod module; pub mod statement; -pub mod conditional; diff --git a/src/syntax/parsers/statement.rs b/src/syntax/parsers/statement.rs index 1e56db4..d42f158 100644 --- a/src/syntax/parsers/statement.rs +++ b/src/syntax/parsers/statement.rs @@ -1,7 +1,10 @@ use crate::{ lexic::token::Token, syntax::{ - ast::{var_binding::VariableBinding, Conditional, FunctionDeclaration, Statement}, + ast::{ + loops::ForLoop, var_binding::VariableBinding, Conditional, FunctionDeclaration, + Statement, + }, parseable::{Parseable, ParsingError, ParsingResult}, }, }; @@ -41,6 +44,13 @@ impl<'a> Parseable<'a> for Statement<'a> { _ => {} } + // Try to parse a for loop + match ForLoop::try_parse(tokens, current_pos) { + Ok((prod, next)) => return Ok((Statement::ForLoop(prod), next)), + Err(ParsingError::Err(e)) => return Err(ParsingError::Err(e)), + _ => {} + } + // Here nothing was parsed. Err(ParsingError::Unmatched) } diff --git a/src/syntax/utils.rs b/src/syntax/utils.rs index 6d6988b..514e949 100644 --- a/src/syntax/utils.rs +++ b/src/syntax/utils.rs @@ -85,10 +85,7 @@ pub fn parse_token_type( /// Ignores indentation, newlines and comments. /// /// Only returns: Ok or Unmatched. -pub fn parse_terminator( - tokens: &Vec, - pos: usize, -) -> ParsingResult<()> { +pub fn parse_terminator(tokens: &Vec, pos: usize) -> ParsingResult<()> { let mut current_pos = pos; // Ignore all whitespace, newlines and semicolons