From 778a1390a04c80a1495ec5d73900f7593a05726a Mon Sep 17 00:00:00 2001 From: Araozu Date: Sun, 2 Jun 2024 19:29:25 -0500 Subject: [PATCH] refactor: binding parsing --- CHANGELOG.md | 6 +- src/syntax/mod.rs | 3 - src/syntax/{ => parsers}/binding.rs | 269 ++++++++++++++-------------- src/syntax/parsers/expression.rs | 5 +- src/syntax/parsers/mod.rs | 1 + src/syntax/parsers/module.rs | 4 - src/syntax/parsers/statement.rs | 6 +- src/syntax/statement.rs | 8 +- 8 files changed, 148 insertions(+), 154 deletions(-) rename src/syntax/{ => parsers}/binding.rs (52%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 02d571c..313e4fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,12 +29,12 @@ ## v0.0.13 - [ ] Begin work on a formal grammar -- [ ] Simplify AST +- [ ] Simplify/rewrite AST - [ ] Define the top level constructs - [ ] Include the original tokens in the AST -- [ ] Implement a hello world until semantic analysis +- [ ] Finish the workflow for a hello world - [ ] Refactor code -- [ ] Remove `PARSER couldn't parse any construction` error & replace with an actual error message +- [x] Remove `PARSER couldn't parse any construction` error & replace with an actual error message ## v0.0.12 diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 0a54f49..5e342ac 100755 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -1,6 +1,5 @@ use crate::error_handling::MistiError; -mod binding; mod block; mod expression; mod functions; @@ -36,7 +35,6 @@ mod tests { use tests::ast::Statement; // TODO: Reenable when statement parsing is rewritten - /* #[test] fn should_parse_top_level_construct_with_trailing_newline() { let input = String::from(" fun f1(){}\n"); @@ -52,7 +50,6 @@ mod tests { _ => panic!("Expected a function declaration"), } } - */ #[test] fn should_parse_2_top_level_construct() { diff --git a/src/syntax/binding.rs b/src/syntax/parsers/binding.rs similarity index 52% rename from src/syntax/binding.rs rename to src/syntax/parsers/binding.rs index 3d723db..8c5c056 100644 --- a/src/syntax/binding.rs +++ b/src/syntax/parsers/binding.rs @@ -1,143 +1,142 @@ -use super::ast::var_binding::VariableBinding; -use super::utils::{parse_token_type, try_operator}; -use super::{expression, ParsingError, ParsingResult}; -use crate::error_handling::SyntaxError; -use crate::lexic::token::{Token, TokenType}; +use crate::{ + error_handling::SyntaxError, + lexic::token::{Token, TokenType}, + syntax::{ + ast::{var_binding::VariableBinding, Expression}, + parseable::{Parseable, ParsingError, ParsingResult}, + utils::{parse_token_type, try_operator}, + }, +}; -/* -binding = val binding | var binding -val binding = "val", datatype?, binding remainder - | datatype, binding remainder +impl<'a> Parseable<'a> for VariableBinding<'a> { + type Item = VariableBinding<'a>; -var binding = "var", datatype?, binding remainder + fn try_parse(tokens: &'a Vec, current_pos: usize) -> ParsingResult<'a, Self::Item> { + let current_pos = current_pos; -binding remainder = identifier, "=", expression - */ -pub fn try_parse<'a>(tokens: &'a Vec, pos: usize) -> ParsingResult { - let mut current_pos = pos; + /* + * val/var keyword + */ + let (is_var, binding_token, next_pos) = 'token: { + // check for VAL + if let Ok((val_token, next_pos)) = parse_token_type(tokens, current_pos, TokenType::VAL) + { + break 'token (false, Some(val_token), next_pos); + }; - /* - * val/var keyword - */ - let (is_var, binding_token, next_pos) = 'token: { - // check for VAL - if let Ok((val_token, next_pos)) = parse_token_type(tokens, current_pos, TokenType::VAL) { - break 'token (false, Some(val_token), next_pos); + // check for VAR + match parse_token_type(tokens, current_pos, TokenType::VAR) { + Ok((var_token, next_pos)) => (true, Some(var_token), next_pos), + // If a VAR is not found it is still possible that the binding is an implicit VAL + _ => (false, None, current_pos), + } }; - // check for VAR - match parse_token_type(tokens, current_pos, TokenType::VAR) { - Ok((var_token, next_pos)) => (true, Some(var_token), next_pos), - // If a VAR is not found it is still possible that the binding is an implicit VAL - _ => (false, None, current_pos), + /* + * datatype + */ + let (datatype, next_pos) = match parse_token_type(tokens, next_pos, TokenType::Datatype) + { + Ok((t, next)) => (Some(t), next), + _ => (None, next_pos), + }; + + // Here: + // If the binding is None and the datatype is None, then we didn't match a binding + if binding_token.is_none() && datatype.is_none() { + return Err(ParsingError::Unmatched); } - }; - current_pos = next_pos; - /* - * datatype - */ - let (datatype, next_pos) = match parse_token_type(tokens, current_pos, TokenType::Datatype) { - Ok((t, next)) => (Some(t), next), - _ => (None, current_pos), - }; - current_pos = next_pos; - - // Here: - // If the binding is None and the datatype is None, then we didn't match a binding - if binding_token.is_none() && datatype.is_none() { - return Err(ParsingError::Unmatched); - } - - /* - * identifier - */ - let (identifier, next_pos) = match parse_token_type(tokens, current_pos, TokenType::Identifier) - { - Ok((t, n)) => (t, n), - Err(ParsingError::Mismatch(token)) => { - // The parser found a token, but it's not an identifier - return Err(ParsingError::Err(SyntaxError { - error_start: token.position, - error_end: token.get_end_position(), - reason: "There should be an identifier after a binding".into(), - })); - } - _ => { - // The parser didn't find an Identifier after VAL/VAR or the Datatype - match (binding_token, datatype) { - (Some(binding_token), None) => { + /* + * identifier + */ + let (identifier, next_pos) = + match parse_token_type(tokens, next_pos, TokenType::Identifier) { + Ok((t, n)) => (t, n), + Err(ParsingError::Mismatch(token)) => { + // The parser found a token, but it's not an identifier return Err(ParsingError::Err(SyntaxError { - reason: format!( - "There should be an identifier after a `{}` token", - if is_var { "var" } else { "val" } - ), - error_start: binding_token.position, - error_end: binding_token.get_end_position(), - })); - } - (_, Some(datatype_token)) => { - return Err(ParsingError::Err(SyntaxError { - reason: "There should be an identifier after the datatype".into(), - error_start: datatype_token.position, - error_end: datatype_token.get_end_position(), + error_start: token.position, + error_end: token.get_end_position(), + reason: "There should be an identifier after a binding".into(), })); } _ => { - unreachable!("Illegal parser state: binding_token and datatype are both None") + // The parser didn't find an Identifier after VAL/VAR or the Datatype + match (binding_token, datatype) { + (Some(binding_token), None) => { + return Err(ParsingError::Err(SyntaxError { + reason: format!( + "There should be an identifier after a `{}` token", + if is_var { "var" } else { "val" } + ), + error_start: binding_token.position, + error_end: binding_token.get_end_position(), + })); + } + (_, Some(datatype_token)) => { + return Err(ParsingError::Err(SyntaxError { + reason: "There should be an identifier after the datatype".into(), + error_start: datatype_token.position, + error_end: datatype_token.get_end_position(), + })); + } + _ => { + unreachable!( + "Illegal parser state: binding_token and datatype are both None" + ) + } + }; } }; - } - }; - current_pos = next_pos; - /* - * Equal (=) operator - */ - let equal_operator = match try_operator(tokens, current_pos, String::from("=")) { - Ok((t, _)) => t, - Err(ParsingError::Mismatch(t)) => { - // The parser found a token, but it's not the `=` operator - return Err(ParsingError::Err(SyntaxError { - reason: format!("There should be an equal sign `=` after the identifier"), - error_start: t.position, - error_end: t.get_end_position(), - })); - } - _ => { - // The parser didn't find the `=` operator after the identifier - return Err(ParsingError::Err(SyntaxError { - reason: format!("There should be an equal sign `=` after the identifier",), - error_start: identifier.position, - error_end: identifier.get_end_position(), - })); - } - }; - current_pos += 1; + /* + * Equal (=) operator + */ + let equal_operator = match try_operator(tokens, next_pos, String::from("=")) { + Ok((t, _)) => t, + Err(ParsingError::Mismatch(t)) => { + // The parser found a token, but it's not the `=` operator + return Err(ParsingError::Err(SyntaxError { + reason: format!("There should be an equal sign `=` after the identifier"), + error_start: t.position, + error_end: t.get_end_position(), + })); + } + _ => { + // The parser didn't find the `=` operator after the identifier + return Err(ParsingError::Err(SyntaxError { + reason: format!("There should be an equal sign `=` after the identifier",), + error_start: identifier.position, + error_end: identifier.get_end_position(), + })); + } + }; + let next_pos = next_pos + 1; - /* - * Expression of the binding - */ - let (expression, next_pos) = match expression::try_parse(tokens, current_pos) { - Ok((exp, next)) => (exp, next), - _ => { - return Err(ParsingError::Err(SyntaxError { - reason: String::from("Expected an expression after the equal `=` operator"), - error_start: equal_operator.position, - error_end: equal_operator.get_end_position(), - })); - } - }; - current_pos = next_pos; + /* + * Expression of the binding + */ + let (expression, next_pos) = match Expression::try_parse(tokens, next_pos) { + Ok((exp, next)) => (exp, next), + _ => { + return Err(ParsingError::Err(SyntaxError { + reason: String::from("Expected an expression after the equal `=` operator"), + error_start: equal_operator.position, + error_end: equal_operator.get_end_position(), + })); + } + }; - let binding = VariableBinding { - datatype, - identifier: &identifier, - expression, - is_mutable: is_var, - }; + let binding = VariableBinding { + datatype, + identifier: &identifier, + expression, + is_mutable: is_var, + }; - Ok((binding, current_pos)) + Ok((binding, next_pos)) + } } #[cfg(test)] @@ -148,7 +147,7 @@ mod tests { #[test] fn should_parse_val_binding() { let tokens = get_tokens(&String::from("val identifier = 20")).unwrap(); - let Ok((binding, _)) = try_parse(&tokens, 0) else { + let Ok((binding, _)) = VariableBinding::try_parse(&tokens, 0) else { panic!() }; @@ -183,7 +182,7 @@ mod tests { #[test] fn should_parse_val_binding_with_datatype() { let tokens = get_tokens(&String::from("val Int identifier = 20")).unwrap(); - let (binding, _) = try_parse(&tokens, 0).unwrap(); + let (binding, _) = VariableBinding::try_parse(&tokens, 0).unwrap(); assert!(!binding.is_mutable); assert_eq!("Int", binding.datatype.unwrap().value); @@ -193,7 +192,7 @@ mod tests { #[test] fn should_parse_var_binding_with_datatype() { let tokens = get_tokens(&String::from("var Int identifier = 20")).unwrap(); - let (binding, _) = try_parse(&tokens, 0).unwrap(); + let (binding, _) = VariableBinding::try_parse(&tokens, 0).unwrap(); assert!(binding.is_mutable); assert!(binding.datatype.is_some()); @@ -204,7 +203,7 @@ mod tests { #[test] fn should_parse_implicit_val_binding() { let tokens = get_tokens(&String::from("Int identifier = 20")).unwrap(); - let (binding, _) = try_parse(&tokens, 0).unwrap(); + let (binding, _) = VariableBinding::try_parse(&tokens, 0).unwrap(); assert!(!binding.is_mutable); assert!(binding.datatype.is_some()); @@ -215,7 +214,7 @@ mod tests { #[test] fn should_return_error_on_implicit_val_binding() { let tokens = get_tokens(&String::from("Int => 20")).unwrap(); - let binding = try_parse(&tokens, 0); + let binding = VariableBinding::try_parse(&tokens, 0); match binding { Err(ParsingError::Err(error)) => { @@ -231,7 +230,7 @@ mod tests { let tokens = get_tokens(&String::from("val")).unwrap(); assert_eq!(TokenType::VAL, tokens[0].token_type); assert_eq!(0, tokens[0].position); - let binding = try_parse(&tokens, 0); + let binding = VariableBinding::try_parse(&tokens, 0); match binding { Err(ParsingError::Err(error)) => { @@ -247,7 +246,7 @@ mod tests { let tokens = get_tokens(&String::from("val 322")).unwrap(); assert_eq!(TokenType::VAL, tokens[0].token_type); assert_eq!(0, tokens[0].position); - let binding = try_parse(&tokens, 0); + let binding = VariableBinding::try_parse(&tokens, 0); match binding { Err(ParsingError::Err(error)) => { @@ -258,7 +257,7 @@ mod tests { } let tokens = get_tokens(&String::from("val \"hello\"")).unwrap(); - let binding = try_parse(&tokens, 0); + let binding = VariableBinding::try_parse(&tokens, 0); match binding { Err(ParsingError::Err(error)) => { @@ -276,7 +275,7 @@ mod tests { #[test] fn should_return_error_when_equal_op_is_wrong() { let tokens = get_tokens(&String::from("val id \"error\"")).unwrap(); - let binding = try_parse(&tokens, 0); + let binding = VariableBinding::try_parse(&tokens, 0); match binding { Err(ParsingError::Err(error)) => { @@ -290,7 +289,7 @@ mod tests { #[test] fn should_return_error_when_identifier_is_empty() { let tokens = get_tokens(&String::from("val String ")).unwrap(); - let binding = try_parse(&tokens, 0); + let binding = VariableBinding::try_parse(&tokens, 0); match binding { Err(ParsingError::Err(error)) => { @@ -308,7 +307,7 @@ mod tests { #[test] fn should_return_error_when_identifier_is_empty_2() { let tokens = get_tokens(&String::from("val ")).unwrap(); - let binding = try_parse(&tokens, 0); + let binding = VariableBinding::try_parse(&tokens, 0); match binding { Err(ParsingError::Err(error)) => { @@ -326,7 +325,7 @@ mod tests { #[test] fn should_error_when_equal_op_is_missing() { let tokens = get_tokens(&String::from("val identifier ")).unwrap(); - let binding = try_parse(&tokens, 0); + let binding = VariableBinding::try_parse(&tokens, 0); match binding { Err(ParsingError::Err(error)) => { @@ -344,7 +343,7 @@ mod tests { #[test] fn should_error_when_exp_is_empty() { let tokens = get_tokens(&String::from("val identifier = ")).unwrap(); - let binding = try_parse(&tokens, 0); + let binding = VariableBinding::try_parse(&tokens, 0); match binding { Err(ParsingError::Err(error)) => { diff --git a/src/syntax/parsers/expression.rs b/src/syntax/parsers/expression.rs index d1e0a2b..5ee0789 100644 --- a/src/syntax/parsers/expression.rs +++ b/src/syntax/parsers/expression.rs @@ -1,8 +1,7 @@ use crate::{ lexic::token::Token, syntax::{ - ast::Expression, - parseable::{Parseable, ParsingResult}, + ast::Expression, expression, parseable::{Parseable, ParsingResult} }, }; @@ -10,6 +9,6 @@ impl<'a> Parseable<'a> for Expression<'a> { type Item = Expression<'a>; fn try_parse(tokens: &'a Vec, current_pos: usize) -> ParsingResult<'a, Self::Item> { - todo!() + expression::try_parse(tokens, current_pos) } } diff --git a/src/syntax/parsers/mod.rs b/src/syntax/parsers/mod.rs index 2a18e89..1c6361e 100644 --- a/src/syntax/parsers/mod.rs +++ b/src/syntax/parsers/mod.rs @@ -1,3 +1,4 @@ +pub mod binding; pub mod expression; pub mod module; pub mod statement; diff --git a/src/syntax/parsers/module.rs b/src/syntax/parsers/module.rs index 316e642..4bb765d 100644 --- a/src/syntax/parsers/module.rs +++ b/src/syntax/parsers/module.rs @@ -24,10 +24,6 @@ impl<'a> Parseable<'a> for ModuleAST<'a> { // Minus one because last token is EOF // TODO: Does that EOF do anything? while current_pos < tokens_len - 1 { - println!( - "len: {} pos: {}, value: `{}`, type: {:?}", - tokens_len, current_pos, tokens[current_pos].value, tokens[current_pos].token_type - ); // Attempt to parse an statement match Statement::try_parse(tokens, current_pos) { Ok((prod, next_pos)) => { diff --git a/src/syntax/parsers/statement.rs b/src/syntax/parsers/statement.rs index 73db84d..2e11a28 100644 --- a/src/syntax/parsers/statement.rs +++ b/src/syntax/parsers/statement.rs @@ -1,6 +1,5 @@ use crate::syntax::{ - ast::Statement, - binding, + ast::{var_binding::VariableBinding, Statement}, functions::function_declaration, parseable::{Parseable, ParsingError}, }; @@ -13,8 +12,7 @@ impl<'a> Parseable<'a> for Statement<'a> { current_pos: usize, ) -> crate::syntax::parseable::ParsingResult<'a, Self::Item> { // Try to parse a variable binding - // TODO: Rewrite function_declaration to use Parseable - match binding::try_parse(tokens, current_pos) { + match VariableBinding::try_parse(tokens, current_pos) { Ok((prod, next)) => { return Ok((Statement::Binding(prod), next)); } diff --git a/src/syntax/statement.rs b/src/syntax/statement.rs index 0b0ecd8..6971bda 100644 --- a/src/syntax/statement.rs +++ b/src/syntax/statement.rs @@ -1,10 +1,14 @@ use crate::lexic::token::Token; -use super::{ast::Statement, binding, ParsingError, ParsingResult}; +use super::{ + ast::{var_binding::VariableBinding, Statement}, + parseable::Parseable, + ParsingError, ParsingResult, +}; pub fn try_parse<'a>(tokens: &'a Vec, pos: usize) -> ParsingResult { // Try to parse a binding - match binding::try_parse(tokens, pos) { + match VariableBinding::try_parse(tokens, pos) { Ok((b, next)) => return Ok((Statement::Binding(b), next)), Err(ParsingError::Err(err)) => return Err(ParsingError::Err(err)), _ => {}