From 5dd104bcc94953f5ef0a8f51193cb0955aeb2bdf Mon Sep 17 00:00:00 2001 From: Araozu Date: Sun, 17 Sep 2023 17:58:56 -0500 Subject: [PATCH] Parse multiple top level declarations --- src/codegen/function_declaration.rs | 8 +-- src/codegen/module_ast.rs | 6 +- src/codegen/top_level_construct.rs | 8 +-- src/lexic/token.rs | 8 --- src/syntax/ast.rs | 4 +- src/syntax/binding.rs | 34 +++++------ src/syntax/function_declaration.rs | 16 ++--- src/syntax/grammar.md | 38 +++++++----- src/syntax/mod.rs | 93 ++++++++++++++++++++++++----- 9 files changed, 140 insertions(+), 75 deletions(-) diff --git a/src/codegen/function_declaration.rs b/src/codegen/function_declaration.rs index 9e4fe92..b3536ac 100644 --- a/src/codegen/function_declaration.rs +++ b/src/codegen/function_declaration.rs @@ -13,7 +13,7 @@ mod tests { use super::*; use crate::{ lexic::get_tokens, - syntax::{ast::TopLevelConstruct, construct_ast}, + syntax::{ast::TopLevelDeclaration, construct_ast}, }; #[test] @@ -21,11 +21,11 @@ mod tests { let tokens = get_tokens(&String::from("fun id() {}")).unwrap(); let result = construct_ast(&tokens).unwrap(); - let fun_dec = result.bindings.get(0).unwrap(); + let fun_dec = result.declarations.get(0).unwrap(); match fun_dec { - TopLevelConstruct::Binding(_) => panic!("Expected function declaration"), - TopLevelConstruct::FunctionDeclaration(fun_decl) => { + TopLevelDeclaration::Binding(_) => panic!("Expected function declaration"), + TopLevelDeclaration::FunctionDeclaration(fun_decl) => { let transpiled = fun_decl.transpile(); assert_eq!("function id() {}", transpiled); diff --git a/src/codegen/module_ast.rs b/src/codegen/module_ast.rs index 34cf456..a01d1c1 100644 --- a/src/codegen/module_ast.rs +++ b/src/codegen/module_ast.rs @@ -6,7 +6,7 @@ impl Transpilable for ModuleAST { /// nodes and leaves of the AST fn transpile(&self) -> String { let bindings_str: Vec = self - .bindings + .declarations .iter() .map(|binding| binding.transpile()) .collect(); @@ -18,7 +18,7 @@ impl Transpilable for ModuleAST { #[cfg(test)] mod tests { use super::*; - use crate::syntax::ast::{Binding, Expression, TopLevelConstruct, ValBinding}; + use crate::syntax::ast::{Binding, Expression, TopLevelDeclaration, ValBinding}; #[test] fn module_ast_should_transpile() { @@ -31,7 +31,7 @@ mod tests { }); let module = ModuleAST { - bindings: vec![TopLevelConstruct::Binding(binding)], + declarations: vec![TopLevelDeclaration::Binding(binding)], }; let result = module.transpile(); diff --git a/src/codegen/top_level_construct.rs b/src/codegen/top_level_construct.rs index 487ccba..9c3f412 100644 --- a/src/codegen/top_level_construct.rs +++ b/src/codegen/top_level_construct.rs @@ -1,12 +1,12 @@ -use crate::syntax::ast::TopLevelConstruct; +use crate::syntax::ast::TopLevelDeclaration; use super::Transpilable; -impl Transpilable for TopLevelConstruct { +impl Transpilable for TopLevelDeclaration { fn transpile(&self) -> String { match self { - TopLevelConstruct::Binding(binding) => binding.transpile(), - TopLevelConstruct::FunctionDeclaration(fun) => fun.transpile(), + TopLevelDeclaration::Binding(binding) => binding.transpile(), + TopLevelDeclaration::FunctionDeclaration(fun) => fun.transpile(), } } } diff --git a/src/lexic/token.rs b/src/lexic/token.rs index 5e2c680..6243f73 100755 --- a/src/lexic/token.rs +++ b/src/lexic/token.rs @@ -86,14 +86,6 @@ impl Token { } } - pub fn new_semicolon(position: usize) -> Token { - Token { - token_type: TokenType::NewLine, - value: String::from(";"), - position, - } - } - pub fn new_datatype(value: String, position: usize) -> Token { Token { token_type: TokenType::Datatype, diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 2aeb58c..d58b079 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -1,9 +1,9 @@ pub struct ModuleAST { - pub bindings: Vec, + pub declarations: Vec, } #[derive(Debug)] -pub enum TopLevelConstruct { +pub enum TopLevelDeclaration { Binding(Binding), FunctionDeclaration(FunctionDeclaration), } diff --git a/src/syntax/binding.rs b/src/syntax/binding.rs index f23c433..0becd91 100644 --- a/src/syntax/binding.rs +++ b/src/syntax/binding.rs @@ -9,13 +9,12 @@ use crate::utils::Result3; // - NotFound: the first token (var | val) was not found, so the parser should try other options // - Error: token (var | val) was found, but then other expected tokens were not found pub fn try_parse<'a>(tokens: &'a Vec, pos: usize) -> Option { - let mut pos = pos; - + let mut current_pos = pos; // Optional datatype annotation let datatype_annotation = { - match try_token_type(tokens, pos, TokenType::Datatype) { + match try_token_type(tokens, current_pos, TokenType::Datatype) { Result3::Ok(t) => { - pos += 1; + current_pos += 1; Some(String::from(&t.value)) } Result3::Err(_) => None, @@ -29,11 +28,11 @@ pub fn try_parse<'a>(tokens: &'a Vec, pos: usize) -> Option * val/var keyword */ let (is_val, binding_token) = { - let res1 = try_token_type(tokens, pos, TokenType::VAL); + let res1 = try_token_type(tokens, current_pos, TokenType::VAL); match res1 { Result3::Ok(val_token) => (true, val_token), _ => { - let res2 = try_token_type(tokens, pos, TokenType::VAR); + let res2 = try_token_type(tokens, current_pos, TokenType::VAR); match res2 { Result3::Ok(var_token) => (false, var_token), // Neither VAL nor VAR were matched, the parser should try @@ -47,7 +46,7 @@ pub fn try_parse<'a>(tokens: &'a Vec, pos: usize) -> Option /* * identifier */ - let identifier = match try_token_type(tokens, pos + 1, TokenType::Identifier) { + let identifier = match try_token_type(tokens, current_pos + 1, TokenType::Identifier) { Result3::Ok(t) => t, Result3::Err(t) => { // The parser found a token, but it's not an identifier @@ -76,7 +75,7 @@ pub fn try_parse<'a>(tokens: &'a Vec, pos: usize) -> Option /* * Equal (=) operator */ - let equal_operator: &Token = match try_operator(tokens, pos + 2, String::from("=")) { + let equal_operator: &Token = match try_operator(tokens, current_pos + 2, String::from("=")) { Result3::Ok(t) => t, Result3::Err(t) => { // The parser found a token, but it's not the `=` operator @@ -96,7 +95,7 @@ pub fn try_parse<'a>(tokens: &'a Vec, pos: usize) -> Option } }; - let expression = expression::try_parse(tokens, pos + 3); + let expression = expression::try_parse(tokens, current_pos + 3); if expression.is_none() { return Some(SyntaxResult::Err(SyntaxError { reason: String::from("Expected an expression after the equal `=` operator"), @@ -120,16 +119,17 @@ pub fn try_parse<'a>(tokens: &'a Vec, pos: usize) -> Option }) }; - Some(SyntaxResult::Ok(super::ast::TopLevelConstruct::Binding( - binding, - ))) + Some(SyntaxResult::Ok( + super::ast::TopLevelDeclaration::Binding(binding), + current_pos + 4, + )) } /// Expects the token at `pos` to be of type `token_type` fn try_token_type(tokens: &Vec, pos: usize, token_type: TokenType) -> Result3<&Token> { match tokens.get(pos) { Some(t) if t.token_type == token_type => Result3::Ok(t), - Some(t) if t.token_type == TokenType::NewLine || t.token_type == TokenType::EOF => { + Some(t) if t.token_type == TokenType::EOF => { Result3::None } Some(t) => Result3::Err(t), @@ -151,7 +151,7 @@ fn try_operator(tokens: &Vec, pos: usize, operator: String) -> Result3<&T #[cfg(test)] mod tests { use super::*; - use crate::{lexic::get_tokens, syntax::ast::TopLevelConstruct}; + use crate::{lexic::get_tokens, syntax::ast::TopLevelDeclaration}; #[test] fn should_parse_val_binding() { @@ -159,7 +159,7 @@ mod tests { let binding = try_parse(&tokens, 0).unwrap(); match binding { - SyntaxResult::Ok(TopLevelConstruct::Binding(Binding::Val(binding))) => { + SyntaxResult::Ok(TopLevelDeclaration::Binding(Binding::Val(binding)), _) => { assert_eq!("identifier", format!("{}", binding.identifier)); } _ => panic!(), @@ -197,7 +197,7 @@ mod tests { let binding = try_parse(&tokens, 0).unwrap(); match binding { - SyntaxResult::Ok(TopLevelConstruct::Binding(Binding::Val(binding))) => { + SyntaxResult::Ok(TopLevelDeclaration::Binding(Binding::Val(binding)), _) => { assert_eq!(Some(String::from("Num")), binding.datatype); assert_eq!("identifier", format!("{}", binding.identifier)); } @@ -208,7 +208,7 @@ mod tests { let binding = try_parse(&tokens, 0).unwrap(); match binding { - SyntaxResult::Ok(TopLevelConstruct::Binding(Binding::Var(binding))) => { + SyntaxResult::Ok(TopLevelDeclaration::Binding(Binding::Var(binding)), _) => { assert_eq!(Some(String::from("Bool")), binding.datatype); assert_eq!("identifier", format!("{}", binding.identifier)); } diff --git a/src/syntax/function_declaration.rs b/src/syntax/function_declaration.rs index 122aea6..d11f589 100644 --- a/src/syntax/function_declaration.rs +++ b/src/syntax/function_declaration.rs @@ -5,7 +5,7 @@ use crate::{ }; use super::{ - ast::{FunctionDeclaration, TopLevelConstruct}, + ast::{FunctionDeclaration, TopLevelDeclaration}, utils::try_token_type, SyntaxResult, }; @@ -154,18 +154,20 @@ pub fn try_parse<'a>(tokens: &'a Vec, pos: usize) -> Option })); } }; + current_pos += 1; // Construct and return the function declaration - Some(SyntaxResult::Ok(TopLevelConstruct::FunctionDeclaration( - FunctionDeclaration { + Some(SyntaxResult::Ok( + TopLevelDeclaration::FunctionDeclaration(FunctionDeclaration { identifier: Box::new(identifier.value.clone()), - }, - ))) + }), + current_pos, + )) } #[cfg(test)] mod tests { - use crate::{lexic::get_tokens, syntax::ast::TopLevelConstruct}; + use crate::{lexic::get_tokens, syntax::ast::TopLevelDeclaration}; use super::*; @@ -344,7 +346,7 @@ mod tests { let function_declaration = try_parse(&tokens, 0).unwrap(); match function_declaration { - SyntaxResult::Ok(TopLevelConstruct::FunctionDeclaration(declaration)) => { + SyntaxResult::Ok(TopLevelDeclaration::FunctionDeclaration(declaration), _) => { assert_eq!(declaration.identifier, Box::new(String::from("id"))); } _ => panic!( diff --git a/src/syntax/grammar.md b/src/syntax/grammar.md index 6f26cb0..03ffb3b 100644 --- a/src/syntax/grammar.md +++ b/src/syntax/grammar.md @@ -1,35 +1,45 @@ # Grammar + ## Module A module is (commonly) a single source file. -- `module = variable_binding*` +```ebnf +module = top level declaration* +``` - -### `variable_binding` - -A declaration with `var` or `val`. +## Top level declaration ```ebnf -var = "var" -val = "val" -variable_binding = (var | val), identifier, "=", expression +top level declaration = function declaration ``` -### `expression` - -For now just a number, string or boolean +## Function declaration ```ebnf -expression = number | string | boolean +function declaration = "fun", identifier, params list, return type?, block +``` + +### Params list + +```ebnf +params list = "(", ")" +``` + +### Return type + +```ebnf +return type = ; ``` -## Type annotations +### Block ```ebnf -variable_binding = Datatype, (var | val), identifier, "=", expression +block = "{", "}" ``` + + diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 00a4be0..1d9c28d 100755 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -7,16 +7,16 @@ mod utils; pub mod ast; -use crate::lexic::token::Token; +use crate::lexic::token::{Token, TokenType}; use ast::ModuleAST; -use self::ast::TopLevelConstruct; +use self::ast::TopLevelDeclaration; #[derive(Debug)] pub enum SyntaxResult { /// /// A construct has been found - Ok(TopLevelConstruct), + Ok(TopLevelDeclaration, usize), /// /// No construct was found None, @@ -27,21 +27,38 @@ pub enum SyntaxResult { /// Constructs the Misti AST from a vector of tokens pub fn construct_ast<'a>(tokens: &'a Vec) -> Result { - let _token_amount = tokens.len(); - let current_pos = 0; + let mut top_level_declarations = Vec::new(); + let token_amount = tokens.len(); + let mut current_pos = 0; - match next_construct(tokens, current_pos) { - SyntaxResult::Ok(module) => Ok(ModuleAST { - bindings: vec![module], - }), - SyntaxResult::None => Err(MistiError::Syntax(SyntaxError { - reason: String::from("PARSER couldn't parse any construction"), - // FIXME: This should get the position of the _token_ that current_pos points to - error_start: current_pos, - error_end: current_pos, - })), - SyntaxResult::Err(err) => Err(MistiError::Syntax(err)), + // Minus one because the last token is always EOF + while current_pos < token_amount - 1 { + // Ignore newlines + if tokens[current_pos].token_type == TokenType::NewLine { + current_pos += 1; + continue; + } + + match next_construct(tokens, current_pos) { + SyntaxResult::Ok(module, next_pos) => { + top_level_declarations.push(module); + current_pos = next_pos; + } + SyntaxResult::None => { + return Err(MistiError::Syntax(SyntaxError { + reason: String::from("PARSER couldn't parse any construction"), + // FIXME: This should get the position of the _token_ that current_pos points to + error_start: current_pos, + error_end: current_pos, + })); + } + SyntaxResult::Err(err) => return Err(MistiError::Syntax(err)), + } } + + Ok(ModuleAST { + declarations: top_level_declarations, + }) } fn next_construct<'a>(tokens: &'a Vec, current_pos: usize) -> SyntaxResult { @@ -49,3 +66,47 @@ fn next_construct<'a>(tokens: &'a Vec, current_pos: usize) -> SyntaxResul .or_else(|| function_declaration::try_parse(tokens, current_pos)) .unwrap_or_else(|| SyntaxResult::None) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn should_parse_top_level_construct_with_trailing_newline() { + let input = String::from("fun f1(){}\n"); + let tokens = crate::lexic::get_tokens(&input).unwrap(); + let declarations = construct_ast(&tokens).unwrap().declarations; + + assert_eq!(declarations.len(), 1); + + match declarations.get(0).unwrap() { + TopLevelDeclaration::Binding(_) => panic!("Expected a function declaration"), + TopLevelDeclaration::FunctionDeclaration(_f) => { + assert!(true) + } + } + } + + #[test] + fn should_parse_2_top_level_construct() { + let input = String::from("fun f1(){} fun f2() {}"); + let tokens = crate::lexic::get_tokens(&input).unwrap(); + let declarations = construct_ast(&tokens).unwrap().declarations; + + assert_eq!(declarations.len(), 2); + + match declarations.get(0).unwrap() { + TopLevelDeclaration::Binding(_) => panic!("Expected a function declaration"), + TopLevelDeclaration::FunctionDeclaration(_f) => { + assert!(true) + } + } + + match declarations.get(1).unwrap() { + TopLevelDeclaration::Binding(_) => panic!("Expected a function declaration"), + TopLevelDeclaration::FunctionDeclaration(_f) => { + assert!(true) + } + } + } +}