diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs index d5f3a17..a7927ae 100644 --- a/src/codegen/mod.rs +++ b/src/codegen/mod.rs @@ -3,6 +3,7 @@ use crate::syntax::ast::ModuleAST; mod binding; mod expression; mod module_ast; +mod top_level_construct; /// Trait that the AST and its nodes implement to support transformation to PHP trait Transpilable { diff --git a/src/codegen/module_ast.rs b/src/codegen/module_ast.rs index e72c01f..34cf456 100644 --- a/src/codegen/module_ast.rs +++ b/src/codegen/module_ast.rs @@ -18,7 +18,7 @@ impl Transpilable for ModuleAST { #[cfg(test)] mod tests { use super::*; - use crate::syntax::ast::{Binding, Expression, ValBinding}; + use crate::syntax::ast::{Binding, Expression, TopLevelConstruct, ValBinding}; #[test] fn module_ast_should_transpile() { @@ -31,7 +31,7 @@ mod tests { }); let module = ModuleAST { - bindings: vec![binding], + bindings: vec![TopLevelConstruct::Binding(binding)], }; let result = module.transpile(); diff --git a/src/codegen/top_level_construct.rs b/src/codegen/top_level_construct.rs new file mode 100644 index 0000000..caf77fb --- /dev/null +++ b/src/codegen/top_level_construct.rs @@ -0,0 +1,12 @@ +use crate::syntax::ast::TopLevelConstruct; + +use super::Transpilable; + +impl Transpilable for TopLevelConstruct { + fn transpile(&self) -> String { + match self { + TopLevelConstruct::Binding(binding) => binding.transpile(), + TopLevelConstruct::FunctionDeclaration(_) => todo!(), + } + } +} diff --git a/src/lexic/scanner/identifier.rs b/src/lexic/scanner/identifier.rs index 7951867..99b7114 100755 --- a/src/lexic/scanner/identifier.rs +++ b/src/lexic/scanner/identifier.rs @@ -6,6 +6,7 @@ fn str_is_keyword(s: &String) -> Option { match s.as_str() { "var" => Some(TokenType::VAR), "val" => Some(TokenType::VAL), + "fun" => Some(TokenType::FUN), _ => None, } } diff --git a/src/lexic/token.rs b/src/lexic/token.rs index a034bfb..04ff33a 100755 --- a/src/lexic/token.rs +++ b/src/lexic/token.rs @@ -16,6 +16,7 @@ pub enum 
TokenType { VAR, VAL, EOF, + FUN, } #[derive(Debug)] diff --git a/src/main.rs b/src/main.rs index 2f25c66..9e06812 100755 --- a/src/main.rs +++ b/src/main.rs @@ -51,9 +51,7 @@ fn main() { let cli = Cli::parse(); match &cli.command { - Some(Commands::C { - file: input, - }) => file::compile_file(input), + Some(Commands::C { file: input }) => file::compile_file(input), Some(Commands::R {}) => { println!("{}", get_copyright()); let _ = repl::run(); diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 29ee47b..2aeb58c 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -1,24 +1,39 @@ pub struct ModuleAST { - pub bindings: Vec, + pub bindings: Vec, } +#[derive(Debug)] +pub enum TopLevelConstruct { + Binding(Binding), + FunctionDeclaration(FunctionDeclaration), +} + +#[derive(Debug)] +pub struct FunctionDeclaration { + pub identifier: Box, +} + +#[derive(Debug)] pub enum Binding { Val(ValBinding), Var(VarBinding), } +#[derive(Debug)] pub struct ValBinding { pub datatype: Option, pub identifier: Box, pub expression: Expression, } +#[derive(Debug)] pub struct VarBinding { pub datatype: Option, pub identifier: Box, pub expression: Expression, } +#[derive(Debug)] pub enum Expression { Number(Box), String(Box), diff --git a/src/syntax/binding.rs b/src/syntax/binding.rs index 3ca4b2f..aa64e14 100644 --- a/src/syntax/binding.rs +++ b/src/syntax/binding.rs @@ -120,7 +120,9 @@ pub fn try_parse<'a>(tokens: &'a Vec, pos: usize) -> Option }) }; - Some(SyntaxResult::Ok(binding)) + Some(SyntaxResult::Ok(super::ast::TopLevelConstruct::Binding( + binding, + ))) } /// Expects the token at `pos` to be of type `token_type` @@ -149,7 +151,7 @@ fn try_operator(tokens: &Vec, pos: usize, operator: String) -> Result3<&T #[cfg(test)] mod tests { use super::*; - use crate::lexic::get_tokens; + use crate::{lexic::get_tokens, syntax::ast::TopLevelConstruct}; #[test] fn should_parse_val_binding() { @@ -157,7 +159,7 @@ mod tests { let binding = try_parse(&tokens, 0).unwrap(); match binding 
{ - SyntaxResult::Ok(Binding::Val(binding)) => { + SyntaxResult::Ok(TopLevelConstruct::Binding(Binding::Val(binding))) => { assert_eq!("identifier", format!("{}", binding.identifier)); } _ => panic!(), @@ -195,7 +197,7 @@ mod tests { let binding = try_parse(&tokens, 0).unwrap(); match binding { - SyntaxResult::Ok(Binding::Val(binding)) => { + SyntaxResult::Ok(TopLevelConstruct::Binding(Binding::Val(binding))) => { assert_eq!(Some(String::from("Num")), binding.datatype); assert_eq!("identifier", format!("{}", binding.identifier)); } @@ -206,11 +208,11 @@ mod tests { let binding = try_parse(&tokens, 0).unwrap(); match binding { - SyntaxResult::Ok(Binding::Var(binding)) => { + SyntaxResult::Ok(TopLevelConstruct::Binding(Binding::Var(binding))) => { assert_eq!(Some(String::from("Bool")), binding.datatype); assert_eq!("identifier", format!("{}", binding.identifier)); } - _ => panic!(), + _ => panic!("D: {:?}", binding), } } diff --git a/src/syntax/function_declaration.rs b/src/syntax/function_declaration.rs new file mode 100644 index 0000000..cc362a4 --- /dev/null +++ b/src/syntax/function_declaration.rs @@ -0,0 +1,356 @@ +use crate::{ + error_handling::SyntaxError, + lexic::token::{Token, TokenType}, + utils::Result3, +}; + +use super::{ + ast::{FunctionDeclaration, TopLevelConstruct}, + utils::try_token_type, + SyntaxResult, +}; + +pub fn try_parse<'a>(tokens: &'a Vec, pos: usize) -> Option { + let mut current_pos = pos; + + // `fun` keyword + let fun_keyword = match try_token_type(tokens, current_pos, TokenType::FUN) { + Result3::Ok(t) => t, + Result3::Err(_token) => return None, + Result3::None => return None, + }; + current_pos += 1; + + // Parse identifier + let identifier = match try_token_type(tokens, current_pos, TokenType::Identifier) { + Result3::Ok(t) => t, + Result3::Err(t) => { + // The parser found a token, but it's not an identifier + return Some(SyntaxResult::Err(SyntaxError { + reason: format!( + "There should be an identifier after a `fun` token, but 
found `{}`", + t.value + ), + error_start: t.position, + error_end: t.get_end_position(), + })); + } + Result3::None => { + // The parser didn't find any token + return Some(SyntaxResult::Err(SyntaxError { + reason: format!( + "There should be an identifier after a `fun` token, but found nothing" + ), + error_start: fun_keyword.position, + error_end: fun_keyword.get_end_position(), + })); + } + }; + current_pos += 1; + + // Parse an opening paren + let opening_paren = match try_token_type(tokens, current_pos, TokenType::LeftParen) { + Result3::Ok(t) => t, + Result3::Err(t) => { + // The parser found a token, but it's not an opening paren + return Some(SyntaxResult::Err(SyntaxError { + reason: format!( + "There should be an opening paren after the identifier, but found `{}`", + t.value + ), + error_start: t.position, + error_end: t.get_end_position(), + })); + } + Result3::None => { + // The parser didn't find any token + return Some(SyntaxResult::Err(SyntaxError { + reason: format!( + "There should be an opening paren after the identifier, but found nothing" + ), + error_start: identifier.position, + error_end: identifier.get_end_position(), + })); + } + }; + current_pos += 1; + + // Parse a closing paren + let closing_paren = match try_token_type(tokens, current_pos, TokenType::RightParen) { + Result3::Ok(t) => t, + Result3::Err(t) => { + // The parser found a token, but it's not a closing paren + return Some(SyntaxResult::Err(SyntaxError { + reason: format!( + "There should be a closing paren after the parameter list, but found `{}`", + t.value + ), + error_start: t.position, + error_end: t.get_end_position(), + })); + } + Result3::None => { + // The parser didn't find any token + return Some(SyntaxResult::Err(SyntaxError { + reason: format!( + "There should be a closing paren after the parameter list, but found nothing" + ), + error_start: opening_paren.position, + error_end: opening_paren.get_end_position(), + })); + } + }; + current_pos += 1; + + // Parse 
opening brace + let opening_brace = match try_token_type(tokens, current_pos, TokenType::LeftBrace) { + Result3::Ok(t) => t, + Result3::Err(t) => { + // The parser found a token, but it's not an opening brace + return Some(SyntaxResult::Err(SyntaxError { + reason: format!( + "There should be an opening brace after the parameter list, but found `{}`", + t.value + ), + error_start: t.position, + error_end: t.get_end_position(), + })); + } + Result3::None => { + // The parser didn't find any token + return Some(SyntaxResult::Err(SyntaxError { + reason: format!( + "There should be an opening brace after the parameter list, but found nothing" + ), + error_start: closing_paren.position, + error_end: closing_paren.get_end_position(), + })); + } + }; + current_pos += 1; + + // Parse closing brace + let closing_brace = match try_token_type(tokens, current_pos, TokenType::RightBrace) { + Result3::Ok(t) => t, + Result3::Err(t) => { + // The parser found a token, but it's not a closing brace + return Some(SyntaxResult::Err(SyntaxError { + reason: format!( + "There should be a closing brace after the function body, but found `{}`", + t.value + ), + error_start: t.position, + error_end: t.get_end_position(), + })); + } + Result3::None => { + // The parser didn't find any token + return Some(SyntaxResult::Err(SyntaxError { + reason: format!( + "There should be a closing brace after the function body, but found nothing" + ), + error_start: opening_brace.position, + error_end: opening_brace.get_end_position(), + })); + } + }; + + // Construct and return the function declaration + Some(SyntaxResult::Ok(TopLevelConstruct::FunctionDeclaration( + FunctionDeclaration { + identifier: Box::new(identifier.value.clone()), + }, + ))) +} + +#[cfg(test)] +mod tests { + use crate::{lexic::get_tokens, syntax::ast::TopLevelConstruct}; + + use super::*; + + #[test] + fn should_return_none_on_wrong_initial_token() { + let tokens = get_tokens(&String::from("val identifier = 20")).unwrap(); + let 
fun_decl = try_parse(&tokens, 0); + + assert!(fun_decl.is_none()); + } + + #[test] + fn should_not_parse_fun_without_identifier() { + let tokens = get_tokens(&String::from("fun = 20")).unwrap(); + let fun_decl = try_parse(&tokens, 0); + + match fun_decl { + Some(SyntaxResult::Err(err)) => { + assert_eq!( + err.reason, + "There should be an identifier after a `fun` token, but found `=`" + ); + assert_eq!(err.error_start, 4); + assert_eq!(err.error_end, 5); + } + _ => panic!("Expected an error: {:?}", fun_decl), + } + + let tokens = get_tokens(&String::from("fun")).unwrap(); + let fun_decl = try_parse(&tokens, 0); + match fun_decl { + Some(SyntaxResult::Err(err)) => { + assert_eq!( + err.reason, + "There should be an identifier after a `fun` token, but found nothing" + ); + assert_eq!(err.error_start, 0); + assert_eq!(err.error_end, 3); + } + _ => panic!("Expected an error: {:?}", fun_decl), + } + } + + #[test] + fn should_not_parse_fun_without_parens() { + let tokens = get_tokens(&String::from("fun id =")).unwrap(); + let fun_decl = try_parse(&tokens, 0); + + match fun_decl { + Some(SyntaxResult::Err(err)) => { + assert_eq!( + err.reason, + "There should be an opening paren after the identifier, but found `=`" + ); + assert_eq!(err.error_start, 7); + assert_eq!(err.error_end, 8); + } + _ => panic!("Expected an error: {:?}", fun_decl), + } + + let tokens = get_tokens(&String::from("fun id")).unwrap(); + let fun_decl = try_parse(&tokens, 0); + match fun_decl { + Some(SyntaxResult::Err(err)) => { + assert_eq!( + err.reason, + "There should be an opening paren after the identifier, but found nothing" + ); + assert_eq!(err.error_start, 4); + assert_eq!(err.error_end, 6); + } + _ => panic!("Expected an error: {:?}", fun_decl), + } + } + + #[test] + fn should_not_parse_fun_without_closing_paren() { + let tokens = get_tokens(&String::from("fun id(=")).unwrap(); + let fun_decl = try_parse(&tokens, 0); + + match fun_decl { + Some(SyntaxResult::Err(err)) => { + assert_eq!( + 
err.reason, + "There should be a closing paren after the parameter list, but found `=`" + ); + assert_eq!(err.error_start, 7); + assert_eq!(err.error_end, 8); + } + _ => panic!("Expected an error: {:?}", fun_decl), + } + + let tokens = get_tokens(&String::from("fun id(")).unwrap(); + let fun_decl = try_parse(&tokens, 0); + match fun_decl { + Some(SyntaxResult::Err(err)) => { + assert_eq!( + err.reason, + "There should be a closing paren after the parameter list, but found nothing" + ); + assert_eq!(err.error_start, 6); + assert_eq!(err.error_end, 7); + } + _ => panic!("Expected an error: {:?}", fun_decl), + } + } + + #[test] + fn should_not_parse_fun_without_opening_brace() { + let tokens = get_tokens(&String::from("fun id() =")).unwrap(); + let fun_decl = try_parse(&tokens, 0); + + match fun_decl { + Some(SyntaxResult::Err(err)) => { + assert_eq!( + err.reason, + "There should be an opening brace after the parameter list, but found `=`" + ); + assert_eq!(err.error_start, 9); + assert_eq!(err.error_end, 10); + } + _ => panic!("Expected an error: {:?}", fun_decl), + } + + let tokens = get_tokens(&String::from("fun id()")).unwrap(); + let fun_decl = try_parse(&tokens, 0); + match fun_decl { + Some(SyntaxResult::Err(err)) => { + assert_eq!( + err.reason, + "There should be an opening brace after the parameter list, but found nothing" + ); + assert_eq!(err.error_start, 7); + assert_eq!(err.error_end, 8); + } + _ => panic!("Expected an error: {:?}", fun_decl), + } + } + + #[test] + fn should_not_parse_fun_without_closing_brace() { + let tokens = get_tokens(&String::from("fun id() { 20")).unwrap(); + let fun_decl = try_parse(&tokens, 0); + + match fun_decl { + Some(SyntaxResult::Err(err)) => { + assert_eq!( + err.reason, + "There should be a closing brace after the function body, but found `20`" + ); + assert_eq!(err.error_start, 11); + assert_eq!(err.error_end, 13); + } + _ => panic!("Expected an error: {:?}", fun_decl), + } + + let tokens = 
get_tokens(&String::from("fun id() {")).unwrap(); + let fun_decl = try_parse(&tokens, 0); + + match fun_decl { + Some(SyntaxResult::Err(err)) => { + assert_eq!( + err.reason, + "There should be a closing brace after the function body, but found nothing" + ); + assert_eq!(err.error_start, 9); + assert_eq!(err.error_end, 10); + } + _ => panic!("Expected an error: {:?}", fun_decl), + } + } + + #[test] + fn should_parse_simple_function_declaration() { + let tokens = get_tokens(&String::from("fun id() {}")).unwrap(); + let function_declaration = try_parse(&tokens, 0).unwrap(); + + match function_declaration { + SyntaxResult::Ok(TopLevelConstruct::FunctionDeclaration(declaration)) => { + assert_eq!(declaration.identifier, Box::new(String::from("id"))); + } + _ => panic!( + "Expected a function declaration: {:?}", + function_declaration + ), + } + } +} diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 1023bbc..cdfa81c 100755 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -2,16 +2,21 @@ use crate::error_handling::{MistiError, SyntaxError}; mod binding; mod expression; +mod function_declaration; +mod utils; + pub mod ast; -use ast::{Binding, ModuleAST}; use crate::lexic::token::Token; +use ast::{Binding, ModuleAST}; +use self::ast::TopLevelConstruct; +#[derive(Debug)] pub enum SyntaxResult { /// /// A construct has been found - Ok(Binding), + Ok(TopLevelConstruct), /// /// No construct was found None, diff --git a/src/syntax/utils.rs b/src/syntax/utils.rs new file mode 100644 index 0000000..0cb1f56 --- /dev/null +++ b/src/syntax/utils.rs @@ -0,0 +1,15 @@ +use crate::{ + lexic::token::{Token, TokenType}, + utils::Result3, +}; + +pub fn try_token_type(tokens: &Vec, pos: usize, token_type: TokenType) -> Result3<&Token> { + match tokens.get(pos) { + Some(t) if t.token_type == token_type => Result3::Ok(t), + Some(t) if t.token_type == TokenType::Semicolon || t.token_type == TokenType::EOF => { + Result3::None + } + Some(t) => Result3::Err(t), + None => Result3::None, 
+ } +}