diff --git a/src/codegen/binding.rs b/src/codegen/binding.rs index e2cc0c6..e94421a 100644 --- a/src/codegen/binding.rs +++ b/src/codegen/binding.rs @@ -13,7 +13,10 @@ impl Transpilable for Binding<'_> { #[cfg(test)] mod tests { use super::*; - use crate::{lexic::token::{Token, TokenType}, syntax::ast::{var_binding::Binding, Expression}}; + use crate::{ + lexic::token::{Token, TokenType}, + syntax::ast::{var_binding::Binding, Expression}, + }; #[test] fn binding_should_transpile() { diff --git a/src/codegen/function_declaration.rs b/src/codegen/function_declaration.rs index ca5824a..4f722a3 100644 --- a/src/codegen/function_declaration.rs +++ b/src/codegen/function_declaration.rs @@ -34,6 +34,7 @@ mod tests { assert_eq!("function id() {\n\n}", transpiled); } + _ => panic!("Not implemented: Expression at top level"), } } } diff --git a/src/codegen/module_ast.rs b/src/codegen/module_ast.rs index 632ae91..2cae2b5 100644 --- a/src/codegen/module_ast.rs +++ b/src/codegen/module_ast.rs @@ -18,7 +18,10 @@ impl Transpilable for ModuleAST<'_> { #[cfg(test)] mod tests { use super::*; - use crate::{lexic::token::{Token, TokenType}, syntax::ast::{var_binding::Binding, Expression, TopLevelDeclaration}}; + use crate::{ + lexic::token::{Token, TokenType}, + syntax::ast::{var_binding::Binding, Expression, TopLevelDeclaration}, + }; #[test] fn module_ast_should_transpile() { diff --git a/src/codegen/top_level_construct.rs b/src/codegen/top_level_construct.rs index 2e9fc9e..b21ab5d 100644 --- a/src/codegen/top_level_construct.rs +++ b/src/codegen/top_level_construct.rs @@ -7,6 +7,7 @@ impl Transpilable for TopLevelDeclaration<'_> { match self { TopLevelDeclaration::Binding(binding) => binding.transpile(), TopLevelDeclaration::FunctionDeclaration(fun) => fun.transpile(), + _ => panic!("Not implemented: Expression at top level"), } } } diff --git a/src/lexic/token.rs b/src/lexic/token.rs index d45fab5..6941508 100755 --- a/src/lexic/token.rs +++ b/src/lexic/token.rs @@ -22,7 +22,7 @@ pub enum TokenType { FUN, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct Token { pub token_type: TokenType, // The token as a raw string diff --git a/src/semantic/impls.rs b/src/semantic/impls.rs index 3b97844..636e31f 100644 --- a/src/semantic/impls.rs +++ b/src/semantic/impls.rs @@ -51,12 +51,15 @@ impl SemanticCheck for TopLevelDeclaration<'_> { binding_name ), }; - + return Err(MistiError::Semantic(error)); } }; - scope.insert(binding_name.clone(), SymbolEntry::new_variable(datatype.value.clone())); + scope.insert( + binding_name.clone(), + SymbolEntry::new_variable(datatype.value.clone()), + ); Ok(()) } @@ -77,10 +80,14 @@ impl SemanticCheck for TopLevelDeclaration<'_> { return Err(MistiError::Semantic(error)); } - scope.insert(function_name, SymbolEntry::new_function(vec![], "Unit".into())); + scope.insert( + function_name, + SymbolEntry::new_function(vec![], "Unit".into()), + ); Ok(()) } + _ => panic!("Not implemented"), } } } diff --git a/src/syntax/ast/mod.rs b/src/syntax/ast/mod.rs index fefc7b5..0677042 100644 --- a/src/syntax/ast/mod.rs +++ b/src/syntax/ast/mod.rs @@ -14,6 +14,7 @@ pub struct ModuleAST<'a> { pub enum TopLevelDeclaration<'a> { Binding(var_binding::Binding<'a>), FunctionDeclaration(FunctionDeclaration<'a>), + Expression(Expression<'a>), } #[derive(Debug)] diff --git a/src/syntax/binding.rs b/src/syntax/binding.rs index 7065a4c..544a3ec 100644 --- a/src/syntax/binding.rs +++ b/src/syntax/binding.rs @@ -5,7 +5,7 @@ use crate::error_handling::SyntaxError; use crate::lexic::token::{Token, TokenType}; use crate::utils::Result3; -pub fn try_parse<'a>(tokens: &'a Vec, pos: usize) -> ParseResult { +pub fn try_parse<'a>(tokens: &'a Vec, pos: usize) -> ParseResult { let mut current_pos = pos; // TODO: Detect if the binding starts with a datatype diff --git a/src/syntax/block.rs b/src/syntax/block.rs index 8209e5a..baf7c61 100644 --- a/src/syntax/block.rs +++ b/src/syntax/block.rs @@ -6,7 +6,7 @@ use crate::{ use super::{ast::Block, utils::parse_token_type, ParseResult}; // Assumes that the token at `pos` is a { -pub fn parse_block<'a>(tokens: &'a Vec, pos: usize) -> ParseResult { +pub fn parse_block<'a>(tokens: &'a Vec, pos: usize) -> ParseResult { let mut current_pos = pos; let (opening_brace, next_pos) = diff --git a/src/syntax/expression/comparison.rs b/src/syntax/expression/comparison.rs index 729026b..1e6b6a0 100644 --- a/src/syntax/expression/comparison.rs +++ b/src/syntax/expression/comparison.rs @@ -8,7 +8,7 @@ use crate::{ /// ```ebnf /// comparison = term, ((">" | ">=" | "<" | "<="), term)*; /// ``` -pub fn try_parse(tokens: &Vec, pos: usize) -> ParseResult { +pub fn try_parse(tokens: &Vec, pos: usize) -> ParseResult { let (term, next_pos) = match super::term::try_parse(tokens, pos) { ParseResult::Ok(expr, next_pos) => (expr, next_pos), _ => return ParseResult::Unmatched, @@ -21,7 +21,7 @@ fn parse_many<'a>( tokens: &'a Vec, pos: usize, prev_expr: Expression<'a>, -) -> ParseResult, ()> { +) -> ParseResult> { // comparison = term, ((">" | ">=" | "<" | "<="), term)*; match tokens.get(pos) { diff --git a/src/syntax/expression/equality.rs b/src/syntax/expression/equality.rs index dd34929..31ebb41 100644 --- a/src/syntax/expression/equality.rs +++ b/src/syntax/expression/equality.rs @@ -8,7 +8,7 @@ use crate::{ /// ```ebnf /// equality = comparison, (("==" | "!="), comparison )*; /// ``` -pub fn try_parse(tokens: &Vec, pos: usize) -> ParseResult { +pub fn try_parse(tokens: &Vec, pos: usize) -> ParseResult { let (comparison, next_pos) = match super::comparison::try_parse(tokens, pos) { ParseResult::Ok(expr, next_pos) => (expr, next_pos), _ => return ParseResult::Unmatched, @@ -21,7 +21,7 @@ fn parse_many<'a>( tokens: &'a Vec, pos: usize, prev_expr: Expression<'a>, -) -> ParseResult, ()> { +) -> ParseResult> { // equality = comparison, (("==" | "!="), comparison )*; match tokens.get(pos) { diff --git a/src/syntax/expression/factor.rs b/src/syntax/expression/factor.rs index 1c4f568..bddfc8b 100644 --- a/src/syntax/expression/factor.rs +++ b/src/syntax/expression/factor.rs @@ -8,7 +8,7 @@ use crate::{ /// ```ebnf /// factor = unary, (("/" | "*"), unary)*; /// ``` -pub fn try_parse(tokens: &Vec, pos: usize) -> ParseResult { +pub fn try_parse(tokens: &Vec, pos: usize) -> ParseResult { let (unary, next_pos) = match super::unary::try_parse(tokens, pos) { ParseResult::Ok(expr, next_pos) => (expr, next_pos), _ => return ParseResult::Unmatched, @@ -21,7 +21,7 @@ fn parse_many<'a>( tokens: &'a Vec, pos: usize, prev_expr: Expression<'a>, -) -> ParseResult, ()> { +) -> ParseResult> { // (("/" | "*"), unary)* match tokens.get(pos) { diff --git a/src/syntax/expression/function_call_expr.rs b/src/syntax/expression/function_call_expr.rs index 0ac781c..77e36f3 100644 --- a/src/syntax/expression/function_call_expr.rs +++ b/src/syntax/expression/function_call_expr.rs @@ -13,7 +13,7 @@ use crate::{ /// function call expr = primary, "(", (arguments list)?, ")" /// | primary; /// ``` -pub fn try_parse(tokens: &Vec, pos: usize) -> ParseResult { +pub fn try_parse(tokens: &Vec, pos: usize) -> ParseResult { let (primary_expr, next_pos) = match super::primary::try_parse(tokens, pos) { ParseResult::Ok(expr, next_pos) => (expr, next_pos), _ => return ParseResult::Unmatched, diff --git a/src/syntax/expression/mod.rs b/src/syntax/expression/mod.rs index 6fb493a..db8684b 100644 --- a/src/syntax/expression/mod.rs +++ b/src/syntax/expression/mod.rs @@ -10,7 +10,7 @@ mod term; mod unary; /// Expression is defined in the grammar. -pub fn try_parse(tokens: &Vec, pos: usize) -> ParseResult { +pub fn try_parse(tokens: &Vec, pos: usize) -> ParseResult { return equality::try_parse(tokens, pos); } diff --git a/src/syntax/expression/primary.rs b/src/syntax/expression/primary.rs index 78d5272..e235827 100644 --- a/src/syntax/expression/primary.rs +++ b/src/syntax/expression/primary.rs @@ -9,7 +9,7 @@ use crate::{ /// ```ebnf /// primary = number | string | boolean | identifier | ("(", expression, ")"); /// ``` -pub fn try_parse(tokens: &Vec, pos: usize) -> ParseResult { +pub fn try_parse(tokens: &Vec, pos: usize) -> ParseResult { match tokens.get_significant(pos) { Some((token, token_pos)) => match token.token_type { TokenType::Number => ParseResult::Ok(Expression::Number(&token.value), token_pos + 1), @@ -27,7 +27,7 @@ pub fn try_parse(tokens: &Vec, pos: usize) -> ParseResult } } -fn parse_parenthesized_expression(tokens: &Vec, pos: usize) -> ParseResult { +fn parse_parenthesized_expression(tokens: &Vec, pos: usize) -> ParseResult { let expression = super::try_parse(tokens, pos + 1); match expression { ParseResult::Ok(expression, next_pos) => match tokens.get(next_pos) { diff --git a/src/syntax/expression/term.rs b/src/syntax/expression/term.rs index 6a1fe71..a6d1db5 100644 --- a/src/syntax/expression/term.rs +++ b/src/syntax/expression/term.rs @@ -8,7 +8,7 @@ use crate::{ /// ```ebnf /// term = factor, (("-" | "+"), factor)*; /// ``` -pub fn try_parse(tokens: &Vec, pos: usize) -> ParseResult { +pub fn try_parse(tokens: &Vec, pos: usize) -> ParseResult { let (factor, next_pos) = match super::factor::try_parse(tokens, pos) { ParseResult::Ok(expr, next_pos) => (expr, next_pos), _ => return ParseResult::Unmatched, @@ -21,7 +21,7 @@ fn parse_many<'a>( tokens: &'a Vec, pos: usize, prev_expr: Expression<'a>, -) -> ParseResult, ()> { +) -> ParseResult> { // term = factor, (("-" | "+"), factor)*; match tokens.get(pos) { diff --git a/src/syntax/expression/unary.rs b/src/syntax/expression/unary.rs index 808a58e..70ef553 100644 --- a/src/syntax/expression/unary.rs +++ b/src/syntax/expression/unary.rs @@ -11,7 +11,7 @@ use super::function_call_expr; /// unary = ("!" | "-"), expression /// | function call expr; /// ``` -pub fn try_parse(tokens: &Vec, pos: usize) -> ParseResult { +pub fn try_parse(tokens: &Vec, pos: usize) -> ParseResult { match tokens.get(pos) { Some(token) if token.value == "!" || token.value == "-" => { match super::try_parse(tokens, pos + 1) { diff --git a/src/syntax/functions/arguments_list.rs b/src/syntax/functions/arguments_list.rs index 77f67b1..ac1a90b 100644 --- a/src/syntax/functions/arguments_list.rs +++ b/src/syntax/functions/arguments_list.rs @@ -8,7 +8,7 @@ use crate::{ }, }; -pub fn try_parse<'a>(tokens: &'a Vec, pos: usize) -> ParseResult { +pub fn try_parse<'a>(tokens: &'a Vec, pos: usize) -> ParseResult { let mut current_pos = pos; let (opening_paren, next_pos) = diff --git a/src/syntax/functions/function_declaration.rs b/src/syntax/functions/function_declaration.rs index 5a60481..85a1dc4 100644 --- a/src/syntax/functions/function_declaration.rs +++ b/src/syntax/functions/function_declaration.rs @@ -14,7 +14,7 @@ use super::{ params_list::parse_params_list, }; -pub fn try_parse<'a>(tokens: &'a Vec, pos: usize) -> ParseResult { +pub fn try_parse<'a>(tokens: &'a Vec, pos: usize) -> ParseResult { let mut current_pos = pos; // `fun` keyword diff --git a/src/syntax/functions/params_list.rs b/src/syntax/functions/params_list.rs index 47f4017..f4619c9 100644 --- a/src/syntax/functions/params_list.rs +++ b/src/syntax/functions/params_list.rs @@ -9,10 +9,7 @@ use super::super::{ utils, ParseResult, }; -pub fn parse_params_list<'a>( - tokens: &'a Vec, - pos: usize, -) -> ParseResult { +pub fn parse_params_list<'a>(tokens: &'a Vec, pos: usize) -> ParseResult { let mut current_pos = pos; let (opening_paren, next_pos) = @@ -24,6 +21,14 @@ pub fn parse_params_list<'a>( }; current_pos = next_pos; + /* + val (opening_paren, next_pos) = try parse_token_type(...) + + val (next_parameter, next_pos) = try parse_param_definition(...) catch + case ::Err(e) { return ::Err(e) } + else { break } + */ + // Parse parameters definitions, separated by commas let mut parameters = Vec::::new(); loop { @@ -79,10 +84,7 @@ pub fn parse_params_list<'a>( ParseResult::Ok(ParamsList {}, current_pos) } -fn parse_param_definition<'a>( - tokens: &'a Vec, - pos: usize, -) -> ParseResult { +fn parse_param_definition<'a>(tokens: &'a Vec, pos: usize) -> ParseResult { // Parse a single parameter definition of the form: // - Type identifier // There will be more constructs in the future, like: diff --git a/src/syntax/grammar.md b/src/syntax/grammar.md index ed4f933..b1124ab 100644 --- a/src/syntax/grammar.md +++ b/src/syntax/grammar.md @@ -1,19 +1,21 @@ # Grammar -## Module - -A module is (commonly) a single source file. +## Source file ```ebnf -module = top level declaration* +source file = top level statement* ``` -## Top level declaration +## Top level statement + +Current focus: Have a mvp compiler (w lexical/syntactic/semantic analysis + codegen) for +simple function calls, and then implement other features top down ```ebnf -top level declaration = function declaration +top level statement = expression + | function declaration ``` diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 958f84c..7b54a5a 100755 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -15,7 +15,7 @@ use ast::ModuleAST; use self::ast::TopLevelDeclaration; #[derive(Debug)] -pub enum ParseResult { +pub enum ParseResult { /// The parsing was a success. The first element is the parsed construct, /// the second element is the position of the next token to parse Ok(A, usize), @@ -26,11 +26,19 @@ pub enum ParseResult { Err(SyntaxError), /// Some special value was expected, but something else was found. /// The inside element is the something else found. - Mismatch(B), + Mismatch(Token), /// This parsing didn't succeed, but it's not a fatal error. Unmatched, } +enum ParsingError { + Mismatch(Token), + Unmatch, + Error(SyntaxError), +} + +type ParsingResult = Result<(A, usize), ParsingError>; + /// Constructs the Misti AST from a vector of tokens pub fn construct_ast<'a>(tokens: &'a Vec) -> Result { let mut top_level_declarations = Vec::new(); @@ -70,7 +78,7 @@ pub fn construct_ast<'a>(tokens: &'a Vec) -> Result( tokens: &'a Vec, current_pos: usize, -) -> ParseResult { +) -> ParseResult { None.or_else( || match functions::function_declaration::try_parse(tokens, current_pos) { ParseResult::Ok(declaration, next_pos) => Some(ParseResult::Ok( @@ -81,6 +89,15 @@ fn next_construct<'a>( _ => None, }, ) + .or_else(|| match expression::try_parse(tokens, current_pos) { + ParseResult::Ok(expression, next_pos) => Some(ParseResult::Ok( + TopLevelDeclaration::Expression(expression), + next_pos, + )), + ParseResult::Err(_) => todo!(), + ParseResult::Mismatch(_) => todo!(), + ParseResult::Unmatched => todo!(), + }) .unwrap_or_else(|| ParseResult::Unmatched) } @@ -101,6 +118,7 @@ mod tests { TopLevelDeclaration::FunctionDeclaration(_f) => { assert!(true) } + _ => panic!("Not implemented: Expression at top level"), } } @@ -117,6 +135,7 @@ mod tests { TopLevelDeclaration::FunctionDeclaration(_f) => { assert!(true) } + _ => panic!("Not implemented: Expression at top level"), } match declarations.get(1).unwrap() { @@ -124,6 +143,7 @@ mod tests { TopLevelDeclaration::FunctionDeclaration(_f) => { assert!(true) } + _ => panic!("Not implemented: Expression at top level"), } } } diff --git a/src/syntax/statement.rs b/src/syntax/statement.rs index c2f48f0..ff449be 100644 --- a/src/syntax/statement.rs +++ b/src/syntax/statement.rs @@ -7,7 +7,7 @@ use super::{ ParseResult, }; -pub fn try_parse<'a>(tokens: &'a Vec, pos: usize) -> ParseResult { +pub fn try_parse<'a>(tokens: &'a Vec, pos: usize) -> ParseResult { None.or_else(|| match binding::try_parse(tokens, pos) { ParseResult::Ok(b, next) => Some(ParseResult::Ok(Statement::Binding(b), next)), ParseResult::Err(err) => Some(ParseResult::Err(err)), diff --git a/src/syntax/utils.rs b/src/syntax/utils.rs index b5057a6..7f685ca 100644 --- a/src/syntax/utils.rs +++ b/src/syntax/utils.rs @@ -33,7 +33,7 @@ impl Tokenizer for Vec { } } -/// Expects the token at `pos` to be of type `token_type` +/// Expects the token at `pos` to be of type `token_type`. Doesn't ignore whitespace or newlines pub fn try_token_type(tokens: &Vec, pos: usize, token_type: TokenType) -> Result3<&Token> { match tokens.get(pos) { Some(t) if t.token_type == token_type => Result3::Ok(t), @@ -45,6 +45,7 @@ pub fn try_token_type(tokens: &Vec, pos: usize, token_type: TokenType) -> } } +/// Expects the token at `pos` to be an operator of value `operator`. Doesn't ignore whitespace or newlines pub fn try_operator(tokens: &Vec, pos: usize, operator: String) -> Result3<&Token> { match tokens.get(pos) { Some(t) if t.token_type == TokenType::Operator && t.value == operator => Result3::Ok(t), @@ -56,12 +57,12 @@ pub fn try_operator(tokens: &Vec, pos: usize, operator: String) -> Result } } -/// Expects the token at `pos` to be of type `token_type` +/// Expects the token at `pos` to be of type `token_type`, ignoring all whitespace & newlines pub fn parse_token_type( tokens: &Vec, pos: usize, token_type: TokenType, -) -> ParseResult<&Token, &Token> { +) -> ParseResult<&Token> { let mut current_pos = pos; // Ignore all whitespace and newlines @@ -81,7 +82,7 @@ pub fn parse_token_type( Some(t) if t.token_type == TokenType::EOF || t.token_type == TokenType::NewLine => { ParseResult::Unmatched } - Some(t) => ParseResult::Mismatch(t), + Some(t) => ParseResult::Mismatch(t.clone()), None => ParseResult::Unmatched, } }