Parse multiple top level declarations

master
Araozu 2023-09-17 17:58:56 -05:00
parent 1bccb70c97
commit 5dd104bcc9
9 changed files with 140 additions and 75 deletions

View File

@ -13,7 +13,7 @@ mod tests {
use super::*;
use crate::{
lexic::get_tokens,
syntax::{ast::TopLevelConstruct, construct_ast},
syntax::{ast::TopLevelDeclaration, construct_ast},
};
#[test]
@ -21,11 +21,11 @@ mod tests {
let tokens = get_tokens(&String::from("fun id() {}")).unwrap();
let result = construct_ast(&tokens).unwrap();
let fun_dec = result.bindings.get(0).unwrap();
let fun_dec = result.declarations.get(0).unwrap();
match fun_dec {
TopLevelConstruct::Binding(_) => panic!("Expected function declaration"),
TopLevelConstruct::FunctionDeclaration(fun_decl) => {
TopLevelDeclaration::Binding(_) => panic!("Expected function declaration"),
TopLevelDeclaration::FunctionDeclaration(fun_decl) => {
let transpiled = fun_decl.transpile();
assert_eq!("function id() {}", transpiled);

View File

@ -6,7 +6,7 @@ impl Transpilable for ModuleAST {
/// nodes and leaves of the AST
fn transpile(&self) -> String {
let bindings_str: Vec<String> = self
.bindings
.declarations
.iter()
.map(|binding| binding.transpile())
.collect();
@ -18,7 +18,7 @@ impl Transpilable for ModuleAST {
#[cfg(test)]
mod tests {
use super::*;
use crate::syntax::ast::{Binding, Expression, TopLevelConstruct, ValBinding};
use crate::syntax::ast::{Binding, Expression, TopLevelDeclaration, ValBinding};
#[test]
fn module_ast_should_transpile() {
@ -31,7 +31,7 @@ mod tests {
});
let module = ModuleAST {
bindings: vec![TopLevelConstruct::Binding(binding)],
declarations: vec![TopLevelDeclaration::Binding(binding)],
};
let result = module.transpile();

View File

@ -1,12 +1,12 @@
use crate::syntax::ast::TopLevelConstruct;
use crate::syntax::ast::TopLevelDeclaration;
use super::Transpilable;
impl Transpilable for TopLevelConstruct {
impl Transpilable for TopLevelDeclaration {
fn transpile(&self) -> String {
match self {
TopLevelConstruct::Binding(binding) => binding.transpile(),
TopLevelConstruct::FunctionDeclaration(fun) => fun.transpile(),
TopLevelDeclaration::Binding(binding) => binding.transpile(),
TopLevelDeclaration::FunctionDeclaration(fun) => fun.transpile(),
}
}
}

View File

@ -86,14 +86,6 @@ impl Token {
}
}
/// Builds the token that stands for a `;` found at `position`.
///
/// Despite the name, the resulting token has type `TokenType::NewLine`;
/// only its `value` is the literal `;`.
pub fn new_semicolon(position: usize) -> Token {
    let value = String::from(";");
    Token {
        token_type: TokenType::NewLine,
        value,
        position,
    }
}
pub fn new_datatype(value: String, position: usize) -> Token {
Token {
token_type: TokenType::Datatype,

View File

@ -1,9 +1,9 @@
// NOTE(review): `bindings` (typed with `TopLevelConstruct`) looks like the pre-rename
// spelling of `declarations` — confirm that only one of the two fields should remain.
/// Root AST node of a module: holds the items found at the top level of the module.
pub struct ModuleAST {
pub bindings: Vec<TopLevelConstruct>,
pub declarations: Vec<TopLevelDeclaration>,
}
#[derive(Debug)]
pub enum TopLevelConstruct {
pub enum TopLevelDeclaration {
Binding(Binding),
FunctionDeclaration(FunctionDeclaration),
}

View File

@ -9,13 +9,12 @@ use crate::utils::Result3;
// - NotFound: the first token (var | val) was not found, so the parser should try other options
// - Error: token (var | val) was found, but then other expected tokens were not found
pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult> {
let mut pos = pos;
let mut current_pos = pos;
// Optional datatype annotation
let datatype_annotation = {
match try_token_type(tokens, pos, TokenType::Datatype) {
match try_token_type(tokens, current_pos, TokenType::Datatype) {
Result3::Ok(t) => {
pos += 1;
current_pos += 1;
Some(String::from(&t.value))
}
Result3::Err(_) => None,
@ -29,11 +28,11 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
* val/var keyword
*/
let (is_val, binding_token) = {
let res1 = try_token_type(tokens, pos, TokenType::VAL);
let res1 = try_token_type(tokens, current_pos, TokenType::VAL);
match res1 {
Result3::Ok(val_token) => (true, val_token),
_ => {
let res2 = try_token_type(tokens, pos, TokenType::VAR);
let res2 = try_token_type(tokens, current_pos, TokenType::VAR);
match res2 {
Result3::Ok(var_token) => (false, var_token),
// Neither VAL nor VAR were matched, the parser should try
@ -47,7 +46,7 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
/*
* identifier
*/
let identifier = match try_token_type(tokens, pos + 1, TokenType::Identifier) {
let identifier = match try_token_type(tokens, current_pos + 1, TokenType::Identifier) {
Result3::Ok(t) => t,
Result3::Err(t) => {
// The parser found a token, but it's not an identifier
@ -76,7 +75,7 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
/*
* Equal (=) operator
*/
let equal_operator: &Token = match try_operator(tokens, pos + 2, String::from("=")) {
let equal_operator: &Token = match try_operator(tokens, current_pos + 2, String::from("=")) {
Result3::Ok(t) => t,
Result3::Err(t) => {
// The parser found a token, but it's not the `=` operator
@ -96,7 +95,7 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
}
};
let expression = expression::try_parse(tokens, pos + 3);
let expression = expression::try_parse(tokens, current_pos + 3);
if expression.is_none() {
return Some(SyntaxResult::Err(SyntaxError {
reason: String::from("Expected an expression after the equal `=` operator"),
@ -120,16 +119,17 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
})
};
Some(SyntaxResult::Ok(super::ast::TopLevelConstruct::Binding(
binding,
)))
Some(SyntaxResult::Ok(
super::ast::TopLevelDeclaration::Binding(binding),
current_pos + 4,
))
}
/// Expects the token at `pos` to be of type `token_type`
fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Result3<&Token> {
match tokens.get(pos) {
Some(t) if t.token_type == token_type => Result3::Ok(t),
Some(t) if t.token_type == TokenType::NewLine || t.token_type == TokenType::EOF => {
Some(t) if t.token_type == TokenType::EOF => {
Result3::None
}
Some(t) => Result3::Err(t),
@ -151,7 +151,7 @@ fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Result3<&T
#[cfg(test)]
mod tests {
use super::*;
use crate::{lexic::get_tokens, syntax::ast::TopLevelConstruct};
use crate::{lexic::get_tokens, syntax::ast::TopLevelDeclaration};
#[test]
fn should_parse_val_binding() {
@ -159,7 +159,7 @@ mod tests {
let binding = try_parse(&tokens, 0).unwrap();
match binding {
SyntaxResult::Ok(TopLevelConstruct::Binding(Binding::Val(binding))) => {
SyntaxResult::Ok(TopLevelDeclaration::Binding(Binding::Val(binding)), _) => {
assert_eq!("identifier", format!("{}", binding.identifier));
}
_ => panic!(),
@ -197,7 +197,7 @@ mod tests {
let binding = try_parse(&tokens, 0).unwrap();
match binding {
SyntaxResult::Ok(TopLevelConstruct::Binding(Binding::Val(binding))) => {
SyntaxResult::Ok(TopLevelDeclaration::Binding(Binding::Val(binding)), _) => {
assert_eq!(Some(String::from("Num")), binding.datatype);
assert_eq!("identifier", format!("{}", binding.identifier));
}
@ -208,7 +208,7 @@ mod tests {
let binding = try_parse(&tokens, 0).unwrap();
match binding {
SyntaxResult::Ok(TopLevelConstruct::Binding(Binding::Var(binding))) => {
SyntaxResult::Ok(TopLevelDeclaration::Binding(Binding::Var(binding)), _) => {
assert_eq!(Some(String::from("Bool")), binding.datatype);
assert_eq!("identifier", format!("{}", binding.identifier));
}

View File

@ -5,7 +5,7 @@ use crate::{
};
use super::{
ast::{FunctionDeclaration, TopLevelConstruct},
ast::{FunctionDeclaration, TopLevelDeclaration},
utils::try_token_type,
SyntaxResult,
};
@ -154,18 +154,20 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
}));
}
};
current_pos += 1;
// Construct and return the function declaration
Some(SyntaxResult::Ok(TopLevelConstruct::FunctionDeclaration(
FunctionDeclaration {
Some(SyntaxResult::Ok(
TopLevelDeclaration::FunctionDeclaration(FunctionDeclaration {
identifier: Box::new(identifier.value.clone()),
},
)))
}),
current_pos,
))
}
#[cfg(test)]
mod tests {
use crate::{lexic::get_tokens, syntax::ast::TopLevelConstruct};
use crate::{lexic::get_tokens, syntax::ast::TopLevelDeclaration};
use super::*;
@ -344,7 +346,7 @@ mod tests {
let function_declaration = try_parse(&tokens, 0).unwrap();
match function_declaration {
SyntaxResult::Ok(TopLevelConstruct::FunctionDeclaration(declaration)) => {
SyntaxResult::Ok(TopLevelDeclaration::FunctionDeclaration(declaration), _) => {
assert_eq!(declaration.identifier, Box::new(String::from("id")));
}
_ => panic!(

View File

@ -1,35 +1,45 @@
# Grammar
## Module
A module is (commonly) a single source file.
- `module = variable_binding*`
```ebnf
module = top level declaration*
```
### `variable_binding`
A declaration with `var` or `val`.
## Top level declaration
```ebnf
var = "var"
val = "val"
variable_binding = (var | val), identifier, "=", expression
top level declaration = function declaration
```
### `expression`
For now just a number, string or boolean
## Function declaration
```ebnf
expression = number | string | boolean
function declaration = "fun", identifier, params list, return type?, block
```
### Params list
```ebnf
params list = "(", ")"
```
### Return type
```ebnf
return type = ;
```
## Type annotations
### Block
```ebnf
variable_binding = Datatype, (var | val), identifier, "=", expression
block = "{", "}"
```

View File

@ -7,16 +7,16 @@ mod utils;
pub mod ast;
use crate::lexic::token::Token;
use crate::lexic::token::{Token, TokenType};
use ast::ModuleAST;
use self::ast::TopLevelConstruct;
use self::ast::TopLevelDeclaration;
#[derive(Debug)]
pub enum SyntaxResult {
///
/// A construct has been found
Ok(TopLevelConstruct),
Ok(TopLevelDeclaration, usize),
///
/// No construct was found
None,
@ -27,21 +27,38 @@ pub enum SyntaxResult {
/// Constructs the Misti AST from a vector of tokens
pub fn construct_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST, MistiError> {
let _token_amount = tokens.len();
let current_pos = 0;
let mut top_level_declarations = Vec::new();
let token_amount = tokens.len();
let mut current_pos = 0;
// Minus one because the last token is always EOF
while current_pos < token_amount - 1 {
// Ignore newlines
if tokens[current_pos].token_type == TokenType::NewLine {
current_pos += 1;
continue;
}
match next_construct(tokens, current_pos) {
SyntaxResult::Ok(module) => Ok(ModuleAST {
bindings: vec![module],
}),
SyntaxResult::None => Err(MistiError::Syntax(SyntaxError {
SyntaxResult::Ok(module, next_pos) => {
top_level_declarations.push(module);
current_pos = next_pos;
}
SyntaxResult::None => {
return Err(MistiError::Syntax(SyntaxError {
reason: String::from("PARSER couldn't parse any construction"),
// FIXME: This should get the position of the _token_ that current_pos points to
error_start: current_pos,
error_end: current_pos,
})),
SyntaxResult::Err(err) => Err(MistiError::Syntax(err)),
}));
}
SyntaxResult::Err(err) => return Err(MistiError::Syntax(err)),
}
}
Ok(ModuleAST {
declarations: top_level_declarations,
})
}
fn next_construct<'a>(tokens: &'a Vec<Token>, current_pos: usize) -> SyntaxResult {
@ -49,3 +66,47 @@ fn next_construct<'a>(tokens: &'a Vec<Token>, current_pos: usize) -> SyntaxResul
.or_else(|| function_declaration::try_parse(tokens, current_pos))
.unwrap_or_else(|| SyntaxResult::None)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes and parses `input`, returning the module's top level declarations.
    ///
    /// Panics (and so fails the calling test) if lexing or parsing fails.
    fn parse_declarations(input: &str) -> Vec<TopLevelDeclaration> {
        let tokens = crate::lexic::get_tokens(&String::from(input)).unwrap();
        construct_ast(&tokens).unwrap().declarations
    }

    /// Fails the calling test unless every declaration is a function declaration.
    fn assert_all_function_declarations(declarations: &[TopLevelDeclaration]) {
        for declaration in declarations {
            assert!(
                matches!(declaration, TopLevelDeclaration::FunctionDeclaration(_)),
                "Expected a function declaration"
            );
        }
    }

    #[test]
    fn should_parse_top_level_construct_with_trailing_newline() {
        let declarations = parse_declarations("fun f1(){}\n");

        assert_eq!(declarations.len(), 1);
        assert_all_function_declarations(&declarations);
    }

    #[test]
    fn should_parse_2_top_level_construct() {
        let declarations = parse_declarations("fun f1(){} fun f2() {}");

        assert_eq!(declarations.len(), 2);
        assert_all_function_declarations(&declarations);
    }
}