Parse multiple top level declarations

master
Araozu 2023-09-17 17:58:56 -05:00
parent 1bccb70c97
commit 5dd104bcc9
9 changed files with 140 additions and 75 deletions

View File

@ -13,7 +13,7 @@ mod tests {
use super::*; use super::*;
use crate::{ use crate::{
lexic::get_tokens, lexic::get_tokens,
syntax::{ast::TopLevelConstruct, construct_ast}, syntax::{ast::TopLevelDeclaration, construct_ast},
}; };
#[test] #[test]
@ -21,11 +21,11 @@ mod tests {
let tokens = get_tokens(&String::from("fun id() {}")).unwrap(); let tokens = get_tokens(&String::from("fun id() {}")).unwrap();
let result = construct_ast(&tokens).unwrap(); let result = construct_ast(&tokens).unwrap();
let fun_dec = result.bindings.get(0).unwrap(); let fun_dec = result.declarations.get(0).unwrap();
match fun_dec { match fun_dec {
TopLevelConstruct::Binding(_) => panic!("Expected function declaration"), TopLevelDeclaration::Binding(_) => panic!("Expected function declaration"),
TopLevelConstruct::FunctionDeclaration(fun_decl) => { TopLevelDeclaration::FunctionDeclaration(fun_decl) => {
let transpiled = fun_decl.transpile(); let transpiled = fun_decl.transpile();
assert_eq!("function id() {}", transpiled); assert_eq!("function id() {}", transpiled);

View File

@ -6,7 +6,7 @@ impl Transpilable for ModuleAST {
/// nodes and leaves of the AST /// nodes and leaves of the AST
fn transpile(&self) -> String { fn transpile(&self) -> String {
let bindings_str: Vec<String> = self let bindings_str: Vec<String> = self
.bindings .declarations
.iter() .iter()
.map(|binding| binding.transpile()) .map(|binding| binding.transpile())
.collect(); .collect();
@ -18,7 +18,7 @@ impl Transpilable for ModuleAST {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use crate::syntax::ast::{Binding, Expression, TopLevelConstruct, ValBinding}; use crate::syntax::ast::{Binding, Expression, TopLevelDeclaration, ValBinding};
#[test] #[test]
fn module_ast_should_transpile() { fn module_ast_should_transpile() {
@ -31,7 +31,7 @@ mod tests {
}); });
let module = ModuleAST { let module = ModuleAST {
bindings: vec![TopLevelConstruct::Binding(binding)], declarations: vec![TopLevelDeclaration::Binding(binding)],
}; };
let result = module.transpile(); let result = module.transpile();

View File

@ -1,12 +1,12 @@
use crate::syntax::ast::TopLevelConstruct; use crate::syntax::ast::TopLevelDeclaration;
use super::Transpilable; use super::Transpilable;
impl Transpilable for TopLevelConstruct { impl Transpilable for TopLevelDeclaration {
fn transpile(&self) -> String { fn transpile(&self) -> String {
match self { match self {
TopLevelConstruct::Binding(binding) => binding.transpile(), TopLevelDeclaration::Binding(binding) => binding.transpile(),
TopLevelConstruct::FunctionDeclaration(fun) => fun.transpile(), TopLevelDeclaration::FunctionDeclaration(fun) => fun.transpile(),
} }
} }
} }

View File

@ -86,14 +86,6 @@ impl Token {
} }
} }
/// Builds the token for a `;` located at `position`.
///
/// The token carries the literal `";"` as its value and is tagged as
/// `TokenType::NewLine` rather than with a dedicated semicolon type.
pub fn new_semicolon(position: usize) -> Token {
    let value = String::from(";");

    Token {
        token_type: TokenType::NewLine,
        value,
        position,
    }
}
pub fn new_datatype(value: String, position: usize) -> Token { pub fn new_datatype(value: String, position: usize) -> Token {
Token { Token {
token_type: TokenType::Datatype, token_type: TokenType::Datatype,

View File

@ -1,9 +1,9 @@
pub struct ModuleAST { pub struct ModuleAST {
pub bindings: Vec<TopLevelConstruct>, pub declarations: Vec<TopLevelDeclaration>,
} }
#[derive(Debug)] #[derive(Debug)]
pub enum TopLevelConstruct { pub enum TopLevelDeclaration {
Binding(Binding), Binding(Binding),
FunctionDeclaration(FunctionDeclaration), FunctionDeclaration(FunctionDeclaration),
} }

View File

@ -9,13 +9,12 @@ use crate::utils::Result3;
// - NotFound: the first token (var | val) was not found, so the parser should try other options // - NotFound: the first token (var | val) was not found, so the parser should try other options
// - Error: token (var | val) was found, but then other expected tokens were not found // - Error: token (var | val) was found, but then other expected tokens were not found
pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult> { pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult> {
let mut pos = pos; let mut current_pos = pos;
// Optional datatype annotation // Optional datatype annotation
let datatype_annotation = { let datatype_annotation = {
match try_token_type(tokens, pos, TokenType::Datatype) { match try_token_type(tokens, current_pos, TokenType::Datatype) {
Result3::Ok(t) => { Result3::Ok(t) => {
pos += 1; current_pos += 1;
Some(String::from(&t.value)) Some(String::from(&t.value))
} }
Result3::Err(_) => None, Result3::Err(_) => None,
@ -29,11 +28,11 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
* val/var keyword * val/var keyword
*/ */
let (is_val, binding_token) = { let (is_val, binding_token) = {
let res1 = try_token_type(tokens, pos, TokenType::VAL); let res1 = try_token_type(tokens, current_pos, TokenType::VAL);
match res1 { match res1 {
Result3::Ok(val_token) => (true, val_token), Result3::Ok(val_token) => (true, val_token),
_ => { _ => {
let res2 = try_token_type(tokens, pos, TokenType::VAR); let res2 = try_token_type(tokens, current_pos, TokenType::VAR);
match res2 { match res2 {
Result3::Ok(var_token) => (false, var_token), Result3::Ok(var_token) => (false, var_token),
// Neither VAL nor VAR were matched, the parser should try // Neither VAL nor VAR were matched, the parser should try
@ -47,7 +46,7 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
/* /*
* identifier * identifier
*/ */
let identifier = match try_token_type(tokens, pos + 1, TokenType::Identifier) { let identifier = match try_token_type(tokens, current_pos + 1, TokenType::Identifier) {
Result3::Ok(t) => t, Result3::Ok(t) => t,
Result3::Err(t) => { Result3::Err(t) => {
// The parser found a token, but it's not an identifier // The parser found a token, but it's not an identifier
@ -76,7 +75,7 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
/* /*
* Equal (=) operator * Equal (=) operator
*/ */
let equal_operator: &Token = match try_operator(tokens, pos + 2, String::from("=")) { let equal_operator: &Token = match try_operator(tokens, current_pos + 2, String::from("=")) {
Result3::Ok(t) => t, Result3::Ok(t) => t,
Result3::Err(t) => { Result3::Err(t) => {
// The parser found a token, but it's not the `=` operator // The parser found a token, but it's not the `=` operator
@ -96,7 +95,7 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
} }
}; };
let expression = expression::try_parse(tokens, pos + 3); let expression = expression::try_parse(tokens, current_pos + 3);
if expression.is_none() { if expression.is_none() {
return Some(SyntaxResult::Err(SyntaxError { return Some(SyntaxResult::Err(SyntaxError {
reason: String::from("Expected an expression after the equal `=` operator"), reason: String::from("Expected an expression after the equal `=` operator"),
@ -120,16 +119,17 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
}) })
}; };
Some(SyntaxResult::Ok(super::ast::TopLevelConstruct::Binding( Some(SyntaxResult::Ok(
binding, super::ast::TopLevelDeclaration::Binding(binding),
))) current_pos + 4,
))
} }
/// Expects the token at `pos` to be of type `token_type` /// Expects the token at `pos` to be of type `token_type`
fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Result3<&Token> { fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Result3<&Token> {
match tokens.get(pos) { match tokens.get(pos) {
Some(t) if t.token_type == token_type => Result3::Ok(t), Some(t) if t.token_type == token_type => Result3::Ok(t),
Some(t) if t.token_type == TokenType::NewLine || t.token_type == TokenType::EOF => { Some(t) if t.token_type == TokenType::EOF => {
Result3::None Result3::None
} }
Some(t) => Result3::Err(t), Some(t) => Result3::Err(t),
@ -151,7 +151,7 @@ fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Result3<&T
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use crate::{lexic::get_tokens, syntax::ast::TopLevelConstruct}; use crate::{lexic::get_tokens, syntax::ast::TopLevelDeclaration};
#[test] #[test]
fn should_parse_val_binding() { fn should_parse_val_binding() {
@ -159,7 +159,7 @@ mod tests {
let binding = try_parse(&tokens, 0).unwrap(); let binding = try_parse(&tokens, 0).unwrap();
match binding { match binding {
SyntaxResult::Ok(TopLevelConstruct::Binding(Binding::Val(binding))) => { SyntaxResult::Ok(TopLevelDeclaration::Binding(Binding::Val(binding)), _) => {
assert_eq!("identifier", format!("{}", binding.identifier)); assert_eq!("identifier", format!("{}", binding.identifier));
} }
_ => panic!(), _ => panic!(),
@ -197,7 +197,7 @@ mod tests {
let binding = try_parse(&tokens, 0).unwrap(); let binding = try_parse(&tokens, 0).unwrap();
match binding { match binding {
SyntaxResult::Ok(TopLevelConstruct::Binding(Binding::Val(binding))) => { SyntaxResult::Ok(TopLevelDeclaration::Binding(Binding::Val(binding)), _) => {
assert_eq!(Some(String::from("Num")), binding.datatype); assert_eq!(Some(String::from("Num")), binding.datatype);
assert_eq!("identifier", format!("{}", binding.identifier)); assert_eq!("identifier", format!("{}", binding.identifier));
} }
@ -208,7 +208,7 @@ mod tests {
let binding = try_parse(&tokens, 0).unwrap(); let binding = try_parse(&tokens, 0).unwrap();
match binding { match binding {
SyntaxResult::Ok(TopLevelConstruct::Binding(Binding::Var(binding))) => { SyntaxResult::Ok(TopLevelDeclaration::Binding(Binding::Var(binding)), _) => {
assert_eq!(Some(String::from("Bool")), binding.datatype); assert_eq!(Some(String::from("Bool")), binding.datatype);
assert_eq!("identifier", format!("{}", binding.identifier)); assert_eq!("identifier", format!("{}", binding.identifier));
} }

View File

@ -5,7 +5,7 @@ use crate::{
}; };
use super::{ use super::{
ast::{FunctionDeclaration, TopLevelConstruct}, ast::{FunctionDeclaration, TopLevelDeclaration},
utils::try_token_type, utils::try_token_type,
SyntaxResult, SyntaxResult,
}; };
@ -154,18 +154,20 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
})); }));
} }
}; };
current_pos += 1;
// Construct and return the function declaration // Construct and return the function declaration
Some(SyntaxResult::Ok(TopLevelConstruct::FunctionDeclaration( Some(SyntaxResult::Ok(
FunctionDeclaration { TopLevelDeclaration::FunctionDeclaration(FunctionDeclaration {
identifier: Box::new(identifier.value.clone()), identifier: Box::new(identifier.value.clone()),
}, }),
))) current_pos,
))
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::{lexic::get_tokens, syntax::ast::TopLevelConstruct}; use crate::{lexic::get_tokens, syntax::ast::TopLevelDeclaration};
use super::*; use super::*;
@ -344,7 +346,7 @@ mod tests {
let function_declaration = try_parse(&tokens, 0).unwrap(); let function_declaration = try_parse(&tokens, 0).unwrap();
match function_declaration { match function_declaration {
SyntaxResult::Ok(TopLevelConstruct::FunctionDeclaration(declaration)) => { SyntaxResult::Ok(TopLevelDeclaration::FunctionDeclaration(declaration), _) => {
assert_eq!(declaration.identifier, Box::new(String::from("id"))); assert_eq!(declaration.identifier, Box::new(String::from("id")));
} }
_ => panic!( _ => panic!(

View File

@ -1,35 +1,45 @@
# Grammar # Grammar
## Module ## Module
A module is (commonly) a single source file. A module is (commonly) a single source file.
- `module = variable_binding*` ```ebnf
module = top level declaration*
```
## Top level declaration
### `variable_binding`
A declaration with `var` or `val`.
```ebnf ```ebnf
var = "var" top level declaration = function declaration
val = "val"
variable_binding = (var | val), identifier, "=", expression
``` ```
### `expression` ## Function declaration
For now just a number, string or boolean
```ebnf ```ebnf
expression = number | string | boolean function declaration = "fun", identifier, params list, return type?, block
```
### Params list
```ebnf
params list = "(", ")"
```
### Return type
```ebnf
return type = ;
``` ```
## Type annotations ### Block
```ebnf ```ebnf
variable_binding = Datatype, (var | val), identifier, "=", expression block = "{", "}"
``` ```

View File

@ -7,16 +7,16 @@ mod utils;
pub mod ast; pub mod ast;
use crate::lexic::token::Token; use crate::lexic::token::{Token, TokenType};
use ast::ModuleAST; use ast::ModuleAST;
use self::ast::TopLevelConstruct; use self::ast::TopLevelDeclaration;
#[derive(Debug)] #[derive(Debug)]
pub enum SyntaxResult { pub enum SyntaxResult {
/// ///
/// A construct has been found /// A construct has been found
Ok(TopLevelConstruct), Ok(TopLevelDeclaration, usize),
/// ///
/// No construct was found /// No construct was found
None, None,
@ -27,21 +27,38 @@ pub enum SyntaxResult {
/// Constructs the Misti AST from a vector of tokens /// Constructs the Misti AST from a vector of tokens
pub fn construct_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST, MistiError> { pub fn construct_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST, MistiError> {
let _token_amount = tokens.len(); let mut top_level_declarations = Vec::new();
let current_pos = 0; let token_amount = tokens.len();
let mut current_pos = 0;
match next_construct(tokens, current_pos) { // Minus one because the last token is always EOF
SyntaxResult::Ok(module) => Ok(ModuleAST { while current_pos < token_amount - 1 {
bindings: vec![module], // Ignore newlines
}), if tokens[current_pos].token_type == TokenType::NewLine {
SyntaxResult::None => Err(MistiError::Syntax(SyntaxError { current_pos += 1;
reason: String::from("PARSER couldn't parse any construction"), continue;
// FIXME: This should get the position of the _token_ that current_pos points to }
error_start: current_pos,
error_end: current_pos, match next_construct(tokens, current_pos) {
})), SyntaxResult::Ok(module, next_pos) => {
SyntaxResult::Err(err) => Err(MistiError::Syntax(err)), top_level_declarations.push(module);
current_pos = next_pos;
}
SyntaxResult::None => {
return Err(MistiError::Syntax(SyntaxError {
reason: String::from("PARSER couldn't parse any construction"),
// FIXME: This should get the position of the _token_ that current_pos points to
error_start: current_pos,
error_end: current_pos,
}));
}
SyntaxResult::Err(err) => return Err(MistiError::Syntax(err)),
}
} }
Ok(ModuleAST {
declarations: top_level_declarations,
})
} }
fn next_construct<'a>(tokens: &'a Vec<Token>, current_pos: usize) -> SyntaxResult { fn next_construct<'a>(tokens: &'a Vec<Token>, current_pos: usize) -> SyntaxResult {
@ -49,3 +66,47 @@ fn next_construct<'a>(tokens: &'a Vec<Token>, current_pos: usize) -> SyntaxResul
.or_else(|| function_declaration::try_parse(tokens, current_pos)) .or_else(|| function_declaration::try_parse(tokens, current_pos))
.unwrap_or_else(|| SyntaxResult::None) .unwrap_or_else(|| SyntaxResult::None)
} }
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes and parses `input`, returning the top level declarations of the module.
    ///
    /// Panics (failing the calling test) if lexing or parsing reports an error.
    fn parse_declarations(input: &str) -> Vec<TopLevelDeclaration> {
        let tokens = crate::lexic::get_tokens(&String::from(input)).unwrap();
        construct_ast(&tokens).unwrap().declarations
    }

    #[test]
    fn should_parse_top_level_construct_with_trailing_newline() {
        let declarations = parse_declarations("fun f1(){}\n");

        // The trailing newline must not produce an extra declaration or an error.
        assert_eq!(declarations.len(), 1);
        assert!(
            matches!(
                declarations[0],
                TopLevelDeclaration::FunctionDeclaration(_)
            ),
            "Expected a function declaration"
        );
    }

    #[test]
    fn should_parse_2_top_level_construct() {
        let declarations = parse_declarations("fun f1(){} fun f2() {}");

        assert_eq!(declarations.len(), 2);
        // Both declarations must be functions; a binding in either slot is a failure.
        for declaration in &declarations {
            assert!(
                matches!(declaration, TopLevelDeclaration::FunctionDeclaration(_)),
                "Expected a function declaration"
            );
        }
    }
}