Parse multiple top level declarations
This commit is contained in:
parent
1bccb70c97
commit
5dd104bcc9
@ -13,7 +13,7 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::{
|
||||
lexic::get_tokens,
|
||||
syntax::{ast::TopLevelConstruct, construct_ast},
|
||||
syntax::{ast::TopLevelDeclaration, construct_ast},
|
||||
};
|
||||
|
||||
#[test]
|
||||
@ -21,11 +21,11 @@ mod tests {
|
||||
let tokens = get_tokens(&String::from("fun id() {}")).unwrap();
|
||||
let result = construct_ast(&tokens).unwrap();
|
||||
|
||||
let fun_dec = result.bindings.get(0).unwrap();
|
||||
let fun_dec = result.declarations.get(0).unwrap();
|
||||
|
||||
match fun_dec {
|
||||
TopLevelConstruct::Binding(_) => panic!("Expected function declaration"),
|
||||
TopLevelConstruct::FunctionDeclaration(fun_decl) => {
|
||||
TopLevelDeclaration::Binding(_) => panic!("Expected function declaration"),
|
||||
TopLevelDeclaration::FunctionDeclaration(fun_decl) => {
|
||||
let transpiled = fun_decl.transpile();
|
||||
|
||||
assert_eq!("function id() {}", transpiled);
|
||||
|
@ -6,7 +6,7 @@ impl Transpilable for ModuleAST {
|
||||
/// nodes and leaves of the AST
|
||||
fn transpile(&self) -> String {
|
||||
let bindings_str: Vec<String> = self
|
||||
.bindings
|
||||
.declarations
|
||||
.iter()
|
||||
.map(|binding| binding.transpile())
|
||||
.collect();
|
||||
@ -18,7 +18,7 @@ impl Transpilable for ModuleAST {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::syntax::ast::{Binding, Expression, TopLevelConstruct, ValBinding};
|
||||
use crate::syntax::ast::{Binding, Expression, TopLevelDeclaration, ValBinding};
|
||||
|
||||
#[test]
|
||||
fn module_ast_should_transpile() {
|
||||
@ -31,7 +31,7 @@ mod tests {
|
||||
});
|
||||
|
||||
let module = ModuleAST {
|
||||
bindings: vec![TopLevelConstruct::Binding(binding)],
|
||||
declarations: vec![TopLevelDeclaration::Binding(binding)],
|
||||
};
|
||||
|
||||
let result = module.transpile();
|
||||
|
@ -1,12 +1,12 @@
|
||||
use crate::syntax::ast::TopLevelConstruct;
|
||||
use crate::syntax::ast::TopLevelDeclaration;
|
||||
|
||||
use super::Transpilable;
|
||||
|
||||
impl Transpilable for TopLevelConstruct {
|
||||
impl Transpilable for TopLevelDeclaration {
|
||||
fn transpile(&self) -> String {
|
||||
match self {
|
||||
TopLevelConstruct::Binding(binding) => binding.transpile(),
|
||||
TopLevelConstruct::FunctionDeclaration(fun) => fun.transpile(),
|
||||
TopLevelDeclaration::Binding(binding) => binding.transpile(),
|
||||
TopLevelDeclaration::FunctionDeclaration(fun) => fun.transpile(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -86,14 +86,6 @@ impl Token {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_semicolon(position: usize) -> Token {
|
||||
Token {
|
||||
token_type: TokenType::NewLine,
|
||||
value: String::from(";"),
|
||||
position,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_datatype(value: String, position: usize) -> Token {
|
||||
Token {
|
||||
token_type: TokenType::Datatype,
|
||||
|
@ -1,9 +1,9 @@
|
||||
pub struct ModuleAST {
|
||||
pub bindings: Vec<TopLevelConstruct>,
|
||||
pub declarations: Vec<TopLevelDeclaration>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum TopLevelConstruct {
|
||||
pub enum TopLevelDeclaration {
|
||||
Binding(Binding),
|
||||
FunctionDeclaration(FunctionDeclaration),
|
||||
}
|
||||
|
@ -9,13 +9,12 @@ use crate::utils::Result3;
|
||||
// - NotFound: the first token (var | val) was not found, so the parser should try other options
|
||||
// - Error: token (var | val) was found, but then other expected tokens were not found
|
||||
pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult> {
|
||||
let mut pos = pos;
|
||||
|
||||
let mut current_pos = pos;
|
||||
// Optional datatype annotation
|
||||
let datatype_annotation = {
|
||||
match try_token_type(tokens, pos, TokenType::Datatype) {
|
||||
match try_token_type(tokens, current_pos, TokenType::Datatype) {
|
||||
Result3::Ok(t) => {
|
||||
pos += 1;
|
||||
current_pos += 1;
|
||||
Some(String::from(&t.value))
|
||||
}
|
||||
Result3::Err(_) => None,
|
||||
@ -29,11 +28,11 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
|
||||
* val/var keyword
|
||||
*/
|
||||
let (is_val, binding_token) = {
|
||||
let res1 = try_token_type(tokens, pos, TokenType::VAL);
|
||||
let res1 = try_token_type(tokens, current_pos, TokenType::VAL);
|
||||
match res1 {
|
||||
Result3::Ok(val_token) => (true, val_token),
|
||||
_ => {
|
||||
let res2 = try_token_type(tokens, pos, TokenType::VAR);
|
||||
let res2 = try_token_type(tokens, current_pos, TokenType::VAR);
|
||||
match res2 {
|
||||
Result3::Ok(var_token) => (false, var_token),
|
||||
// Neither VAL nor VAR was matched, the parser should try
|
||||
@ -47,7 +46,7 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
|
||||
/*
|
||||
* identifier
|
||||
*/
|
||||
let identifier = match try_token_type(tokens, pos + 1, TokenType::Identifier) {
|
||||
let identifier = match try_token_type(tokens, current_pos + 1, TokenType::Identifier) {
|
||||
Result3::Ok(t) => t,
|
||||
Result3::Err(t) => {
|
||||
// The parser found a token, but it's not an identifier
|
||||
@ -76,7 +75,7 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
|
||||
/*
|
||||
* Equal (=) operator
|
||||
*/
|
||||
let equal_operator: &Token = match try_operator(tokens, pos + 2, String::from("=")) {
|
||||
let equal_operator: &Token = match try_operator(tokens, current_pos + 2, String::from("=")) {
|
||||
Result3::Ok(t) => t,
|
||||
Result3::Err(t) => {
|
||||
// The parser found a token, but it's not the `=` operator
|
||||
@ -96,7 +95,7 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
|
||||
}
|
||||
};
|
||||
|
||||
let expression = expression::try_parse(tokens, pos + 3);
|
||||
let expression = expression::try_parse(tokens, current_pos + 3);
|
||||
if expression.is_none() {
|
||||
return Some(SyntaxResult::Err(SyntaxError {
|
||||
reason: String::from("Expected an expression after the equal `=` operator"),
|
||||
@ -120,16 +119,17 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
|
||||
})
|
||||
};
|
||||
|
||||
Some(SyntaxResult::Ok(super::ast::TopLevelConstruct::Binding(
|
||||
binding,
|
||||
)))
|
||||
Some(SyntaxResult::Ok(
|
||||
super::ast::TopLevelDeclaration::Binding(binding),
|
||||
current_pos + 4,
|
||||
))
|
||||
}
|
||||
|
||||
/// Expects the token at `pos` to be of type `token_type`
|
||||
fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Result3<&Token> {
|
||||
match tokens.get(pos) {
|
||||
Some(t) if t.token_type == token_type => Result3::Ok(t),
|
||||
Some(t) if t.token_type == TokenType::NewLine || t.token_type == TokenType::EOF => {
|
||||
Some(t) if t.token_type == TokenType::EOF => {
|
||||
Result3::None
|
||||
}
|
||||
Some(t) => Result3::Err(t),
|
||||
@ -151,7 +151,7 @@ fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Result3<&T
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::{lexic::get_tokens, syntax::ast::TopLevelConstruct};
|
||||
use crate::{lexic::get_tokens, syntax::ast::TopLevelDeclaration};
|
||||
|
||||
#[test]
|
||||
fn should_parse_val_binding() {
|
||||
@ -159,7 +159,7 @@ mod tests {
|
||||
let binding = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
match binding {
|
||||
SyntaxResult::Ok(TopLevelConstruct::Binding(Binding::Val(binding))) => {
|
||||
SyntaxResult::Ok(TopLevelDeclaration::Binding(Binding::Val(binding)), _) => {
|
||||
assert_eq!("identifier", format!("{}", binding.identifier));
|
||||
}
|
||||
_ => panic!(),
|
||||
@ -197,7 +197,7 @@ mod tests {
|
||||
let binding = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
match binding {
|
||||
SyntaxResult::Ok(TopLevelConstruct::Binding(Binding::Val(binding))) => {
|
||||
SyntaxResult::Ok(TopLevelDeclaration::Binding(Binding::Val(binding)), _) => {
|
||||
assert_eq!(Some(String::from("Num")), binding.datatype);
|
||||
assert_eq!("identifier", format!("{}", binding.identifier));
|
||||
}
|
||||
@ -208,7 +208,7 @@ mod tests {
|
||||
let binding = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
match binding {
|
||||
SyntaxResult::Ok(TopLevelConstruct::Binding(Binding::Var(binding))) => {
|
||||
SyntaxResult::Ok(TopLevelDeclaration::Binding(Binding::Var(binding)), _) => {
|
||||
assert_eq!(Some(String::from("Bool")), binding.datatype);
|
||||
assert_eq!("identifier", format!("{}", binding.identifier));
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ use crate::{
|
||||
};
|
||||
|
||||
use super::{
|
||||
ast::{FunctionDeclaration, TopLevelConstruct},
|
||||
ast::{FunctionDeclaration, TopLevelDeclaration},
|
||||
utils::try_token_type,
|
||||
SyntaxResult,
|
||||
};
|
||||
@ -154,18 +154,20 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
|
||||
}));
|
||||
}
|
||||
};
|
||||
current_pos += 1;
|
||||
|
||||
// Construct and return the function declaration
|
||||
Some(SyntaxResult::Ok(TopLevelConstruct::FunctionDeclaration(
|
||||
FunctionDeclaration {
|
||||
Some(SyntaxResult::Ok(
|
||||
TopLevelDeclaration::FunctionDeclaration(FunctionDeclaration {
|
||||
identifier: Box::new(identifier.value.clone()),
|
||||
},
|
||||
)))
|
||||
}),
|
||||
current_pos,
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::{lexic::get_tokens, syntax::ast::TopLevelConstruct};
|
||||
use crate::{lexic::get_tokens, syntax::ast::TopLevelDeclaration};
|
||||
|
||||
use super::*;
|
||||
|
||||
@ -344,7 +346,7 @@ mod tests {
|
||||
let function_declaration = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
match function_declaration {
|
||||
SyntaxResult::Ok(TopLevelConstruct::FunctionDeclaration(declaration)) => {
|
||||
SyntaxResult::Ok(TopLevelDeclaration::FunctionDeclaration(declaration), _) => {
|
||||
assert_eq!(declaration.identifier, Box::new(String::from("id")));
|
||||
}
|
||||
_ => panic!(
|
||||
|
@ -1,35 +1,45 @@
|
||||
# Grammar
|
||||
|
||||
|
||||
## Module
|
||||
|
||||
A module is (commonly) a single source file.
|
||||
|
||||
- `module = variable_binding*`
|
||||
```ebnf
|
||||
module = top level declaration*
|
||||
```
|
||||
|
||||
|
||||
### `variable_binding`
|
||||
|
||||
A declaration with `var` or `val`.
|
||||
## Top level declaration
|
||||
|
||||
```ebnf
|
||||
var = "var"
|
||||
val = "val"
|
||||
variable_binding = (var | val), identifier, "=", expression
|
||||
top level declaration = function declaration
|
||||
```
|
||||
|
||||
|
||||
### `expression`
|
||||
|
||||
For now just a number, string or boolean
|
||||
## Function declaration
|
||||
|
||||
```ebnf
|
||||
expression = number | string | boolean
|
||||
function declaration = "fun", identifier, params list, return type?, block
|
||||
```
|
||||
|
||||
### Params list
|
||||
|
||||
```ebnf
|
||||
params list = "(", ")"
|
||||
```
|
||||
|
||||
### Return type
|
||||
|
||||
```ebnf
|
||||
return type = ;
|
||||
```
|
||||
|
||||
|
||||
## Type annotations
|
||||
### Block
|
||||
|
||||
```ebnf
|
||||
variable_binding = Datatype, (var | val), identifier, "=", expression
|
||||
block = "{", "}"
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
@ -7,16 +7,16 @@ mod utils;
|
||||
|
||||
pub mod ast;
|
||||
|
||||
use crate::lexic::token::Token;
|
||||
use crate::lexic::token::{Token, TokenType};
|
||||
use ast::ModuleAST;
|
||||
|
||||
use self::ast::TopLevelConstruct;
|
||||
use self::ast::TopLevelDeclaration;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum SyntaxResult {
|
||||
///
|
||||
/// A construct has been found; the accompanying `usize` is the position where parsing resumes
|
||||
Ok(TopLevelConstruct),
|
||||
Ok(TopLevelDeclaration, usize),
|
||||
///
|
||||
/// No construct was found
|
||||
None,
|
||||
@ -27,21 +27,38 @@ pub enum SyntaxResult {
|
||||
|
||||
/// Constructs the Misti AST from a vector of tokens
|
||||
pub fn construct_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST, MistiError> {
|
||||
let _token_amount = tokens.len();
|
||||
let current_pos = 0;
|
||||
let mut top_level_declarations = Vec::new();
|
||||
let token_amount = tokens.len();
|
||||
let mut current_pos = 0;
|
||||
|
||||
// Minus one because the last token is always EOF
|
||||
while current_pos < token_amount - 1 {
|
||||
// Ignore newlines
|
||||
if tokens[current_pos].token_type == TokenType::NewLine {
|
||||
current_pos += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
match next_construct(tokens, current_pos) {
|
||||
SyntaxResult::Ok(module) => Ok(ModuleAST {
|
||||
bindings: vec![module],
|
||||
}),
|
||||
SyntaxResult::None => Err(MistiError::Syntax(SyntaxError {
|
||||
SyntaxResult::Ok(module, next_pos) => {
|
||||
top_level_declarations.push(module);
|
||||
current_pos = next_pos;
|
||||
}
|
||||
SyntaxResult::None => {
|
||||
return Err(MistiError::Syntax(SyntaxError {
|
||||
reason: String::from("PARSER couldn't parse any construction"),
|
||||
// FIXME: This should get the position of the _token_ that current_pos points to
|
||||
error_start: current_pos,
|
||||
error_end: current_pos,
|
||||
})),
|
||||
SyntaxResult::Err(err) => Err(MistiError::Syntax(err)),
|
||||
}));
|
||||
}
|
||||
SyntaxResult::Err(err) => return Err(MistiError::Syntax(err)),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(ModuleAST {
|
||||
declarations: top_level_declarations,
|
||||
})
|
||||
}
|
||||
|
||||
fn next_construct<'a>(tokens: &'a Vec<Token>, current_pos: usize) -> SyntaxResult {
|
||||
@ -49,3 +66,47 @@ fn next_construct<'a>(tokens: &'a Vec<Token>, current_pos: usize) -> SyntaxResul
|
||||
.or_else(|| function_declaration::try_parse(tokens, current_pos))
|
||||
.unwrap_or_else(|| SyntaxResult::None)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn should_parse_top_level_construct_with_trailing_newline() {
|
||||
let input = String::from("fun f1(){}\n");
|
||||
let tokens = crate::lexic::get_tokens(&input).unwrap();
|
||||
let declarations = construct_ast(&tokens).unwrap().declarations;
|
||||
|
||||
assert_eq!(declarations.len(), 1);
|
||||
|
||||
match declarations.get(0).unwrap() {
|
||||
TopLevelDeclaration::Binding(_) => panic!("Expected a function declaration"),
|
||||
TopLevelDeclaration::FunctionDeclaration(_f) => {
|
||||
assert!(true)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_parse_2_top_level_construct() {
|
||||
let input = String::from("fun f1(){} fun f2() {}");
|
||||
let tokens = crate::lexic::get_tokens(&input).unwrap();
|
||||
let declarations = construct_ast(&tokens).unwrap().declarations;
|
||||
|
||||
assert_eq!(declarations.len(), 2);
|
||||
|
||||
match declarations.get(0).unwrap() {
|
||||
TopLevelDeclaration::Binding(_) => panic!("Expected a function declaration"),
|
||||
TopLevelDeclaration::FunctionDeclaration(_f) => {
|
||||
assert!(true)
|
||||
}
|
||||
}
|
||||
|
||||
match declarations.get(1).unwrap() {
|
||||
TopLevelDeclaration::Binding(_) => panic!("Expected a function declaration"),
|
||||
TopLevelDeclaration::FunctionDeclaration(_f) => {
|
||||
assert!(true)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user