refactor: begin to simplify AST

This commit is contained in:
Araozu 2024-06-01 18:57:10 -05:00
parent 18f3f21eec
commit 5dafd6ca20
30 changed files with 117 additions and 221 deletions

View File

@ -28,10 +28,13 @@
## v0.0.13
- [ ] Define a formal grammar
- [ ] Begin work on a formal grammar
- [ ] Simplify AST
- [ ] Define the top level constructs
- [ ] Include the original tokens in the AST
- [ ] Implement a hello world until semantic analysis
- [ ] Refactor code
- [ ] Remove `PARSER couldn't parse any construction` error & replace with an actual error message
## v0.0.12

View File

@ -1,7 +1,7 @@
use super::Transpilable;
use crate::syntax::ast::var_binding::Binding;
use crate::syntax::ast::var_binding::VariableBinding;
impl Transpilable for Binding<'_> {
impl Transpilable for VariableBinding<'_> {
/// Transpiles val and var bindings into PHP.
fn transpile(&self) -> String {
let expression_str = self.expression.transpile();
@ -15,7 +15,7 @@ mod tests {
use super::*;
use crate::{
lexic::token::{Token, TokenType},
syntax::ast::{var_binding::Binding, Expression},
syntax::ast::{var_binding::VariableBinding, Expression},
};
#[test]
@ -27,7 +27,7 @@ mod tests {
position: 0,
};
let value = String::from("322");
let binding = Binding {
let binding = VariableBinding {
datatype: None,
identifier: &id_token,
expression: Expression::Int(&value),

View File

@ -17,19 +17,19 @@ mod tests {
use super::*;
use crate::{
lexic::get_tokens,
syntax::{ast::TopLevelDeclaration, construct_ast},
syntax::{ast::ModuleMembers, build_ast},
};
#[test]
fn should_transpile() {
let tokens = get_tokens(&String::from("fun id() {}")).unwrap();
let result = construct_ast(&tokens).unwrap();
let result = build_ast(&tokens).unwrap();
let fun_dec = result.declarations.get(0).unwrap();
let fun_dec = result.productions.get(0).unwrap();
match fun_dec {
TopLevelDeclaration::Binding(_) => panic!("Expected function declaration"),
TopLevelDeclaration::FunctionDeclaration(fun_decl) => {
ModuleMembers::Binding(_) => panic!("Expected function declaration"),
ModuleMembers::FunctionDeclaration(fun_decl) => {
let transpiled = fun_decl.transpile();
assert_eq!("function id() {\n\n}", transpiled);

View File

@ -6,7 +6,7 @@ impl Transpilable for ModuleAST<'_> {
/// nodes and leaves of the AST
fn transpile(&self) -> String {
let bindings_str: Vec<String> = self
.declarations
.productions
.iter()
.map(|binding| binding.transpile())
.collect();
@ -20,7 +20,7 @@ mod tests {
use super::*;
use crate::{
lexic::token::{Token, TokenType},
syntax::ast::{var_binding::Binding, Expression, TopLevelDeclaration},
syntax::ast::{var_binding::VariableBinding, Expression, ModuleMembers},
};
#[test]
@ -32,7 +32,7 @@ mod tests {
position: 0,
};
let value = String::from("322");
let binding = Binding {
let binding = VariableBinding {
datatype: None,
identifier: &id_token,
expression: Expression::Int(&value),
@ -40,7 +40,7 @@ mod tests {
};
let module = ModuleAST {
declarations: vec![TopLevelDeclaration::Binding(binding)],
productions: vec![ModuleMembers::Binding(binding)],
};
let result = module.transpile();

View File

@ -1,12 +1,12 @@
use crate::syntax::ast::statement::Statement;
use crate::syntax::ast::Statement;
use super::Transpilable;
impl Transpilable for Statement<'_> {
fn transpile(&self) -> String {
let stmt = match self {
Statement::FunctionCall(f) => f.transpile(),
Statement::Binding(b) => b.transpile(),
Statement::FnDecl(f) => f.transpile(),
Statement::VarBinding(b) => b.transpile(),
};
format!("{stmt};")

View File

@ -1,12 +1,12 @@
use crate::syntax::ast::TopLevelDeclaration;
use crate::syntax::ast::ModuleMembers;
use super::Transpilable;
impl Transpilable for TopLevelDeclaration<'_> {
impl Transpilable for ModuleMembers<'_> {
fn transpile(&self) -> String {
match self {
TopLevelDeclaration::Binding(binding) => binding.transpile(),
TopLevelDeclaration::FunctionDeclaration(fun) => fun.transpile(),
ModuleMembers::Binding(binding) => binding.transpile(),
ModuleMembers::FunctionDeclaration(fun) => fun.transpile(),
_ => panic!("Not implemented: Expression at top level"),
}
}

View File

@ -26,11 +26,11 @@ impl PrintableError for SyntaxError {
#[cfg(test)]
mod tests {
use super::*;
use crate::{error_handling::MistiError, lexic::get_tokens, syntax::construct_ast};
use crate::{error_handling::MistiError, lexic::get_tokens, syntax::build_ast};
fn _get_error_data(input: String) -> (Vec<char>, MistiError) {
let tokens = get_tokens(&input).unwrap();
let error_holder = construct_ast(&tokens);
let error_holder = build_ast(&tokens);
match error_holder {
Ok(_) => panic!(

View File

@ -81,7 +81,7 @@ fn compile(input: &String) -> Result<String, String> {
///
/// Prints the generated code to stdout
fn build_ast(input: &String, tokens: Vec<Token>) -> Result<String, String> {
let ast = syntax::construct_ast(&tokens);
let ast = syntax::build_ast(&tokens);
let ast = match ast {
Ok(ast) => ast,

View File

@ -16,8 +16,8 @@ pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
LexResult::Some(token, start_pos)
}
},
Some(_) | None => {
let token = Token::new(String::from(";"), start_pos, TokenType::NewLine);
_ => {
let token = Token::new(String::from(""), start_pos, TokenType::NewLine);
LexResult::Some(token, start_pos)
}
}

View File

@ -35,10 +35,7 @@ fn scan_decimal(chars: &Vec<char>, start_pos: usize, current: String) -> LexResu
// so this is used to retrieve the original START position of the token
let current_len = current.len();
LexResult::Some(
Token::new_int(current, start_pos - current_len),
start_pos,
)
LexResult::Some(Token::new_int(current, start_pos - current_len), start_pos)
}
}
}
@ -162,10 +159,7 @@ fn scan_hex_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Tok
// so this is used to retrieve the original START position of the token
let current_len = current.len();
(
Token::new_int(current, start_pos - current_len),
start_pos,
)
(Token::new_int(current, start_pos - current_len), start_pos)
}
}
}

View File

@ -203,8 +203,8 @@ mod tests {
match result {
LexResult::Err(reason) => {
assert_eq!("Incomplete string found", reason.reason)
},
_ => panic!("expected an error")
}
_ => panic!("expected an error"),
}
}
@ -216,8 +216,8 @@ mod tests {
match result {
LexResult::Err(reason) => {
assert_eq!("Incomplete string found", reason.reason)
},
_ => panic!("expected an error")
}
_ => panic!("expected an error"),
}
}
}

View File

@ -20,6 +20,7 @@ pub fn is_operator(c: char) -> bool {
|| c == '='
|| c == '*'
|| c == '!'
// TODO: find out where the backslash operator is actually used
|| c == '\\'
|| c == '/'
|| c == '|'

View File

@ -1,7 +1,3 @@
// Follows https://phplang.org/spec/09-lexical-structure.html
struct PhpAst {
}
struct PhpAst {}

View File

@ -28,7 +28,7 @@ fn compile(input: &String) {
///
/// Prints the generated code to stdout
fn build_ast(input: &String, tokens: Vec<Token>) {
let ast = syntax::construct_ast(&tokens);
let ast = syntax::build_ast(&tokens);
match ast {
Ok(ast) => {

View File

@ -1,10 +1,10 @@
use crate::{
error_handling::{semantic_error::SemanticError, MistiError},
semantic::{impls::SemanticCheck, symbol_table::SymbolEntry, types::Typed},
syntax::ast::var_binding::Binding,
syntax::ast::var_binding::VariableBinding,
};
impl SemanticCheck for Binding<'_> {
impl SemanticCheck for VariableBinding<'_> {
fn check_semantics(
&self,
scope: &crate::semantic::symbol_table::SymbolTable,

View File

@ -1,7 +1,10 @@
use crate::{
error_handling::{semantic_error::SemanticError, MistiError},
semantic::{impls::SemanticCheck, symbol_table::{SymbolEntry, SymbolTable}},
syntax::ast::{statement::Statement, FunctionDeclaration},
semantic::{
impls::SemanticCheck,
symbol_table::{SymbolEntry, SymbolTable},
},
syntax::ast::{FunctionDeclaration, Statement},
};
impl SemanticCheck for FunctionDeclaration<'_> {
@ -34,10 +37,10 @@ impl SemanticCheck for FunctionDeclaration<'_> {
match stmt {
Statement::Binding(b) => {
if let Err(err) = b.check_semantics(&function_scope) {
return Err(err)
return Err(err);
}
}
Statement::FunctionCall(_) => panic!("FunctionCall semantic check not implemented")
Statement::FunctionCall(_) => panic!("FunctionCall semantic check not implemented"),
}
}

View File

@ -1,12 +1,14 @@
use crate::{semantic::impls::SemanticCheck, syntax::ast::TopLevelDeclaration};
use crate::{semantic::impls::SemanticCheck, syntax::ast::ModuleMembers};
impl SemanticCheck for TopLevelDeclaration<'_> {
fn check_semantics(&self, scope: &crate::semantic::symbol_table::SymbolTable) -> Result<(), crate::error_handling::MistiError> {
impl SemanticCheck for ModuleMembers<'_> {
fn check_semantics(
&self,
scope: &crate::semantic::symbol_table::SymbolTable,
) -> Result<(), crate::error_handling::MistiError> {
match self {
TopLevelDeclaration::Binding(binding) => binding.check_semantics(scope),
TopLevelDeclaration::FunctionDeclaration(function) => function.check_semantics(scope),
ModuleMembers::Binding(binding) => binding.check_semantics(scope),
ModuleMembers::FunctionDeclaration(function) => function.check_semantics(scope),
_ => panic!("Not implemented"),
}
}
}

View File

@ -10,7 +10,7 @@ pub trait SemanticCheck {
impl SemanticCheck for ModuleAST<'_> {
/// Checks that this AST is semantically correct, given a symbol table
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError> {
for declaration in &self.declarations {
for declaration in &self.productions {
declaration.check_semantics(scope)?;
}

View File

@ -1,8 +1,8 @@
use crate::{error_handling::MistiError, syntax::ast::ModuleAST};
mod checks;
mod impls;
mod symbol_table;
mod checks;
mod types;
use impls::SemanticCheck;

View File

@ -97,7 +97,7 @@ impl SymbolTableNode {
return match entry {
SymbolEntry::Variable(t) => Some(t.clone()),
SymbolEntry::Function(_, _) => None,
}
};
}
// Try to get the type in the parent scope

View File

@ -1,23 +1,32 @@
use crate::lexic::token::Token;
use self::functions::FunctionCall;
use var_binding::VariableBinding;
pub mod functions;
pub mod statement;
pub mod var_binding;
/// The AST for a whole THP file
#[derive(Debug)]
pub struct ModuleAST<'a> {
pub declarations: Vec<TopLevelDeclaration<'a>>,
/// All the productions in the file
pub productions: Vec<ModuleMembers<'a>>,
}
// TODO: this and Statement should merge
/// Enum for productions available at the top level of a file
#[derive(Debug)]
pub enum TopLevelDeclaration<'a> {
Binding(var_binding::Binding<'a>),
FunctionDeclaration(FunctionDeclaration<'a>),
pub enum ModuleMembers<'a> {
// TODO: In the future implement module import
Stmt(Statement<'a>),
Expression(Expression<'a>),
}
/// A statement node of the AST: either a variable binding or a
/// function declaration.
#[derive(Debug)]
pub enum Statement<'a> {
/// A variable binding, wrapping a `VariableBinding`.
VarBinding(VariableBinding<'a>),
/// A function declaration, wrapping a `FunctionDeclaration`.
FnDecl(FunctionDeclaration<'a>),
}
#[derive(Debug)]
pub struct FunctionDeclaration<'a> {
pub identifier: &'a Token,
@ -29,7 +38,7 @@ pub struct FunctionDeclaration<'a> {
#[derive(Debug)]
pub struct Block<'a> {
// TODO: this should be a Vec of Statement|Expression
pub statements: Vec<statement::Statement<'a>>,
pub statements: Vec<Statement<'a>>,
}
#[derive(Debug)]

View File

@ -1,8 +0,0 @@
use super::{functions::FunctionCall, var_binding::Binding};
// TODO: this and TopLevelDeclaration should merge
#[derive(Debug)]
pub enum Statement<'a> {
FunctionCall(FunctionCall<'a>),
Binding(Binding<'a>),
}

View File

@ -3,7 +3,7 @@ use crate::lexic::token::Token;
use super::Expression;
#[derive(Debug)]
pub struct Binding<'a> {
pub struct VariableBinding<'a> {
pub datatype: Option<&'a Token>,
pub identifier: &'a Token,
pub expression: Expression<'a>,

View File

@ -1,4 +1,4 @@
use super::ast::var_binding::Binding;
use super::ast::var_binding::VariableBinding;
use super::utils::{parse_token_type, try_operator};
use super::{expression, ParsingError, ParsingResult};
use crate::error_handling::SyntaxError;
@ -13,7 +13,7 @@ var binding = "var", datatype?, binding remainder
binding remainder = identifier, "=", expression
*/
pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<Binding> {
pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<VariableBinding> {
let mut current_pos = pos;
/*
@ -130,7 +130,7 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<Bindin
};
current_pos = next_pos;
let binding = Binding {
let binding = VariableBinding {
datatype,
identifier: &identifier,
expression,
@ -264,7 +264,10 @@ mod tests {
Err(ParsingError::Err(error)) => {
assert_eq!(4, error.error_start);
assert_eq!(11, error.error_end);
assert_eq!("There should be an identifier after a binding", error.reason);
assert_eq!(
"There should be an identifier after a binding",
error.reason
);
}
_ => panic!("Error expected"),
}
@ -293,7 +296,10 @@ mod tests {
Err(ParsingError::Err(error)) => {
assert_eq!(4, error.error_start);
assert_eq!(10, error.error_end);
assert_eq!("There should be an identifier after the datatype", error.reason);
assert_eq!(
"There should be an identifier after the datatype",
error.reason
);
}
_ => panic!("Error expected"),
}
@ -308,7 +314,10 @@ mod tests {
Err(ParsingError::Err(error)) => {
assert_eq!(0, error.error_start);
assert_eq!(3, error.error_end);
assert_eq!("There should be an identifier after a `val` token", error.reason);
assert_eq!(
"There should be an identifier after a `val` token",
error.reason
);
}
_ => panic!("Error expected"),
}
@ -323,7 +332,10 @@ mod tests {
Err(ParsingError::Err(error)) => {
assert_eq!(4, error.error_start);
assert_eq!(14, error.error_end);
assert_eq!("There should be an equal sign `=` after the identifier", error.reason);
assert_eq!(
"There should be an equal sign `=` after the identifier",
error.reason
);
}
_ => panic!("Error expected"),
}
@ -338,10 +350,12 @@ mod tests {
Err(ParsingError::Err(error)) => {
assert_eq!(15, error.error_start);
assert_eq!(16, error.error_end);
assert_eq!("Expected an expression after the equal `=` operator", error.reason);
assert_eq!(
"Expected an expression after the equal `=` operator",
error.reason
);
}
_ => panic!("Error expected"),
}
}
}

View File

@ -80,7 +80,7 @@ mod tests {
match result {
Err(ParsingError::Unmatched) => assert!(true),
_ => panic!("Expected an Unmatched error")
_ => panic!("Expected an Unmatched error"),
}
}
}

View File

@ -43,7 +43,6 @@ fn parse_many<'a>(
}
}
#[cfg(test)]
mod tests {
use super::*;
@ -85,4 +84,3 @@ mod tests {
}
}
}

View File

@ -181,7 +181,6 @@ mod tests {
assert_eq!(first_param.identifier, "x");
}
#[test]
fn should_parse_empty_param_list_with_1_parameter_with_trailing_comma() {
let tokens = get_tokens(&String::from("(Int x, )")).unwrap();

View File

@ -1,107 +0,0 @@
# Grammar
## Source file
```ebnf
source file = top level statement*
```
## Top level statement
Current focus: have an MVP compiler (with lexical/syntactic/semantic analysis + codegen) for
simple function calls, and then implement the other features top-down.
```ebnf
top level statement = expression
| function declaration
| binding
```
## Function declaration
```ebnf
function declaration = "fun", identifier, params list, return type?, block;
params list = "(", ")";
return type = ;
```
### Block
```ebnf
block = "{", (statement, (new line, statement)*)?, "}"
```
### Statement
```ebnf
statement = binding
| function call
```
## Function call
```ebnf
function call = identifier, arguments list;
arguments list = "(", ")"
```
## Binding
```ebnf
binding = val binding | var binding
val binding = "val", datatype?, binding remainder
| datatype, binding remainder
var binding = "var", datatype?, binding remainder
binding remainder = identifier, "=", expression
```
## Operator precedence
From lowest to highest:
- `== !=`, left associative
- `> >= < <=`, left associative
- `- +`, left associative
- `/ *`, left associative
- `! -` (unary prefix), right associative
## Expression
```ebnf
expression = equality;
equality = comparison, (("==" | "!="), comparison )*;
comparison = term, ((">" | ">=" | "<" | "<="), term)*;
term = factor, (("-" | "+"), factor)*;
factor = unary, (("/" | "*"), unary)*;
unary = ("!" | "-"), expression
| primary;
function call expr = primary, (arguments list)?
| primary;
primary = number | string | boolean | identifier | ("(", expression, ")");
```
```thp
primary()
```

View File

@ -12,7 +12,7 @@ pub mod ast;
use crate::lexic::token::{Token, TokenType};
use ast::ModuleAST;
use self::ast::TopLevelDeclaration;
use self::ast::ModuleMembers;
pub type ParsingResult<'a, A> = Result<(A, usize), ParsingError<'a>>;
@ -30,7 +30,7 @@ pub enum ParsingError<'a> {
}
/// Constructs the Misti AST from a vector of tokens
pub fn construct_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST, MistiError> {
pub fn build_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST, MistiError> {
let mut top_level_declarations = Vec::new();
let token_amount = tokens.len();
let mut current_pos = 0;
@ -61,21 +61,15 @@ pub fn construct_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST, MistiError
}
Ok(ModuleAST {
declarations: top_level_declarations,
productions: top_level_declarations,
})
}
fn next_construct<'a>(
tokens: &'a Vec<Token>,
current_pos: usize,
) -> ParsingResult<TopLevelDeclaration> {
fn next_construct<'a>(tokens: &'a Vec<Token>, current_pos: usize) -> ParsingResult<ModuleMembers> {
// Try to parse a function declaration
match functions::function_declaration::try_parse(tokens, current_pos) {
Ok((declaration, next_pos)) => {
return Ok((
TopLevelDeclaration::FunctionDeclaration(declaration),
next_pos,
))
return Ok((ModuleMembers::FunctionDeclaration(declaration), next_pos))
}
Err(ParsingError::Err(err)) => return Err(ParsingError::Err(err)),
_ => {}
@ -83,16 +77,14 @@ fn next_construct<'a>(
// Try to parse a binding
match binding::try_parse(tokens, current_pos) {
Ok((binding, next_pos)) => return Ok((TopLevelDeclaration::Binding(binding), next_pos)),
Ok((binding, next_pos)) => return Ok((ModuleMembers::Binding(binding), next_pos)),
Err(ParsingError::Err(err)) => return Err(ParsingError::Err(err)),
_ => {}
}
// Try to parse an expression
match expression::try_parse(tokens, current_pos) {
Ok((expression, next_pos)) => {
return Ok((TopLevelDeclaration::Expression(expression), next_pos))
}
Ok((expression, next_pos)) => return Ok((ModuleMembers::Expression(expression), next_pos)),
Err(ParsingError::Err(err)) => return Err(ParsingError::Err(err)),
_ => {}
}
@ -109,13 +101,13 @@ mod tests {
fn should_parse_top_level_construct_with_trailing_newline() {
let input = String::from(" fun f1(){}\n");
let tokens = crate::lexic::get_tokens(&input).unwrap();
let declarations = construct_ast(&tokens).unwrap().declarations;
let declarations = build_ast(&tokens).unwrap().productions;
assert_eq!(declarations.len(), 1);
match declarations.get(0).unwrap() {
TopLevelDeclaration::Binding(_) => panic!("Expected a function declaration"),
TopLevelDeclaration::FunctionDeclaration(_f) => {
ModuleMembers::Binding(_) => panic!("Expected a function declaration"),
ModuleMembers::FunctionDeclaration(_f) => {
assert!(true)
}
_ => panic!("Not implemented: Expression at top level"),
@ -126,21 +118,21 @@ mod tests {
fn should_parse_2_top_level_construct() {
let input = String::from("fun f1(){} fun f2() {}");
let tokens = crate::lexic::get_tokens(&input).unwrap();
let declarations = construct_ast(&tokens).unwrap().declarations;
let declarations = build_ast(&tokens).unwrap().productions;
assert_eq!(declarations.len(), 2);
match declarations.get(0).unwrap() {
TopLevelDeclaration::Binding(_) => panic!("Expected a function declaration"),
TopLevelDeclaration::FunctionDeclaration(_f) => {
ModuleMembers::Binding(_) => panic!("Expected a function declaration"),
ModuleMembers::FunctionDeclaration(_f) => {
assert!(true)
}
_ => panic!("Not implemented: Expression at top level"),
}
match declarations.get(1).unwrap() {
TopLevelDeclaration::Binding(_) => panic!("Expected a function declaration"),
TopLevelDeclaration::FunctionDeclaration(_f) => {
ModuleMembers::Binding(_) => panic!("Expected a function declaration"),
ModuleMembers::FunctionDeclaration(_f) => {
assert!(true)
}
_ => panic!("Not implemented: Expression at top level"),

View File

@ -1,7 +1,7 @@
use crate::lexic::token::Token;
use super::{
ast::{statement::Statement, Expression},
ast::{Expression, Statement},
binding,
expression::function_call_expr,
ParsingError, ParsingResult,