Compare commits
5 Commits
19cba2a7b3
...
ade1a809aa
Author | SHA1 | Date | |
---|---|---|---|
ade1a809aa | |||
1d4cec5548 | |||
5dafd6ca20 | |||
18f3f21eec | |||
ab782b828d |
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,2 +1,3 @@
|
||||
target
|
||||
examples
|
||||
tarpaulin-report.html
|
||||
|
24
CHANGELOG.md
24
CHANGELOG.md
@ -2,6 +2,7 @@
|
||||
|
||||
## TODO
|
||||
|
||||
- Implement functions as first class citizens
|
||||
- Implement AST transformation before codegen:
|
||||
Create a new AST to represent PHP source code
|
||||
and a THP ast -> PHP ast process, so that the
|
||||
@ -22,18 +23,29 @@
|
||||
- Formatter
|
||||
- Simple language server
|
||||
- Decide how to handle comments in the syntax (should comments mean something, like in Rust?)
|
||||
- Not ignore comments & whitespace, for code formatting
|
||||
- Abstract the parsing of datatypes, such that in the future generics can be implemented in a single place
|
||||
|
||||
## v0.0.13
|
||||
|
||||
- [ ] Begin work on a formal grammar
|
||||
- [ ] Simplify AST
|
||||
- [ ] Define the top level constructs
|
||||
- [ ] Include the original tokens in the AST
|
||||
- [ ] Implement a hello world until semantic analysis
|
||||
- [ ] Refactor code
|
||||
- [ ] Remove `PARSER couldn't parse any construction` error & replace with an actual error message
|
||||
|
||||
|
||||
## v0.0.12
|
||||
|
||||
- [x] Infer datatype of an identifier
|
||||
- [x] Infer datatype of a binary operator
|
||||
- [ ] Infer datatype of unary operator
|
||||
- [ ] Infer datatype of a function call expression
|
||||
- [ ] Infer datatype of binary operators
|
||||
- [x] Infer datatype of unary operator
|
||||
- [x] Infer datatype of binary operators
|
||||
- [x] Infer Int & Float as different types
|
||||
- [ ] Execute semantic analysis on the function's block
|
||||
- [ ] Write tests
|
||||
- [ ] Abstract the parsing of datatypes, such that in the future generics can be implemented in a single place
|
||||
- [x] Execute semantic analysis on the function's block
|
||||
- [x] Write tests
|
||||
|
||||
|
||||
## v0.0.11
|
||||
|
2
Cargo.lock
generated
2
Cargo.lock
generated
@ -20,7 +20,7 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
|
||||
[[package]]
|
||||
name = "thp"
|
||||
version = "0.0.11"
|
||||
version = "0.0.12"
|
||||
dependencies = [
|
||||
"colored",
|
||||
]
|
||||
|
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "thp"
|
||||
version = "0.0.11"
|
||||
version = "0.0.12"
|
||||
edition = "2021"
|
||||
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
use super::Transpilable;
|
||||
use crate::syntax::ast::var_binding::Binding;
|
||||
use crate::syntax::ast::var_binding::VariableBinding;
|
||||
|
||||
impl Transpilable for Binding<'_> {
|
||||
impl Transpilable for VariableBinding<'_> {
|
||||
/// Transpiles val and var bindings into PHP.
|
||||
fn transpile(&self) -> String {
|
||||
let expression_str = self.expression.transpile();
|
||||
@ -15,7 +15,7 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::{
|
||||
lexic::token::{Token, TokenType},
|
||||
syntax::ast::{var_binding::Binding, Expression},
|
||||
syntax::ast::{var_binding::VariableBinding, Expression},
|
||||
};
|
||||
|
||||
#[test]
|
||||
@ -27,7 +27,7 @@ mod tests {
|
||||
position: 0,
|
||||
};
|
||||
let value = String::from("322");
|
||||
let binding = Binding {
|
||||
let binding = VariableBinding {
|
||||
datatype: None,
|
||||
identifier: &id_token,
|
||||
expression: Expression::Int(&value),
|
||||
|
@ -17,24 +17,26 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::{
|
||||
lexic::get_tokens,
|
||||
syntax::{ast::TopLevelDeclaration, construct_ast},
|
||||
syntax::{
|
||||
ast::{ModuleMembers, Statement},
|
||||
build_ast,
|
||||
},
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn should_transpile() {
|
||||
let tokens = get_tokens(&String::from("fun id() {}")).unwrap();
|
||||
let result = construct_ast(&tokens).unwrap();
|
||||
let result = build_ast(&tokens).unwrap();
|
||||
|
||||
let fun_dec = result.declarations.get(0).unwrap();
|
||||
let fun_dec = result.productions.get(0).unwrap();
|
||||
|
||||
match fun_dec {
|
||||
TopLevelDeclaration::Binding(_) => panic!("Expected function declaration"),
|
||||
TopLevelDeclaration::FunctionDeclaration(fun_decl) => {
|
||||
ModuleMembers::Stmt(Statement::FnDecl(fun_decl)) => {
|
||||
let transpiled = fun_decl.transpile();
|
||||
|
||||
assert_eq!("function id() {\n\n}", transpiled);
|
||||
}
|
||||
_ => panic!("Not implemented: Expression at top level"),
|
||||
_ => panic!("Expected a function declaration"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -6,7 +6,7 @@ impl Transpilable for ModuleAST<'_> {
|
||||
/// nodes and leaves of the AST
|
||||
fn transpile(&self) -> String {
|
||||
let bindings_str: Vec<String> = self
|
||||
.declarations
|
||||
.productions
|
||||
.iter()
|
||||
.map(|binding| binding.transpile())
|
||||
.collect();
|
||||
@ -20,7 +20,7 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::{
|
||||
lexic::token::{Token, TokenType},
|
||||
syntax::ast::{var_binding::Binding, Expression, TopLevelDeclaration},
|
||||
syntax::ast::{var_binding::VariableBinding, Expression, ModuleMembers, Statement},
|
||||
};
|
||||
|
||||
#[test]
|
||||
@ -32,7 +32,7 @@ mod tests {
|
||||
position: 0,
|
||||
};
|
||||
let value = String::from("322");
|
||||
let binding = Binding {
|
||||
let binding = VariableBinding {
|
||||
datatype: None,
|
||||
identifier: &id_token,
|
||||
expression: Expression::Int(&value),
|
||||
@ -40,7 +40,7 @@ mod tests {
|
||||
};
|
||||
|
||||
let module = ModuleAST {
|
||||
declarations: vec![TopLevelDeclaration::Binding(binding)],
|
||||
productions: vec![ModuleMembers::Stmt(Statement::Binding(binding))],
|
||||
};
|
||||
|
||||
let result = module.transpile();
|
||||
|
@ -1,11 +1,11 @@
|
||||
use crate::syntax::ast::statement::Statement;
|
||||
use crate::syntax::ast::Statement;
|
||||
|
||||
use super::Transpilable;
|
||||
|
||||
impl Transpilable for Statement<'_> {
|
||||
fn transpile(&self) -> String {
|
||||
let stmt = match self {
|
||||
Statement::FunctionCall(f) => f.transpile(),
|
||||
Statement::FnDecl(f) => f.transpile(),
|
||||
Statement::Binding(b) => b.transpile(),
|
||||
};
|
||||
|
||||
|
@ -1,13 +1,13 @@
|
||||
use crate::syntax::ast::TopLevelDeclaration;
|
||||
use crate::syntax::ast::{ModuleMembers, Statement};
|
||||
|
||||
use super::Transpilable;
|
||||
|
||||
impl Transpilable for TopLevelDeclaration<'_> {
|
||||
impl Transpilable for ModuleMembers<'_> {
|
||||
fn transpile(&self) -> String {
|
||||
match self {
|
||||
TopLevelDeclaration::Binding(binding) => binding.transpile(),
|
||||
TopLevelDeclaration::FunctionDeclaration(fun) => fun.transpile(),
|
||||
_ => panic!("Not implemented: Expression at top level"),
|
||||
ModuleMembers::Stmt(Statement::Binding(b)) => b.transpile(),
|
||||
ModuleMembers::Stmt(Statement::FnDecl(f)) => f.transpile(),
|
||||
_ => todo!("Not implemented: Transpilable for Expression"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -26,11 +26,11 @@ impl PrintableError for SyntaxError {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::{error_handling::MistiError, lexic::get_tokens, syntax::construct_ast};
|
||||
use crate::{error_handling::MistiError, lexic::get_tokens, syntax::build_ast};
|
||||
|
||||
fn _get_error_data(input: String) -> (Vec<char>, MistiError) {
|
||||
let tokens = get_tokens(&input).unwrap();
|
||||
let error_holder = construct_ast(&tokens);
|
||||
let error_holder = build_ast(&tokens);
|
||||
|
||||
match error_holder {
|
||||
Ok(_) => panic!(
|
||||
|
@ -81,7 +81,7 @@ fn compile(input: &String) -> Result<String, String> {
|
||||
///
|
||||
/// Prints the generated code in stdin
|
||||
fn build_ast(input: &String, tokens: Vec<Token>) -> Result<String, String> {
|
||||
let ast = syntax::construct_ast(&tokens);
|
||||
let ast = syntax::build_ast(&tokens);
|
||||
|
||||
let ast = match ast {
|
||||
Ok(ast) => ast,
|
||||
|
@ -16,8 +16,8 @@ pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
|
||||
LexResult::Some(token, start_pos)
|
||||
}
|
||||
},
|
||||
Some(_) | None => {
|
||||
let token = Token::new(String::from(";"), start_pos, TokenType::NewLine);
|
||||
_ => {
|
||||
let token = Token::new(String::from(""), start_pos, TokenType::NewLine);
|
||||
LexResult::Some(token, start_pos)
|
||||
}
|
||||
}
|
||||
|
@ -35,10 +35,7 @@ fn scan_decimal(chars: &Vec<char>, start_pos: usize, current: String) -> LexResu
|
||||
// so this is used to retrieve the original START position of the token
|
||||
let current_len = current.len();
|
||||
|
||||
LexResult::Some(
|
||||
Token::new_int(current, start_pos - current_len),
|
||||
start_pos,
|
||||
)
|
||||
LexResult::Some(Token::new_int(current, start_pos - current_len), start_pos)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -162,10 +159,7 @@ fn scan_hex_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Tok
|
||||
// so this is used to retrieve the original START position of the token
|
||||
let current_len = current.len();
|
||||
|
||||
(
|
||||
Token::new_int(current, start_pos - current_len),
|
||||
start_pos,
|
||||
)
|
||||
(Token::new_int(current, start_pos - current_len), start_pos)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -203,8 +203,8 @@ mod tests {
|
||||
match result {
|
||||
LexResult::Err(reason) => {
|
||||
assert_eq!("Incomplete string found", reason.reason)
|
||||
},
|
||||
_ => panic!("expected an error")
|
||||
}
|
||||
_ => panic!("expected an error"),
|
||||
}
|
||||
}
|
||||
|
||||
@ -216,8 +216,8 @@ mod tests {
|
||||
match result {
|
||||
LexResult::Err(reason) => {
|
||||
assert_eq!("Incomplete string found", reason.reason)
|
||||
},
|
||||
_ => panic!("expected an error")
|
||||
}
|
||||
_ => panic!("expected an error"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -20,6 +20,7 @@ pub fn is_operator(c: char) -> bool {
|
||||
|| c == '='
|
||||
|| c == '*'
|
||||
|| c == '!'
|
||||
// TODO: ??? where is this used??
|
||||
|| c == '\\'
|
||||
|| c == '/'
|
||||
|| c == '|'
|
||||
|
@ -11,6 +11,8 @@ mod syntax;
|
||||
mod lexic;
|
||||
// Module to handle semantic analysis
|
||||
mod semantic;
|
||||
// Defines the PHP AST
|
||||
mod php_ast;
|
||||
// Transforms an AST to PHP
|
||||
mod codegen;
|
||||
|
||||
|
3
src/php_ast/mod.rs
Normal file
3
src/php_ast/mod.rs
Normal file
@ -0,0 +1,3 @@
|
||||
// Follows https://phplang.org/spec/09-lexical-structure.html
|
||||
|
||||
struct PhpAst {}
|
@ -28,7 +28,7 @@ fn compile(input: &String) {
|
||||
///
|
||||
/// Prints the generated code to stdout
|
||||
fn build_ast(input: &String, tokens: Vec<Token>) {
|
||||
let ast = syntax::construct_ast(&tokens);
|
||||
let ast = syntax::build_ast(&tokens);
|
||||
|
||||
match ast {
|
||||
Ok(ast) => {
|
||||
|
@ -1,10 +1,10 @@
|
||||
use crate::{
|
||||
error_handling::{semantic_error::SemanticError, MistiError},
|
||||
semantic::{impls::SemanticCheck, symbol_table::SymbolEntry, types::Typed},
|
||||
syntax::ast::var_binding::Binding,
|
||||
syntax::ast::var_binding::VariableBinding,
|
||||
};
|
||||
|
||||
impl SemanticCheck for Binding<'_> {
|
||||
impl SemanticCheck for VariableBinding<'_> {
|
||||
fn check_semantics(
|
||||
&self,
|
||||
scope: &crate::semantic::symbol_table::SymbolTable,
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::{
|
||||
error_handling::{semantic_error::SemanticError, MistiError},
|
||||
semantic::{impls::SemanticCheck, symbol_table::SymbolEntry},
|
||||
syntax::ast::FunctionDeclaration,
|
||||
semantic::{
|
||||
impls::SemanticCheck,
|
||||
symbol_table::{SymbolEntry, SymbolTable},
|
||||
},
|
||||
syntax::ast::{FunctionDeclaration, Statement},
|
||||
};
|
||||
|
||||
impl SemanticCheck for FunctionDeclaration<'_> {
|
||||
@ -25,8 +28,25 @@ impl SemanticCheck for FunctionDeclaration<'_> {
|
||||
return Err(MistiError::Semantic(error));
|
||||
}
|
||||
|
||||
// TODO: Check the return type of the function
|
||||
// Create a new scope and use it in the function block
|
||||
let function_scope = SymbolTable::new_from_parent(scope);
|
||||
|
||||
// TODO: Check the return type of the function body
|
||||
// This should be the last expression in the block
|
||||
for stmt in self.block.statements.iter() {
|
||||
match stmt {
|
||||
Statement::Binding(b) => {
|
||||
if let Err(err) = b.check_semantics(&function_scope) {
|
||||
return Err(err);
|
||||
}
|
||||
}
|
||||
Statement::FnDecl(_) => {
|
||||
todo!("Function declaration: semantic check not implemented")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Check the return type of the function
|
||||
|
||||
scope.insert(
|
||||
function_name,
|
||||
|
@ -1,12 +1,34 @@
|
||||
use crate::{semantic::impls::SemanticCheck, syntax::ast::TopLevelDeclaration};
|
||||
use crate::{
|
||||
error_handling::MistiError,
|
||||
semantic::{impls::SemanticCheck, symbol_table::SymbolTable},
|
||||
syntax::ast::{Expression, ModuleMembers, Statement},
|
||||
};
|
||||
|
||||
impl SemanticCheck for TopLevelDeclaration<'_> {
|
||||
fn check_semantics(&self, scope: &crate::semantic::symbol_table::SymbolTable) -> Result<(), crate::error_handling::MistiError> {
|
||||
impl SemanticCheck for ModuleMembers<'_> {
|
||||
fn check_semantics(
|
||||
&self,
|
||||
scope: &crate::semantic::symbol_table::SymbolTable,
|
||||
) -> Result<(), crate::error_handling::MistiError> {
|
||||
match self {
|
||||
TopLevelDeclaration::Binding(binding) => binding.check_semantics(scope),
|
||||
TopLevelDeclaration::FunctionDeclaration(function) => function.check_semantics(scope),
|
||||
_ => panic!("Not implemented"),
|
||||
ModuleMembers::Stmt(statement) => statement.check_semantics(scope),
|
||||
ModuleMembers::Expr(expression) => expression.check_semantics(scope),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Move to its own file
|
||||
impl SemanticCheck for Statement<'_> {
|
||||
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError> {
|
||||
match self {
|
||||
Statement::Binding(b) => b.check_semantics(scope),
|
||||
Statement::FnDecl(f) => f.check_semantics(scope),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Move to its own file
|
||||
impl SemanticCheck for Expression<'_> {
|
||||
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError> {
|
||||
todo!("Check semantics for expression")
|
||||
}
|
||||
}
|
||||
|
@ -10,7 +10,7 @@ pub trait SemanticCheck {
|
||||
impl SemanticCheck for ModuleAST<'_> {
|
||||
/// Checks that this AST is semantically correct, given a symbol table
|
||||
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError> {
|
||||
for declaration in &self.declarations {
|
||||
for declaration in &self.productions {
|
||||
declaration.check_semantics(scope)?;
|
||||
}
|
||||
|
||||
|
@ -1,8 +1,8 @@
|
||||
use crate::{error_handling::MistiError, syntax::ast::ModuleAST};
|
||||
|
||||
mod checks;
|
||||
mod impls;
|
||||
mod symbol_table;
|
||||
mod checks;
|
||||
mod types;
|
||||
|
||||
use impls::SemanticCheck;
|
||||
|
@ -97,7 +97,7 @@ impl SymbolTableNode {
|
||||
return match entry {
|
||||
SymbolEntry::Variable(t) => Some(t.clone()),
|
||||
SymbolEntry::Function(_, _) => None,
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// Try to get the type in the parent scope
|
||||
|
@ -29,8 +29,53 @@ impl Typed for Expression<'_> {
|
||||
|
||||
Ok(datatype)
|
||||
}
|
||||
Expression::FunctionCall(_) => todo!(),
|
||||
Expression::UnaryOperator(_, _) => todo!(),
|
||||
Expression::FunctionCall(_f) => {
|
||||
// TODO: Must implement functions as first class citizens
|
||||
// for this to work
|
||||
|
||||
// TODO: check the parameter types
|
||||
panic!("Not implemented: Get datatype of function call")
|
||||
}
|
||||
Expression::UnaryOperator(op, exp) => {
|
||||
let expr_type = match exp.get_type(scope) {
|
||||
Ok(t) => t,
|
||||
Err(_reason) => {
|
||||
return Err(MistiError::Semantic(SemanticError {
|
||||
error_start: 0,
|
||||
error_end: 1,
|
||||
reason: format!("Error getting type of expression"),
|
||||
}))
|
||||
}
|
||||
};
|
||||
|
||||
// Only supported unary operator: - & !
|
||||
if *op == "-" {
|
||||
if expr_type != "Int" && expr_type != "Float" {
|
||||
return Err(MistiError::Semantic(SemanticError {
|
||||
error_start: 0,
|
||||
error_end: 1,
|
||||
reason: format!(
|
||||
"Expected a Int or Float after unary `-`, got {}",
|
||||
expr_type
|
||||
),
|
||||
}));
|
||||
} else {
|
||||
return Ok("Int".into());
|
||||
}
|
||||
} else if *op == "!" {
|
||||
if expr_type != "Bool" {
|
||||
return Err(MistiError::Semantic(SemanticError {
|
||||
error_start: 0,
|
||||
error_end: 1,
|
||||
reason: format!("Expected a Bool after unary `!`, got {}", expr_type),
|
||||
}));
|
||||
} else {
|
||||
return Ok("Bool".into());
|
||||
}
|
||||
}
|
||||
|
||||
panic!("Illegal state: Found an unexpected unary operator during semantic analysis: {}", *op);
|
||||
}
|
||||
Expression::BinaryOperator(exp1, exp2, operator) => {
|
||||
let t1 = exp1.get_type(scope)?;
|
||||
let t2 = exp2.get_type(scope)?;
|
||||
|
@ -1,20 +1,30 @@
|
||||
use crate::lexic::token::Token;
|
||||
|
||||
use self::functions::FunctionCall;
|
||||
use var_binding::VariableBinding;
|
||||
|
||||
pub mod functions;
|
||||
pub mod statement;
|
||||
pub mod var_binding;
|
||||
|
||||
/// The AST for a whole THP file
|
||||
#[derive(Debug)]
|
||||
pub struct ModuleAST<'a> {
|
||||
pub declarations: Vec<TopLevelDeclaration<'a>>,
|
||||
/// All the productions in the file
|
||||
pub productions: Vec<ModuleMembers<'a>>,
|
||||
}
|
||||
|
||||
/// Enum for productions available at the top level of a file
|
||||
#[derive(Debug)]
|
||||
pub enum ModuleMembers<'a> {
|
||||
// TODO: In the future implement module import
|
||||
Stmt(Statement<'a>),
|
||||
Expr(Expression<'a>),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum TopLevelDeclaration<'a> {
|
||||
Binding(var_binding::Binding<'a>),
|
||||
FunctionDeclaration(FunctionDeclaration<'a>),
|
||||
Expression(Expression<'a>),
|
||||
pub enum Statement<'a> {
|
||||
Binding(VariableBinding<'a>),
|
||||
FnDecl(FunctionDeclaration<'a>),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@ -27,7 +37,8 @@ pub struct FunctionDeclaration<'a> {
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Block<'a> {
|
||||
pub statements: Vec<statement::Statement<'a>>,
|
||||
// TODO: this should be a Vec of Statement|Expression
|
||||
pub statements: Vec<Statement<'a>>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
|
@ -1,7 +0,0 @@
|
||||
use super::{functions::FunctionCall, var_binding::Binding};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Statement<'a> {
|
||||
FunctionCall(FunctionCall<'a>),
|
||||
Binding(Binding<'a>),
|
||||
}
|
@ -3,7 +3,7 @@ use crate::lexic::token::Token;
|
||||
use super::Expression;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Binding<'a> {
|
||||
pub struct VariableBinding<'a> {
|
||||
pub datatype: Option<&'a Token>,
|
||||
pub identifier: &'a Token,
|
||||
pub expression: Expression<'a>,
|
||||
|
@ -1,4 +1,4 @@
|
||||
use super::ast::var_binding::Binding;
|
||||
use super::ast::var_binding::VariableBinding;
|
||||
use super::utils::{parse_token_type, try_operator};
|
||||
use super::{expression, ParsingError, ParsingResult};
|
||||
use crate::error_handling::SyntaxError;
|
||||
@ -13,7 +13,7 @@ var binding = "var", datatype?, binding remainder
|
||||
|
||||
binding remainder = identifier, "=", expression
|
||||
*/
|
||||
pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<Binding> {
|
||||
pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<VariableBinding> {
|
||||
let mut current_pos = pos;
|
||||
|
||||
/*
|
||||
@ -130,7 +130,7 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<Bindin
|
||||
};
|
||||
current_pos = next_pos;
|
||||
|
||||
let binding = Binding {
|
||||
let binding = VariableBinding {
|
||||
datatype,
|
||||
identifier: &identifier,
|
||||
expression,
|
||||
@ -264,7 +264,10 @@ mod tests {
|
||||
Err(ParsingError::Err(error)) => {
|
||||
assert_eq!(4, error.error_start);
|
||||
assert_eq!(11, error.error_end);
|
||||
assert_eq!("There should be an identifier after a binding", error.reason);
|
||||
assert_eq!(
|
||||
"There should be an identifier after a binding",
|
||||
error.reason
|
||||
);
|
||||
}
|
||||
_ => panic!("Error expected"),
|
||||
}
|
||||
@ -293,7 +296,10 @@ mod tests {
|
||||
Err(ParsingError::Err(error)) => {
|
||||
assert_eq!(4, error.error_start);
|
||||
assert_eq!(10, error.error_end);
|
||||
assert_eq!("There should be an identifier after the datatype", error.reason);
|
||||
assert_eq!(
|
||||
"There should be an identifier after the datatype",
|
||||
error.reason
|
||||
);
|
||||
}
|
||||
_ => panic!("Error expected"),
|
||||
}
|
||||
@ -308,7 +314,10 @@ mod tests {
|
||||
Err(ParsingError::Err(error)) => {
|
||||
assert_eq!(0, error.error_start);
|
||||
assert_eq!(3, error.error_end);
|
||||
assert_eq!("There should be an identifier after a `val` token", error.reason);
|
||||
assert_eq!(
|
||||
"There should be an identifier after a `val` token",
|
||||
error.reason
|
||||
);
|
||||
}
|
||||
_ => panic!("Error expected"),
|
||||
}
|
||||
@ -323,7 +332,10 @@ mod tests {
|
||||
Err(ParsingError::Err(error)) => {
|
||||
assert_eq!(4, error.error_start);
|
||||
assert_eq!(14, error.error_end);
|
||||
assert_eq!("There should be an equal sign `=` after the identifier", error.reason);
|
||||
assert_eq!(
|
||||
"There should be an equal sign `=` after the identifier",
|
||||
error.reason
|
||||
);
|
||||
}
|
||||
_ => panic!("Error expected"),
|
||||
}
|
||||
@ -338,10 +350,12 @@ mod tests {
|
||||
Err(ParsingError::Err(error)) => {
|
||||
assert_eq!(15, error.error_start);
|
||||
assert_eq!(16, error.error_end);
|
||||
assert_eq!("Expected an expression after the equal `=` operator", error.reason);
|
||||
assert_eq!(
|
||||
"Expected an expression after the equal `=` operator",
|
||||
error.reason
|
||||
);
|
||||
}
|
||||
_ => panic!("Error expected"),
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -72,6 +72,8 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::lexic::get_tokens;
|
||||
|
||||
// TODO: rewrite, refactor
|
||||
/*
|
||||
#[test]
|
||||
fn test_parse_block() {
|
||||
let tokens = get_tokens(&String::from("{f()}")).unwrap();
|
||||
@ -112,4 +114,5 @@ mod tests {
|
||||
|
||||
assert_eq!(block.statements.len(), 1);
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
@ -80,7 +80,7 @@ mod tests {
|
||||
|
||||
match result {
|
||||
Err(ParsingError::Unmatched) => assert!(true),
|
||||
_ => panic!("Expected an Unmatched error")
|
||||
_ => panic!("Expected an Unmatched error"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -43,7 +43,6 @@ fn parse_many<'a>(
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@ -85,4 +84,3 @@ mod tests {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -181,7 +181,6 @@ mod tests {
|
||||
assert_eq!(first_param.identifier, "x");
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn should_parse_empty_param_list_with_1_parameter_with_trailing_comma() {
|
||||
let tokens = get_tokens(&String::from("(Int x, )")).unwrap();
|
||||
|
@ -1,107 +0,0 @@
|
||||
# Grammar
|
||||
|
||||
|
||||
## Source file
|
||||
|
||||
```ebnf
|
||||
source file = top level statement*
|
||||
```
|
||||
|
||||
|
||||
## Top level statement
|
||||
|
||||
Current focus: Have an MVP compiler (with lexical/syntactic/semantic analysis + codegen) for
|
||||
simple function calls, and then implement other features top down
|
||||
|
||||
```ebnf
|
||||
top level statement = expression
|
||||
| function declaration
|
||||
| binding
|
||||
```
|
||||
|
||||
|
||||
## Function declaration
|
||||
|
||||
```ebnf
|
||||
function declaration = "fun", identifier, params list, return type?, block;
|
||||
|
||||
params list = "(", ")";
|
||||
|
||||
return type = ;
|
||||
```
|
||||
|
||||
|
||||
### Block
|
||||
|
||||
```ebnf
|
||||
block = "{", (statement, (new line, statement)*)?, "}"
|
||||
```
|
||||
|
||||
|
||||
### Statement
|
||||
|
||||
```ebnf
|
||||
statement = binding
|
||||
| function call
|
||||
```
|
||||
|
||||
|
||||
## Function call
|
||||
|
||||
```ebnf
|
||||
function call = identifier, arguments list;
|
||||
|
||||
arguments list = "(", ")"
|
||||
```
|
||||
|
||||
|
||||
## Binding
|
||||
|
||||
```ebnf
|
||||
binding = val binding | var binding
|
||||
val binding = "val", datatype?, binding remainder
|
||||
| datatype, binding remainder
|
||||
|
||||
var binding = "var", datatype?, binding remainder
|
||||
|
||||
binding remainder = identifier, "=", expression
|
||||
```
|
||||
|
||||
|
||||
## Operator precedence
|
||||
|
||||
From lowest to highest:
|
||||
|
||||
- `== !=`, left associative
|
||||
- `> >= < <=`, left associative
|
||||
- `- +`, left associative
|
||||
- `/ *`, left associative
|
||||
- `! -`, left associative
|
||||
|
||||
## Expression
|
||||
|
||||
```ebnf
|
||||
expression = equality;
|
||||
|
||||
equality = comparison, (("==" | "!="), comparison )*;
|
||||
|
||||
comparison = term, ((">" | ">=" | "<" | "<="), term)*;
|
||||
|
||||
term = factor, (("-" | "+"), factor)*;
|
||||
|
||||
factor = unary, (("/" | "*"), unary)*;
|
||||
|
||||
unary = ("!" | "-"), expression
|
||||
| primary;
|
||||
|
||||
function call expr = primary, (arguments list)?
|
||||
| primary;
|
||||
|
||||
primary = number | string | boolean | identifier | ("(", expression, ")");
|
||||
```
|
||||
|
||||
```thp
|
||||
primary()
|
||||
```
|
||||
|
||||
|
@ -1,149 +1,79 @@
|
||||
use crate::error_handling::{MistiError, SyntaxError};
|
||||
use crate::error_handling::MistiError;
|
||||
|
||||
mod binding;
|
||||
mod block;
|
||||
mod expression;
|
||||
mod functions;
|
||||
mod parseable;
|
||||
mod parsers;
|
||||
mod statement;
|
||||
mod utils;
|
||||
|
||||
pub mod ast;
|
||||
|
||||
use crate::lexic::token::{Token, TokenType};
|
||||
use crate::lexic::token::Token;
|
||||
use ast::ModuleAST;
|
||||
|
||||
use self::ast::TopLevelDeclaration;
|
||||
use self::parseable::{Parseable, ParsingError, ParsingResult};
|
||||
|
||||
pub type ParsingResult<'a, A> = Result<(A, usize), ParsingError<'a>>;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ParsingError<'a> {
|
||||
/// Some other token was found than the expected one
|
||||
Mismatch(&'a Token),
|
||||
/// The parsing didn't succeed, but it's not a fatal error
|
||||
Unmatched,
|
||||
/// The parsing failed past a point of no return.
|
||||
///
|
||||
/// For example, when parsing a function declaration
|
||||
/// the `fun` token is found, but then no identifier
|
||||
Err(SyntaxError),
|
||||
}
|
||||
|
||||
/// Constructs the Misti AST from a vector of tokens
|
||||
pub fn construct_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST, MistiError> {
|
||||
let mut top_level_declarations = Vec::new();
|
||||
let token_amount = tokens.len();
|
||||
let mut current_pos = 0;
|
||||
|
||||
// Minus one because the last token is always EOF
|
||||
while current_pos < token_amount - 1 {
|
||||
// Ignore newlines
|
||||
if tokens[current_pos].token_type == TokenType::NewLine {
|
||||
current_pos += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
match next_construct(tokens, current_pos) {
|
||||
Ok((module, next_pos)) => {
|
||||
top_level_declarations.push(module);
|
||||
current_pos = next_pos;
|
||||
}
|
||||
Err(ParsingError::Err(err)) => return Err(MistiError::Syntax(err)),
|
||||
/// Builds the Misti AST from a vector of tokens
|
||||
pub fn build_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST, MistiError> {
|
||||
match ModuleAST::try_parse(tokens, 0) {
|
||||
Ok((module, _)) => Ok(module),
|
||||
Err(ParsingError::Err(error)) => Err(MistiError::Syntax(error)),
|
||||
_ => {
|
||||
return Err(MistiError::Syntax(SyntaxError {
|
||||
reason: String::from("PARSER couldn't parse any construction"),
|
||||
// FIXME: This should get the position of the _token_ that current_pos points to
|
||||
error_start: current_pos,
|
||||
error_end: current_pos,
|
||||
}));
|
||||
// This shouldn't happen. The module parser returns an error if it finds nothing to parse.
|
||||
unreachable!("Illegal state during parsing: The Module parse should always return a result or error")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(ModuleAST {
|
||||
declarations: top_level_declarations,
|
||||
})
|
||||
}
|
||||
|
||||
fn next_construct<'a>(
|
||||
tokens: &'a Vec<Token>,
|
||||
current_pos: usize,
|
||||
) -> ParsingResult<TopLevelDeclaration> {
|
||||
// Try to parse a function declaration
|
||||
match functions::function_declaration::try_parse(tokens, current_pos) {
|
||||
Ok((declaration, next_pos)) => {
|
||||
return Ok((
|
||||
TopLevelDeclaration::FunctionDeclaration(declaration),
|
||||
next_pos,
|
||||
))
|
||||
}
|
||||
Err(ParsingError::Err(err)) => return Err(ParsingError::Err(err)),
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Try to parse a binding
|
||||
match binding::try_parse(tokens, current_pos) {
|
||||
Ok((binding, next_pos)) => return Ok((TopLevelDeclaration::Binding(binding), next_pos)),
|
||||
Err(ParsingError::Err(err)) => return Err(ParsingError::Err(err)),
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Try to parse an expression
|
||||
match expression::try_parse(tokens, current_pos) {
|
||||
Ok((expression, next_pos)) => {
|
||||
return Ok((TopLevelDeclaration::Expression(expression), next_pos))
|
||||
}
|
||||
Err(ParsingError::Err(err)) => return Err(ParsingError::Err(err)),
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// No top level construct was found, return unmatched
|
||||
Err(ParsingError::Unmatched)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::lexic::get_tokens;
|
||||
use ast::ModuleMembers;
|
||||
use tests::ast::Statement;
|
||||
|
||||
// TODO: Reenable when statement parsing is rewritten
|
||||
/*
|
||||
#[test]
|
||||
fn should_parse_top_level_construct_with_trailing_newline() {
|
||||
let input = String::from(" fun f1(){}\n");
|
||||
let tokens = crate::lexic::get_tokens(&input).unwrap();
|
||||
let declarations = construct_ast(&tokens).unwrap().declarations;
|
||||
let tokens = get_tokens(&input).unwrap();
|
||||
let productions = build_ast(&tokens).unwrap().productions;
|
||||
|
||||
assert_eq!(declarations.len(), 1);
|
||||
assert_eq!(productions.len(), 1);
|
||||
|
||||
match declarations.get(0).unwrap() {
|
||||
TopLevelDeclaration::Binding(_) => panic!("Expected a function declaration"),
|
||||
TopLevelDeclaration::FunctionDeclaration(_f) => {
|
||||
match productions.get(0).unwrap() {
|
||||
ModuleMembers::Stmt(Statement::FnDecl(_f)) => {
|
||||
assert!(true)
|
||||
}
|
||||
_ => panic!("Not implemented: Expression at top level"),
|
||||
_ => panic!("Expected a function declaration"),
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
#[test]
|
||||
fn should_parse_2_top_level_construct() {
|
||||
let input = String::from("fun f1(){} fun f2() {}");
|
||||
let tokens = crate::lexic::get_tokens(&input).unwrap();
|
||||
let declarations = construct_ast(&tokens).unwrap().declarations;
|
||||
let declarations = build_ast(&tokens).unwrap().productions;
|
||||
|
||||
assert_eq!(declarations.len(), 2);
|
||||
|
||||
match declarations.get(0).unwrap() {
|
||||
TopLevelDeclaration::Binding(_) => panic!("Expected a function declaration"),
|
||||
TopLevelDeclaration::FunctionDeclaration(_f) => {
|
||||
ModuleMembers::Stmt(Statement::FnDecl(_f)) => {
|
||||
assert!(true)
|
||||
}
|
||||
_ => panic!("Not implemented: Expression at top level"),
|
||||
_ => panic!("Expected a function declaration as first production"),
|
||||
}
|
||||
|
||||
match declarations.get(1).unwrap() {
|
||||
TopLevelDeclaration::Binding(_) => panic!("Expected a function declaration"),
|
||||
TopLevelDeclaration::FunctionDeclaration(_f) => {
|
||||
ModuleMembers::Stmt(Statement::FnDecl(_f)) => {
|
||||
assert!(true)
|
||||
}
|
||||
_ => panic!("Not implemented: Expression at top level"),
|
||||
_ => panic!("Expected a function declaration as first production"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
27
src/syntax/parseable.rs
Normal file
27
src/syntax/parseable.rs
Normal file
@ -0,0 +1,27 @@
|
||||
use crate::{error_handling::SyntaxError, lexic::token::Token};
|
||||
|
||||
/// The result of a parsing operation.
|
||||
/// On success, it contains the item and the position of the next token
|
||||
/// On failure, it contains the error
|
||||
pub type ParsingResult<'a, A> = Result<(A, usize), ParsingError<'a>>;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ParsingError<'a> {
|
||||
/// The parsing didn't succeed, but it's not a fatal error
|
||||
Unmatched,
|
||||
/// Some other token was found than the expected one
|
||||
Mismatch(&'a Token),
|
||||
/// The parsing failed past a point of no return.
|
||||
///
|
||||
/// For example, when parsing a function declaration
|
||||
/// the `fun` token is found, but then no identifier
|
||||
Err(SyntaxError),
|
||||
}
|
||||
|
||||
/// Represents a type that can be parsed using Recursive Descent
|
||||
pub trait Parseable<'a> {
|
||||
type Item;
|
||||
|
||||
/// Try to parse the current production.
|
||||
fn try_parse(tokens: &'a Vec<Token>, current_pos: usize) -> ParsingResult<'a, Self::Item>;
|
||||
}
|
15
src/syntax/parsers/expression.rs
Normal file
15
src/syntax/parsers/expression.rs
Normal file
@ -0,0 +1,15 @@
|
||||
use crate::{
|
||||
lexic::token::Token,
|
||||
syntax::{
|
||||
ast::Expression,
|
||||
parseable::{Parseable, ParsingResult},
|
||||
},
|
||||
};
|
||||
|
||||
impl<'a> Parseable<'a> for Expression<'a> {
|
||||
type Item = Expression<'a>;
|
||||
|
||||
fn try_parse(tokens: &'a Vec<Token>, current_pos: usize) -> ParsingResult<'a, Self::Item> {
|
||||
todo!()
|
||||
}
|
||||
}
|
3
src/syntax/parsers/mod.rs
Normal file
3
src/syntax/parsers/mod.rs
Normal file
@ -0,0 +1,3 @@
|
||||
pub mod expression;
|
||||
pub mod module;
|
||||
pub mod statement;
|
66
src/syntax/parsers/module.rs
Normal file
66
src/syntax/parsers/module.rs
Normal file
@ -0,0 +1,66 @@
|
||||
use crate::{
|
||||
error_handling::SyntaxError,
|
||||
lexic::token::Token,
|
||||
syntax::{
|
||||
ast::{Expression, ModuleAST, ModuleMembers, Statement},
|
||||
parseable::{Parseable, ParsingError, ParsingResult},
|
||||
},
|
||||
};
|
||||
|
||||
impl<'a> Parseable<'a> for ModuleAST<'a> {
|
||||
type Item = ModuleAST<'a>;
|
||||
|
||||
/// Parses a THP module/source file
|
||||
///
|
||||
/// As this function parses the whole file, it ignores `current_pos` and
|
||||
/// always starts from token 0.
|
||||
///
|
||||
/// Its grammar is defined it the spec, at the webpage
|
||||
fn try_parse(tokens: &'a Vec<Token>, current_pos: usize) -> ParsingResult<'a, Self::Item> {
|
||||
let mut productions = Vec::<ModuleMembers>::new();
|
||||
let tokens_len = tokens.len();
|
||||
let mut current_pos = 0;
|
||||
|
||||
// Minus one because last token is EOF
|
||||
// TODO: Does that EOF do anything?
|
||||
while current_pos < tokens_len - 1 {
|
||||
// Attempt to parse an statement
|
||||
match Statement::try_parse(tokens, current_pos) {
|
||||
Ok((prod, next_pos)) => {
|
||||
productions.push(ModuleMembers::Stmt(prod));
|
||||
current_pos = next_pos;
|
||||
continue;
|
||||
}
|
||||
Err(ParsingError::Err(error)) => {
|
||||
// TODO: Better error handling, write a better error message
|
||||
return Err(ParsingError::Err(error));
|
||||
}
|
||||
_ => {}
|
||||
};
|
||||
|
||||
// Attempt to parse an expression
|
||||
match Expression::try_parse(tokens, current_pos) {
|
||||
Ok((prod, next_pos)) => {
|
||||
productions.push(ModuleMembers::Expr(prod));
|
||||
current_pos = next_pos;
|
||||
}
|
||||
Err(ParsingError::Err(error)) => {
|
||||
// TODO: Better error handling, write a better error message
|
||||
return Err(ParsingError::Err(error));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// If we reached this point we didn't match any productions and fail
|
||||
let t = &tokens[current_pos];
|
||||
|
||||
return Err(ParsingError::Err(SyntaxError {
|
||||
error_start: t.position,
|
||||
error_end: t.get_end_position(),
|
||||
reason: "Expected an statement or an expresion at the top level.".into(),
|
||||
}));
|
||||
}
|
||||
|
||||
Ok((ModuleAST { productions }, current_pos))
|
||||
}
|
||||
}
|
37
src/syntax/parsers/statement.rs
Normal file
37
src/syntax/parsers/statement.rs
Normal file
@ -0,0 +1,37 @@
|
||||
use crate::syntax::{
|
||||
ast::Statement, binding, functions::function_declaration, parseable::Parseable,
|
||||
};
|
||||
|
||||
impl<'a> Parseable<'a> for Statement<'a> {
|
||||
type Item = Statement<'a>;
|
||||
|
||||
fn try_parse(
|
||||
tokens: &'a Vec<crate::lexic::token::Token>,
|
||||
current_pos: usize,
|
||||
) -> crate::syntax::parseable::ParsingResult<'a, Self::Item> {
|
||||
// Try to parse a variable binding
|
||||
// TODO: Rewrite function_declaration to use Parseable
|
||||
match binding::try_parse(tokens, current_pos) {
|
||||
Ok((prod, next)) => {
|
||||
return Ok((Statement::Binding(prod), next));
|
||||
}
|
||||
Err(_) => {
|
||||
// TODO
|
||||
}
|
||||
}
|
||||
|
||||
// Try to parse a function declaration
|
||||
// TODO: Rewrite function_declaration to use Parseable
|
||||
match function_declaration::try_parse(tokens, current_pos) {
|
||||
Ok((prod, next)) => {
|
||||
return Ok((Statement::FnDecl(prod), next));
|
||||
}
|
||||
Err(_) => {
|
||||
// TODO
|
||||
}
|
||||
}
|
||||
|
||||
// Here nothing was parsed. Should fail
|
||||
todo!("Nothing was parsed. Should fail")
|
||||
}
|
||||
}
|
@ -1,11 +1,6 @@
|
||||
use crate::lexic::token::Token;
|
||||
|
||||
use super::{
|
||||
ast::{statement::Statement, Expression},
|
||||
binding,
|
||||
expression::function_call_expr,
|
||||
ParsingError, ParsingResult,
|
||||
};
|
||||
use super::{ast::Statement, binding, ParsingError, ParsingResult};
|
||||
|
||||
pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<Statement> {
|
||||
// Try to parse a binding
|
||||
@ -15,12 +10,15 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<Statem
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// A function call is an expression, not a statement. Remove
|
||||
// Try to parse a function call
|
||||
/*
|
||||
match function_call_expr::try_parse(tokens, pos) {
|
||||
Ok((Expression::FunctionCall(f), next)) => return Ok((Statement::FunctionCall(f), next)),
|
||||
Err(ParsingError::Err(err)) => return Err(ParsingError::Err(err)),
|
||||
_ => {}
|
||||
};
|
||||
*/
|
||||
|
||||
// Return unmatched
|
||||
Err(ParsingError::Unmatched)
|
||||
@ -30,23 +28,6 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<Statem
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn should_parse_function_call() {
|
||||
let input = String::from("f1()");
|
||||
let tokens = crate::lexic::get_tokens(&input).unwrap();
|
||||
let statement = try_parse(&tokens, 0);
|
||||
|
||||
let statement = match statement {
|
||||
Ok((s, _)) => s,
|
||||
_ => panic!("Expected a statement"),
|
||||
};
|
||||
|
||||
match statement {
|
||||
Statement::FunctionCall(_) => assert!(true),
|
||||
_ => panic!("Expected a function call"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_parse_binding() {
|
||||
let input = String::from("val identifier = 20");
|
||||
|
Loading…
Reference in New Issue
Block a user