feat: semantic analysis for hello world

master v0.1.0
Araozu 2024-08-01 10:34:08 -05:00
parent c0e20ad283
commit 4c565df699
19 changed files with 263 additions and 153 deletions

View File

@ -21,17 +21,20 @@
- Not ignore comments & whitespace, for code formatting
- Abstract the parsing of datatypes, such that in the future generics can be implemented in a single place
- Include the original tokens in the AST
- Include comments in the AST
- Begin work on the code formatter
## v0.0.15
## v0.1.0
- [x] Complete workflow for "Hello world"
- [x] Multiline comments
- [x] Nested multiline comments
- [ ] Include comments in the AST
- [ ] Replace all panics with actual errors
- [ ] Remove all old codegen
- [ ] Test codegen
- [ ] Begin work on the code formatter
- [x] Replace all panics with actual errors
- [x] Remove all old codegen
- [x] Test codegen
- [x] Reenable semantic analysis
- [x] Create minimal type definitions for the stdlib
## v0.0.14

View File

@ -1 +1 @@
mod primary_expression;
mod primary_expression;

View File

@ -1,9 +1,9 @@
use super::Transpilable;
use crate::php_ast::PhpExpression;
mod expression;
pub mod statement;
pub mod statement_list;
mod expression;
impl Transpilable for PhpExpression<'_> {
fn transpile(&self) -> String {

View File

@ -18,10 +18,9 @@ mod tests {
#[test]
fn should_transpile_empty_file() {
let ast = PhpAst {statements: vec![]};
let ast = PhpAst { statements: vec![] };
let output = ast.transpile();
assert_eq!("<?php\n", output);
}
}

View File

@ -51,7 +51,7 @@ pub fn scan_multiline(chars: &Vec<char>, start_pos: usize) -> LexResult {
}
/// Implementation that scans the multiline comment.
///
///
/// May only error if EOF is found before the comment is finished.
/// If Err, returns the last position where a char was available.
fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Result<(Vec<char>, usize), usize> {

View File

@ -45,7 +45,7 @@ impl Token {
TokenType::Comment => self.position + self.value.len() + 2,
// 2 extra characters for ""
TokenType::String => self.position + self.value.len() + 2,
_ => self.position + self.value.len()
_ => self.position + self.value.len(),
}
}
}

View File

@ -1,9 +1,8 @@
/// This AST implements a subset of the PHP AST as defined
/// by https://phplang.org/spec/19-grammar.html#syntactic-grammar
///
///
/// This subset only includes nodes that can be generated by
/// THP
pub mod transformers;
/// Represents `statement-list` on the grammar,
@ -13,9 +12,9 @@ pub struct PhpAst<'a> {
}
/// https://phplang.org/spec/19-grammar.html#grammar-statement
///
///
/// Not fully implemented
///
///
/// statement:
/// echo-statement
pub enum PhpStatement<'a> {
@ -31,7 +30,7 @@ pub enum PhpExpression<'a> {
}
/// https://phplang.org/spec/19-grammar.html#grammar-primary-expression
///
///
/// primary-expression:
/// literal
pub enum PhpPrimaryExpression<'a> {
@ -39,4 +38,3 @@ pub enum PhpPrimaryExpression<'a> {
FloatingLiteral(&'a String),
StringLiteral(&'a String),
}

View File

@ -12,7 +12,7 @@ impl<'a> PHPTransformable<'a> for Expression<'_> {
Expression::String(value) => {
let expr = PhpPrimaryExpression::StringLiteral(value);
PhpExpression::PrimaryExpression(expr)
},
}
_ => todo!("transformation for expression: {:?}", self),
}
}

View File

@ -34,10 +34,10 @@ impl<'a> PHPTransformable<'a> for ModuleAST<'_> {
match e {
Expression::String(v) => {
expressions.push(
PhpExpression::PrimaryExpression(PhpPrimaryExpression::StringLiteral(v.clone()))
PhpExpression::PrimaryExpression(PhpPrimaryExpression::StringLiteral(v))
)
},
_ => panic!("Non string expressions not supported")
_ => todo!("Non string expressions not supported")
}
}

View File

@ -4,62 +4,12 @@ use colored::Colorize;
use crate::codegen::Transpilable;
use crate::error_handling::PrintableError;
use crate::lexic::token::Token;
use super::codegen;
use super::lexic;
use super::syntax;
use crate::php_ast::transformers::PHPTransformable;
/// Runs lexical analysis over `input`.
///
/// On success the tokens are handed to `build_ast` for the next phase;
/// on failure the error is rendered against the source and printed to stderr.
fn compile(input: &String) {
    match lexic::get_tokens(input) {
        Ok(token_list) => build_ast(input, token_list),
        Err(lex_error) => {
            // The error printer works on the source as a vector of chars
            let source_chars: Vec<char> = input.chars().collect();
            eprintln!("{}", lex_error.get_error_str(&source_chars))
        }
    }
}
/// Runs syntax analysis over `tokens` and, for now, code generation.
///
/// On success the AST is lowered to the PHP AST, transpiled, and the
/// generated code is printed to stdout. On failure the syntax error is
/// rendered against `input` and printed to stderr.
fn build_ast(input: &String, tokens: Vec<Token>) {
    let module_ast = match syntax::build_ast(&tokens) {
        Ok(parsed) => parsed,
        Err(syntax_error) => {
            // The error printer works on the source as a vector of chars
            let source_chars: Vec<char> = input.chars().collect();
            eprintln!("{}", syntax_error.get_error_str(&source_chars));
            return;
        }
    };

    /*
    TODO: Semantic analysis is disabled to test the PHP codegen. Reenable:

    let res1 = crate::semantic::check_semantics(&ast);
    match res1 {
        Ok(_) => {}
        Err(reason) => {
            let chars: Vec<char> = input.chars().into_iter().collect();
            let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars));
            eprintln!("{}", error);
            return;
        }
    }
    */

    // THP AST -> PHP AST -> PHP source text
    let php_ast = module_ast.into_php_ast();
    let php_code = php_ast.transpile();
    println!("{}", php_code)
}
/// Executes the REPL, reading from stdin, compiling and emitting PHP to stdout
pub fn run() -> io::Result<()> {
let stdin = io::stdin();
@ -87,3 +37,54 @@ pub fn run() -> io::Result<()> {
};
}
}
/// Full pipeline from THP source code to PHP output
fn compile(input: &String) {
//
// Lexical analysis
//
let tokens = match lexic::get_tokens(input) {
Ok(t) => t,
Err(error) => {
let chars: Vec<char> = input.chars().into_iter().collect();
eprintln!("{}", error.get_error_str(&chars));
return;
}
};
//
// Syntax analysis
//
let ast = match syntax::build_ast(&tokens) {
Ok(ast) => ast,
Err(reason) => {
let chars: Vec<char> = input.chars().into_iter().collect();
eprintln!("{}", reason.get_error_str(&chars));
return;
}
};
//
// Semantic analysis
//
let res1 = crate::semantic::check_semantics(&ast);
match res1 {
Ok(_) => {}
Err(reason) => {
let chars: Vec<char> = input.chars().into_iter().collect();
let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars));
eprintln!("{}", error);
return;
}
}
//
// Intermediate representation (THP -> PHP ast)
//
let php_ast = ast.into_php_ast();
//
// Codegen
//
println!("{}", php_ast.transpile());
}

View File

@ -1,6 +1,9 @@
use crate::{
error_handling::{semantic_error::SemanticError, MistiError},
semantic::{impls::SemanticCheck, symbol_table::SymbolEntry, types::Typed},
semantic::{
impls::SemanticCheck,
types::{Type, Typed},
},
syntax::ast::var_binding::VariableBinding,
};
@ -31,7 +34,7 @@ impl SemanticCheck for VariableBinding<'_> {
let expression_datatype = self.expression.get_type(scope)?;
let datatype = match self.datatype {
Some(t) => t.value.clone(),
Some(t) => Type::Value(t.value.clone()),
// If the datatype is not defined, we use the expression datatype
None => expression_datatype.clone(),
};
@ -42,7 +45,7 @@ impl SemanticCheck for VariableBinding<'_> {
error_start: self.identifier.position,
error_end: self.identifier.get_end_position(),
reason: format!(
"The variable `{}` was declared as `{}` but its expression has type `{}`",
"The variable `{}` was declared as `{:?}` but its expression has type `{:?}`",
binding_name, datatype, expression_datatype
),
};
@ -50,7 +53,7 @@ impl SemanticCheck for VariableBinding<'_> {
return Err(MistiError::Semantic(error));
}
scope.insert(binding_name.clone(), SymbolEntry::new_variable(datatype));
scope.insert(binding_name.clone(), datatype);
Ok(())
}

View File

@ -1,9 +1,6 @@
use crate::{
error_handling::{semantic_error::SemanticError, MistiError},
semantic::{
impls::SemanticCheck,
symbol_table::{SymbolEntry, SymbolTable},
},
semantic::{impls::SemanticCheck, symbol_table::SymbolTable, types::Type},
syntax::ast::{BlockMember, FunctionDeclaration, Statement},
};
@ -49,10 +46,7 @@ impl SemanticCheck for FunctionDeclaration<'_> {
// TODO: Check the return type of the function
scope.insert(
function_name,
SymbolEntry::new_function(vec![], "Unit".into()),
);
scope.insert(function_name, Type::Function(vec![], "Unit".into()));
Ok(())
}

View File

@ -1,6 +1,10 @@
use crate::{
error_handling::MistiError,
semantic::{impls::SemanticCheck, symbol_table::SymbolTable},
error_handling::{semantic_error::SemanticError, MistiError},
semantic::{
impls::SemanticCheck,
symbol_table::SymbolTable,
types::{Type, Typed},
},
syntax::ast::{Expression, ModuleMembers, Statement},
};
@ -16,7 +20,7 @@ impl SemanticCheck for ModuleMembers<'_> {
}
}
// TODO: Move to its own file
// TODO: Move to its own file when it grows
impl SemanticCheck for Statement<'_> {
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError> {
match self {
@ -26,9 +30,83 @@ impl SemanticCheck for Statement<'_> {
}
}
// TODO: Move to its own file
// TODO: Move to its own file when it grows
impl SemanticCheck for Expression<'_> {
    /// Checks the semantics of an expression.
    ///
    /// Only function calls are supported for now. For a function call this
    /// verifies that:
    /// - the called expression resolves to a function type,
    /// - the number of arguments matches the number of parameters,
    /// - every argument is a value of the datatype its parameter requires.
    ///
    /// # Errors
    ///
    /// Returns a `MistiError::Semantic` when any of the checks above fails,
    /// or propagates the error raised while resolving a datatype.
    ///
    /// # Panics
    ///
    /// Panics (via `todo!`) for any expression other than a function call.
    fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError> {
        // Open design question:
        // How to get the global definition into the symbol table?
        // maybe just when creating the symbol table inject all
        // the global elements at once?
        // Store the global elements as binary/JSON
        // and load them along with the symbol table

        // then for efficiency they could be grouped by module?
        // and stored as binary files?
        // then the binary files are searched for and loaded when
        // requested?

        // For a function call:
        // check that the function exists
        // check its signature
        // check parameters

        match self {
            Expression::FunctionCall(f) => {
                let fun = &*f.function;
                let arguments = &*f.arguments.arguments;

                let function_datatype = fun.get_type(scope)?;
                match function_datatype {
                    Type::Function(parameters, _return_type) => {
                        // Check parameters length
                        if parameters.len() != arguments.len() {
                            return Err(MistiError::Semantic(SemanticError {
                                // TODO: fix
                                error_start: 0,
                                error_end: 1,
                                reason: format!(
                                    "Expected {} arguments, found {}",
                                    parameters.len(),
                                    arguments.len(),
                                ),
                            }));
                        }

                        // Check that each argument matches the required datatype.
                        // Both sequences have the same length at this point, so
                        // zipping them visits every parameter/argument pair.
                        for (parameter, argument) in parameters.iter().zip(arguments.iter()) {
                            let argument_datatype = argument.get_type(scope)?;
                            if !argument_datatype.is_value(parameter) {
                                // The argument and the parameter have different types.
                                // Report the datatype that was found, not the
                                // argument expression itself.
                                return Err(MistiError::Semantic(SemanticError {
                                    // TODO: fix
                                    error_start: 0,
                                    error_end: 1,
                                    reason: format!(
                                        "Expected datatype {}, got {:?}",
                                        parameter, argument_datatype
                                    ),
                                }));
                            }
                        }
                    }
                    _ => {
                        return Err(MistiError::Semantic(SemanticError {
                            // TODO: fix
                            error_start: 0,
                            error_end: 1,
                            reason: format!(
                                "Expected a function type, got {:?}",
                                function_datatype
                            ),
                        }));
                    }
                }
            }
            _ => todo!("Check semantics for expression other than function call"),
        }

        Ok(())
    }
}

View File

@ -2,6 +2,7 @@ use crate::{error_handling::MistiError, syntax::ast::ModuleAST};
mod checks;
mod impls;
mod std;
mod symbol_table;
mod types;
@ -18,19 +19,22 @@ pub fn check_semantics(ast: &ModuleAST) -> Result<(), MistiError> {
// For now there's only support for a single file
// TODO: Receive a symbol table as a reference and work on it.
// this way we can implement a unique symbol table for REPL session
let global_scope = symbol_table::SymbolTable::new();
let mut global_scope = symbol_table::SymbolTable::new();
std::populate(&mut global_scope);
ast.check_semantics(&global_scope)
}
#[cfg(test)]
mod tests {
use super::symbol_table::{SymbolEntry, SymbolTable};
use crate::semantic::types::Type;
use super::symbol_table::SymbolTable;
#[test]
fn test_1() {
let global_scope = SymbolTable::new();
let main_function = SymbolEntry::new_function(vec![], String::from("Unit"));
let main_function = Type::Function(vec![], String::from("Unit"));
global_scope.insert("main".into(), main_function);
@ -41,17 +45,16 @@ mod tests {
fn test_2() {
let global_scope = SymbolTable::new();
let main_function = SymbolEntry::new_function(vec![], String::from("Unit"));
let main_function = Type::Function(vec![], String::from("Unit"));
global_scope.insert("main".into(), main_function);
global_scope.insert("db_url".into(), SymbolEntry::Variable("String".into()));
global_scope.insert("db_url".into(), Type::Value("String".into()));
let add_function =
SymbolEntry::new_function(vec!["Int".into(), "Int".into()], "Int".into());
let add_function = Type::Function(vec!["Int".into(), "Int".into()], "Int".into());
global_scope.insert("add".into(), add_function);
let main_function_scope = SymbolTable::new_from_parent(&global_scope);
main_function_scope.insert("message".into(), SymbolEntry::Variable("String".into()));
main_function_scope.insert("message".into(), Type::Value("String".into()));
assert!(main_function_scope.test(&"message".into()));
assert!(main_function_scope.test(&"db_url".into()));
@ -59,10 +62,10 @@ mod tests {
let add_function_scope = SymbolTable::new_from_parent(&global_scope);
add_function_scope.insert("a".into(), SymbolEntry::Variable("Int".into()));
add_function_scope.insert("b".into(), SymbolEntry::Variable("Int".into()));
add_function_scope.insert("a".into(), Type::Value("Int".into()));
add_function_scope.insert("b".into(), Type::Value("Int".into()));
assert!(add_function_scope.test(&"a".into()));
global_scope.insert("test".into(), SymbolEntry::Variable("Int".into()));
global_scope.insert("test".into(), Type::Value("Int".into()));
}
}

12
src/semantic/std.rs Normal file
View File

@ -0,0 +1,12 @@
//! Naively provides the standard library for THP
//! by directly inserting the definitions into the
//! Symbol Table
use super::{symbol_table::SymbolTable, types::Type};
/// Registers the stdlib definitions in the given symbol table.
///
/// Currently only `print` is defined.
pub fn populate(table: &mut SymbolTable) {
    // print: (String) -> (Void)
    table.insert(
        String::from("print"),
        Type::Function(vec![String::from("String")], String::from("Void")),
    );
}

View File

@ -1,5 +1,7 @@
use std::{cell::RefCell, collections::HashMap, rc::Rc};
use super::types::Type;
/// Public interface for the symbol table
pub struct SymbolTable {
node: Rc<RefCell<SymbolTableNode>>,
@ -10,14 +12,7 @@ struct SymbolTableNode {
// the parent scope
parent: Option<Rc<RefCell<SymbolTableNode>>>,
// the current scope
scope: HashMap<String, SymbolEntry>,
}
pub enum SymbolEntry {
// Just a Datatype
Variable(String),
// Contains: parameters, return type
Function(Vec<String>, String),
scope: HashMap<String, Type>,
}
impl SymbolTable {
@ -37,7 +32,7 @@ impl SymbolTable {
}
/// Inserts a new symbol into the current table scope
pub fn insert(&self, key: String, value: SymbolEntry) {
pub fn insert(&self, key: String, value: Type) {
self.node.borrow_mut().insert(key, value);
}
@ -47,7 +42,7 @@ impl SymbolTable {
}
/// Gets the datatype of a symbol, if it exists
pub fn get_type(&self, key: &String) -> Option<String> {
pub fn get_type<'a>(&'a self, key: &String) -> Option<Type> {
self.node.borrow_mut().get_type(key)
}
}
@ -62,7 +57,7 @@ impl SymbolTableNode {
}
/// Creates a new symbol table with a parent
pub fn new_from_parent<'a>(parent: &Rc<RefCell<SymbolTableNode>>) -> SymbolTableNode {
pub fn new_from_parent(parent: &Rc<RefCell<SymbolTableNode>>) -> SymbolTableNode {
SymbolTableNode {
parent: Some(Rc::clone(&parent)),
scope: HashMap::new(),
@ -70,7 +65,7 @@ impl SymbolTableNode {
}
/// Inserts a new symbol into the current scope
pub fn insert(&mut self, key: String, value: SymbolEntry) {
pub fn insert(&mut self, key: String, value: Type) {
self.scope.insert(key, value);
}
@ -90,33 +85,20 @@ impl SymbolTableNode {
}
/// Returns the symbol's datatype
pub fn get_type(&mut self, key: &String) -> Option<String> {
pub fn get_type<'a>(&'a mut self, key: &String) -> Option<Type> {
// Try to get the type in the current scope
if let Some(entry) = self.scope.get(key) {
// TODO: Change to allow other types of datatypes: functions, classes, maps
return match entry {
SymbolEntry::Variable(t) => Some(t.clone()),
SymbolEntry::Function(_, _) => None,
};
return Some(entry.clone());
}
// Try to get the type in the parent scope
match &self.parent {
Some(parent) => {
let mut parent = parent.as_ref().borrow_mut();
parent.get_type(key)
parent.as_ref().borrow_mut().get_type(key)
// parent.get_type(key)
}
None => None,
}
}
}
impl SymbolEntry {
pub fn new_variable(datatype: String) -> SymbolEntry {
SymbolEntry::Variable(datatype)
}
pub fn new_function(parameters: Vec<String>, return_type: String) -> SymbolEntry {
SymbolEntry::Function(parameters, return_type)
}
}

View File

@ -4,16 +4,16 @@ use crate::{
syntax::ast::Expression,
};
use super::Typed;
use super::{Type, Typed};
impl Typed for Expression<'_> {
/// Attempts to get the datatype for an expression.
fn get_type(&self, scope: &SymbolTable) -> Result<String, MistiError> {
fn get_type(&self, scope: &SymbolTable) -> Result<Type, MistiError> {
match self {
Expression::Int(_) => Ok("Int".into()),
Expression::Float(_) => Ok("Float".into()),
Expression::String(_) => Ok("String".into()),
Expression::Boolean(_) => Ok("Bool".into()),
Expression::Int(_) => Ok(Type::Value("Int".into())),
Expression::Float(_) => Ok(Type::Value("Float".into())),
Expression::String(_) => Ok(Type::Value("String".into())),
Expression::Boolean(_) => Ok(Type::Value("Bool".into())),
Expression::Identifier(identifier) => {
// Attempt to get the datatype of the identifier in the current scope
let datatype = match scope.get_type(identifier) {
@ -27,14 +27,30 @@ impl Typed for Expression<'_> {
}
};
// TODO: use lifetimes
Ok(datatype)
}
Expression::FunctionCall(_f) => {
Expression::FunctionCall(f) => {
// TODO: Must implement functions as first class citizens
// for this to work
// for this to work with any arbitrary expression.
// for now it just expects an identifier
// TODO: check the parameter types
panic!("Not implemented: Get datatype of function call")
match &*f.function {
Expression::Identifier(id) => {
match scope.get_type(id) {
Some(t) => Ok(t),
None => Err(MistiError::Semantic(SemanticError {
// TODO: Actually find the start and end position
// this requires the token to be stored, rather than
// just the string value
error_start: 0,
error_end: 1,
reason: format!("Type not found for symbol {}", id),
})),
}
}
_ => todo!("Get datatype of an expression that resolves into a function call"),
}
}
Expression::UnaryOperator(op, exp) => {
let expr_type = match exp.get_type(scope) {
@ -50,41 +66,41 @@ impl Typed for Expression<'_> {
// Only supported unary operator: - & !
if *op == "-" {
if expr_type != "Int" && expr_type != "Float" {
if !expr_type.is_value("Int") && !expr_type.is_value("Float") {
return Err(MistiError::Semantic(SemanticError {
error_start: 0,
error_end: 1,
reason: format!(
"Expected a Int or Float after unary `-`, got {}",
"Expected a Int or Float after unary `-`, got {:?}",
expr_type
),
}));
} else {
return Ok("Int".into());
return Ok(Type::Value("Int".into()));
}
} else if *op == "!" {
if expr_type != "Bool" {
if !expr_type.is_value("Bool") {
return Err(MistiError::Semantic(SemanticError {
error_start: 0,
error_end: 1,
reason: format!("Expected a Bool after unary `!`, got {}", expr_type),
reason: format!("Expected a Bool after unary `!`, got {:?}", expr_type),
}));
} else {
return Ok("Bool".into());
return Ok(Type::Value("Bool".into()));
}
}
panic!("Illegal state: Found an unexpected unary operator during semantic analysis: {}", *op);
unreachable!("Illegal state: Found an unexpected unary operator during semantic analysis: {}", *op);
}
Expression::BinaryOperator(exp1, exp2, operator) => {
let t1 = exp1.get_type(scope)?;
let t2 = exp2.get_type(scope)?;
// TODO: There's definitely a better way to do this
if *operator == "+" && t1 == "Int" && t2 == "Int" {
return Ok("Int".into());
} else if *operator == "-" && t1 == "Int" && t2 == "Int" {
return Ok("Int".into());
if *operator == "+" && t1.is_value("Int") && t2.is_value("Int") {
return Ok(Type::Value("Int".into()));
} else if *operator == "-" && t1.is_value("Int") && t2.is_value("Int") {
return Ok(Type::Value("Int".into()));
}
return Err(MistiError::Semantic(SemanticError {

View File

@ -1,5 +1,5 @@
// This crate provides an interface and implementations
// for determining the datatypes of the language constructs.
//! This crate provides an interface and implementations
//! for determining the datatypes of the language constructs.
use crate::error_handling::MistiError;
@ -7,6 +7,26 @@ use super::symbol_table::SymbolTable;
mod expression;
pub trait Typed {
fn get_type(&self, scope: &SymbolTable) -> Result<String, MistiError>;
/// A datatype as tracked by the semantic analysis.
#[derive(Debug, Clone, PartialEq)]
pub enum Type {
    /// A plain value, identified by the name of its datatype
    Value(String),
    // TODO: Use Type instead of String to allow
    // arbitrary types
    /// A function: the datatype names of its parameters, then of its return value
    Function(Vec<String>, String),
    // TODO: tuple, union types
    // TODO: generics
}

impl Type {
    /// Tells whether this type is a `Value` whose datatype name equals `datatype`.
    ///
    /// Any other kind of type (e.g. a function) yields `false`.
    pub fn is_value(&self, datatype: impl Into<String>) -> bool {
        if let Type::Value(name) = self {
            // Only convert the argument once we know there is a name to compare
            let expected: String = datatype.into();
            *name == expected
        } else {
            false
        }
    }
}
pub trait Typed {
fn get_type(&self, scope: &SymbolTable) -> Result<Type, MistiError>;
}