Improve function semantic check

master
Araozu 2024-03-09 08:05:40 -05:00
parent a39b0c0d5a
commit f97b8e2e07
9 changed files with 183 additions and 105 deletions

View File

@ -2,6 +2,11 @@
## TODO
- Implement AST transformation before codegen:
Create a new AST to represent PHP source code
and a THP AST -> PHP AST process, so that the
codegen section can focus only on codegen, not on
the translation of THP -> PHP.
- Parse __more__ binary operators
- Parse `Type name = value` bindings
- Parse more complex bindings
@ -26,9 +31,9 @@
- [x] Begin work on semantic analysis
- [x] Minimal symbol table
- [x] Check duplicate function declarations
- [ ] Improve REPL/File compilation code
- [x] Improve REPL/File compilation code
- [ ] Typecheck bindings
- [ ] Typecheck functions
- [x] Typecheck functions
- [ ] Transform simple THP expression into PHP statements
## v0.0.9

View File

@ -1,5 +1,9 @@
use self::semantic_error::SemanticError;
mod lex_error;
pub mod semantic_error;
mod syntax_error;
mod utils;
pub trait PrintableError {
fn get_error_str(&self, chars: &Vec<char>) -> String;
@ -9,6 +13,7 @@ pub trait PrintableError {
/// Umbrella error type: wraps the concrete error of one of the
/// error kinds declared in this module, so callers can handle a single
/// type and render any of them through `PrintableError`.
pub enum MistiError {
/// Wraps a `LexError`
Lex(LexError),
/// Wraps a `SyntaxError`
Syntax(SyntaxError),
/// Wraps a `SemanticError`
Semantic(SemanticError),
}
#[derive(Debug)]
@ -29,6 +34,7 @@ impl PrintableError for MistiError {
match self {
Self::Lex(err) => err.get_error_str(chars),
Self::Syntax(err) => err.get_error_str(chars),
Self::Semantic(err) => err.get_error_str(chars),
}
}
}

View File

@ -0,0 +1,31 @@
use super::utils::{get_line, get_line_number};
use super::PrintableError;
/// An error found while checking the semantics of the AST.
///
/// The positions are indexes into the `chars` of the source code that
/// is later handed to `get_error_str`, which uses them to locate and
/// underline the offending code.
#[derive(Debug)]
pub struct SemanticError {
/// Position where the erroneous code starts
pub error_start: usize,
/// Position where the erroneous code ends
pub error_end: usize,
/// Human readable description of the error, printed as-is in the
/// rendered message
pub reason: String,
}
impl PrintableError for SemanticError {
    /// Renders this error as a snippet of the offending source line.
    ///
    /// - `chars`: The source code the error positions refer to
    ///
    /// The output contains the faulty line prefixed by its line number,
    /// a row of `^` under the faulty code, and finally the reason
    /// followed by the `line:column` where the error starts.
    fn get_error_str(&self, chars: &Vec<char>) -> String {
        let reason = &self.reason;
        let line_number = get_line_number(chars, self.error_start);
        let (line, before, length) = get_line(chars, self.error_start, self.error_end);

        // Blank gutter, as wide as the printed line number
        let line_number_whitespace = " ".repeat(line_number.to_string().len());
        // Padding that pushes the indicator under the faulty code
        let whitespace = " ".repeat(before);
        // One caret per faulty char
        let indicator = "^".repeat(length);

        format!(
            r#"
{line_number_whitespace} |
{line_number } | {line}
{line_number_whitespace} | {whitespace}{indicator}
{reason} at line {line_number}:{before}"#,
        )
    }
}

View File

@ -1,5 +1,5 @@
use super::utils::{get_line, get_line_number};
use super::{PrintableError, SyntaxError};
use std::collections::VecDeque;
impl PrintableError for SyntaxError {
fn get_error_str(&self, chars: &Vec<char>) -> String {
@ -23,96 +23,6 @@ impl PrintableError for SyntaxError {
}
}
/// Extracts a line of code
///
/// - `chars`: Input where to extract the line from
/// - `start_position`: Position where the erroneous code starts
/// - `end_position`: Position where the erroneous code ends
///
/// Returns a tuple of:
///
/// - `String`: The faulty line, without its trailing newline
/// - `usize`: The amount of chars *before* the faulty code
/// - `usize`: The length of the faulty code
///
/// This function does not panic on odd positions:
///
/// - A `start_position` past the end of `chars` (including empty input)
///   is treated as pointing at the end of the input.
/// - A `start_position` that points at a `'\n'` is reported at the end
///   of the line that newline terminates.
/// - An `end_position` smaller than `start_position` yields a length of 0.
///
/// ## Example
///
/// ```
/// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
/// let start_position = 13;
/// let end_position = 15;
///
/// let (line, before, length) = get_line(&input, start_position, end_position);
///
/// assert_eq!("val number == 50", line);
/// assert_eq!(11, before);
/// assert_eq!(2, length);
/// ```
fn get_line(
    chars: &Vec<char>,
    start_position: usize,
    end_position: usize,
) -> (String, usize, usize) {
    // An error reported at (or past) EOF must not index out of bounds
    let start = start_position.min(chars.len());

    // The line begins right after the last newline found before `start`,
    // or at the beginning of the input if there is none
    let line_start = chars[..start]
        .iter()
        .rposition(|&c| c == '\n')
        .map_or(0, |newline_pos| newline_pos + 1);

    // The line ends at the first newline found at or after `start`,
    // or at the end of the input if there is none. Searching from `start`
    // itself keeps `line_start <= start <= line_end` even when `start`
    // points at a newline.
    let line_end = chars[start..]
        .iter()
        .position(|&c| c == '\n')
        .map_or(chars.len(), |offset| start + offset);

    (
        chars[line_start..line_end].iter().collect(),
        start - line_start,
        // Saturate instead of underflowing on an inverted range
        end_position.saturating_sub(start_position),
    )
}
/// Computes the 1-based number of the line where `target_pos` is located.
///
/// - `chars`: Input to scan
/// - `target_pos`: Position whose line number is wanted
///
/// The result is one plus the amount of `'\n'` found strictly before
/// `target_pos`. A position past the end of the input yields the number
/// of the last line.
fn get_line_number(chars: &Vec<char>, target_pos: usize) -> usize {
    let newlines_before_target = chars
        .iter()
        .take(target_pos)
        .filter(|&&c| c == '\n')
        .count();

    newlines_before_target + 1
}
#[cfg(test)]
mod tests {
use super::*;

View File

@ -0,0 +1,91 @@
use std::collections::VecDeque;
/// Extracts a line of code
///
/// - `chars`: Input where to extract the line from
/// - `start_position`: Position where the erroneous code starts
/// - `end_position`: Position where the erroneous code ends
///
/// Returns a tuple of:
///
/// - `String`: The faulty line, without its trailing newline
/// - `usize`: The amount of chars *before* the faulty code
/// - `usize`: The length of the faulty code
///
/// This function does not panic on odd positions:
///
/// - A `start_position` past the end of `chars` (including empty input)
///   is treated as pointing at the end of the input.
/// - A `start_position` that points at a `'\n'` is reported at the end
///   of the line that newline terminates.
/// - An `end_position` smaller than `start_position` yields a length of 0.
///
/// ## Example
///
/// ```
/// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
/// let start_position = 13;
/// let end_position = 15;
///
/// let (line, before, length) = get_line(&input, start_position, end_position);
///
/// assert_eq!("val number == 50", line);
/// assert_eq!(11, before);
/// assert_eq!(2, length);
/// ```
pub fn get_line(
    chars: &Vec<char>,
    start_position: usize,
    end_position: usize,
) -> (String, usize, usize) {
    // An error reported at (or past) EOF must not index out of bounds
    let start = start_position.min(chars.len());

    // The line begins right after the last newline found before `start`,
    // or at the beginning of the input if there is none
    let line_start = chars[..start]
        .iter()
        .rposition(|&c| c == '\n')
        .map_or(0, |newline_pos| newline_pos + 1);

    // The line ends at the first newline found at or after `start`,
    // or at the end of the input if there is none. Searching from `start`
    // itself keeps `line_start <= start <= line_end` even when `start`
    // points at a newline.
    let line_end = chars[start..]
        .iter()
        .position(|&c| c == '\n')
        .map_or(chars.len(), |offset| start + offset);

    (
        chars[line_start..line_end].iter().collect(),
        start - line_start,
        // Saturate instead of underflowing on an inverted range
        end_position.saturating_sub(start_position),
    )
}
/// Computes the 1-based number of the line where `target_pos` is located.
///
/// - `chars`: Input to scan
/// - `target_pos`: Position whose line number is wanted
///
/// The result is one plus the amount of `'\n'` found strictly before
/// `target_pos`. A position past the end of the input yields the number
/// of the last line.
pub fn get_line_number(chars: &Vec<char>, target_pos: usize) -> usize {
    let newlines_before_target = chars
        .iter()
        .take(target_pos)
        .filter(|&&c| c == '\n')
        .count();

    newlines_before_target + 1
}

View File

@ -70,7 +70,7 @@ fn compile(input: &String) -> Result<String, String> {
"{}:\n{}",
"syntax error".on_red(),
error.get_error_str(&chars)
))
));
}
};
@ -88,11 +88,18 @@ fn build_ast(input: &String, tokens: Vec<Token>) -> Result<String, String> {
Err(reason) => {
let chars: Vec<char> = input.chars().into_iter().collect();
let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars));
return Err(error)
return Err(error);
}
};
crate::semantic::check_semantics(&ast)?;
match crate::semantic::check_semantics(&ast) {
Ok(_) => {}
Err(reason) => {
let chars: Vec<char> = input.chars().into_iter().collect();
let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars));
return Err(error);
}
};
Ok(codegen::codegen(&ast))
}

View File

@ -1,5 +1,7 @@
use std::io::{self, Write};
use colored::Colorize;
use crate::error_handling::PrintableError;
use crate::lexic::token::Token;
@ -34,7 +36,9 @@ fn build_ast(input: &String, tokens: Vec<Token>) {
match res1 {
Ok(_) => {}
Err(reason) => {
eprintln!("{}", reason);
let chars: Vec<char> = input.chars().into_iter().collect();
let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars));
eprintln!("{}", error);
return;
}
}

View File

@ -1,13 +1,18 @@
use crate::syntax::ast::{ModuleAST, TopLevelDeclaration};
use crate::{
error_handling::semantic_error::SemanticError,
error_handling::MistiError,
syntax::ast::{ModuleAST, TopLevelDeclaration},
};
use super::symbol_table::SymbolTable;
pub trait SemanticCheck {
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), String>;
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError>;
}
impl SemanticCheck for ModuleAST {
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), String> {
/// Checks that this AST is semantically correct, given a symbol table
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError> {
for declaration in &self.declarations {
declaration.check_semantics(scope)?;
}
@ -17,14 +22,30 @@ impl SemanticCheck for ModuleAST {
}
impl SemanticCheck for TopLevelDeclaration {
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), String> {
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError> {
match self {
TopLevelDeclaration::Binding(_) => Err("Binding not implemented".into()),
TopLevelDeclaration::Binding(_) => {
let error = SemanticError {
error_start: 0,
error_end: 0,
reason: "Binding typechecking: Not implemented".into(),
};
Err(MistiError::Semantic(error))
}
TopLevelDeclaration::FunctionDeclaration(function) => {
let function_name = function.identifier.as_ref().clone();
if scope.test(&function_name) {
return Err(format!("Function {} already defined", function_name));
let error = SemanticError {
// TODO: Get the position of the function name. For this, these structs
// should store the token instead of just the string
error_start: 0,
error_end: 0,
reason: format!("Function {} already defined", function_name),
};
return Err(MistiError::Semantic(error));
}
scope.insert(

View File

@ -1,4 +1,4 @@
use crate::syntax::ast::ModuleAST;
use crate::{error_handling::MistiError, syntax::ast::ModuleAST};
mod impls;
mod symbol_table;
@ -11,8 +11,11 @@ use impls::SemanticCheck;
// 3. Add the symbols declared to the symbol table, annotating them with their type
// 4. Check if the symbols used are declared
pub fn check_semantics(ast: &ModuleAST) -> Result<(), String> {
/// Checks that the AST is semantically correct
pub fn check_semantics(ast: &ModuleAST) -> Result<(), MistiError> {
// For now there's only support for a single file
// TODO: Receive a symbol table as a reference and work on it.
// this way we can implement a unique symbol table for REPL session
let global_scope = symbol_table::SymbolTable::new();
ast.check_semantics(&global_scope)