Improve function semantic check
This commit is contained in:
parent
a39b0c0d5a
commit
f97b8e2e07
@ -2,6 +2,11 @@
|
||||
|
||||
## TODO
|
||||
|
||||
- Implement AST transformation before codegen:
|
||||
Create a new AST to represent PHP source code
|
||||
and a THP ast -> PHP ast process, so that the
|
||||
codegen section can focus only on codegen, not on
|
||||
translation of thp->php.
|
||||
- Parse __more__ binary operators
|
||||
- Parse `Type name = value` bindings
|
||||
- Parse more complex bindings
|
||||
@ -26,9 +31,9 @@
|
||||
- [x] Begin work on semantic analysis
|
||||
- [x] Minimal symbol table
|
||||
- [x] Check duplicate function declarations
|
||||
- [ ] Improve REPL/File compilation code
|
||||
- [x] Improve REPL/File compilation code
|
||||
- [ ] Typecheck bindings
|
||||
- [ ] Typecheck functions
|
||||
- [x] Typecheck functions
|
||||
- [ ] Transform simple THP expression into PHP statements
|
||||
|
||||
## v0.0.9
|
||||
|
@ -1,5 +1,9 @@
|
||||
use self::semantic_error::SemanticError;
|
||||
|
||||
mod lex_error;
|
||||
pub mod semantic_error;
|
||||
mod syntax_error;
|
||||
mod utils;
|
||||
|
||||
pub trait PrintableError {
|
||||
fn get_error_str(&self, chars: &Vec<char>) -> String;
|
||||
@ -9,6 +13,7 @@ pub trait PrintableError {
|
||||
pub enum MistiError {
|
||||
Lex(LexError),
|
||||
Syntax(SyntaxError),
|
||||
Semantic(SemanticError),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@ -29,6 +34,7 @@ impl PrintableError for MistiError {
|
||||
match self {
|
||||
Self::Lex(err) => err.get_error_str(chars),
|
||||
Self::Syntax(err) => err.get_error_str(chars),
|
||||
Self::Semantic(err) => err.get_error_str(chars),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
31
src/error_handling/semantic_error.rs
Normal file
31
src/error_handling/semantic_error.rs
Normal file
@ -0,0 +1,31 @@
|
||||
use super::utils::{get_line, get_line_number};
|
||||
use super::PrintableError;
|
||||
|
||||
/// An error found during semantic analysis, located by a span of positions
/// in the source chars.
#[derive(Debug)]
pub struct SemanticError {
    /// Position where the erroneous code starts. Also used to look up the
    /// line (and line number) shown when the error is printed.
    pub error_start: usize,
    /// Position where the erroneous code ends.
    pub error_end: usize,
    /// Message describing the error; printed next to the line number.
    pub reason: String,
}
|
||||
|
||||
impl PrintableError for SemanticError {
|
||||
fn get_error_str(&self, chars: &Vec<char>) -> String {
|
||||
let (line, before, length) = get_line(chars, self.error_start, self.error_end);
|
||||
|
||||
let line_number = get_line_number(chars, self.error_start);
|
||||
let line_number_whitespace = " ".repeat(line_number.to_string().len());
|
||||
|
||||
let whitespace = vec![' '; before].iter().collect::<String>();
|
||||
let indicator = vec!['^'; length].iter().collect::<String>();
|
||||
let reason = &self.reason;
|
||||
|
||||
format!(
|
||||
r#"
|
||||
{line_number_whitespace} |
|
||||
{line_number } | {line}
|
||||
{line_number_whitespace} | {whitespace}{indicator}
|
||||
|
||||
{reason} at line {line_number}:{before}"#,
|
||||
)
|
||||
}
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
use super::utils::{get_line, get_line_number};
|
||||
use super::{PrintableError, SyntaxError};
|
||||
use std::collections::VecDeque;
|
||||
|
||||
impl PrintableError for SyntaxError {
|
||||
fn get_error_str(&self, chars: &Vec<char>) -> String {
|
||||
@ -23,96 +23,6 @@ impl PrintableError for SyntaxError {
|
||||
}
|
||||
}
|
||||
|
||||
/// Extracts a line of code
///
/// - `chars`: Input where to extract the line from
/// - `start_position`: Position where the erroneous code starts
/// - `end_position`: Position where the erroneous code ends
///
/// Returns a tuple of:
///
/// - `String`: The faulty line, without its line terminator
/// - `usize`: The amount of chars *before* the faulty code
/// - `usize`: The length of the faulty code
///
/// This function does not panic on odd positions:
///
/// - a `start_position` past the end of the input is clamped to the end
/// - a `start_position` that falls on a `'\n'` selects the line that
///   newline terminates
/// - an `end_position` smaller than `start_position` yields a length of `0`
///
/// ## Example
///
/// ```
/// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
/// let start_position = 13;
/// let end_position = 15;
///
/// let (line, before, length) = get_line(&input, start_position, end_position);
///
/// assert_eq!("val number == 50", line);
/// assert_eq!(11, before);
/// assert_eq!(2, length);
/// ```
fn get_line(
    chars: &Vec<char>,
    start_position: usize,
    end_position: usize,
) -> (String, usize, usize) {
    // Clamp so an error reported at (or past) the end of the input can still be rendered
    let start = start_position.min(chars.len());

    // The line begins right after the closest '\n' before `start`,
    // or at the beginning of the input if there is none
    let line_start = chars[..start]
        .iter()
        .rposition(|&c| c == '\n')
        .map_or(0, |newline| newline + 1);

    // The line ends at the first '\n' at or after `start`,
    // or at the end of the input if there is none
    let line_end = chars[start..]
        .iter()
        .position(|&c| c == '\n')
        .map_or(chars.len(), |offset| start + offset);

    (
        chars[line_start..line_end].iter().collect::<String>(),
        // line_start <= start always holds, so this cannot underflow
        start - line_start,
        end_position.saturating_sub(start_position),
    )
}
|
||||
|
||||
/// Returns the 1-based number of the line on which `target_pos` falls.
///
/// - `chars`: The full source code
/// - `target_pos`: Position whose line number is wanted
///
/// Only the `'\n'` chars located strictly before `target_pos` are counted,
/// so a position past the end of `chars` counts every newline in the input.
fn get_line_number(chars: &Vec<char>, target_pos: usize) -> usize {
    let newlines_before = chars
        .iter()
        .take(target_pos)
        .filter(|&&c| c == '\n')
        .count();

    // Lines are numbered starting at 1
    newlines_before + 1
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
91
src/error_handling/utils.rs
Normal file
91
src/error_handling/utils.rs
Normal file
@ -0,0 +1,91 @@
|
||||
use std::collections::VecDeque;
|
||||
|
||||
/// Extracts a line of code
///
/// - `chars`: Input where to extract the line from
/// - `start_position`: Position where the erroneous code starts
/// - `end_position`: Position where the erroneous code ends
///
/// Returns a tuple of:
///
/// - `String`: The faulty line, without its line terminator
/// - `usize`: The amount of chars *before* the faulty code
/// - `usize`: The length of the faulty code
///
/// This function does not panic on odd positions:
///
/// - a `start_position` past the end of the input is clamped to the end
/// - a `start_position` that falls on a `'\n'` selects the line that
///   newline terminates
/// - an `end_position` smaller than `start_position` yields a length of `0`
///
/// ## Example
///
/// ```
/// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
/// let start_position = 13;
/// let end_position = 15;
///
/// let (line, before, length) = get_line(&input, start_position, end_position);
///
/// assert_eq!("val number == 50", line);
/// assert_eq!(11, before);
/// assert_eq!(2, length);
/// ```
pub fn get_line(
    chars: &Vec<char>,
    start_position: usize,
    end_position: usize,
) -> (String, usize, usize) {
    // Clamp so an error reported at (or past) the end of the input can still be rendered
    let start = start_position.min(chars.len());

    // The line begins right after the closest '\n' before `start`,
    // or at the beginning of the input if there is none
    let line_start = chars[..start]
        .iter()
        .rposition(|&c| c == '\n')
        .map_or(0, |newline| newline + 1);

    // The line ends at the first '\n' at or after `start`,
    // or at the end of the input if there is none
    let line_end = chars[start..]
        .iter()
        .position(|&c| c == '\n')
        .map_or(chars.len(), |offset| start + offset);

    (
        chars[line_start..line_end].iter().collect::<String>(),
        // line_start <= start always holds, so this cannot underflow
        start - line_start,
        end_position.saturating_sub(start_position),
    )
}
|
||||
|
||||
/// Returns the 1-based number of the line on which `target_pos` falls.
///
/// - `chars`: The full source code
/// - `target_pos`: Position whose line number is wanted
///
/// Only the `'\n'` chars located strictly before `target_pos` are counted,
/// so a position past the end of `chars` counts every newline in the input.
pub fn get_line_number(chars: &Vec<char>, target_pos: usize) -> usize {
    let newlines_before = chars
        .iter()
        .take(target_pos)
        .filter(|&&c| c == '\n')
        .count();

    // Lines are numbered starting at 1
    newlines_before + 1
}
|
@ -70,7 +70,7 @@ fn compile(input: &String) -> Result<String, String> {
|
||||
"{}:\n{}",
|
||||
"syntax error".on_red(),
|
||||
error.get_error_str(&chars)
|
||||
))
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
@ -88,11 +88,18 @@ fn build_ast(input: &String, tokens: Vec<Token>) -> Result<String, String> {
|
||||
Err(reason) => {
|
||||
let chars: Vec<char> = input.chars().into_iter().collect();
|
||||
let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars));
|
||||
return Err(error)
|
||||
return Err(error);
|
||||
}
|
||||
};
|
||||
|
||||
crate::semantic::check_semantics(&ast)?;
|
||||
match crate::semantic::check_semantics(&ast) {
|
||||
Ok(_) => {}
|
||||
Err(reason) => {
|
||||
let chars: Vec<char> = input.chars().into_iter().collect();
|
||||
let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars));
|
||||
return Err(error);
|
||||
}
|
||||
};
|
||||
|
||||
Ok(codegen::codegen(&ast))
|
||||
}
|
||||
|
@ -1,5 +1,7 @@
|
||||
use std::io::{self, Write};
|
||||
|
||||
use colored::Colorize;
|
||||
|
||||
use crate::error_handling::PrintableError;
|
||||
use crate::lexic::token::Token;
|
||||
|
||||
@ -34,7 +36,9 @@ fn build_ast(input: &String, tokens: Vec<Token>) {
|
||||
match res1 {
|
||||
Ok(_) => {}
|
||||
Err(reason) => {
|
||||
eprintln!("{}", reason);
|
||||
let chars: Vec<char> = input.chars().into_iter().collect();
|
||||
let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars));
|
||||
eprintln!("{}", error);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -1,13 +1,18 @@
|
||||
use crate::syntax::ast::{ModuleAST, TopLevelDeclaration};
|
||||
use crate::{
|
||||
error_handling::semantic_error::SemanticError,
|
||||
error_handling::MistiError,
|
||||
syntax::ast::{ModuleAST, TopLevelDeclaration},
|
||||
};
|
||||
|
||||
use super::symbol_table::SymbolTable;
|
||||
|
||||
pub trait SemanticCheck {
|
||||
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), String>;
|
||||
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError>;
|
||||
}
|
||||
|
||||
impl SemanticCheck for ModuleAST {
|
||||
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), String> {
|
||||
/// Checks that this AST is semantically correct, given a symbol table
|
||||
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError> {
|
||||
for declaration in &self.declarations {
|
||||
declaration.check_semantics(scope)?;
|
||||
}
|
||||
@ -17,14 +22,30 @@ impl SemanticCheck for ModuleAST {
|
||||
}
|
||||
|
||||
impl SemanticCheck for TopLevelDeclaration {
|
||||
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), String> {
|
||||
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError> {
|
||||
match self {
|
||||
TopLevelDeclaration::Binding(_) => Err("Binding not implemented".into()),
|
||||
TopLevelDeclaration::Binding(_) => {
|
||||
let error = SemanticError {
|
||||
error_start: 0,
|
||||
error_end: 0,
|
||||
reason: "Binding typechecking: Not implemented".into(),
|
||||
};
|
||||
|
||||
Err(MistiError::Semantic(error))
|
||||
}
|
||||
TopLevelDeclaration::FunctionDeclaration(function) => {
|
||||
let function_name = function.identifier.as_ref().clone();
|
||||
|
||||
if scope.test(&function_name) {
|
||||
return Err(format!("Function {} already defined", function_name));
|
||||
let error = SemanticError {
|
||||
// TODO: Get the position of the function name. For this, these structs
|
||||
// should store the token instead of just the string
|
||||
error_start: 0,
|
||||
error_end: 0,
|
||||
reason: format!("Function {} already defined", function_name),
|
||||
};
|
||||
|
||||
return Err(MistiError::Semantic(error));
|
||||
}
|
||||
|
||||
scope.insert(
|
||||
|
@ -1,4 +1,4 @@
|
||||
use crate::syntax::ast::ModuleAST;
|
||||
use crate::{error_handling::MistiError, syntax::ast::ModuleAST};
|
||||
|
||||
mod impls;
|
||||
mod symbol_table;
|
||||
@ -11,8 +11,11 @@ use impls::SemanticCheck;
|
||||
// 3. Add the symbols declared to the symbol table, annotating them with their type
|
||||
// 4. Check if the symbols used are declared
|
||||
|
||||
pub fn check_semantics(ast: &ModuleAST) -> Result<(), String> {
|
||||
/// Checks that the AST is semantically correct
|
||||
pub fn check_semantics(ast: &ModuleAST) -> Result<(), MistiError> {
|
||||
// For now there's only support for a single file
|
||||
// TODO: Receive a symbol table as a reference and work on it.
|
||||
// this way we can implement a unique symbol table for REPL session
|
||||
let global_scope = symbol_table::SymbolTable::new();
|
||||
|
||||
ast.check_semantics(&global_scope)
|
||||
|
Loading…
Reference in New Issue
Block a user