diff --git a/CHANGELOG.md b/CHANGELOG.md index 509e72e..7e72f38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ ## TODO +- Implement AST transformation before codegen: + Create a new AST to represent PHP source code + and a THP ast -> PHP ast process, so that the + codegen section can focus only in codegen, not in + translation of thp->php. - Parse __more__ binary operators - Parse `Type name = value` bindings - Parse more complex bindings @@ -26,9 +31,9 @@ - [x] Begin work on semantic analysis - [x] Minimal symbol table - [x] Check duplicate function declarations -- [ ] Improve REPL/File compilation code +- [x] Improve REPL/File compilation code - [ ] Typecheck bindings -- [ ] Typecheck functions +- [x] Typecheck functions - [ ] Transform simple THP expression into PHP statements ## v0.0.9 diff --git a/src/error_handling/mod.rs b/src/error_handling/mod.rs index 2d08f20..5a8a86a 100644 --- a/src/error_handling/mod.rs +++ b/src/error_handling/mod.rs @@ -1,5 +1,9 @@ +use self::semantic_error::SemanticError; + mod lex_error; +pub mod semantic_error; mod syntax_error; +mod utils; pub trait PrintableError { fn get_error_str(&self, chars: &Vec) -> String; @@ -9,6 +13,7 @@ pub trait PrintableError { pub enum MistiError { Lex(LexError), Syntax(SyntaxError), + Semantic(SemanticError), } #[derive(Debug)] @@ -29,6 +34,7 @@ impl PrintableError for MistiError { match self { Self::Lex(err) => err.get_error_str(chars), Self::Syntax(err) => err.get_error_str(chars), + Self::Semantic(err) => err.get_error_str(chars), } } } diff --git a/src/error_handling/semantic_error.rs b/src/error_handling/semantic_error.rs new file mode 100644 index 0000000..1084f75 --- /dev/null +++ b/src/error_handling/semantic_error.rs @@ -0,0 +1,31 @@ +use super::utils::{get_line, get_line_number}; +use super::PrintableError; + +#[derive(Debug)] +pub struct SemanticError { + pub error_start: usize, + pub error_end: usize, + pub reason: String, +} + +impl PrintableError for SemanticError { + fn get_error_str(&self, chars: &Vec) -> String { + let (line, before, length) = get_line(chars, self.error_start, self.error_end); + + let line_number = get_line_number(chars, self.error_start); + let line_number_whitespace = " ".repeat(line_number.to_string().len()); + + let whitespace = vec![' '; before].iter().collect::(); + let indicator = vec!['^'; length].iter().collect::(); + let reason = &self.reason; + + format!( + r#" +{line_number_whitespace} | +{line_number } | {line} +{line_number_whitespace} | {whitespace}{indicator} + +{reason} at line {line_number}:{before}"#, + ) + } +} diff --git a/src/error_handling/syntax_error.rs b/src/error_handling/syntax_error.rs index e585c25..97e5067 100644 --- a/src/error_handling/syntax_error.rs +++ b/src/error_handling/syntax_error.rs @@ -1,5 +1,5 @@ +use super::utils::{get_line, get_line_number}; use super::{PrintableError, SyntaxError}; -use std::collections::VecDeque; impl PrintableError for SyntaxError { fn get_error_str(&self, chars: &Vec) -> String { @@ -23,96 +23,6 @@ impl PrintableError for SyntaxError { } } -/// Extracts a line of code -/// -/// - `chars`: Input where to extract the line from -/// - `start_position`: Position where the erroneous code starts -/// - `end_position`: Position where the erroneous code ends -/// -/// Returns a tuple of: -/// -/// - `String`: The faulty line -/// - `usize`: The amount of chars *before* the faulty code -/// - `usize`: The lenght of the faulty code -/// -/// ## Example -/// -/// ``` -/// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect(); -/// let start_position = 13; -/// let end_position = 15; -/// -/// let (line, before, length) = get_line(&input, start_position, end_position); -/// -/// assert_eq!("val number == 50", line); -/// assert_eq!(11, before); -/// assert_eq!(2, length); -/// ``` -fn get_line( - chars: &Vec, - start_position: usize, - end_position: usize, -) -> (String, usize, usize) { - let mut result_chars = VecDeque::::new(); - - // Push chars to the front until a new line is found - let mut before_pos = start_position; - loop { - let current_char = chars[before_pos]; - - if current_char == '\n' { - // This is important because before_pos will be used to calculate - // the number of chars before start_position - before_pos += 1; - break; - } - - result_chars.push_front(current_char); - - if before_pos == 0 { - break; - } - - before_pos -= 1; - } - - // Push chars to the end until a new line is found - let mut after_pos = start_position + 1; - let char_count = chars.len(); - while after_pos < char_count { - let current_char = chars[after_pos]; - - if current_char == '\n' { - break; - } - - result_chars.push_back(current_char); - after_pos += 1; - } - - ( - result_chars.iter().collect::(), - start_position - before_pos, - end_position - start_position, - ) -} - -fn get_line_number(chars: &Vec, target_pos: usize) -> usize { - let mut count = 1; - - for (pos, char) in chars.iter().enumerate() { - if pos >= target_pos { - break; - } - - if *char == '\n' { - count += 1; - } - } - - count -} - #[cfg(test)] mod tests { use super::*; diff --git a/src/error_handling/utils.rs b/src/error_handling/utils.rs new file mode 100644 index 0000000..38cc83e --- /dev/null +++ b/src/error_handling/utils.rs @@ -0,0 +1,91 @@ +use std::collections::VecDeque; + +/// Extracts a line of code +/// +/// - `chars`: Input where to extract the line from +/// - `start_position`: Position where the erroneous code starts +/// - `end_position`: Position where the erroneous code ends +/// +/// Returns a tuple of: +/// +/// - `String`: The faulty line +/// - `usize`: The amount of chars *before* the faulty code +/// - `usize`: The lenght of the faulty code +/// +/// ## Example +/// +/// ``` +/// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect(); +/// let start_position = 13; +/// let end_position = 15; +/// +/// let (line, before, length) = get_line(&input, start_position, end_position); +/// +/// assert_eq!("val number == 50", line); +/// assert_eq!(11, before); +/// assert_eq!(2, length); +/// ``` +pub fn get_line( + chars: &Vec, + start_position: usize, + end_position: usize, +) -> (String, usize, usize) { + let mut result_chars = VecDeque::::new(); + + // Push chars to the front until a new line is found + let mut before_pos = start_position; + loop { + let current_char = chars[before_pos]; + + if current_char == '\n' { + // This is important because before_pos will be used to calculate + // the number of chars before start_position + before_pos += 1; + break; + } + + result_chars.push_front(current_char); + + if before_pos == 0 { + break; + } + + before_pos -= 1; + } + + // Push chars to the end until a new line is found + let mut after_pos = start_position + 1; + let char_count = chars.len(); + while after_pos < char_count { + let current_char = chars[after_pos]; + + if current_char == '\n' { + break; + } + + result_chars.push_back(current_char); + after_pos += 1; + } + + ( + result_chars.iter().collect::(), + start_position - before_pos, + end_position - start_position, + ) +} + +pub fn get_line_number(chars: &Vec, target_pos: usize) -> usize { + let mut count = 1; + + for (pos, char) in chars.iter().enumerate() { + if pos >= target_pos { + break; + } + + if *char == '\n' { + count += 1; + } + } + + count +} diff --git a/src/file/mod.rs b/src/file/mod.rs index 60ce7a0..5e2c13f 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -70,7 +70,7 @@ fn compile(input: &String) -> Result { "{}:\n{}", "syntax error".on_red(), error.get_error_str(&chars) - )) + )); } }; @@ -88,11 +88,18 @@ fn build_ast(input: &String, tokens: Vec) -> Result { Err(reason) => { let chars: Vec = input.chars().into_iter().collect(); let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars)); - return Err(error) + return Err(error); } }; - crate::semantic::check_semantics(&ast)?; + match crate::semantic::check_semantics(&ast) { + Ok(_) => {} + Err(reason) => { + let chars: Vec = input.chars().into_iter().collect(); + let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars)); + return Err(error); + } + }; Ok(codegen::codegen(&ast)) } diff --git a/src/repl/mod.rs b/src/repl/mod.rs index c308b59..5c8a589 100755 --- a/src/repl/mod.rs +++ b/src/repl/mod.rs @@ -1,5 +1,7 @@ use std::io::{self, Write}; +use colored::Colorize; + use crate::error_handling::PrintableError; use crate::lexic::token::Token; @@ -34,7 +36,9 @@ fn build_ast(input: &String, tokens: Vec) { match res1 { Ok(_) => {} Err(reason) => { - eprintln!("{}", reason); + let chars: Vec = input.chars().into_iter().collect(); + let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars)); + eprintln!("{}", error); return; } } diff --git a/src/semantic/impls.rs b/src/semantic/impls.rs index 0440cb7..17735f7 100644 --- a/src/semantic/impls.rs +++ b/src/semantic/impls.rs @@ -1,13 +1,18 @@ -use crate::syntax::ast::{ModuleAST, TopLevelDeclaration}; +use crate::{ + error_handling::semantic_error::SemanticError, + error_handling::MistiError, + syntax::ast::{ModuleAST, TopLevelDeclaration}, +}; use super::symbol_table::SymbolTable; pub trait SemanticCheck { - fn check_semantics(&self, scope: &SymbolTable) -> Result<(), String>; + fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError>; } impl SemanticCheck for ModuleAST { - fn check_semantics(&self, scope: &SymbolTable) -> Result<(), String> { + /// Checks that this AST is semantically correct, given a symbol table + fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError> { for declaration in &self.declarations { declaration.check_semantics(scope)?; } @@ -17,14 +22,30 @@ impl SemanticCheck for ModuleAST { } impl SemanticCheck for TopLevelDeclaration { - fn check_semantics(&self, scope: &SymbolTable) -> Result<(), String> { + fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError> { match self { - TopLevelDeclaration::Binding(_) => Err("Binding not implemented".into()), + TopLevelDeclaration::Binding(_) => { + let error = SemanticError { + error_start: 0, + error_end: 0, + reason: "Binding typechecking: Not implemented".into(), + }; + + Err(MistiError::Semantic(error)) + } TopLevelDeclaration::FunctionDeclaration(function) => { let function_name = function.identifier.as_ref().clone(); if scope.test(&function_name) { - return Err(format!("Function {} already defined", function_name)); + let error = SemanticError { + // TODO: Get the position of the function name. For this, these structs + // should store the token instead of just the string + error_start: 0, + error_end: 0, + reason: format!("Function {} already defined", function_name), + }; + + return Err(MistiError::Semantic(error)); } scope.insert( diff --git a/src/semantic/mod.rs b/src/semantic/mod.rs index 4e65c6f..37045d2 100644 --- a/src/semantic/mod.rs +++ b/src/semantic/mod.rs @@ -1,4 +1,4 @@ -use crate::syntax::ast::ModuleAST; +use crate::{error_handling::MistiError, syntax::ast::ModuleAST}; mod impls; mod symbol_table; @@ -11,8 +11,11 @@ use impls::SemanticCheck; // 3. Add the symbols declared to the symbol table, annotating them with their type // 4. Check if the symbols used are declared -pub fn check_semantics(ast: &ModuleAST) -> Result<(), String> { +/// Checks that the AST is semantically correct +pub fn check_semantics(ast: &ModuleAST) -> Result<(), MistiError> { // For now there's only support for a single file + // TODO: Receive a symbol table as a reference and work on it. + // this way we can implement a unique symbol table for REPL session let global_scope = symbol_table::SymbolTable::new(); ast.check_semantics(&global_scope)