Improve function semantic check

2024-03-09 08:05:40 -05:00 · 2024-03-09 08:05:40 -05:00 · f97b8e2e07
commit f97b8e2e07
parent a39b0c0d5a
9 changed files with 183 additions and 105 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -2,6 +2,11 @@
 ## TODO
 - Implement AST transformation before codegen:
    Create a new AST to represent PHP source code
    and a THP ast -> PHP ast process, so that the
    codegen section can focus only on codegen, not on
    translation of thp->php.
 - Parse __more__ binary operators
 - Parse `Type name = value` bindings
 - Parse more complex bindings
@ -26,9 +31,9 @@
 - [x] Begin work on semantic analysis
 - [x] Minimal symbol table
 - [x] Check duplicate function declarations
- [ ] Improve REPL/File compilation code
+- [x] Improve REPL/File compilation code
 - [ ] Typecheck bindings
- [ ] Typecheck functions
+- [x] Typecheck functions
 - [ ] Transform simple THP expression into PHP statements
 ## v0.0.9
--- a/src/error_handling/mod.rs
+++ b/src/error_handling/mod.rs
@ -1,5 +1,9 @@
 use self::semantic_error::SemanticError;
 mod lex_error;
 pub mod semantic_error;
 mod syntax_error;
 mod utils;
 pub trait PrintableError {
    fn get_error_str(&self, chars: &Vec<char>) -> String;
@ -9,6 +13,7 @@ pub trait PrintableError {
 /// Any error the compiler can report, grouped by the phase-specific
 /// error type it wraps. Every variant is rendered through `PrintableError`.
 pub enum MistiError {
    /// Wraps a `LexError`
    Lex(LexError),
    /// Wraps a `SyntaxError`
    Syntax(SyntaxError),
    /// Wraps a `SemanticError`
    Semantic(SemanticError),
 }
 #[derive(Debug)]
@ -29,6 +34,7 @@ impl PrintableError for MistiError {
        match self {
            Self::Lex(err) => err.get_error_str(chars),
            Self::Syntax(err) => err.get_error_str(chars),
            Self::Semantic(err) => err.get_error_str(chars),
        }
    }
 }
--- a/src/error_handling/semantic_error.rs
+++ b/src/error_handling/semantic_error.rs
@ -0,0 +1,31 @@
 use super::utils::{get_line, get_line_number};
 use super::PrintableError;
 /// An error carrying a source range and a human readable reason.
 ///
 /// The range is expressed as positions inside the `chars` passed to
 /// `get_error_str`, and is used to pick and underline the faulty line.
 #[derive(Debug)]
 pub struct SemanticError {
    /// Position where the erroneous code starts
    pub error_start: usize,
    /// Position where the erroneous code ends. The underline length is
    /// computed as `error_end - error_start`.
    pub error_end: usize,
    /// Message printed below the underlined code
    pub reason: String,
 }
 impl PrintableError for SemanticError {
    /// Builds the printable report for this error.
    ///
    /// The report shows the faulty line prefixed by its line number, a row of
    /// `^` under the faulty range, and finally the reason together with the
    /// line number and the offset of the error inside that line.
    ///
    /// - `chars`: The source code this error refers to
    fn get_error_str(&self, chars: &Vec<char>) -> String {
        let line_number = get_line_number(chars, self.error_start);
        let (line, before, length) = get_line(chars, self.error_start, self.error_end);

        // Blank gutter as wide as the printed line number, so the `|` align
        let line_number_whitespace = " ".repeat(line_number.to_string().len());
        // Padding that moves the indicator under the faulty code
        let whitespace = " ".repeat(before);
        let indicator = "^".repeat(length);
        let reason = &self.reason;

        format!(
            r#"
 {line_number_whitespace} |
 {line_number           } | {line}
 {line_number_whitespace} | {whitespace}{indicator}
 {reason} at line {line_number}:{before}"#,
        )
    }
 }
--- a/src/error_handling/syntax_error.rs
+++ b/src/error_handling/syntax_error.rs
@ -1,5 +1,5 @@
 use super::utils::{get_line, get_line_number};
 use super::{PrintableError, SyntaxError};
 use std::collections::VecDeque;
 impl PrintableError for SyntaxError {
    fn get_error_str(&self, chars: &Vec<char>) -> String {
@ -23,96 +23,6 @@ impl PrintableError for SyntaxError {
    }
 }
 /// Extracts a line of code
 ///
 /// - `chars`: Input where to extract the line from
 /// - `start_position`: Position where the erroneous code starts
 /// - `end_position`: Position where the erroneous code ends
 ///
 /// Returns a tuple of:
 ///
 /// - `String`: The faulty line
 /// - `usize`: The amount of chars *before* the faulty code
 /// - `usize`: The length of the faulty code
 ///
 /// ## Panics
 ///
 /// - If `start_position` is not a valid index of `chars` (this includes an
 ///   empty input), because `chars` is indexed directly.
 /// - The subtractions in the returned tuple underflow if `start_position`
 ///   points at a `'\n'` or if `end_position < start_position`.
 ///
 /// ## Example
 ///
 /// ```
 /// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
 /// let start_position = 13;
 /// let end_position = 15;
 ///
 /// let (line, before, length) = get_line(&input, start_position, end_position);
 ///
 /// assert_eq!("val number == 50", line);
 /// assert_eq!(11, before);
 /// assert_eq!(2, length);
 /// ```
 fn get_line(
    chars: &Vec<char>,
    start_position: usize,
    end_position: usize,
 ) -> (String, usize, usize) {
    let mut result_chars = VecDeque::<char>::new();
    // Push chars to the front until a new line is found
    // (the char at start_position itself is included in this pass)
    let mut before_pos = start_position;
    loop {
        let current_char = chars[before_pos];
        if current_char == '\n' {
            // This is important because before_pos will be used to calculate
            // the number of chars before start_position
            before_pos += 1;
            break;
        }
        result_chars.push_front(current_char);
        // Reached the beginning of the input: the line starts at position 0
        if before_pos == 0 {
            break;
        }
        before_pos -= 1;
    }
    // Push chars to the end until a new line is found
    let mut after_pos = start_position + 1;
    let char_count = chars.len();
    while after_pos < char_count {
        let current_char = chars[after_pos];
        if current_char == '\n' {
            break;
        }
        result_chars.push_back(current_char);
        after_pos += 1;
    }
    (
        result_chars.iter().collect::<String>(),
        // before_pos now holds the position of the first char of the line
        start_position - before_pos,
        end_position - start_position,
    )
 }
 /// Returns the line number (starting at 1) where `target_pos` is located
 ///
 /// - `chars`: Input to scan
 /// - `target_pos`: Position whose line number is wanted
 ///
 /// The result is 1 plus the amount of `'\n'` found strictly before
 /// `target_pos`. A `target_pos` past the end of `chars` yields the number
 /// of the last line.
 fn get_line_number(chars: &Vec<char>, target_pos: usize) -> usize {
    let mut count = 1;
    for (pos, char) in chars.iter().enumerate() {
        // Only new lines before the target position are counted
        if pos >= target_pos {
            break;
        }
        if *char == '\n' {
            count += 1;
        }
    }
    count
 }
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/src/error_handling/utils.rs
+++ b/src/error_handling/utils.rs
@ -0,0 +1,91 @@
 use std::collections::VecDeque;
 /// Extracts a line of code
 ///
 /// - `chars`: Input where to extract the line from
 /// - `start_position`: Position where the erroneous code starts
 /// - `end_position`: Position where the erroneous code ends
 ///
 /// Returns a tuple of:
 ///
 /// - `String`: The faulty line
 /// - `usize`: The amount of chars *before* the faulty code
 /// - `usize`: The length of the faulty code
 ///
 /// This function never panics:
 ///
 /// - A `start_position` at or past the end of the input is clamped to the
 ///   end of the input, so the last line is returned.
 /// - A `start_position` that points at a `'\n'` returns the line that the
 ///   new line terminates, with the faulty code located right after it.
 /// - If `end_position < start_position` the length is 0.
 ///
 /// ## Example
 ///
 /// ```
 /// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
 /// let start_position = 13;
 /// let end_position = 15;
 ///
 /// let (line, before, length) = get_line(&input, start_position, end_position);
 ///
 /// assert_eq!("val number == 50", line);
 /// assert_eq!(11, before);
 /// assert_eq!(2, length);
 /// ```
 pub fn get_line(
    chars: &Vec<char>,
    start_position: usize,
    end_position: usize,
 ) -> (String, usize, usize) {
    // Clamp the start so that errors reported at the end of the input
    // (or on an empty input) cannot index out of bounds
    let start = start_position.min(chars.len());

    let mut result_chars = VecDeque::<char>::new();

    // Walk backwards from the char right before `start`, pushing chars to
    // the front until a new line (or the beginning of the input) is found
    for &current_char in chars[..start].iter().rev().take_while(|&&c| c != '\n') {
        result_chars.push_front(current_char);
    }

    // Everything collected so far lies before the faulty code
    let chars_before = result_chars.len();

    // Walk forwards from `start`, pushing chars to the end until a new line
    // (or the end of the input) is found
    for &current_char in chars[start..].iter().take_while(|&&c| c != '\n') {
        result_chars.push_back(current_char);
    }

    (
        result_chars.iter().collect::<String>(),
        chars_before,
        // A reversed range is reported as an empty one instead of underflowing
        end_position.saturating_sub(start_position),
    )
 }
 /// Returns the line number (starting at 1) where `target_pos` is located
 ///
 /// - `chars`: Input to scan
 /// - `target_pos`: Position whose line number is wanted
 ///
 /// Only the new lines found strictly before `target_pos` are counted, so a
 /// position past the end of the input yields the number of the last line.
 pub fn get_line_number(chars: &Vec<char>, target_pos: usize) -> usize {
    let new_lines_before = chars
        .iter()
        .take(target_pos)
        .filter(|&&current| current == '\n')
        .count();

    // Lines are numbered starting from 1
    new_lines_before + 1
 }
--- a/src/file/mod.rs
+++ b/src/file/mod.rs
@ -70,7 +70,7 @@ fn compile(input: &String) -> Result<String, String> {
                "{}:\n{}",
                "syntax error".on_red(),
                error.get_error_str(&chars)
-            ))
+            ));
        }
    };
@ -88,11 +88,18 @@ fn build_ast(input: &String, tokens: Vec<Token>) -> Result<String, String> {
        Err(reason) => {
            let chars: Vec<char> = input.chars().into_iter().collect();
            let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars));
-            return Err(error)
+            return Err(error);
        }
    };
-    crate::semantic::check_semantics(&ast)?;
+    match crate::semantic::check_semantics(&ast) {
        Ok(_) => {}
        Err(reason) => {
            let chars: Vec<char> = input.chars().into_iter().collect();
            let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars));
            return Err(error);
        }
    };
    Ok(codegen::codegen(&ast))
 }
--- a/src/repl/mod.rs
+++ b/src/repl/mod.rs
@ -1,5 +1,7 @@
 use std::io::{self, Write};
 use colored::Colorize;
 use crate::error_handling::PrintableError;
 use crate::lexic::token::Token;
@ -34,7 +36,9 @@ fn build_ast(input: &String, tokens: Vec<Token>) {
            match res1 {
                Ok(_) => {}
                Err(reason) => {
-                    eprintln!("{}", reason);
+                    let chars: Vec<char> = input.chars().into_iter().collect();
                    let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars));
                    eprintln!("{}", error);
                    return;
                }
            }
--- a/src/semantic/impls.rs
+++ b/src/semantic/impls.rs
@ -1,13 +1,18 @@
-use crate::syntax::ast::{ModuleAST, TopLevelDeclaration};
+use crate::{
    error_handling::semantic_error::SemanticError,
    error_handling::MistiError,
    syntax::ast::{ModuleAST, TopLevelDeclaration},
 };
 use super::symbol_table::SymbolTable;
 pub trait SemanticCheck {
-    fn check_semantics(&self, scope: &SymbolTable) -> Result<(), String>;
+    fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError>;
 }
 impl SemanticCheck for ModuleAST {
-    fn check_semantics(&self, scope: &SymbolTable) -> Result<(), String> {
+    /// Checks that this AST is semantically correct, given a symbol table
    fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError> {
        for declaration in &self.declarations {
            declaration.check_semantics(scope)?;
        }
@ -17,14 +22,30 @@ impl SemanticCheck for ModuleAST {
 }
 impl SemanticCheck for TopLevelDeclaration {
-    fn check_semantics(&self, scope: &SymbolTable) -> Result<(), String> {
+    fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError> {
        match self {
-            TopLevelDeclaration::Binding(_) => Err("Binding not implemented".into()),
+            TopLevelDeclaration::Binding(_) => {
                let error = SemanticError {
                    error_start: 0,
                    error_end: 0,
                    reason: "Binding typechecking: Not implemented".into(),
                };
                Err(MistiError::Semantic(error))
            }
            TopLevelDeclaration::FunctionDeclaration(function) => {
                let function_name = function.identifier.as_ref().clone();
                if scope.test(&function_name) {
-                    return Err(format!("Function {} already defined", function_name));
+                    let error = SemanticError {
                        // TODO: Get the position of the function name. For this, these structs
                        // should store the token instead of just the string
                        error_start: 0,
                        error_end: 0,
                        reason: format!("Function {} already defined", function_name),
                    };
                    return Err(MistiError::Semantic(error));
                }
                scope.insert(
--- a/src/semantic/mod.rs
+++ b/src/semantic/mod.rs
@ -1,4 +1,4 @@
-use crate::syntax::ast::ModuleAST;
+use crate::{error_handling::MistiError, syntax::ast::ModuleAST};
 mod impls;
 mod symbol_table;
@ -11,8 +11,11 @@ use impls::SemanticCheck;
 // 3. Add the symbols declared to the symbol table, annotating them with their type
 // 4. Check if the symbols used are declared
-pub fn check_semantics(ast: &ModuleAST) -> Result<(), String> {
+/// Checks that the AST is semantically correct
 pub fn check_semantics(ast: &ModuleAST) -> Result<(), MistiError> {
    // For now there's only support for a single file
    // TODO: Receive a symbol table as a reference and work on it.
    // this way we can implement a unique symbol table for REPL session
    let global_scope = symbol_table::SymbolTable::new();
    ast.check_semantics(&global_scope)