Improve function semantic check

2024-03-09 08:05:40 -05:00 · 2024-03-09 08:05:40 -05:00 · f97b8e2e07
parent a39b0c0d5a
commit f97b8e2e07
9 changed files with 183 additions and 105 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -2,6 +2,11 @@

 ## TODO

+- Implement AST transformation before codegen:
+    Create a new AST to represent PHP source code
+    and a THP AST -> PHP AST process, so that the
+    codegen section can focus only on codegen, not on
+    the translation of THP -> PHP.
 - Parse __more__ binary operators
 - Parse `Type name = value` bindings
 - Parse more complex bindings
@ -26,9 +31,9 @@
 - [x] Begin work on semantic analysis
 - [x] Minimal symbol table
 - [x] Check duplicate function declarations
- [ ] Improve REPL/File compilation code
+- [x] Improve REPL/File compilation code
 - [ ] Typecheck bindings
- [ ] Typecheck functions
+- [x] Typecheck functions
 - [ ] Transform simple THP expression into PHP statements

 ## v0.0.9
--- a/src/error_handling/mod.rs
+++ b/src/error_handling/mod.rs
@ -1,5 +1,9 @@
+use self::semantic_error::SemanticError;
+
 mod lex_error;
+pub mod semantic_error;
 mod syntax_error;
+mod utils;

 pub trait PrintableError {
    fn get_error_str(&self, chars: &Vec<char>) -> String;
@ -9,6 +13,7 @@ pub trait PrintableError {
 pub enum MistiError {
    Lex(LexError),
    Syntax(SyntaxError),
+    Semantic(SemanticError),
 }

 #[derive(Debug)]
@ -29,6 +34,7 @@ impl PrintableError for MistiError {
        match self {
            Self::Lex(err) => err.get_error_str(chars),
            Self::Syntax(err) => err.get_error_str(chars),
+            Self::Semantic(err) => err.get_error_str(chars),
        }
    }
 }
--- a/src/error_handling/semantic_error.rs
+++ b/src/error_handling/semantic_error.rs
@ -0,0 +1,31 @@
+use super::utils::{get_line, get_line_number};
+use super::PrintableError;
+
+#[derive(Debug)]
+pub struct SemanticError {
+    pub error_start: usize,
+    pub error_end: usize,
+    pub reason: String,
+}
+
+impl PrintableError for SemanticError {
+    fn get_error_str(&self, chars: &Vec<char>) -> String {
+        let (line, before, length) = get_line(chars, self.error_start, self.error_end);
+
+        let line_number = get_line_number(chars, self.error_start);
+        let line_number_whitespace = " ".repeat(line_number.to_string().len());
+
+        let whitespace = vec![' '; before].iter().collect::<String>();
+        let indicator = vec!['^'; length].iter().collect::<String>();
+        let reason = &self.reason;
+
+        format!(
+            r#"
+{line_number_whitespace} |
+{line_number           } | {line}
+{line_number_whitespace} | {whitespace}{indicator}
+
+{reason} at line {line_number}:{before}"#,
+        )
+    }
+}
--- a/src/error_handling/syntax_error.rs
+++ b/src/error_handling/syntax_error.rs
@ -1,5 +1,5 @@
+use super::utils::{get_line, get_line_number};
 use super::{PrintableError, SyntaxError};
-use std::collections::VecDeque;

 impl PrintableError for SyntaxError {
    fn get_error_str(&self, chars: &Vec<char>) -> String {
@ -23,96 +23,6 @@ impl PrintableError for SyntaxError {
    }
 }

-/// Extracts a line of code
-///
-/// - `chars`: Input where to extract the line from
-/// - `start_position`: Position where the erroneous code starts
-/// - `end_position`: Position where the erroneous code ends
-///
-/// Returns a tuple of:
-///
-/// - `String`: The faulty line
-/// - `usize`: The amount of chars *before* the faulty code
-/// - `usize`: The lenght of the faulty code
-///
-/// ## Example
-///
-/// ```
-/// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
-/// let start_position = 13;
-/// let end_position = 15;
-///
-/// let (line, before, length) = get_line(&input, start_position, end_position);
-///
-/// assert_eq!("val number == 50", line);
-/// assert_eq!(11, before);
-/// assert_eq!(2, length);
-/// ```
-fn get_line(
-    chars: &Vec<char>,
-    start_position: usize,
-    end_position: usize,
-) -> (String, usize, usize) {
-    let mut result_chars = VecDeque::<char>::new();
-
-    // Push chars to the front until a new line is found
-    let mut before_pos = start_position;
-    loop {
-        let current_char = chars[before_pos];
-
-        if current_char == '\n' {
-            // This is important because before_pos will be used to calculate
-            // the number of chars before start_position
-            before_pos += 1;
-            break;
-        }
-
-        result_chars.push_front(current_char);
-
-        if before_pos == 0 {
-            break;
-        }
-
-        before_pos -= 1;
-    }
-
-    // Push chars to the end until a new line is found
-    let mut after_pos = start_position + 1;
-    let char_count = chars.len();
-    while after_pos < char_count {
-        let current_char = chars[after_pos];
-
-        if current_char == '\n' {
-            break;
-        }
-
-        result_chars.push_back(current_char);
-        after_pos += 1;
-    }
-
-    (
-        result_chars.iter().collect::<String>(),
-        start_position - before_pos,
-        end_position - start_position,
-    )
-}
-
-fn get_line_number(chars: &Vec<char>, target_pos: usize) -> usize {
-    let mut count = 1;
-
-    for (pos, char) in chars.iter().enumerate() {
-        if pos >= target_pos {
-            break;
-        }
-
-        if *char == '\n' {
-            count += 1;
-        }
-    }
-
-    count
-}
-
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/src/error_handling/utils.rs
+++ b/src/error_handling/utils.rs
@ -0,0 +1,91 @@
+use std::collections::VecDeque;
+
+/// Extracts a line of code
+///
+/// - `chars`: Input where to extract the line from
+/// - `start_position`: Position where the erroneous code starts
+/// - `end_position`: Position where the erroneous code ends
+///
+/// Returns a tuple of:
+///
+/// - `String`: The faulty line
+/// - `usize`: The amount of chars *before* the faulty code
+/// - `usize`: The length of the faulty code
+///
+/// ## Example
+///
+/// ```
+/// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
+/// let start_position = 13;
+/// let end_position = 15;
+///
+/// let (line, before, length) = get_line(&input, start_position, end_position);
+///
+/// assert_eq!("val number == 50", line);
+/// assert_eq!(11, before);
+/// assert_eq!(2, length);
+/// ```
+pub fn get_line(
+    chars: &Vec<char>,
+    start_position: usize,
+    end_position: usize,
+) -> (String, usize, usize) {
+    let mut result_chars = VecDeque::<char>::new();
+
+    // Push chars to the front until a new line is found
+    let mut before_pos = start_position;
+    loop {
+        let current_char = chars[before_pos];
+
+        if current_char == '\n' {
+            // This is important because before_pos will be used to calculate
+            // the number of chars before start_position
+            before_pos += 1;
+            break;
+        }
+
+        result_chars.push_front(current_char);
+
+        if before_pos == 0 {
+            break;
+        }
+
+        before_pos -= 1;
+    }
+
+    // Push chars to the end until a new line is found
+    let mut after_pos = start_position + 1;
+    let char_count = chars.len();
+    while after_pos < char_count {
+        let current_char = chars[after_pos];
+
+        if current_char == '\n' {
+            break;
+        }
+
+        result_chars.push_back(current_char);
+        after_pos += 1;
+    }
+
+    (
+        result_chars.iter().collect::<String>(),
+        start_position - before_pos,
+        end_position - start_position,
+    )
+}
+
+pub fn get_line_number(chars: &Vec<char>, target_pos: usize) -> usize {
+    let mut count = 1;
+
+    for (pos, char) in chars.iter().enumerate() {
+        if pos >= target_pos {
+            break;
+        }
+
+        if *char == '\n' {
+            count += 1;
+        }
+    }
+
+    count
+}
--- a/src/file/mod.rs
+++ b/src/file/mod.rs
@ -70,7 +70,7 @@ fn compile(input: &String) -> Result<String, String> {
                "{}:\n{}",
                "syntax error".on_red(),
                error.get_error_str(&chars)
-            ))
+            ));
        }
    };

@ -88,11 +88,18 @@ fn build_ast(input: &String, tokens: Vec<Token>) -> Result<String, String> {
        Err(reason) => {
            let chars: Vec<char> = input.chars().into_iter().collect();
            let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars));
-            return Err(error)
+            return Err(error);
        }
    };

-    crate::semantic::check_semantics(&ast)?;
+    match crate::semantic::check_semantics(&ast) {
+        Ok(_) => {}
+        Err(reason) => {
+            let chars: Vec<char> = input.chars().into_iter().collect();
+            let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars));
+            return Err(error);
+        }
+    };

    Ok(codegen::codegen(&ast))
 }
--- a/src/repl/mod.rs
+++ b/src/repl/mod.rs
@ -1,5 +1,7 @@
 use std::io::{self, Write};

+use colored::Colorize;
+
 use crate::error_handling::PrintableError;
 use crate::lexic::token::Token;

@ -34,7 +36,9 @@ fn build_ast(input: &String, tokens: Vec<Token>) {
            match res1 {
                Ok(_) => {}
                Err(reason) => {
-                    eprintln!("{}", reason);
+                    let chars: Vec<char> = input.chars().into_iter().collect();
+                    let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars));
+                    eprintln!("{}", error);
                    return;
                }
            }
--- a/src/semantic/impls.rs
+++ b/src/semantic/impls.rs
@ -1,13 +1,18 @@
-use crate::syntax::ast::{ModuleAST, TopLevelDeclaration};
+use crate::{
+    error_handling::semantic_error::SemanticError,
+    error_handling::MistiError,
+    syntax::ast::{ModuleAST, TopLevelDeclaration},
+};

 use super::symbol_table::SymbolTable;

 pub trait SemanticCheck {
-    fn check_semantics(&self, scope: &SymbolTable) -> Result<(), String>;
+    fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError>;
 }

 impl SemanticCheck for ModuleAST {
-    fn check_semantics(&self, scope: &SymbolTable) -> Result<(), String> {
+    /// Checks that this AST is semantically correct, given a symbol table
+    fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError> {
        for declaration in &self.declarations {
            declaration.check_semantics(scope)?;
        }
@ -17,14 +22,30 @@ impl SemanticCheck for ModuleAST {
 }

 impl SemanticCheck for TopLevelDeclaration {
-    fn check_semantics(&self, scope: &SymbolTable) -> Result<(), String> {
+    fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError> {
        match self {
-            TopLevelDeclaration::Binding(_) => Err("Binding not implemented".into()),
+            TopLevelDeclaration::Binding(_) => {
+                let error = SemanticError {
+                    error_start: 0,
+                    error_end: 0,
+                    reason: "Binding typechecking: Not implemented".into(),
+                };
+
+                Err(MistiError::Semantic(error))
+            }
            TopLevelDeclaration::FunctionDeclaration(function) => {
                let function_name = function.identifier.as_ref().clone();

                if scope.test(&function_name) {
-                    return Err(format!("Function {} already defined", function_name));
+                    let error = SemanticError {
+                        // TODO: Get the position of the function name. For this, these structs
+                        // should store the token instead of just the string
+                        error_start: 0,
+                        error_end: 0,
+                        reason: format!("Function {} already defined", function_name),
+                    };
+
+                    return Err(MistiError::Semantic(error));
                }

                scope.insert(
--- a/src/semantic/mod.rs
+++ b/src/semantic/mod.rs
@ -1,4 +1,4 @@
-use crate::syntax::ast::ModuleAST;
+use crate::{error_handling::MistiError, syntax::ast::ModuleAST};

 mod impls;
 mod symbol_table;
@ -11,8 +11,11 @@ use impls::SemanticCheck;
 // 3. Add the symbols declared to the symbol table, annotating them with their type
 // 4. Check if the symbols used are declared

-pub fn check_semantics(ast: &ModuleAST) -> Result<(), String> {
+/// Checks that the AST is semantically correct
+pub fn check_semantics(ast: &ModuleAST) -> Result<(), MistiError> {
    // For now there's only support for a single file
+    // TODO: Receive a symbol table as a reference and work on it.
+    // This way we can keep a single symbol table for a whole REPL session
    let global_scope = symbol_table::SymbolTable::new();

    ast.check_semantics(&global_scope)