Improve function semantic check

Properly handle errors in compilation pipeline
Add a return type to main
2024-03-09 08:05:51 -05:00 · 2024-03-01 17:38:04 -05:00 · 2024-03-01 16:52:32 -05:00
16 changed files with 274 additions and 160 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -2,6 +2,11 @@

 ## TODO

+- Implement AST transformation before codegen:
+    Create a new AST to represent PHP source code
+    and a THP ast -> PHP ast process, so that the
+    codegen section can focus only on codegen, not on
+    translating THP to PHP.
 - Parse __more__ binary operators
 - Parse `Type name = value` bindings
 - Parse more complex bindings
@ -26,8 +31,9 @@
 - [x] Begin work on semantic analysis
 - [x] Minimal symbol table
 - [x] Check duplicate function declarations
+- [x] Improve REPL/File compilation code
 - [ ] Typecheck bindings
- [ ] Typecheck functions
+- [x] Typecheck functions
 - [ ] Transform simple THP expression into PHP statements

 ## v0.0.9
--- a/src/cli/compile.rs
+++ b/src/cli/compile.rs
@ -1,19 +1,19 @@
 use colored::*;

-pub fn compile_command(arguments: Vec<String>) {
+pub fn compile_command(arguments: Vec<String>) -> Result<(), ()> {
    if arguments.is_empty() {
-        println!("{}", compile_help());
-        println!("{}: {}", "error".on_red(), "No file specified");
-        return;
+        eprintln!("{}", compile_help());
+        eprintln!("{}: {}", "error".on_red(), "No file specified");
+        return Err(());
    }
    if arguments.len() > 1 {
-        println!("{}", compile_help());
-        println!(
+        eprintln!("{}", compile_help());
+        eprintln!(
            "{}: {}",
            "error".on_red(),
            "Only a single file can be compiled at a time"
        );
-        return;
+        return Err(());
    }

    let argument = &arguments[0];
@ -23,16 +23,16 @@ pub fn compile_command(arguments: Vec<String>) {
        println!("{}", compile_help());

        if opt_str != "-h" && opt_str != "--help" {
-            println!(
+            eprintln!(
                "{}: {}",
                "error".on_red(),
-                "Invalid option. The compile command only accepts the `-h` or `--help` option"
+                "Invalid option. The compile command only accepts the `-h` or `--help` options"
            );
        }
-        return;
+        return Err(());
    }

-    crate::file::compile_file(argument);
+    crate::file::compile_file(argument)
 }

 fn compile_help() -> String {
--- a/src/cli/empty.rs
+++ b/src/cli/empty.rs
@ -7,7 +7,7 @@ enum EmptyOptions {
    Version,
 }

-pub fn empty_command(arguments: Vec<String>) {
+pub fn empty_command(arguments: Vec<String>) -> Result<(), ()> {
    // Add all options to a set
    let mut options_set = std::collections::HashSet::new();
    for option in arguments {
@ -16,9 +16,9 @@ pub fn empty_command(arguments: Vec<String>) {
                options_set.insert(o);
            }
            Err(invalid_option) => {
-                println!("{}", get_help_text());
-                println!("{}: invalid option: `{}`", "error".on_red(), invalid_option);
-                return;
+                eprintln!("{}", get_help_text());
+                eprintln!("{}: invalid option: `{}`", "error".on_red(), invalid_option);
+                return Err(());
            }
        };
    }
@ -35,6 +35,8 @@ pub fn empty_command(arguments: Vec<String>) {
            println!("{}", get_help_text());
        }
    }
+
+    Ok(())
 }

 fn expand_option(option: &String) -> Result<EmptyOptions, String> {
--- a/src/cli/help.rs
+++ b/src/cli/help.rs
@ -1,7 +1,7 @@
 use crate::cli::get_help_text;
 use colored::*;

-pub fn help_command(arguments: Vec<String>) {
+pub fn help_command(arguments: Vec<String>) -> Result<(), ()> {
    println!("{}", get_help_text());

    if arguments.len() > 0 {
@ -11,4 +11,6 @@ pub fn help_command(arguments: Vec<String>) {
            "The help command doesn't take any argument."
        );
    }
+
+    Ok(())
 }
--- a/src/cli/mod.rs
+++ b/src/cli/mod.rs
@ -40,17 +40,17 @@ fn get_version() -> String {
    format!("The THP compiler, linter & formatter, v{}", crate_version)
 }

-pub fn run_cli() {
+pub fn run_cli() -> Result<(), ()> {
    let (command, args) = match parse_args() {
        Ok(c) => c,
        Err(reason) => {
-            println!("{}", get_help_text());
-            println!("{}: {}", "error".on_red(), reason);
-            return;
+            eprintln!("{}", get_help_text());
+            eprintln!("{}: {}", "error".on_red(), reason);
+            return Err(());
        }
    };

-    command.run(args);
+    command.run(args)
 }

 fn parse_args() -> Result<(CommandType, Vec<String>), String> {
--- a/src/cli/repl.rs
+++ b/src/cli/repl.rs
@ -1,4 +1,13 @@
-pub fn repl_command(_arguments: Vec<String>) {
+use colored::Colorize;
+
+pub fn repl_command(_arguments: Vec<String>) -> Result<(), ()> {
    println!("{}", super::get_version());
-    let _ = crate::repl::run();
+    let result = crate::repl::run();
+
+    if let Err(e) = result {
+        eprintln!("{}: {}", "error".on_red(), e);
+        return Err(());
+    }
+
+    Ok(())
 }
--- a/src/cli/types.rs
+++ b/src/cli/types.rs
@ -12,14 +12,15 @@ pub enum CommandType {
 }

 impl CommandType {
-    pub fn run(&self, options: Vec<String>) {
+    pub fn run(&self, options: Vec<String>) -> Result<(), ()> {
        match self {
            CommandType::Help => super::help::help_command(options),
            CommandType::Compile => super::compile::compile_command(options),
            CommandType::Repl => super::repl::repl_command(options),
            CommandType::None => super::empty::empty_command(options),
            _ => {
-                println!("Not implemented yet! {:?} {:?}", self, options);
+                eprintln!("Not implemented yet! {:?} {:?}", self, options);
+                Err(())
            }
        }
    }
--- a/src/error_handling/mod.rs
+++ b/src/error_handling/mod.rs
@ -1,5 +1,9 @@
+use self::semantic_error::SemanticError;
+
 mod lex_error;
+pub mod semantic_error;
 mod syntax_error;
+mod utils;

 pub trait PrintableError {
    fn get_error_str(&self, chars: &Vec<char>) -> String;
@ -9,6 +13,7 @@ pub trait PrintableError {
 pub enum MistiError {
    Lex(LexError),
    Syntax(SyntaxError),
+    Semantic(SemanticError),
 }

 #[derive(Debug)]
@ -29,6 +34,7 @@ impl PrintableError for MistiError {
        match self {
            Self::Lex(err) => err.get_error_str(chars),
            Self::Syntax(err) => err.get_error_str(chars),
+            Self::Semantic(err) => err.get_error_str(chars),
        }
    }
 }
--- a/src/error_handling/semantic_error.rs
+++ b/src/error_handling/semantic_error.rs
@ -0,0 +1,31 @@
+use super::utils::{get_line, get_line_number};
+use super::PrintableError;
+
+#[derive(Debug)]
+pub struct SemanticError {
+    pub error_start: usize,
+    pub error_end: usize,
+    pub reason: String,
+}
+
+impl PrintableError for SemanticError {
+    fn get_error_str(&self, chars: &Vec<char>) -> String {
+        let (line, before, length) = get_line(chars, self.error_start, self.error_end);
+
+        let line_number = get_line_number(chars, self.error_start);
+        let line_number_whitespace = " ".repeat(line_number.to_string().len());
+
+        let whitespace = vec![' '; before].iter().collect::<String>();
+        let indicator = vec!['^'; length].iter().collect::<String>();
+        let reason = &self.reason;
+
+        format!(
+            r#"
+{line_number_whitespace} |
+{line_number           } | {line}
+{line_number_whitespace} | {whitespace}{indicator}
+
+{reason} at line {line_number}:{before}"#,
+        )
+    }
+}
--- a/src/error_handling/syntax_error.rs
+++ b/src/error_handling/syntax_error.rs
@ -1,5 +1,5 @@
+use super::utils::{get_line, get_line_number};
 use super::{PrintableError, SyntaxError};
-use std::collections::VecDeque;

 impl PrintableError for SyntaxError {
    fn get_error_str(&self, chars: &Vec<char>) -> String {
@ -23,96 +23,6 @@ impl PrintableError for SyntaxError {
    }
 }

-/// Extracts a line of code
-///
-/// - `chars`: Input where to extract the line from
-/// - `start_position`: Position where the erroneous code starts
-/// - `end_position`: Position where the erroneous code ends
-///
-/// Returns a tuple of:
-///
-/// - `String`: The faulty line
-/// - `usize`: The amount of chars *before* the faulty code
-/// - `usize`: The lenght of the faulty code
-///
-/// ## Example
-///
-/// ```
-/// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
-/// let start_position = 13;
-/// let end_position = 15;
-///
-/// let (line, before, length) = get_line(&input, start_position, end_position);
-///
-/// assert_eq!("val number == 50", line);
-/// assert_eq!(11, before);
-/// assert_eq!(2, length);
-/// ```
-fn get_line(
-    chars: &Vec<char>,
-    start_position: usize,
-    end_position: usize,
-) -> (String, usize, usize) {
-    let mut result_chars = VecDeque::<char>::new();
-
-    // Push chars to the front until a new line is found
-    let mut before_pos = start_position;
-    loop {
-        let current_char = chars[before_pos];
-
-        if current_char == '\n' {
-            // This is important because before_pos will be used to calculate
-            // the number of chars before start_position
-            before_pos += 1;
-            break;
-        }
-
-        result_chars.push_front(current_char);
-
-        if before_pos == 0 {
-            break;
-        }
-
-        before_pos -= 1;
-    }
-
-    // Push chars to the end until a new line is found
-    let mut after_pos = start_position + 1;
-    let char_count = chars.len();
-    while after_pos < char_count {
-        let current_char = chars[after_pos];
-
-        if current_char == '\n' {
-            break;
-        }
-
-        result_chars.push_back(current_char);
-        after_pos += 1;
-    }
-
-    (
-        result_chars.iter().collect::<String>(),
-        start_position - before_pos,
-        end_position - start_position,
-    )
-}
-
-fn get_line_number(chars: &Vec<char>, target_pos: usize) -> usize {
-    let mut count = 1;
-
-    for (pos, char) in chars.iter().enumerate() {
-        if pos >= target_pos {
-            break;
-        }
-
-        if *char == '\n' {
-            count += 1;
-        }
-    }
-
-    count
-}
-
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/src/error_handling/utils.rs
+++ b/src/error_handling/utils.rs
@ -0,0 +1,91 @@
+use std::collections::VecDeque;
+
+/// Extracts a line of code
+///
+/// - `chars`: Input where to extract the line from
+/// - `start_position`: Position where the erroneous code starts
+/// - `end_position`: Position where the erroneous code ends
+///
+/// Returns a tuple of:
+///
+/// - `String`: The faulty line
+/// - `usize`: The amount of chars *before* the faulty code
+/// - `usize`: The length of the faulty code
+///
+/// ## Example
+///
+/// ```
+/// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
+/// let start_position = 13;
+/// let end_position = 15;
+///
+/// let (line, before, length) = get_line(&input, start_position, end_position);
+///
+/// assert_eq!("val number == 50", line);
+/// assert_eq!(11, before);
+/// assert_eq!(2, length);
+/// ```
+pub fn get_line(
+    chars: &Vec<char>,
+    start_position: usize,
+    end_position: usize,
+) -> (String, usize, usize) {
+    let mut result_chars = VecDeque::<char>::new();
+
+    // Push chars to the front until a new line is found
+    let mut before_pos = start_position;
+    loop {
+        let current_char = chars[before_pos];
+
+        if current_char == '\n' {
+            // This is important because before_pos will be used to calculate
+            // the number of chars before start_position
+            before_pos += 1;
+            break;
+        }
+
+        result_chars.push_front(current_char);
+
+        if before_pos == 0 {
+            break;
+        }
+
+        before_pos -= 1;
+    }
+
+    // Push chars to the end until a new line is found
+    let mut after_pos = start_position + 1;
+    let char_count = chars.len();
+    while after_pos < char_count {
+        let current_char = chars[after_pos];
+
+        if current_char == '\n' {
+            break;
+        }
+
+        result_chars.push_back(current_char);
+        after_pos += 1;
+    }
+
+    (
+        result_chars.iter().collect::<String>(),
+        start_position - before_pos,
+        end_position - start_position,
+    )
+}
+
+pub fn get_line_number(chars: &Vec<char>, target_pos: usize) -> usize {
+    let mut count = 1;
+
+    for (pos, char) in chars.iter().enumerate() {
+        if pos >= target_pos {
+            break;
+        }
+
+        if *char == '\n' {
+            count += 1;
+        }
+    }
+
+    count
+}
--- a/src/file/mod.rs
+++ b/src/file/mod.rs
@ -4,77 +4,102 @@ use std::{fs, path::Path};
 use crate::lexic::token::Token;
 use crate::{codegen, error_handling::PrintableError, lexic, syntax};

-pub fn compile_file(input: &String) {
+pub fn compile_file(input: &String) -> Result<(), ()> {
    let input_path = Path::new(input);

    if !input_path.is_file() {
-        println!(
+        eprintln!(
            "{}: {} {}",
            "error".on_red(),
            "Input path is not a valid file:".red(),
            input
        );
-        return;
+        return Err(());
    }

-    let bytes = fs::read(input_path).expect("INPUT_PATH should be valid");
+    let bytes = match fs::read(input_path) {
+        Ok(bytes) => bytes,
+        Err(error) => {
+            eprintln!("{}: Error reading input file", "error".on_red());
+            eprintln!("{}", error);
+            return Err(());
+        }
+    };

    let contents = match String::from_utf8(bytes) {
        Ok(str) => str,
-        Err(_) => {
-            println!("{}: Input file contains invalid UTF-8", "error".on_red());
-            return;
+        Err(error) => {
+            eprintln!("{}: Input file contains invalid UTF-8", "error".on_red());
+            eprintln!("{}", error);
+            return Err(());
        }
    };

-    let Some(out_code) = compile(&contents) else {
-        return;
+    let out_code = match compile(&contents) {
+        Ok(out_code) => out_code,
+        Err(error) => {
+            eprintln!("{}", error);
+            return Err(());
+        }
    };

-    let mut output_path = Path::new(input).canonicalize().unwrap();
+    let mut output_path = Path::new(input)
+        .canonicalize()
+        .expect("Invalid input path: Cannot be canonicalized");
    output_path.set_extension("php");

-    fs::write(output_path, out_code).expect("Error writing to output path");
+    match fs::write(output_path, out_code) {
+        Ok(_) => Ok(()),
+        Err(error) => {
+            eprintln!("{}: Error writing output file", "error".on_red());
+            eprintln!("{}", error);
+            Err(())
+        }
+    }
 }

-/// Executes Lexical analysis, handles errors and calls build_ast for the next phase
-fn compile(input: &String) -> Option<String> {
+/// THP source code goes in, PHP code or an error comes out
+fn compile(input: &String) -> Result<String, String> {
    let tokens = lexic::get_tokens(input);

-    match tokens {
-        Ok(tokens) => Some(build_ast(input, tokens)),
+    let tokens = match tokens {
+        Ok(tokens) => tokens,
        Err(error) => {
            let chars: Vec<char> = input.chars().into_iter().collect();
-            println!(
+            return Err(format!(
                "{}:\n{}",
                "syntax error".on_red(),
                error.get_error_str(&chars)
-            );
-            None
+            ));
        }
-    }
+    };
+
+    build_ast(input, tokens)
 }

 /// Executes Syntax analysis, and for now, Semantic analysis and Code generation.
 ///
 /// Prints the generated code in stdin
-fn build_ast(input: &String, tokens: Vec<Token>) -> String {
+fn build_ast(input: &String, tokens: Vec<Token>) -> Result<String, String> {
    let ast = syntax::construct_ast(&tokens);

-    match ast {
-        Ok(ast) => {
-            match crate::semantic::check_semantics(&ast) {
-                Ok(_) => {}
-                Err(reason) => {
-                    panic!("{}", reason)
-                }
-            };
-
-            codegen::codegen(&ast)
-        }
+    let ast = match ast {
+        Ok(ast) => ast,
        Err(reason) => {
            let chars: Vec<char> = input.chars().into_iter().collect();
-            panic!("{}", reason.get_error_str(&chars))
+            let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars));
+            return Err(error);
        }
-    }
+    };
+
+    match crate::semantic::check_semantics(&ast) {
+        Ok(_) => {}
+        Err(reason) => {
+            let chars: Vec<char> = input.chars().into_iter().collect();
+            let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars));
+            return Err(error);
+        }
+    };
+
+    Ok(codegen::codegen(&ast))
 }
--- a/src/main.rs
+++ b/src/main.rs
@ -18,5 +18,8 @@ mod utils;
 mod error_handling;

 fn main() {
-    cli::run_cli();
+    match cli::run_cli() {
+        Ok(_) => (),
+        Err(_) => std::process::exit(1),
+    }
 }
--- a/src/repl/mod.rs
+++ b/src/repl/mod.rs
@ -1,5 +1,7 @@
 use std::io::{self, Write};

+use colored::Colorize;
+
 use crate::error_handling::PrintableError;
 use crate::lexic::token::Token;

@ -34,7 +36,9 @@ fn build_ast(input: &String, tokens: Vec<Token>) {
            match res1 {
                Ok(_) => {}
                Err(reason) => {
-                    eprintln!("{}", reason);
+                    let chars: Vec<char> = input.chars().into_iter().collect();
+                    let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars));
+                    eprintln!("{}", error);
                    return;
                }
            }
--- a/src/semantic/impls.rs
+++ b/src/semantic/impls.rs
@ -1,13 +1,18 @@
-use crate::syntax::ast::{ModuleAST, TopLevelDeclaration};
+use crate::{
+    error_handling::semantic_error::SemanticError,
+    error_handling::MistiError,
+    syntax::ast::{ModuleAST, TopLevelDeclaration},
+};

 use super::symbol_table::SymbolTable;

 pub trait SemanticCheck {
-    fn check_semantics(&self, scope: &SymbolTable) -> Result<(), String>;
+    fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError>;
 }

 impl SemanticCheck for ModuleAST {
-    fn check_semantics(&self, scope: &SymbolTable) -> Result<(), String> {
+    /// Checks that this AST is semantically correct, given a symbol table
+    fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError> {
        for declaration in &self.declarations {
            declaration.check_semantics(scope)?;
        }
@ -17,14 +22,30 @@ impl SemanticCheck for ModuleAST {
 }

 impl SemanticCheck for TopLevelDeclaration {
-    fn check_semantics(&self, scope: &SymbolTable) -> Result<(), String> {
+    fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError> {
        match self {
-            TopLevelDeclaration::Binding(_) => Err("Binding not implemented".into()),
+            TopLevelDeclaration::Binding(_) => {
+                let error = SemanticError {
+                    error_start: 0,
+                    error_end: 0,
+                    reason: "Binding typechecking: Not implemented".into(),
+                };
+
+                Err(MistiError::Semantic(error))
+            }
            TopLevelDeclaration::FunctionDeclaration(function) => {
                let function_name = function.identifier.as_ref().clone();

                if scope.test(&function_name) {
-                    return Err(format!("Function {} already defined", function_name));
+                    let error = SemanticError {
+                        // TODO: Get the position of the function name. For this, these structs
+                        // should store the token instead of just the string
+                        error_start: 0,
+                        error_end: 0,
+                        reason: format!("Function {} already defined", function_name),
+                    };
+
+                    return Err(MistiError::Semantic(error));
                }

                scope.insert(
--- a/src/semantic/mod.rs
+++ b/src/semantic/mod.rs
@ -1,4 +1,4 @@
-use crate::syntax::ast::ModuleAST;
+use crate::{error_handling::MistiError, syntax::ast::ModuleAST};

 mod impls;
 mod symbol_table;
@ -11,8 +11,11 @@ use impls::SemanticCheck;
 // 3. Add the symbols declared to the symbol table, annotating them with their type
 // 4. Check if the symbols used are declared

-pub fn check_semantics(ast: &ModuleAST) -> Result<(), String> {
+/// Checks that the AST is semantically correct
+pub fn check_semantics(ast: &ModuleAST) -> Result<(), MistiError> {
    // For now there's only support for a single file
+    // TODO: Receive a symbol table as a reference and work on it,
+    // so that a single symbol table can persist across a whole REPL session
    let global_scope = symbol_table::SymbolTable::new();

    ast.check_semantics(&global_scope)
Author	SHA1	Message	Date
Araozu	f97b8e2e07	Improve function semantic check	2024-03-09 08:05:51 -05:00
Araozu	a39b0c0d5a	Properly handle errors in compilation pipeline	2024-03-01 17:38:04 -05:00
Araozu	a219faf283	Add a return type to main	2024-03-01 16:52:32 -05:00