Compare commits

..

3 Commits

Author SHA1 Message Date
f97b8e2e07 Improve function semantic check 2024-03-09 08:05:51 -05:00
a39b0c0d5a Properly handle errors in compilation pipeline 2024-03-01 17:38:04 -05:00
a219faf283 Add a return type to main 2024-03-01 16:52:32 -05:00
16 changed files with 274 additions and 160 deletions

View File

@ -2,6 +2,11 @@
## TODO ## TODO
- Implement an AST transformation before codegen:
Create a new AST to represent PHP source code
and a THP AST -> PHP AST process, so that the
codegen section can focus only on code generation,
not on the translation of THP into PHP.
- Parse __more__ binary operators - Parse __more__ binary operators
- Parse `Type name = value` bindings - Parse `Type name = value` bindings
- Parse more complex bindings - Parse more complex bindings
@ -26,8 +31,9 @@
- [x] Begin work on semantic analysis - [x] Begin work on semantic analysis
- [x] Minimal symbol table - [x] Minimal symbol table
- [x] Check duplicate function declarations - [x] Check duplicate function declarations
- [x] Improve REPL/File compilation code
- [ ] Typecheck bindings - [ ] Typecheck bindings
- [ ] Typecheck functions - [x] Typecheck functions
- [ ] Transform simple THP expression into PHP statements - [ ] Transform simple THP expression into PHP statements
## v0.0.9 ## v0.0.9

View File

@ -1,19 +1,19 @@
use colored::*; use colored::*;
pub fn compile_command(arguments: Vec<String>) { pub fn compile_command(arguments: Vec<String>) -> Result<(), ()> {
if arguments.is_empty() { if arguments.is_empty() {
println!("{}", compile_help()); eprintln!("{}", compile_help());
println!("{}: {}", "error".on_red(), "No file specified"); eprintln!("{}: {}", "error".on_red(), "No file specified");
return; return Err(());
} }
if arguments.len() > 1 { if arguments.len() > 1 {
println!("{}", compile_help()); eprintln!("{}", compile_help());
println!( eprintln!(
"{}: {}", "{}: {}",
"error".on_red(), "error".on_red(),
"Only a single file can be compiled at a time" "Only a single file can be compiled at a time"
); );
return; return Err(());
} }
let argument = &arguments[0]; let argument = &arguments[0];
@ -23,16 +23,16 @@ pub fn compile_command(arguments: Vec<String>) {
println!("{}", compile_help()); println!("{}", compile_help());
if opt_str != "-h" && opt_str != "--help" { if opt_str != "-h" && opt_str != "--help" {
println!( eprintln!(
"{}: {}", "{}: {}",
"error".on_red(), "error".on_red(),
"Invalid option. The compile command only accepts the `-h` or `--help` option" "Invalid option. The compile command only accepts the `-h` or `--help` options"
); );
} }
return; return Err(());
} }
crate::file::compile_file(argument); crate::file::compile_file(argument)
} }
fn compile_help() -> String { fn compile_help() -> String {

View File

@ -7,7 +7,7 @@ enum EmptyOptions {
Version, Version,
} }
pub fn empty_command(arguments: Vec<String>) { pub fn empty_command(arguments: Vec<String>) -> Result<(), ()> {
// Add all options to a set // Add all options to a set
let mut options_set = std::collections::HashSet::new(); let mut options_set = std::collections::HashSet::new();
for option in arguments { for option in arguments {
@ -16,9 +16,9 @@ pub fn empty_command(arguments: Vec<String>) {
options_set.insert(o); options_set.insert(o);
} }
Err(invalid_option) => { Err(invalid_option) => {
println!("{}", get_help_text()); eprintln!("{}", get_help_text());
println!("{}: invalid option: `{}`", "error".on_red(), invalid_option); eprintln!("{}: invalid option: `{}`", "error".on_red(), invalid_option);
return; return Err(());
} }
}; };
} }
@ -35,6 +35,8 @@ pub fn empty_command(arguments: Vec<String>) {
println!("{}", get_help_text()); println!("{}", get_help_text());
} }
} }
Ok(())
} }
fn expand_option(option: &String) -> Result<EmptyOptions, String> { fn expand_option(option: &String) -> Result<EmptyOptions, String> {

View File

@ -1,7 +1,7 @@
use crate::cli::get_help_text; use crate::cli::get_help_text;
use colored::*; use colored::*;
pub fn help_command(arguments: Vec<String>) { pub fn help_command(arguments: Vec<String>) -> Result<(), ()> {
println!("{}", get_help_text()); println!("{}", get_help_text());
if arguments.len() > 0 { if arguments.len() > 0 {
@ -11,4 +11,6 @@ pub fn help_command(arguments: Vec<String>) {
"The help command doesn't take any argument." "The help command doesn't take any argument."
); );
} }
Ok(())
} }

View File

@ -40,17 +40,17 @@ fn get_version() -> String {
format!("The THP compiler, linter & formatter, v{}", crate_version) format!("The THP compiler, linter & formatter, v{}", crate_version)
} }
pub fn run_cli() { pub fn run_cli() -> Result<(), ()> {
let (command, args) = match parse_args() { let (command, args) = match parse_args() {
Ok(c) => c, Ok(c) => c,
Err(reason) => { Err(reason) => {
println!("{}", get_help_text()); eprintln!("{}", get_help_text());
println!("{}: {}", "error".on_red(), reason); eprintln!("{}: {}", "error".on_red(), reason);
return; return Err(());
} }
}; };
command.run(args); command.run(args)
} }
fn parse_args() -> Result<(CommandType, Vec<String>), String> { fn parse_args() -> Result<(CommandType, Vec<String>), String> {

View File

@ -1,4 +1,13 @@
pub fn repl_command(_arguments: Vec<String>) { use colored::Colorize;
pub fn repl_command(_arguments: Vec<String>) -> Result<(), ()> {
println!("{}", super::get_version()); println!("{}", super::get_version());
let _ = crate::repl::run(); let result = crate::repl::run();
if let Err(e) = result {
eprintln!("{}: {}", "error".on_red(), e);
return Err(());
}
Ok(())
} }

View File

@ -12,14 +12,15 @@ pub enum CommandType {
} }
impl CommandType { impl CommandType {
pub fn run(&self, options: Vec<String>) { pub fn run(&self, options: Vec<String>) -> Result<(), ()> {
match self { match self {
CommandType::Help => super::help::help_command(options), CommandType::Help => super::help::help_command(options),
CommandType::Compile => super::compile::compile_command(options), CommandType::Compile => super::compile::compile_command(options),
CommandType::Repl => super::repl::repl_command(options), CommandType::Repl => super::repl::repl_command(options),
CommandType::None => super::empty::empty_command(options), CommandType::None => super::empty::empty_command(options),
_ => { _ => {
println!("Not implemented yet! {:?} {:?}", self, options); eprintln!("Not implemented yet! {:?} {:?}", self, options);
Err(())
} }
} }
} }

View File

@ -1,5 +1,9 @@
use self::semantic_error::SemanticError;
mod lex_error; mod lex_error;
pub mod semantic_error;
mod syntax_error; mod syntax_error;
mod utils;
pub trait PrintableError { pub trait PrintableError {
fn get_error_str(&self, chars: &Vec<char>) -> String; fn get_error_str(&self, chars: &Vec<char>) -> String;
@ -9,6 +13,7 @@ pub trait PrintableError {
pub enum MistiError { pub enum MistiError {
Lex(LexError), Lex(LexError),
Syntax(SyntaxError), Syntax(SyntaxError),
Semantic(SemanticError),
} }
#[derive(Debug)] #[derive(Debug)]
@ -29,6 +34,7 @@ impl PrintableError for MistiError {
match self { match self {
Self::Lex(err) => err.get_error_str(chars), Self::Lex(err) => err.get_error_str(chars),
Self::Syntax(err) => err.get_error_str(chars), Self::Syntax(err) => err.get_error_str(chars),
Self::Semantic(err) => err.get_error_str(chars),
} }
} }
} }

View File

@ -0,0 +1,31 @@
use super::utils::{get_line, get_line_number};
use super::PrintableError;
/// An error found during semantic analysis, tied to a span of the source code.
///
/// The span is expressed as positions into the `Vec<char>` of the source that
/// is handed to `get_error_str` when the error is rendered.
#[derive(Debug)]
pub struct SemanticError {
/// Position where the offending code starts. Also used to compute the
/// line number shown in the rendered error.
pub error_start: usize,
/// Position where the offending code ends. Together with `error_start`
/// it determines how many `^` indicator chars are drawn.
pub error_end: usize,
/// Human readable explanation, printed below the offending line.
pub reason: String,
}
impl PrintableError for SemanticError {
    /// Renders this error as a multi-line report.
    ///
    /// The report contains the offending source line prefixed with its line
    /// number, a row of `^` chars underlining the span
    /// `error_start..error_end`, and finally the `reason` followed by the
    /// line number and the column offset of the error.
    ///
    /// - `chars`: The full source code the error positions refer to
    fn get_error_str(&self, chars: &Vec<char>) -> String {
        let (line, before, length) = get_line(chars, self.error_start, self.error_end);

        let line_number = get_line_number(chars, self.error_start);
        // Blank gutter with the same width as the printed line number, so the
        // `|` separators of the three rows line up.
        let line_number_whitespace = " ".repeat(line_number.to_string().len());

        // Padding that moves the indicator under the first offending char
        let whitespace = " ".repeat(before);
        // One `^` for every char of the offending span
        let indicator = "^".repeat(length);
        let reason = &self.reason;

        format!(
            r#"
{line_number_whitespace} |
{line_number } | {line}
{line_number_whitespace} | {whitespace}{indicator}
{reason} at line {line_number}:{before}"#,
        )
    }
}

View File

@ -1,5 +1,5 @@
use super::utils::{get_line, get_line_number};
use super::{PrintableError, SyntaxError}; use super::{PrintableError, SyntaxError};
use std::collections::VecDeque;
impl PrintableError for SyntaxError { impl PrintableError for SyntaxError {
fn get_error_str(&self, chars: &Vec<char>) -> String { fn get_error_str(&self, chars: &Vec<char>) -> String {
@ -23,96 +23,6 @@ impl PrintableError for SyntaxError {
} }
} }
/// Extracts the line of code that contains an error
///
/// - `chars`: Input where to extract the line from
/// - `start_position`: Position where the erroneous code starts
/// - `end_position`: Position where the erroneous code ends
///
/// Returns a tuple of:
///
/// - `String`: The faulty line, without its trailing newline
/// - `usize`: The amount of chars *before* the faulty code
/// - `usize`: The length of the faulty code
///
/// This function never panics:
///
/// - If `start_position` is past the end of `chars` (e.g. an error at EOF)
///   it is treated as pointing to the end of the input.
/// - If `start_position` points to a newline, the line that the newline
///   terminates is returned.
/// - If `end_position` is smaller than `start_position` the length is 0.
///
/// ## Example
///
/// ```
/// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
/// let start_position = 13;
/// let end_position = 15;
///
/// let (line, before, length) = get_line(&input, start_position, end_position);
///
/// assert_eq!("val number == 50", line);
/// assert_eq!(11, before);
/// assert_eq!(2, length);
/// ```
fn get_line(
    chars: &Vec<char>,
    start_position: usize,
    end_position: usize,
) -> (String, usize, usize) {
    // An error reported at end-of-input may point past the last char;
    // clamp it so the slicing below can never go out of bounds.
    let start = start_position.min(chars.len());

    // The line begins right after the closest newline that precedes `start`,
    // or at the very beginning of the input if there is none.
    let line_start = chars[..start]
        .iter()
        .rposition(|c| *c == '\n')
        .map_or(0, |newline_pos| newline_pos + 1);

    // The line ends at the first newline found at or after `start`,
    // or at the end of the input if there is none.
    let line_end = chars[start..]
        .iter()
        .position(|c| *c == '\n')
        .map_or(chars.len(), |offset| start + offset);

    (
        chars[line_start..line_end].iter().collect(),
        // `line_start <= start` always holds, this cannot underflow
        start - line_start,
        // A reversed span is reported as empty instead of underflowing
        end_position.saturating_sub(start_position),
    )
}
/// Returns the 1-based number of the line that contains `target_pos`.
///
/// Only newlines located strictly before `target_pos` are counted, so a
/// position that points to a newline still belongs to the line it ends.
fn get_line_number(chars: &Vec<char>, target_pos: usize) -> usize {
    let newlines_before = chars
        .iter()
        .take(target_pos)
        .filter(|c| **c == '\n')
        .count();

    // Line numbers start at 1
    newlines_before + 1
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;

View File

@ -0,0 +1,91 @@
use std::collections::VecDeque;
/// Extracts the line of code that contains an error
///
/// - `chars`: Input where to extract the line from
/// - `start_position`: Position where the erroneous code starts
/// - `end_position`: Position where the erroneous code ends
///
/// Returns a tuple of:
///
/// - `String`: The faulty line, without its trailing newline
/// - `usize`: The amount of chars *before* the faulty code
/// - `usize`: The length of the faulty code
///
/// This function never panics:
///
/// - If `start_position` is past the end of `chars` (e.g. an error at EOF)
///   it is treated as pointing to the end of the input.
/// - If `start_position` points to a newline, the line that the newline
///   terminates is returned.
/// - If `end_position` is smaller than `start_position` the length is 0.
///
/// ## Example
///
/// ```
/// let input = String::from("\n\nval number == 50\n\n").chars().into_iter().collect();
/// let start_position = 13;
/// let end_position = 15;
///
/// let (line, before, length) = get_line(&input, start_position, end_position);
///
/// assert_eq!("val number == 50", line);
/// assert_eq!(11, before);
/// assert_eq!(2, length);
/// ```
pub fn get_line(
    chars: &Vec<char>,
    start_position: usize,
    end_position: usize,
) -> (String, usize, usize) {
    // An error reported at end-of-input may point past the last char;
    // clamp it so the slicing below can never go out of bounds.
    let start = start_position.min(chars.len());

    // The line begins right after the closest newline that precedes `start`,
    // or at the very beginning of the input if there is none.
    let line_start = chars[..start]
        .iter()
        .rposition(|c| *c == '\n')
        .map_or(0, |newline_pos| newline_pos + 1);

    // The line ends at the first newline found at or after `start`,
    // or at the end of the input if there is none.
    let line_end = chars[start..]
        .iter()
        .position(|c| *c == '\n')
        .map_or(chars.len(), |offset| start + offset);

    (
        chars[line_start..line_end].iter().collect(),
        // `line_start <= start` always holds, this cannot underflow
        start - line_start,
        // A reversed span is reported as empty instead of underflowing
        end_position.saturating_sub(start_position),
    )
}
/// Returns the 1-based number of the line that contains `target_pos`.
///
/// Only newlines located strictly before `target_pos` are counted, so a
/// position that points to a newline still belongs to the line it ends.
pub fn get_line_number(chars: &Vec<char>, target_pos: usize) -> usize {
    let newlines_before = chars
        .iter()
        .take(target_pos)
        .filter(|c| **c == '\n')
        .count();

    // Line numbers start at 1
    newlines_before + 1
}

View File

@ -4,77 +4,102 @@ use std::{fs, path::Path};
use crate::lexic::token::Token; use crate::lexic::token::Token;
use crate::{codegen, error_handling::PrintableError, lexic, syntax}; use crate::{codegen, error_handling::PrintableError, lexic, syntax};
pub fn compile_file(input: &String) { pub fn compile_file(input: &String) -> Result<(), ()> {
let input_path = Path::new(input); let input_path = Path::new(input);
if !input_path.is_file() { if !input_path.is_file() {
println!( eprintln!(
"{}: {} {}", "{}: {} {}",
"error".on_red(), "error".on_red(),
"Input path is not a valid file:".red(), "Input path is not a valid file:".red(),
input input
); );
return; return Err(());
} }
let bytes = fs::read(input_path).expect("INPUT_PATH should be valid"); let bytes = match fs::read(input_path) {
Ok(bytes) => bytes,
Err(error) => {
eprintln!("{}: Error reading input file", "error".on_red());
eprintln!("{}", error);
return Err(());
}
};
let contents = match String::from_utf8(bytes) { let contents = match String::from_utf8(bytes) {
Ok(str) => str, Ok(str) => str,
Err(_) => { Err(error) => {
println!("{}: Input file contains invalid UTF-8", "error".on_red()); eprintln!("{}: Input file contains invalid UTF-8", "error".on_red());
return; eprintln!("{}", error);
return Err(());
} }
}; };
let Some(out_code) = compile(&contents) else { let out_code = match compile(&contents) {
return; Ok(out_code) => out_code,
Err(error) => {
eprintln!("{}", error);
return Err(());
}
}; };
let mut output_path = Path::new(input).canonicalize().unwrap(); let mut output_path = Path::new(input)
.canonicalize()
.expect("Invalid input path: Cannot be canonicalized");
output_path.set_extension("php"); output_path.set_extension("php");
fs::write(output_path, out_code).expect("Error writing to output path"); match fs::write(output_path, out_code) {
Ok(_) => Ok(()),
Err(error) => {
eprintln!("{}: Error writing output file", "error".on_red());
eprintln!("{}", error);
Err(())
}
}
} }
/// Executes Lexical analysis, handles errors and calls build_ast for the next phase /// THP source code goes in, PHP code or an error comes out
fn compile(input: &String) -> Option<String> { fn compile(input: &String) -> Result<String, String> {
let tokens = lexic::get_tokens(input); let tokens = lexic::get_tokens(input);
match tokens { let tokens = match tokens {
Ok(tokens) => Some(build_ast(input, tokens)), Ok(tokens) => tokens,
Err(error) => { Err(error) => {
let chars: Vec<char> = input.chars().into_iter().collect(); let chars: Vec<char> = input.chars().into_iter().collect();
println!( return Err(format!(
"{}:\n{}", "{}:\n{}",
"syntax error".on_red(), "syntax error".on_red(),
error.get_error_str(&chars) error.get_error_str(&chars)
); ));
None
}
} }
};
build_ast(input, tokens)
} }
/// Executes Syntax analysis, and for now, Semantic analysis and Code generation. /// Executes Syntax analysis, and for now, Semantic analysis and Code generation.
/// ///
/// Prints the generated code in stdin /// Prints the generated code in stdin
fn build_ast(input: &String, tokens: Vec<Token>) -> String { fn build_ast(input: &String, tokens: Vec<Token>) -> Result<String, String> {
let ast = syntax::construct_ast(&tokens); let ast = syntax::construct_ast(&tokens);
match ast { let ast = match ast {
Ok(ast) => { Ok(ast) => ast,
match crate::semantic::check_semantics(&ast) {
Ok(_) => {}
Err(reason) => { Err(reason) => {
panic!("{}", reason) let chars: Vec<char> = input.chars().into_iter().collect();
let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars));
return Err(error);
} }
}; };
codegen::codegen(&ast) match crate::semantic::check_semantics(&ast) {
} Ok(_) => {}
Err(reason) => { Err(reason) => {
let chars: Vec<char> = input.chars().into_iter().collect(); let chars: Vec<char> = input.chars().into_iter().collect();
panic!("{}", reason.get_error_str(&chars)) let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars));
} return Err(error);
} }
};
Ok(codegen::codegen(&ast))
} }

View File

@ -18,5 +18,8 @@ mod utils;
mod error_handling; mod error_handling;
fn main() { fn main() {
cli::run_cli(); match cli::run_cli() {
Ok(_) => (),
Err(_) => std::process::exit(1),
}
} }

View File

@ -1,5 +1,7 @@
use std::io::{self, Write}; use std::io::{self, Write};
use colored::Colorize;
use crate::error_handling::PrintableError; use crate::error_handling::PrintableError;
use crate::lexic::token::Token; use crate::lexic::token::Token;
@ -34,7 +36,9 @@ fn build_ast(input: &String, tokens: Vec<Token>) {
match res1 { match res1 {
Ok(_) => {} Ok(_) => {}
Err(reason) => { Err(reason) => {
eprintln!("{}", reason); let chars: Vec<char> = input.chars().into_iter().collect();
let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars));
eprintln!("{}", error);
return; return;
} }
} }

View File

@ -1,13 +1,18 @@
use crate::syntax::ast::{ModuleAST, TopLevelDeclaration}; use crate::{
error_handling::semantic_error::SemanticError,
error_handling::MistiError,
syntax::ast::{ModuleAST, TopLevelDeclaration},
};
use super::symbol_table::SymbolTable; use super::symbol_table::SymbolTable;
pub trait SemanticCheck { pub trait SemanticCheck {
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), String>; fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError>;
} }
impl SemanticCheck for ModuleAST { impl SemanticCheck for ModuleAST {
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), String> { /// Checks that this AST is semantically correct, given a symbol table
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError> {
for declaration in &self.declarations { for declaration in &self.declarations {
declaration.check_semantics(scope)?; declaration.check_semantics(scope)?;
} }
@ -17,14 +22,30 @@ impl SemanticCheck for ModuleAST {
} }
impl SemanticCheck for TopLevelDeclaration { impl SemanticCheck for TopLevelDeclaration {
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), String> { fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError> {
match self { match self {
TopLevelDeclaration::Binding(_) => Err("Binding not implemented".into()), TopLevelDeclaration::Binding(_) => {
let error = SemanticError {
error_start: 0,
error_end: 0,
reason: "Binding typechecking: Not implemented".into(),
};
Err(MistiError::Semantic(error))
}
TopLevelDeclaration::FunctionDeclaration(function) => { TopLevelDeclaration::FunctionDeclaration(function) => {
let function_name = function.identifier.as_ref().clone(); let function_name = function.identifier.as_ref().clone();
if scope.test(&function_name) { if scope.test(&function_name) {
return Err(format!("Function {} already defined", function_name)); let error = SemanticError {
// TODO: Get the position of the function name. For this, these structs
// should store the token instead of just the string
error_start: 0,
error_end: 0,
reason: format!("Function {} already defined", function_name),
};
return Err(MistiError::Semantic(error));
} }
scope.insert( scope.insert(

View File

@ -1,4 +1,4 @@
use crate::syntax::ast::ModuleAST; use crate::{error_handling::MistiError, syntax::ast::ModuleAST};
mod impls; mod impls;
mod symbol_table; mod symbol_table;
@ -11,8 +11,11 @@ use impls::SemanticCheck;
// 3. Add the symbols declared to the symbol table, annotating them with their type // 3. Add the symbols declared to the symbol table, annotating them with their type
// 4. Check if the symbols used are declared // 4. Check if the symbols used are declared
pub fn check_semantics(ast: &ModuleAST) -> Result<(), String> { /// Checks that the AST is semantically correct
pub fn check_semantics(ast: &ModuleAST) -> Result<(), MistiError> {
// For now there's only support for a single file // For now there's only support for a single file
// TODO: Receive a symbol table as a reference and work on it.
// this way we can implement a unique symbol table for REPL session
let global_scope = symbol_table::SymbolTable::new(); let global_scope = symbol_table::SymbolTable::new();
ast.check_semantics(&global_scope) ast.check_semantics(&global_scope)