Compare commits

...

3 Commits

Author SHA1 Message Date
f97b8e2e07 Improve function semantic check 2024-03-09 08:05:51 -05:00
a39b0c0d5a Properly handle errors in compilation pipeline 2024-03-01 17:38:04 -05:00
a219faf283 Add a return type to main 2024-03-01 16:52:32 -05:00
16 changed files with 274 additions and 160 deletions

View File

@ -2,6 +2,11 @@
## TODO
- Implement AST transformation before codegen:
Create a new AST to represent PHP source code
and a THP AST -> PHP AST transformation step, so that the
codegen section can focus only on code generation, not on
translating THP into PHP.
- Parse __more__ binary operators
- Parse `Type name = value` bindings
- Parse more complex bindings
@ -26,8 +31,9 @@
- [x] Begin work on semantic analysis
- [x] Minimal symbol table
- [x] Check duplicate function declarations
- [x] Improve REPL/File compilation code
- [ ] Typecheck bindings
- [ ] Typecheck functions
- [x] Typecheck functions
- [ ] Transform simple THP expression into PHP statements
## v0.0.9

View File

@ -1,19 +1,19 @@
use colored::*;
pub fn compile_command(arguments: Vec<String>) {
pub fn compile_command(arguments: Vec<String>) -> Result<(), ()> {
if arguments.is_empty() {
println!("{}", compile_help());
println!("{}: {}", "error".on_red(), "No file specified");
return;
eprintln!("{}", compile_help());
eprintln!("{}: {}", "error".on_red(), "No file specified");
return Err(());
}
if arguments.len() > 1 {
println!("{}", compile_help());
println!(
eprintln!("{}", compile_help());
eprintln!(
"{}: {}",
"error".on_red(),
"Only a single file can be compiled at a time"
);
return;
return Err(());
}
let argument = &arguments[0];
@ -23,16 +23,16 @@ pub fn compile_command(arguments: Vec<String>) {
println!("{}", compile_help());
if opt_str != "-h" && opt_str != "--help" {
println!(
eprintln!(
"{}: {}",
"error".on_red(),
"Invalid option. The compile command only accepts the `-h` or `--help` option"
"Invalid option. The compile command only accepts the `-h` or `--help` options"
);
}
return;
return Err(());
}
crate::file::compile_file(argument);
crate::file::compile_file(argument)
}
fn compile_help() -> String {

View File

@ -7,7 +7,7 @@ enum EmptyOptions {
Version,
}
pub fn empty_command(arguments: Vec<String>) {
pub fn empty_command(arguments: Vec<String>) -> Result<(), ()> {
// Add all options to a set
let mut options_set = std::collections::HashSet::new();
for option in arguments {
@ -16,9 +16,9 @@ pub fn empty_command(arguments: Vec<String>) {
options_set.insert(o);
}
Err(invalid_option) => {
println!("{}", get_help_text());
println!("{}: invalid option: `{}`", "error".on_red(), invalid_option);
return;
eprintln!("{}", get_help_text());
eprintln!("{}: invalid option: `{}`", "error".on_red(), invalid_option);
return Err(());
}
};
}
@ -35,6 +35,8 @@ pub fn empty_command(arguments: Vec<String>) {
println!("{}", get_help_text());
}
}
Ok(())
}
fn expand_option(option: &String) -> Result<EmptyOptions, String> {

View File

@ -1,7 +1,7 @@
use crate::cli::get_help_text;
use colored::*;
pub fn help_command(arguments: Vec<String>) {
pub fn help_command(arguments: Vec<String>) -> Result<(), ()> {
println!("{}", get_help_text());
if arguments.len() > 0 {
@ -11,4 +11,6 @@ pub fn help_command(arguments: Vec<String>) {
"The help command doesn't take any argument."
);
}
Ok(())
}

View File

@ -40,17 +40,17 @@ fn get_version() -> String {
format!("The THP compiler, linter & formatter, v{}", crate_version)
}
pub fn run_cli() {
pub fn run_cli() -> Result<(), ()> {
let (command, args) = match parse_args() {
Ok(c) => c,
Err(reason) => {
println!("{}", get_help_text());
println!("{}: {}", "error".on_red(), reason);
return;
eprintln!("{}", get_help_text());
eprintln!("{}: {}", "error".on_red(), reason);
return Err(());
}
};
command.run(args);
command.run(args)
}
fn parse_args() -> Result<(CommandType, Vec<String>), String> {

View File

@ -1,4 +1,13 @@
pub fn repl_command(_arguments: Vec<String>) {
use colored::Colorize;
pub fn repl_command(_arguments: Vec<String>) -> Result<(), ()> {
println!("{}", super::get_version());
let _ = crate::repl::run();
let result = crate::repl::run();
if let Err(e) = result {
eprintln!("{}: {}", "error".on_red(), e);
return Err(());
}
Ok(())
}

View File

@ -12,14 +12,15 @@ pub enum CommandType {
}
impl CommandType {
pub fn run(&self, options: Vec<String>) {
pub fn run(&self, options: Vec<String>) -> Result<(), ()> {
match self {
CommandType::Help => super::help::help_command(options),
CommandType::Compile => super::compile::compile_command(options),
CommandType::Repl => super::repl::repl_command(options),
CommandType::None => super::empty::empty_command(options),
_ => {
println!("Not implemented yet! {:?} {:?}", self, options);
eprintln!("Not implemented yet! {:?} {:?}", self, options);
Err(())
}
}
}

View File

@ -1,5 +1,9 @@
use self::semantic_error::SemanticError;
mod lex_error;
pub mod semantic_error;
mod syntax_error;
mod utils;
pub trait PrintableError {
fn get_error_str(&self, chars: &Vec<char>) -> String;
@ -9,6 +13,7 @@ pub trait PrintableError {
pub enum MistiError {
Lex(LexError),
Syntax(SyntaxError),
Semantic(SemanticError),
}
#[derive(Debug)]
@ -29,6 +34,7 @@ impl PrintableError for MistiError {
match self {
Self::Lex(err) => err.get_error_str(chars),
Self::Syntax(err) => err.get_error_str(chars),
Self::Semantic(err) => err.get_error_str(chars),
}
}
}

View File

@ -0,0 +1,31 @@
use super::utils::{get_line, get_line_number};
use super::PrintableError;
/// An error found during semantic analysis, located by a range of
/// positions in the source code.
#[derive(Debug)]
pub struct SemanticError {
/// Position where the erroneous code starts. When the error is printed it
/// is used to find the faulty line and its line number.
pub error_start: usize,
/// Position where the erroneous code ends. Together with `error_start` it
/// determines how long the indicator under the faulty code is.
pub error_end: usize,
/// Explanation of the error, printed below the faulty line.
pub reason: String,
}
impl PrintableError for SemanticError {
    /// Renders this error as a snippet of the faulty line.
    ///
    /// - `chars`: The source code the error positions refer to
    ///
    /// The result contains the faulty line prefixed by its line number, a row
    /// of `^` under the faulty code, and the reason followed by the line
    /// number and the amount of chars before the faulty code.
    fn get_error_str(&self, chars: &Vec<char>) -> String {
        let line_number = get_line_number(chars, self.error_start);
        let (line, before, length) = get_line(chars, self.error_start, self.error_end);

        // Blank gutter as wide as the printed line number, so the `|` column lines up
        let line_number_whitespace = " ".repeat(line_number.to_string().len());
        // Padding that places the indicator right under the faulty code
        let whitespace = " ".repeat(before);
        let indicator = "^".repeat(length);
        let reason = &self.reason;

        format!(
            r#"
{line_number_whitespace} |
{line_number } | {line}
{line_number_whitespace} | {whitespace}{indicator}
{reason} at line {line_number}:{before}"#,
        )
    }
}

View File

@ -1,5 +1,5 @@
use super::utils::{get_line, get_line_number};
use super::{PrintableError, SyntaxError};
use std::collections::VecDeque;
impl PrintableError for SyntaxError {
fn get_error_str(&self, chars: &Vec<char>) -> String {
@ -23,96 +23,6 @@ impl PrintableError for SyntaxError {
}
}
/// Extracts the line of code that contains an error
///
/// - `chars`: Input where to extract the line from
/// - `start_position`: Position where the erroneous code starts
/// - `end_position`: Position where the erroneous code ends
///
/// Returns a tuple of:
///
/// - `String`: The faulty line, without its trailing newline
/// - `usize`: The amount of chars *before* the faulty code
/// - `usize`: The length of the faulty code
///
/// Out of range positions do not panic: a `start_position` past the end of
/// the input is clamped to the end, a `start_position` that falls on a
/// newline selects the line that newline terminates, and an `end_position`
/// smaller than `start_position` gives a length of 0.
///
/// ## Example
///
/// ```
/// let input: Vec<char> = "\n\nval number == 50\n\n".chars().collect();
/// let start_position = 13;
/// let end_position = 15;
///
/// let (line, before, length) = get_line(&input, start_position, end_position);
///
/// assert_eq!("val number == 50", line);
/// assert_eq!(11, before);
/// assert_eq!(2, length);
/// ```
fn get_line(
    chars: &Vec<char>,
    start_position: usize,
    end_position: usize,
) -> (String, usize, usize) {
    let mut result_chars = VecDeque::<char>::new();

    // An error may be reported at (or past) the end of the input, e.g. on an
    // empty file. Clamp the position so the scans below never index out of bounds.
    let start = start_position.min(chars.len());

    // Push chars to the front until a new line (or the start of the input) is found.
    // Only chars strictly before `start` are inspected, so a `start` that sits on a
    // newline is treated as the end of the line it terminates.
    let mut line_start = start;
    while line_start > 0 && chars[line_start - 1] != '\n' {
        line_start -= 1;
        result_chars.push_front(chars[line_start]);
    }

    // Push chars to the end until a new line (or the end of the input) is found
    let mut after_pos = start;
    while after_pos < chars.len() && chars[after_pos] != '\n' {
        result_chars.push_back(chars[after_pos]);
        after_pos += 1;
    }

    (
        result_chars.iter().collect::<String>(),
        // `line_start <= start` always holds, so this cannot underflow
        start - line_start,
        // A reversed range is reported as an empty one instead of underflowing
        end_position.saturating_sub(start_position),
    )
}
/// Computes the line number, starting from 1, of the char at `target_pos`
///
/// - `chars`: Input the position refers to
/// - `target_pos`: Position whose line number is wanted
///
/// The result is 1 plus the amount of `\n` chars found strictly before
/// `target_pos`. A position past the end of the input counts every newline.
fn get_line_number(chars: &Vec<char>, target_pos: usize) -> usize {
    let newlines_before = chars
        .iter()
        .take(target_pos)
        .filter(|&&current| current == '\n')
        .count();

    newlines_before + 1
}
#[cfg(test)]
mod tests {
use super::*;

View File

@ -0,0 +1,91 @@
use std::collections::VecDeque;
/// Extracts the line of code that contains an error
///
/// - `chars`: Input where to extract the line from
/// - `start_position`: Position where the erroneous code starts
/// - `end_position`: Position where the erroneous code ends
///
/// Returns a tuple of:
///
/// - `String`: The faulty line, without its trailing newline
/// - `usize`: The amount of chars *before* the faulty code
/// - `usize`: The length of the faulty code
///
/// Out of range positions do not panic: a `start_position` past the end of
/// the input is clamped to the end, a `start_position` that falls on a
/// newline selects the line that newline terminates, and an `end_position`
/// smaller than `start_position` gives a length of 0.
///
/// ## Example
///
/// ```
/// let input: Vec<char> = "\n\nval number == 50\n\n".chars().collect();
/// let start_position = 13;
/// let end_position = 15;
///
/// let (line, before, length) = get_line(&input, start_position, end_position);
///
/// assert_eq!("val number == 50", line);
/// assert_eq!(11, before);
/// assert_eq!(2, length);
/// ```
pub fn get_line(
    chars: &Vec<char>,
    start_position: usize,
    end_position: usize,
) -> (String, usize, usize) {
    let mut result_chars = VecDeque::<char>::new();

    // An error may be reported at (or past) the end of the input, e.g. on an
    // empty file. Clamp the position so the scans below never index out of bounds.
    let start = start_position.min(chars.len());

    // Push chars to the front until a new line (or the start of the input) is found.
    // Only chars strictly before `start` are inspected, so a `start` that sits on a
    // newline is treated as the end of the line it terminates.
    let mut line_start = start;
    while line_start > 0 && chars[line_start - 1] != '\n' {
        line_start -= 1;
        result_chars.push_front(chars[line_start]);
    }

    // Push chars to the end until a new line (or the end of the input) is found
    let mut after_pos = start;
    while after_pos < chars.len() && chars[after_pos] != '\n' {
        result_chars.push_back(chars[after_pos]);
        after_pos += 1;
    }

    (
        result_chars.iter().collect::<String>(),
        // `line_start <= start` always holds, so this cannot underflow
        start - line_start,
        // A reversed range is reported as an empty one instead of underflowing
        end_position.saturating_sub(start_position),
    )
}
/// Computes the line number, starting from 1, of the char at `target_pos`
///
/// - `chars`: Input the position refers to
/// - `target_pos`: Position whose line number is wanted
///
/// The result is 1 plus the amount of `\n` chars found strictly before
/// `target_pos`. A position past the end of the input counts every newline.
pub fn get_line_number(chars: &Vec<char>, target_pos: usize) -> usize {
    let newlines_before = chars
        .iter()
        .take(target_pos)
        .filter(|&&current| current == '\n')
        .count();

    newlines_before + 1
}

View File

@ -4,77 +4,102 @@ use std::{fs, path::Path};
use crate::lexic::token::Token;
use crate::{codegen, error_handling::PrintableError, lexic, syntax};
pub fn compile_file(input: &String) {
pub fn compile_file(input: &String) -> Result<(), ()> {
let input_path = Path::new(input);
if !input_path.is_file() {
println!(
eprintln!(
"{}: {} {}",
"error".on_red(),
"Input path is not a valid file:".red(),
input
);
return;
return Err(());
}
let bytes = fs::read(input_path).expect("INPUT_PATH should be valid");
let bytes = match fs::read(input_path) {
Ok(bytes) => bytes,
Err(error) => {
eprintln!("{}: Error reading input file", "error".on_red());
eprintln!("{}", error);
return Err(());
}
};
let contents = match String::from_utf8(bytes) {
Ok(str) => str,
Err(_) => {
println!("{}: Input file contains invalid UTF-8", "error".on_red());
return;
Err(error) => {
eprintln!("{}: Input file contains invalid UTF-8", "error".on_red());
eprintln!("{}", error);
return Err(());
}
};
let Some(out_code) = compile(&contents) else {
return;
let out_code = match compile(&contents) {
Ok(out_code) => out_code,
Err(error) => {
eprintln!("{}", error);
return Err(());
}
};
let mut output_path = Path::new(input).canonicalize().unwrap();
let mut output_path = Path::new(input)
.canonicalize()
.expect("Invalid input path: Cannot be canonicalized");
output_path.set_extension("php");
fs::write(output_path, out_code).expect("Error writing to output path");
match fs::write(output_path, out_code) {
Ok(_) => Ok(()),
Err(error) => {
eprintln!("{}: Error writing output file", "error".on_red());
eprintln!("{}", error);
Err(())
}
}
}
/// Executes Lexical analysis, handles errors and calls build_ast for the next phase
fn compile(input: &String) -> Option<String> {
/// THP source code goes in, PHP code or an error comes out
fn compile(input: &String) -> Result<String, String> {
let tokens = lexic::get_tokens(input);
match tokens {
Ok(tokens) => Some(build_ast(input, tokens)),
let tokens = match tokens {
Ok(tokens) => tokens,
Err(error) => {
let chars: Vec<char> = input.chars().into_iter().collect();
println!(
return Err(format!(
"{}:\n{}",
"syntax error".on_red(),
error.get_error_str(&chars)
);
None
));
}
}
};
build_ast(input, tokens)
}
/// Executes Syntax analysis, and for now, Semantic analysis and Code generation.
///
/// Returns the generated PHP code, or a formatted error message if a phase fails
fn build_ast(input: &String, tokens: Vec<Token>) -> String {
fn build_ast(input: &String, tokens: Vec<Token>) -> Result<String, String> {
let ast = syntax::construct_ast(&tokens);
match ast {
Ok(ast) => {
match crate::semantic::check_semantics(&ast) {
Ok(_) => {}
Err(reason) => {
panic!("{}", reason)
}
};
codegen::codegen(&ast)
}
let ast = match ast {
Ok(ast) => ast,
Err(reason) => {
let chars: Vec<char> = input.chars().into_iter().collect();
panic!("{}", reason.get_error_str(&chars))
let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars));
return Err(error);
}
}
};
match crate::semantic::check_semantics(&ast) {
Ok(_) => {}
Err(reason) => {
let chars: Vec<char> = input.chars().into_iter().collect();
let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars));
return Err(error);
}
};
Ok(codegen::codegen(&ast))
}

View File

@ -18,5 +18,8 @@ mod utils;
mod error_handling;
fn main() {
cli::run_cli();
match cli::run_cli() {
Ok(_) => (),
Err(_) => std::process::exit(1),
}
}

View File

@ -1,5 +1,7 @@
use std::io::{self, Write};
use colored::Colorize;
use crate::error_handling::PrintableError;
use crate::lexic::token::Token;
@ -34,7 +36,9 @@ fn build_ast(input: &String, tokens: Vec<Token>) {
match res1 {
Ok(_) => {}
Err(reason) => {
eprintln!("{}", reason);
let chars: Vec<char> = input.chars().into_iter().collect();
let error = format!("{}: {}", "error".on_red(), reason.get_error_str(&chars));
eprintln!("{}", error);
return;
}
}

View File

@ -1,13 +1,18 @@
use crate::syntax::ast::{ModuleAST, TopLevelDeclaration};
use crate::{
error_handling::semantic_error::SemanticError,
error_handling::MistiError,
syntax::ast::{ModuleAST, TopLevelDeclaration},
};
use super::symbol_table::SymbolTable;
pub trait SemanticCheck {
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), String>;
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError>;
}
impl SemanticCheck for ModuleAST {
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), String> {
/// Checks that this AST is semantically correct, given a symbol table
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError> {
for declaration in &self.declarations {
declaration.check_semantics(scope)?;
}
@ -17,14 +22,30 @@ impl SemanticCheck for ModuleAST {
}
impl SemanticCheck for TopLevelDeclaration {
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), String> {
fn check_semantics(&self, scope: &SymbolTable) -> Result<(), MistiError> {
match self {
TopLevelDeclaration::Binding(_) => Err("Binding not implemented".into()),
TopLevelDeclaration::Binding(_) => {
let error = SemanticError {
error_start: 0,
error_end: 0,
reason: "Binding typechecking: Not implemented".into(),
};
Err(MistiError::Semantic(error))
}
TopLevelDeclaration::FunctionDeclaration(function) => {
let function_name = function.identifier.as_ref().clone();
if scope.test(&function_name) {
return Err(format!("Function {} already defined", function_name));
let error = SemanticError {
// TODO: Get the position of the function name. For this, these structs
// should store the token instead of just the string
error_start: 0,
error_end: 0,
reason: format!("Function {} already defined", function_name),
};
return Err(MistiError::Semantic(error));
}
scope.insert(

View File

@ -1,4 +1,4 @@
use crate::syntax::ast::ModuleAST;
use crate::{error_handling::MistiError, syntax::ast::ModuleAST};
mod impls;
mod symbol_table;
@ -11,8 +11,11 @@ use impls::SemanticCheck;
// 3. Add the symbols declared to the symbol table, annotating them with their type
// 4. Check if the symbols used are declared
pub fn check_semantics(ast: &ModuleAST) -> Result<(), String> {
/// Checks that the AST is semantically correct
pub fn check_semantics(ast: &ModuleAST) -> Result<(), MistiError> {
// For now there's only support for a single file
// TODO: Receive a symbol table as a reference and work on it.
// this way we can implement a unique symbol table for REPL session
let global_scope = symbol_table::SymbolTable::new();
ast.check_semantics(&global_scope)