Compare commits

..

3 Commits

Author SHA1 Message Date
Araozu 46d9d04c75 Add files 2024-05-04 11:55:45 -05:00
Araozu 774f1d65ca Add tests 2024-03-18 17:21:02 -05:00
Araozu a7417e8a99 Add tests to function parameters 2024-03-18 16:41:11 -05:00
14 changed files with 341 additions and 35 deletions

View File

@ -28,6 +28,7 @@
- [ ] Infer datatype of binary operators - [ ] Infer datatype of binary operators
- [ ] Execute semantic analysis on the function's block - [ ] Execute semantic analysis on the function's block
- [ ] Write tests - [ ] Write tests
- [ ] Abstract the parsing of datatypes, such that in the future generics can be implemented in a single place
## v0.0.11 ## v0.0.11
@ -39,7 +40,7 @@
- [ ] Infer datatype of a `val variable = value` in the AST: Use the inferred datatype - [ ] Infer datatype of a `val variable = value` in the AST: Use the inferred datatype
- [x] Formally define the top level constructs - [x] Formally define the top level constructs
- [x] Parse bindings and function declarations as top level constructs - [x] Parse bindings and function declarations as top level constructs
- [ ] Parse function declaration arguments (`Type id`) - [x] Parse function declaration arguments (`Type id`)
- [x] Parse function return datatype (`fun f() -> Type`) - [x] Parse function return datatype (`fun f() -> Type`)
- [x] Return parsing to variables to var/val - [x] Return parsing to variables to var/val
- [ ] Write tests - [ ] Write tests

2
Cargo.lock generated
View File

@ -20,7 +20,7 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]] [[package]]
name = "thp" name = "thp"
version = "0.0.9" version = "0.0.10"
dependencies = [ dependencies = [
"colored", "colored",
] ]

View File

@ -1,6 +1,6 @@
[package] [package]
name = "thp" name = "thp"
version = "0.0.9" version = "0.0.10"
edition = "2021" edition = "2021"

View File

@ -4,19 +4,41 @@ Types and a new syntax for PHP, because I'm forced to use it at work.
## Usage ## Usage
### Singular files TBD.
Inside an existing PHP codebase, files are converted to THP Requirements: A *nix system & cargo
one at a time, or new files are written in THP.
There must be a thp.config.yaml at the root of the project, ```sh
which configures the compiler. # Clone the repo
git clone https://github.com/Araozu/thp-lang.git
Every file is compiled in place. # Generate an executable
cargo build --release
# The executable will be located in ./target/release/thp
### Project mode # And then run it and follow the instructions!
```
The whole project uses THP. Work in progress. ```sh
Usage: `thp [command] [options]`
Commands
c _file_ Compiles _file_ in-place
f _file_ Formats _file_
r Starts the REPL
init Initializes a new project in the current directory
build Builds the project
fmt Formats all files in the project
watch, w Starts compilation of the project in watch mode
help, h Print this message & exit
General options
-h, --help Print command-specific usage
-v, --version Print version & exit
```

View File

@ -467,4 +467,12 @@ mod indentation_tests {
assert_eq!(TokenType::Comment, tokens[0].token_type); assert_eq!(TokenType::Comment, tokens[0].token_type);
} }
#[test]
fn should_emit_error_on_incorrect_indentation() {
    // The lexer is expected to reject this input instead of producing tokens.
    let source = String::from("1\n 2\n 3");
    assert!(get_tokens(&source).is_err());
}
} }

View File

@ -436,4 +436,36 @@ mod tests {
panic!("Expected some value") panic!("Expected some value")
}; };
} }
#[test]
fn should_not_scan_invalid_scientific_notation() {
    // An exponent marker with nothing after it must produce a lexer error.
    let chars = str_to_vec("1e");

    if let LexResult::Err(error) = scan(&chars, 0) {
        assert_eq!(
            "The characters after 'e' are not + or -, or are not followed by a number",
            error.reason
        )
    } else {
        panic!("Expected an error")
    }
}
#[test]
fn should_not_scan_invalid_scientific_notation_2() {
    // A signed exponent followed by a non-digit must produce a lexer error.
    let chars = str_to_vec("1e+f");

    if let LexResult::Err(error) = scan(&chars, 0) {
        assert_eq!(
            "The characters after 'e' are not + or -, or are not followed by a number",
            error.reason
        )
    } else {
        panic!("Expected an error")
    }
}
} }

View File

@ -179,4 +179,45 @@ mod tests {
panic!() panic!()
} }
} }
#[test]
fn should_scan_non_escape_characters_preceded_by_bsls() {
    // Scanning starts at index 1, right after the opening quote at index 0.
    let chars = str_to_vec("\"Sample\\atext\"");

    match scan(&chars, 1) {
        LexResult::Some(token, next) => {
            assert_eq!(14, next);
            assert_eq!(TokenType::String, token.token_type);
            // The backslash before `a` is kept verbatim in the token value.
            assert_eq!("\"Sample\\atext\"", token.value);
            assert_eq!(0, token.position);
        }
        _ => panic!(),
    }
}
#[test]
fn shouldnt_panic_when_encountering_eof_after_bsls() {
    // The input ends immediately after a backslash, before any closing quote.
    let chars = str_to_vec("\"Sample\\");

    if let LexResult::Err(error) = scan(&chars, 1) {
        assert_eq!("Incomplete string found", error.reason)
    } else {
        panic!("expected an error")
    }
}
#[test]
fn should_not_scan_an_unfinished_string() {
    // The input has an opening quote but no closing one.
    let chars = str_to_vec("\"Hello, world!");

    if let LexResult::Err(error) = scan(&chars, 1) {
        assert_eq!("Incomplete string found", error.reason)
    } else {
        panic!("expected an error")
    }
}
} }

View File

@ -0,0 +1,58 @@
use crate::{
error_handling::{semantic_error::SemanticError, MistiError},
semantic::{impls::SemanticCheck, symbol_table::SymbolEntry},
syntax::ast::var_binding::Binding,
};
impl SemanticCheck for Binding<'_> {
    /// Checks a variable binding against the symbols already known to `scope`.
    ///
    /// # Errors
    ///
    /// Returns `MistiError::Semantic`, spanning the binding's identifier, when
    /// `scope.test` reports that the name is already defined.
    ///
    /// # Panics
    ///
    /// Datatype checking and symbol registration are not implemented yet, so a
    /// binding whose name is not already defined currently reaches `todo!`.
    fn check_semantics(
        &self,
        scope: &crate::semantic::symbol_table::SymbolTable,
    ) -> Result<(), crate::error_handling::MistiError> {
        let identifier = &self.identifier;
        let binding_name = &identifier.value;

        // TODO: Decide whether variables may be redeclared.
        // If they may, this duplicate check becomes unnecessary.
        let already_defined = scope.test(binding_name);
        if already_defined {
            let error_start = identifier.position;
            let error_end = identifier.get_end_position();
            let reason = format!(
                "Duplicated: A symbol with name {} was already defined",
                binding_name
            );

            return Err(MistiError::Semantic(SemanticError {
                error_start,
                error_end,
                reason,
            }));
        }

        todo!("");

        /*
        Planned implementation, currently disabled:

        let expression_datatype = self.expression.get_type();
        let datatype = match self.datatype {
            Some(t) => t.value.clone(),
            // If the datatype is not defined, we use the expression datatype
            None => expression_datatype.clone(),
        };
        // Both the declared & actual datatypes must be the same
        if datatype != expression_datatype {
            let error = SemanticError {
                error_start: self.identifier.position,
                error_end: self.identifier.get_end_position(),
                reason: format!(
                    "The variable `{}` was declared as `{}` but its expression has type `{}`",
                    binding_name, datatype, expression_datatype
                ),
            };
            return Err(MistiError::Semantic(error));
        }
        scope.insert(binding_name.clone(), SymbolEntry::new_variable(datatype));
        Ok(())
        */
    }
}

View File

@ -0,0 +1,38 @@
use crate::{
error_handling::{semantic_error::SemanticError, MistiError},
semantic::{impls::SemanticCheck, symbol_table::SymbolEntry},
syntax::ast::FunctionDeclaration,
};
impl SemanticCheck for FunctionDeclaration<'_> {
    /// Registers this function in `scope`, rejecting names that already exist.
    ///
    /// The symbol is currently always recorded with an empty parameter list
    /// and a `"Unit"` return type; the declared signature is not inspected.
    ///
    /// # Errors
    ///
    /// Returns `MistiError::Semantic`, spanning the function's identifier, when
    /// `scope.test` reports that the name is already defined.
    fn check_semantics(
        &self,
        scope: &crate::semantic::symbol_table::SymbolTable,
    ) -> Result<(), crate::error_handling::MistiError> {
        let identifier = self.identifier;
        let function_name = &identifier.value;

        // A function may not reuse the name of an existing symbol.
        let already_defined = scope.test(function_name);
        if already_defined {
            let error_start = identifier.position;
            let error_end = identifier.get_end_position();
            let reason = format!(
                "Duplicated: A symbol with name {} was already defined",
                function_name
            );

            return Err(MistiError::Semantic(SemanticError {
                error_start,
                error_end,
                reason,
            }));
        }

        // TODO: Check the declared return type of the function
        // TODO: Check the return type produced by the function body

        let entry = SymbolEntry::new_function(vec![], "Unit".into());
        scope.insert(function_name.clone(), entry);

        Ok(())
    }
}

View File

@ -0,0 +1,2 @@
pub mod binding;
pub mod function_declaration;

View File

@ -2,6 +2,7 @@ use crate::{error_handling::MistiError, syntax::ast::ModuleAST};
mod impls; mod impls;
mod symbol_table; mod symbol_table;
mod checks;
use impls::SemanticCheck; use impls::SemanticCheck;

View File

@ -21,7 +21,7 @@ pub enum TopLevelDeclaration<'a> {
pub struct FunctionDeclaration<'a> { pub struct FunctionDeclaration<'a> {
pub identifier: &'a Token, pub identifier: &'a Token,
pub return_type: Option<&'a Token>, pub return_type: Option<&'a Token>,
pub params_list: Box<ParamsList>, pub params_list: Box<ParamsList<'a>>,
pub block: Box<Block<'a>>, pub block: Box<Block<'a>>,
} }
@ -31,8 +31,11 @@ pub struct Block<'a> {
} }
#[derive(Debug)] #[derive(Debug)]
pub struct ParamsList {} pub struct ParamsList<'a> {
pub parameters: Vec<Parameter<'a>>,
}
#[derive(Debug)]
pub struct Parameter<'a> { pub struct Parameter<'a> {
pub identifier: &'a String, pub identifier: &'a String,
pub datatype: &'a String, pub datatype: &'a String,

View File

@ -12,9 +12,7 @@ use super::{
/* /*
function declaration = "fun", identifier, params list, return type?, block; function declaration = "fun", identifier, params list, return type?, block;
params list = "(", ")"; return type = "->", datatype;
return type = ;
*/ */
pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<FunctionDeclaration> { pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<FunctionDeclaration> {
let mut current_pos = pos; let mut current_pos = pos;

View File

@ -9,6 +9,14 @@ use super::super::{
utils, utils,
}; };
/*
# Basically, every parameter can have a trailing comma.
params list = "("
, ( datatype pair, (",", datatype pair)*, ","? )?
, ")";
datatype pair = datatype, identifier;
*/
pub fn parse_params_list<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<ParamsList> { pub fn parse_params_list<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<ParamsList> {
let mut current_pos = pos; let mut current_pos = pos;
@ -21,14 +29,6 @@ pub fn parse_params_list<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResul
}; };
current_pos = next_pos; current_pos = next_pos;
/*
val (opening_paren, next_pos) = try parse_token_type(...)
val (next_parameter, next_pos) = try parse_param_definition(...) catch
case ::Err(e) { return ::Err(e) }
else { break }
*/
// Parse parameters definitions, separated by commas // Parse parameters definitions, separated by commas
let mut parameters = Vec::<Parameter>::new(); let mut parameters = Vec::<Parameter>::new();
loop { loop {
@ -81,17 +81,17 @@ pub fn parse_params_list<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResul
}; };
current_pos = next_pos; current_pos = next_pos;
Ok((ParamsList {}, current_pos)) Ok((ParamsList { parameters }, current_pos))
} }
/// Parse a single parameter definition of the form:
/// - `Type identifier`
///
/// There will be more constructs in the future, like:
/// - `Type identifier = default_value`
/// - `FunctionType identifier`
/// - `Pattern identifier` (e.g. `Some[String] value`)?
fn parse_param_definition<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<Parameter> { fn parse_param_definition<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<Parameter> {
// Parse a single parameter definition of the form:
// - Type identifier
// There will be more constructs in the future, like:
// - Type identifier = default_value
// - FunctionType identifier
// - Pattern identifier (e.g. Some[String] value)?
let mut current_pos = pos; let mut current_pos = pos;
let (datatype, next_pos) = let (datatype, next_pos) =
match utils::parse_token_type(tokens, current_pos, TokenType::Datatype) { match utils::parse_token_type(tokens, current_pos, TokenType::Datatype) {
@ -100,9 +100,8 @@ fn parse_param_definition<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResu
return Err(ParsingError::Err(err)); return Err(ParsingError::Err(err));
} }
// If there is no datatype this construction doesn't apply. // If there is no datatype this construction doesn't apply.
// Return a mismatch and let the caller handle it // Return an unmatch and let the caller handle it
Err(ParsingError::Mismatch(t)) => return Err(ParsingError::Mismatch(t)), _ => return Err(ParsingError::Unmatched),
Err(ParsingError::Unmatched) => return Err(ParsingError::Unmatched),
}; };
current_pos = next_pos; current_pos = next_pos;
@ -137,3 +136,106 @@ fn parse_param_definition<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResu
next_pos, next_pos,
)) ))
} }
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lexic::get_tokens;

    /// Collects the `(datatype, identifier)` pair of every parsed parameter,
    /// in declaration order, so a whole list can be compared in one assertion.
    macro_rules! param_pairs {
        ($list:expr) => {
            $list
                .parameters
                .iter()
                .map(|param| (param.datatype.as_str(), param.identifier.as_str()))
                .collect::<Vec<(&str, &str)>>()
        };
    }

    #[test]
    fn should_parse_empty_param_list() {
        let tokens = get_tokens(&String::from("()")).unwrap();
        let (params_list, end_pos) = parse_params_list(&tokens, 0).unwrap();

        assert_eq!(end_pos, 2);
        assert!(params_list.parameters.is_empty());
    }

    #[test]
    fn should_parse_empty_param_list_with_whitespace() {
        let tokens = get_tokens(&String::from("( )")).unwrap();
        let (params_list, end_pos) = parse_params_list(&tokens, 0).unwrap();

        assert_eq!(end_pos, 2);
        assert!(params_list.parameters.is_empty());
    }

    #[test]
    fn should_parse_empty_param_list_with_newlines() {
        let tokens = get_tokens(&String::from("(\n \n)")).unwrap();
        let (params_list, end_pos) = parse_params_list(&tokens, 0).unwrap();

        assert_eq!(end_pos, 3);
        assert!(params_list.parameters.is_empty());
    }

    #[test]
    fn should_parse_empty_param_list_with_1_parameter() {
        let tokens = get_tokens(&String::from("(Int x)")).unwrap();
        let (params_list, end_pos) = parse_params_list(&tokens, 0).unwrap();

        assert_eq!(end_pos, 4);
        assert_eq!(param_pairs!(params_list), vec![("Int", "x")]);
    }

    #[test]
    fn should_parse_empty_param_list_with_1_parameter_with_trailing_comma() {
        let tokens = get_tokens(&String::from("(Int x, )")).unwrap();
        let (params_list, end_pos) = parse_params_list(&tokens, 0).unwrap();

        assert_eq!(end_pos, 5);
        assert_eq!(param_pairs!(params_list), vec![("Int", "x")]);
    }

    #[test]
    fn should_parse_empty_param_list_with_2_parameters() {
        let tokens = get_tokens(&String::from("(Int x, String y)")).unwrap();
        let (params_list, end_pos) = parse_params_list(&tokens, 0).unwrap();

        assert_eq!(end_pos, 7);
        assert_eq!(
            param_pairs!(params_list),
            vec![("Int", "x"), ("String", "y")]
        );
    }

    #[test]
    fn should_parse_empty_param_list_with_2_parameters_and_trailing_comma() {
        let tokens = get_tokens(&String::from("(Int x, String y, )")).unwrap();
        let (params_list, end_pos) = parse_params_list(&tokens, 0).unwrap();

        assert_eq!(end_pos, 8);
        assert_eq!(
            param_pairs!(params_list),
            vec![("Int", "x"), ("String", "y")]
        );
    }

    #[test]
    fn should_parse_multiline_params() {
        let tokens = get_tokens(&String::from("(\n Int x,\n String y,\n)")).unwrap();
        let (params_list, end_pos) = parse_params_list(&tokens, 0).unwrap();

        assert_eq!(end_pos, 13);
        assert_eq!(
            param_pairs!(params_list),
            vec![("Int", "x"), ("String", "y")]
        );
    }
}