Compare commits: 2d32f1a0bc ... 46d9d04c75

3 commits:

- 46d9d04c75
- 774f1d65ca
- a7417e8a99
@@ -28,6 +28,7 @@

- [ ] Infer datatype of binary operators
- [ ] Execute semantic analysis on the function's block
- [ ] Write tests
- [ ] Abstract the parsing of datatypes, such that in the future generics can be implemented in a single place


## v0.0.11

@@ -39,7 +40,7 @@

- [ ] Infer datatype of a `val variable = value` in the AST: Use the inferred datatype
- [x] Formally define the top level constructs
- [x] Parse bindings and function declarations as top level constructs
- [ ] Parse function declaration arguments (`Type id`)
- [x] Parse function declaration arguments (`Type id`)
- [x] Parse function return datatype (`fun f() -> Type`)
- [x] Return parsing to variables to var/val
- [ ] Write tests

Cargo.lock (2 changes, generated)

@@ -20,7 +20,7 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"

[[package]]
name = "thp"
version = "0.0.9"
version = "0.0.10"
dependencies = [
 "colored",
]

Cargo.toml

@@ -1,6 +1,6 @@

[package]
name = "thp"
version = "0.0.9"
version = "0.0.10"
edition = "2021"

README.md (38 changes)

@@ -4,19 +4,41 @@ Types and a new syntax for PHP, because I'm forced to use it at work.

## Usage

### Singular files
TBD.

Inside an existing PHP codebase, files are converted to THP
one at a time, or new files are written in THP.
Requirements: A *nix system & cargo

There must be a thp.config.yaml at the root of the project,
which configures the compiler.
```sh
# Clone the repo
git clone https://github.com/Araozu/thp-lang.git

Every file is compiled in place.
# Generate an executable
cargo build --release

# The executable will be located in ./target/release/thp

### Project mode
# And then run it and follow the instructions!
```

The whole project uses THP. Work in progress.
```sh
Usage: `thp [command] [options]`

Commands

  c _file_   Compiles _file_ in-place
  f _file_   Formats _file_
  r          Starts the REPL

  init       Initializes a new project in the current directory
  build      Builds the project
  fmt        Formats all files in the project
  watch, w   Starts compilation of the project in watch mode

  help, h    Print this message & exit

General options

  -h, --help     Print command-specific usage
  -v, --version  Print version & exit
```

@@ -467,4 +467,12 @@ mod indentation_tests {

        assert_eq!(TokenType::Comment, tokens[0].token_type);
    }

    #[test]
    fn should_emit_error_on_incorrect_indentation() {
        let input = String::from("1\n 2\n 3");
        let tokens = get_tokens(&input);

        assert!(tokens.is_err());
    }
}

@@ -436,4 +436,36 @@ mod tests {

            panic!("Expected some value")
        };
    }

    #[test]
    fn should_not_scan_invalid_scientific_notation() {
        let input = str_to_vec("1e");
        let start_pos = 0;

        match scan(&input, start_pos) {
            LexResult::Err(reason) => {
                assert_eq!(
                    "The characters after 'e' are not + or -, or are not followed by a number",
                    reason.reason
                )
            }
            _ => panic!("Expected an error"),
        }
    }

    #[test]
    fn should_not_scan_invalid_scientific_notation_2() {
        let input = str_to_vec("1e+f");
        let start_pos = 0;

        match scan(&input, start_pos) {
            LexResult::Err(reason) => {
                assert_eq!(
                    "The characters after 'e' are not + or -, or are not followed by a number",
                    reason.reason
                )
            }
            _ => panic!("Expected an error"),
        }
    }
}

@@ -179,4 +179,45 @@ mod tests {

            panic!()
        }
    }

    #[test]
    fn should_scan_non_escape_characters_preceded_by_bsls() {
        let input = str_to_vec("\"Sample\\atext\"");
        let start_pos = 1;
        if let LexResult::Some(token, next) = scan(&input, start_pos) {
            assert_eq!(14, next);
            assert_eq!(TokenType::String, token.token_type);
            assert_eq!("\"Sample\\atext\"", token.value);
            assert_eq!(0, token.position);
        } else {
            panic!()
        }
    }

    #[test]
    fn shouldnt_panic_when_encountering_eof_after_bsls() {
        let input = str_to_vec("\"Sample\\");
        let start_pos = 1;
        let result = scan(&input, start_pos);

        match result {
            LexResult::Err(reason) => {
                assert_eq!("Incomplete string found", reason.reason)
            },
            _ => panic!("expected an error")
        }
    }

    #[test]
    fn should_not_scan_an_unfinished_string() {
        let input = str_to_vec("\"Hello, world!");
        let result = scan(&input, 1);

        match result {
            LexResult::Err(reason) => {
                assert_eq!("Incomplete string found", reason.reason)
            },
            _ => panic!("expected an error")
        }
    }
}

src/semantic/checks/binding.rs (new file, 58 lines)

@@ -0,0 +1,58 @@

use crate::{
    error_handling::{semantic_error::SemanticError, MistiError},
    semantic::{impls::SemanticCheck, symbol_table::SymbolEntry},
    syntax::ast::var_binding::Binding,
};

impl SemanticCheck for Binding<'_> {
    fn check_semantics(
        &self,
        scope: &crate::semantic::symbol_table::SymbolTable,
    ) -> Result<(), crate::error_handling::MistiError> {
        let binding_name = &self.identifier.value;

        // TODO: Define if variables can be redeclared.
        // If so, it is irrelevant to check if the variable is already defined
        if scope.test(binding_name) {
            let error = SemanticError {
                error_start: self.identifier.position,
                error_end: self.identifier.get_end_position(),
                reason: format!(
                    "Duplicated: A symbol with name {} was already defined",
                    binding_name
                ),
            };

            return Err(MistiError::Semantic(error));
        }

        todo!("");
        /*
        let expression_datatype = self.expression.get_type();

        let datatype = match self.datatype {
            Some(t) => t.value.clone(),
            // If the datatype is not defined, we use the expression datatype
            None => expression_datatype.clone(),
        };

        // Both the declared & actual datatypes must be the same
        if datatype != expression_datatype {
            let error = SemanticError {
                error_start: self.identifier.position,
                error_end: self.identifier.get_end_position(),
                reason: format!(
                    "The variable `{}` was declared as `{}` but its expression has type `{}`",
                    binding_name, datatype, expression_datatype
                ),
            };

            return Err(MistiError::Semantic(error));
        }

        scope.insert(binding_name.clone(), SymbolEntry::new_variable(datatype));

        Ok(())
        */
    }
}

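The commented-out block above is the intended datatype check for bindings. The rule it encodes is small enough to state on its own; the following is a self-contained, hypothetical illustration (the function and names are not from the codebase):

```rust
// Hypothetical illustration of the rule in the commented-out block: an explicit
// datatype annotation, when present, must equal the datatype inferred from the
// expression; without an annotation the inferred datatype is used as-is.
fn binding_typechecks(declared: Option<&str>, inferred: &str) -> bool {
    match declared {
        Some(annotation) => annotation == inferred,
        None => true,
    }
}

fn main() {
    assert!(binding_typechecks(Some("Int"), "Int")); // annotation matches the expression
    assert!(!binding_typechecks(Some("Int"), "String")); // would be reported as a SemanticError
    assert!(binding_typechecks(None, "String")); // no annotation: the inferred datatype is used
}
```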
src/semantic/checks/function_declaration.rs (new file, 38 lines)

@@ -0,0 +1,38 @@

use crate::{
    error_handling::{semantic_error::SemanticError, MistiError},
    semantic::{impls::SemanticCheck, symbol_table::SymbolEntry},
    syntax::ast::FunctionDeclaration,
};

impl SemanticCheck for FunctionDeclaration<'_> {
    fn check_semantics(
        &self,
        scope: &crate::semantic::symbol_table::SymbolTable,
    ) -> Result<(), crate::error_handling::MistiError> {
        let function_name = self.identifier.value.clone();

        // Check that the function is not already defined
        if scope.test(&function_name) {
            let error = SemanticError {
                error_start: self.identifier.position,
                error_end: self.identifier.get_end_position(),
                reason: format!(
                    "Duplicated: A symbol with name {} was already defined",
                    function_name
                ),
            };

            return Err(MistiError::Semantic(error));
        }

        // TODO: Check the return type of the function
        // TODO: Check the return type of the function body

        scope.insert(
            function_name,
            SymbolEntry::new_function(vec![], "Unit".into()),
        );

        Ok(())
    }
}

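The return type is currently hard-coded to `Unit`. One way the TODOs above could be resolved is by reading the `return_type: Option<&Token>` field that the AST change later in this diff exposes; a hedged sketch, assuming `Token::value` holds the type name as written in the source:

```rust
// Hypothetical sketch, not part of the diff: derive the declared return type
// from the AST instead of hard-coding "Unit". `Token` is the project's token type.
fn declared_return_type(return_type: Option<&Token>) -> String {
    match return_type {
        Some(token) => token.value.clone(),
        // No `-> Type` annotation: fall back to the unit type.
        None => "Unit".into(),
    }
}

// Possible usage inside check_semantics, replacing the hard-coded value:
// scope.insert(
//     function_name,
//     SymbolEntry::new_function(vec![], declared_return_type(self.return_type)),
// );
```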
src/semantic/checks/mod.rs (new file, 2 lines)

@@ -0,0 +1,2 @@

pub mod binding;
pub mod function_declaration;

@@ -2,6 +2,7 @@ use crate::{error_handling::MistiError, syntax::ast::ModuleAST};

mod impls;
mod symbol_table;
mod checks;

use impls::SemanticCheck;

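The `SemanticCheck` trait that the new `checks` module implements is defined in `semantic/impls.rs`, which this diff does not touch. Judging from the two impl blocks above, its shape is presumably along these lines (a sketch, not the actual definition):

```rust
// Assumed trait shape, inferred from the impls for Binding and FunctionDeclaration above.
pub trait SemanticCheck {
    fn check_semantics(
        &self,
        scope: &crate::semantic::symbol_table::SymbolTable,
    ) -> Result<(), crate::error_handling::MistiError>;
}
```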
@@ -21,7 +21,7 @@ pub enum TopLevelDeclaration<'a> {

pub struct FunctionDeclaration<'a> {
    pub identifier: &'a Token,
    pub return_type: Option<&'a Token>,
    pub params_list: Box<ParamsList>,
    pub params_list: Box<ParamsList<'a>>,
    pub block: Box<Block<'a>>,
}

@@ -31,8 +31,11 @@ pub struct Block<'a> {
}

#[derive(Debug)]
pub struct ParamsList {}
pub struct ParamsList<'a> {
    pub parameters: Vec<Parameter<'a>>,
}

#[derive(Debug)]
pub struct Parameter<'a> {
    pub identifier: &'a String,
    pub datatype: &'a String,

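For context, a sketch of how a later compiler phase might consume the `parameters` field that this change adds to `ParamsList`. The struct and field names come from the hunks above; the function itself is purely illustrative:

```rust
// Illustrative only: walk the new `parameters` field and render each parameter
// back into the `Type identifier` surface form that the parser accepts.
fn describe_parameters(declaration: &FunctionDeclaration<'_>) -> Vec<String> {
    declaration
        .params_list
        .parameters
        .iter()
        .map(|param| format!("{} {}", param.datatype, param.identifier))
        .collect()
}
```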
@@ -12,9 +12,7 @@ use super::{

/*
function declaration = "fun", identifier, params list, return type?, block;

params list = "(", ")";

return type = ;
return type = "->", datatype;
*/
pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<FunctionDeclaration> {
    let mut current_pos = pos;

@@ -9,6 +9,14 @@ use super::super::{

    utils,
};

/*
# Basically, every parameter can have a trailing comma.
params list = "("
              , ( datatype pair, (",", datatype pair)*, ","? )?
              , ")";

datatype pair = datatype, identifier;
*/
pub fn parse_params_list<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<ParamsList> {
    let mut current_pos = pos;

@@ -21,14 +29,6 @@ pub fn parse_params_list<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<ParamsList> {

    };
    current_pos = next_pos;

    /*
    val (opening_paren, next_pos) = try parse_token_type(...)

    val (next_parameter, next_pos) = try parse_param_definition(...) catch
        case ::Err(e) { return ::Err(e) }
        else { break }
    */

    // Parse parameters definitions, separated by commas
    let mut parameters = Vec::<Parameter>::new();
    loop {

@@ -81,17 +81,17 @@ pub fn parse_params_list<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<ParamsList> {

    };
    current_pos = next_pos;

    Ok((ParamsList {}, current_pos))
    Ok((ParamsList { parameters }, current_pos))
}

/// Parse a single parameter definition of the form:
/// - `Type identifier`
///
/// There will be more constructs in the future, like:
/// - `Type identifier = default_value`
/// - `FunctionType identifier`
/// - `Pattern identifier` (e.g. `Some[String] value`)?
fn parse_param_definition<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<Parameter> {
    // Parse a single parameter definition of the form:
    // - Type identifier
    // There will be more constructs in the future, like:
    // - Type identifier = default_value
    // - FunctionType identifier
    // - Pattern identifier (e.g. Some[String] value)?

    let mut current_pos = pos;
    let (datatype, next_pos) =
        match utils::parse_token_type(tokens, current_pos, TokenType::Datatype) {

@@ -100,9 +100,8 @@ fn parse_param_definition<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<Parameter> {

                return Err(ParsingError::Err(err));
            }
            // If there is no datatype this construction doesn't apply.
            // Return a mismatch and let the caller handle it
            Err(ParsingError::Mismatch(t)) => return Err(ParsingError::Mismatch(t)),
            Err(ParsingError::Unmatched) => return Err(ParsingError::Unmatched),
            // Return an unmatch and let the caller handle it
            _ => return Err(ParsingError::Unmatched),
        };
    current_pos = next_pos;

@@ -137,3 +136,106 @@ fn parse_param_definition<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<Parameter> {

        next_pos,
    ))
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::lexic::get_tokens;

    #[test]
    fn should_parse_empty_param_list() {
        let tokens = get_tokens(&String::from("()")).unwrap();
        let (result, next_pos) = parse_params_list(&tokens, 0).unwrap();

        assert_eq!(next_pos, 2);
        assert_eq!(result.parameters.len(), 0);
    }

    #[test]
    fn should_parse_empty_param_list_with_whitespace() {
        let tokens = get_tokens(&String::from("( )")).unwrap();
        let (result, next_pos) = parse_params_list(&tokens, 0).unwrap();

        assert_eq!(next_pos, 2);
        assert_eq!(result.parameters.len(), 0);
    }

    #[test]
    fn should_parse_empty_param_list_with_newlines() {
        let tokens = get_tokens(&String::from("(\n \n)")).unwrap();
        let (result, next_pos) = parse_params_list(&tokens, 0).unwrap();

        assert_eq!(next_pos, 3);
        assert_eq!(result.parameters.len(), 0);
    }

    #[test]
    fn should_parse_empty_param_list_with_1_parameter() {
        let tokens = get_tokens(&String::from("(Int x)")).unwrap();
        let (result, next_pos) = parse_params_list(&tokens, 0).unwrap();

        assert_eq!(next_pos, 4);
        assert_eq!(result.parameters.len(), 1);
        let first_param = &result.parameters[0];
        assert_eq!(first_param.datatype, "Int");
        assert_eq!(first_param.identifier, "x");
    }

    #[test]
    fn should_parse_empty_param_list_with_1_parameter_with_trailing_comma() {
        let tokens = get_tokens(&String::from("(Int x, )")).unwrap();
        let (result, next_pos) = parse_params_list(&tokens, 0).unwrap();

        assert_eq!(next_pos, 5);
        assert_eq!(result.parameters.len(), 1);
        let first_param = &result.parameters[0];
        assert_eq!(first_param.datatype, "Int");
        assert_eq!(first_param.identifier, "x");
    }

    #[test]
    fn should_parse_empty_param_list_with_2_parameters() {
        let tokens = get_tokens(&String::from("(Int x, String y)")).unwrap();
        let (result, next_pos) = parse_params_list(&tokens, 0).unwrap();

        assert_eq!(next_pos, 7);
        assert_eq!(result.parameters.len(), 2);
        let first_param = &result.parameters[0];
        assert_eq!(first_param.datatype, "Int");
        assert_eq!(first_param.identifier, "x");
        let second_param = &result.parameters[1];
        assert_eq!(second_param.datatype, "String");
        assert_eq!(second_param.identifier, "y");
    }

    #[test]
    fn should_parse_empty_param_list_with_2_parameters_and_trailing_comma() {
        let tokens = get_tokens(&String::from("(Int x, String y, )")).unwrap();
        let (result, next_pos) = parse_params_list(&tokens, 0).unwrap();

        assert_eq!(next_pos, 8);
        assert_eq!(result.parameters.len(), 2);
        let first_param = &result.parameters[0];
        assert_eq!(first_param.datatype, "Int");
        assert_eq!(first_param.identifier, "x");
        let second_param = &result.parameters[1];
        assert_eq!(second_param.datatype, "String");
        assert_eq!(second_param.identifier, "y");
    }

    #[test]
    fn should_parse_multiline_params() {
        let tokens = get_tokens(&String::from("(\n Int x,\n String y,\n)")).unwrap();
        let (result, next_pos) = parse_params_list(&tokens, 0).unwrap();

        assert_eq!(next_pos, 13);
        assert_eq!(result.parameters.len(), 2);
        let first_param = &result.parameters[0];
        assert_eq!(first_param.datatype, "Int");
        assert_eq!(first_param.identifier, "x");
        let second_param = &result.parameters[1];
        assert_eq!(second_param.datatype, "String");
        assert_eq!(second_param.identifier, "y");
    }
}