diff --git a/CHANGELOG.md b/CHANGELOG.md index 61b4383..5f1e47c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ - [ ] Infer datatype of binary operators - [ ] Execute semantic analysis on the function's block - [ ] Write tests +- [ ] Abstract the parsing of datatypes, such that in the future generics can be implemented in a single place ## v0.0.11 @@ -39,7 +40,7 @@ - [ ] Infer datatype of a `val variable = value` in the AST: Use the infered datatype - [x] Formally define the top level constructs - [x] Parse bindings and function declarations as top level constructs -- [ ] Parse function declaration arguments (`Type id`) +- [x] Parse function declaration arguments (`Type id`) - [x] Parse function return datatype (`fun f() -> Type`) - [x] Return parsing to variables to var/val - [ ] Write tests diff --git a/README.md b/README.md index 88a499c..a9517ce 100644 --- a/README.md +++ b/README.md @@ -4,19 +4,41 @@ Types and a new syntax for PHP, because I'm forced to use it at work. ## Usage -### Singular files +TBD. -Inside an existing PHP codebase, files are converted to THP -one at a time, or new files are written in THP. +Requirements: A *nix system & cargo -There must be a thp.config.yaml at the root of the project, -which configures the compiler. +```sh +# Clone the repo +git clone https://github.com/Araozu/thp-lang.git -Every file is compiled in place. +# Generate an executable +cargo build --release +# The executable will be located in ./target/release/thp -### Project mode +# And then run it and follow the instructions! +``` -The whole project uses THP. Work in progress. 
+```sh +Usage: `thp [command] [options]` +Commands + + c _file_ Compiles _file_ in-place + f _file_ Formats _file_ + r Starts the REPL + + init Initializes a new project in the current directory + build Builds the project + fmt Formats all files in the project + watch, w Starts compilation of the project in watch mode + + help, h Print this message & exit + +General options + + -h, --help Print command-specific usage + -v, --version Print version & exit +``` diff --git a/src/syntax/ast/mod.rs b/src/syntax/ast/mod.rs index 8d8aee5..76c8685 100644 --- a/src/syntax/ast/mod.rs +++ b/src/syntax/ast/mod.rs @@ -21,7 +21,7 @@ pub enum TopLevelDeclaration<'a> { pub struct FunctionDeclaration<'a> { pub identifier: &'a Token, pub return_type: Option<&'a Token>, - pub params_list: Box, + pub params_list: Box>, pub block: Box>, } @@ -31,8 +31,11 @@ pub struct Block<'a> { } #[derive(Debug)] -pub struct ParamsList {} +pub struct ParamsList<'a> { + pub parameters: Vec>, +} +#[derive(Debug)] pub struct Parameter<'a> { pub identifier: &'a String, pub datatype: &'a String, diff --git a/src/syntax/functions/function_declaration.rs b/src/syntax/functions/function_declaration.rs index d8f098d..3dc08e2 100644 --- a/src/syntax/functions/function_declaration.rs +++ b/src/syntax/functions/function_declaration.rs @@ -12,9 +12,7 @@ use super::{ /* function declaration = "fun", identifier, params list, return type?, block; -params list = "(", ")"; - -return type = ; +return type = "->", datatype; */ pub fn try_parse<'a>(tokens: &'a Vec, pos: usize) -> ParsingResult { let mut current_pos = pos; diff --git a/src/syntax/functions/params_list.rs b/src/syntax/functions/params_list.rs index b53c565..1d59875 100644 --- a/src/syntax/functions/params_list.rs +++ b/src/syntax/functions/params_list.rs @@ -9,6 +9,14 @@ use super::super::{ utils, }; +/* +# Basically, every parameter can have a trailing comma. +params list = "(" + , ( datatype pair, (",", datatype pair)*, ","? )? 
+ , ")"; + +datatype pair = datatype, identifier; + */ pub fn parse_params_list<'a>(tokens: &'a Vec, pos: usize) -> ParsingResult { let mut current_pos = pos; @@ -21,14 +29,6 @@ pub fn parse_params_list<'a>(tokens: &'a Vec, pos: usize) -> ParsingResul }; current_pos = next_pos; - /* - val (opening_paren, next_pos) = try parse_token_type(...) - - val (next_parameter, next_pos) = try parse_param_definition(...) catch - case ::Err(e) { return ::Err(e) } - else { break } - */ - // Parse parameters definitions, separated by commas let mut parameters = Vec::::new(); loop { @@ -81,17 +81,17 @@ pub fn parse_params_list<'a>(tokens: &'a Vec, pos: usize) -> ParsingResul }; current_pos = next_pos; - Ok((ParamsList {}, current_pos)) + Ok((ParamsList { parameters }, current_pos)) } +/// Parse a single parameter definition of the form: +/// - `Type identifier` +/// +/// There will be more constructs in the future, like: +/// - `Type identifier = default_value` +/// - `FunctionType identifier` +/// - `Pattern identifier` (e.g. `Some[String] value`)? fn parse_param_definition<'a>(tokens: &'a Vec, pos: usize) -> ParsingResult { - // Parse a single parameter definition of the form: - // - Type identifier - // There will be more constructs in the future, like: - // - Type identifier = default_value - // - FunctionType identifier - // - Pattern identifier (e.g. Some[String] value)? - let mut current_pos = pos; let (datatype, next_pos) = match utils::parse_token_type(tokens, current_pos, TokenType::Datatype) { @@ -100,9 +100,8 @@ fn parse_param_definition<'a>(tokens: &'a Vec, pos: usize) -> ParsingResu return Err(ParsingError::Err(err)); } // If there is no datatype this construction doesn't apply. 
- // Return a mismatch and let the caller handle it - Err(ParsingError::Mismatch(t)) => return Err(ParsingError::Mismatch(t)), - Err(ParsingError::Unmatched) => return Err(ParsingError::Unmatched), + // Return an unmatch and let the caller handle it + _ => return Err(ParsingError::Unmatched), }; current_pos = next_pos; @@ -137,3 +136,106 @@ fn parse_param_definition<'a>(tokens: &'a Vec, pos: usize) -> ParsingResu next_pos, )) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::lexic::get_tokens; + + #[test] + fn should_parse_empty_param_list() { + let tokens = get_tokens(&String::from("()")).unwrap(); + let (result, next_pos) = parse_params_list(&tokens, 0).unwrap(); + + assert_eq!(next_pos, 2); + assert_eq!(result.parameters.len(), 0); + } + + #[test] + fn should_parse_empty_param_list_with_whitespace() { + let tokens = get_tokens(&String::from("( )")).unwrap(); + let (result, next_pos) = parse_params_list(&tokens, 0).unwrap(); + + assert_eq!(next_pos, 2); + assert_eq!(result.parameters.len(), 0); + } + + #[test] + fn should_parse_empty_param_list_with_newlines() { + let tokens = get_tokens(&String::from("(\n \n)")).unwrap(); + let (result, next_pos) = parse_params_list(&tokens, 0).unwrap(); + + assert_eq!(next_pos, 3); + assert_eq!(result.parameters.len(), 0); + } + + #[test] + fn should_parse_empty_param_list_with_1_parameter() { + let tokens = get_tokens(&String::from("(Int x)")).unwrap(); + let (result, next_pos) = parse_params_list(&tokens, 0).unwrap(); + + assert_eq!(next_pos, 4); + assert_eq!(result.parameters.len(), 1); + let first_param = &result.parameters[0]; + assert_eq!(first_param.datatype, "Int"); + assert_eq!(first_param.identifier, "x"); + } + + + #[test] + fn should_parse_empty_param_list_with_1_parameter_with_trailing_comma() { + let tokens = get_tokens(&String::from("(Int x, )")).unwrap(); + let (result, next_pos) = parse_params_list(&tokens, 0).unwrap(); + + assert_eq!(next_pos, 5); + assert_eq!(result.parameters.len(), 1); + let 
first_param = &result.parameters[0]; + assert_eq!(first_param.datatype, "Int"); + assert_eq!(first_param.identifier, "x"); + } + + #[test] + fn should_parse_empty_param_list_with_2_parameters() { + let tokens = get_tokens(&String::from("(Int x, String y)")).unwrap(); + let (result, next_pos) = parse_params_list(&tokens, 0).unwrap(); + + assert_eq!(next_pos, 7); + assert_eq!(result.parameters.len(), 2); + let first_param = &result.parameters[0]; + assert_eq!(first_param.datatype, "Int"); + assert_eq!(first_param.identifier, "x"); + let second_param = &result.parameters[1]; + assert_eq!(second_param.datatype, "String"); + assert_eq!(second_param.identifier, "y"); + } + + #[test] + fn should_parse_empty_param_list_with_2_parameters_and_trailing_comma() { + let tokens = get_tokens(&String::from("(Int x, String y, )")).unwrap(); + let (result, next_pos) = parse_params_list(&tokens, 0).unwrap(); + + assert_eq!(next_pos, 8); + assert_eq!(result.parameters.len(), 2); + let first_param = &result.parameters[0]; + assert_eq!(first_param.datatype, "Int"); + assert_eq!(first_param.identifier, "x"); + let second_param = &result.parameters[1]; + assert_eq!(second_param.datatype, "String"); + assert_eq!(second_param.identifier, "y"); + } + + #[test] + fn should_parse_multiline_params() { + let tokens = get_tokens(&String::from("(\n Int x,\n String y,\n)")).unwrap(); + let (result, next_pos) = parse_params_list(&tokens, 0).unwrap(); + + assert_eq!(next_pos, 13); + assert_eq!(result.parameters.len(), 2); + let first_param = &result.parameters[0]; + assert_eq!(first_param.datatype, "Int"); + assert_eq!(first_param.identifier, "x"); + let second_param = &result.parameters[1]; + assert_eq!(second_param.datatype, "String"); + assert_eq!(second_param.identifier, "y"); + } +}