Compare commits

...

2 Commits

21 changed files with 269 additions and 117 deletions

View File

@ -4,16 +4,9 @@
- Test correct operator precedence - Test correct operator precedence
- Implement functions as first class citizens - Implement functions as first class citizens
- Implement AST transformation before codegen:
Create a new AST to represent PHP source code
and a THP ast -> PHP ast process, so that the
codegen section can focus only on codegen, not on
translation of thp->php.
- Ignore indentation where it doesn't matter
- Parse __more__ binary operators - Parse __more__ binary operators
- Store tokens for the semantic analysis phase, to have actual error reporting - Store tokens for the semantic analysis phase, to have actual error reporting
- Parse more complex bindings - Parse more complex bindings
- Watch mode
- Rework error messages - Rework error messages
- Parse other language constructions - Parse other language constructions
- Type checking - Type checking
@ -27,6 +20,17 @@
- Decide how to handle comments in the syntax (?)(should comments mean something like in rust?) - Decide how to handle comments in the syntax (?)(should comments mean something like in rust?)
- Not ignore comments & whitespace, for code formatting - Not ignore comments & whitespace, for code formatting
- Abstract the parsing of datatypes, such that in the future generics can be implemented in a single place - Abstract the parsing of datatypes, such that in the future generics can be implemented in a single place
- Include the original tokens in the AST
## v0.0.14
- [ ] Define a minimal PHP AST
- [ ] Transform THP AST into PHP AST
- [ ] Implement minimal codegen for the PHP AST
- [ ] Remove old codegen
- [ ] Finish the workflow for a hello world
## v0.0.13 ## v0.0.13
@ -34,8 +38,7 @@
- [x] Simplify/rewrite AST - [x] Simplify/rewrite AST
- [x] Properly parse expression indentation/dedentation - [x] Properly parse expression indentation/dedentation
- [x] Define the top level constructs - [x] Define the top level constructs
- [ ] Include the original tokens in the AST - [x] Emit INDENT/DEDENT alone instead of NewLine+INDENT/DEDENT
- [ ] Finish the workflow for a hello world
- [x] Refactor code - [x] Refactor code
- [x] Remove `PARSER couldn't parse any construction` error & replace with an actual error message - [x] Remove `PARSER couldn't parse any construction` error & replace with an actual error message

82
Cargo.lock generated
View File

@ -12,19 +12,99 @@ dependencies = [
"windows-sys", "windows-sys",
] ]
[[package]]
name = "itoa"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
[[package]] [[package]]
name = "lazy_static" name = "lazy_static"
version = "1.4.0" version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "proc-macro2"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
dependencies = [
"proc-macro2",
]
[[package]]
name = "ryu"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
[[package]]
name = "serde"
version = "1.0.203"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.203"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.120"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "syn"
version = "2.0.68"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "901fa70d88b9d6c98022e23b4136f9f3e54e4662c3bc1bd1d84a42a9a0f0c1e9"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]] [[package]]
name = "thp" name = "thp"
version = "0.0.12" version = "0.0.13"
dependencies = [ dependencies = [
"colored", "colored",
"serde",
"serde_json",
] ]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]] [[package]]
name = "windows-sys" name = "windows-sys"
version = "0.48.0" version = "0.48.0"

View File

@ -1,6 +1,6 @@
[package] [package]
name = "thp" name = "thp"
version = "0.0.12" version = "0.0.13"
edition = "2021" edition = "2021"
@ -8,3 +8,5 @@ edition = "2021"
[dependencies] [dependencies]
colored = "2.1.0" colored = "2.1.0"
serde = { version = "1.0.203", features = ["derive"] }
serde_json = "1.0.120"

View File

@ -12,6 +12,7 @@ init Initializes a new project in the current directory
build, b Builds the project build, b Builds the project
fmt Formats all files in the project fmt Formats all files in the project
watch, w Starts compilation of the project in watch mode watch, w Starts compilation of the project in watch mode
tokenize Tokenize code from STDIN and output tokens as JSON to STDOUT
help, h Print this message & exit help, h Print this message & exit

View File

@ -2,6 +2,7 @@ mod compile;
mod empty; mod empty;
mod help; mod help;
mod repl; mod repl;
mod tokenize;
mod types; mod types;
use types::CommandType; use types::CommandType;
@ -23,6 +24,7 @@ Commands
build Builds the project build Builds the project
fmt Formats all files in the project fmt Formats all files in the project
watch, w Starts compilation of the project in watch mode watch, w Starts compilation of the project in watch mode
tokenize Tokenize code from STDIN and output tokens as JSON to STDOUT
help, h Print this message & exit help, h Print this message & exit
@ -67,6 +69,7 @@ fn parse_args() -> Result<(CommandType, Vec<String>), String> {
"init" => CommandType::Init, "init" => CommandType::Init,
"build" => CommandType::Build, "build" => CommandType::Build,
"fmt" => CommandType::Fmt, "fmt" => CommandType::Fmt,
"tokenize" => CommandType::Tokenize,
"watch" | "w" => CommandType::Watch, "watch" | "w" => CommandType::Watch,
"help" | "h" => CommandType::Help, "help" | "h" => CommandType::Help,
_ => return Err(format!("Unknown command `{}`", command)), _ => return Err(format!("Unknown command `{}`", command)),

28
src/cli/tokenize.rs Normal file
View File

@ -0,0 +1,28 @@
use std::io::{self, BufRead};

use crate::lexic::get_tokens;

/// Implements the `tokenize` CLI command.
///
/// Reads THP source code from STDIN until EOF, runs the lexer over it
/// and prints the result (the token list, or the lexical error) as a
/// JSON string to STDOUT.
///
/// `_options` is currently unused; it is accepted to match the common
/// command signature used by the other CLI commands.
///
/// Returns `Err(())` if STDIN could not be read; lexer errors are not
/// treated as a command failure, they are serialized and printed too.
pub fn tokenize_command(_options: Vec<String>) -> Result<(), ()> {
    // Collect all lines from STDIN, short-circuiting on the first
    // read error instead of manually looping and pushing.
    let lines: Vec<String> = match io::stdin().lock().lines().collect() {
        Ok(lines) => lines,
        Err(reason) => {
            eprintln!("Error reading input: {}", reason);
            return Err(());
        }
    };

    let input_code = lines.join("\n");
    let tokens = get_tokens(&input_code);

    // NOTE(review): unwrap assumed safe because Token/MistiError only
    // contain strings and integers, which serde_json can always
    // serialize — confirm if those types gain non-serializable fields.
    let json = serde_json::to_string(&tokens).unwrap();
    println!("{}", json);

    Ok(())
}

View File

@ -8,6 +8,7 @@ pub enum CommandType {
Fmt, Fmt,
Watch, Watch,
Help, Help,
Tokenize,
None, None,
} }
@ -18,6 +19,7 @@ impl CommandType {
CommandType::Compile => super::compile::compile_command(options), CommandType::Compile => super::compile::compile_command(options),
CommandType::Repl => super::repl::repl_command(options), CommandType::Repl => super::repl::repl_command(options),
CommandType::None => super::empty::empty_command(options), CommandType::None => super::empty::empty_command(options),
CommandType::Tokenize => super::tokenize::tokenize_command(options),
_ => { _ => {
eprintln!("Not implemented yet! {:?} {:?}", self, options); eprintln!("Not implemented yet! {:?} {:?}", self, options);
Err(()) Err(())

View File

@ -1,3 +1,5 @@
use serde::Serialize;
use self::semantic_error::SemanticError; use self::semantic_error::SemanticError;
mod lex_error; mod lex_error;
@ -9,20 +11,20 @@ pub trait PrintableError {
fn get_error_str(&self, chars: &Vec<char>) -> String; fn get_error_str(&self, chars: &Vec<char>) -> String;
} }
#[derive(Debug)] #[derive(Serialize, Debug)]
pub enum MistiError { pub enum MistiError {
Lex(LexError), Lex(LexError),
Syntax(SyntaxError), Syntax(SyntaxError),
Semantic(SemanticError), Semantic(SemanticError),
} }
#[derive(Debug)] #[derive(Serialize, Debug)]
pub struct LexError { pub struct LexError {
pub position: usize, pub position: usize,
pub reason: String, pub reason: String,
} }
#[derive(Debug)] #[derive(Serialize, Debug)]
pub struct SyntaxError { pub struct SyntaxError {
pub error_start: usize, pub error_start: usize,
pub error_end: usize, pub error_end: usize,

View File

@ -1,7 +1,9 @@
use serde::Serialize;
use super::utils::{get_line, get_line_number}; use super::utils::{get_line, get_line_number};
use super::PrintableError; use super::PrintableError;
#[derive(Debug)] #[derive(Serialize, Debug)]
pub struct SemanticError { pub struct SemanticError {
pub error_start: usize, pub error_start: usize,
pub error_end: usize, pub error_end: usize,

View File

@ -50,12 +50,30 @@ pub fn get_tokens(input: &String) -> Result<Vec<Token>, MistiError> {
while has_input(&chars, current_pos) { while has_input(&chars, current_pos) {
match next_token(&chars, current_pos, &mut indentation_stack, at_new_line) { match next_token(&chars, current_pos, &mut indentation_stack, at_new_line) {
LexResult::Some(token, next_pos) => { LexResult::Some(token, next_pos) => {
// When a INDENT/DEDENT is returned it is because there is a NewLine.
// Remove that NewLine token and then insert the corresponding INDENT/DEDENT
if token.token_type == TokenType::INDENT || token.token_type == TokenType::DEDENT {
results.pop();
}
at_new_line = token.token_type == TokenType::NewLine; at_new_line = token.token_type == TokenType::NewLine;
results.push(token); results.push(token);
current_pos = next_pos; current_pos = next_pos;
} }
LexResult::Multiple(tokens, next_pos) => { LexResult::Multiple(tokens, next_pos) => {
// When a INDENT/DEDENT is returned it is because there is a NewLine.
// Remove that NewLine token and then insert the corresponding INDENT/DEDENT
match tokens.get(0) {
Some(t)
if t.token_type == TokenType::INDENT
|| t.token_type == TokenType::DEDENT =>
{
results.pop();
}
_ => {}
}
at_new_line = match tokens.last() { at_new_line = match tokens.last() {
Some(t) if t.token_type == TokenType::NewLine => true, Some(t) if t.token_type == TokenType::NewLine => true,
// This may be None if there are newlines followed by EOF. // This may be None if there are newlines followed by EOF.
@ -346,9 +364,8 @@ mod tests {
let tokens = get_tokens(&input).unwrap(); let tokens = get_tokens(&input).unwrap();
assert_eq!(TokenType::Int, tokens[0].token_type); assert_eq!(TokenType::Int, tokens[0].token_type);
assert_eq!(TokenType::NewLine, tokens[1].token_type); assert_eq!(TokenType::INDENT, tokens[1].token_type);
assert_eq!(TokenType::INDENT, tokens[2].token_type); assert_eq!(TokenType::Int, tokens[2].token_type);
assert_eq!(TokenType::Int, tokens[3].token_type);
} }
#[test] #[test]
@ -357,12 +374,10 @@ mod tests {
let tokens = get_tokens(&input).unwrap(); let tokens = get_tokens(&input).unwrap();
assert_eq!(TokenType::Int, tokens[0].token_type); assert_eq!(TokenType::Int, tokens[0].token_type);
assert_eq!(TokenType::NewLine, tokens[1].token_type); assert_eq!(TokenType::INDENT, tokens[1].token_type);
assert_eq!(TokenType::INDENT, tokens[2].token_type); assert_eq!(TokenType::Int, tokens[2].token_type);
assert_eq!(TokenType::Int, tokens[3].token_type); assert_eq!(TokenType::INDENT, tokens[3].token_type);
assert_eq!(TokenType::NewLine, tokens[4].token_type); assert_eq!(TokenType::Int, tokens[4].token_type);
assert_eq!(TokenType::INDENT, tokens[5].token_type);
assert_eq!(TokenType::Int, tokens[6].token_type);
} }
#[test] #[test]
@ -371,11 +386,10 @@ mod tests {
let tokens = get_tokens(&input).unwrap(); let tokens = get_tokens(&input).unwrap();
assert_eq!(TokenType::Int, tokens[0].token_type); assert_eq!(TokenType::Int, tokens[0].token_type);
assert_eq!(TokenType::NewLine, tokens[1].token_type); assert_eq!(TokenType::INDENT, tokens[1].token_type);
assert_eq!(TokenType::INDENT, tokens[2].token_type); assert_eq!(TokenType::Int, tokens[2].token_type);
assert_eq!(TokenType::Int, tokens[3].token_type); assert_eq!(TokenType::NewLine, tokens[3].token_type);
assert_eq!(TokenType::NewLine, tokens[4].token_type); assert_eq!(TokenType::Int, tokens[4].token_type);
assert_eq!(TokenType::Int, tokens[5].token_type);
} }
#[test] #[test]
@ -384,12 +398,10 @@ mod tests {
let tokens = get_tokens(&input).unwrap(); let tokens = get_tokens(&input).unwrap();
assert_eq!(TokenType::Int, tokens[0].token_type); assert_eq!(TokenType::Int, tokens[0].token_type);
assert_eq!(TokenType::NewLine, tokens[1].token_type); assert_eq!(TokenType::INDENT, tokens[1].token_type);
assert_eq!(TokenType::INDENT, tokens[2].token_type); assert_eq!(TokenType::Int, tokens[2].token_type);
assert_eq!(TokenType::Int, tokens[3].token_type); assert_eq!(TokenType::DEDENT, tokens[3].token_type);
assert_eq!(TokenType::NewLine, tokens[4].token_type); assert_eq!(TokenType::Int, tokens[4].token_type);
assert_eq!(TokenType::DEDENT, tokens[5].token_type);
assert_eq!(TokenType::Int, tokens[6].token_type);
} }
#[test] #[test]
@ -398,17 +410,13 @@ mod tests {
let tokens = get_tokens(&input).unwrap(); let tokens = get_tokens(&input).unwrap();
assert_eq!(TokenType::Int, tokens[0].token_type); assert_eq!(TokenType::Int, tokens[0].token_type);
assert_eq!(TokenType::NewLine, tokens[1].token_type); assert_eq!(TokenType::INDENT, tokens[1].token_type);
assert_eq!(TokenType::INDENT, tokens[2].token_type); assert_eq!(TokenType::Int, tokens[2].token_type);
assert_eq!(TokenType::Int, tokens[3].token_type); assert_eq!(TokenType::INDENT, tokens[3].token_type);
assert_eq!(TokenType::NewLine, tokens[4].token_type); assert_eq!(TokenType::Int, tokens[4].token_type);
assert_eq!(TokenType::INDENT, tokens[5].token_type); assert_eq!(TokenType::DEDENT, tokens[5].token_type);
assert_eq!(TokenType::Int, tokens[6].token_type); assert_eq!(TokenType::Int, tokens[6].token_type);
assert_eq!(TokenType::NewLine, tokens[7].token_type); assert_eq!(TokenType::DEDENT, tokens[7].token_type);
assert_eq!(TokenType::DEDENT, tokens[8].token_type);
assert_eq!(TokenType::Int, tokens[9].token_type);
assert_eq!(TokenType::NewLine, tokens[10].token_type);
assert_eq!(TokenType::DEDENT, tokens[11].token_type);
} }
#[test] #[test]
@ -417,15 +425,13 @@ mod tests {
let tokens = get_tokens(&input).unwrap(); let tokens = get_tokens(&input).unwrap();
assert_eq!(TokenType::Int, tokens[0].token_type); assert_eq!(TokenType::Int, tokens[0].token_type);
assert_eq!(TokenType::NewLine, tokens[1].token_type); assert_eq!(TokenType::INDENT, tokens[1].token_type);
assert_eq!(TokenType::INDENT, tokens[2].token_type); assert_eq!(TokenType::Int, tokens[2].token_type);
assert_eq!(TokenType::Int, tokens[3].token_type); assert_eq!(TokenType::INDENT, tokens[3].token_type);
assert_eq!(TokenType::NewLine, tokens[4].token_type); assert_eq!(TokenType::Int, tokens[4].token_type);
assert_eq!(TokenType::INDENT, tokens[5].token_type); assert_eq!(TokenType::DEDENT, tokens[5].token_type);
assert_eq!(TokenType::Int, tokens[6].token_type); assert_eq!(TokenType::DEDENT, tokens[6].token_type);
assert_eq!(TokenType::NewLine, tokens[7].token_type); assert_eq!(TokenType::Int, tokens[7].token_type);
assert_eq!(TokenType::DEDENT, tokens[8].token_type);
assert_eq!(TokenType::DEDENT, tokens[9].token_type);
} }
#[test] #[test]
@ -450,11 +456,10 @@ mod indentation_tests {
let tokens = get_tokens(&input).unwrap(); let tokens = get_tokens(&input).unwrap();
assert_eq!(TokenType::Int, tokens[0].token_type); assert_eq!(TokenType::Int, tokens[0].token_type);
assert_eq!(TokenType::NewLine, tokens[1].token_type); assert_eq!(TokenType::INDENT, tokens[1].token_type);
assert_eq!(TokenType::INDENT, tokens[2].token_type); assert_eq!(TokenType::Int, tokens[2].token_type);
assert_eq!(TokenType::Int, tokens[3].token_type); assert_eq!(TokenType::DEDENT, tokens[3].token_type);
assert_eq!(TokenType::DEDENT, tokens[4].token_type); assert_eq!(TokenType::EOF, tokens[4].token_type);
assert_eq!(TokenType::EOF, tokens[5].token_type);
} }
#[test] #[test]
@ -463,15 +468,13 @@ mod indentation_tests {
let tokens = get_tokens(&input).unwrap(); let tokens = get_tokens(&input).unwrap();
assert_eq!(TokenType::Int, tokens[0].token_type); assert_eq!(TokenType::Int, tokens[0].token_type);
assert_eq!(TokenType::NewLine, tokens[1].token_type); assert_eq!(TokenType::INDENT, tokens[1].token_type);
assert_eq!(TokenType::INDENT, tokens[2].token_type); assert_eq!(TokenType::Int, tokens[2].token_type);
assert_eq!(TokenType::Int, tokens[3].token_type); assert_eq!(TokenType::INDENT, tokens[3].token_type);
assert_eq!(TokenType::NewLine, tokens[4].token_type); assert_eq!(TokenType::Int, tokens[4].token_type);
assert_eq!(TokenType::INDENT, tokens[5].token_type); assert_eq!(TokenType::DEDENT, tokens[5].token_type);
assert_eq!(TokenType::Int, tokens[6].token_type); assert_eq!(TokenType::DEDENT, tokens[6].token_type);
assert_eq!(TokenType::DEDENT, tokens[7].token_type); assert_eq!(TokenType::EOF, tokens[7].token_type);
assert_eq!(TokenType::DEDENT, tokens[8].token_type);
assert_eq!(TokenType::EOF, tokens[9].token_type);
} }
#[test] #[test]

View File

@ -1,4 +1,6 @@
#[derive(PartialEq, Debug, Clone)] use serde::Serialize;
#[derive(Serialize, PartialEq, Debug, Clone)]
pub enum TokenType { pub enum TokenType {
Identifier, Identifier,
Datatype, Datatype,
@ -23,7 +25,7 @@ pub enum TokenType {
FUN, FUN,
} }
#[derive(Debug, Clone, PartialEq)] #[derive(Serialize, Debug, Clone, PartialEq)]
pub struct Token { pub struct Token {
pub token_type: TokenType, pub token_type: TokenType,
// The token as a raw string // The token as a raw string

View File

@ -1,3 +1,3 @@
// Follows https://phplang.org/spec/09-lexical-structure.html // Follows https://phplang.org/spec/19-grammar.html#syntactic-grammar
struct PhpAst {} struct PhpAst {}

View File

@ -228,7 +228,7 @@ mod tests {
let tokens = get_tokens(&String::from("(\n Int x,\n String y,\n)")).unwrap(); let tokens = get_tokens(&String::from("(\n Int x,\n String y,\n)")).unwrap();
let (result, next_pos) = parse_params_list(&tokens, 0).unwrap(); let (result, next_pos) = parse_params_list(&tokens, 0).unwrap();
assert_eq!(next_pos, 13); assert_eq!(next_pos, 11);
assert_eq!(result.parameters.len(), 2); assert_eq!(result.parameters.len(), 2);
let first_param = &result.parameters[0]; let first_param = &result.parameters[0];
assert_eq!(first_param.datatype, "Int"); assert_eq!(first_param.datatype, "Int");

View File

@ -106,7 +106,7 @@ mod tests {
let tokens = get_tokens(&String::from("{\n fun f(){}\n}")).unwrap(); let tokens = get_tokens(&String::from("{\n fun f(){}\n}")).unwrap();
let (block, next_pos) = Block::try_parse(&tokens, 0).unwrap(); let (block, next_pos) = Block::try_parse(&tokens, 0).unwrap();
assert_eq!(12, next_pos); assert_eq!(10, next_pos);
assert_eq!(1, block.members.len()); assert_eq!(1, block.members.len());
let member = &block.members[0]; let member = &block.members[0];
@ -123,7 +123,7 @@ mod tests {
let tokens = get_tokens(&String::from("{\n fun f(){}\nfun g(){}\n}")).unwrap(); let tokens = get_tokens(&String::from("{\n fun f(){}\nfun g(){}\n}")).unwrap();
let (block, next_pos) = Block::try_parse(&tokens, 0).unwrap(); let (block, next_pos) = Block::try_parse(&tokens, 0).unwrap();
assert_eq!(19, next_pos); assert_eq!(17, next_pos);
assert_eq!(2, block.members.len()); assert_eq!(2, block.members.len());
let member = &block.members[0]; let member = &block.members[0];

View File

@ -93,8 +93,8 @@ mod tests {
let tokens = get_tokens(&String::from("a\n >= b")).unwrap(); let tokens = get_tokens(&String::from("a\n >= b")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(tokens[5].token_type, TokenType::DEDENT); assert_eq!(tokens[4].token_type, TokenType::DEDENT);
assert_eq!(next, 6); assert_eq!(next, 5);
match result { match result {
Expression::BinaryOperator(_, _, op) => { Expression::BinaryOperator(_, _, op) => {
@ -108,7 +108,7 @@ mod tests {
fn should_parse_indented_2() { fn should_parse_indented_2() {
let tokens = get_tokens(&String::from("a\n <= b\n <= c")).unwrap(); let tokens = get_tokens(&String::from("a\n <= b\n <= c")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(next, 11); assert_eq!(next, 9);
match result { match result {
Expression::BinaryOperator(_, _, op) => { Expression::BinaryOperator(_, _, op) => {
@ -123,8 +123,8 @@ mod tests {
let tokens = get_tokens(&String::from("a\n <= b <= c")).unwrap(); let tokens = get_tokens(&String::from("a\n <= b <= c")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(tokens[7].token_type, TokenType::DEDENT); assert_eq!(tokens[6].token_type, TokenType::DEDENT);
assert_eq!(next, 8); assert_eq!(next, 7);
match result { match result {
Expression::BinaryOperator(_, _, op) => { Expression::BinaryOperator(_, _, op) => {
@ -139,7 +139,7 @@ mod tests {
let tokens = get_tokens(&String::from("a\n <= b\n <= c")).unwrap(); let tokens = get_tokens(&String::from("a\n <= b\n <= c")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(next, 9); assert_eq!(next, 8);
match result { match result {
Expression::BinaryOperator(_, _, op) => { Expression::BinaryOperator(_, _, op) => {
@ -154,7 +154,7 @@ mod tests {
let tokens = get_tokens(&String::from("a >=\n b")).unwrap(); let tokens = get_tokens(&String::from("a >=\n b")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(next, 6); assert_eq!(next, 5);
match result { match result {
Expression::BinaryOperator(_, _, op) => { Expression::BinaryOperator(_, _, op) => {

View File

@ -92,8 +92,8 @@ mod tests {
let tokens = get_tokens(&String::from("a\n == b")).unwrap(); let tokens = get_tokens(&String::from("a\n == b")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(tokens[5].token_type, TokenType::DEDENT); assert_eq!(tokens[4].token_type, TokenType::DEDENT);
assert_eq!(next, 6); assert_eq!(next, 5);
match result { match result {
Expression::BinaryOperator(_, _, op) => { Expression::BinaryOperator(_, _, op) => {
@ -108,9 +108,9 @@ mod tests {
let tokens = get_tokens(&String::from("a\n == b\n == c")).unwrap(); let tokens = get_tokens(&String::from("a\n == b\n == c")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(tokens[9].token_type, TokenType::DEDENT); assert_eq!(tokens[7].token_type, TokenType::DEDENT);
assert_eq!(tokens[10].token_type, TokenType::DEDENT); assert_eq!(tokens[8].token_type, TokenType::DEDENT);
assert_eq!(next, 11); assert_eq!(next, 9);
match result { match result {
Expression::BinaryOperator(_, _, op) => { Expression::BinaryOperator(_, _, op) => {
@ -125,8 +125,8 @@ mod tests {
let tokens = get_tokens(&String::from("a\n == b == c")).unwrap(); let tokens = get_tokens(&String::from("a\n == b == c")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(tokens[7].token_type, TokenType::DEDENT); assert_eq!(tokens[6].token_type, TokenType::DEDENT);
assert_eq!(next, 8); assert_eq!(next, 7);
match result { match result {
Expression::BinaryOperator(_, _, op) => { Expression::BinaryOperator(_, _, op) => {
@ -141,7 +141,7 @@ mod tests {
let tokens = get_tokens(&String::from("a\n == b\n == c")).unwrap(); let tokens = get_tokens(&String::from("a\n == b\n == c")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(next, 9); assert_eq!(next, 8);
match result { match result {
Expression::BinaryOperator(_, _, op) => { Expression::BinaryOperator(_, _, op) => {
@ -156,7 +156,7 @@ mod tests {
let tokens = get_tokens(&String::from("a ==\n b")).unwrap(); let tokens = get_tokens(&String::from("a ==\n b")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(next, 6); assert_eq!(next, 5);
match result { match result {
Expression::BinaryOperator(_, _, op) => { Expression::BinaryOperator(_, _, op) => {

View File

@ -96,8 +96,8 @@ mod tests {
let tokens = get_tokens(&String::from("a\n * b")).unwrap(); let tokens = get_tokens(&String::from("a\n * b")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(tokens[5].token_type, TokenType::DEDENT); assert_eq!(tokens[4].token_type, TokenType::DEDENT);
assert_eq!(next, 6); assert_eq!(next, 5);
match result { match result {
Expression::BinaryOperator(_, _, op) => { Expression::BinaryOperator(_, _, op) => {
@ -112,9 +112,9 @@ mod tests {
let tokens = get_tokens(&String::from("a\n * b\n * c")).unwrap(); let tokens = get_tokens(&String::from("a\n * b\n * c")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(tokens[9].token_type, TokenType::DEDENT); assert_eq!(tokens[7].token_type, TokenType::DEDENT);
assert_eq!(tokens[10].token_type, TokenType::DEDENT); assert_eq!(tokens[8].token_type, TokenType::DEDENT);
assert_eq!(next, 11); assert_eq!(next, 9);
match result { match result {
Expression::BinaryOperator(_, _, op) => { Expression::BinaryOperator(_, _, op) => {
@ -129,8 +129,8 @@ mod tests {
let tokens = get_tokens(&String::from("a\n * b * c")).unwrap(); let tokens = get_tokens(&String::from("a\n * b * c")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(tokens[7].token_type, TokenType::DEDENT); assert_eq!(tokens[6].token_type, TokenType::DEDENT);
assert_eq!(next, 8); assert_eq!(next, 7);
match result { match result {
Expression::BinaryOperator(_, _, op) => { Expression::BinaryOperator(_, _, op) => {
@ -145,7 +145,7 @@ mod tests {
let tokens = get_tokens(&String::from("a\n * b\n * c")).unwrap(); let tokens = get_tokens(&String::from("a\n * b\n * c")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(next, 9); assert_eq!(next, 8);
match result { match result {
Expression::BinaryOperator(_, _, op) => { Expression::BinaryOperator(_, _, op) => {
@ -160,7 +160,7 @@ mod tests {
let tokens = get_tokens(&String::from("a /\n b")).unwrap(); let tokens = get_tokens(&String::from("a /\n b")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(next, 6); assert_eq!(next, 5);
match result { match result {
Expression::BinaryOperator(_, _, op) => { Expression::BinaryOperator(_, _, op) => {
@ -175,7 +175,7 @@ mod tests {
let tokens = get_tokens(&String::from("a\n /\n b")).unwrap(); let tokens = get_tokens(&String::from("a\n /\n b")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(next, 9); assert_eq!(next, 7);
match result { match result {
Expression::BinaryOperator(_, _, op) => { Expression::BinaryOperator(_, _, op) => {

View File

@ -14,7 +14,6 @@ impl<'a> Parseable<'a> for Expression<'a> {
type Item = Expression<'a>; type Item = Expression<'a>;
fn try_parse(tokens: &'a Vec<Token>, current_pos: usize) -> ParsingResult<'a, Self::Item> { fn try_parse(tokens: &'a Vec<Token>, current_pos: usize) -> ParsingResult<'a, Self::Item> {
// TODO: This must be newline/indentation aware
equality::try_parse(tokens, current_pos) equality::try_parse(tokens, current_pos)
} }
} }

View File

@ -97,8 +97,8 @@ mod tests {
let tokens = get_tokens(&String::from("a\n + b")).unwrap(); let tokens = get_tokens(&String::from("a\n + b")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(tokens[5].token_type, TokenType::DEDENT); assert_eq!(tokens[4].token_type, TokenType::DEDENT);
assert_eq!(next, 6); assert_eq!(next, 5);
match result { match result {
Expression::BinaryOperator(_, _, op) => { Expression::BinaryOperator(_, _, op) => {
@ -112,7 +112,7 @@ mod tests {
fn should_parse_indented_2() { fn should_parse_indented_2() {
let tokens = get_tokens(&String::from("a\n + b\n + c")).unwrap(); let tokens = get_tokens(&String::from("a\n + b\n + c")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(next, 11); assert_eq!(next, 9);
match result { match result {
Expression::BinaryOperator(_, _, op) => { Expression::BinaryOperator(_, _, op) => {
@ -127,8 +127,8 @@ mod tests {
let tokens = get_tokens(&String::from("a\n + b + c")).unwrap(); let tokens = get_tokens(&String::from("a\n + b + c")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(tokens[7].token_type, TokenType::DEDENT); assert_eq!(tokens[6].token_type, TokenType::DEDENT);
assert_eq!(next, 8); assert_eq!(next, 7);
match result { match result {
Expression::BinaryOperator(_, _, op) => { Expression::BinaryOperator(_, _, op) => {
@ -143,7 +143,7 @@ mod tests {
let tokens = get_tokens(&String::from("a\n + b\n + c")).unwrap(); let tokens = get_tokens(&String::from("a\n + b\n + c")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(next, 9); assert_eq!(next, 8);
match result { match result {
Expression::BinaryOperator(_, _, op) => { Expression::BinaryOperator(_, _, op) => {
@ -158,7 +158,22 @@ mod tests {
let tokens = get_tokens(&String::from("a +\n b")).unwrap(); let tokens = get_tokens(&String::from("a +\n b")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(next, 6); assert_eq!(next, 5);
match result {
Expression::BinaryOperator(_, _, op) => {
assert_eq!(op, "+")
}
_ => panic!("Expected a binary operator"),
}
}
#[test]
fn should_parse_indented_6() {
let tokens = get_tokens(&String::from("a\n + b\nc")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(next, 5);
match result { match result {
Expression::BinaryOperator(_, _, op) => { Expression::BinaryOperator(_, _, op) => {

View File

@ -32,14 +32,14 @@ where
let pos = original_pos; let pos = original_pos;
// handle possible opening indentation // handle possible opening indentation
let pos = match (tokens.get(pos), tokens.get(pos + 1)) { let pos = match tokens.get(pos) {
// New indentation level // New indentation level
(Some(t1), Some(t2)) if t1.token_type == NewLine && t2.token_type == INDENT => { Some(t2) if t2.token_type == INDENT => {
indent_count += 1; indent_count += 1;
pos + 2 pos + 1
} }
// when indented, ignore newlines // when indented, ignore newlines
(Some(t), _) if t.token_type == NewLine && indentation_level > 0 => pos + 1, Some(t) if t.token_type == NewLine && indentation_level > 0 => pos + 1,
// let other handlers handle this // let other handlers handle this
_ => pos, _ => pos,
}; };
@ -52,14 +52,14 @@ where
}; };
// handle possible closing indentation // handle possible closing indentation
let pos = match (tokens.get(pos), tokens.get(pos + 1)) { let pos = match tokens.get(pos) {
// New indentation level // New indentation level
(Some(t1), Some(t2)) if t1.token_type == NewLine && t2.token_type == INDENT => { Some(t2) if t2.token_type == INDENT => {
indent_count += 1; indent_count += 1;
pos + 2 pos + 1
} }
// when indented, ignore newlines // when indented, ignore newlines
(Some(t), _) if t.token_type == NewLine && indentation_level > 0 => pos + 1, Some(t) if t.token_type == NewLine && indentation_level > 0 => pos + 1,
// let other handlers handle this // let other handlers handle this
_ => pos, _ => pos,
}; };
@ -70,7 +70,7 @@ where
x => return x, x => return x,
}; };
// handle the possible dedentation before/after the operator // handle dedentation before/after the operator
for _ in 0..indent_count { for _ in 0..indent_count {
// expect a DEDENT for each INDENT matched // expect a DEDENT for each INDENT matched
match tokens.get(next_pos) { match tokens.get(next_pos) {

View File

@ -42,6 +42,7 @@ impl<'a> Parseable<'a> for ModuleAST<'a> {
Ok((prod, next_pos)) => { Ok((prod, next_pos)) => {
productions.push(ModuleMembers::Expr(prod)); productions.push(ModuleMembers::Expr(prod));
current_pos = next_pos; current_pos = next_pos;
continue;
} }
Err(ParsingError::Err(error)) => { Err(ParsingError::Err(error)) => {
// TODO: Better error handling, write a better error message // TODO: Better error handling, write a better error message
@ -92,4 +93,13 @@ mod test {
} }
} }
} }
#[test]
fn should_parse_expression() {
let tokens = get_tokens(&String::from("1")).unwrap();
let (module, next) = ModuleAST::try_parse(&tokens, 0).unwrap();
assert_eq!(next, 1);
}
} }