Compare commits
2 Commits
78d01a8fc8
...
5102d25676
Author | SHA1 | Date | |
---|---|---|---|
5102d25676 | |||
3f892e91c2 |
21
CHANGELOG.md
21
CHANGELOG.md
@ -4,16 +4,9 @@
|
||||
|
||||
- Test correct operator precedence
|
||||
- Implement functions as first class citizens
|
||||
- Implement AST transformation before codegen:
|
||||
Create a new AST to represent PHP source code
|
||||
and a THP ast -> PHP ast process, so that the
|
||||
codegen section can focus only on codegen, not on
|
||||
translation of thp->php.
|
||||
- Ignore indentation where it doesn't matter
|
||||
- Parse __more__ binary operators
|
||||
- Store tokens for the semantic analysis phase, to have actual error reporting
|
||||
- Parse more complex bindings
|
||||
- Watch mode
|
||||
- Rework error messages
|
||||
- Parse other language constructions
|
||||
- Type checking
|
||||
@ -27,6 +20,17 @@
|
||||
- Decide how to handle comments in the syntax (?) (should comments mean something, like in Rust?)
|
||||
- Not ignore comments & whitespace, for code formatting
|
||||
- Abstract the parsing of datatypes, such that in the future generics can be implemented in a single place
|
||||
- Include the original tokens in the AST
|
||||
|
||||
|
||||
## v0.0.14
|
||||
|
||||
- [ ] Define a minimal PHP AST
|
||||
- [ ] Transform THP AST into PHP AST
|
||||
- [ ] Implement minimal codegen for the PHP AST
|
||||
- [ ] Remove old codegen
|
||||
- [ ] Finish the workflow for a hello world
|
||||
|
||||
|
||||
## v0.0.13
|
||||
|
||||
@ -34,8 +38,7 @@
|
||||
- [x] Simplify/rewrite AST
|
||||
- [x] Properly parse expression indentation/dedentation
|
||||
- [x] Define the top level constructs
|
||||
- [ ] Include the original tokens in the AST
|
||||
- [ ] Finish the workflow for a hello world
|
||||
- [x] Emit INDENT/DEDENT alone instead of NewLine+INDENT/DEDENT
|
||||
- [x] Refactor code
|
||||
- [x] Remove `PARSER couldn't parse any construction` error & replace with an actual error message
|
||||
|
||||
|
82
Cargo.lock
generated
82
Cargo.lock
generated
@ -12,19 +12,99 @@ dependencies = [
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.86"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.36"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.203"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.203"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.120"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"ryu",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.68"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "901fa70d88b9d6c98022e23b4136f9f3e54e4662c3bc1bd1d84a42a9a0f0c1e9"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thp"
|
||||
version = "0.0.12"
|
||||
version = "0.0.13"
|
||||
dependencies = [
|
||||
"colored",
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.48.0"
|
||||
|
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "thp"
|
||||
version = "0.0.12"
|
||||
version = "0.0.13"
|
||||
edition = "2021"
|
||||
|
||||
|
||||
@ -8,3 +8,5 @@ edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
colored = "2.1.0"
|
||||
serde = { version = "1.0.203", features = ["derive"] }
|
||||
serde_json = "1.0.120"
|
||||
|
@ -12,6 +12,7 @@ init Initializes a new project in the current directory
|
||||
build, b Builds the project
|
||||
fmt Formats all files in the project
|
||||
watch, w Starts compilation of the project in watch mode
|
||||
tokenize Tokenize code from STDIN and output tokens as JSON to STDOUT
|
||||
|
||||
|
||||
help, h Print this message & exit
|
||||
|
@ -2,6 +2,7 @@ mod compile;
|
||||
mod empty;
|
||||
mod help;
|
||||
mod repl;
|
||||
mod tokenize;
|
||||
mod types;
|
||||
|
||||
use types::CommandType;
|
||||
@ -23,6 +24,7 @@ Commands
|
||||
build Builds the project
|
||||
fmt Formats all files in the project
|
||||
watch, w Starts compilation of the project in watch mode
|
||||
tokenize Tokenize code from STDIN and output tokens as JSON to STDOUT
|
||||
|
||||
help, h Print this message & exit
|
||||
|
||||
@ -67,6 +69,7 @@ fn parse_args() -> Result<(CommandType, Vec<String>), String> {
|
||||
"init" => CommandType::Init,
|
||||
"build" => CommandType::Build,
|
||||
"fmt" => CommandType::Fmt,
|
||||
"tokenize" => CommandType::Tokenize,
|
||||
"watch" | "w" => CommandType::Watch,
|
||||
"help" | "h" => CommandType::Help,
|
||||
_ => return Err(format!("Unknown command `{}`", command)),
|
||||
|
28
src/cli/tokenize.rs
Normal file
28
src/cli/tokenize.rs
Normal file
@ -0,0 +1,28 @@
|
||||
use std::io::{self, BufRead};
|
||||
use crate::lexic::get_tokens;
|
||||
|
||||
pub fn tokenize_command(_options: Vec<String>) -> Result<(), ()> {
|
||||
// Get the input from stdin
|
||||
let stdin = io::stdin();
|
||||
|
||||
let mut lines = Vec::new();
|
||||
for line in stdin.lock().lines() {
|
||||
match line {
|
||||
Ok(line) => {
|
||||
lines.push(line)
|
||||
}
|
||||
Err(reason) => {
|
||||
eprintln!("Error reading input: {}", reason);
|
||||
return Err(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let input_code = lines.join("\n");
|
||||
let tokens = get_tokens(&input_code);
|
||||
|
||||
let json = serde_json::to_string(&tokens).unwrap();
|
||||
println!("{}", json);
|
||||
|
||||
Ok(())
|
||||
}
|
@ -8,6 +8,7 @@ pub enum CommandType {
|
||||
Fmt,
|
||||
Watch,
|
||||
Help,
|
||||
Tokenize,
|
||||
None,
|
||||
}
|
||||
|
||||
@ -18,6 +19,7 @@ impl CommandType {
|
||||
CommandType::Compile => super::compile::compile_command(options),
|
||||
CommandType::Repl => super::repl::repl_command(options),
|
||||
CommandType::None => super::empty::empty_command(options),
|
||||
CommandType::Tokenize => super::tokenize::tokenize_command(options),
|
||||
_ => {
|
||||
eprintln!("Not implemented yet! {:?} {:?}", self, options);
|
||||
Err(())
|
||||
|
@ -1,3 +1,5 @@
|
||||
use serde::Serialize;
|
||||
|
||||
use self::semantic_error::SemanticError;
|
||||
|
||||
mod lex_error;
|
||||
@ -9,20 +11,20 @@ pub trait PrintableError {
|
||||
fn get_error_str(&self, chars: &Vec<char>) -> String;
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Serialize, Debug)]
|
||||
pub enum MistiError {
|
||||
Lex(LexError),
|
||||
Syntax(SyntaxError),
|
||||
Semantic(SemanticError),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Serialize, Debug)]
|
||||
pub struct LexError {
|
||||
pub position: usize,
|
||||
pub reason: String,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Serialize, Debug)]
|
||||
pub struct SyntaxError {
|
||||
pub error_start: usize,
|
||||
pub error_end: usize,
|
||||
|
@ -1,7 +1,9 @@
|
||||
use serde::Serialize;
|
||||
|
||||
use super::utils::{get_line, get_line_number};
|
||||
use super::PrintableError;
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Serialize, Debug)]
|
||||
pub struct SemanticError {
|
||||
pub error_start: usize,
|
||||
pub error_end: usize,
|
||||
|
109
src/lexic/mod.rs
109
src/lexic/mod.rs
@ -50,12 +50,30 @@ pub fn get_tokens(input: &String) -> Result<Vec<Token>, MistiError> {
|
||||
while has_input(&chars, current_pos) {
|
||||
match next_token(&chars, current_pos, &mut indentation_stack, at_new_line) {
|
||||
LexResult::Some(token, next_pos) => {
|
||||
// When a INDENT/DEDENT is returned it is because there is a NewLine.
|
||||
// Remove that NewLine token and then insert the corresponding INDENT/DEDENT
|
||||
if token.token_type == TokenType::INDENT || token.token_type == TokenType::DEDENT {
|
||||
results.pop();
|
||||
}
|
||||
|
||||
at_new_line = token.token_type == TokenType::NewLine;
|
||||
|
||||
results.push(token);
|
||||
current_pos = next_pos;
|
||||
}
|
||||
LexResult::Multiple(tokens, next_pos) => {
|
||||
// When a INDENT/DEDENT is returned it is because there is a NewLine.
|
||||
// Remove that NewLine token and then insert the corresponding INDENT/DEDENT
|
||||
match tokens.get(0) {
|
||||
Some(t)
|
||||
if t.token_type == TokenType::INDENT
|
||||
|| t.token_type == TokenType::DEDENT =>
|
||||
{
|
||||
results.pop();
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
at_new_line = match tokens.last() {
|
||||
Some(t) if t.token_type == TokenType::NewLine => true,
|
||||
// This may be None if there are newlines followed by EOF.
|
||||
@ -346,9 +364,8 @@ mod tests {
|
||||
let tokens = get_tokens(&input).unwrap();
|
||||
|
||||
assert_eq!(TokenType::Int, tokens[0].token_type);
|
||||
assert_eq!(TokenType::NewLine, tokens[1].token_type);
|
||||
assert_eq!(TokenType::INDENT, tokens[2].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[3].token_type);
|
||||
assert_eq!(TokenType::INDENT, tokens[1].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[2].token_type);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -357,12 +374,10 @@ mod tests {
|
||||
let tokens = get_tokens(&input).unwrap();
|
||||
|
||||
assert_eq!(TokenType::Int, tokens[0].token_type);
|
||||
assert_eq!(TokenType::NewLine, tokens[1].token_type);
|
||||
assert_eq!(TokenType::INDENT, tokens[2].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[3].token_type);
|
||||
assert_eq!(TokenType::NewLine, tokens[4].token_type);
|
||||
assert_eq!(TokenType::INDENT, tokens[5].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[6].token_type);
|
||||
assert_eq!(TokenType::INDENT, tokens[1].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[2].token_type);
|
||||
assert_eq!(TokenType::INDENT, tokens[3].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[4].token_type);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -371,11 +386,10 @@ mod tests {
|
||||
let tokens = get_tokens(&input).unwrap();
|
||||
|
||||
assert_eq!(TokenType::Int, tokens[0].token_type);
|
||||
assert_eq!(TokenType::NewLine, tokens[1].token_type);
|
||||
assert_eq!(TokenType::INDENT, tokens[2].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[3].token_type);
|
||||
assert_eq!(TokenType::NewLine, tokens[4].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[5].token_type);
|
||||
assert_eq!(TokenType::INDENT, tokens[1].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[2].token_type);
|
||||
assert_eq!(TokenType::NewLine, tokens[3].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[4].token_type);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -384,12 +398,10 @@ mod tests {
|
||||
let tokens = get_tokens(&input).unwrap();
|
||||
|
||||
assert_eq!(TokenType::Int, tokens[0].token_type);
|
||||
assert_eq!(TokenType::NewLine, tokens[1].token_type);
|
||||
assert_eq!(TokenType::INDENT, tokens[2].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[3].token_type);
|
||||
assert_eq!(TokenType::NewLine, tokens[4].token_type);
|
||||
assert_eq!(TokenType::DEDENT, tokens[5].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[6].token_type);
|
||||
assert_eq!(TokenType::INDENT, tokens[1].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[2].token_type);
|
||||
assert_eq!(TokenType::DEDENT, tokens[3].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[4].token_type);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -398,17 +410,13 @@ mod tests {
|
||||
let tokens = get_tokens(&input).unwrap();
|
||||
|
||||
assert_eq!(TokenType::Int, tokens[0].token_type);
|
||||
assert_eq!(TokenType::NewLine, tokens[1].token_type);
|
||||
assert_eq!(TokenType::INDENT, tokens[2].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[3].token_type);
|
||||
assert_eq!(TokenType::NewLine, tokens[4].token_type);
|
||||
assert_eq!(TokenType::INDENT, tokens[5].token_type);
|
||||
assert_eq!(TokenType::INDENT, tokens[1].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[2].token_type);
|
||||
assert_eq!(TokenType::INDENT, tokens[3].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[4].token_type);
|
||||
assert_eq!(TokenType::DEDENT, tokens[5].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[6].token_type);
|
||||
assert_eq!(TokenType::NewLine, tokens[7].token_type);
|
||||
assert_eq!(TokenType::DEDENT, tokens[8].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[9].token_type);
|
||||
assert_eq!(TokenType::NewLine, tokens[10].token_type);
|
||||
assert_eq!(TokenType::DEDENT, tokens[11].token_type);
|
||||
assert_eq!(TokenType::DEDENT, tokens[7].token_type);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -417,15 +425,13 @@ mod tests {
|
||||
let tokens = get_tokens(&input).unwrap();
|
||||
|
||||
assert_eq!(TokenType::Int, tokens[0].token_type);
|
||||
assert_eq!(TokenType::NewLine, tokens[1].token_type);
|
||||
assert_eq!(TokenType::INDENT, tokens[2].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[3].token_type);
|
||||
assert_eq!(TokenType::NewLine, tokens[4].token_type);
|
||||
assert_eq!(TokenType::INDENT, tokens[5].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[6].token_type);
|
||||
assert_eq!(TokenType::NewLine, tokens[7].token_type);
|
||||
assert_eq!(TokenType::DEDENT, tokens[8].token_type);
|
||||
assert_eq!(TokenType::DEDENT, tokens[9].token_type);
|
||||
assert_eq!(TokenType::INDENT, tokens[1].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[2].token_type);
|
||||
assert_eq!(TokenType::INDENT, tokens[3].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[4].token_type);
|
||||
assert_eq!(TokenType::DEDENT, tokens[5].token_type);
|
||||
assert_eq!(TokenType::DEDENT, tokens[6].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[7].token_type);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -450,11 +456,10 @@ mod indentation_tests {
|
||||
let tokens = get_tokens(&input).unwrap();
|
||||
|
||||
assert_eq!(TokenType::Int, tokens[0].token_type);
|
||||
assert_eq!(TokenType::NewLine, tokens[1].token_type);
|
||||
assert_eq!(TokenType::INDENT, tokens[2].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[3].token_type);
|
||||
assert_eq!(TokenType::DEDENT, tokens[4].token_type);
|
||||
assert_eq!(TokenType::EOF, tokens[5].token_type);
|
||||
assert_eq!(TokenType::INDENT, tokens[1].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[2].token_type);
|
||||
assert_eq!(TokenType::DEDENT, tokens[3].token_type);
|
||||
assert_eq!(TokenType::EOF, tokens[4].token_type);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -463,15 +468,13 @@ mod indentation_tests {
|
||||
let tokens = get_tokens(&input).unwrap();
|
||||
|
||||
assert_eq!(TokenType::Int, tokens[0].token_type);
|
||||
assert_eq!(TokenType::NewLine, tokens[1].token_type);
|
||||
assert_eq!(TokenType::INDENT, tokens[2].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[3].token_type);
|
||||
assert_eq!(TokenType::NewLine, tokens[4].token_type);
|
||||
assert_eq!(TokenType::INDENT, tokens[5].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[6].token_type);
|
||||
assert_eq!(TokenType::DEDENT, tokens[7].token_type);
|
||||
assert_eq!(TokenType::DEDENT, tokens[8].token_type);
|
||||
assert_eq!(TokenType::EOF, tokens[9].token_type);
|
||||
assert_eq!(TokenType::INDENT, tokens[1].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[2].token_type);
|
||||
assert_eq!(TokenType::INDENT, tokens[3].token_type);
|
||||
assert_eq!(TokenType::Int, tokens[4].token_type);
|
||||
assert_eq!(TokenType::DEDENT, tokens[5].token_type);
|
||||
assert_eq!(TokenType::DEDENT, tokens[6].token_type);
|
||||
assert_eq!(TokenType::EOF, tokens[7].token_type);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -1,4 +1,6 @@
|
||||
#[derive(PartialEq, Debug, Clone)]
|
||||
use serde::Serialize;
|
||||
|
||||
#[derive(Serialize, PartialEq, Debug, Clone)]
|
||||
pub enum TokenType {
|
||||
Identifier,
|
||||
Datatype,
|
||||
@ -23,7 +25,7 @@ pub enum TokenType {
|
||||
FUN,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
#[derive(Serialize, Debug, Clone, PartialEq)]
|
||||
pub struct Token {
|
||||
pub token_type: TokenType,
|
||||
// The token as a raw string
|
||||
|
@ -1,3 +1,3 @@
|
||||
// Follows https://phplang.org/spec/09-lexical-structure.html
|
||||
// Follows https://phplang.org/spec/19-grammar.html#syntactic-grammar
|
||||
|
||||
struct PhpAst {}
|
||||
|
@ -228,7 +228,7 @@ mod tests {
|
||||
let tokens = get_tokens(&String::from("(\n Int x,\n String y,\n)")).unwrap();
|
||||
let (result, next_pos) = parse_params_list(&tokens, 0).unwrap();
|
||||
|
||||
assert_eq!(next_pos, 13);
|
||||
assert_eq!(next_pos, 11);
|
||||
assert_eq!(result.parameters.len(), 2);
|
||||
let first_param = &result.parameters[0];
|
||||
assert_eq!(first_param.datatype, "Int");
|
||||
|
@ -106,7 +106,7 @@ mod tests {
|
||||
let tokens = get_tokens(&String::from("{\n fun f(){}\n}")).unwrap();
|
||||
let (block, next_pos) = Block::try_parse(&tokens, 0).unwrap();
|
||||
|
||||
assert_eq!(12, next_pos);
|
||||
assert_eq!(10, next_pos);
|
||||
assert_eq!(1, block.members.len());
|
||||
|
||||
let member = &block.members[0];
|
||||
@ -123,7 +123,7 @@ mod tests {
|
||||
let tokens = get_tokens(&String::from("{\n fun f(){}\nfun g(){}\n}")).unwrap();
|
||||
let (block, next_pos) = Block::try_parse(&tokens, 0).unwrap();
|
||||
|
||||
assert_eq!(19, next_pos);
|
||||
assert_eq!(17, next_pos);
|
||||
assert_eq!(2, block.members.len());
|
||||
|
||||
let member = &block.members[0];
|
||||
|
@ -93,8 +93,8 @@ mod tests {
|
||||
let tokens = get_tokens(&String::from("a\n >= b")).unwrap();
|
||||
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
assert_eq!(tokens[5].token_type, TokenType::DEDENT);
|
||||
assert_eq!(next, 6);
|
||||
assert_eq!(tokens[4].token_type, TokenType::DEDENT);
|
||||
assert_eq!(next, 5);
|
||||
|
||||
match result {
|
||||
Expression::BinaryOperator(_, _, op) => {
|
||||
@ -108,7 +108,7 @@ mod tests {
|
||||
fn should_parse_indented_2() {
|
||||
let tokens = get_tokens(&String::from("a\n <= b\n <= c")).unwrap();
|
||||
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||
assert_eq!(next, 11);
|
||||
assert_eq!(next, 9);
|
||||
|
||||
match result {
|
||||
Expression::BinaryOperator(_, _, op) => {
|
||||
@ -123,8 +123,8 @@ mod tests {
|
||||
let tokens = get_tokens(&String::from("a\n <= b <= c")).unwrap();
|
||||
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
assert_eq!(tokens[7].token_type, TokenType::DEDENT);
|
||||
assert_eq!(next, 8);
|
||||
assert_eq!(tokens[6].token_type, TokenType::DEDENT);
|
||||
assert_eq!(next, 7);
|
||||
|
||||
match result {
|
||||
Expression::BinaryOperator(_, _, op) => {
|
||||
@ -139,7 +139,7 @@ mod tests {
|
||||
let tokens = get_tokens(&String::from("a\n <= b\n <= c")).unwrap();
|
||||
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
assert_eq!(next, 9);
|
||||
assert_eq!(next, 8);
|
||||
|
||||
match result {
|
||||
Expression::BinaryOperator(_, _, op) => {
|
||||
@ -154,7 +154,7 @@ mod tests {
|
||||
let tokens = get_tokens(&String::from("a >=\n b")).unwrap();
|
||||
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
assert_eq!(next, 6);
|
||||
assert_eq!(next, 5);
|
||||
|
||||
match result {
|
||||
Expression::BinaryOperator(_, _, op) => {
|
||||
|
@ -92,8 +92,8 @@ mod tests {
|
||||
let tokens = get_tokens(&String::from("a\n == b")).unwrap();
|
||||
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
assert_eq!(tokens[5].token_type, TokenType::DEDENT);
|
||||
assert_eq!(next, 6);
|
||||
assert_eq!(tokens[4].token_type, TokenType::DEDENT);
|
||||
assert_eq!(next, 5);
|
||||
|
||||
match result {
|
||||
Expression::BinaryOperator(_, _, op) => {
|
||||
@ -108,9 +108,9 @@ mod tests {
|
||||
let tokens = get_tokens(&String::from("a\n == b\n == c")).unwrap();
|
||||
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
assert_eq!(tokens[9].token_type, TokenType::DEDENT);
|
||||
assert_eq!(tokens[10].token_type, TokenType::DEDENT);
|
||||
assert_eq!(next, 11);
|
||||
assert_eq!(tokens[7].token_type, TokenType::DEDENT);
|
||||
assert_eq!(tokens[8].token_type, TokenType::DEDENT);
|
||||
assert_eq!(next, 9);
|
||||
|
||||
match result {
|
||||
Expression::BinaryOperator(_, _, op) => {
|
||||
@ -125,8 +125,8 @@ mod tests {
|
||||
let tokens = get_tokens(&String::from("a\n == b == c")).unwrap();
|
||||
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
assert_eq!(tokens[7].token_type, TokenType::DEDENT);
|
||||
assert_eq!(next, 8);
|
||||
assert_eq!(tokens[6].token_type, TokenType::DEDENT);
|
||||
assert_eq!(next, 7);
|
||||
|
||||
match result {
|
||||
Expression::BinaryOperator(_, _, op) => {
|
||||
@ -141,7 +141,7 @@ mod tests {
|
||||
let tokens = get_tokens(&String::from("a\n == b\n == c")).unwrap();
|
||||
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
assert_eq!(next, 9);
|
||||
assert_eq!(next, 8);
|
||||
|
||||
match result {
|
||||
Expression::BinaryOperator(_, _, op) => {
|
||||
@ -156,7 +156,7 @@ mod tests {
|
||||
let tokens = get_tokens(&String::from("a ==\n b")).unwrap();
|
||||
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
assert_eq!(next, 6);
|
||||
assert_eq!(next, 5);
|
||||
|
||||
match result {
|
||||
Expression::BinaryOperator(_, _, op) => {
|
||||
|
@ -96,8 +96,8 @@ mod tests {
|
||||
let tokens = get_tokens(&String::from("a\n * b")).unwrap();
|
||||
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
assert_eq!(tokens[5].token_type, TokenType::DEDENT);
|
||||
assert_eq!(next, 6);
|
||||
assert_eq!(tokens[4].token_type, TokenType::DEDENT);
|
||||
assert_eq!(next, 5);
|
||||
|
||||
match result {
|
||||
Expression::BinaryOperator(_, _, op) => {
|
||||
@ -112,9 +112,9 @@ mod tests {
|
||||
let tokens = get_tokens(&String::from("a\n * b\n * c")).unwrap();
|
||||
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
assert_eq!(tokens[9].token_type, TokenType::DEDENT);
|
||||
assert_eq!(tokens[10].token_type, TokenType::DEDENT);
|
||||
assert_eq!(next, 11);
|
||||
assert_eq!(tokens[7].token_type, TokenType::DEDENT);
|
||||
assert_eq!(tokens[8].token_type, TokenType::DEDENT);
|
||||
assert_eq!(next, 9);
|
||||
|
||||
match result {
|
||||
Expression::BinaryOperator(_, _, op) => {
|
||||
@ -129,8 +129,8 @@ mod tests {
|
||||
let tokens = get_tokens(&String::from("a\n * b * c")).unwrap();
|
||||
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
assert_eq!(tokens[7].token_type, TokenType::DEDENT);
|
||||
assert_eq!(next, 8);
|
||||
assert_eq!(tokens[6].token_type, TokenType::DEDENT);
|
||||
assert_eq!(next, 7);
|
||||
|
||||
match result {
|
||||
Expression::BinaryOperator(_, _, op) => {
|
||||
@ -145,7 +145,7 @@ mod tests {
|
||||
let tokens = get_tokens(&String::from("a\n * b\n * c")).unwrap();
|
||||
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
assert_eq!(next, 9);
|
||||
assert_eq!(next, 8);
|
||||
|
||||
match result {
|
||||
Expression::BinaryOperator(_, _, op) => {
|
||||
@ -160,7 +160,7 @@ mod tests {
|
||||
let tokens = get_tokens(&String::from("a /\n b")).unwrap();
|
||||
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
assert_eq!(next, 6);
|
||||
assert_eq!(next, 5);
|
||||
|
||||
match result {
|
||||
Expression::BinaryOperator(_, _, op) => {
|
||||
@ -175,7 +175,7 @@ mod tests {
|
||||
let tokens = get_tokens(&String::from("a\n /\n b")).unwrap();
|
||||
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
assert_eq!(next, 9);
|
||||
assert_eq!(next, 7);
|
||||
|
||||
match result {
|
||||
Expression::BinaryOperator(_, _, op) => {
|
||||
|
@ -14,7 +14,6 @@ impl<'a> Parseable<'a> for Expression<'a> {
|
||||
type Item = Expression<'a>;
|
||||
|
||||
fn try_parse(tokens: &'a Vec<Token>, current_pos: usize) -> ParsingResult<'a, Self::Item> {
|
||||
// TODO: This must be newline/indentation aware
|
||||
equality::try_parse(tokens, current_pos)
|
||||
}
|
||||
}
|
||||
|
@ -97,8 +97,8 @@ mod tests {
|
||||
let tokens = get_tokens(&String::from("a\n + b")).unwrap();
|
||||
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
assert_eq!(tokens[5].token_type, TokenType::DEDENT);
|
||||
assert_eq!(next, 6);
|
||||
assert_eq!(tokens[4].token_type, TokenType::DEDENT);
|
||||
assert_eq!(next, 5);
|
||||
|
||||
match result {
|
||||
Expression::BinaryOperator(_, _, op) => {
|
||||
@ -112,7 +112,7 @@ mod tests {
|
||||
fn should_parse_indented_2() {
|
||||
let tokens = get_tokens(&String::from("a\n + b\n + c")).unwrap();
|
||||
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||
assert_eq!(next, 11);
|
||||
assert_eq!(next, 9);
|
||||
|
||||
match result {
|
||||
Expression::BinaryOperator(_, _, op) => {
|
||||
@ -127,8 +127,8 @@ mod tests {
|
||||
let tokens = get_tokens(&String::from("a\n + b + c")).unwrap();
|
||||
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
assert_eq!(tokens[7].token_type, TokenType::DEDENT);
|
||||
assert_eq!(next, 8);
|
||||
assert_eq!(tokens[6].token_type, TokenType::DEDENT);
|
||||
assert_eq!(next, 7);
|
||||
|
||||
match result {
|
||||
Expression::BinaryOperator(_, _, op) => {
|
||||
@ -143,7 +143,7 @@ mod tests {
|
||||
let tokens = get_tokens(&String::from("a\n + b\n + c")).unwrap();
|
||||
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
assert_eq!(next, 9);
|
||||
assert_eq!(next, 8);
|
||||
|
||||
match result {
|
||||
Expression::BinaryOperator(_, _, op) => {
|
||||
@ -158,7 +158,22 @@ mod tests {
|
||||
let tokens = get_tokens(&String::from("a +\n b")).unwrap();
|
||||
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
assert_eq!(next, 6);
|
||||
assert_eq!(next, 5);
|
||||
|
||||
match result {
|
||||
Expression::BinaryOperator(_, _, op) => {
|
||||
assert_eq!(op, "+")
|
||||
}
|
||||
_ => panic!("Expected a binary operator"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_parse_indented_6() {
|
||||
let tokens = get_tokens(&String::from("a\n + b\nc")).unwrap();
|
||||
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
assert_eq!(next, 5);
|
||||
|
||||
match result {
|
||||
Expression::BinaryOperator(_, _, op) => {
|
||||
|
@ -32,14 +32,14 @@ where
|
||||
let pos = original_pos;
|
||||
|
||||
// handle possible opening indentation
|
||||
let pos = match (tokens.get(pos), tokens.get(pos + 1)) {
|
||||
let pos = match tokens.get(pos) {
|
||||
// New indentation level
|
||||
(Some(t1), Some(t2)) if t1.token_type == NewLine && t2.token_type == INDENT => {
|
||||
Some(t2) if t2.token_type == INDENT => {
|
||||
indent_count += 1;
|
||||
pos + 2
|
||||
pos + 1
|
||||
}
|
||||
// when indented, ignore newlines
|
||||
(Some(t), _) if t.token_type == NewLine && indentation_level > 0 => pos + 1,
|
||||
Some(t) if t.token_type == NewLine && indentation_level > 0 => pos + 1,
|
||||
// let other handlers handle this
|
||||
_ => pos,
|
||||
};
|
||||
@ -52,14 +52,14 @@ where
|
||||
};
|
||||
|
||||
// handle possible closing indentation
|
||||
let pos = match (tokens.get(pos), tokens.get(pos + 1)) {
|
||||
let pos = match tokens.get(pos) {
|
||||
// New indentation level
|
||||
(Some(t1), Some(t2)) if t1.token_type == NewLine && t2.token_type == INDENT => {
|
||||
Some(t2) if t2.token_type == INDENT => {
|
||||
indent_count += 1;
|
||||
pos + 2
|
||||
pos + 1
|
||||
}
|
||||
// when indented, ignore newlines
|
||||
(Some(t), _) if t.token_type == NewLine && indentation_level > 0 => pos + 1,
|
||||
Some(t) if t.token_type == NewLine && indentation_level > 0 => pos + 1,
|
||||
// let other handlers handle this
|
||||
_ => pos,
|
||||
};
|
||||
@ -70,7 +70,7 @@ where
|
||||
x => return x,
|
||||
};
|
||||
|
||||
// handle the possible dedentation before/after the operator
|
||||
// handle dedentation before/after the operator
|
||||
for _ in 0..indent_count {
|
||||
// expect a DEDENT for each INDENT matched
|
||||
match tokens.get(next_pos) {
|
||||
|
@ -42,6 +42,7 @@ impl<'a> Parseable<'a> for ModuleAST<'a> {
|
||||
Ok((prod, next_pos)) => {
|
||||
productions.push(ModuleMembers::Expr(prod));
|
||||
current_pos = next_pos;
|
||||
continue;
|
||||
}
|
||||
Err(ParsingError::Err(error)) => {
|
||||
// TODO: Better error handling, write a better error message
|
||||
@ -92,4 +93,13 @@ mod test {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parsing the input `1` as a module succeeds and leaves the position at 1.
#[test]
fn should_parse_expression() {
    let tokens = get_tokens(&String::from("1")).unwrap();

    // Only the returned position is checked, so the parsed module is
    // deliberately discarded (the `_` prefix avoids an unused-variable warning).
    let (_module, next) = ModuleAST::try_parse(&tokens, 0).unwrap();

    assert_eq!(next, 1);
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user