refactor(lex): don't emit newline before INDENT/DEDENT

This commit is contained in:
Araozu 2024-06-19 11:33:47 -05:00
parent 78d01a8fc8
commit 3f892e91c2
11 changed files with 121 additions and 102 deletions

View File

@ -27,6 +27,8 @@
- Decide how to handle comments in the syntax (?) (should comments mean something, like in Rust?)
- Do not ignore comments & whitespace, for code formatting
- Abstract the parsing of datatypes, such that in the future generics can be implemented in a single place
- Include the original tokens in the AST
- Finish the workflow for a hello world
## v0.0.13
@ -34,8 +36,7 @@
- [x] Simplify/rewrite AST
- [x] Properly parse expression indentation/dedentation
- [x] Define the top level constructs
- [ ] Include the original tokens in the AST
- [ ] Finish the workflow for a hello world
- [x] Emit INDENT/DEDENT alone instead of NewLine+INDENT/DEDENT
- [x] Refactor code
- [x] Remove `PARSER couldn't parse any construction` error & replace with an actual error message

2
Cargo.lock generated
View File

@ -20,7 +20,7 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "thp"
version = "0.0.12"
version = "0.0.13"
dependencies = [
"colored",
]

View File

@ -1,6 +1,6 @@
[package]
name = "thp"
version = "0.0.12"
version = "0.0.13"
edition = "2021"

View File

@ -50,12 +50,30 @@ pub fn get_tokens(input: &String) -> Result<Vec<Token>, MistiError> {
while has_input(&chars, current_pos) {
match next_token(&chars, current_pos, &mut indentation_stack, at_new_line) {
LexResult::Some(token, next_pos) => {
// When an INDENT/DEDENT is returned, it is because there is a NewLine.
// Remove that NewLine token and then insert the corresponding INDENT/DEDENT
if token.token_type == TokenType::INDENT || token.token_type == TokenType::DEDENT {
results.pop();
}
at_new_line = token.token_type == TokenType::NewLine;
results.push(token);
current_pos = next_pos;
}
LexResult::Multiple(tokens, next_pos) => {
// When an INDENT/DEDENT is returned, it is because there is a NewLine.
// Remove that NewLine token and then insert the corresponding INDENT/DEDENT
match tokens.get(0) {
Some(t)
if t.token_type == TokenType::INDENT
|| t.token_type == TokenType::DEDENT =>
{
results.pop();
}
_ => {}
}
at_new_line = match tokens.last() {
Some(t) if t.token_type == TokenType::NewLine => true,
// This may be None if there are newlines followed by EOF.
@ -346,9 +364,8 @@ mod tests {
let tokens = get_tokens(&input).unwrap();
assert_eq!(TokenType::Int, tokens[0].token_type);
assert_eq!(TokenType::NewLine, tokens[1].token_type);
assert_eq!(TokenType::INDENT, tokens[2].token_type);
assert_eq!(TokenType::Int, tokens[3].token_type);
assert_eq!(TokenType::INDENT, tokens[1].token_type);
assert_eq!(TokenType::Int, tokens[2].token_type);
}
#[test]
@ -357,12 +374,10 @@ mod tests {
let tokens = get_tokens(&input).unwrap();
assert_eq!(TokenType::Int, tokens[0].token_type);
assert_eq!(TokenType::NewLine, tokens[1].token_type);
assert_eq!(TokenType::INDENT, tokens[2].token_type);
assert_eq!(TokenType::Int, tokens[3].token_type);
assert_eq!(TokenType::NewLine, tokens[4].token_type);
assert_eq!(TokenType::INDENT, tokens[5].token_type);
assert_eq!(TokenType::Int, tokens[6].token_type);
assert_eq!(TokenType::INDENT, tokens[1].token_type);
assert_eq!(TokenType::Int, tokens[2].token_type);
assert_eq!(TokenType::INDENT, tokens[3].token_type);
assert_eq!(TokenType::Int, tokens[4].token_type);
}
#[test]
@ -371,11 +386,10 @@ mod tests {
let tokens = get_tokens(&input).unwrap();
assert_eq!(TokenType::Int, tokens[0].token_type);
assert_eq!(TokenType::NewLine, tokens[1].token_type);
assert_eq!(TokenType::INDENT, tokens[2].token_type);
assert_eq!(TokenType::Int, tokens[3].token_type);
assert_eq!(TokenType::NewLine, tokens[4].token_type);
assert_eq!(TokenType::Int, tokens[5].token_type);
assert_eq!(TokenType::INDENT, tokens[1].token_type);
assert_eq!(TokenType::Int, tokens[2].token_type);
assert_eq!(TokenType::NewLine, tokens[3].token_type);
assert_eq!(TokenType::Int, tokens[4].token_type);
}
#[test]
@ -384,12 +398,10 @@ mod tests {
let tokens = get_tokens(&input).unwrap();
assert_eq!(TokenType::Int, tokens[0].token_type);
assert_eq!(TokenType::NewLine, tokens[1].token_type);
assert_eq!(TokenType::INDENT, tokens[2].token_type);
assert_eq!(TokenType::Int, tokens[3].token_type);
assert_eq!(TokenType::NewLine, tokens[4].token_type);
assert_eq!(TokenType::DEDENT, tokens[5].token_type);
assert_eq!(TokenType::Int, tokens[6].token_type);
assert_eq!(TokenType::INDENT, tokens[1].token_type);
assert_eq!(TokenType::Int, tokens[2].token_type);
assert_eq!(TokenType::DEDENT, tokens[3].token_type);
assert_eq!(TokenType::Int, tokens[4].token_type);
}
#[test]
@ -398,17 +410,13 @@ mod tests {
let tokens = get_tokens(&input).unwrap();
assert_eq!(TokenType::Int, tokens[0].token_type);
assert_eq!(TokenType::NewLine, tokens[1].token_type);
assert_eq!(TokenType::INDENT, tokens[2].token_type);
assert_eq!(TokenType::Int, tokens[3].token_type);
assert_eq!(TokenType::NewLine, tokens[4].token_type);
assert_eq!(TokenType::INDENT, tokens[5].token_type);
assert_eq!(TokenType::INDENT, tokens[1].token_type);
assert_eq!(TokenType::Int, tokens[2].token_type);
assert_eq!(TokenType::INDENT, tokens[3].token_type);
assert_eq!(TokenType::Int, tokens[4].token_type);
assert_eq!(TokenType::DEDENT, tokens[5].token_type);
assert_eq!(TokenType::Int, tokens[6].token_type);
assert_eq!(TokenType::NewLine, tokens[7].token_type);
assert_eq!(TokenType::DEDENT, tokens[8].token_type);
assert_eq!(TokenType::Int, tokens[9].token_type);
assert_eq!(TokenType::NewLine, tokens[10].token_type);
assert_eq!(TokenType::DEDENT, tokens[11].token_type);
assert_eq!(TokenType::DEDENT, tokens[7].token_type);
}
#[test]
@ -417,15 +425,13 @@ mod tests {
let tokens = get_tokens(&input).unwrap();
assert_eq!(TokenType::Int, tokens[0].token_type);
assert_eq!(TokenType::NewLine, tokens[1].token_type);
assert_eq!(TokenType::INDENT, tokens[2].token_type);
assert_eq!(TokenType::Int, tokens[3].token_type);
assert_eq!(TokenType::NewLine, tokens[4].token_type);
assert_eq!(TokenType::INDENT, tokens[5].token_type);
assert_eq!(TokenType::Int, tokens[6].token_type);
assert_eq!(TokenType::NewLine, tokens[7].token_type);
assert_eq!(TokenType::DEDENT, tokens[8].token_type);
assert_eq!(TokenType::DEDENT, tokens[9].token_type);
assert_eq!(TokenType::INDENT, tokens[1].token_type);
assert_eq!(TokenType::Int, tokens[2].token_type);
assert_eq!(TokenType::INDENT, tokens[3].token_type);
assert_eq!(TokenType::Int, tokens[4].token_type);
assert_eq!(TokenType::DEDENT, tokens[5].token_type);
assert_eq!(TokenType::DEDENT, tokens[6].token_type);
assert_eq!(TokenType::Int, tokens[7].token_type);
}
#[test]
@ -450,11 +456,10 @@ mod indentation_tests {
let tokens = get_tokens(&input).unwrap();
assert_eq!(TokenType::Int, tokens[0].token_type);
assert_eq!(TokenType::NewLine, tokens[1].token_type);
assert_eq!(TokenType::INDENT, tokens[2].token_type);
assert_eq!(TokenType::Int, tokens[3].token_type);
assert_eq!(TokenType::DEDENT, tokens[4].token_type);
assert_eq!(TokenType::EOF, tokens[5].token_type);
assert_eq!(TokenType::INDENT, tokens[1].token_type);
assert_eq!(TokenType::Int, tokens[2].token_type);
assert_eq!(TokenType::DEDENT, tokens[3].token_type);
assert_eq!(TokenType::EOF, tokens[4].token_type);
}
#[test]
@ -463,15 +468,13 @@ mod indentation_tests {
let tokens = get_tokens(&input).unwrap();
assert_eq!(TokenType::Int, tokens[0].token_type);
assert_eq!(TokenType::NewLine, tokens[1].token_type);
assert_eq!(TokenType::INDENT, tokens[2].token_type);
assert_eq!(TokenType::Int, tokens[3].token_type);
assert_eq!(TokenType::NewLine, tokens[4].token_type);
assert_eq!(TokenType::INDENT, tokens[5].token_type);
assert_eq!(TokenType::Int, tokens[6].token_type);
assert_eq!(TokenType::DEDENT, tokens[7].token_type);
assert_eq!(TokenType::DEDENT, tokens[8].token_type);
assert_eq!(TokenType::EOF, tokens[9].token_type);
assert_eq!(TokenType::INDENT, tokens[1].token_type);
assert_eq!(TokenType::Int, tokens[2].token_type);
assert_eq!(TokenType::INDENT, tokens[3].token_type);
assert_eq!(TokenType::Int, tokens[4].token_type);
assert_eq!(TokenType::DEDENT, tokens[5].token_type);
assert_eq!(TokenType::DEDENT, tokens[6].token_type);
assert_eq!(TokenType::EOF, tokens[7].token_type);
}
#[test]

View File

@ -228,7 +228,7 @@ mod tests {
let tokens = get_tokens(&String::from("(\n Int x,\n String y,\n)")).unwrap();
let (result, next_pos) = parse_params_list(&tokens, 0).unwrap();
assert_eq!(next_pos, 13);
assert_eq!(next_pos, 11);
assert_eq!(result.parameters.len(), 2);
let first_param = &result.parameters[0];
assert_eq!(first_param.datatype, "Int");

View File

@ -106,7 +106,7 @@ mod tests {
let tokens = get_tokens(&String::from("{\n fun f(){}\n}")).unwrap();
let (block, next_pos) = Block::try_parse(&tokens, 0).unwrap();
assert_eq!(12, next_pos);
assert_eq!(10, next_pos);
assert_eq!(1, block.members.len());
let member = &block.members[0];
@ -123,7 +123,7 @@ mod tests {
let tokens = get_tokens(&String::from("{\n fun f(){}\nfun g(){}\n}")).unwrap();
let (block, next_pos) = Block::try_parse(&tokens, 0).unwrap();
assert_eq!(19, next_pos);
assert_eq!(17, next_pos);
assert_eq!(2, block.members.len());
let member = &block.members[0];

View File

@ -93,8 +93,8 @@ mod tests {
let tokens = get_tokens(&String::from("a\n >= b")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(tokens[5].token_type, TokenType::DEDENT);
assert_eq!(next, 6);
assert_eq!(tokens[4].token_type, TokenType::DEDENT);
assert_eq!(next, 5);
match result {
Expression::BinaryOperator(_, _, op) => {
@ -108,7 +108,7 @@ mod tests {
fn should_parse_indented_2() {
let tokens = get_tokens(&String::from("a\n <= b\n <= c")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(next, 11);
assert_eq!(next, 9);
match result {
Expression::BinaryOperator(_, _, op) => {
@ -123,8 +123,8 @@ mod tests {
let tokens = get_tokens(&String::from("a\n <= b <= c")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(tokens[7].token_type, TokenType::DEDENT);
assert_eq!(next, 8);
assert_eq!(tokens[6].token_type, TokenType::DEDENT);
assert_eq!(next, 7);
match result {
Expression::BinaryOperator(_, _, op) => {
@ -139,7 +139,7 @@ mod tests {
let tokens = get_tokens(&String::from("a\n <= b\n <= c")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(next, 9);
assert_eq!(next, 8);
match result {
Expression::BinaryOperator(_, _, op) => {
@ -154,7 +154,7 @@ mod tests {
let tokens = get_tokens(&String::from("a >=\n b")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(next, 6);
assert_eq!(next, 5);
match result {
Expression::BinaryOperator(_, _, op) => {

View File

@ -92,8 +92,8 @@ mod tests {
let tokens = get_tokens(&String::from("a\n == b")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(tokens[5].token_type, TokenType::DEDENT);
assert_eq!(next, 6);
assert_eq!(tokens[4].token_type, TokenType::DEDENT);
assert_eq!(next, 5);
match result {
Expression::BinaryOperator(_, _, op) => {
@ -108,9 +108,9 @@ mod tests {
let tokens = get_tokens(&String::from("a\n == b\n == c")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(tokens[9].token_type, TokenType::DEDENT);
assert_eq!(tokens[10].token_type, TokenType::DEDENT);
assert_eq!(next, 11);
assert_eq!(tokens[7].token_type, TokenType::DEDENT);
assert_eq!(tokens[8].token_type, TokenType::DEDENT);
assert_eq!(next, 9);
match result {
Expression::BinaryOperator(_, _, op) => {
@ -125,8 +125,8 @@ mod tests {
let tokens = get_tokens(&String::from("a\n == b == c")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(tokens[7].token_type, TokenType::DEDENT);
assert_eq!(next, 8);
assert_eq!(tokens[6].token_type, TokenType::DEDENT);
assert_eq!(next, 7);
match result {
Expression::BinaryOperator(_, _, op) => {
@ -141,7 +141,7 @@ mod tests {
let tokens = get_tokens(&String::from("a\n == b\n == c")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(next, 9);
assert_eq!(next, 8);
match result {
Expression::BinaryOperator(_, _, op) => {
@ -156,7 +156,7 @@ mod tests {
let tokens = get_tokens(&String::from("a ==\n b")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(next, 6);
assert_eq!(next, 5);
match result {
Expression::BinaryOperator(_, _, op) => {

View File

@ -96,8 +96,8 @@ mod tests {
let tokens = get_tokens(&String::from("a\n * b")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(tokens[5].token_type, TokenType::DEDENT);
assert_eq!(next, 6);
assert_eq!(tokens[4].token_type, TokenType::DEDENT);
assert_eq!(next, 5);
match result {
Expression::BinaryOperator(_, _, op) => {
@ -112,9 +112,9 @@ mod tests {
let tokens = get_tokens(&String::from("a\n * b\n * c")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(tokens[9].token_type, TokenType::DEDENT);
assert_eq!(tokens[10].token_type, TokenType::DEDENT);
assert_eq!(next, 11);
assert_eq!(tokens[7].token_type, TokenType::DEDENT);
assert_eq!(tokens[8].token_type, TokenType::DEDENT);
assert_eq!(next, 9);
match result {
Expression::BinaryOperator(_, _, op) => {
@ -129,8 +129,8 @@ mod tests {
let tokens = get_tokens(&String::from("a\n * b * c")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(tokens[7].token_type, TokenType::DEDENT);
assert_eq!(next, 8);
assert_eq!(tokens[6].token_type, TokenType::DEDENT);
assert_eq!(next, 7);
match result {
Expression::BinaryOperator(_, _, op) => {
@ -145,7 +145,7 @@ mod tests {
let tokens = get_tokens(&String::from("a\n * b\n * c")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(next, 9);
assert_eq!(next, 8);
match result {
Expression::BinaryOperator(_, _, op) => {
@ -160,7 +160,7 @@ mod tests {
let tokens = get_tokens(&String::from("a /\n b")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(next, 6);
assert_eq!(next, 5);
match result {
Expression::BinaryOperator(_, _, op) => {
@ -175,7 +175,7 @@ mod tests {
let tokens = get_tokens(&String::from("a\n /\n b")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(next, 9);
assert_eq!(next, 7);
match result {
Expression::BinaryOperator(_, _, op) => {

View File

@ -97,8 +97,8 @@ mod tests {
let tokens = get_tokens(&String::from("a\n + b")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(tokens[5].token_type, TokenType::DEDENT);
assert_eq!(next, 6);
assert_eq!(tokens[4].token_type, TokenType::DEDENT);
assert_eq!(next, 5);
match result {
Expression::BinaryOperator(_, _, op) => {
@ -112,7 +112,7 @@ mod tests {
fn should_parse_indented_2() {
let tokens = get_tokens(&String::from("a\n + b\n + c")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(next, 11);
assert_eq!(next, 9);
match result {
Expression::BinaryOperator(_, _, op) => {
@ -127,8 +127,8 @@ mod tests {
let tokens = get_tokens(&String::from("a\n + b + c")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(tokens[7].token_type, TokenType::DEDENT);
assert_eq!(next, 8);
assert_eq!(tokens[6].token_type, TokenType::DEDENT);
assert_eq!(next, 7);
match result {
Expression::BinaryOperator(_, _, op) => {
@ -143,7 +143,7 @@ mod tests {
let tokens = get_tokens(&String::from("a\n + b\n + c")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(next, 9);
assert_eq!(next, 8);
match result {
Expression::BinaryOperator(_, _, op) => {
@ -158,7 +158,22 @@ mod tests {
let tokens = get_tokens(&String::from("a +\n b")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(next, 6);
assert_eq!(next, 5);
match result {
Expression::BinaryOperator(_, _, op) => {
assert_eq!(op, "+")
}
_ => panic!("Expected a binary operator"),
}
}
#[test]
fn should_parse_indented_6() {
let tokens = get_tokens(&String::from("a\n + b\nc")).unwrap();
let (result, next) = try_parse(&tokens, 0).unwrap();
assert_eq!(next, 5);
match result {
Expression::BinaryOperator(_, _, op) => {

View File

@ -32,14 +32,14 @@ where
let pos = original_pos;
// handle possible opening indentation
let pos = match (tokens.get(pos), tokens.get(pos + 1)) {
let pos = match tokens.get(pos) {
// New indentation level
(Some(t1), Some(t2)) if t1.token_type == NewLine && t2.token_type == INDENT => {
Some(t2) if t2.token_type == INDENT => {
indent_count += 1;
pos + 2
pos + 1
}
// when indented, ignore newlines
(Some(t), _) if t.token_type == NewLine && indentation_level > 0 => pos + 1,
Some(t) if t.token_type == NewLine && indentation_level > 0 => pos + 1,
// let other handlers handle this
_ => pos,
};
@ -52,14 +52,14 @@ where
};
// handle possible closing indentation
let pos = match (tokens.get(pos), tokens.get(pos + 1)) {
let pos = match tokens.get(pos) {
// New indentation level
(Some(t1), Some(t2)) if t1.token_type == NewLine && t2.token_type == INDENT => {
Some(t2) if t2.token_type == INDENT => {
indent_count += 1;
pos + 2
pos + 1
}
// when indented, ignore newlines
(Some(t), _) if t.token_type == NewLine && indentation_level > 0 => pos + 1,
Some(t) if t.token_type == NewLine && indentation_level > 0 => pos + 1,
// let other handlers handle this
_ => pos,
};
@ -70,7 +70,7 @@ where
x => return x,
};
// handle the possible dedentation before/after the operator
// handle dedentation before/after the operator
for _ in 0..indent_count {
// expect a DEDENT for each INDENT matched
match tokens.get(next_pos) {