diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e95878..d77793d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,8 @@ - Decide how to handle comments in the syntax (?)(should comments mean something like in rust?) - Not ignore comments & whitespace, for code formatting - Abstract the parsing of datatypes, such that in the future generics can be implemented in a single place +- Include the original tokens in the AST +- Finish the workflow for a hello world ## v0.0.13 @@ -34,8 +36,7 @@ - [x] Simplify/rewrite AST - [x] Properly parse expression indentation/dedentation - [x] Define the top level constructs -- [ ] Include the original tokens in the AST -- [ ] Finish the workflow for a hello world +- [x] Emit INDENT/DEDENT alone instead of NewLine+INDENT/DEDENT - [x] Refactor code - [x] Remove `PARSER couldn't parse any construction` error & replace with an actual error message diff --git a/Cargo.lock b/Cargo.lock index 1013e7f..edca375 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -20,7 +20,7 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "thp" -version = "0.0.12" +version = "0.0.13" dependencies = [ "colored", ] diff --git a/Cargo.toml b/Cargo.toml index 01c3bec..cbbd2e8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thp" -version = "0.0.12" +version = "0.0.13" edition = "2021" diff --git a/src/lexic/mod.rs b/src/lexic/mod.rs index 084dfe3..267d849 100755 --- a/src/lexic/mod.rs +++ b/src/lexic/mod.rs @@ -50,12 +50,30 @@ pub fn get_tokens(input: &String) -> Result<Vec<Token>, MistiError> { while has_input(&chars, current_pos) { match next_token(&chars, current_pos, &mut indentation_stack, at_new_line) { LexResult::Some(token, next_pos) => { + // When an INDENT/DEDENT is returned it is because there is a NewLine. 
+ // Remove that NewLine token and then insert the corresponding INDENT/DEDENT + if token.token_type == TokenType::INDENT || token.token_type == TokenType::DEDENT { + results.pop(); + } + at_new_line = token.token_type == TokenType::NewLine; results.push(token); current_pos = next_pos; } LexResult::Multiple(tokens, next_pos) => { + // When an INDENT/DEDENT is returned it is because there is a NewLine. + // Remove that NewLine token and then insert the corresponding INDENT/DEDENT + match tokens.get(0) { + Some(t) + if t.token_type == TokenType::INDENT + || t.token_type == TokenType::DEDENT => + { + results.pop(); + } + _ => {} + } + at_new_line = match tokens.last() { Some(t) if t.token_type == TokenType::NewLine => true, // This may be None if there are newlines followed by EOF. @@ -346,9 +364,8 @@ mod tests { let tokens = get_tokens(&input).unwrap(); assert_eq!(TokenType::Int, tokens[0].token_type); - assert_eq!(TokenType::NewLine, tokens[1].token_type); - assert_eq!(TokenType::INDENT, tokens[2].token_type); - assert_eq!(TokenType::Int, tokens[3].token_type); + assert_eq!(TokenType::INDENT, tokens[1].token_type); + assert_eq!(TokenType::Int, tokens[2].token_type); } #[test] @@ -357,12 +374,10 @@ mod tests { let tokens = get_tokens(&input).unwrap(); assert_eq!(TokenType::Int, tokens[0].token_type); - assert_eq!(TokenType::NewLine, tokens[1].token_type); - assert_eq!(TokenType::INDENT, tokens[2].token_type); - assert_eq!(TokenType::Int, tokens[3].token_type); - assert_eq!(TokenType::NewLine, tokens[4].token_type); - assert_eq!(TokenType::INDENT, tokens[5].token_type); - assert_eq!(TokenType::Int, tokens[6].token_type); + assert_eq!(TokenType::INDENT, tokens[1].token_type); + assert_eq!(TokenType::Int, tokens[2].token_type); + assert_eq!(TokenType::INDENT, tokens[3].token_type); + assert_eq!(TokenType::Int, tokens[4].token_type); } #[test] @@ -371,11 +386,10 @@ mod tests { let tokens = get_tokens(&input).unwrap(); assert_eq!(TokenType::Int, tokens[0].token_type); - 
assert_eq!(TokenType::NewLine, tokens[1].token_type); - assert_eq!(TokenType::INDENT, tokens[2].token_type); - assert_eq!(TokenType::Int, tokens[3].token_type); - assert_eq!(TokenType::NewLine, tokens[4].token_type); - assert_eq!(TokenType::Int, tokens[5].token_type); + assert_eq!(TokenType::INDENT, tokens[1].token_type); + assert_eq!(TokenType::Int, tokens[2].token_type); + assert_eq!(TokenType::NewLine, tokens[3].token_type); + assert_eq!(TokenType::Int, tokens[4].token_type); } #[test] @@ -384,12 +398,10 @@ mod tests { let tokens = get_tokens(&input).unwrap(); assert_eq!(TokenType::Int, tokens[0].token_type); - assert_eq!(TokenType::NewLine, tokens[1].token_type); - assert_eq!(TokenType::INDENT, tokens[2].token_type); - assert_eq!(TokenType::Int, tokens[3].token_type); - assert_eq!(TokenType::NewLine, tokens[4].token_type); - assert_eq!(TokenType::DEDENT, tokens[5].token_type); - assert_eq!(TokenType::Int, tokens[6].token_type); + assert_eq!(TokenType::INDENT, tokens[1].token_type); + assert_eq!(TokenType::Int, tokens[2].token_type); + assert_eq!(TokenType::DEDENT, tokens[3].token_type); + assert_eq!(TokenType::Int, tokens[4].token_type); } #[test] @@ -398,17 +410,13 @@ mod tests { let tokens = get_tokens(&input).unwrap(); assert_eq!(TokenType::Int, tokens[0].token_type); - assert_eq!(TokenType::NewLine, tokens[1].token_type); - assert_eq!(TokenType::INDENT, tokens[2].token_type); - assert_eq!(TokenType::Int, tokens[3].token_type); - assert_eq!(TokenType::NewLine, tokens[4].token_type); - assert_eq!(TokenType::INDENT, tokens[5].token_type); + assert_eq!(TokenType::INDENT, tokens[1].token_type); + assert_eq!(TokenType::Int, tokens[2].token_type); + assert_eq!(TokenType::INDENT, tokens[3].token_type); + assert_eq!(TokenType::Int, tokens[4].token_type); + assert_eq!(TokenType::DEDENT, tokens[5].token_type); assert_eq!(TokenType::Int, tokens[6].token_type); - assert_eq!(TokenType::NewLine, tokens[7].token_type); - assert_eq!(TokenType::DEDENT, tokens[8].token_type); 
- assert_eq!(TokenType::Int, tokens[9].token_type); - assert_eq!(TokenType::NewLine, tokens[10].token_type); - assert_eq!(TokenType::DEDENT, tokens[11].token_type); + assert_eq!(TokenType::DEDENT, tokens[7].token_type); } #[test] @@ -417,15 +425,13 @@ mod tests { let tokens = get_tokens(&input).unwrap(); assert_eq!(TokenType::Int, tokens[0].token_type); - assert_eq!(TokenType::NewLine, tokens[1].token_type); - assert_eq!(TokenType::INDENT, tokens[2].token_type); - assert_eq!(TokenType::Int, tokens[3].token_type); - assert_eq!(TokenType::NewLine, tokens[4].token_type); - assert_eq!(TokenType::INDENT, tokens[5].token_type); - assert_eq!(TokenType::Int, tokens[6].token_type); - assert_eq!(TokenType::NewLine, tokens[7].token_type); - assert_eq!(TokenType::DEDENT, tokens[8].token_type); - assert_eq!(TokenType::DEDENT, tokens[9].token_type); + assert_eq!(TokenType::INDENT, tokens[1].token_type); + assert_eq!(TokenType::Int, tokens[2].token_type); + assert_eq!(TokenType::INDENT, tokens[3].token_type); + assert_eq!(TokenType::Int, tokens[4].token_type); + assert_eq!(TokenType::DEDENT, tokens[5].token_type); + assert_eq!(TokenType::DEDENT, tokens[6].token_type); + assert_eq!(TokenType::Int, tokens[7].token_type); } #[test] @@ -450,11 +456,10 @@ mod indentation_tests { let tokens = get_tokens(&input).unwrap(); assert_eq!(TokenType::Int, tokens[0].token_type); - assert_eq!(TokenType::NewLine, tokens[1].token_type); - assert_eq!(TokenType::INDENT, tokens[2].token_type); - assert_eq!(TokenType::Int, tokens[3].token_type); - assert_eq!(TokenType::DEDENT, tokens[4].token_type); - assert_eq!(TokenType::EOF, tokens[5].token_type); + assert_eq!(TokenType::INDENT, tokens[1].token_type); + assert_eq!(TokenType::Int, tokens[2].token_type); + assert_eq!(TokenType::DEDENT, tokens[3].token_type); + assert_eq!(TokenType::EOF, tokens[4].token_type); } #[test] @@ -463,15 +468,13 @@ mod indentation_tests { let tokens = get_tokens(&input).unwrap(); assert_eq!(TokenType::Int, 
tokens[0].token_type); - assert_eq!(TokenType::NewLine, tokens[1].token_type); - assert_eq!(TokenType::INDENT, tokens[2].token_type); - assert_eq!(TokenType::Int, tokens[3].token_type); - assert_eq!(TokenType::NewLine, tokens[4].token_type); - assert_eq!(TokenType::INDENT, tokens[5].token_type); - assert_eq!(TokenType::Int, tokens[6].token_type); - assert_eq!(TokenType::DEDENT, tokens[7].token_type); - assert_eq!(TokenType::DEDENT, tokens[8].token_type); - assert_eq!(TokenType::EOF, tokens[9].token_type); + assert_eq!(TokenType::INDENT, tokens[1].token_type); + assert_eq!(TokenType::Int, tokens[2].token_type); + assert_eq!(TokenType::INDENT, tokens[3].token_type); + assert_eq!(TokenType::Int, tokens[4].token_type); + assert_eq!(TokenType::DEDENT, tokens[5].token_type); + assert_eq!(TokenType::DEDENT, tokens[6].token_type); + assert_eq!(TokenType::EOF, tokens[7].token_type); } #[test] diff --git a/src/syntax/functions/params_list.rs b/src/syntax/functions/params_list.rs index 122a932..7f48150 100644 --- a/src/syntax/functions/params_list.rs +++ b/src/syntax/functions/params_list.rs @@ -228,7 +228,7 @@ mod tests { let tokens = get_tokens(&String::from("(\n Int x,\n String y,\n)")).unwrap(); let (result, next_pos) = parse_params_list(&tokens, 0).unwrap(); - assert_eq!(next_pos, 13); + assert_eq!(next_pos, 11); assert_eq!(result.parameters.len(), 2); let first_param = &result.parameters[0]; assert_eq!(first_param.datatype, "Int"); diff --git a/src/syntax/parsers/block.rs b/src/syntax/parsers/block.rs index c51fa43..f35d6f1 100644 --- a/src/syntax/parsers/block.rs +++ b/src/syntax/parsers/block.rs @@ -106,7 +106,7 @@ mod tests { let tokens = get_tokens(&String::from("{\n fun f(){}\n}")).unwrap(); let (block, next_pos) = Block::try_parse(&tokens, 0).unwrap(); - assert_eq!(12, next_pos); + assert_eq!(10, next_pos); assert_eq!(1, block.members.len()); let member = &block.members[0]; @@ -123,7 +123,7 @@ mod tests { let tokens = get_tokens(&String::from("{\n fun f(){}\nfun 
g(){}\n}")).unwrap(); let (block, next_pos) = Block::try_parse(&tokens, 0).unwrap(); - assert_eq!(19, next_pos); + assert_eq!(17, next_pos); assert_eq!(2, block.members.len()); let member = &block.members[0]; diff --git a/src/syntax/parsers/expression/comparison.rs b/src/syntax/parsers/expression/comparison.rs index 90a40eb..f1101e3 100644 --- a/src/syntax/parsers/expression/comparison.rs +++ b/src/syntax/parsers/expression/comparison.rs @@ -93,8 +93,8 @@ mod tests { let tokens = get_tokens(&String::from("a\n >= b")).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap(); - assert_eq!(tokens[5].token_type, TokenType::DEDENT); - assert_eq!(next, 6); + assert_eq!(tokens[4].token_type, TokenType::DEDENT); + assert_eq!(next, 5); match result { Expression::BinaryOperator(_, _, op) => { @@ -108,7 +108,7 @@ mod tests { fn should_parse_indented_2() { let tokens = get_tokens(&String::from("a\n <= b\n <= c")).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap(); - assert_eq!(next, 11); + assert_eq!(next, 9); match result { Expression::BinaryOperator(_, _, op) => { @@ -123,8 +123,8 @@ mod tests { let tokens = get_tokens(&String::from("a\n <= b <= c")).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap(); - assert_eq!(tokens[7].token_type, TokenType::DEDENT); - assert_eq!(next, 8); + assert_eq!(tokens[6].token_type, TokenType::DEDENT); + assert_eq!(next, 7); match result { Expression::BinaryOperator(_, _, op) => { @@ -139,7 +139,7 @@ mod tests { let tokens = get_tokens(&String::from("a\n <= b\n <= c")).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap(); - assert_eq!(next, 9); + assert_eq!(next, 8); match result { Expression::BinaryOperator(_, _, op) => { @@ -154,7 +154,7 @@ mod tests { let tokens = get_tokens(&String::from("a >=\n b")).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap(); - assert_eq!(next, 6); + assert_eq!(next, 5); match result { Expression::BinaryOperator(_, _, op) => { diff --git 
a/src/syntax/parsers/expression/equality.rs b/src/syntax/parsers/expression/equality.rs index 22622d3..8c81f7d 100644 --- a/src/syntax/parsers/expression/equality.rs +++ b/src/syntax/parsers/expression/equality.rs @@ -92,8 +92,8 @@ mod tests { let tokens = get_tokens(&String::from("a\n == b")).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap(); - assert_eq!(tokens[5].token_type, TokenType::DEDENT); - assert_eq!(next, 6); + assert_eq!(tokens[4].token_type, TokenType::DEDENT); + assert_eq!(next, 5); match result { Expression::BinaryOperator(_, _, op) => { @@ -108,9 +108,9 @@ mod tests { let tokens = get_tokens(&String::from("a\n == b\n == c")).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap(); - assert_eq!(tokens[9].token_type, TokenType::DEDENT); - assert_eq!(tokens[10].token_type, TokenType::DEDENT); - assert_eq!(next, 11); + assert_eq!(tokens[7].token_type, TokenType::DEDENT); + assert_eq!(tokens[8].token_type, TokenType::DEDENT); + assert_eq!(next, 9); match result { Expression::BinaryOperator(_, _, op) => { @@ -125,8 +125,8 @@ mod tests { let tokens = get_tokens(&String::from("a\n == b == c")).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap(); - assert_eq!(tokens[7].token_type, TokenType::DEDENT); - assert_eq!(next, 8); + assert_eq!(tokens[6].token_type, TokenType::DEDENT); + assert_eq!(next, 7); match result { Expression::BinaryOperator(_, _, op) => { @@ -141,7 +141,7 @@ mod tests { let tokens = get_tokens(&String::from("a\n == b\n == c")).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap(); - assert_eq!(next, 9); + assert_eq!(next, 8); match result { Expression::BinaryOperator(_, _, op) => { @@ -156,7 +156,7 @@ mod tests { let tokens = get_tokens(&String::from("a ==\n b")).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap(); - assert_eq!(next, 6); + assert_eq!(next, 5); match result { Expression::BinaryOperator(_, _, op) => { diff --git a/src/syntax/parsers/expression/factor.rs 
b/src/syntax/parsers/expression/factor.rs index a426d5f..ca2f18d 100644 --- a/src/syntax/parsers/expression/factor.rs +++ b/src/syntax/parsers/expression/factor.rs @@ -96,8 +96,8 @@ mod tests { let tokens = get_tokens(&String::from("a\n * b")).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap(); - assert_eq!(tokens[5].token_type, TokenType::DEDENT); - assert_eq!(next, 6); + assert_eq!(tokens[4].token_type, TokenType::DEDENT); + assert_eq!(next, 5); match result { Expression::BinaryOperator(_, _, op) => { @@ -112,9 +112,9 @@ mod tests { let tokens = get_tokens(&String::from("a\n * b\n * c")).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap(); - assert_eq!(tokens[9].token_type, TokenType::DEDENT); - assert_eq!(tokens[10].token_type, TokenType::DEDENT); - assert_eq!(next, 11); + assert_eq!(tokens[7].token_type, TokenType::DEDENT); + assert_eq!(tokens[8].token_type, TokenType::DEDENT); + assert_eq!(next, 9); match result { Expression::BinaryOperator(_, _, op) => { @@ -129,8 +129,8 @@ mod tests { let tokens = get_tokens(&String::from("a\n * b * c")).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap(); - assert_eq!(tokens[7].token_type, TokenType::DEDENT); - assert_eq!(next, 8); + assert_eq!(tokens[6].token_type, TokenType::DEDENT); + assert_eq!(next, 7); match result { Expression::BinaryOperator(_, _, op) => { @@ -145,7 +145,7 @@ mod tests { let tokens = get_tokens(&String::from("a\n * b\n * c")).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap(); - assert_eq!(next, 9); + assert_eq!(next, 8); match result { Expression::BinaryOperator(_, _, op) => { @@ -160,7 +160,7 @@ mod tests { let tokens = get_tokens(&String::from("a /\n b")).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap(); - assert_eq!(next, 6); + assert_eq!(next, 5); match result { Expression::BinaryOperator(_, _, op) => { @@ -175,7 +175,7 @@ mod tests { let tokens = get_tokens(&String::from("a\n /\n b")).unwrap(); let (result, next) = try_parse(&tokens, 
0).unwrap(); - assert_eq!(next, 9); + assert_eq!(next, 7); match result { Expression::BinaryOperator(_, _, op) => { diff --git a/src/syntax/parsers/expression/term.rs b/src/syntax/parsers/expression/term.rs index e9391ac..c36fc22 100644 --- a/src/syntax/parsers/expression/term.rs +++ b/src/syntax/parsers/expression/term.rs @@ -97,8 +97,8 @@ mod tests { let tokens = get_tokens(&String::from("a\n + b")).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap(); - assert_eq!(tokens[5].token_type, TokenType::DEDENT); - assert_eq!(next, 6); + assert_eq!(tokens[4].token_type, TokenType::DEDENT); + assert_eq!(next, 5); match result { Expression::BinaryOperator(_, _, op) => { @@ -112,7 +112,7 @@ mod tests { fn should_parse_indented_2() { let tokens = get_tokens(&String::from("a\n + b\n + c")).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap(); - assert_eq!(next, 11); + assert_eq!(next, 9); match result { Expression::BinaryOperator(_, _, op) => { @@ -127,8 +127,8 @@ mod tests { let tokens = get_tokens(&String::from("a\n + b + c")).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap(); - assert_eq!(tokens[7].token_type, TokenType::DEDENT); - assert_eq!(next, 8); + assert_eq!(tokens[6].token_type, TokenType::DEDENT); + assert_eq!(next, 7); match result { Expression::BinaryOperator(_, _, op) => { @@ -143,7 +143,7 @@ mod tests { let tokens = get_tokens(&String::from("a\n + b\n + c")).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap(); - assert_eq!(next, 9); + assert_eq!(next, 8); match result { Expression::BinaryOperator(_, _, op) => { @@ -158,7 +158,22 @@ mod tests { let tokens = get_tokens(&String::from("a +\n b")).unwrap(); let (result, next) = try_parse(&tokens, 0).unwrap(); - assert_eq!(next, 6); + assert_eq!(next, 5); + + match result { + Expression::BinaryOperator(_, _, op) => { + assert_eq!(op, "+") + } + _ => panic!("Expected a binary operator"), + } + } + + #[test] + fn should_parse_indented_6() { + let tokens = 
get_tokens(&String::from("a\n + b\nc")).unwrap(); + let (result, next) = try_parse(&tokens, 0).unwrap(); + + assert_eq!(next, 5); match result { Expression::BinaryOperator(_, _, op) => { diff --git a/src/syntax/parsers/expression/utils.rs b/src/syntax/parsers/expression/utils.rs index 3dc9483..0cb5f80 100644 --- a/src/syntax/parsers/expression/utils.rs +++ b/src/syntax/parsers/expression/utils.rs @@ -32,14 +32,14 @@ where let pos = original_pos; // handle possible opening indentation - let pos = match (tokens.get(pos), tokens.get(pos + 1)) { + let pos = match tokens.get(pos) { // New indentation level - (Some(t1), Some(t2)) if t1.token_type == NewLine && t2.token_type == INDENT => { + Some(t2) if t2.token_type == INDENT => { indent_count += 1; - pos + 2 + pos + 1 } // when indented, ignore newlines - (Some(t), _) if t.token_type == NewLine && indentation_level > 0 => pos + 1, + Some(t) if t.token_type == NewLine && indentation_level > 0 => pos + 1, // let other handlers handle this _ => pos, }; @@ -52,14 +52,14 @@ where }; // handle possible closing indentation - let pos = match (tokens.get(pos), tokens.get(pos + 1)) { + let pos = match tokens.get(pos) { // New indentation level - (Some(t1), Some(t2)) if t1.token_type == NewLine && t2.token_type == INDENT => { + Some(t2) if t2.token_type == INDENT => { indent_count += 1; - pos + 2 + pos + 1 } // when indented, ignore newlines - (Some(t), _) if t.token_type == NewLine && indentation_level > 0 => pos + 1, + Some(t) if t.token_type == NewLine && indentation_level > 0 => pos + 1, // let other handlers handle this _ => pos, }; @@ -70,7 +70,7 @@ where x => return x, }; - // handle the possible dedentation before/after the operator + // handle dedentation before/after the operator for _ in 0..indent_count { // expect a DEDENT for each INDENT matched match tokens.get(next_pos) {