Compare commits
5 Commits
be8c16ccf0
...
78d01a8fc8
Author | SHA1 | Date | |
---|---|---|---|
78d01a8fc8 | |||
8429ca8d8f | |||
d08019c010 | |||
56ea63cf8c | |||
fa14439507 |
@ -9,6 +9,7 @@
|
|||||||
and a THP ast -> PHP ast process, so that the
|
and a THP ast -> PHP ast process, so that the
|
||||||
codegen section can focus only in codegen, not in
|
codegen section can focus only in codegen, not in
|
||||||
translation of thp->php.
|
translation of thp->php.
|
||||||
|
- Ignore indentation where it doesn't matter
|
||||||
- Parse __more__ binary operators
|
- Parse __more__ binary operators
|
||||||
- Store tokens for the semantic analysis phase, to have actual error reporting
|
- Store tokens for the semantic analysis phase, to have actual error reporting
|
||||||
- Parse more complex bindings
|
- Parse more complex bindings
|
||||||
@ -31,6 +32,7 @@
|
|||||||
|
|
||||||
- [x] Begin work on a formal grammar
|
- [x] Begin work on a formal grammar
|
||||||
- [x] Simplify/rewrite AST
|
- [x] Simplify/rewrite AST
|
||||||
|
- [x] Properly parse expression indentation/dedentation
|
||||||
- [x] Define the top level constructs
|
- [x] Define the top level constructs
|
||||||
- [ ] Include the original tokens in the AST
|
- [ ] Include the original tokens in the AST
|
||||||
- [ ] Finish the workflow for a hello world
|
- [ ] Finish the workflow for a hello world
|
||||||
|
@ -3,6 +3,8 @@ use crate::{
|
|||||||
syntax::{ast::Expression, ParsingError, ParsingResult},
|
syntax::{ast::Expression, ParsingError, ParsingResult},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use super::utils::try_binary_op;
|
||||||
|
|
||||||
/// Parses a factor expression.
|
/// Parses a factor expression.
|
||||||
///
|
///
|
||||||
/// ```ebnf
|
/// ```ebnf
|
||||||
@ -14,44 +16,41 @@ pub fn try_parse(tokens: &Vec<Token>, pos: usize) -> ParsingResult<Expression> {
|
|||||||
_ => return Err(ParsingError::Unmatched),
|
_ => return Err(ParsingError::Unmatched),
|
||||||
};
|
};
|
||||||
|
|
||||||
parse_many(tokens, next_pos, term)
|
parse_many(tokens, next_pos, term, 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_many<'a>(
|
fn parse_many<'a>(
|
||||||
tokens: &'a Vec<Token>,
|
tokens: &'a Vec<Token>,
|
||||||
pos: usize,
|
pos: usize,
|
||||||
prev_expr: Expression<'a>,
|
prev_expr: Expression<'a>,
|
||||||
|
indentation_level: u32,
|
||||||
) -> ParsingResult<'a, Expression<'a>> {
|
) -> ParsingResult<'a, Expression<'a>> {
|
||||||
// comparison = term, ((">" | ">=" | "<" | "<="), term)*;
|
// comparison = term, ((">" | ">=" | "<" | "<="), term)*;
|
||||||
|
try_binary_op(
|
||||||
|
tokens,
|
||||||
|
pos,
|
||||||
|
prev_expr,
|
||||||
|
vec![">", ">=", "<", "<="],
|
||||||
|
indentation_level,
|
||||||
|
|tokens, next_pos, prev_expr, token, indent_count: u32| match super::term::try_parse(
|
||||||
|
tokens, next_pos,
|
||||||
|
) {
|
||||||
|
Ok((expr, next_pos)) => {
|
||||||
|
let expr =
|
||||||
|
Expression::BinaryOperator(Box::new(prev_expr), Box::new(expr), &token.value);
|
||||||
|
|
||||||
match tokens.get(pos) {
|
parse_many(tokens, next_pos, expr, indentation_level + indent_count)
|
||||||
Some(token)
|
|
||||||
if token.value == "<"
|
|
||||||
|| token.value == "<="
|
|
||||||
|| token.value == ">"
|
|
||||||
|| token.value == ">=" =>
|
|
||||||
{
|
|
||||||
match super::term::try_parse(tokens, pos + 1) {
|
|
||||||
Ok((expr, next_pos)) => {
|
|
||||||
let expr = Expression::BinaryOperator(
|
|
||||||
Box::new(prev_expr),
|
|
||||||
Box::new(expr),
|
|
||||||
&token.value,
|
|
||||||
);
|
|
||||||
|
|
||||||
parse_many(tokens, next_pos, expr)
|
|
||||||
}
|
|
||||||
_ => Err(ParsingError::Unmatched),
|
|
||||||
}
|
}
|
||||||
}
|
_ => return Err(ParsingError::Unmatched),
|
||||||
_ => Ok((prev_expr, pos)),
|
},
|
||||||
}
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::lexic::get_tokens;
|
use crate::lexic::get_tokens;
|
||||||
|
use crate::lexic::token::TokenType;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn should_parse_comparison() {
|
fn should_parse_comparison() {
|
||||||
@ -88,4 +87,80 @@ mod tests {
|
|||||||
_ => panic!("Expected an Unmatched error"),
|
_ => panic!("Expected an Unmatched error"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn should_parse_indented_1() {
|
||||||
|
let tokens = get_tokens(&String::from("a\n >= b")).unwrap();
|
||||||
|
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(tokens[5].token_type, TokenType::DEDENT);
|
||||||
|
assert_eq!(next, 6);
|
||||||
|
|
||||||
|
match result {
|
||||||
|
Expression::BinaryOperator(_, _, op) => {
|
||||||
|
assert_eq!(op, ">=")
|
||||||
|
}
|
||||||
|
_ => panic!("Expected a binary operator"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn should_parse_indented_2() {
|
||||||
|
let tokens = get_tokens(&String::from("a\n <= b\n <= c")).unwrap();
|
||||||
|
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||||
|
assert_eq!(next, 11);
|
||||||
|
|
||||||
|
match result {
|
||||||
|
Expression::BinaryOperator(_, _, op) => {
|
||||||
|
assert_eq!(op, "<=")
|
||||||
|
}
|
||||||
|
_ => panic!("Expected a binary operator"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn should_parse_indented_3() {
|
||||||
|
let tokens = get_tokens(&String::from("a\n <= b <= c")).unwrap();
|
||||||
|
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(tokens[7].token_type, TokenType::DEDENT);
|
||||||
|
assert_eq!(next, 8);
|
||||||
|
|
||||||
|
match result {
|
||||||
|
Expression::BinaryOperator(_, _, op) => {
|
||||||
|
assert_eq!(op, "<=")
|
||||||
|
}
|
||||||
|
_ => panic!("Expected a binary operator"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn should_parse_indented_4() {
|
||||||
|
let tokens = get_tokens(&String::from("a\n <= b\n <= c")).unwrap();
|
||||||
|
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(next, 9);
|
||||||
|
|
||||||
|
match result {
|
||||||
|
Expression::BinaryOperator(_, _, op) => {
|
||||||
|
assert_eq!(op, "<=")
|
||||||
|
}
|
||||||
|
_ => panic!("Expected a binary operator"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn should_parse_indented_5() {
|
||||||
|
let tokens = get_tokens(&String::from("a >=\n b")).unwrap();
|
||||||
|
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(next, 6);
|
||||||
|
|
||||||
|
match result {
|
||||||
|
Expression::BinaryOperator(_, _, op) => {
|
||||||
|
assert_eq!(op, ">=")
|
||||||
|
}
|
||||||
|
_ => panic!("Expected a binary operator"),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,10 @@
|
|||||||
use crate::{
|
use crate::{
|
||||||
handle_dedentation, handle_indentation, lexic::token::{Token, TokenType}, syntax::{ast::Expression, ParsingError, ParsingResult}
|
lexic::token::Token,
|
||||||
|
syntax::{ast::Expression, ParsingError, ParsingResult},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use super::utils::try_binary_op;
|
||||||
|
|
||||||
/// Parses a factor expression.
|
/// Parses a factor expression.
|
||||||
///
|
///
|
||||||
/// ```ebnf
|
/// ```ebnf
|
||||||
@ -23,50 +26,30 @@ fn parse_many<'a>(
|
|||||||
indentation_level: u32,
|
indentation_level: u32,
|
||||||
) -> ParsingResult<'a, Expression<'a>> {
|
) -> ParsingResult<'a, Expression<'a>> {
|
||||||
// equality = comparison, (("==" | "!="), comparison )*;
|
// equality = comparison, (("==" | "!="), comparison )*;
|
||||||
|
try_binary_op(
|
||||||
|
tokens,
|
||||||
|
pos,
|
||||||
|
prev_expr,
|
||||||
|
vec!["==", "!="],
|
||||||
|
indentation_level,
|
||||||
|
|tokens, next_pos, prev_expr, token, indent_count: u32| match super::comparison::try_parse(
|
||||||
|
tokens, next_pos,
|
||||||
|
) {
|
||||||
|
Ok((expr, next_pos)) => {
|
||||||
|
let expr =
|
||||||
|
Expression::BinaryOperator(Box::new(prev_expr), Box::new(expr), &token.value);
|
||||||
|
|
||||||
let mut indent_count: u32 = 0;
|
parse_many(tokens, next_pos, expr, indentation_level + indent_count)
|
||||||
let mut next_pos = pos;
|
|
||||||
|
|
||||||
// Handle possible indentation before binary operator
|
|
||||||
handle_indentation!(tokens, next_pos, indent_count, indentation_level);
|
|
||||||
|
|
||||||
let result = match tokens.get(next_pos) {
|
|
||||||
Some(token) if token.value == "==" || token.value == "!=" => {
|
|
||||||
next_pos += 1;
|
|
||||||
|
|
||||||
// Handle possible indentation after binary operator
|
|
||||||
handle_indentation!(tokens, next_pos, indent_count, indentation_level);
|
|
||||||
|
|
||||||
match super::comparison::try_parse(tokens, next_pos) {
|
|
||||||
Ok((expr, next_pos)) => {
|
|
||||||
let expr = Expression::BinaryOperator(
|
|
||||||
Box::new(prev_expr),
|
|
||||||
Box::new(expr),
|
|
||||||
&token.value,
|
|
||||||
);
|
|
||||||
|
|
||||||
parse_many(tokens, next_pos, expr, indentation_level + indent_count)
|
|
||||||
}
|
|
||||||
_ => return Err(ParsingError::Unmatched),
|
|
||||||
}
|
}
|
||||||
}
|
_ => return Err(ParsingError::Unmatched),
|
||||||
_ => return Ok((prev_expr, pos)),
|
},
|
||||||
};
|
)
|
||||||
|
|
||||||
let (new_expr, mut next_pos) = match result {
|
|
||||||
Ok((e, n)) => (e, n),
|
|
||||||
_ => return result,
|
|
||||||
};
|
|
||||||
|
|
||||||
handle_dedentation!(tokens, next_pos, indent_count);
|
|
||||||
|
|
||||||
Ok((new_expr, next_pos))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::lexic::get_tokens;
|
use crate::lexic::{get_tokens, token::TokenType};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn should_parse_comparison() {
|
fn should_parse_comparison() {
|
||||||
|
@ -1,5 +1,8 @@
|
|||||||
use crate::{
|
use crate::{
|
||||||
handle_dedentation, handle_indentation, lexic::token::{Token, TokenType}, syntax::{ast::Expression, ParsingError, ParsingResult}
|
lexic::token::Token,
|
||||||
|
syntax::{
|
||||||
|
ast::Expression, parsers::expression::utils::try_binary_op, ParsingError, ParsingResult,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Parses a factor expression.
|
/// Parses a factor expression.
|
||||||
@ -23,20 +26,14 @@ fn parse_many<'a>(
|
|||||||
indentation_level: u32,
|
indentation_level: u32,
|
||||||
) -> ParsingResult<'a, Expression<'a>> {
|
) -> ParsingResult<'a, Expression<'a>> {
|
||||||
// (("/" | "*"), unary)*
|
// (("/" | "*"), unary)*
|
||||||
|
try_binary_op(
|
||||||
let mut indent_count: u32 = 0;
|
tokens,
|
||||||
let mut next_pos = pos;
|
pos,
|
||||||
|
prev_expr,
|
||||||
// Handle possible indentation before binary operator
|
vec!["/", "*"],
|
||||||
handle_indentation!(tokens, next_pos, indent_count, indentation_level);
|
indentation_level,
|
||||||
|
|tokens, next_pos, prev_expr, token, indent_count: u32| {
|
||||||
let result = match tokens.get(next_pos) {
|
// match next
|
||||||
Some(token) if token.value == "/" || token.value == "*" => {
|
|
||||||
next_pos += 1;
|
|
||||||
|
|
||||||
// Handle possible indentation after binary operator
|
|
||||||
handle_indentation!(tokens, next_pos, indent_count, indentation_level);
|
|
||||||
|
|
||||||
match super::unary::try_parse(tokens, next_pos) {
|
match super::unary::try_parse(tokens, next_pos) {
|
||||||
Ok((expr, next_pos)) => {
|
Ok((expr, next_pos)) => {
|
||||||
let expr = Expression::BinaryOperator(
|
let expr = Expression::BinaryOperator(
|
||||||
@ -49,24 +46,14 @@ fn parse_many<'a>(
|
|||||||
}
|
}
|
||||||
_ => return Err(ParsingError::Unmatched),
|
_ => return Err(ParsingError::Unmatched),
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
_ => return Ok((prev_expr, pos)),
|
)
|
||||||
};
|
|
||||||
|
|
||||||
let (new_expr, mut next_pos) = match result {
|
|
||||||
Ok((e, n)) => (e, n),
|
|
||||||
_ => return result,
|
|
||||||
};
|
|
||||||
|
|
||||||
handle_dedentation!(tokens, next_pos, indent_count);
|
|
||||||
|
|
||||||
Ok((new_expr, next_pos))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::lexic::get_tokens;
|
use crate::lexic::{get_tokens, token::TokenType};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn should_parse_comparison() {
|
fn should_parse_comparison() {
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
use crate::syntax::parsers::expression::utils::try_binary_op;
|
||||||
use crate::{
|
use crate::{
|
||||||
lexic::token::Token,
|
lexic::token::Token,
|
||||||
syntax::{ast::Expression, ParsingError, ParsingResult},
|
syntax::{ast::Expression, ParsingError, ParsingResult},
|
||||||
@ -14,19 +15,26 @@ pub fn try_parse(tokens: &Vec<Token>, pos: usize) -> ParsingResult<Expression> {
|
|||||||
_ => return Err(ParsingError::Unmatched),
|
_ => return Err(ParsingError::Unmatched),
|
||||||
};
|
};
|
||||||
|
|
||||||
parse_many(tokens, next_pos, factor)
|
parse_many(tokens, next_pos, factor, 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_many<'a>(
|
fn parse_many<'a>(
|
||||||
tokens: &'a Vec<Token>,
|
tokens: &'a Vec<Token>,
|
||||||
pos: usize,
|
pos: usize,
|
||||||
prev_expr: Expression<'a>,
|
prev_expr: Expression<'a>,
|
||||||
|
indentation_level: u32,
|
||||||
) -> ParsingResult<'a, Expression<'a>> {
|
) -> ParsingResult<'a, Expression<'a>> {
|
||||||
// term = factor, (("-" | "+"), factor)*;
|
// term = factor, (("-" | "+"), factor)*;
|
||||||
|
|
||||||
match tokens.get(pos) {
|
try_binary_op(
|
||||||
Some(token) if token.value == "+" || token.value == "-" => {
|
tokens,
|
||||||
match super::factor::try_parse(tokens, pos + 1) {
|
pos,
|
||||||
|
prev_expr,
|
||||||
|
vec!["+", "-"],
|
||||||
|
indentation_level,
|
||||||
|
|tokens, pos, prev_expr, token, indent_count: u32| {
|
||||||
|
// Parse the next factor
|
||||||
|
match super::factor::try_parse(tokens, pos) {
|
||||||
Ok((expr, next_pos)) => {
|
Ok((expr, next_pos)) => {
|
||||||
let expr = Expression::BinaryOperator(
|
let expr = Expression::BinaryOperator(
|
||||||
Box::new(prev_expr),
|
Box::new(prev_expr),
|
||||||
@ -34,19 +42,19 @@ fn parse_many<'a>(
|
|||||||
&token.value,
|
&token.value,
|
||||||
);
|
);
|
||||||
|
|
||||||
parse_many(tokens, next_pos, expr)
|
parse_many(tokens, next_pos, expr, indentation_level + indent_count)
|
||||||
}
|
}
|
||||||
_ => Err(ParsingError::Unmatched),
|
_ => return Err(ParsingError::Unmatched),
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
_ => Ok((prev_expr, pos)),
|
)
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::lexic::get_tokens;
|
use crate::lexic::get_tokens;
|
||||||
|
use crate::lexic::token::TokenType;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn should_parse_comparison() {
|
fn should_parse_comparison() {
|
||||||
@ -83,4 +91,80 @@ mod tests {
|
|||||||
_ => panic!("Expected an Unmatched error"),
|
_ => panic!("Expected an Unmatched error"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn should_parse_indented_1() {
|
||||||
|
let tokens = get_tokens(&String::from("a\n + b")).unwrap();
|
||||||
|
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(tokens[5].token_type, TokenType::DEDENT);
|
||||||
|
assert_eq!(next, 6);
|
||||||
|
|
||||||
|
match result {
|
||||||
|
Expression::BinaryOperator(_, _, op) => {
|
||||||
|
assert_eq!(op, "+")
|
||||||
|
}
|
||||||
|
_ => panic!("Expected a binary operator"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn should_parse_indented_2() {
|
||||||
|
let tokens = get_tokens(&String::from("a\n + b\n + c")).unwrap();
|
||||||
|
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||||
|
assert_eq!(next, 11);
|
||||||
|
|
||||||
|
match result {
|
||||||
|
Expression::BinaryOperator(_, _, op) => {
|
||||||
|
assert_eq!(op, "+")
|
||||||
|
}
|
||||||
|
_ => panic!("Expected a binary operator"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn should_parse_indented_3() {
|
||||||
|
let tokens = get_tokens(&String::from("a\n + b + c")).unwrap();
|
||||||
|
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(tokens[7].token_type, TokenType::DEDENT);
|
||||||
|
assert_eq!(next, 8);
|
||||||
|
|
||||||
|
match result {
|
||||||
|
Expression::BinaryOperator(_, _, op) => {
|
||||||
|
assert_eq!(op, "+")
|
||||||
|
}
|
||||||
|
_ => panic!("Expected a binary operator"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn should_parse_indented_4() {
|
||||||
|
let tokens = get_tokens(&String::from("a\n + b\n + c")).unwrap();
|
||||||
|
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(next, 9);
|
||||||
|
|
||||||
|
match result {
|
||||||
|
Expression::BinaryOperator(_, _, op) => {
|
||||||
|
assert_eq!(op, "+")
|
||||||
|
}
|
||||||
|
_ => panic!("Expected a binary operator"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn should_parse_indented_5() {
|
||||||
|
let tokens = get_tokens(&String::from("a +\n b")).unwrap();
|
||||||
|
let (result, next) = try_parse(&tokens, 0).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(next, 6);
|
||||||
|
|
||||||
|
match result {
|
||||||
|
Expression::BinaryOperator(_, _, op) => {
|
||||||
|
assert_eq!(op, "+")
|
||||||
|
}
|
||||||
|
_ => panic!("Expected a binary operator"),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,43 +1,89 @@
|
|||||||
/// macro for handling indentation in expressions
|
use crate::lexic::token::Token;
|
||||||
#[macro_export]
|
use crate::lexic::token::TokenType::{NewLine, DEDENT, INDENT};
|
||||||
macro_rules! handle_indentation {
|
use crate::syntax::ast::Expression;
|
||||||
($tokens: ident, $next_pos: ident, $indent_count: ident, $indentation_level: ident) => {
|
use crate::syntax::parseable::ParsingResult;
|
||||||
match ($tokens.get($next_pos), $tokens.get($next_pos + 1)) {
|
|
||||||
// New indentation level
|
|
||||||
(Some(t1), Some(t2))
|
|
||||||
if t1.token_type == TokenType::NewLine && t2.token_type == TokenType::INDENT =>
|
|
||||||
{
|
|
||||||
// set indentation
|
|
||||||
$next_pos += 2;
|
|
||||||
$indent_count += 1;
|
|
||||||
}
|
|
||||||
// we are indented, ignore newlines
|
|
||||||
(Some(t), _) if t.token_type == TokenType::NewLine && $indentation_level > 0 => {
|
|
||||||
$next_pos += 1;
|
|
||||||
}
|
|
||||||
// let other handlers handle this
|
|
||||||
_ => {}
|
|
||||||
};
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
/// macro for handling dedentation in expressions
|
/// Parses a binary operator, handles indentation and runs a function on it.
|
||||||
#[macro_export]
|
///
|
||||||
macro_rules! handle_dedentation {
|
/// First, handles indentation before the binary operator. Then, tries to
|
||||||
($tokens: ident, $next_pos: ident, $indent_count: ident) => {
|
/// parse the binary operator. Then, handles indentation after the binary
|
||||||
for _ in 0..$indent_count {
|
/// operator.
|
||||||
// Expect a DEDENT for each indentation matched
|
///
|
||||||
match $tokens.get($next_pos) {
|
/// After this runs the function `fun`. Finishes by handling dedentation
|
||||||
// continue
|
/// parsed in the previous phase.
|
||||||
Some(t) if t.token_type == TokenType::DEDENT => {}
|
pub fn try_binary_op<'a, F>(
|
||||||
// This should be unreachable, as the lexer always emits a DEDENT for each INDENT
|
tokens: &'a Vec<Token>,
|
||||||
_ => unreachable!(
|
original_pos: usize,
|
||||||
"Illegal parser state: Expected DEDENT (count: {})",
|
prev_expr: Expression<'a>,
|
||||||
$indent_count
|
operators: Vec<&str>,
|
||||||
),
|
indentation_level: u32,
|
||||||
};
|
fun: F,
|
||||||
|
) -> ParsingResult<'a, Expression<'a>>
|
||||||
|
where
|
||||||
|
F: FnOnce(
|
||||||
|
&'a Vec<Token>,
|
||||||
|
usize,
|
||||||
|
Expression<'a>,
|
||||||
|
&'a Token,
|
||||||
|
u32,
|
||||||
|
) -> ParsingResult<'a, Expression<'a>>,
|
||||||
|
{
|
||||||
|
let mut indent_count = 0;
|
||||||
|
let pos = original_pos;
|
||||||
|
|
||||||
$next_pos += 1;
|
// handle possible opening indentation
|
||||||
|
let pos = match (tokens.get(pos), tokens.get(pos + 1)) {
|
||||||
|
// New indentation level
|
||||||
|
(Some(t1), Some(t2)) if t1.token_type == NewLine && t2.token_type == INDENT => {
|
||||||
|
indent_count += 1;
|
||||||
|
pos + 2
|
||||||
}
|
}
|
||||||
|
// when indented, ignore newlines
|
||||||
|
(Some(t), _) if t.token_type == NewLine && indentation_level > 0 => pos + 1,
|
||||||
|
// let other handlers handle this
|
||||||
|
_ => pos,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// try to parse any of the binary operators
|
||||||
|
let (matched_token, pos) = match tokens.get(pos) {
|
||||||
|
Some(token) if operators.contains(&token.value.as_str()) => (token, pos + 1),
|
||||||
|
// If not matched, return the existing expression
|
||||||
|
_ => return Ok((prev_expr, original_pos)),
|
||||||
|
};
|
||||||
|
|
||||||
|
// handle possible closing indentation
|
||||||
|
let pos = match (tokens.get(pos), tokens.get(pos + 1)) {
|
||||||
|
// New indentation level
|
||||||
|
(Some(t1), Some(t2)) if t1.token_type == NewLine && t2.token_type == INDENT => {
|
||||||
|
indent_count += 1;
|
||||||
|
pos + 2
|
||||||
|
}
|
||||||
|
// when indented, ignore newlines
|
||||||
|
(Some(t), _) if t.token_type == NewLine && indentation_level > 0 => pos + 1,
|
||||||
|
// let other handlers handle this
|
||||||
|
_ => pos,
|
||||||
|
};
|
||||||
|
|
||||||
|
// run the rest of the logic
|
||||||
|
let (new_expr, mut next_pos) = match fun(tokens, pos, prev_expr, matched_token, indent_count) {
|
||||||
|
Ok((e, n)) => (e, n),
|
||||||
|
x => return x,
|
||||||
|
};
|
||||||
|
|
||||||
|
// handle the possible dedentation before/after the operator
|
||||||
|
for _ in 0..indent_count {
|
||||||
|
// expect a DEDENT for each INDENT matched
|
||||||
|
match tokens.get(next_pos) {
|
||||||
|
// continue
|
||||||
|
Some(t) if t.token_type == DEDENT => {}
|
||||||
|
_ => unreachable!(
|
||||||
|
"Illegal parser state: Expected DEDENT (count: {})",
|
||||||
|
indent_count
|
||||||
|
),
|
||||||
|
};
|
||||||
|
|
||||||
|
next_pos += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok((new_expr, next_pos))
|
||||||
}
|
}
|
||||||
|
@ -4,8 +4,6 @@ use super::{ParsingError, ParsingResult};
|
|||||||
|
|
||||||
pub trait Tokenizer {
|
pub trait Tokenizer {
|
||||||
fn get_significant<'a>(&'a self, index: usize) -> Option<(&'a Token, usize)>;
|
fn get_significant<'a>(&'a self, index: usize) -> Option<(&'a Token, usize)>;
|
||||||
|
|
||||||
fn get_indented<'a>(&'a self, index: usize, indented: bool) -> (Option<&'a Token>, usize);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Tokenizer for Vec<Token> {
|
impl Tokenizer for Vec<Token> {
|
||||||
@ -30,31 +28,6 @@ impl Tokenizer for Vec<Token> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_indented<'a>(&'a self, index: usize, indented: bool) -> (Option<&'a Token>, usize) {
|
|
||||||
if !indented {
|
|
||||||
return (self.get(index), index + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut current_pos = index;
|
|
||||||
|
|
||||||
// Ignore all whitespace and newlines
|
|
||||||
loop {
|
|
||||||
match self.get(current_pos) {
|
|
||||||
Some(token) => {
|
|
||||||
if token.token_type == TokenType::INDENT
|
|
||||||
|| token.token_type == TokenType::DEDENT
|
|
||||||
|| token.token_type == TokenType::NewLine
|
|
||||||
{
|
|
||||||
current_pos += 1;
|
|
||||||
} else {
|
|
||||||
return (Some(token), current_pos);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None => return (None, index + 1),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Expects the token at `pos` to be an operator of value `operator`. Doesn't ignore whitespace or newlines
|
/// Expects the token at `pos` to be an operator of value `operator`. Doesn't ignore whitespace or newlines
|
||||||
|
Loading…
Reference in New Issue
Block a user