Refactor parsing pt3
This commit is contained in:
parent
98f67bd097
commit
ec09dbfc0d
@ -41,6 +41,7 @@
|
||||
- [ ] Parse bindings and function declarations as top level constructs
|
||||
- [ ] Parse function declaration arguments (`Type id`)
|
||||
- [ ] Parse function return datatype (`fun f() -> Type`)
|
||||
- [ ] Revert variable binding parsing to var/val
|
||||
- [ ] Write tests
|
||||
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
use super::ast::var_binding::Binding;
|
||||
use super::utils::{parse_token_type, try_operator};
|
||||
use super::{expression, ParseResult};
|
||||
use super::{expression, ParseResult, ParsingError};
|
||||
use crate::error_handling::SyntaxError;
|
||||
use crate::lexic::token::{Token, TokenType};
|
||||
use crate::utils::Result3;
|
||||
@ -9,6 +9,7 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Binding>
|
||||
let mut current_pos = pos;
|
||||
|
||||
// TODO: Detect if the binding starts with a datatype
|
||||
// TODO: Revert to val/var
|
||||
|
||||
/*
|
||||
* let keyword
|
||||
@ -16,10 +17,10 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Binding>
|
||||
let (is_mutable, binding_token, next_pos) = {
|
||||
let let_token = parse_token_type(tokens, current_pos, TokenType::LET);
|
||||
match let_token {
|
||||
ParseResult::Ok(let_token, next_let) => {
|
||||
Ok((let_token, next_let)) => {
|
||||
let mut_token = parse_token_type(tokens, next_let, TokenType::MUT);
|
||||
match mut_token {
|
||||
ParseResult::Ok(_mut_token, next_mut) => (true, let_token, next_mut),
|
||||
Ok((_mut_token, next_mut)) => (true, let_token, next_mut),
|
||||
_ => (false, let_token, next_let),
|
||||
}
|
||||
}
|
||||
@ -33,8 +34,8 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Binding>
|
||||
*/
|
||||
let (identifier, next_pos) = match parse_token_type(tokens, current_pos, TokenType::Identifier)
|
||||
{
|
||||
ParseResult::Ok(t, n) => (t, n),
|
||||
ParseResult::Mismatch(token) => {
|
||||
Ok((t, n)) => (t, n),
|
||||
Err(ParsingError::Mismatch(token)) => {
|
||||
// The parser found a token, but it's not an identifier
|
||||
return ParseResult::Err(SyntaxError {
|
||||
error_start: token.position,
|
||||
@ -42,7 +43,7 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Binding>
|
||||
reason: "??".into(),
|
||||
});
|
||||
}
|
||||
ParseResult::Err(error) => {
|
||||
Err(ParsingError::Err(error)) => {
|
||||
return ParseResult::Err(error);
|
||||
}
|
||||
_ => {
|
||||
@ -108,7 +109,7 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Binding>
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::{lexic::get_tokens, syntax::utils::parse_immediate_token_type};
|
||||
use crate::{lexic::get_tokens, syntax::utils::parse_token_type};
|
||||
|
||||
#[test]
|
||||
fn should_parse_val_binding() {
|
||||
@ -123,7 +124,7 @@ mod tests {
|
||||
#[test]
|
||||
fn should_parse_val() {
|
||||
let tokens = get_tokens(&String::from("let")).unwrap();
|
||||
let token = *parse_immediate_token_type(&tokens, 0, TokenType::LET).unwrap();
|
||||
let (token, _) = parse_token_type(&tokens, 0, TokenType::LET).unwrap();
|
||||
|
||||
assert_eq!(TokenType::LET, token.token_type);
|
||||
assert_eq!("let", token.value);
|
||||
@ -132,7 +133,7 @@ mod tests {
|
||||
#[test]
|
||||
fn should_parse_identifier() {
|
||||
let tokens = get_tokens(&String::from("identifier")).unwrap();
|
||||
let token = *parse_immediate_token_type(&tokens, 0, TokenType::Identifier).unwrap();
|
||||
let (token, _) = parse_token_type(&tokens, 0, TokenType::Identifier).unwrap();
|
||||
|
||||
assert_eq!("identifier", token.value);
|
||||
}
|
||||
|
@ -3,7 +3,7 @@ use crate::{
|
||||
lexic::token::{Token, TokenType},
|
||||
};
|
||||
|
||||
use super::{ast::Block, utils::parse_token_type, ParseResult};
|
||||
use super::{ast::Block, utils::parse_token_type, ParseResult, ParsingError};
|
||||
|
||||
// Assumes that the token at `pos` is a {
|
||||
pub fn parse_block<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Block> {
|
||||
@ -11,10 +11,10 @@ pub fn parse_block<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Block>
|
||||
|
||||
let (opening_brace, next_pos) =
|
||||
match parse_token_type(tokens, current_pos, TokenType::LeftBrace) {
|
||||
ParseResult::Ok(t, next) => (t, next),
|
||||
ParseResult::Err(err) => return ParseResult::Err(err),
|
||||
ParseResult::Mismatch(t) => return ParseResult::Mismatch(t),
|
||||
ParseResult::Unmatched => return ParseResult::Unmatched,
|
||||
Ok((t, next)) => (t, next),
|
||||
Err(ParsingError::Err(err)) => return ParseResult::Err(err),
|
||||
Err(ParsingError::Mismatch(t)) => return ParseResult::Mismatch(t),
|
||||
Err(ParsingError::Unmatched) => return ParseResult::Unmatched,
|
||||
};
|
||||
current_pos = next_pos;
|
||||
|
||||
@ -52,16 +52,16 @@ pub fn parse_block<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Block>
|
||||
// Parse closing brace
|
||||
let (_closing_brace, next_pos) =
|
||||
match parse_token_type(tokens, current_pos, TokenType::RightBrace) {
|
||||
ParseResult::Ok(t, next) => (t, next),
|
||||
ParseResult::Err(err) => return ParseResult::Err(err),
|
||||
ParseResult::Mismatch(t) => {
|
||||
Ok((t, next)) => (t, next),
|
||||
Err(ParsingError::Err(err)) => return ParseResult::Err(err),
|
||||
Err(ParsingError::Mismatch(t)) => {
|
||||
return ParseResult::Err(SyntaxError {
|
||||
reason: String::from("Expected a closing brace after the block body."),
|
||||
error_start: t.position,
|
||||
error_end: t.get_end_position(),
|
||||
});
|
||||
}
|
||||
ParseResult::Unmatched => {
|
||||
Err(ParsingError::Unmatched) => {
|
||||
return ParseResult::Err(SyntaxError {
|
||||
reason: String::from("Expected a closing brace after the block body."),
|
||||
error_start: opening_brace.position,
|
||||
|
@ -4,7 +4,7 @@ use crate::{
|
||||
syntax::{
|
||||
ast::{functions::ArgumentsList, Expression},
|
||||
utils::parse_token_type,
|
||||
ParseResult,
|
||||
ParseResult, ParsingError,
|
||||
},
|
||||
};
|
||||
|
||||
@ -13,10 +13,10 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Argument
|
||||
|
||||
let (opening_paren, next_pos) =
|
||||
match parse_token_type(tokens, current_pos, TokenType::LeftParen) {
|
||||
ParseResult::Ok(t, next) => (t, next),
|
||||
ParseResult::Err(err) => return ParseResult::Err(err),
|
||||
ParseResult::Mismatch(t) => return ParseResult::Mismatch(t),
|
||||
ParseResult::Unmatched => return ParseResult::Unmatched,
|
||||
Ok((t, next)) => (t, next),
|
||||
Err(ParsingError::Err(err)) => return ParseResult::Err(err),
|
||||
Err(ParsingError::Mismatch(t)) => return ParseResult::Mismatch(t),
|
||||
Err(ParsingError::Unmatched) => return ParseResult::Unmatched,
|
||||
};
|
||||
current_pos = next_pos;
|
||||
|
||||
@ -37,34 +37,34 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Argument
|
||||
|
||||
// Parse comma. This also parses a trailing comma
|
||||
match parse_token_type(tokens, current_pos, TokenType::Comma) {
|
||||
ParseResult::Ok(_, next) => {
|
||||
Ok((_, next)) => {
|
||||
current_pos = next;
|
||||
}
|
||||
// This should never happen
|
||||
ParseResult::Err(err) => return ParseResult::Err(err),
|
||||
ParseResult::Mismatch(_) => {
|
||||
Err(ParsingError::Err(err)) => return ParseResult::Err(err),
|
||||
Err(ParsingError::Mismatch(_)) => {
|
||||
// Something other than a comma was found. It must be a closing paren )
|
||||
// Still, break the loop, assume there are no more arguments
|
||||
// TODO: This could be a good place to write a detailed error?
|
||||
break;
|
||||
}
|
||||
ParseResult::Unmatched => break,
|
||||
Err(ParsingError::Unmatched) => break,
|
||||
};
|
||||
}
|
||||
|
||||
// Parse closing paren
|
||||
let (_closing_paren, next_pos) =
|
||||
match parse_token_type(tokens, current_pos, TokenType::RightParen) {
|
||||
ParseResult::Ok(t, next) => (t, next),
|
||||
ParseResult::Err(err) => return ParseResult::Err(err),
|
||||
ParseResult::Mismatch(t) => {
|
||||
Ok((t, next)) => (t, next),
|
||||
Err(ParsingError::Err(err)) => return ParseResult::Err(err),
|
||||
Err(ParsingError::Mismatch(t)) => {
|
||||
return ParseResult::Err(SyntaxError {
|
||||
reason: String::from("Expected a closing paren after the function identifier."),
|
||||
error_start: t.position,
|
||||
error_end: t.get_end_position(),
|
||||
});
|
||||
}
|
||||
ParseResult::Unmatched => {
|
||||
Err(ParsingError::Unmatched) => {
|
||||
return ParseResult::Err(SyntaxError {
|
||||
reason: String::from("Expected a closing paren after the function identifier."),
|
||||
error_start: opening_paren.position,
|
||||
|
@ -14,23 +14,23 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<Functi
|
||||
|
||||
// `fun` keyword
|
||||
let (fun_keyword, next_pos) = match parse_token_type(tokens, current_pos, TokenType::FUN) {
|
||||
ParseResult::Ok(t, next) => (t, next),
|
||||
Ok((t, next)) => (t, next),
|
||||
_ => return Err(ParsingError::Unmatched),
|
||||
};
|
||||
current_pos = next_pos;
|
||||
|
||||
let (identifier, next_pos) = match parse_token_type(tokens, current_pos, TokenType::Identifier)
|
||||
{
|
||||
ParseResult::Ok(id, next) => (id, next),
|
||||
ParseResult::Err(err) => return Err(ParsingError::Err(err)),
|
||||
ParseResult::Mismatch(wrong_token) => {
|
||||
Ok((id, next)) => (id, next),
|
||||
Err(ParsingError::Err(err)) => return Err(ParsingError::Err(err)),
|
||||
Err(ParsingError::Mismatch(wrong_token)) => {
|
||||
return Err(ParsingError::Err(SyntaxError {
|
||||
reason: String::from("Expected an identifier after the `fun` keyword."),
|
||||
error_start: wrong_token.position,
|
||||
error_end: wrong_token.get_end_position(),
|
||||
}));
|
||||
}
|
||||
ParseResult::Unmatched => {
|
||||
Err(ParsingError::Unmatched) => {
|
||||
return Err(ParsingError::Err(SyntaxError {
|
||||
reason: String::from("Expected an identifier after the `fun` keyword."),
|
||||
error_start: fun_keyword.position,
|
||||
|
@ -14,10 +14,10 @@ pub fn parse_params_list<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResul
|
||||
|
||||
let (opening_paren, next_pos) =
|
||||
match parse_token_type(tokens, current_pos, TokenType::LeftParen) {
|
||||
ParseResult::Ok(t, next) => (t, next),
|
||||
ParseResult::Err(err) => return Err(ParsingError::Err(err)),
|
||||
ParseResult::Mismatch(t) => return Err(ParsingError::Mismatch(&t)),
|
||||
ParseResult::Unmatched => return Err(ParsingError::Unmatched),
|
||||
Ok((t, next)) => (t, next),
|
||||
Err(ParsingError::Err(err)) => return Err(ParsingError::Err(err)),
|
||||
Err(ParsingError::Mismatch(t)) => return Err(ParsingError::Mismatch(&t)),
|
||||
Err(ParsingError::Unmatched) => return Err(ParsingError::Unmatched),
|
||||
};
|
||||
current_pos = next_pos;
|
||||
|
||||
@ -44,34 +44,34 @@ pub fn parse_params_list<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResul
|
||||
|
||||
// Parse comma. This also parses a trailing comma
|
||||
match parse_token_type(tokens, current_pos, TokenType::Comma) {
|
||||
ParseResult::Ok(_, next) => {
|
||||
Ok((_, next)) => {
|
||||
current_pos = next;
|
||||
}
|
||||
// This should never happen
|
||||
ParseResult::Err(err) => return Err(ParsingError::Err(err)),
|
||||
ParseResult::Mismatch(_) => {
|
||||
Err(ParsingError::Err(err)) => return Err(ParsingError::Err(err)),
|
||||
Err(ParsingError::Mismatch(_)) => {
|
||||
// Something other than a comma was found. It must be a closing paren )
|
||||
// Still, break the loop, assume there are no more arguments
|
||||
// TODO: This could be a good place to write a detailed error?
|
||||
break;
|
||||
}
|
||||
ParseResult::Unmatched => break,
|
||||
Err(ParsingError::Unmatched) => break,
|
||||
};
|
||||
}
|
||||
|
||||
// Parse closing paren
|
||||
let (_closing_paren, next_pos) =
|
||||
match parse_token_type(tokens, current_pos, TokenType::RightParen) {
|
||||
ParseResult::Ok(t, next) => (t, next),
|
||||
ParseResult::Err(err) => return Err(ParsingError::Err(err)),
|
||||
ParseResult::Mismatch(t) => {
|
||||
Ok((t, next)) => (t, next),
|
||||
Err(ParsingError::Err(err)) => return Err(ParsingError::Err(err)),
|
||||
Err(ParsingError::Mismatch(t)) => {
|
||||
return Err(ParsingError::Err(SyntaxError {
|
||||
reason: String::from("Expected a closing paren after the function identifier."),
|
||||
error_start: t.position,
|
||||
error_end: t.get_end_position(),
|
||||
}));
|
||||
}
|
||||
ParseResult::Unmatched => {
|
||||
Err(ParsingError::Unmatched) => {
|
||||
return Err(ParsingError::Err(SyntaxError {
|
||||
reason: String::from("Expected a closing paren after the function identifier."),
|
||||
error_start: opening_paren.position,
|
||||
@ -95,32 +95,32 @@ fn parse_param_definition<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult
|
||||
let mut current_pos = pos;
|
||||
let (datatype, next_pos) =
|
||||
match utils::parse_token_type(tokens, current_pos, TokenType::Datatype) {
|
||||
ParseResult::Ok(token, next) => (token, next),
|
||||
ParseResult::Err(err) => {
|
||||
Ok((token, next)) => (token, next),
|
||||
Err(ParsingError::Err(err)) => {
|
||||
return ParseResult::Err(err);
|
||||
}
|
||||
// If there is no datatype this construction doesn't apply.
|
||||
// Return a mismatch and let the caller handle it
|
||||
ParseResult::Mismatch(t) => return ParseResult::Mismatch(t),
|
||||
ParseResult::Unmatched => return ParseResult::Unmatched,
|
||||
Err(ParsingError::Mismatch(t)) => return ParseResult::Mismatch(t),
|
||||
Err(ParsingError::Unmatched) => return ParseResult::Unmatched,
|
||||
};
|
||||
current_pos = next_pos;
|
||||
|
||||
let (identifier, next_pos) =
|
||||
match utils::parse_token_type(tokens, current_pos, TokenType::Identifier) {
|
||||
ParseResult::Ok(token, next) => (token, next),
|
||||
ParseResult::Err(err) => {
|
||||
Ok((token, next)) => (token, next),
|
||||
Err(ParsingError::Err(err)) => {
|
||||
return ParseResult::Err(err);
|
||||
}
|
||||
// However, if we fail to parse an identifier, it's an error
|
||||
ParseResult::Mismatch(_) => {
|
||||
Err(ParsingError::Mismatch(_)) => {
|
||||
return ParseResult::Err(SyntaxError {
|
||||
reason: String::from("Expected an identifier for the parameter."),
|
||||
error_start: tokens[pos].position,
|
||||
error_end: tokens[pos].get_end_position(),
|
||||
});
|
||||
}
|
||||
ParseResult::Unmatched => {
|
||||
Err(ParsingError::Unmatched) => {
|
||||
return ParseResult::Err(SyntaxError {
|
||||
reason: String::from("Expected an identifier for the parameter."),
|
||||
error_start: tokens[pos].position,
|
||||
|
@ -3,7 +3,7 @@ use crate::{
|
||||
utils::Result3,
|
||||
};
|
||||
|
||||
use super::ParseResult;
|
||||
use super::{ParsingError, ParsingResult};
|
||||
|
||||
pub trait Tokenizer {
|
||||
fn get_significant<'a>(&'a self, index: usize) -> Option<(&'a Token, usize)>;
|
||||
@ -33,24 +33,6 @@ impl Tokenizer for Vec<Token> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Expects the token at `pos` to be of type `token_type`.
|
||||
///
|
||||
/// **Doesn't ignore whitespace or newlines**
|
||||
pub fn parse_immediate_token_type(
|
||||
tokens: &Vec<Token>,
|
||||
pos: usize,
|
||||
token_type: TokenType,
|
||||
) -> Result3<&Token> {
|
||||
match tokens.get(pos) {
|
||||
Some(t) if t.token_type == token_type => Result3::Ok(t),
|
||||
Some(t) if t.token_type == TokenType::EOF || t.token_type == TokenType::NewLine => {
|
||||
Result3::None
|
||||
}
|
||||
Some(t) => Result3::Err(t),
|
||||
None => Result3::None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Expects the token at `pos` to be an operator of value `operator`. Doesn't ignore whitespace or newlines
|
||||
pub fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Result3<&Token> {
|
||||
match tokens.get(pos) {
|
||||
@ -66,13 +48,13 @@ pub fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Result
|
||||
/// Expects the token at `pos` to be of type `token_type`, and returns the token and the next position.
|
||||
///
|
||||
/// Ignores all whitespace and newlines.
|
||||
///
|
||||
///
|
||||
/// Only returns: Ok, Unmatched, Mismatch
|
||||
pub fn parse_token_type(
|
||||
tokens: &Vec<Token>,
|
||||
pos: usize,
|
||||
token_type: TokenType,
|
||||
) -> ParseResult<&Token> {
|
||||
) -> ParsingResult<&Token> {
|
||||
let mut current_pos = pos;
|
||||
|
||||
// Ignore all whitespace and newlines
|
||||
@ -88,11 +70,11 @@ pub fn parse_token_type(
|
||||
}
|
||||
|
||||
match tokens.get(current_pos) {
|
||||
Some(t) if t.token_type == token_type => ParseResult::Ok(t, current_pos + 1),
|
||||
Some(t) if t.token_type == token_type => Ok((t, current_pos + 1)),
|
||||
Some(t) if t.token_type == TokenType::EOF || t.token_type == TokenType::NewLine => {
|
||||
ParseResult::Unmatched
|
||||
Err(ParsingError::Unmatched)
|
||||
}
|
||||
Some(t) => ParseResult::Mismatch(t),
|
||||
None => ParseResult::Unmatched,
|
||||
Some(t) => Err(ParsingError::Mismatch(t)),
|
||||
None => Err(ParsingError::Unmatched),
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user