Refactor parsing pt3

This commit is contained in:
Araozu 2024-03-15 16:56:45 -05:00
parent 98f67bd097
commit ec09dbfc0d
7 changed files with 65 additions and 81 deletions

View File

@ -41,6 +41,7 @@
- [ ] Parse bindings and function declarations as top level constructs
- [ ] Parse function declaration arguments (`Type id`)
- [ ] Parse function return datatype (`fun f() -> Type`)
- [ ] Revert variable binding parsing to `var`/`val`
- [ ] Write tests

View File

@ -1,6 +1,6 @@
use super::ast::var_binding::Binding;
use super::utils::{parse_token_type, try_operator};
use super::{expression, ParseResult};
use super::{expression, ParseResult, ParsingError};
use crate::error_handling::SyntaxError;
use crate::lexic::token::{Token, TokenType};
use crate::utils::Result3;
@ -9,6 +9,7 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Binding>
let mut current_pos = pos;
// TODO: Detect if the binding starts with a datatype
// TODO: Revert to val/var
/*
* let keyword
@ -16,10 +17,10 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Binding>
let (is_mutable, binding_token, next_pos) = {
let let_token = parse_token_type(tokens, current_pos, TokenType::LET);
match let_token {
ParseResult::Ok(let_token, next_let) => {
Ok((let_token, next_let)) => {
let mut_token = parse_token_type(tokens, next_let, TokenType::MUT);
match mut_token {
ParseResult::Ok(_mut_token, next_mut) => (true, let_token, next_mut),
Ok((_mut_token, next_mut)) => (true, let_token, next_mut),
_ => (false, let_token, next_let),
}
}
@ -33,8 +34,8 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Binding>
*/
let (identifier, next_pos) = match parse_token_type(tokens, current_pos, TokenType::Identifier)
{
ParseResult::Ok(t, n) => (t, n),
ParseResult::Mismatch(token) => {
Ok((t, n)) => (t, n),
Err(ParsingError::Mismatch(token)) => {
// The parser found a token, but it's not an identifier
return ParseResult::Err(SyntaxError {
error_start: token.position,
@ -42,7 +43,7 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Binding>
reason: "??".into(),
});
}
ParseResult::Err(error) => {
Err(ParsingError::Err(error)) => {
return ParseResult::Err(error);
}
_ => {
@ -108,7 +109,7 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Binding>
#[cfg(test)]
mod tests {
use super::*;
use crate::{lexic::get_tokens, syntax::utils::parse_immediate_token_type};
use crate::{lexic::get_tokens, syntax::utils::parse_token_type};
#[test]
fn should_parse_val_binding() {
@ -123,7 +124,7 @@ mod tests {
#[test]
fn should_parse_val() {
let tokens = get_tokens(&String::from("let")).unwrap();
let token = *parse_immediate_token_type(&tokens, 0, TokenType::LET).unwrap();
let (token, _) = parse_token_type(&tokens, 0, TokenType::LET).unwrap();
assert_eq!(TokenType::LET, token.token_type);
assert_eq!("let", token.value);
@ -132,7 +133,7 @@ mod tests {
#[test]
fn should_parse_identifier() {
let tokens = get_tokens(&String::from("identifier")).unwrap();
let token = *parse_immediate_token_type(&tokens, 0, TokenType::Identifier).unwrap();
let (token, _) = parse_token_type(&tokens, 0, TokenType::Identifier).unwrap();
assert_eq!("identifier", token.value);
}

View File

@ -3,7 +3,7 @@ use crate::{
lexic::token::{Token, TokenType},
};
use super::{ast::Block, utils::parse_token_type, ParseResult};
use super::{ast::Block, utils::parse_token_type, ParseResult, ParsingError};
// Assumes that the token at `pos` is a {
pub fn parse_block<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Block> {
@ -11,10 +11,10 @@ pub fn parse_block<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Block>
let (opening_brace, next_pos) =
match parse_token_type(tokens, current_pos, TokenType::LeftBrace) {
ParseResult::Ok(t, next) => (t, next),
ParseResult::Err(err) => return ParseResult::Err(err),
ParseResult::Mismatch(t) => return ParseResult::Mismatch(t),
ParseResult::Unmatched => return ParseResult::Unmatched,
Ok((t, next)) => (t, next),
Err(ParsingError::Err(err)) => return ParseResult::Err(err),
Err(ParsingError::Mismatch(t)) => return ParseResult::Mismatch(t),
Err(ParsingError::Unmatched) => return ParseResult::Unmatched,
};
current_pos = next_pos;
@ -52,16 +52,16 @@ pub fn parse_block<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Block>
// Parse closing brace
let (_closing_brace, next_pos) =
match parse_token_type(tokens, current_pos, TokenType::RightBrace) {
ParseResult::Ok(t, next) => (t, next),
ParseResult::Err(err) => return ParseResult::Err(err),
ParseResult::Mismatch(t) => {
Ok((t, next)) => (t, next),
Err(ParsingError::Err(err)) => return ParseResult::Err(err),
Err(ParsingError::Mismatch(t)) => {
return ParseResult::Err(SyntaxError {
reason: String::from("Expected a closing brace after the block body."),
error_start: t.position,
error_end: t.get_end_position(),
});
}
ParseResult::Unmatched => {
Err(ParsingError::Unmatched) => {
return ParseResult::Err(SyntaxError {
reason: String::from("Expected a closing brace after the block body."),
error_start: opening_brace.position,

View File

@ -4,7 +4,7 @@ use crate::{
syntax::{
ast::{functions::ArgumentsList, Expression},
utils::parse_token_type,
ParseResult,
ParseResult, ParsingError,
},
};
@ -13,10 +13,10 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Argument
let (opening_paren, next_pos) =
match parse_token_type(tokens, current_pos, TokenType::LeftParen) {
ParseResult::Ok(t, next) => (t, next),
ParseResult::Err(err) => return ParseResult::Err(err),
ParseResult::Mismatch(t) => return ParseResult::Mismatch(t),
ParseResult::Unmatched => return ParseResult::Unmatched,
Ok((t, next)) => (t, next),
Err(ParsingError::Err(err)) => return ParseResult::Err(err),
Err(ParsingError::Mismatch(t)) => return ParseResult::Mismatch(t),
Err(ParsingError::Unmatched) => return ParseResult::Unmatched,
};
current_pos = next_pos;
@ -37,34 +37,34 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Argument
// Parse comma. This also parses a trailing comma
match parse_token_type(tokens, current_pos, TokenType::Comma) {
ParseResult::Ok(_, next) => {
Ok((_, next)) => {
current_pos = next;
}
// This should never happen
ParseResult::Err(err) => return ParseResult::Err(err),
ParseResult::Mismatch(_) => {
Err(ParsingError::Err(err)) => return ParseResult::Err(err),
Err(ParsingError::Mismatch(_)) => {
// Something other than a comma was found. It must be a closing paren )
// Still, break the loop, assume there are no more arguments
// TODO: This could be a good place to write a detailed error?
break;
}
ParseResult::Unmatched => break,
Err(ParsingError::Unmatched) => break,
};
}
// Parse closing paren
let (_closing_paren, next_pos) =
match parse_token_type(tokens, current_pos, TokenType::RightParen) {
ParseResult::Ok(t, next) => (t, next),
ParseResult::Err(err) => return ParseResult::Err(err),
ParseResult::Mismatch(t) => {
Ok((t, next)) => (t, next),
Err(ParsingError::Err(err)) => return ParseResult::Err(err),
Err(ParsingError::Mismatch(t)) => {
return ParseResult::Err(SyntaxError {
reason: String::from("Expected a closing paren after the function identifier."),
error_start: t.position,
error_end: t.get_end_position(),
});
}
ParseResult::Unmatched => {
Err(ParsingError::Unmatched) => {
return ParseResult::Err(SyntaxError {
reason: String::from("Expected a closing paren after the function identifier."),
error_start: opening_paren.position,

View File

@ -14,23 +14,23 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<Functi
// `fun` keyword
let (fun_keyword, next_pos) = match parse_token_type(tokens, current_pos, TokenType::FUN) {
ParseResult::Ok(t, next) => (t, next),
Ok((t, next)) => (t, next),
_ => return Err(ParsingError::Unmatched),
};
current_pos = next_pos;
let (identifier, next_pos) = match parse_token_type(tokens, current_pos, TokenType::Identifier)
{
ParseResult::Ok(id, next) => (id, next),
ParseResult::Err(err) => return Err(ParsingError::Err(err)),
ParseResult::Mismatch(wrong_token) => {
Ok((id, next)) => (id, next),
Err(ParsingError::Err(err)) => return Err(ParsingError::Err(err)),
Err(ParsingError::Mismatch(wrong_token)) => {
return Err(ParsingError::Err(SyntaxError {
reason: String::from("Expected an identifier after the `fun` keyword."),
error_start: wrong_token.position,
error_end: wrong_token.get_end_position(),
}));
}
ParseResult::Unmatched => {
Err(ParsingError::Unmatched) => {
return Err(ParsingError::Err(SyntaxError {
reason: String::from("Expected an identifier after the `fun` keyword."),
error_start: fun_keyword.position,

View File

@ -14,10 +14,10 @@ pub fn parse_params_list<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResul
let (opening_paren, next_pos) =
match parse_token_type(tokens, current_pos, TokenType::LeftParen) {
ParseResult::Ok(t, next) => (t, next),
ParseResult::Err(err) => return Err(ParsingError::Err(err)),
ParseResult::Mismatch(t) => return Err(ParsingError::Mismatch(&t)),
ParseResult::Unmatched => return Err(ParsingError::Unmatched),
Ok((t, next)) => (t, next),
Err(ParsingError::Err(err)) => return Err(ParsingError::Err(err)),
Err(ParsingError::Mismatch(t)) => return Err(ParsingError::Mismatch(&t)),
Err(ParsingError::Unmatched) => return Err(ParsingError::Unmatched),
};
current_pos = next_pos;
@ -44,34 +44,34 @@ pub fn parse_params_list<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResul
// Parse comma. This also parses a trailing comma
match parse_token_type(tokens, current_pos, TokenType::Comma) {
ParseResult::Ok(_, next) => {
Ok((_, next)) => {
current_pos = next;
}
// This should never happen
ParseResult::Err(err) => return Err(ParsingError::Err(err)),
ParseResult::Mismatch(_) => {
Err(ParsingError::Err(err)) => return Err(ParsingError::Err(err)),
Err(ParsingError::Mismatch(_)) => {
// Something other than a comma was found. It must be a closing paren )
// Still, break the loop, assume there are no more arguments
// TODO: This could be a good place to write a detailed error?
break;
}
ParseResult::Unmatched => break,
Err(ParsingError::Unmatched) => break,
};
}
// Parse closing paren
let (_closing_paren, next_pos) =
match parse_token_type(tokens, current_pos, TokenType::RightParen) {
ParseResult::Ok(t, next) => (t, next),
ParseResult::Err(err) => return Err(ParsingError::Err(err)),
ParseResult::Mismatch(t) => {
Ok((t, next)) => (t, next),
Err(ParsingError::Err(err)) => return Err(ParsingError::Err(err)),
Err(ParsingError::Mismatch(t)) => {
return Err(ParsingError::Err(SyntaxError {
reason: String::from("Expected a closing paren after the function identifier."),
error_start: t.position,
error_end: t.get_end_position(),
}));
}
ParseResult::Unmatched => {
Err(ParsingError::Unmatched) => {
return Err(ParsingError::Err(SyntaxError {
reason: String::from("Expected a closing paren after the function identifier."),
error_start: opening_paren.position,
@ -95,32 +95,32 @@ fn parse_param_definition<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult
let mut current_pos = pos;
let (datatype, next_pos) =
match utils::parse_token_type(tokens, current_pos, TokenType::Datatype) {
ParseResult::Ok(token, next) => (token, next),
ParseResult::Err(err) => {
Ok((token, next)) => (token, next),
Err(ParsingError::Err(err)) => {
return ParseResult::Err(err);
}
// If there is no datatype this construction doesn't apply.
// Return a mismatch and let the caller handle it
ParseResult::Mismatch(t) => return ParseResult::Mismatch(t),
ParseResult::Unmatched => return ParseResult::Unmatched,
Err(ParsingError::Mismatch(t)) => return ParseResult::Mismatch(t),
Err(ParsingError::Unmatched) => return ParseResult::Unmatched,
};
current_pos = next_pos;
let (identifier, next_pos) =
match utils::parse_token_type(tokens, current_pos, TokenType::Identifier) {
ParseResult::Ok(token, next) => (token, next),
ParseResult::Err(err) => {
Ok((token, next)) => (token, next),
Err(ParsingError::Err(err)) => {
return ParseResult::Err(err);
}
// However, if we fail to parse an identifier, it's an error
ParseResult::Mismatch(_) => {
Err(ParsingError::Mismatch(_)) => {
return ParseResult::Err(SyntaxError {
reason: String::from("Expected an identifier for the parameter."),
error_start: tokens[pos].position,
error_end: tokens[pos].get_end_position(),
});
}
ParseResult::Unmatched => {
Err(ParsingError::Unmatched) => {
return ParseResult::Err(SyntaxError {
reason: String::from("Expected an identifier for the parameter."),
error_start: tokens[pos].position,

View File

@ -3,7 +3,7 @@ use crate::{
utils::Result3,
};
use super::ParseResult;
use super::{ParsingError, ParsingResult};
pub trait Tokenizer {
fn get_significant<'a>(&'a self, index: usize) -> Option<(&'a Token, usize)>;
@ -33,24 +33,6 @@ impl Tokenizer for Vec<Token> {
}
}
/// Checks whether the token located exactly at `pos` has type `token_type`.
///
/// **Whitespace and newlines are not skipped**: only the token at `pos` is inspected.
///
/// Outcomes:
/// - `Result3::Ok(token)` if the token at `pos` has the requested type.
/// - `Result3::None` if `pos` is out of bounds, or if the token found there is an
///   `EOF` or `NewLine` token that is not of the requested type.
/// - `Result3::Err(token)` if any other token is found; the offending token is returned.
pub fn parse_immediate_token_type(
    tokens: &Vec<Token>,
    pos: usize,
    token_type: TokenType,
) -> Result3<&Token> {
    // Nothing to inspect when `pos` lies past the end of the token stream.
    let found = match tokens.get(pos) {
        Some(token) => token,
        None => return Result3::None,
    };

    // The requested type is checked first, so asking for `EOF`/`NewLine` still succeeds.
    if found.token_type == token_type {
        return Result3::Ok(found);
    }

    // Hitting the end of the line/input is reported as "nothing here", not as a wrong token.
    if found.token_type == TokenType::EOF || found.token_type == TokenType::NewLine {
        return Result3::None;
    }

    Result3::Err(found)
}
/// Expects the token at `pos` to be an operator of value `operator`. Doesn't ignore whitespace or newlines
pub fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Result3<&Token> {
match tokens.get(pos) {
@ -72,7 +54,7 @@ pub fn parse_token_type(
tokens: &Vec<Token>,
pos: usize,
token_type: TokenType,
) -> ParseResult<&Token> {
) -> ParsingResult<&Token> {
let mut current_pos = pos;
// Ignore all whitespace and newlines
@ -88,11 +70,11 @@ pub fn parse_token_type(
}
match tokens.get(current_pos) {
Some(t) if t.token_type == token_type => ParseResult::Ok(t, current_pos + 1),
Some(t) if t.token_type == token_type => Ok((t, current_pos + 1)),
Some(t) if t.token_type == TokenType::EOF || t.token_type == TokenType::NewLine => {
ParseResult::Unmatched
Err(ParsingError::Unmatched)
}
Some(t) => ParseResult::Mismatch(t),
None => ParseResult::Unmatched,
Some(t) => Err(ParsingError::Mismatch(t)),
None => Err(ParsingError::Unmatched),
}
}