Refactor parsing pt3

This commit is contained in:
Araozu 2024-03-15 16:56:45 -05:00
parent 98f67bd097
commit ec09dbfc0d
7 changed files with 65 additions and 81 deletions

View File

@ -41,6 +41,7 @@
- [ ] Parse bindings and function declarations as top level constructs - [ ] Parse bindings and function declarations as top level constructs
- [ ] Parse function declaration arguments (`Type id`) - [ ] Parse function declaration arguments (`Type id`)
- [ ] Parse function return datatype (`fun f() -> Type`) - [ ] Parse function return datatype (`fun f() -> Type`)
- [ ] Revert variable parsing to var/val
- [ ] Write tests - [ ] Write tests

View File

@ -1,6 +1,6 @@
use super::ast::var_binding::Binding; use super::ast::var_binding::Binding;
use super::utils::{parse_token_type, try_operator}; use super::utils::{parse_token_type, try_operator};
use super::{expression, ParseResult}; use super::{expression, ParseResult, ParsingError};
use crate::error_handling::SyntaxError; use crate::error_handling::SyntaxError;
use crate::lexic::token::{Token, TokenType}; use crate::lexic::token::{Token, TokenType};
use crate::utils::Result3; use crate::utils::Result3;
@ -9,6 +9,7 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Binding>
let mut current_pos = pos; let mut current_pos = pos;
// TODO: Detect if the binding starts with a datatype // TODO: Detect if the binding starts with a datatype
// TODO: Revert to val/var
/* /*
* let keyword * let keyword
@ -16,10 +17,10 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Binding>
let (is_mutable, binding_token, next_pos) = { let (is_mutable, binding_token, next_pos) = {
let let_token = parse_token_type(tokens, current_pos, TokenType::LET); let let_token = parse_token_type(tokens, current_pos, TokenType::LET);
match let_token { match let_token {
ParseResult::Ok(let_token, next_let) => { Ok((let_token, next_let)) => {
let mut_token = parse_token_type(tokens, next_let, TokenType::MUT); let mut_token = parse_token_type(tokens, next_let, TokenType::MUT);
match mut_token { match mut_token {
ParseResult::Ok(_mut_token, next_mut) => (true, let_token, next_mut), Ok((_mut_token, next_mut)) => (true, let_token, next_mut),
_ => (false, let_token, next_let), _ => (false, let_token, next_let),
} }
} }
@ -33,8 +34,8 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Binding>
*/ */
let (identifier, next_pos) = match parse_token_type(tokens, current_pos, TokenType::Identifier) let (identifier, next_pos) = match parse_token_type(tokens, current_pos, TokenType::Identifier)
{ {
ParseResult::Ok(t, n) => (t, n), Ok((t, n)) => (t, n),
ParseResult::Mismatch(token) => { Err(ParsingError::Mismatch(token)) => {
// The parser found a token, but it's not an identifier // The parser found a token, but it's not an identifier
return ParseResult::Err(SyntaxError { return ParseResult::Err(SyntaxError {
error_start: token.position, error_start: token.position,
@ -42,7 +43,7 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Binding>
reason: "??".into(), reason: "??".into(),
}); });
} }
ParseResult::Err(error) => { Err(ParsingError::Err(error)) => {
return ParseResult::Err(error); return ParseResult::Err(error);
} }
_ => { _ => {
@ -108,7 +109,7 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Binding>
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use crate::{lexic::get_tokens, syntax::utils::parse_immediate_token_type}; use crate::{lexic::get_tokens, syntax::utils::parse_token_type};
#[test] #[test]
fn should_parse_val_binding() { fn should_parse_val_binding() {
@ -123,7 +124,7 @@ mod tests {
#[test] #[test]
fn should_parse_val() { fn should_parse_val() {
let tokens = get_tokens(&String::from("let")).unwrap(); let tokens = get_tokens(&String::from("let")).unwrap();
let token = *parse_immediate_token_type(&tokens, 0, TokenType::LET).unwrap(); let (token, _) = parse_token_type(&tokens, 0, TokenType::LET).unwrap();
assert_eq!(TokenType::LET, token.token_type); assert_eq!(TokenType::LET, token.token_type);
assert_eq!("let", token.value); assert_eq!("let", token.value);
@ -132,7 +133,7 @@ mod tests {
#[test] #[test]
fn should_parse_identifier() { fn should_parse_identifier() {
let tokens = get_tokens(&String::from("identifier")).unwrap(); let tokens = get_tokens(&String::from("identifier")).unwrap();
let token = *parse_immediate_token_type(&tokens, 0, TokenType::Identifier).unwrap(); let (token, _) = parse_token_type(&tokens, 0, TokenType::Identifier).unwrap();
assert_eq!("identifier", token.value); assert_eq!("identifier", token.value);
} }

View File

@ -3,7 +3,7 @@ use crate::{
lexic::token::{Token, TokenType}, lexic::token::{Token, TokenType},
}; };
use super::{ast::Block, utils::parse_token_type, ParseResult}; use super::{ast::Block, utils::parse_token_type, ParseResult, ParsingError};
// Assumes that the token at `pos` is a { // Assumes that the token at `pos` is a {
pub fn parse_block<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Block> { pub fn parse_block<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Block> {
@ -11,10 +11,10 @@ pub fn parse_block<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Block>
let (opening_brace, next_pos) = let (opening_brace, next_pos) =
match parse_token_type(tokens, current_pos, TokenType::LeftBrace) { match parse_token_type(tokens, current_pos, TokenType::LeftBrace) {
ParseResult::Ok(t, next) => (t, next), Ok((t, next)) => (t, next),
ParseResult::Err(err) => return ParseResult::Err(err), Err(ParsingError::Err(err)) => return ParseResult::Err(err),
ParseResult::Mismatch(t) => return ParseResult::Mismatch(t), Err(ParsingError::Mismatch(t)) => return ParseResult::Mismatch(t),
ParseResult::Unmatched => return ParseResult::Unmatched, Err(ParsingError::Unmatched) => return ParseResult::Unmatched,
}; };
current_pos = next_pos; current_pos = next_pos;
@ -52,16 +52,16 @@ pub fn parse_block<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Block>
// Parse closing brace // Parse closing brace
let (_closing_brace, next_pos) = let (_closing_brace, next_pos) =
match parse_token_type(tokens, current_pos, TokenType::RightBrace) { match parse_token_type(tokens, current_pos, TokenType::RightBrace) {
ParseResult::Ok(t, next) => (t, next), Ok((t, next)) => (t, next),
ParseResult::Err(err) => return ParseResult::Err(err), Err(ParsingError::Err(err)) => return ParseResult::Err(err),
ParseResult::Mismatch(t) => { Err(ParsingError::Mismatch(t)) => {
return ParseResult::Err(SyntaxError { return ParseResult::Err(SyntaxError {
reason: String::from("Expected a closing brace after the block body."), reason: String::from("Expected a closing brace after the block body."),
error_start: t.position, error_start: t.position,
error_end: t.get_end_position(), error_end: t.get_end_position(),
}); });
} }
ParseResult::Unmatched => { Err(ParsingError::Unmatched) => {
return ParseResult::Err(SyntaxError { return ParseResult::Err(SyntaxError {
reason: String::from("Expected a closing brace after the block body."), reason: String::from("Expected a closing brace after the block body."),
error_start: opening_brace.position, error_start: opening_brace.position,

View File

@ -4,7 +4,7 @@ use crate::{
syntax::{ syntax::{
ast::{functions::ArgumentsList, Expression}, ast::{functions::ArgumentsList, Expression},
utils::parse_token_type, utils::parse_token_type,
ParseResult, ParseResult, ParsingError,
}, },
}; };
@ -13,10 +13,10 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Argument
let (opening_paren, next_pos) = let (opening_paren, next_pos) =
match parse_token_type(tokens, current_pos, TokenType::LeftParen) { match parse_token_type(tokens, current_pos, TokenType::LeftParen) {
ParseResult::Ok(t, next) => (t, next), Ok((t, next)) => (t, next),
ParseResult::Err(err) => return ParseResult::Err(err), Err(ParsingError::Err(err)) => return ParseResult::Err(err),
ParseResult::Mismatch(t) => return ParseResult::Mismatch(t), Err(ParsingError::Mismatch(t)) => return ParseResult::Mismatch(t),
ParseResult::Unmatched => return ParseResult::Unmatched, Err(ParsingError::Unmatched) => return ParseResult::Unmatched,
}; };
current_pos = next_pos; current_pos = next_pos;
@ -37,34 +37,34 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult<Argument
// Parse comma. This also parses a trailing comma // Parse comma. This also parses a trailing comma
match parse_token_type(tokens, current_pos, TokenType::Comma) { match parse_token_type(tokens, current_pos, TokenType::Comma) {
ParseResult::Ok(_, next) => { Ok((_, next)) => {
current_pos = next; current_pos = next;
} }
// This should never happen // This should never happen
ParseResult::Err(err) => return ParseResult::Err(err), Err(ParsingError::Err(err)) => return ParseResult::Err(err),
ParseResult::Mismatch(_) => { Err(ParsingError::Mismatch(_)) => {
// Something other than a comma was found. It must be a closing paren ) // Something other than a comma was found. It must be a closing paren )
// Still, break the loop, assume there are no more arguments // Still, break the loop, assume there are no more arguments
// TODO: This could be a good place to write a detailed error? // TODO: This could be a good place to write a detailed error?
break; break;
} }
ParseResult::Unmatched => break, Err(ParsingError::Unmatched) => break,
}; };
} }
// Parse closing paren // Parse closing paren
let (_closing_paren, next_pos) = let (_closing_paren, next_pos) =
match parse_token_type(tokens, current_pos, TokenType::RightParen) { match parse_token_type(tokens, current_pos, TokenType::RightParen) {
ParseResult::Ok(t, next) => (t, next), Ok((t, next)) => (t, next),
ParseResult::Err(err) => return ParseResult::Err(err), Err(ParsingError::Err(err)) => return ParseResult::Err(err),
ParseResult::Mismatch(t) => { Err(ParsingError::Mismatch(t)) => {
return ParseResult::Err(SyntaxError { return ParseResult::Err(SyntaxError {
reason: String::from("Expected a closing paren after the function identifier."), reason: String::from("Expected a closing paren after the function identifier."),
error_start: t.position, error_start: t.position,
error_end: t.get_end_position(), error_end: t.get_end_position(),
}); });
} }
ParseResult::Unmatched => { Err(ParsingError::Unmatched) => {
return ParseResult::Err(SyntaxError { return ParseResult::Err(SyntaxError {
reason: String::from("Expected a closing paren after the function identifier."), reason: String::from("Expected a closing paren after the function identifier."),
error_start: opening_paren.position, error_start: opening_paren.position,

View File

@ -14,23 +14,23 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResult<Functi
// `fun` keyword // `fun` keyword
let (fun_keyword, next_pos) = match parse_token_type(tokens, current_pos, TokenType::FUN) { let (fun_keyword, next_pos) = match parse_token_type(tokens, current_pos, TokenType::FUN) {
ParseResult::Ok(t, next) => (t, next), Ok((t, next)) => (t, next),
_ => return Err(ParsingError::Unmatched), _ => return Err(ParsingError::Unmatched),
}; };
current_pos = next_pos; current_pos = next_pos;
let (identifier, next_pos) = match parse_token_type(tokens, current_pos, TokenType::Identifier) let (identifier, next_pos) = match parse_token_type(tokens, current_pos, TokenType::Identifier)
{ {
ParseResult::Ok(id, next) => (id, next), Ok((id, next)) => (id, next),
ParseResult::Err(err) => return Err(ParsingError::Err(err)), Err(ParsingError::Err(err)) => return Err(ParsingError::Err(err)),
ParseResult::Mismatch(wrong_token) => { Err(ParsingError::Mismatch(wrong_token)) => {
return Err(ParsingError::Err(SyntaxError { return Err(ParsingError::Err(SyntaxError {
reason: String::from("Expected an identifier after the `fun` keyword."), reason: String::from("Expected an identifier after the `fun` keyword."),
error_start: wrong_token.position, error_start: wrong_token.position,
error_end: wrong_token.get_end_position(), error_end: wrong_token.get_end_position(),
})); }));
} }
ParseResult::Unmatched => { Err(ParsingError::Unmatched) => {
return Err(ParsingError::Err(SyntaxError { return Err(ParsingError::Err(SyntaxError {
reason: String::from("Expected an identifier after the `fun` keyword."), reason: String::from("Expected an identifier after the `fun` keyword."),
error_start: fun_keyword.position, error_start: fun_keyword.position,

View File

@ -14,10 +14,10 @@ pub fn parse_params_list<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResul
let (opening_paren, next_pos) = let (opening_paren, next_pos) =
match parse_token_type(tokens, current_pos, TokenType::LeftParen) { match parse_token_type(tokens, current_pos, TokenType::LeftParen) {
ParseResult::Ok(t, next) => (t, next), Ok((t, next)) => (t, next),
ParseResult::Err(err) => return Err(ParsingError::Err(err)), Err(ParsingError::Err(err)) => return Err(ParsingError::Err(err)),
ParseResult::Mismatch(t) => return Err(ParsingError::Mismatch(&t)), Err(ParsingError::Mismatch(t)) => return Err(ParsingError::Mismatch(&t)),
ParseResult::Unmatched => return Err(ParsingError::Unmatched), Err(ParsingError::Unmatched) => return Err(ParsingError::Unmatched),
}; };
current_pos = next_pos; current_pos = next_pos;
@ -44,34 +44,34 @@ pub fn parse_params_list<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParsingResul
// Parse comma. This also parses a trailing comma // Parse comma. This also parses a trailing comma
match parse_token_type(tokens, current_pos, TokenType::Comma) { match parse_token_type(tokens, current_pos, TokenType::Comma) {
ParseResult::Ok(_, next) => { Ok((_, next)) => {
current_pos = next; current_pos = next;
} }
// This should never happen // This should never happen
ParseResult::Err(err) => return Err(ParsingError::Err(err)), Err(ParsingError::Err(err)) => return Err(ParsingError::Err(err)),
ParseResult::Mismatch(_) => { Err(ParsingError::Mismatch(_)) => {
// Something other than a comma was found. It must be a closing paren ) // Something other than a comma was found. It must be a closing paren )
// Still, break the loop, assume there are no more arguments // Still, break the loop, assume there are no more arguments
// TODO: This could be a good place to write a detailed error? // TODO: This could be a good place to write a detailed error?
break; break;
} }
ParseResult::Unmatched => break, Err(ParsingError::Unmatched) => break,
}; };
} }
// Parse closing paren // Parse closing paren
let (_closing_paren, next_pos) = let (_closing_paren, next_pos) =
match parse_token_type(tokens, current_pos, TokenType::RightParen) { match parse_token_type(tokens, current_pos, TokenType::RightParen) {
ParseResult::Ok(t, next) => (t, next), Ok((t, next)) => (t, next),
ParseResult::Err(err) => return Err(ParsingError::Err(err)), Err(ParsingError::Err(err)) => return Err(ParsingError::Err(err)),
ParseResult::Mismatch(t) => { Err(ParsingError::Mismatch(t)) => {
return Err(ParsingError::Err(SyntaxError { return Err(ParsingError::Err(SyntaxError {
reason: String::from("Expected a closing paren after the function identifier."), reason: String::from("Expected a closing paren after the function identifier."),
error_start: t.position, error_start: t.position,
error_end: t.get_end_position(), error_end: t.get_end_position(),
})); }));
} }
ParseResult::Unmatched => { Err(ParsingError::Unmatched) => {
return Err(ParsingError::Err(SyntaxError { return Err(ParsingError::Err(SyntaxError {
reason: String::from("Expected a closing paren after the function identifier."), reason: String::from("Expected a closing paren after the function identifier."),
error_start: opening_paren.position, error_start: opening_paren.position,
@ -95,32 +95,32 @@ fn parse_param_definition<'a>(tokens: &'a Vec<Token>, pos: usize) -> ParseResult
let mut current_pos = pos; let mut current_pos = pos;
let (datatype, next_pos) = let (datatype, next_pos) =
match utils::parse_token_type(tokens, current_pos, TokenType::Datatype) { match utils::parse_token_type(tokens, current_pos, TokenType::Datatype) {
ParseResult::Ok(token, next) => (token, next), Ok((token, next)) => (token, next),
ParseResult::Err(err) => { Err(ParsingError::Err(err)) => {
return ParseResult::Err(err); return ParseResult::Err(err);
} }
// If there is no datatype this construction doesn't apply. // If there is no datatype this construction doesn't apply.
// Return a mismatch and let the caller handle it // Return a mismatch and let the caller handle it
ParseResult::Mismatch(t) => return ParseResult::Mismatch(t), Err(ParsingError::Mismatch(t)) => return ParseResult::Mismatch(t),
ParseResult::Unmatched => return ParseResult::Unmatched, Err(ParsingError::Unmatched) => return ParseResult::Unmatched,
}; };
current_pos = next_pos; current_pos = next_pos;
let (identifier, next_pos) = let (identifier, next_pos) =
match utils::parse_token_type(tokens, current_pos, TokenType::Identifier) { match utils::parse_token_type(tokens, current_pos, TokenType::Identifier) {
ParseResult::Ok(token, next) => (token, next), Ok((token, next)) => (token, next),
ParseResult::Err(err) => { Err(ParsingError::Err(err)) => {
return ParseResult::Err(err); return ParseResult::Err(err);
} }
// However, if we fail to parse an identifier, it's an error // However, if we fail to parse an identifier, it's an error
ParseResult::Mismatch(_) => { Err(ParsingError::Mismatch(_)) => {
return ParseResult::Err(SyntaxError { return ParseResult::Err(SyntaxError {
reason: String::from("Expected an identifier for the parameter."), reason: String::from("Expected an identifier for the parameter."),
error_start: tokens[pos].position, error_start: tokens[pos].position,
error_end: tokens[pos].get_end_position(), error_end: tokens[pos].get_end_position(),
}); });
} }
ParseResult::Unmatched => { Err(ParsingError::Unmatched) => {
return ParseResult::Err(SyntaxError { return ParseResult::Err(SyntaxError {
reason: String::from("Expected an identifier for the parameter."), reason: String::from("Expected an identifier for the parameter."),
error_start: tokens[pos].position, error_start: tokens[pos].position,

View File

@ -3,7 +3,7 @@ use crate::{
utils::Result3, utils::Result3,
}; };
use super::ParseResult; use super::{ParsingError, ParsingResult};
pub trait Tokenizer { pub trait Tokenizer {
fn get_significant<'a>(&'a self, index: usize) -> Option<(&'a Token, usize)>; fn get_significant<'a>(&'a self, index: usize) -> Option<(&'a Token, usize)>;
@ -33,24 +33,6 @@ impl Tokenizer for Vec<Token> {
} }
} }
/// Checks whether the token located exactly at `pos` has type `token_type`.
///
/// **Whitespace and newlines are not skipped**: only the token at `pos` is inspected.
///
/// Returns:
/// - `Result3::Ok` with the token when its type equals `token_type`.
/// - `Result3::None` when `pos` is out of bounds, or when the token at `pos`
///   is an `EOF` or `NewLine` token that was not the requested type.
/// - `Result3::Err` with the token found at `pos` in every other case.
pub fn parse_immediate_token_type(
    tokens: &Vec<Token>,
    pos: usize,
    token_type: TokenType,
) -> Result3<&Token> {
    // Nothing at `pos`: there is no token to compare against.
    let candidate = match tokens.get(pos) {
        Some(found) => found,
        None => return Result3::None,
    };

    // The requested type is checked first, so asking for EOF/NewLine still succeeds.
    if candidate.token_type == token_type {
        return Result3::Ok(candidate);
    }

    let at_line_or_file_end =
        candidate.token_type == TokenType::EOF || candidate.token_type == TokenType::NewLine;
    if at_line_or_file_end {
        Result3::None
    } else {
        Result3::Err(candidate)
    }
}
/// Expects the token at `pos` to be an operator of value `operator`. Doesn't ignore whitespace or newlines /// Expects the token at `pos` to be an operator of value `operator`. Doesn't ignore whitespace or newlines
pub fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Result3<&Token> { pub fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Result3<&Token> {
match tokens.get(pos) { match tokens.get(pos) {
@ -66,13 +48,13 @@ pub fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Result
/// Expects the token at `pos` to be of type `token_type`, and returns the token and the next position. /// Expects the token at `pos` to be of type `token_type`, and returns the token and the next position.
/// ///
/// Ignores all whitespace and newlines. /// Ignores all whitespace and newlines.
/// ///
/// Only returns: Ok, Unmatched, Mismatch
pub fn parse_token_type( pub fn parse_token_type(
tokens: &Vec<Token>, tokens: &Vec<Token>,
pos: usize, pos: usize,
token_type: TokenType, token_type: TokenType,
) -> ParseResult<&Token> { ) -> ParsingResult<&Token> {
let mut current_pos = pos; let mut current_pos = pos;
// Ignore all whitespace and newlines // Ignore all whitespace and newlines
@ -88,11 +70,11 @@ pub fn parse_token_type(
} }
match tokens.get(current_pos) { match tokens.get(current_pos) {
Some(t) if t.token_type == token_type => ParseResult::Ok(t, current_pos + 1), Some(t) if t.token_type == token_type => Ok((t, current_pos + 1)),
Some(t) if t.token_type == TokenType::EOF || t.token_type == TokenType::NewLine => { Some(t) if t.token_type == TokenType::EOF || t.token_type == TokenType::NewLine => {
ParseResult::Unmatched Err(ParsingError::Unmatched)
} }
Some(t) => ParseResult::Mismatch(t), Some(t) => Err(ParsingError::Mismatch(t)),
None => ParseResult::Unmatched, None => Err(ParsingError::Unmatched),
} }
} }