[syntax] Ignore whitespace when parsing a function declaration

This commit is contained in:
Araozu 2023-09-19 20:30:49 -05:00
parent 807c46314b
commit 6604632d9e
2 changed files with 204 additions and 93 deletions

View File

@ -34,7 +34,7 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
*/ */
// Parse identifier // Parse identifier
let identifier = match expect_token_w( let (identifier, next_pos) = match expect_token_w(
tokens, tokens,
current_pos, current_pos,
TokenType::Identifier, TokenType::Identifier,
@ -44,9 +44,9 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
Ok(t) => t, Ok(t) => t,
Err(err) => return err, Err(err) => return err,
}; };
current_pos += 1; current_pos = next_pos;
let opening_paren = match expect_token_w( let (opening_paren, next_pos) = match expect_token_w(
tokens, tokens,
current_pos, current_pos,
TokenType::LeftParen, TokenType::LeftParen,
@ -56,89 +56,49 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult>
Ok(t) => t, Ok(t) => t,
Err(err) => return err, Err(err) => return err,
}; };
current_pos += 1; current_pos = next_pos;
// Parse a closing paren // Parse a closing paren
let closing_paren = match try_token_type(tokens, current_pos, TokenType::RightParen) { let (closing_paren, next_pos) = match expect_token_w(
Result3::Ok(t) => t, tokens,
Result3::Err(t) => { current_pos,
// The parser found a token, but it's not an opening paren TokenType::RightParen,
return Some(SyntaxResult::Err(SyntaxError { "Expected a closing paren afted the function identifier.".into(),
reason: format!( opening_paren,
"There should be a closing paren after the parameter list, but found `{}`", ) {
t.value Ok(t) => t,
), Err(err) => return err,
error_start: t.position,
error_end: t.get_end_position(),
}));
}
Result3::None => {
// The parser didn't find any token
return Some(SyntaxResult::Err(SyntaxError {
reason: format!(
"There should be a closing paren after the parameter list, but found nothing"
),
error_start: opening_paren.position,
error_end: opening_paren.get_end_position(),
}));
}
}; };
current_pos += 1; current_pos = next_pos;
// TODO: Replace by block parsing
// Parse opening brace // Parse opening brace
let opening_brace = match try_token_type(tokens, current_pos, TokenType::LeftBrace) { let (opening_brace, next_pos) = match expect_token_w(
Result3::Ok(t) => t, tokens,
Result3::Err(t) => { current_pos,
// The parser found a token, but it's not an opening brace TokenType::LeftBrace,
return Some(SyntaxResult::Err(SyntaxError { "Expected an opening brace afted the parameter list.".into(),
reason: format!( closing_paren,
"There should be an opening brace after the parameter list, but found `{}`", ) {
t.value Ok(t) => t,
), Err(err) => return err,
error_start: t.position,
error_end: t.get_end_position(),
}));
}
Result3::None => {
// The parser didn't find any token
return Some(SyntaxResult::Err(SyntaxError {
reason: format!(
"There should be an opening brace after the parameter list, but found nothing"
),
error_start: closing_paren.position,
error_end: closing_paren.get_end_position(),
}));
}
}; };
current_pos += 1; current_pos = next_pos;
// Parse closing brace // Parse closing brace
let _closing_brace = match try_token_type(tokens, current_pos, TokenType::RightBrace) { let (_closing_brace, next_pos) = match expect_token_w(
Result3::Ok(t) => t, tokens,
Result3::Err(t) => { current_pos,
// The parser found a token, but it's not an opening brace TokenType::RightBrace,
return Some(SyntaxResult::Err(SyntaxError { "Expected a closing brace after afted the function body.".into(),
reason: format!( opening_brace,
"There should be a closing brace after the function body, but found `{}`", ) {
t.value Ok(t) => t,
), Err(err) => return err,
error_start: t.position,
error_end: t.get_end_position(),
}));
}
Result3::None => {
// The parser didn't find any token
return Some(SyntaxResult::Err(SyntaxError {
reason: format!(
"There should be a closing brace after the function body, but found nothing"
),
error_start: opening_brace.position,
error_end: opening_brace.get_end_position(),
}));
}
}; };
current_pos += 1; current_pos = next_pos;
// Construct and return the function declaration // Construct and return the function declaration
Some(SyntaxResult::Ok( Some(SyntaxResult::Ok(
@ -236,7 +196,7 @@ mod tests {
Some(SyntaxResult::Err(err)) => { Some(SyntaxResult::Err(err)) => {
assert_eq!( assert_eq!(
err.reason, err.reason,
"There should be a closing paren after the parameter list, but found `=`" "Expected a closing paren afted the function identifier."
); );
assert_eq!(err.error_start, 7); assert_eq!(err.error_start, 7);
assert_eq!(err.error_end, 8); assert_eq!(err.error_end, 8);
@ -250,7 +210,7 @@ mod tests {
Some(SyntaxResult::Err(err)) => { Some(SyntaxResult::Err(err)) => {
assert_eq!( assert_eq!(
err.reason, err.reason,
"There should be a closing paren after the parameter list, but found nothing" "Expected a closing paren afted the function identifier."
); );
assert_eq!(err.error_start, 6); assert_eq!(err.error_start, 6);
assert_eq!(err.error_end, 7); assert_eq!(err.error_end, 7);
@ -302,7 +262,7 @@ mod tests {
Some(SyntaxResult::Err(err)) => { Some(SyntaxResult::Err(err)) => {
assert_eq!( assert_eq!(
err.reason, err.reason,
"There should be an opening brace after the parameter list, but found `=`" "Expected an opening brace afted the parameter list."
); );
assert_eq!(err.error_start, 9); assert_eq!(err.error_start, 9);
assert_eq!(err.error_end, 10); assert_eq!(err.error_end, 10);
@ -316,7 +276,7 @@ mod tests {
Some(SyntaxResult::Err(err)) => { Some(SyntaxResult::Err(err)) => {
assert_eq!( assert_eq!(
err.reason, err.reason,
"There should be an opening brace after the parameter list, but found nothing" "Expected an opening brace afted the parameter list."
); );
assert_eq!(err.error_start, 7); assert_eq!(err.error_start, 7);
assert_eq!(err.error_end, 8); assert_eq!(err.error_end, 8);
@ -334,7 +294,7 @@ mod tests {
Some(SyntaxResult::Err(err)) => { Some(SyntaxResult::Err(err)) => {
assert_eq!( assert_eq!(
err.reason, err.reason,
"There should be a closing brace after the function body, but found `20`" "Expected a closing brace after afted the function body."
); );
assert_eq!(err.error_start, 11); assert_eq!(err.error_start, 11);
assert_eq!(err.error_end, 13); assert_eq!(err.error_end, 13);
@ -349,7 +309,7 @@ mod tests {
Some(SyntaxResult::Err(err)) => { Some(SyntaxResult::Err(err)) => {
assert_eq!( assert_eq!(
err.reason, err.reason,
"There should be a closing brace after the function body, but found nothing" "Expected a closing brace after afted the function body."
); );
assert_eq!(err.error_start, 9); assert_eq!(err.error_start, 9);
assert_eq!(err.error_end, 10); assert_eq!(err.error_end, 10);
@ -374,3 +334,90 @@ mod tests {
} }
} }
} }
#[cfg(test)]
mod whitespace_test {
    use crate::{lexic::get_tokens, syntax::ast::TopLevelDeclaration};

    use super::*;

    /// Lexes `source`, parses it from token position 0 with `try_parse`, and
    /// asserts that the result is a function declaration whose identifier is `id`.
    ///
    /// Panics if lexing fails, if `try_parse` returns `None`, or if the parse
    /// result is anything other than a function declaration.
    fn assert_parses_function_named_id(source: &str) {
        let tokens = get_tokens(&String::from(source)).unwrap();
        let parsed = try_parse(&tokens, 0).unwrap();

        match parsed {
            SyntaxResult::Ok(TopLevelDeclaration::FunctionDeclaration(declaration), _) => {
                assert_eq!(declaration.identifier, Box::new(String::from("id")));
            }
            // Any other outcome is reported together with its debug representation.
            other => panic!("Expected a function declaration: {:?}", other),
        }
    }

    /// Newline between `fun` and the identifier.
    #[test]
    fn should_ignore_whitespace_1() {
        assert_parses_function_named_id("fun\nid() {}");
    }

    /// Newlines after `fun` and after the identifier.
    #[test]
    fn should_ignore_whitespace_2() {
        assert_parses_function_named_id("fun\nid\n() {}");
    }

    /// Newlines after `fun`, after the identifier, and inside the parens.
    #[test]
    fn should_ignore_whitespace_3() {
        assert_parses_function_named_id("fun\nid\n(\n) {}");
    }

    /// Newlines after the identifier, inside the parens, and before the braces.
    #[test]
    fn should_ignore_whitespace_4() {
        assert_parses_function_named_id("fun id\n(\n)\n{}");
    }

    /// Newlines after `fun`, before the opening brace, and inside the braces.
    #[test]
    fn should_ignore_whitespace_5() {
        assert_parses_function_named_id("fun\nid() \n{\n}");
    }
}

View File

@ -10,7 +10,9 @@ use super::SyntaxResult;
pub fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Result3<&Token> { pub fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Result3<&Token> {
match tokens.get(pos) { match tokens.get(pos) {
Some(t) if t.token_type == token_type => Result3::Ok(t), Some(t) if t.token_type == token_type => Result3::Ok(t),
Some(t) if t.token_type == TokenType::EOF || t.token_type == TokenType::NewLine => Result3::None, Some(t) if t.token_type == TokenType::EOF || t.token_type == TokenType::NewLine => {
Result3::None
}
Some(t) => Result3::Err(t), Some(t) => Result3::Err(t),
None => Result3::None, None => Result3::None,
} }
@ -27,20 +29,82 @@ pub fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Result
} }
} }
pub fn expect_token_w<'a>( pub fn try_operator_w<'a>(
tokens: &'a Vec<Token>, tokens: &'a Vec<Token>,
pos: usize, pos: usize,
token_type: TokenType, operator: String,
error_message: String, error_message: String,
prev_token: &Token, prev_token: &Token,
) -> Result<&'a Token, Option<SyntaxResult>> { ) -> Result<(&'a Token, usize), Option<SyntaxResult>> {
match tokens.get(pos) { let mut current_pos = pos;
Some(t) if t.token_type == token_type => Ok(t),
Some(t) if t.token_type == TokenType::EOF || t.token_type == TokenType::NewLine => Err(Some(SyntaxResult::Err(SyntaxError { // Ignore all whitespace and newlines
reason: error_message, while let Some(t) = tokens.get(current_pos) {
error_start: prev_token.position, if t.token_type == TokenType::INDENT
error_end: prev_token.get_end_position(), || t.token_type == TokenType::DEDENT
}))), || t.token_type == TokenType::NewLine
{
current_pos += 1;
} else {
break;
}
}
match tokens.get(current_pos) {
Some(t) if t.token_type == TokenType::Operator && t.value == operator => {
Ok((t, current_pos + 1))
}
Some(t) if t.token_type == TokenType::NewLine || t.token_type == TokenType::EOF => {
Err(Some(SyntaxResult::Err(SyntaxError {
reason: error_message,
error_start: prev_token.position,
error_end: prev_token.get_end_position(),
})))
}
Some(t) => Err(Some(SyntaxResult::Err(SyntaxError {
reason: error_message,
error_start: t.position,
error_end: t.get_end_position(),
}))),
None => Err(Some(SyntaxResult::Err(SyntaxError {
reason: error_message,
error_start: prev_token.position,
error_end: prev_token.get_end_position(),
}))),
}
}
/// Ignores all whitespace and newlines
pub fn expect_token_w<'a>(
tokens: &'a Vec<Token>,
pos: usize,
token_type: TokenType,
error_message: String,
prev_token: &Token,
) -> Result<(&'a Token, usize), Option<SyntaxResult>> {
let mut current_pos = pos;
// Ignore all whitespace and newlines
while let Some(t) = tokens.get(current_pos) {
if t.token_type == TokenType::INDENT
|| t.token_type == TokenType::DEDENT
|| t.token_type == TokenType::NewLine
{
current_pos += 1;
} else {
break;
}
}
match tokens.get(current_pos) {
Some(t) if t.token_type == token_type => Ok((t, current_pos + 1)),
Some(t) if t.token_type == TokenType::EOF || t.token_type == TokenType::NewLine => {
Err(Some(SyntaxResult::Err(SyntaxError {
reason: error_message,
error_start: prev_token.position,
error_end: prev_token.get_end_position(),
})))
}
Some(t) => Err(Some(SyntaxResult::Err(SyntaxError { Some(t) => Err(Some(SyntaxResult::Err(SyntaxError {
reason: error_message, reason: error_message,
error_start: t.position, error_start: t.position,