use crate::lexic::token::{Token, TokenType};

use super::{ParsingError, ParsingResult};

pub trait Tokenizer {
    fn get_significant<'a>(&'a self, index: usize) -> Option<(&'a Token, usize)>;
}

impl Tokenizer for Vec<Token> {
    /// Returns the first non-whitespace token at or after `index`, along with
    /// the position where that token was found.
    fn get_significant<'a>(&'a self, index: usize) -> Option<(&'a Token, usize)> {
        let mut current_pos = index;

        // Ignore all whitespace, newlines and comments
        loop {
            match self.get(current_pos) {
                Some(token) => {
                    if token.token_type == TokenType::INDENT
                        || token.token_type == TokenType::DEDENT
                        || token.token_type == TokenType::NewLine
                        || token.token_type == TokenType::Comment
                        || token.token_type == TokenType::MultilineComment
                    {
                        current_pos += 1;
                    } else {
                        return Some((token, current_pos));
                    }
                }
                None => return None,
            }
        }
    }
}

/// Expects the token at `pos` to be an operator of value `operator`.
/// Doesn't ignore whitespace or newlines.
pub fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> ParsingResult<&Token> {
    match tokens.get(pos) {
        Some(t) if t.token_type == TokenType::Operator && t.value == operator => Ok((t, pos + 1)),
        Some(t) if t.token_type == TokenType::NewLine || t.token_type == TokenType::EOF => {
            Err(ParsingError::Unmatched)
        }
        Some(t) => Err(ParsingError::Mismatch(t)),
        None => Err(ParsingError::Unmatched),
    }
}

/// Expects the token at `pos` to be of type `token_type`, and returns the token and the next position.
///
/// Ignores all whitespace, newlines and comments.
///
/// Only returns: Ok, Unmatched, Mismatch
pub fn parse_token_type(
    tokens: &Vec<Token>,
    pos: usize,
    token_type: TokenType,
) -> ParsingResult<&Token> {
    let mut current_pos = pos;

    // Ignore all whitespace, newlines and comments
    while let Some(t) = tokens.get(current_pos) {
        if t.token_type == TokenType::INDENT
            || t.token_type == TokenType::DEDENT
            || t.token_type == TokenType::NewLine
            || t.token_type == TokenType::Comment
            || t.token_type == TokenType::MultilineComment
        {
            current_pos += 1;
        } else {
            break;
        }
    }

    match tokens.get(current_pos) {
        Some(t) if t.token_type == token_type => Ok((t, current_pos + 1)),
        Some(t) if t.token_type == TokenType::EOF || t.token_type == TokenType::NewLine => {
            Err(ParsingError::Unmatched)
        }
        Some(t) => Err(ParsingError::Mismatch(t)),
        None => Err(ParsingError::Unmatched),
    }
}

/// Expects the token at `pos` to be a terminator (newline or EOF).
///
/// Ignores indentation and comments.
///
/// Only returns: Ok or Mismatch.
pub fn parse_terminator(tokens: &Vec<Token>, pos: usize) -> ParsingResult<()> {
    let mut current_pos = pos;

    // Ignore indentation and comments
    while let Some(t) = tokens.get(current_pos) {
        if t.token_type == TokenType::INDENT
            || t.token_type == TokenType::DEDENT
            || t.token_type == TokenType::Comment
            || t.token_type == TokenType::MultilineComment
        {
            current_pos += 1;
        } else {
            break;
        }
    }

    match tokens.get(current_pos) {
        Some(t) if t.token_type == TokenType::EOF || t.token_type == TokenType::NewLine => {
            Ok(((), current_pos + 1))
        }
        Some(t) => Err(ParsingError::Mismatch(t)),
        None => unreachable!("Stream of tokens finished before getting an EOF"),
    }
}

#[cfg(test)]
mod tests {
    use crate::{
        lexic::{get_tokens, token::TokenType},
        syntax::{
            parseable::ParsingError,
            utils::{parse_token_type, Tokenizer},
        },
    };

    use super::try_operator;

    // `try_operator` at an out-of-range position should report `Unmatched`.
    #[test]
    fn test_1() {
        let input = String::from("");
        let tokens = get_tokens(&input).unwrap();
        assert_eq!(1, tokens.len());

        match try_operator(&tokens, 10, "+".into()) {
            Ok(_) => panic!("Expected an error"),
            Err(error) => match error {
                ParsingError::Unmatched => {
                    assert!(true);
                }
                _ => panic!(
                    "Expected an error due to incorrect position, got {:?}",
                    error
                ),
            },
        }
    }
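
    // A sketch of `try_operator` reporting `Unmatched` when the next token is
    // the end of the stream. It assumes the lexer emits a single EOF token for
    // empty input; the tests above only verify that exactly one token is produced.
    #[test]
    fn try_operator_should_unmatch_on_eof() {
        let input = String::from("");
        let tokens = get_tokens(&input).unwrap();
        assert_eq!(1, tokens.len());

        // The (assumed) EOF token at position 0 hits the EOF/NewLine guard,
        // which reports Unmatched rather than Mismatch.
        match try_operator(&tokens, 0, "+".into()) {
            Ok(_) => panic!("Expected an error"),
            Err(ParsingError::Unmatched) => {}
            Err(error) => panic!("Expected ParsingError::Unmatched, got {:?}", error),
        }
    }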

    // `parse_token_type` at an out-of-range position should report `Unmatched`.
    #[test]
    fn test_2() {
        let input = String::from("");
        let tokens = get_tokens(&input).unwrap();
        assert_eq!(1, tokens.len());

        match parse_token_type(&tokens, 10, TokenType::Operator) {
            Ok(_) => panic!("Expected an error"),
            Err(error) => match error {
                ParsingError::Unmatched => {
                    assert!(true);
                }
                _ => panic!(
                    "Expected an error due to incorrect position, got {:?}",
                    error
                ),
            },
        }
    }
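
    // A sketch of `parse_token_type` reporting `Unmatched` when the next
    // significant token is the end of the stream, under the same assumption
    // that empty input lexes to a single EOF token.
    #[test]
    fn parse_token_type_should_unmatch_on_eof() {
        let input = String::from("");
        let tokens = get_tokens(&input).unwrap();
        assert_eq!(1, tokens.len());

        // Asking for an Operator at position 0 hits the EOF/NewLine guard.
        match parse_token_type(&tokens, 0, TokenType::Operator) {
            Ok(_) => panic!("Expected an error"),
            Err(ParsingError::Unmatched) => {}
            Err(error) => panic!("Expected ParsingError::Unmatched, got {:?}", error),
        }
    }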

    // `get_significant` past the end of the token stream should return None.
    #[test]
    fn test_3() {
        let input = String::from("");
        let tokens = get_tokens(&input).unwrap();
        assert_eq!(1, tokens.len());

        match tokens.get_significant(10) {
            Some(_) => panic!("Expected a None"),
            None => {}
        }
    }
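
    // Sketches of the successful paths of `get_significant` and
    // `parse_terminator`, again assuming that empty input lexes to a single
    // EOF token.
    #[test]
    fn get_significant_should_return_eof_token() {
        let input = String::from("");
        let tokens = get_tokens(&input).unwrap();
        assert_eq!(1, tokens.len());

        // EOF is not in the skip list, so the (assumed) EOF token at index 0
        // should be returned together with its position.
        match tokens.get_significant(0) {
            Some((token, position)) => {
                assert!(token.token_type == TokenType::EOF);
                assert_eq!(0, position);
            }
            None => panic!("Expected a token"),
        }
    }

    #[test]
    fn parse_terminator_should_accept_eof() {
        let input = String::from("");
        let tokens = get_tokens(&input).unwrap();
        assert_eq!(1, tokens.len());

        // The (assumed) EOF token counts as a terminator, so parsing should
        // succeed and advance past it.
        match super::parse_terminator(&tokens, 0) {
            Ok(((), next_pos)) => assert_eq!(1, next_pos),
            Err(_) => panic!("Expected the EOF token to be accepted as a terminator"),
        }
    }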
}