2022-11-28 23:33:34 +00:00
|
|
|
mod utils;
|
|
|
|
mod scanner;
|
|
|
|
use super::token::{self, Token};
|
|
|
|
|
|
|
|
/// Characters of the input being scanned, addressed by index.
type Chars = Vec<char>;
|
|
|
|
|
2022-11-30 13:38:43 +00:00
|
|
|
pub enum LexResult {
|
|
|
|
// A token was scanned
|
|
|
|
Some(Token, usize),
|
|
|
|
// No token was found, but there was no error (EOF)
|
|
|
|
None(usize),
|
|
|
|
Err(String),
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2022-11-28 23:33:34 +00:00
|
|
|
/// Scans and returns all the tokens in the input String
|
2022-11-29 00:16:55 +00:00
|
|
|
pub fn get_tokens(input: &String) -> Result<Vec<Token>, String> {
|
2022-11-28 23:33:34 +00:00
|
|
|
let chars: Vec<char> = input.chars().into_iter().collect();
|
|
|
|
let mut results = Vec::new();
|
|
|
|
let mut current_pos: usize = 0;
|
|
|
|
|
|
|
|
while has_input(&chars, current_pos) {
|
2022-11-29 00:16:55 +00:00
|
|
|
match next_token(&chars, current_pos) {
|
2022-11-30 13:38:43 +00:00
|
|
|
LexResult::Some(token, next_pos) => {
|
2022-11-29 00:16:55 +00:00
|
|
|
results.push(token);
|
|
|
|
current_pos = next_pos;
|
|
|
|
},
|
2022-11-30 13:38:43 +00:00
|
|
|
LexResult::None(next_pos) => {
|
2022-11-29 00:16:55 +00:00
|
|
|
current_pos = next_pos;
|
|
|
|
},
|
2022-11-30 13:38:43 +00:00
|
|
|
LexResult::Err(reason) => return Err(reason),
|
2022-11-28 23:33:34 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
results.push(token::new_eof(0));
|
2022-11-29 00:16:55 +00:00
|
|
|
Ok(results)
|
2022-11-28 23:33:34 +00:00
|
|
|
}
|
|
|
|
|
2022-11-30 13:38:43 +00:00
|
|
|
fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
|
2022-11-28 23:33:34 +00:00
|
|
|
let next_char = peek(chars, current_pos);
|
|
|
|
|
2022-11-30 13:38:43 +00:00
|
|
|
// If EOF is reached return nothing but the current position
|
2022-11-29 00:16:55 +00:00
|
|
|
if next_char == '\0' {
|
2022-11-30 13:38:43 +00:00
|
|
|
return LexResult::None(current_pos)
|
2022-11-29 00:16:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Handle whitespace recursively
|
2022-11-28 23:33:34 +00:00
|
|
|
if next_char == ' ' {
|
|
|
|
return next_token(chars, current_pos + 1)
|
|
|
|
}
|
|
|
|
|
2022-12-01 13:33:48 +00:00
|
|
|
// Scanners
|
2022-11-30 13:38:43 +00:00
|
|
|
None
|
2022-12-01 17:53:14 +00:00
|
|
|
.or_else(|| scanner::number(next_char, chars, current_pos))
|
|
|
|
.or_else(|| scanner::identifier(next_char, chars, current_pos))
|
|
|
|
.or_else(|| scanner::string(next_char, chars, current_pos))
|
|
|
|
.or_else(|| scanner::operator(next_char, chars, current_pos))
|
|
|
|
.or_else(|| scanner::grouping_sign(next_char, chars, current_pos))
|
2022-11-30 13:38:43 +00:00
|
|
|
.unwrap_or_else(|| {
|
|
|
|
LexResult::Err(format!("Unrecognized character: {}", next_char))
|
|
|
|
})
|
2022-11-28 23:33:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns the character at `pos`, or `'\0'` when `pos` is past the end of `input`.
///
/// Takes a slice so it works with any contiguous sequence of `char`s; a
/// `&Vec<char>` argument is accepted through deref coercion.
fn peek(input: &[char], pos: usize) -> char {
    input.get(pos).copied().unwrap_or('\0')
}
|
|
|
|
|
|
|
|
/// Returns `true` while `current_pos` still points inside `input`.
///
/// Takes a slice; a `&Vec<char>` argument is accepted through deref coercion.
fn has_input(input: &[char], current_pos: usize) -> bool {
    current_pos < input.len()
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use token::TokenType;

    /// Should return an EOF token if the input has no tokens
    #[test]
    fn test1() {
        // NOTE(review): the last two inputs are identical as written; the
        // third may have been meant to hold several spaces — confirm.
        for source in ["", " ", " "].iter() {
            let input = String::from(*source);
            let tokens = get_tokens(&input).unwrap();
            assert_eq!(1, tokens.len());
            let first = tokens.get(0).unwrap();
            assert_eq!(TokenType::EOF, first.token_type);
        }
    }

    /// Should scan a number that is followed by a space
    #[test]
    fn t() {
        let input = String::from("126 ");
        let chars: Vec<char> = input.chars().collect();

        assert_eq!(4, chars.len());
        assert!(has_input(&chars, 0));

        match next_token(&chars, 0) {
            LexResult::Some(t, _) => assert_eq!("126", t.value),
            _ => panic!(),
        }
    }

    /// Should scan numbers
    #[test]
    fn number_test() {
        let input = String::from("126 278.98 0.282398 1789e+1 239.3298e-103");
        let tokens = get_tokens(&input).unwrap();

        // The first three tokens are checked for both type and text.
        let typed_numbers = vec!["126", "278.98", "0.282398"];
        for (index, expected) in typed_numbers.into_iter().enumerate() {
            let token = tokens.get(index).unwrap();
            assert_eq!(TokenType::Number, token.token_type);
            assert_eq!(expected, token.value);
        }

        // Numbers with exponents: only the text is checked.
        assert_eq!("1789e+1", tokens.get(3).unwrap().value);
        assert_eq!("239.3298e-103", tokens.get(4).unwrap().value);
        assert_eq!(TokenType::EOF, tokens.get(5).unwrap().token_type);
    }

    /// Should scan grouping signs
    #[test]
    fn grouping_sign_test() {
        let input = String::from("( ) { } [ ]");
        let tokens = get_tokens(&input).unwrap();

        let expected = vec![
            (TokenType::LeftParen, "("),
            (TokenType::RightParen, ")"),
            (TokenType::LeftBrace, "{"),
            (TokenType::RightBrace, "}"),
            (TokenType::LeftBracket, "["),
            (TokenType::RightBracket, "]"),
        ];

        for (index, (token_type, value)) in expected.into_iter().enumerate() {
            let t = tokens.get(index).unwrap();
            assert_eq!(token_type, t.token_type);
            assert_eq!(value, t.value);
        }
    }
}
|