Refactor and scan grouping signs
This commit is contained in:
parent
0951551a3e
commit
11ca7edb44
@ -4,6 +4,15 @@ use super::token::{self, Token};
|
|||||||
|
|
||||||
type Chars = Vec<char>;
|
type Chars = Vec<char>;
|
||||||
|
|
||||||
|
pub enum LexResult {
|
||||||
|
// A token was scanned
|
||||||
|
Some(Token, usize),
|
||||||
|
// No token was found, but there was no error (EOF)
|
||||||
|
None(usize),
|
||||||
|
Err(String),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/// Scans and returns all the tokens in the input String
|
/// Scans and returns all the tokens in the input String
|
||||||
pub fn get_tokens(input: &String) -> Result<Vec<Token>, String> {
|
pub fn get_tokens(input: &String) -> Result<Vec<Token>, String> {
|
||||||
let chars: Vec<char> = input.chars().into_iter().collect();
|
let chars: Vec<char> = input.chars().into_iter().collect();
|
||||||
@ -12,14 +21,14 @@ pub fn get_tokens(input: &String) -> Result<Vec<Token>, String> {
|
|||||||
|
|
||||||
while has_input(&chars, current_pos) {
|
while has_input(&chars, current_pos) {
|
||||||
match next_token(&chars, current_pos) {
|
match next_token(&chars, current_pos) {
|
||||||
Ok((Some(token), next_pos)) => {
|
LexResult::Some(token, next_pos) => {
|
||||||
results.push(token);
|
results.push(token);
|
||||||
current_pos = next_pos;
|
current_pos = next_pos;
|
||||||
},
|
},
|
||||||
Ok((None, next_pos)) => {
|
LexResult::None(next_pos) => {
|
||||||
current_pos = next_pos;
|
current_pos = next_pos;
|
||||||
},
|
},
|
||||||
Err(reason) => return Err(reason),
|
LexResult::Err(reason) => return Err(reason),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -27,12 +36,12 @@ pub fn get_tokens(input: &String) -> Result<Vec<Token>, String> {
|
|||||||
Ok(results)
|
Ok(results)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn next_token(chars: &Chars, current_pos: usize) -> Result<(Option<Token>, usize),String> {
|
fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
|
||||||
let next_char = peek(chars, current_pos);
|
let next_char = peek(chars, current_pos);
|
||||||
|
|
||||||
// If EOF is reached return nothing
|
// If EOF is reached, return no token, only the current position
|
||||||
if next_char == '\0' {
|
if next_char == '\0' {
|
||||||
return Ok((None, current_pos))
|
return LexResult::None(current_pos)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle whitespace recursively
|
// Handle whitespace recursively
|
||||||
@ -41,20 +50,19 @@ fn next_token(chars: &Chars, current_pos: usize) -> Result<(Option<Token>, usize
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Test number
|
// Test number
|
||||||
if utils::is_digit(next_char) {
|
None
|
||||||
match scanner::number(chars, current_pos) {
|
.or_else(|| {
|
||||||
Ok((token, next_pos)) => Ok((Some(token), next_pos)),
|
scanner::number(next_char, chars, current_pos)
|
||||||
Err(reason) => Err(reason),
|
})
|
||||||
}
|
.or_else(|| {
|
||||||
}
|
scanner::operator(next_char, chars, current_pos)
|
||||||
// Test operator
|
})
|
||||||
else if utils::is_operator(next_char) {
|
.or_else(|| {
|
||||||
let (token, next_pos) = scanner::operator(chars, current_pos);
|
scanner::grouping_sign(next_char, chars, current_pos)
|
||||||
Ok((Some(token), next_pos))
|
})
|
||||||
}
|
.unwrap_or_else(|| {
|
||||||
else {
|
LexResult::Err(format!("Unrecognized character: {}", next_char))
|
||||||
Err(format!("Unrecognized character: {}", next_char))
|
})
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn peek(input: &Chars, pos: usize) -> char {
|
fn peek(input: &Chars, pos: usize) -> char {
|
||||||
@ -103,11 +111,11 @@ mod tests {
|
|||||||
assert_eq!(4, chars.len());
|
assert_eq!(4, chars.len());
|
||||||
assert!(has_input(&chars, 0));
|
assert!(has_input(&chars, 0));
|
||||||
|
|
||||||
match next_token(&chars, 0).unwrap() {
|
match next_token(&chars, 0) {
|
||||||
(Some(t), _) => {
|
LexResult::Some(t, _) => {
|
||||||
assert_eq!("126", t.value)
|
assert_eq!("126", t.value)
|
||||||
},
|
},
|
||||||
(None, _) => {
|
_ => {
|
||||||
panic!()
|
panic!()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -116,7 +124,7 @@ mod tests {
|
|||||||
/// Should scan numbers
|
/// Should scan numbers
|
||||||
#[test]
|
#[test]
|
||||||
fn number_test() {
|
fn number_test() {
|
||||||
let input = String::from("126 278.98 0.282398");
|
let input = String::from("126 278.98 0.282398 1789e+1 239.3298e-103");
|
||||||
let tokens = get_tokens(&input).unwrap();
|
let tokens = get_tokens(&input).unwrap();
|
||||||
|
|
||||||
let t1 = tokens.get(0).unwrap();
|
let t1 = tokens.get(0).unwrap();
|
||||||
@ -130,10 +138,39 @@ mod tests {
|
|||||||
let t3 = tokens.get(2).unwrap();
|
let t3 = tokens.get(2).unwrap();
|
||||||
assert_eq!(TokenType::Number, t3.token_type);
|
assert_eq!(TokenType::Number, t3.token_type);
|
||||||
assert_eq!("0.282398", t3.value);
|
assert_eq!("0.282398", t3.value);
|
||||||
/*
|
|
||||||
assert_eq!("1798e+1", tokens.get(3).unwrap().value);
|
assert_eq!("1789e+1", tokens.get(3).unwrap().value);
|
||||||
assert_eq!("239.3298e-103", tokens.get(4).unwrap().value);
|
assert_eq!("239.3298e-103", tokens.get(4).unwrap().value);
|
||||||
assert_eq!(TokenType::EOF, tokens.get(5).unwrap().token_type);
|
assert_eq!(TokenType::EOF, tokens.get(5).unwrap().token_type);
|
||||||
*/
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn grouping_sign_test() {
|
||||||
|
let input = String::from("( ) { } [ ]");
|
||||||
|
let tokens = get_tokens(&input).unwrap();
|
||||||
|
|
||||||
|
let t = tokens.get(0).unwrap();
|
||||||
|
assert_eq!(TokenType::LeftParen, t.token_type);
|
||||||
|
assert_eq!("(", t.value);
|
||||||
|
|
||||||
|
let t = tokens.get(1).unwrap();
|
||||||
|
assert_eq!(TokenType::RightParen, t.token_type);
|
||||||
|
assert_eq!(")", t.value);
|
||||||
|
|
||||||
|
let t = tokens.get(2).unwrap();
|
||||||
|
assert_eq!(TokenType::LeftBrace, t.token_type);
|
||||||
|
assert_eq!("{", t.value);
|
||||||
|
|
||||||
|
let t = tokens.get(3).unwrap();
|
||||||
|
assert_eq!(TokenType::RightBrace, t.token_type);
|
||||||
|
assert_eq!("}", t.value);
|
||||||
|
|
||||||
|
let t = tokens.get(4).unwrap();
|
||||||
|
assert_eq!(TokenType::LeftBracket, t.token_type);
|
||||||
|
assert_eq!("[", t.value);
|
||||||
|
|
||||||
|
let t = tokens.get(5).unwrap();
|
||||||
|
assert_eq!(TokenType::RightBracket, t.token_type);
|
||||||
|
assert_eq!("]", t.value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,12 +1,53 @@
|
|||||||
use super::token::Token;
|
use super::{token::{TokenType, self}, utils, LexResult};
|
||||||
|
|
||||||
mod number;
|
mod number;
|
||||||
mod operator;
|
mod operator;
|
||||||
|
|
||||||
pub fn number(chars: &Vec<char>, start_pos: usize) -> Result<(Token, usize), String> {
|
/// Attempts to scan a number. Returns `None` if `c` is not a digit, so that other scanners can be chained
|
||||||
number::scan(chars, start_pos)
|
pub fn number(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||||
|
if utils::is_digit(c) {
|
||||||
|
match number::scan(chars, start_pos) {
|
||||||
|
Ok((token, next_pos)) => {
|
||||||
|
Some(LexResult::Some(token, next_pos))
|
||||||
|
},
|
||||||
|
Err(reason) => {
|
||||||
|
Some(LexResult::Err(reason))
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
None
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn operator(chars: &Vec<char>, start_pos: usize) -> (Token, usize) {
|
|
||||||
operator::scan(chars, start_pos)
|
/// Attempts to scan an operator. Returns `None` if `c` is not an operator character, so that other scanners can be chained
|
||||||
|
pub fn operator(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||||
|
if utils::is_operator(c) {
|
||||||
|
Some(operator::scan(chars, start_pos))
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Attempts to scan a grouping sign. Returns `None` if `c` is not a grouping sign, so that other scanners can be chained
|
||||||
|
pub fn grouping_sign(c: char, _: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||||
|
let token_type = match c {
|
||||||
|
'(' => TokenType::LeftParen,
|
||||||
|
')' => TokenType::RightParen,
|
||||||
|
'[' => TokenType::LeftBracket,
|
||||||
|
']' => TokenType::RightBracket,
|
||||||
|
'{' => TokenType::LeftBrace,
|
||||||
|
'}' => TokenType::RightBrace,
|
||||||
|
_ => return None,
|
||||||
|
};
|
||||||
|
|
||||||
|
let token = token::new_grouping_sign(
|
||||||
|
c.to_string(),
|
||||||
|
start_pos as i32,
|
||||||
|
token_type,
|
||||||
|
);
|
||||||
|
Some(LexResult::Some(token, start_pos + 1))
|
||||||
}
|
}
|
||||||
|
@ -1,25 +1,26 @@
|
|||||||
use crate::lexic::{token::{Token, self}, utils};
|
use crate::lexic::{token::{Token, self}, utils, LexResult};
|
||||||
|
|
||||||
|
|
||||||
/// Function to scan an operator
|
/// Function to scan an operator
|
||||||
///
|
///
|
||||||
/// This function assumes the character at `start_pos` is an operator
|
/// This function assumes the character at `start_pos` is an operator
|
||||||
pub fn scan(chars: &Vec<char>, start_pos: usize) -> (Token, usize) {
|
pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
|
||||||
scan_impl(chars, start_pos, String::from(""))
|
scan_impl(chars, start_pos, String::from(""))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> (Token, usize) {
|
pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
|
||||||
let next_char = chars.get(start_pos);
|
match chars.get(start_pos) {
|
||||||
|
Some(c) if utils::is_operator(*c) => {
|
||||||
if let Some(c) = next_char {
|
scan_impl(chars, start_pos + 1, utils::str_append(current, *c))
|
||||||
if utils::is_operator(*c) {
|
},
|
||||||
return scan_impl(chars, start_pos + 1, utils::str_append(current, *c))
|
_ => {
|
||||||
|
LexResult::Some(token::new_operator(current, start_pos as i32), start_pos)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return current value
|
|
||||||
(token::new_operator(current, start_pos as i32), start_pos)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
@ -59,11 +60,14 @@ mod tests {
|
|||||||
for op in operators {
|
for op in operators {
|
||||||
let input = str_to_vec(op);
|
let input = str_to_vec(op);
|
||||||
let start_pos = 0;
|
let start_pos = 0;
|
||||||
let (token, next) = scan(&input, start_pos);
|
match scan(&input, start_pos) {
|
||||||
|
LexResult::Some(token, next) => {
|
||||||
assert_eq!(1, next);
|
assert_eq!(1, next);
|
||||||
assert_eq!(TokenType::Operator, token.token_type);
|
assert_eq!(TokenType::Operator, token.token_type);
|
||||||
assert_eq!(op, token.value);
|
assert_eq!(op, token.value);
|
||||||
|
},
|
||||||
|
_ => panic!()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -91,11 +95,14 @@ mod tests {
|
|||||||
for op in operators {
|
for op in operators {
|
||||||
let input = str_to_vec(op);
|
let input = str_to_vec(op);
|
||||||
let start_pos = 0;
|
let start_pos = 0;
|
||||||
let (token, next) = scan(&input, start_pos);
|
match scan(&input, start_pos) {
|
||||||
|
LexResult::Some(token, next) => {
|
||||||
assert_eq!(2, next);
|
assert_eq!(2, next);
|
||||||
assert_eq!(TokenType::Operator, token.token_type);
|
assert_eq!(TokenType::Operator, token.token_type);
|
||||||
assert_eq!(op, token.value);
|
assert_eq!(op, token.value);
|
||||||
|
},
|
||||||
|
_ => panic!()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -18,3 +18,7 @@ pub fn is_operator(c: char) -> bool {
|
|||||||
|| c == '&' || c == '?' || c == '<' || c == '>'
|
|| c == '&' || c == '?' || c == '<' || c == '>'
|
||||||
|| c == '^' || c == '.' || c == ':'
|
|| c == '^' || c == '.' || c == ':'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn is_grouping_sign(c: char) -> bool {
|
||||||
|
c == '(' || c == ')' || c == '{' || c == '}' || c == '[' || c == ']'
|
||||||
|
}
|
||||||
|
@ -5,7 +5,6 @@ pub enum TokenType {
|
|||||||
Comment,
|
Comment,
|
||||||
Number,
|
Number,
|
||||||
String,
|
String,
|
||||||
Unit,
|
|
||||||
Operator,
|
Operator,
|
||||||
LeftParen,
|
LeftParen,
|
||||||
RightParen,
|
RightParen,
|
||||||
@ -52,3 +51,7 @@ pub fn new_operator(value: String, position: i32) -> Token {
|
|||||||
position
|
position
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn new_grouping_sign(value: String, position: i32, token_type: TokenType) -> Token {
|
||||||
|
Token {token_type, value, position}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user