Scan and keywords

This commit is contained in:
Araozu 2022-12-01 12:17:17 -05:00
parent e580167682
commit f3ee68fcf1
4 changed files with 172 additions and 122 deletions

View File

@ -1,4 +1,14 @@
use crate::lexic::{token, utils, LexResult};
use crate::{lexic::{token, utils, LexResult}, token::TokenType};
/// Checks whether `s` is a reserved keyword.
///
/// Returns `Some` with the matching `TokenType` for `"var"` and `"val"`, and
/// `None` for every other input. The comparison is exact and case-sensitive.
///
/// The parameter is a `&str` so that both string slices and `&String` values
/// (through deref coercion) can be passed without any conversion.
fn str_is_keyword(s: &str) -> Option<TokenType> {
    match s {
        "var" => Some(TokenType::VAR),
        "val" => Some(TokenType::VAL),
        // Anything else is left for the caller to treat as a plain identifier
        _ => None,
    }
}
pub fn scan(start_char: char, chars: &Vec<char>, start_pos: usize) -> LexResult {
scan_impl(chars, start_pos + 1, format!("{}", start_char))
@ -10,7 +20,12 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
scan_impl(chars, start_pos + 1, utils::str_append(current, *c))
},
_ => {
LexResult::Some(token::new_identifier(current, start_pos as i32), start_pos)
if let Some(token_type) = str_is_keyword(&current) {
LexResult::Some(token::new(current, start_pos as i32, token_type), start_pos)
}
else {
LexResult::Some(token::new_identifier(current, start_pos as i32), start_pos)
}
}
}
}
@ -119,4 +134,25 @@ mod tests {
}
}
}
// Keywords must be scanned into their dedicated token type
#[test]
fn test_4() {
    // Each case pairs the source text with the token type it must produce
    for (source, expected_type) in [("var", TokenType::VAR), ("val", TokenType::VAL)] {
        let chars = str_to_vec(source);
        let first_char = *chars.get(0).unwrap();

        match scan(first_char, &chars, 0) {
            LexResult::Some(token, next) => {
                // Both keywords are 3 chars long, so scanning stops at position 3
                assert_eq!(3, next);
                assert_eq!(expected_type, token.token_type);
                assert_eq!(source, token.value);
            }
            _ => panic!(),
        }
    }
}
}

View File

@ -28,7 +28,7 @@ pub fn grouping_sign(c: char, _: &Vec<char>, start_pos: usize) -> Option<LexResu
_ => return None,
};
let token = token::new_grouping_sign(
let token = token::new(
c.to_string(),
start_pos as i32,
token_type,

View File

@ -1,7 +1,10 @@
use crate::lexic::{token::{Token, self}, utils, LexResult};
use crate::lexic::{
token::{self, Token},
utils, LexResult,
};
/// Function to scan a number
///
///
/// This function assumes that the character at `start_pos` is a digit [0-9];
/// if it is not, the function will panic
pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
@ -12,35 +15,29 @@ pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
// Test if the input contains a hex number
(Some(c1), Some(c2)) if *c1 == '0' && (*c2 == 'x' || *c2 == 'X') => {
scan_hex(chars, start_pos + 2, String::from("0x"))
},
}
// Scan decimal/double/scientific otherwise
_ => scan_decimal(chars, start_pos, String::from(""))
_ => scan_decimal(chars, start_pos, String::from("")),
}
}
/// Recursively scans an integer. If a dot `.` is found, scans a double;
/// if an `e` is found, scans a number in scientific notation
fn scan_decimal(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
match chars.get(start_pos) {
Some(c) if *c == '.' => {
scan_double(chars, start_pos + 1, utils::str_append(current, *c))
},
Some(c) if *c == '.' => scan_double(chars, start_pos + 1, utils::str_append(current, *c)),
Some(c) if *c == 'e' => {
scan_scientific(chars, start_pos + 1, utils::str_append(current, *c))
},
}
Some(c) if utils::is_digit(*c) => {
scan_decimal(chars, start_pos + 1, utils::str_append(current, *c))
},
_ => {
LexResult::Some(token::new_number(current, start_pos as i32), start_pos)
}
_ => LexResult::Some(token::new_number(current, start_pos as i32), start_pos),
}
}
/// Recursively scans a hex number
///
///
/// This function expects the following on the first call:
/// - The char at `start_pos` is a value between [0-9a-fA-F]. If not, it will return an error.
/// - `current == "0x"`. If not, it will return an incorrect value, or panic.
@ -49,52 +46,45 @@ fn scan_hex(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
Some(c) if utils::is_hex_digit(*c) => {
let (t, next) = scan_hex_digits(chars, start_pos + 1, utils::str_append(current, *c));
LexResult::Some(t, next)
},
_ => LexResult::Err(String::from("Tried to scan an incomplete hex value"))
}
_ => LexResult::Err(String::from("Tried to scan an incomplete hex value")),
}
}
/// Scans a floating point number, with or without an exponent
///
///
/// This function expects the following:
/// - `start_pos` is the position after the dot. E.g., if the input is `3.22` then `start_pos == 2`.
///
///
/// Returns a syntax error if the char at `start_pos` is not a value between [0-9]
fn scan_double(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
match chars.get(start_pos) {
Some(c) if utils::is_digit(*c) => {
scan_double_impl(chars, start_pos, current)
},
Some(_) => {
LexResult::Err(String::from("The character after the dot when scanning a double is not a number."))
},
_ => LexResult::Err(String::from("EOF when scanning a double number."))
Some(c) if utils::is_digit(*c) => scan_double_impl(chars, start_pos, current),
Some(_) => LexResult::Err(String::from(
"The character after the dot when scanning a double is not a number.",
)),
_ => LexResult::Err(String::from("EOF when scanning a double number.")),
}
}
// Implementation of scan_double
fn scan_double_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
match chars.get(start_pos) {
Some(c) if utils::is_digit(*c) => {
scan_double_impl(chars, start_pos + 1, utils::str_append(current, *c))
},
}
Some(c) if *c == 'e' => {
scan_scientific(chars, start_pos + 1, utils::str_append(current, *c))
}
_ => {
LexResult::Some(token::new_number(current, start_pos as i32), start_pos)
}
_ => LexResult::Some(token::new_number(current, start_pos as i32), start_pos),
}
}
/// Scans a double in scientific notation
///
///
/// This function expects the following:
/// - `start_pos` is the position after the `e`. E.g., if the input is `3e+10` then `start_pos == 2`
///
///
/// Returns a syntax error if:
/// - The char at `start_pos` is not `+` or `-`
/// - The char at `start_pos + 1` is not between [0-9]
@ -107,36 +97,33 @@ fn scan_scientific(chars: &Vec<char>, start_pos: usize, current: String) -> LexR
let new_value = format!("{}{}{}", current, *c1, *c2);
let (t, next) = scan_digits(chars, start_pos + 2, new_value);
LexResult::Some(t, next)
},
_ => LexResult::Err(String::from("The characters after 'e' are not + or -, or are not followed by a number"))
}
_ => LexResult::Err(String::from(
"The characters after 'e' are not + or -, or are not followed by a number",
)),
}
}
/// Scans chars between [0-9], returns when none is found
fn scan_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Token, usize) {
match chars.get(start_pos) {
Some(c) if utils::is_digit(*c) => {
scan_digits(chars, start_pos + 1, utils::str_append(current, *c))
},
_ => (token::new_number(current, start_pos as i32), start_pos)
}
_ => (token::new_number(current, start_pos as i32), start_pos),
}
}
/// Scans chars between [0-9a-fA-F], returns when none is found
fn scan_hex_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Token, usize) {
match chars.get(start_pos) {
Some(c) if utils::is_hex_digit(*c) => {
scan_hex_digits(chars, start_pos + 1, utils::str_append(current, *c))
},
_ => (token::new_number(current, start_pos as i32), start_pos)
}
_ => (token::new_number(current, start_pos as i32), start_pos),
}
}
#[cfg(test)]
mod tests {
use crate::lexic::token::TokenType;
@ -153,30 +140,34 @@ mod tests {
let start_pos = 0;
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(3, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("123", token.value);
} else {panic!()}
assert_eq!(3, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("123", token.value);
} else {
panic!()
}
let input = str_to_vec("0123 ");
let start_pos = 0;
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("0123", token.value);
} else {panic!()}
assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("0123", token.value);
} else {
panic!()
}
let input = str_to_vec(" 123456 789");
let start_pos = 2;
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(8, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("123456", token.value);
} else {panic!()}
assert_eq!(8, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("123456", token.value);
} else {
panic!()
}
}
// Should not scan whitespace after the number
@ -186,10 +177,12 @@ mod tests {
let start_pos = 0;
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(3, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("123", token.value);
} else {panic!()}
assert_eq!(3, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("123", token.value);
} else {
panic!()
}
}
#[test]
@ -198,20 +191,23 @@ mod tests {
let start_pos = 0;
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("0x20", token.value);
} else {panic!()}
assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("0x20", token.value);
} else {
panic!()
}
let input = str_to_vec(" 0Xff23DA ");
let start_pos = 4;
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(12, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("0xff23DA", token.value);
} else {panic!()}
assert_eq!(12, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("0xff23DA", token.value);
} else {
panic!()
}
}
// Should not scan an incomplete hex value
@ -225,13 +221,14 @@ mod tests {
_ => panic!(),
}
let input = str_to_vec("0 x20 ");
let start_pos = 0;
if let LexResult::Some(token, _) = scan(&input, start_pos) {
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("0", token.value);
} else {panic!()}
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("0", token.value);
} else {
panic!()
}
}
// Should not scan a hex value if it doesn't start with 0x
@ -240,9 +237,11 @@ mod tests {
let input = str_to_vec("1x20");
let start_pos = 0;
if let LexResult::Some(token, _) = scan(&input, start_pos) {
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("1", token.value);
} else {panic!()}
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("1", token.value);
} else {
panic!()
}
}
// Should scan a double
@ -251,22 +250,24 @@ mod tests {
let input = str_to_vec("3.22");
let start_pos = 0;
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("3.22", token.value);
} else {panic!()}
assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("3.22", token.value);
} else {
panic!()
}
let input = str_to_vec("123456.7890 ");
let start_pos = 0;
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(11, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("123456.7890", token.value);
} else {panic!()}
assert_eq!(11, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("123456.7890", token.value);
} else {
panic!()
}
}
// Should not scan an incomplete double
#[test]
fn test_double_2() {
@ -274,11 +275,13 @@ mod tests {
let start_pos = 0;
match scan(&input, start_pos) {
LexResult::Err(reason) => assert_eq!("The character after the dot when scanning a double is not a number.", reason),
LexResult::Err(reason) => assert_eq!(
"The character after the dot when scanning a double is not a number.",
reason
),
_ => panic!(),
}
let input = str_to_vec("322.");
let start_pos = 0;
@ -294,54 +297,65 @@ mod tests {
let input = str_to_vec("1e+0");
let start_pos = 0;
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!("1e+0", token.value);
assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type);
} else {panic!()}
assert_eq!("1e+0", token.value);
assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type);
} else {
panic!()
}
let input = str_to_vec("1e-0");
let start_pos = 0;
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("1e-0", token.value);
} else {panic!()}
assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("1e-0", token.value);
} else {
panic!()
}
let input = str_to_vec("0e+0");
let start_pos = 0;
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("0e+0", token.value);
} else {panic!()}
assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("0e+0", token.value);
} else {
panic!()
}
let input = str_to_vec("123498790e+12349870");
let start_pos = 0;
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(19, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("123498790e+12349870", token.value);
} else {panic!()}
assert_eq!(19, next);
assert_eq!(TokenType::Number, token.token_type);
assert_eq!("123498790e+12349870", token.value);
} else {
panic!()
}
}
// Should scan a double with decimal part and exponent
#[test]
fn test_exp_2(){
fn test_exp_2() {
let input = str_to_vec("1.24e+1");
let start_pos = 0;
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!("1.24e+1", token.value);
assert_eq!(7, next);
assert_eq!(TokenType::Number, token.token_type);
} else {panic!()}
assert_eq!("1.24e+1", token.value);
assert_eq!(7, next);
assert_eq!(TokenType::Number, token.token_type);
} else {
panic!()
}
let input = str_to_vec("0.00000000000001e+1");
let start_pos = 0;
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!("0.00000000000001e+1", token.value);
assert_eq!(19, next);
assert_eq!(TokenType::Number, token.token_type);
} else {panic!()}
assert_eq!("0.00000000000001e+1", token.value);
assert_eq!(19, next);
assert_eq!(TokenType::Number, token.token_type);
} else {
panic!()
}
}
}

View File

@ -1,4 +1,4 @@
#[derive(PartialEq, Debug)]
#[derive(PartialEq, Debug, Clone)]
pub enum TokenType {
NewLine,
Identifier,
@ -52,7 +52,7 @@ pub fn new_operator(value: String, position: i32) -> Token {
}
}
pub fn new_grouping_sign(value: String, position: i32, token_type: TokenType) -> Token {
/// Creates a `Token` of an arbitrary type.
///
/// - `value`: the text stored in the token
/// - `position`: stored as-is in the token's `position` field
/// - `token_type`: the kind of token to create
pub fn new(value: String, position: i32, token_type: TokenType) -> Token {
    Token {
        token_type,
        value,
        position,
    }
}