Scan and keywords

This commit is contained in:
Araozu 2022-12-01 12:17:17 -05:00
parent e580167682
commit f3ee68fcf1
4 changed files with 172 additions and 122 deletions

View File

@ -1,4 +1,14 @@
use crate::lexic::{token, utils, LexResult}; use crate::{lexic::{token, utils, LexResult}, token::TokenType};
/// Checks whether a scanned word is a language keyword.
///
/// Returns `Some(TokenType::VAR)` for `"var"` and `Some(TokenType::VAL)` for
/// `"val"`. Every other input, including the empty string, yields `None`.
/// The comparison is exact and case-sensitive.
///
/// The parameter is a `&str` rather than a `&String`, so string slices and
/// borrowed `String`s (through deref coercion) are both accepted.
fn str_is_keyword(s: &str) -> Option<TokenType> {
    match s {
        "var" => Some(TokenType::VAR),
        "val" => Some(TokenType::VAL),
        // Not a keyword
        _ => None,
    }
}
pub fn scan(start_char: char, chars: &Vec<char>, start_pos: usize) -> LexResult { pub fn scan(start_char: char, chars: &Vec<char>, start_pos: usize) -> LexResult {
scan_impl(chars, start_pos + 1, format!("{}", start_char)) scan_impl(chars, start_pos + 1, format!("{}", start_char))
@ -10,7 +20,12 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
scan_impl(chars, start_pos + 1, utils::str_append(current, *c)) scan_impl(chars, start_pos + 1, utils::str_append(current, *c))
}, },
_ => { _ => {
LexResult::Some(token::new_identifier(current, start_pos as i32), start_pos) if let Some(token_type) = str_is_keyword(&current) {
LexResult::Some(token::new(current, start_pos as i32, token_type), start_pos)
}
else {
LexResult::Some(token::new_identifier(current, start_pos as i32), start_pos)
}
} }
} }
} }
@ -119,4 +134,25 @@ mod tests {
} }
} }
} }
// Keywords must be scanned with their own token type
#[test]
fn test_4() {
    // Each case pairs the source text with the token type it must produce.
    for (source, expected_type) in [("var", TokenType::VAR), ("val", TokenType::VAL)] {
        let input = str_to_vec(source);
        let start_pos = 0;

        match scan(*input.get(0).unwrap(), &input, start_pos) {
            LexResult::Some(token, next) => {
                // Both keywords are 3 chars long, so scanning stops at index 3
                assert_eq!(3, next);
                assert_eq!(expected_type, token.token_type);
                assert_eq!(source, token.value);
            }
            _ => panic!(),
        }
    }
}
} }

View File

@ -28,7 +28,7 @@ pub fn grouping_sign(c: char, _: &Vec<char>, start_pos: usize) -> Option<LexResu
_ => return None, _ => return None,
}; };
let token = token::new_grouping_sign( let token = token::new(
c.to_string(), c.to_string(),
start_pos as i32, start_pos as i32,
token_type, token_type,

View File

@ -1,7 +1,10 @@
use crate::lexic::{token::{Token, self}, utils, LexResult}; use crate::lexic::{
token::{self, Token},
utils, LexResult,
};
/// Function to scan a number /// Function to scan a number
/// ///
/// This function assumes that the character at `start_pos` is a number [0-9], /// This function assumes that the character at `start_pos` is a number [0-9],
/// if not it will panic /// if not it will panic
pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult { pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
@ -12,35 +15,29 @@ pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
// Test if the input contains a hex number // Test if the input contains a hex number
(Some(c1), Some(c2)) if *c1 == '0' && (*c2 == 'x' || *c2 == 'X') => { (Some(c1), Some(c2)) if *c1 == '0' && (*c2 == 'x' || *c2 == 'X') => {
scan_hex(chars, start_pos + 2, String::from("0x")) scan_hex(chars, start_pos + 2, String::from("0x"))
}, }
// Scan decimal/double/scientific otherwise // Scan decimal/double/scientific otherwise
_ => scan_decimal(chars, start_pos, String::from("")) _ => scan_decimal(chars, start_pos, String::from("")),
} }
} }
/// Recursively scans an integer. If a dot `.` is found, scans a double, /// Recursively scans an integer. If a dot `.` is found, scans a double,
/// if a `e` is found, scans a number in scientific notation /// if a `e` is found, scans a number in scientific notation
fn scan_decimal(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult { fn scan_decimal(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
match chars.get(start_pos) { match chars.get(start_pos) {
Some(c) if *c == '.' => { Some(c) if *c == '.' => scan_double(chars, start_pos + 1, utils::str_append(current, *c)),
scan_double(chars, start_pos + 1, utils::str_append(current, *c))
},
Some(c) if *c == 'e' => { Some(c) if *c == 'e' => {
scan_scientific(chars, start_pos + 1, utils::str_append(current, *c)) scan_scientific(chars, start_pos + 1, utils::str_append(current, *c))
}, }
Some(c) if utils::is_digit(*c) => { Some(c) if utils::is_digit(*c) => {
scan_decimal(chars, start_pos + 1, utils::str_append(current, *c)) scan_decimal(chars, start_pos + 1, utils::str_append(current, *c))
},
_ => {
LexResult::Some(token::new_number(current, start_pos as i32), start_pos)
} }
_ => LexResult::Some(token::new_number(current, start_pos as i32), start_pos),
} }
} }
/// Recursively scans a hex number /// Recursively scans a hex number
/// ///
/// This function expects the following on the first call: /// This function expects the following on the first call:
/// - The char at `start_pos` is a value between [0-9a-fA-F]. If not, will return an error. /// - The char at `start_pos` is a value between [0-9a-fA-F]. If not, will return an error.
/// - `current == "0x"`. If not will return an incorrect value, or panic. /// - `current == "0x"`. If not will return an incorrect value, or panic.
@ -49,52 +46,45 @@ fn scan_hex(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
Some(c) if utils::is_hex_digit(*c) => { Some(c) if utils::is_hex_digit(*c) => {
let (t, next) = scan_hex_digits(chars, start_pos + 1, utils::str_append(current, *c)); let (t, next) = scan_hex_digits(chars, start_pos + 1, utils::str_append(current, *c));
LexResult::Some(t, next) LexResult::Some(t, next)
}, }
_ => LexResult::Err(String::from("Tried to scan an incomplete hex value")) _ => LexResult::Err(String::from("Tried to scan an incomplete hex value")),
} }
} }
/// Scans a floating point number, with or without an exponent /// Scans a floating point number, with or without an exponent
/// ///
/// This function expects the following: /// This function expects the following:
/// - `start_pos` is the position after the dot. E.g., if the input is `3.22` then `start_pos == 2`. /// - `start_pos` is the position after the dot. E.g., if the input is `3.22` then `start_pos == 2`.
/// ///
/// Returns a syntax error if the char at `start_pos` is not a value between [0-9] /// Returns a syntax error if the char at `start_pos` is not a value between [0-9]
fn scan_double(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult { fn scan_double(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
match chars.get(start_pos) { match chars.get(start_pos) {
Some(c) if utils::is_digit(*c) => { Some(c) if utils::is_digit(*c) => scan_double_impl(chars, start_pos, current),
scan_double_impl(chars, start_pos, current) Some(_) => LexResult::Err(String::from(
}, "The character after the dot when scanning a double is not a number.",
Some(_) => { )),
LexResult::Err(String::from("The character after the dot when scanning a double is not a number.")) _ => LexResult::Err(String::from("EOF when scanning a double number.")),
},
_ => LexResult::Err(String::from("EOF when scanning a double number."))
} }
} }
// Implementation of scan_double // Implementation of scan_double
fn scan_double_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult { fn scan_double_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
match chars.get(start_pos) { match chars.get(start_pos) {
Some(c) if utils::is_digit(*c) => { Some(c) if utils::is_digit(*c) => {
scan_double_impl(chars, start_pos + 1, utils::str_append(current, *c)) scan_double_impl(chars, start_pos + 1, utils::str_append(current, *c))
}, }
Some(c) if *c == 'e' => { Some(c) if *c == 'e' => {
scan_scientific(chars, start_pos + 1, utils::str_append(current, *c)) scan_scientific(chars, start_pos + 1, utils::str_append(current, *c))
} }
_ => { _ => LexResult::Some(token::new_number(current, start_pos as i32), start_pos),
LexResult::Some(token::new_number(current, start_pos as i32), start_pos)
}
} }
} }
/// Scans a double in scientific notation /// Scans a double in scientific notation
/// ///
/// This function expects the following: /// This function expects the following:
/// - `start_pos` is the position after the `e`. E.g., if the input is `3e+10` then `start_pos == 2` /// - `start_pos` is the position after the `e`. E.g., if the input is `3e+10` then `start_pos == 2`
/// ///
/// Returns a syntax error if: /// Returns a syntax error if:
/// - The char at `start_pos` is not `+` or `-` /// - The char at `start_pos` is not `+` or `-`
/// - The char at `start_pos + 1` is not between [0-9] /// - The char at `start_pos + 1` is not between [0-9]
@ -107,36 +97,33 @@ fn scan_scientific(chars: &Vec<char>, start_pos: usize, current: String) -> LexR
let new_value = format!("{}{}{}", current, *c1, *c2); let new_value = format!("{}{}{}", current, *c1, *c2);
let (t, next) = scan_digits(chars, start_pos + 2, new_value); let (t, next) = scan_digits(chars, start_pos + 2, new_value);
LexResult::Some(t, next) LexResult::Some(t, next)
}, }
_ => LexResult::Err(String::from("The characters after 'e' are not + or -, or are not followed by a number")) _ => LexResult::Err(String::from(
"The characters after 'e' are not + or -, or are not followed by a number",
)),
} }
} }
/// Scans chars between [0-9], returns when none is found /// Scans chars between [0-9], returns when none is found
fn scan_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Token, usize) { fn scan_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Token, usize) {
match chars.get(start_pos) { match chars.get(start_pos) {
Some(c) if utils::is_digit(*c) => { Some(c) if utils::is_digit(*c) => {
scan_digits(chars, start_pos + 1, utils::str_append(current, *c)) scan_digits(chars, start_pos + 1, utils::str_append(current, *c))
}, }
_ => (token::new_number(current, start_pos as i32), start_pos) _ => (token::new_number(current, start_pos as i32), start_pos),
} }
} }
/// Scans chars between [0-9a-fA-F], returns when none is found /// Scans chars between [0-9a-fA-F], returns when none is found
fn scan_hex_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Token, usize) { fn scan_hex_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Token, usize) {
match chars.get(start_pos) { match chars.get(start_pos) {
Some(c) if utils::is_hex_digit(*c) => { Some(c) if utils::is_hex_digit(*c) => {
scan_hex_digits(chars, start_pos + 1, utils::str_append(current, *c)) scan_hex_digits(chars, start_pos + 1, utils::str_append(current, *c))
}, }
_ => (token::new_number(current, start_pos as i32), start_pos) _ => (token::new_number(current, start_pos as i32), start_pos),
} }
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::lexic::token::TokenType; use crate::lexic::token::TokenType;
@ -153,30 +140,34 @@ mod tests {
let start_pos = 0; let start_pos = 0;
if let LexResult::Some(token, next) = scan(&input, start_pos) { if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(3, next); assert_eq!(3, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!("123", token.value); assert_eq!("123", token.value);
} else {panic!()} } else {
panic!()
}
let input = str_to_vec("0123 "); let input = str_to_vec("0123 ");
let start_pos = 0; let start_pos = 0;
if let LexResult::Some(token, next) = scan(&input, start_pos) { if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(4, next); assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!("0123", token.value); assert_eq!("0123", token.value);
} else {panic!()} } else {
panic!()
}
let input = str_to_vec(" 123456 789"); let input = str_to_vec(" 123456 789");
let start_pos = 2; let start_pos = 2;
if let LexResult::Some(token, next) = scan(&input, start_pos) { if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(8, next); assert_eq!(8, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!("123456", token.value); assert_eq!("123456", token.value);
} else {panic!()} } else {
panic!()
}
} }
// Should not scan whitespace after the number // Should not scan whitespace after the number
@ -186,10 +177,12 @@ mod tests {
let start_pos = 0; let start_pos = 0;
if let LexResult::Some(token, next) = scan(&input, start_pos) { if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(3, next); assert_eq!(3, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!("123", token.value); assert_eq!("123", token.value);
} else {panic!()} } else {
panic!()
}
} }
#[test] #[test]
@ -198,20 +191,23 @@ mod tests {
let start_pos = 0; let start_pos = 0;
if let LexResult::Some(token, next) = scan(&input, start_pos) { if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(4, next); assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!("0x20", token.value); assert_eq!("0x20", token.value);
} else {panic!()} } else {
panic!()
}
let input = str_to_vec(" 0Xff23DA "); let input = str_to_vec(" 0Xff23DA ");
let start_pos = 4; let start_pos = 4;
if let LexResult::Some(token, next) = scan(&input, start_pos) { if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(12, next); assert_eq!(12, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!("0xff23DA", token.value); assert_eq!("0xff23DA", token.value);
} else {panic!()} } else {
panic!()
}
} }
// Should not scan an incomplete hex value // Should not scan an incomplete hex value
@ -225,13 +221,14 @@ mod tests {
_ => panic!(), _ => panic!(),
} }
let input = str_to_vec("0 x20 "); let input = str_to_vec("0 x20 ");
let start_pos = 0; let start_pos = 0;
if let LexResult::Some(token, _) = scan(&input, start_pos) { if let LexResult::Some(token, _) = scan(&input, start_pos) {
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!("0", token.value); assert_eq!("0", token.value);
} else {panic!()} } else {
panic!()
}
} }
// Should not scan a hex value if it doesn't start with 0x // Should not scan a hex value if it doesn't start with 0x
@ -240,9 +237,11 @@ mod tests {
let input = str_to_vec("1x20"); let input = str_to_vec("1x20");
let start_pos = 0; let start_pos = 0;
if let LexResult::Some(token, _) = scan(&input, start_pos) { if let LexResult::Some(token, _) = scan(&input, start_pos) {
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!("1", token.value); assert_eq!("1", token.value);
} else {panic!()} } else {
panic!()
}
} }
// Should scan a double // Should scan a double
@ -251,22 +250,24 @@ mod tests {
let input = str_to_vec("3.22"); let input = str_to_vec("3.22");
let start_pos = 0; let start_pos = 0;
if let LexResult::Some(token, next) = scan(&input, start_pos) { if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(4, next); assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!("3.22", token.value); assert_eq!("3.22", token.value);
} else {panic!()} } else {
panic!()
}
let input = str_to_vec("123456.7890 "); let input = str_to_vec("123456.7890 ");
let start_pos = 0; let start_pos = 0;
if let LexResult::Some(token, next) = scan(&input, start_pos) { if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(11, next); assert_eq!(11, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!("123456.7890", token.value); assert_eq!("123456.7890", token.value);
} else {panic!()} } else {
panic!()
}
} }
// Should not scan an incomplete double // Should not scan an incomplete double
#[test] #[test]
fn test_double_2() { fn test_double_2() {
@ -274,11 +275,13 @@ mod tests {
let start_pos = 0; let start_pos = 0;
match scan(&input, start_pos) { match scan(&input, start_pos) {
LexResult::Err(reason) => assert_eq!("The character after the dot when scanning a double is not a number.", reason), LexResult::Err(reason) => assert_eq!(
"The character after the dot when scanning a double is not a number.",
reason
),
_ => panic!(), _ => panic!(),
} }
let input = str_to_vec("322."); let input = str_to_vec("322.");
let start_pos = 0; let start_pos = 0;
@ -294,54 +297,65 @@ mod tests {
let input = str_to_vec("1e+0"); let input = str_to_vec("1e+0");
let start_pos = 0; let start_pos = 0;
if let LexResult::Some(token, next) = scan(&input, start_pos) { if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!("1e+0", token.value); assert_eq!("1e+0", token.value);
assert_eq!(4, next); assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
} else {panic!()} } else {
panic!()
}
let input = str_to_vec("1e-0"); let input = str_to_vec("1e-0");
let start_pos = 0; let start_pos = 0;
if let LexResult::Some(token, next) = scan(&input, start_pos) { if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(4, next); assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!("1e-0", token.value); assert_eq!("1e-0", token.value);
} else {panic!()} } else {
panic!()
}
let input = str_to_vec("0e+0"); let input = str_to_vec("0e+0");
let start_pos = 0; let start_pos = 0;
if let LexResult::Some(token, next) = scan(&input, start_pos) { if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(4, next); assert_eq!(4, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!("0e+0", token.value); assert_eq!("0e+0", token.value);
} else {panic!()} } else {
panic!()
}
let input = str_to_vec("123498790e+12349870"); let input = str_to_vec("123498790e+12349870");
let start_pos = 0; let start_pos = 0;
if let LexResult::Some(token, next) = scan(&input, start_pos) { if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(19, next); assert_eq!(19, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
assert_eq!("123498790e+12349870", token.value); assert_eq!("123498790e+12349870", token.value);
} else {panic!()} } else {
panic!()
}
} }
// Should scan a double with decimal part and exponent // Should scan a double with decimal part and exponent
#[test] #[test]
fn test_exp_2(){ fn test_exp_2() {
let input = str_to_vec("1.24e+1"); let input = str_to_vec("1.24e+1");
let start_pos = 0; let start_pos = 0;
if let LexResult::Some(token, next) = scan(&input, start_pos) { if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!("1.24e+1", token.value); assert_eq!("1.24e+1", token.value);
assert_eq!(7, next); assert_eq!(7, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
} else {panic!()} } else {
panic!()
}
let input = str_to_vec("0.00000000000001e+1"); let input = str_to_vec("0.00000000000001e+1");
let start_pos = 0; let start_pos = 0;
if let LexResult::Some(token, next) = scan(&input, start_pos) { if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!("0.00000000000001e+1", token.value); assert_eq!("0.00000000000001e+1", token.value);
assert_eq!(19, next); assert_eq!(19, next);
assert_eq!(TokenType::Number, token.token_type); assert_eq!(TokenType::Number, token.token_type);
} else {panic!()} } else {
panic!()
}
} }
} }

View File

@ -1,4 +1,4 @@
#[derive(PartialEq, Debug)] #[derive(PartialEq, Debug, Clone)]
pub enum TokenType { pub enum TokenType {
NewLine, NewLine,
Identifier, Identifier,
@ -52,7 +52,7 @@ pub fn new_operator(value: String, position: i32) -> Token {
} }
} }
pub fn new_grouping_sign(value: String, position: i32, token_type: TokenType) -> Token { pub fn new(value: String, position: i32, token_type: TokenType) -> Token {
Token {token_type, value, position} Token {token_type, value, position}
} }