From f3ee68fcf1ffd4d603334497c5ba6ffa75210b63 Mon Sep 17 00:00:00 2001 From: Araozu Date: Thu, 1 Dec 2022 12:17:17 -0500 Subject: [PATCH] Scan and keywords --- src/lexic/scanner/identifier.rs | 40 +++++- src/lexic/scanner/mod.rs | 2 +- src/lexic/scanner/number.rs | 248 +++++++++++++++++--------------- src/token.rs | 4 +- 4 files changed, 172 insertions(+), 122 deletions(-) diff --git a/src/lexic/scanner/identifier.rs b/src/lexic/scanner/identifier.rs index 688241c..b8c58f1 100644 --- a/src/lexic/scanner/identifier.rs +++ b/src/lexic/scanner/identifier.rs @@ -1,4 +1,14 @@ -use crate::lexic::{token, utils, LexResult}; +use crate::{lexic::{token, utils, LexResult}, token::TokenType}; + +/// Checks if a String is a keyword, and returns its TokenType +fn str_is_keyword(s: &String) -> Option { + match s.as_str() { + "var" => Some(TokenType::VAR), + "val" => Some(TokenType::VAL), + _ => None, + } +} + pub fn scan(start_char: char, chars: &Vec, start_pos: usize) -> LexResult { scan_impl(chars, start_pos + 1, format!("{}", start_char)) @@ -10,7 +20,12 @@ pub fn scan_impl(chars: &Vec, start_pos: usize, current: String) -> LexRes scan_impl(chars, start_pos + 1, utils::str_append(current, *c)) }, _ => { - LexResult::Some(token::new_identifier(current, start_pos as i32), start_pos) + if let Some(token_type) = str_is_keyword(¤t) { + LexResult::Some(token::new(current, start_pos as i32, token_type), start_pos) + } + else { + LexResult::Some(token::new_identifier(current, start_pos as i32), start_pos) + } } } } @@ -119,4 +134,25 @@ mod tests { } } } + + // Should scan keywords + #[test] + fn test_4() { + let input = str_to_vec("var"); + let start_pos = 0; + if let LexResult::Some(token, next) = scan(*input.get(0).unwrap(), &input, start_pos) { + assert_eq!(3, next); + assert_eq!(TokenType::VAR, token.token_type); + assert_eq!("var", token.value); + } else {panic!()} + + + let input = str_to_vec("val"); + let start_pos = 0; + if let LexResult::Some(token, next) = scan(*input.get(0).unwrap(), &input, start_pos) { + assert_eq!(3, next); + assert_eq!(TokenType::VAL, token.token_type); + assert_eq!("val", token.value); + } else {panic!()} + } } diff --git a/src/lexic/scanner/mod.rs b/src/lexic/scanner/mod.rs index 73c6108..f3f6dce 100644 --- a/src/lexic/scanner/mod.rs +++ b/src/lexic/scanner/mod.rs @@ -28,7 +28,7 @@ pub fn grouping_sign(c: char, _: &Vec, start_pos: usize) -> Option return None, }; - let token = token::new_grouping_sign( + let token = token::new( c.to_string(), start_pos as i32, token_type, diff --git a/src/lexic/scanner/number.rs b/src/lexic/scanner/number.rs index 3920392..40e9e70 100644 --- a/src/lexic/scanner/number.rs +++ b/src/lexic/scanner/number.rs @@ -1,7 +1,10 @@ -use crate::lexic::{token::{Token, self}, utils, LexResult}; +use crate::lexic::{ + token::{self, Token}, + utils, LexResult, +}; /// Function to scan a number -/// +/// /// This function assumes that the character at `start_pos` is a number [0-9], /// if not it will panic pub fn scan(chars: &Vec, start_pos: usize) -> LexResult { @@ -12,35 +15,29 @@ pub fn scan(chars: &Vec, start_pos: usize) -> LexResult { // Test if the input contains a hex number (Some(c1), Some(c2)) if *c1 == '0' && (*c2 == 'x' || *c2 == 'X') => { scan_hex(chars, start_pos + 2, String::from("0x")) - }, + } // Scan decimal/double/scientific otherwise - _ => scan_decimal(chars, start_pos, String::from("")) + _ => scan_decimal(chars, start_pos, String::from("")), } } - /// Recursively scans an integer. If a dot `.` is found, scans a double, /// if a `e` is found, scans a number in scientific notation fn scan_decimal(chars: &Vec, start_pos: usize, current: String) -> LexResult { match chars.get(start_pos) { - Some(c) if *c == '.' => { - scan_double(chars, start_pos + 1, utils::str_append(current, *c)) - }, + Some(c) if *c == '.' => scan_double(chars, start_pos + 1, utils::str_append(current, *c)), Some(c) if *c == 'e' => { scan_scientific(chars, start_pos + 1, utils::str_append(current, *c)) - }, + } Some(c) if utils::is_digit(*c) => { scan_decimal(chars, start_pos + 1, utils::str_append(current, *c)) - }, - _ => { - LexResult::Some(token::new_number(current, start_pos as i32), start_pos) } + _ => LexResult::Some(token::new_number(current, start_pos as i32), start_pos), } } - /// Recursively scans a hex number -/// +/// /// This function expects the following on the first call: /// - The char at `start_pos` is a value between [0-9a-fA-F]. If not, will return an error. /// - `current == "0x"`. If not will return an incorrect value, or panic. @@ -49,52 +46,45 @@ fn scan_hex(chars: &Vec, start_pos: usize, current: String) -> LexResult { Some(c) if utils::is_hex_digit(*c) => { let (t, next) = scan_hex_digits(chars, start_pos + 1, utils::str_append(current, *c)); LexResult::Some(t, next) - }, - _ => LexResult::Err(String::from("Tried to scan an incomplete hex value")) + } + _ => LexResult::Err(String::from("Tried to scan an incomplete hex value")), } } - /// Scans a floating point number, with or without an exponent -/// +/// /// This function expects the following: /// - `start_pos` is the position after the dot. E.g., if the input is `3.22` then `start_pos == 2`. -/// +/// /// Returns a syntax error if the char at `start_pos` is not a value between [0-9] fn scan_double(chars: &Vec, start_pos: usize, current: String) -> LexResult { match chars.get(start_pos) { - Some(c) if utils::is_digit(*c) => { - scan_double_impl(chars, start_pos, current) - }, - Some(_) => { - LexResult::Err(String::from("The character after the dot when scanning a double is not a number.")) - }, - _ => LexResult::Err(String::from("EOF when scanning a double number.")) + Some(c) if utils::is_digit(*c) => scan_double_impl(chars, start_pos, current), + Some(_) => LexResult::Err(String::from( + "The character after the dot when scanning a double is not a number.", + )), + _ => LexResult::Err(String::from("EOF when scanning a double number.")), } } - // Implementation of scan_double fn scan_double_impl(chars: &Vec, start_pos: usize, current: String) -> LexResult { match chars.get(start_pos) { Some(c) if utils::is_digit(*c) => { scan_double_impl(chars, start_pos + 1, utils::str_append(current, *c)) - }, + } Some(c) if *c == 'e' => { scan_scientific(chars, start_pos + 1, utils::str_append(current, *c)) } - _ => { - LexResult::Some(token::new_number(current, start_pos as i32), start_pos) - } + _ => LexResult::Some(token::new_number(current, start_pos as i32), start_pos), } } - /// Scans a double in scientific notation -/// +/// /// This function expects the following: /// - `start_pos` is the position after the `e`. E.g., if the input is `3e+10` then `start_pos == 2` -/// +/// /// Returns a syntax error if: /// - The char at `start_pos` is not `+` or `-` /// - The char at `start_pos + 1` is not between [0-9] @@ -107,36 +97,33 @@ fn scan_scientific(chars: &Vec, start_pos: usize, current: String) -> LexR let new_value = format!("{}{}{}", current, *c1, *c2); let (t, next) = scan_digits(chars, start_pos + 2, new_value); LexResult::Some(t, next) - }, - _ => LexResult::Err(String::from("The characters after 'e' are not + or -, or are not followed by a number")) + } + _ => LexResult::Err(String::from( + "The characters after 'e' are not + or -, or are not followed by a number", + )), } } - /// Scans chars between [0-9], returns when none is found fn scan_digits(chars: &Vec, start_pos: usize, current: String) -> (Token, usize) { match chars.get(start_pos) { Some(c) if utils::is_digit(*c) => { scan_digits(chars, start_pos + 1, utils::str_append(current, *c)) - }, - _ => (token::new_number(current, start_pos as i32), start_pos) + } + _ => (token::new_number(current, start_pos as i32), start_pos), } } - /// Scans chars between [0-9a-fA-F], returns when none is found fn scan_hex_digits(chars: &Vec, start_pos: usize, current: String) -> (Token, usize) { match chars.get(start_pos) { Some(c) if utils::is_hex_digit(*c) => { scan_hex_digits(chars, start_pos + 1, utils::str_append(current, *c)) - }, - _ => (token::new_number(current, start_pos as i32), start_pos) + } + _ => (token::new_number(current, start_pos as i32), start_pos), } } - - - #[cfg(test)] mod tests { use crate::lexic::token::TokenType; @@ -153,30 +140,34 @@ mod tests { let start_pos = 0; if let LexResult::Some(token, next) = scan(&input, start_pos) { - assert_eq!(3, next); - assert_eq!(TokenType::Number, token.token_type); - assert_eq!("123", token.value); - } else {panic!()} - + assert_eq!(3, next); + assert_eq!(TokenType::Number, token.token_type); + assert_eq!("123", token.value); + } else { + panic!() + } let input = str_to_vec("0123 "); let start_pos = 0; if let LexResult::Some(token, next) = scan(&input, start_pos) { - assert_eq!(4, next); - assert_eq!(TokenType::Number, token.token_type); - assert_eq!("0123", token.value); - } else {panic!()} - + assert_eq!(4, next); + assert_eq!(TokenType::Number, token.token_type); + assert_eq!("0123", token.value); + } else { + panic!() + } let input = str_to_vec(" 123456 789"); let start_pos = 2; if let LexResult::Some(token, next) = scan(&input, start_pos) { - assert_eq!(8, next); - assert_eq!(TokenType::Number, token.token_type); - assert_eq!("123456", token.value); - } else {panic!()} + assert_eq!(8, next); + assert_eq!(TokenType::Number, token.token_type); + assert_eq!("123456", token.value); + } else { + panic!() + } } // Should not scan whitespace after the number @@ -186,10 +177,12 @@ mod tests { let start_pos = 0; if let LexResult::Some(token, next) = scan(&input, start_pos) { - assert_eq!(3, next); - assert_eq!(TokenType::Number, token.token_type); - assert_eq!("123", token.value); - } else {panic!()} + assert_eq!(3, next); + assert_eq!(TokenType::Number, token.token_type); + assert_eq!("123", token.value); + } else { + panic!() + } } #[test] @@ -198,20 +191,23 @@ mod tests { let start_pos = 0; if let LexResult::Some(token, next) = scan(&input, start_pos) { - assert_eq!(4, next); - assert_eq!(TokenType::Number, token.token_type); - assert_eq!("0x20", token.value); - } else {panic!()} - + assert_eq!(4, next); + assert_eq!(TokenType::Number, token.token_type); + assert_eq!("0x20", token.value); + } else { + panic!() + } let input = str_to_vec(" 0Xff23DA "); let start_pos = 4; if let LexResult::Some(token, next) = scan(&input, start_pos) { - assert_eq!(12, next); - assert_eq!(TokenType::Number, token.token_type); - assert_eq!("0xff23DA", token.value); - } else {panic!()} + assert_eq!(12, next); + assert_eq!(TokenType::Number, token.token_type); + assert_eq!("0xff23DA", token.value); + } else { + panic!() + } } // Should not scan an incomplete hex value @@ -225,13 +221,14 @@ mod tests { _ => panic!(), } - let input = str_to_vec("0 x20 "); let start_pos = 0; if let LexResult::Some(token, _) = scan(&input, start_pos) { - assert_eq!(TokenType::Number, token.token_type); - assert_eq!("0", token.value); - } else {panic!()} + assert_eq!(TokenType::Number, token.token_type); + assert_eq!("0", token.value); + } else { + panic!() + } } // Should not scan a hex value if it doesn't start with 0x @@ -240,9 +237,11 @@ mod tests { let input = str_to_vec("1x20"); let start_pos = 0; if let LexResult::Some(token, _) = scan(&input, start_pos) { - assert_eq!(TokenType::Number, token.token_type); - assert_eq!("1", token.value); - } else {panic!()} + assert_eq!(TokenType::Number, token.token_type); + assert_eq!("1", token.value); + } else { + panic!() + } } // Should scan a double @@ -251,22 +250,24 @@ mod tests { let input = str_to_vec("3.22"); let start_pos = 0; if let LexResult::Some(token, next) = scan(&input, start_pos) { - assert_eq!(4, next); - assert_eq!(TokenType::Number, token.token_type); - assert_eq!("3.22", token.value); - } else {panic!()} - + assert_eq!(4, next); + assert_eq!(TokenType::Number, token.token_type); + assert_eq!("3.22", token.value); + } else { + panic!() + } let input = str_to_vec("123456.7890 "); let start_pos = 0; if let LexResult::Some(token, next) = scan(&input, start_pos) { - assert_eq!(11, next); - assert_eq!(TokenType::Number, token.token_type); - assert_eq!("123456.7890", token.value); - } else {panic!()} + assert_eq!(11, next); + assert_eq!(TokenType::Number, token.token_type); + assert_eq!("123456.7890", token.value); + } else { + panic!() + } } - // Should not scan an incomplete double #[test] fn test_double_2() { @@ -274,11 +275,13 @@ mod tests { let start_pos = 0; match scan(&input, start_pos) { - LexResult::Err(reason) => assert_eq!("The character after the dot when scanning a double is not a number.", reason), + LexResult::Err(reason) => assert_eq!( + "The character after the dot when scanning a double is not a number.", + reason + ), _ => panic!(), } - let input = str_to_vec("322."); let start_pos = 0; @@ -294,54 +297,65 @@ mod tests { let input = str_to_vec("1e+0"); let start_pos = 0; if let LexResult::Some(token, next) = scan(&input, start_pos) { - assert_eq!("1e+0", token.value); - assert_eq!(4, next); - assert_eq!(TokenType::Number, token.token_type); - } else {panic!()} + assert_eq!("1e+0", token.value); + assert_eq!(4, next); + assert_eq!(TokenType::Number, token.token_type); + } else { + panic!() + } let input = str_to_vec("1e-0"); let start_pos = 0; if let LexResult::Some(token, next) = scan(&input, start_pos) { - assert_eq!(4, next); - assert_eq!(TokenType::Number, token.token_type); - assert_eq!("1e-0", token.value); - } else {panic!()} - + assert_eq!(4, next); + assert_eq!(TokenType::Number, token.token_type); + assert_eq!("1e-0", token.value); + } else { + panic!() + } let input = str_to_vec("0e+0"); let start_pos = 0; if let LexResult::Some(token, next) = scan(&input, start_pos) { - assert_eq!(4, next); - assert_eq!(TokenType::Number, token.token_type); - assert_eq!("0e+0", token.value); - } else {panic!()} + assert_eq!(4, next); + assert_eq!(TokenType::Number, token.token_type); + assert_eq!("0e+0", token.value); + } else { + panic!() + } let input = str_to_vec("123498790e+12349870"); let start_pos = 0; if let LexResult::Some(token, next) = scan(&input, start_pos) { - assert_eq!(19, next); - assert_eq!(TokenType::Number, token.token_type); - assert_eq!("123498790e+12349870", token.value); - } else {panic!()} + assert_eq!(19, next); + assert_eq!(TokenType::Number, token.token_type); + assert_eq!("123498790e+12349870", token.value); + } else { + panic!() + } } // Should scan a double with decimal part and exponent #[test] - fn test_exp_2(){ + fn test_exp_2() { let input = str_to_vec("1.24e+1"); let start_pos = 0; if let LexResult::Some(token, next) = scan(&input, start_pos) { - assert_eq!("1.24e+1", token.value); - assert_eq!(7, next); - assert_eq!(TokenType::Number, token.token_type); - } else {panic!()} + assert_eq!("1.24e+1", token.value); + assert_eq!(7, next); + assert_eq!(TokenType::Number, token.token_type); + } else { + panic!() + } let input = str_to_vec("0.00000000000001e+1"); let start_pos = 0; if let LexResult::Some(token, next) = scan(&input, start_pos) { - assert_eq!("0.00000000000001e+1", token.value); - assert_eq!(19, next); - assert_eq!(TokenType::Number, token.token_type); - } else {panic!()} + assert_eq!("0.00000000000001e+1", token.value); + assert_eq!(19, next); + assert_eq!(TokenType::Number, token.token_type); + } else { + panic!() + } } } diff --git a/src/token.rs b/src/token.rs index 5ab0b0b..0351c0f 100644 --- a/src/token.rs +++ b/src/token.rs @@ -1,4 +1,4 @@ -#[derive(PartialEq, Debug)] +#[derive(PartialEq, Debug, Clone)] pub enum TokenType { NewLine, Identifier, @@ -52,7 +52,7 @@ pub fn new_operator(value: String, position: i32) -> Token { } } -pub fn new_grouping_sign(value: String, position: i32, token_type: TokenType) -> Token { +pub fn new(value: String, position: i32, token_type: TokenType) -> Token { Token {token_type, value, position} }