diff --git a/src/lexic/mod.rs b/src/lexic/mod.rs index 5a823c8..1ef17c9 100644 --- a/src/lexic/mod.rs +++ b/src/lexic/mod.rs @@ -49,7 +49,7 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult { return next_token(chars, current_pos + 1) } - // Test number + // Scanners None .or_else(|| { scanner::number(next_char, chars, current_pos) @@ -60,6 +60,9 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult { .or_else(|| { scanner::grouping_sign(next_char, chars, current_pos) }) + .or_else(|| { + scanner::identifier(next_char, chars, current_pos) + }) .unwrap_or_else(|| { LexResult::Err(format!("Unrecognized character: {}", next_char)) }) diff --git a/src/lexic/scanner/identifier.rs b/src/lexic/scanner/identifier.rs new file mode 100644 index 0000000..688241c --- /dev/null +++ b/src/lexic/scanner/identifier.rs @@ -0,0 +1,122 @@ +use crate::lexic::{token, utils, LexResult}; + +pub fn scan(start_char: char, chars: &Vec, start_pos: usize) -> LexResult { + scan_impl(chars, start_pos + 1, format!("{}", start_char)) +} + +pub fn scan_impl(chars: &Vec, start_pos: usize, current: String) -> LexResult { + match chars.get(start_pos) { + Some(c) if utils::is_identifier_char(*c) => { + scan_impl(chars, start_pos + 1, utils::str_append(current, *c)) + }, + _ => { + LexResult::Some(token::new_identifier(current, start_pos as i32), start_pos) + } + } +} + + + + +#[cfg(test)] +mod tests { + use crate::lexic::token::TokenType; + + use super::*; + + fn str_to_vec(s: &str) -> Vec { + s.chars().collect() + } + + // Should scan a lenght 1 identifier + #[test] + fn test_1() { + let input = str_to_vec("_"); + let start_pos = 0; + match scan(*input.get(0).unwrap(), &input, start_pos) { + LexResult::Some(token, next) => { + assert_eq!(1, next); + assert_eq!(TokenType::Identifier, token.token_type); + assert_eq!("_", token.value); + }, + _ => panic!() + } + + let input = str_to_vec("i"); + let start_pos = 0; + match scan(*input.get(0).unwrap(), &input, start_pos) { + LexResult::Some(token, next) => { + assert_eq!(1, next); + assert_eq!(TokenType::Identifier, token.token_type); + assert_eq!("i", token.value); + }, + _ => panic!() + } + } + + // Should scan a lenght 2 identifier + #[test] + fn test_2() { + let operators = vec![ + "_a", + "_z", + "_A", + "_Z", + "__", + "_0", + "_9", + "aa", + "az", + "aA", + "aZ", + "a_", + "a0", + "a9", + "za", + "zz", + "zA", + "zZ", + "z_", + "z0", + "z9", + ]; + + for op in operators { + let input = str_to_vec(op); + let start_pos = 0; + match scan(*input.get(0).unwrap(), &input, start_pos) { + LexResult::Some(token, next) => { + assert_eq!(2, next); + assert_eq!(TokenType::Identifier, token.token_type); + assert_eq!(op, token.value); + }, + _ => panic!() + } + } + } + + + // Should scan long identifiers + #[test] + fn test_3() { + let operators = vec![ + "_validIdentifier", + "iterationCount", + "buffer", + "aVeryLongIdentifier2WithSome5Numbers67InBetween1", + ]; + + for op in operators { + let input = str_to_vec(op); + let start_pos = 0; + match scan(*input.get(0).unwrap(), &input, start_pos) { + LexResult::Some(token, next) => { + assert_eq!(input.len(), next); + assert_eq!(TokenType::Identifier, token.token_type); + assert_eq!(op, token.value); + }, + _ => panic!() + } + } + } +} diff --git a/src/lexic/scanner/mod.rs b/src/lexic/scanner/mod.rs index 3656d7f..73c6108 100644 --- a/src/lexic/scanner/mod.rs +++ b/src/lexic/scanner/mod.rs @@ -2,33 +2,17 @@ use super::{token::{TokenType, self}, utils, LexResult}; mod number; mod operator; +mod identifier; /// Attempts to scan a number. Returns None to be able to chain other scanner pub fn number(c: char, chars: &Vec, start_pos: usize) -> Option { - if utils::is_digit(c) { - match number::scan(chars, start_pos) { - Ok((token, next_pos)) => { - Some(LexResult::Some(token, next_pos)) - }, - Err(reason) => { - Some(LexResult::Err(reason)) - }, - } - } - else { - None - } + utils::is_digit(c).then(|| number::scan(chars, start_pos)) } /// Attempts to scan an operator. Returns None to be able to chain other scanner pub fn operator(c: char, chars: &Vec, start_pos: usize) -> Option { - if utils::is_operator(c) { - Some(operator::scan(chars, start_pos)) - } - else { - None - } + utils::is_operator(c).then(|| operator::scan(chars, start_pos)) } @@ -51,3 +35,9 @@ pub fn grouping_sign(c: char, _: &Vec, start_pos: usize) -> Option, start_pos: usize) -> Option { + utils::is_lowercase(c).then(|| identifier::scan(c, chars, start_pos)) +} diff --git a/src/lexic/scanner/number.rs b/src/lexic/scanner/number.rs index e43a557..3920392 100644 --- a/src/lexic/scanner/number.rs +++ b/src/lexic/scanner/number.rs @@ -1,12 +1,10 @@ -use crate::lexic::{token::{Token, self}, utils}; - -type ScanResult = Result<(Token, usize), String>; +use crate::lexic::{token::{Token, self}, utils, LexResult}; /// Function to scan a number /// /// This function assumes that the character at `start_pos` is a number [0-9], /// if not it will panic -pub fn scan(chars: &Vec, start_pos: usize) -> ScanResult { +pub fn scan(chars: &Vec, start_pos: usize) -> LexResult { let next_char_1 = chars.get(start_pos); let next_char_2 = chars.get(start_pos + 1); @@ -23,7 +21,7 @@ pub fn scan(chars: &Vec, start_pos: usize) -> ScanResult { /// Recursively scans an integer. If a dot `.` is found, scans a double, /// if a `e` is found, scans a number in scientific notation -fn scan_decimal(chars: &Vec, start_pos: usize, current: String) -> ScanResult { +fn scan_decimal(chars: &Vec, start_pos: usize, current: String) -> LexResult { match chars.get(start_pos) { Some(c) if *c == '.' => { scan_double(chars, start_pos + 1, utils::str_append(current, *c)) @@ -35,7 +33,7 @@ fn scan_decimal(chars: &Vec, start_pos: usize, current: String) -> ScanRes scan_decimal(chars, start_pos + 1, utils::str_append(current, *c)) }, _ => { - Ok((token::new_number(current, start_pos as i32), start_pos)) + LexResult::Some(token::new_number(current, start_pos as i32), start_pos) } } } @@ -46,12 +44,13 @@ fn scan_decimal(chars: &Vec, start_pos: usize, current: String) -> ScanRes /// This function expects the following on the first call: /// - The char at `start_pos` is a value between [0-9a-fA-F]. If not, will return an error. /// - `current == "0x"`. If not will return an incorrect value, or panic. -fn scan_hex(chars: &Vec, start_pos: usize, current: String) -> ScanResult { +fn scan_hex(chars: &Vec, start_pos: usize, current: String) -> LexResult { match chars.get(start_pos) { Some(c) if utils::is_hex_digit(*c) => { - Ok(scan_hex_digits(chars, start_pos + 1, utils::str_append(current, *c))) + let (t, next) = scan_hex_digits(chars, start_pos + 1, utils::str_append(current, *c)); + LexResult::Some(t, next) }, - _ => Err(String::from("Tried to scan an incomplete hex value")) + _ => LexResult::Err(String::from("Tried to scan an incomplete hex value")) } } @@ -62,21 +61,21 @@ fn scan_hex(chars: &Vec, start_pos: usize, current: String) -> ScanResult /// - `start_pos` is the position after the dot. E.g., if the input is `3.22` then `start_pos == 2`. /// /// Returns a syntax error if the char at `start_pos` is not a value between [0-9] -fn scan_double(chars: &Vec, start_pos: usize, current: String) -> ScanResult { +fn scan_double(chars: &Vec, start_pos: usize, current: String) -> LexResult { match chars.get(start_pos) { Some(c) if utils::is_digit(*c) => { scan_double_impl(chars, start_pos, current) }, Some(_) => { - Err(String::from("The character after the dot when scanning a double is not a number.")) + LexResult::Err(String::from("The character after the dot when scanning a double is not a number.")) }, - _ => Err(String::from("EOF when scanning a double number.")) + _ => LexResult::Err(String::from("EOF when scanning a double number.")) } } // Implementation of scan_double -fn scan_double_impl(chars: &Vec, start_pos: usize, current: String) -> ScanResult { +fn scan_double_impl(chars: &Vec, start_pos: usize, current: String) -> LexResult { match chars.get(start_pos) { Some(c) if utils::is_digit(*c) => { scan_double_impl(chars, start_pos + 1, utils::str_append(current, *c)) @@ -85,7 +84,7 @@ fn scan_double_impl(chars: &Vec, start_pos: usize, current: String) -> Sca scan_scientific(chars, start_pos + 1, utils::str_append(current, *c)) } _ => { - Ok((token::new_number(current, start_pos as i32), start_pos)) + LexResult::Some(token::new_number(current, start_pos as i32), start_pos) } } } @@ -99,16 +98,17 @@ fn scan_double_impl(chars: &Vec, start_pos: usize, current: String) -> Sca /// Returns a syntax error if: /// - The char at `start_pos` is not `+` or `-` /// - The char at `start_pos + 1` is not between [0-9] -fn scan_scientific(chars: &Vec, start_pos: usize, current: String) -> ScanResult { +fn scan_scientific(chars: &Vec, start_pos: usize, current: String) -> LexResult { let next_char_1 = chars.get(start_pos); let next_char_2 = chars.get(start_pos + 1); match (next_char_1, next_char_2) { (Some(c1), Some(c2)) if (*c1 == '+' || *c1 == '-') && utils::is_digit(*c2) => { let new_value = format!("{}{}{}", current, *c1, *c2); - Ok(scan_digits(chars, start_pos + 2, new_value)) + let (t, next) = scan_digits(chars, start_pos + 2, new_value); + LexResult::Some(t, next) }, - _ => Err(String::from("The characters after 'e' are not + or -, or are not followed by a number")) + _ => LexResult::Err(String::from("The characters after 'e' are not + or -, or are not followed by a number")) } } @@ -152,28 +152,31 @@ mod tests { let input = str_to_vec("123"); let start_pos = 0; - let (token, next) = scan(&input, start_pos).unwrap(); + if let LexResult::Some(token, next) = scan(&input, start_pos) { assert_eq!(3, next); assert_eq!(TokenType::Number, token.token_type); assert_eq!("123", token.value); + } else {panic!()} let input = str_to_vec("0123 "); let start_pos = 0; - let (token, next) = scan(&input, start_pos).unwrap(); + if let LexResult::Some(token, next) = scan(&input, start_pos) { assert_eq!(4, next); assert_eq!(TokenType::Number, token.token_type); assert_eq!("0123", token.value); + } else {panic!()} let input = str_to_vec(" 123456 789"); let start_pos = 2; - let (token, next) = scan(&input, start_pos).unwrap(); + if let LexResult::Some(token, next) = scan(&input, start_pos) { assert_eq!(8, next); assert_eq!(TokenType::Number, token.token_type); assert_eq!("123456", token.value); + } else {panic!()} } // Should not scan whitespace after the number @@ -182,10 +185,11 @@ mod tests { let input = str_to_vec("123 "); let start_pos = 0; - let (token, next) = scan(&input, start_pos).unwrap(); + if let LexResult::Some(token, next) = scan(&input, start_pos) { assert_eq!(3, next); assert_eq!(TokenType::Number, token.token_type); assert_eq!("123", token.value); + } else {panic!()} } #[test] @@ -193,19 +197,21 @@ mod tests { let input = str_to_vec("0x20 "); let start_pos = 0; - let (token, next) = scan(&input, start_pos).unwrap(); + if let LexResult::Some(token, next) = scan(&input, start_pos) { assert_eq!(4, next); assert_eq!(TokenType::Number, token.token_type); assert_eq!("0x20", token.value); + } else {panic!()} let input = str_to_vec(" 0Xff23DA "); let start_pos = 4; - let (token, next) = scan(&input, start_pos).unwrap(); + if let LexResult::Some(token, next) = scan(&input, start_pos) { assert_eq!(12, next); assert_eq!(TokenType::Number, token.token_type); assert_eq!("0xff23DA", token.value); + } else {panic!()} } // Should not scan an incomplete hex value @@ -215,16 +221,17 @@ mod tests { let start_pos = 0; match scan(&input, start_pos) { - Ok(_) => panic!(), - Err(reason) => assert_eq!("Tried to scan an incomplete hex value", reason) + LexResult::Err(reason) => assert_eq!("Tried to scan an incomplete hex value", reason), + _ => panic!(), } let input = str_to_vec("0 x20 "); let start_pos = 0; - let (token, _) = scan(&input, start_pos).unwrap(); + if let LexResult::Some(token, _) = scan(&input, start_pos) { assert_eq!(TokenType::Number, token.token_type); assert_eq!("0", token.value); + } else {panic!()} } // Should not scan a hex value if it doesn't start with 0x @@ -232,9 +239,10 @@ mod tests { fn test_hex_3() { let input = str_to_vec("1x20"); let start_pos = 0; - let (token, _) = scan(&input, start_pos).unwrap(); + if let LexResult::Some(token, _) = scan(&input, start_pos) { assert_eq!(TokenType::Number, token.token_type); assert_eq!("1", token.value); + } else {panic!()} } // Should scan a double @@ -242,18 +250,20 @@ mod tests { fn test_double_1() { let input = str_to_vec("3.22"); let start_pos = 0; - let (token, next) = scan(&input, start_pos).unwrap(); + if let LexResult::Some(token, next) = scan(&input, start_pos) { assert_eq!(4, next); assert_eq!(TokenType::Number, token.token_type); assert_eq!("3.22", token.value); + } else {panic!()} let input = str_to_vec("123456.7890 "); let start_pos = 0; - let (token, next) = scan(&input, start_pos).unwrap(); + if let LexResult::Some(token, next) = scan(&input, start_pos) { assert_eq!(11, next); assert_eq!(TokenType::Number, token.token_type); assert_eq!("123456.7890", token.value); + } else {panic!()} } @@ -264,8 +274,8 @@ mod tests { let start_pos = 0; match scan(&input, start_pos) { - Ok(_) => panic!(), - Err(reason) => assert_eq!("The character after the dot when scanning a double is not a number.", reason) + LexResult::Err(reason) => assert_eq!("The character after the dot when scanning a double is not a number.", reason), + _ => panic!(), } @@ -273,8 +283,8 @@ mod tests { let start_pos = 0; match scan(&input, start_pos) { - Ok(_) => panic!(), - Err(reason) => assert_eq!("EOF when scanning a double number.", reason) + LexResult::Err(reason) => assert_eq!("EOF when scanning a double number.", reason), + _ => panic!(), } } @@ -283,32 +293,36 @@ mod tests { fn test_exp_1() { let input = str_to_vec("1e+0"); let start_pos = 0; - let (token, next) = scan(&input, start_pos).unwrap(); + if let LexResult::Some(token, next) = scan(&input, start_pos) { assert_eq!("1e+0", token.value); assert_eq!(4, next); assert_eq!(TokenType::Number, token.token_type); + } else {panic!()} let input = str_to_vec("1e-0"); let start_pos = 0; - let (token, next) = scan(&input, start_pos).unwrap(); + if let LexResult::Some(token, next) = scan(&input, start_pos) { assert_eq!(4, next); assert_eq!(TokenType::Number, token.token_type); assert_eq!("1e-0", token.value); + } else {panic!()} let input = str_to_vec("0e+0"); let start_pos = 0; - let (token, next) = scan(&input, start_pos).unwrap(); + if let LexResult::Some(token, next) = scan(&input, start_pos) { assert_eq!(4, next); assert_eq!(TokenType::Number, token.token_type); assert_eq!("0e+0", token.value); + } else {panic!()} let input = str_to_vec("123498790e+12349870"); let start_pos = 0; - let (token, next) = scan(&input, start_pos).unwrap(); + if let LexResult::Some(token, next) = scan(&input, start_pos) { assert_eq!(19, next); assert_eq!(TokenType::Number, token.token_type); assert_eq!("123498790e+12349870", token.value); + } else {panic!()} } // Should scan a double with decimal part and exponent @@ -316,16 +330,18 @@ mod tests { fn test_exp_2(){ let input = str_to_vec("1.24e+1"); let start_pos = 0; - let (token, next) = scan(&input, start_pos).unwrap(); + if let LexResult::Some(token, next) = scan(&input, start_pos) { assert_eq!("1.24e+1", token.value); assert_eq!(7, next); assert_eq!(TokenType::Number, token.token_type); + } else {panic!()} let input = str_to_vec("0.00000000000001e+1"); let start_pos = 0; - let (token, next) = scan(&input, start_pos).unwrap(); + if let LexResult::Some(token, next) = scan(&input, start_pos) { assert_eq!("0.00000000000001e+1", token.value); assert_eq!(19, next); assert_eq!(TokenType::Number, token.token_type); + } else {panic!()} } } diff --git a/src/lexic/utils.rs b/src/lexic/utils.rs index 9a03711..eea48f2 100644 --- a/src/lexic/utils.rs +++ b/src/lexic/utils.rs @@ -19,6 +19,14 @@ pub fn is_operator(c: char) -> bool { || c == '^' || c == '.' || c == ':' } -pub fn is_grouping_sign(c: char) -> bool { - c == '(' || c == ')' || c == '{' || c == '}' || c == '[' || c == ']' +pub fn is_lowercase(c: char) -> bool { + c >= 'a' && c <= 'z' +} + +pub fn is_uppercase(c: char) -> bool { + c >= 'A' && c <= 'Z' +} + +pub fn is_identifier_char(c: char) -> bool { + is_lowercase(c) || is_uppercase(c) || c == '_' || is_digit(c) } diff --git a/src/token.rs b/src/token.rs index 2f29fec..5ab0b0b 100644 --- a/src/token.rs +++ b/src/token.rs @@ -55,3 +55,11 @@ pub fn new_operator(value: String, position: i32) -> Token { pub fn new_grouping_sign(value: String, position: i32, token_type: TokenType) -> Token { Token {token_type, value, position} } + +pub fn new_identifier(value: String, position: i32) -> Token { + Token { + token_type: TokenType::Identifier, + value, + position, + } +}