diff --git a/src/lexic/mod.rs b/src/lexic/mod.rs
index 1ef17c9..423d01e 100644
--- a/src/lexic/mod.rs
+++ b/src/lexic/mod.rs
@@ -51,18 +51,11 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
 
     // Scanners
     None
-        .or_else(|| {
-            scanner::number(next_char, chars, current_pos)
-        })
-        .or_else(|| {
-            scanner::operator(next_char, chars, current_pos)
-        })
-        .or_else(|| {
-            scanner::grouping_sign(next_char, chars, current_pos)
-        })
-        .or_else(|| {
-            scanner::identifier(next_char, chars, current_pos)
-        })
+        .or_else(|| scanner::number(next_char, chars, current_pos))
+        .or_else(|| scanner::identifier(next_char, chars, current_pos))
+        .or_else(|| scanner::string(next_char, chars, current_pos))
+        .or_else(|| scanner::operator(next_char, chars, current_pos))
+        .or_else(|| scanner::grouping_sign(next_char, chars, current_pos))
         .unwrap_or_else(|| {
             LexResult::Err(format!("Unrecognized character: {}", next_char))
         })
diff --git a/src/lexic/scanner/mod.rs b/src/lexic/scanner/mod.rs
index f3f6dce..d9cc455 100644
--- a/src/lexic/scanner/mod.rs
+++ b/src/lexic/scanner/mod.rs
@@ -3,6 +3,7 @@ use super::{token::{TokenType, self}, utils, LexResult};
 mod number;
 mod operator;
 mod identifier;
+mod string;
 
 /// Attempts to scan a number. Returns None to be able to chain other scanner
 pub fn number(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
@@ -41,3 +42,9 @@ pub fn grouping_sign(c: char, _: &Vec<char>, start_pos: usize) -> Option<LexResult> {
 pub fn identifier(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
     utils::is_lowercase(c).then(|| identifier::scan(c, chars, start_pos))
 }
+
+
+pub fn string(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
+    (c == '"').then(|| string::scan(chars, start_pos + 1))
+}
+
diff --git a/src/lexic/scanner/string.rs b/src/lexic/scanner/string.rs
new file mode 100644
index 0000000..6b54a64
--- /dev/null
+++ b/src/lexic/scanner/string.rs
@@ -0,0 +1,172 @@
+use crate::lexic::{
+    token::{self, Token},
+    utils, LexResult,
+};
+
+/// Function to scan a string
+///
+/// This function assumes that `start_pos` is after the first double quote,
+/// e.g. if the input is `"hello"`, `start_pos == 1`
+pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
+    scan_impl(chars, start_pos, String::from(""))
+}
+
+pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
+    match chars.get(start_pos) {
+        Some(c) if *c == '"' => {
+            LexResult::Some(token::new_string(current, start_pos as i32), start_pos + 1)
+        }
+        Some(c) if *c == '\n' => {
+            LexResult::Err(String::from("Unexpected new line inside a string."))
+        }
+        Some(c) if *c == '\\' => {
+            if let Some(escape) = test_escape_char(chars, start_pos + 1) {
+                scan_impl(
+                    chars,
+                    start_pos + 2,
+                    utils::str_append(current, escape),
+                )
+            }
+            else {
+                // Ignore the backslash
+                scan_impl(
+                    chars,
+                    start_pos + 1,
+                    current,
+                )
+            }
+        }
+        Some(c) => {
+            scan_impl(
+                chars,
+                start_pos + 1,
+                utils::str_append(current, *c),
+            )
+        }
+        None => {
+            LexResult::Err(String::from("Incomplete string found"))
+        }
+    }
+}
+
+
+fn test_escape_char(chars: &Vec<char>, start_pos: usize) -> Option<char> {
+    if let Some(c) = chars.get(start_pos) {
+        match *c {
+            'n' => Some('\n'),
+            '"' => Some('"'),
+            'r' => Some('\r'),
+            '\\' => Some('\\'),
+            't' => Some('\t'),
+            _ => None,
+        }
+    }
+    else {
+        None
+    }
+}
+
+
+
+
+#[cfg(test)]
+mod tests {
+    use crate::lexic::token::TokenType;
+
+    use super::*;
+
+    fn str_to_vec(s: &str) -> Vec<char> {
+        s.chars().collect()
+    }
+
+    #[test]
+    fn should_scan_an_empty_string() {
+        let input = str_to_vec("\"\"");
+        let start_pos = 1;
+        if let LexResult::Some(token, next) = scan(&input, start_pos) {
+            assert_eq!(2, next);
+            assert_eq!(TokenType::String, token.token_type);
+            assert_eq!("", token.value);
+        }
+        else {panic!()}
+    }
+
+    #[test]
+    fn should_scan_a_string_with_contents() {
+        let input = str_to_vec("\"Hello, world!\"");
+        let start_pos = 1;
+        if let LexResult::Some(token, next) = scan(&input, start_pos) {
+            assert_eq!(15, next);
+            assert_eq!(TokenType::String, token.token_type);
+            assert_eq!("Hello, world!", token.value);
+        }
+        else {panic!()}
+    }
+
+    #[test]
+    fn should_not_scan_a_new_line() {
+        let input = str_to_vec("\"Hello,\nworld!\"");
+        let start_pos = 1;
+        if let LexResult::Err(reason) = scan(&input, start_pos) {
+            assert_eq!("Unexpected new line inside a string.", reason)
+        }
+        else {panic!()}
+    }
+
+    #[test]
+    fn should_scan_escape_characters() {
+        let input = str_to_vec("\"Sample\\ntext\"");
+        let start_pos = 1;
+        if let LexResult::Some(token, next) = scan(&input, start_pos) {
+            assert_eq!(14, next);
+            assert_eq!(TokenType::String, token.token_type);
+            assert_eq!("Sample\ntext", token.value);
+        }
+        else {panic!()}
+
+        let input = str_to_vec("\"Sample\\\"text\"");
+        let start_pos = 1;
+        if let LexResult::Some(token, next) = scan(&input, start_pos) {
+            assert_eq!(14, next);
+            assert_eq!(TokenType::String, token.token_type);
+            assert_eq!("Sample\"text", token.value);
+        }
+        else {panic!()}
+
+        let input = str_to_vec("\"Sample\\rtext\"");
+        let start_pos = 1;
+        if let LexResult::Some(token, next) = scan(&input, start_pos) {
+            assert_eq!(14, next);
+            assert_eq!(TokenType::String, token.token_type);
+            assert_eq!("Sample\rtext", token.value);
+        }
+        else {panic!()}
+
+        let input = str_to_vec("\"Sample\\\\text\"");
+        let start_pos = 1;
+        if let LexResult::Some(token, next) = scan(&input, start_pos) {
+            assert_eq!(14, next);
+            assert_eq!(TokenType::String, token.token_type);
+            assert_eq!("Sample\\text", token.value);
+        }
+        else {panic!()}
+
+        let input = str_to_vec("\"Sample\\ttext\"");
+        let start_pos = 1;
+        if let LexResult::Some(token, next) = scan(&input, start_pos) {
+            assert_eq!(14, next);
+            assert_eq!(TokenType::String, token.token_type);
+            assert_eq!("Sample\ttext", token.value);
+        }
+        else {panic!()}
+
+        let input = str_to_vec("\"Sample\\ text\"");
+        let start_pos = 1;
+        if let LexResult::Some(token, next) = scan(&input, start_pos) {
+            assert_eq!(14, next);
+            assert_eq!(TokenType::String, token.token_type);
+            assert_eq!("Sample text", token.value);
+        }
+        else {panic!()}
+    }
+}
diff --git a/src/token.rs b/src/token.rs
index 0351c0f..d5f2e0b 100644
--- a/src/token.rs
+++ b/src/token.rs
@@ -63,3 +63,11 @@ pub fn new_identifier(value: String, position: i32) -> Token {
         position,
     }
 }
+
+pub fn new_string(value: String, position: i32) -> Token {
+    Token {
+        token_type: TokenType::String,
+        value,
+        position,
+    }
+}
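
For reviewers, a rough sketch of how the new scanner is meant to be driven through the `scanner::string` entry point added in `src/lexic/scanner/mod.rs`. The `use` path and the `main` wrapper are illustrative assumptions (the diff does not show how `scanner`, `TokenType` and `LexResult` are exported from the crate); only the signatures and `LexResult` variants used below appear in the diff itself.

    use lexic::{scanner, token::TokenType, LexResult}; // hypothetical re-exports

    fn main() {
        // The lexer works over a Vec<char> rather than &str, so collect first.
        let chars: Vec<char> = "\"hello\\nworld\" rest".chars().collect();

        // `scanner::string` only fires when the current char is a double quote;
        // it forwards `start_pos + 1` to `string::scan`, i.e. the char after the quote.
        match scanner::string(chars[0], &chars, 0) {
            Some(LexResult::Some(tok, next)) => {
                assert_eq!(TokenType::String, tok.token_type);
                assert_eq!("hello\nworld", tok.value); // the \n escape was resolved
                assert_eq!(14, next); // index just past the closing quote
            }
            Some(LexResult::Err(reason)) => eprintln!("lex error: {}", reason),
            _ => unreachable!("the first char is a double quote"),
        }
    }

The returned `next` points one past the closing quote, which is what lets `next_token` resume scanning at the character that follows the literal.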