Scan strings and escape characters inside string

master
Araozu 2022-12-01 12:53:14 -05:00
parent f3ee68fcf1
commit 5efcabbfc3
4 changed files with 192 additions and 12 deletions

@@ -51,18 +51,11 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
     // Scanners
     None
-        .or_else(|| {
-            scanner::number(next_char, chars, current_pos)
-        })
-        .or_else(|| {
-            scanner::operator(next_char, chars, current_pos)
-        })
-        .or_else(|| {
-            scanner::grouping_sign(next_char, chars, current_pos)
-        })
-        .or_else(|| {
-            scanner::identifier(next_char, chars, current_pos)
-        })
+        .or_else(|| scanner::number(next_char, chars, current_pos))
+        .or_else(|| scanner::identifier(next_char, chars, current_pos))
+        .or_else(|| scanner::string(next_char, chars, current_pos))
+        .or_else(|| scanner::operator(next_char, chars, current_pos))
+        .or_else(|| scanner::grouping_sign(next_char, chars, current_pos))
         .unwrap_or_else(|| {
             LexResult::Err(format!("Unrecognized character: {}", next_char))
         })
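The refactor above flattens the scanner dispatch: each scanner returns an `Option<LexResult>`, and `Option::or_else` only evaluates the next closure when the previous one returned `None`, with the new `scanner::string` now part of the chain. A minimal, self-contained sketch of the same dispatch pattern (the names and return type here are illustrative, not the crate's API):

// Illustrative sketch of the or_else dispatch used above:
// the first closure that returns Some(..) wins, later ones never run.
fn classify(c: char) -> String {
    None
        .or_else(|| c.is_ascii_digit().then(|| format!("number starting with {}", c)))
        .or_else(|| (c == '"').then(|| String::from("string start")))
        .or_else(|| c.is_alphabetic().then(|| format!("identifier starting with {}", c)))
        .unwrap_or_else(|| format!("unrecognized character: {}", c))
}

fn main() {
    assert_eq!(classify('7'), "number starting with 7");
    assert_eq!(classify('"'), "string start");
    assert_eq!(classify('x'), "identifier starting with x");
    assert_eq!(classify('%'), "unrecognized character: %");
}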

@@ -3,6 +3,7 @@ use super::{token::{TokenType, self}, utils, LexResult};
 mod number;
 mod operator;
 mod identifier;
+mod string;
 
 /// Attempts to scan a number. Returns None to be able to chain other scanner
 pub fn number(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
@@ -41,3 +42,9 @@ pub fn grouping_sign(c: char, _: &Vec<char>, start_pos: usize) -> Option<LexResult> {
 pub fn identifier(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
     utils::is_lowercase(c).then(|| identifier::scan(c, chars, start_pos))
 }
+
+pub fn string(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
+    (c == '"').then(|| string::scan(chars, start_pos + 1))
+}
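Each entry point gates its scanner with `bool::then`, which turns the guard into an `Option`: `Some(..)` when the predicate holds, `None` otherwise, so the chain in `next_token` can fall through to the next scanner. A tiny standalone illustration (the closure and its return value are made up for the example):

fn main() {
    // bool::then evaluates the closure only when the bool is true.
    let starts_string = |c: char| (c == '"').then(|| "scan a string literal");
    assert_eq!(starts_string('"'), Some("scan a string literal"));
    assert_eq!(starts_string('x'), None);
}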

src/lexic/scanner/string.rs (new file, +172 lines)

@@ -0,0 +1,172 @@
use crate::lexic::{
    token::{self, Token},
    utils, LexResult,
};

/// Function to scan a string
///
/// This function assumes that `start_pos` is after the first double quote,
/// e.g. if the input is `"hello"`, `start_pos == 1`
pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
    scan_impl(chars, start_pos, String::from(""))
}

pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
    match chars.get(start_pos) {
        Some(c) if *c == '"' => {
            LexResult::Some(token::new_string(current, start_pos as i32), start_pos + 1)
        }
        Some(c) if *c == '\n' => {
            LexResult::Err(String::from("Unexpected new line inside a string."))
        }
        Some(c) if *c == '\\' => {
            if let Some(escape) = test_escape_char(chars, start_pos + 1) {
                scan_impl(
                    chars,
                    start_pos + 2,
                    utils::str_append(current, escape),
                )
            }
            else {
                // Ignore the backslash
                scan_impl(
                    chars,
                    start_pos + 1,
                    current,
                )
            }
        }
        Some(c) => {
            scan_impl(
                chars,
                start_pos + 1,
                utils::str_append(current, *c),
            )
        }
        None => {
            LexResult::Err(String::from("Incomplete string found"))
        }
    }
}

fn test_escape_char(chars: &Vec<char>, start_pos: usize) -> Option<char> {
    if let Some(c) = chars.get(start_pos) {
        match *c {
            'n' => Some('\n'),
            '"' => Some('"'),
            'r' => Some('\r'),
            '\\' => Some('\\'),
            't' => Some('\t'),
            _ => None,
        }
    }
    else {
        None
    }
}

#[cfg(test)]
mod tests {
    use crate::lexic::token::TokenType;
    use super::*;

    fn str_to_vec(s: &str) -> Vec<char> {
        s.chars().collect()
    }

    #[test]
    fn should_scan_an_empty_string() {
        let input = str_to_vec("\"\"");
        let start_pos = 1;
        if let LexResult::Some(token, next) = scan(&input, start_pos) {
            assert_eq!(2, next);
            assert_eq!(TokenType::String, token.token_type);
            assert_eq!("", token.value);
        }
        else {panic!()}
    }

    #[test]
    fn should_scan_a_string_with_contents() {
        let input = str_to_vec("\"Hello, world!\"");
        let start_pos = 1;
        if let LexResult::Some(token, next) = scan(&input, start_pos) {
            assert_eq!(15, next);
            assert_eq!(TokenType::String, token.token_type);
            assert_eq!("Hello, world!", token.value);
        }
        else {panic!()}
    }

    #[test]
    fn should_not_scan_a_new_line() {
        let input = str_to_vec("\"Hello,\nworld!\"");
        let start_pos = 1;
        if let LexResult::Err(reason) = scan(&input, start_pos) {
            assert_eq!("Unexpected new line inside a string.", reason)
        }
        else {panic!()}
    }

    #[test]
    fn should_scan_escape_characters() {
        let input = str_to_vec("\"Sample\\ntext\"");
        let start_pos = 1;
        if let LexResult::Some(token, next) = scan(&input, start_pos) {
            assert_eq!(14, next);
            assert_eq!(TokenType::String, token.token_type);
            assert_eq!("Sample\ntext", token.value);
        }
        else {panic!()}

        let input = str_to_vec("\"Sample\\\"text\"");
        let start_pos = 1;
        if let LexResult::Some(token, next) = scan(&input, start_pos) {
            assert_eq!(14, next);
            assert_eq!(TokenType::String, token.token_type);
            assert_eq!("Sample\"text", token.value);
        }
        else {panic!()}

        let input = str_to_vec("\"Sample\\rtext\"");
        let start_pos = 1;
        if let LexResult::Some(token, next) = scan(&input, start_pos) {
            assert_eq!(14, next);
            assert_eq!(TokenType::String, token.token_type);
            assert_eq!("Sample\rtext", token.value);
        }
        else {panic!()}

        let input = str_to_vec("\"Sample\\\\text\"");
        let start_pos = 1;
        if let LexResult::Some(token, next) = scan(&input, start_pos) {
            assert_eq!(14, next);
            assert_eq!(TokenType::String, token.token_type);
            assert_eq!("Sample\\text", token.value);
        }
        else {panic!()}

        let input = str_to_vec("\"Sample\\ttext\"");
        let start_pos = 1;
        if let LexResult::Some(token, next) = scan(&input, start_pos) {
            assert_eq!(14, next);
            assert_eq!(TokenType::String, token.token_type);
            assert_eq!("Sample\ttext", token.value);
        }
        else {panic!()}

        let input = str_to_vec("\"Sample\\ text\"");
        let start_pos = 1;
        if let LexResult::Some(token, next) = scan(&input, start_pos) {
            assert_eq!(14, next);
            assert_eq!(TokenType::String, token.token_type);
            assert_eq!("Sample text", token.value);
        }
        else {panic!()}
    }
}
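`scan_impl` above walks the input one character per recursive call, accumulating into `current`, mapping recognized escapes, dropping the backslash of unknown escapes, and bailing out on a bare newline or on end of input. A loop-based equivalent of the same logic, written as a self-contained sketch (not the crate's code, and using a plain `Result` instead of `LexResult`):

// Illustrative, iterative rewrite of the recursive string scanner above.
// Returns (string contents, index just past the closing quote) or an error.
fn scan_string(chars: &[char], mut pos: usize) -> Result<(String, usize), String> {
    let mut out = String::new();
    loop {
        match chars.get(pos).copied() {
            Some('"') => return Ok((out, pos + 1)),
            Some('\n') => return Err(String::from("Unexpected new line inside a string.")),
            Some('\\') => match chars.get(pos + 1).copied() {
                Some('n') => { out.push('\n'); pos += 2; }
                Some('"') => { out.push('"'); pos += 2; }
                Some('r') => { out.push('\r'); pos += 2; }
                Some('\\') => { out.push('\\'); pos += 2; }
                Some('t') => { out.push('\t'); pos += 2; }
                // Unknown escape: drop the backslash and keep scanning
                _ => pos += 1,
            },
            Some(c) => { out.push(c); pos += 1; }
            None => return Err(String::from("Incomplete string found")),
        }
    }
}

fn main() {
    let input: Vec<char> = "\"a\\nb\"".chars().collect();
    // Same convention as scan(): start one position past the opening quote.
    assert_eq!(scan_string(&input, 1), Ok((String::from("a\nb"), 6)));
}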

@@ -63,3 +63,11 @@ pub fn new_identifier(value: String, position: i32) -> Token {
         position,
     }
 }
+
+pub fn new_string(value: String, position: i32) -> Token {
+    Token {
+        token_type: TokenType::String,
+        value,
+        position,
+    }
+}