Scan strings and escape characters inside strings
This commit is contained in:
parent
f3ee68fcf1
commit
5efcabbfc3
@ -51,18 +51,11 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
|
||||
|
||||
// Scanners
|
||||
None
|
||||
.or_else(|| {
|
||||
scanner::number(next_char, chars, current_pos)
|
||||
})
|
||||
.or_else(|| {
|
||||
scanner::operator(next_char, chars, current_pos)
|
||||
})
|
||||
.or_else(|| {
|
||||
scanner::grouping_sign(next_char, chars, current_pos)
|
||||
})
|
||||
.or_else(|| {
|
||||
scanner::identifier(next_char, chars, current_pos)
|
||||
})
|
||||
.or_else(|| scanner::number(next_char, chars, current_pos))
|
||||
.or_else(|| scanner::identifier(next_char, chars, current_pos))
|
||||
.or_else(|| scanner::string(next_char, chars, current_pos))
|
||||
.or_else(|| scanner::operator(next_char, chars, current_pos))
|
||||
.or_else(|| scanner::grouping_sign(next_char, chars, current_pos))
|
||||
.unwrap_or_else(|| {
|
||||
LexResult::Err(format!("Unrecognized character: {}", next_char))
|
||||
})
|
||||
|
@ -3,6 +3,7 @@ use super::{token::{TokenType, self}, utils, LexResult};
|
||||
mod number;
|
||||
mod operator;
|
||||
mod identifier;
|
||||
mod string;
|
||||
|
||||
/// Attempts to scan a number. Returns None to be able to chain other scanner
|
||||
pub fn number(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||
@ -41,3 +42,9 @@ pub fn grouping_sign(c: char, _: &Vec<char>, start_pos: usize) -> Option<LexResu
|
||||
pub fn identifier(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||
utils::is_lowercase(c).then(|| identifier::scan(c, chars, start_pos))
|
||||
}
|
||||
|
||||
|
||||
pub fn string(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||
(c == '"').then(|| string::scan(chars, start_pos + 1))
|
||||
}
|
||||
|
||||
|
172
src/lexic/scanner/string.rs
Normal file
172
src/lexic/scanner/string.rs
Normal file
@ -0,0 +1,172 @@
|
||||
use crate::lexic::{
|
||||
token::{self, Token},
|
||||
utils, LexResult,
|
||||
};
|
||||
|
||||
/// Function to scan a string
|
||||
///
|
||||
/// This function assumes that `start_pos` is after the first double quote,
|
||||
/// e.g. if the input is `"hello"`, `start_pos == 1`
|
||||
pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
|
||||
scan_impl(chars, start_pos, String::from(""))
|
||||
}
|
||||
|
||||
pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
|
||||
match chars.get(start_pos) {
|
||||
Some(c) if *c == '"' => {
|
||||
LexResult::Some(token::new_string(current, start_pos as i32), start_pos + 1)
|
||||
}
|
||||
Some(c) if *c == '\n' => {
|
||||
LexResult::Err(String::from("Unexpected new line inside a string."))
|
||||
}
|
||||
Some(c) if *c == '\\' => {
|
||||
if let Some(escape) = test_escape_char(chars, start_pos + 1) {
|
||||
scan_impl(
|
||||
chars,
|
||||
start_pos + 2,
|
||||
utils::str_append(current, escape),
|
||||
)
|
||||
}
|
||||
else {
|
||||
// Ignore the backslash
|
||||
scan_impl(
|
||||
chars,
|
||||
start_pos + 1,
|
||||
current,
|
||||
)
|
||||
}
|
||||
}
|
||||
Some(c) => {
|
||||
scan_impl(
|
||||
chars,
|
||||
start_pos + 1,
|
||||
utils::str_append(current, *c),
|
||||
)
|
||||
}
|
||||
None => {
|
||||
LexResult::Err(String::from("Incomplete string found"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Translates the character at `start_pos` — expected to be the one right
/// after a backslash — into the character it escapes.
///
/// Returns `Some` for the supported escapes (`n`, `"`, `r`, `\`, `t`) and
/// `None` for any other character or when `start_pos` is out of bounds.
///
/// Takes a slice so it works with any contiguous `char` storage; a
/// `&Vec<char>` argument still coerces to `&[char]`.
fn test_escape_char(chars: &[char], start_pos: usize) -> Option<char> {
    match *chars.get(start_pos)? {
        'n' => Some('\n'),
        '"' => Some('"'),
        'r' => Some('\r'),
        '\\' => Some('\\'),
        't' => Some('\t'),
        _ => None,
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::lexic::token::TokenType;

    use super::*;

    fn str_to_vec(s: &str) -> Vec<char> {
        s.chars().collect()
    }

    /// Scans `input` (which must begin with the opening double quote) from
    /// position 1 and asserts that a string token with `expected_value` is
    /// produced and that scanning stops at `expected_next`.
    fn assert_scans(input: &str, expected_value: &str, expected_next: usize) {
        let chars = str_to_vec(input);
        match scan(&chars, 1) {
            LexResult::Some(token, next) => {
                assert_eq!(expected_next, next);
                assert_eq!(TokenType::String, token.token_type);
                assert_eq!(expected_value, token.value);
            }
            _ => panic!("expected {:?} to be scanned as a string token", input),
        }
    }

    /// Scans `input` from position 1 and asserts that it fails with
    /// `expected_reason`.
    fn assert_fails(input: &str, expected_reason: &str) {
        let chars = str_to_vec(input);
        match scan(&chars, 1) {
            LexResult::Err(reason) => assert_eq!(expected_reason, reason),
            _ => panic!("expected scanning {:?} to fail", input),
        }
    }

    #[test]
    fn should_scan_an_empty_string() {
        assert_scans("\"\"", "", 2);
    }

    #[test]
    fn should_scan_a_string_with_contents() {
        assert_scans("\"Hello, world!\"", "Hello, world!", 15);
    }

    #[test]
    fn should_not_scan_a_new_line() {
        assert_fails("\"Hello,\nworld!\"", "Unexpected new line inside a string.");
    }

    #[test]
    fn should_not_scan_an_unterminated_string() {
        assert_fails("\"Hello", "Incomplete string found");
    }

    #[test]
    fn should_scan_escape_characters() {
        assert_scans("\"Sample\\ntext\"", "Sample\ntext", 14);
        assert_scans("\"Sample\\\"text\"", "Sample\"text", 14);
        assert_scans("\"Sample\\rtext\"", "Sample\rtext", 14);
        assert_scans("\"Sample\\\\text\"", "Sample\\text", 14);
        assert_scans("\"Sample\\ttext\"", "Sample\ttext", 14);
        // An unknown escape drops the backslash and keeps the next character
        assert_scans("\"Sample\\ text\"", "Sample text", 14);
    }
}
|
@ -63,3 +63,11 @@ pub fn new_identifier(value: String, position: i32) -> Token {
|
||||
position,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_string(value: String, position: i32) -> Token {
|
||||
Token {
|
||||
token_type: TokenType::String,
|
||||
value,
|
||||
position,
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user