Fix bug with escape characters inside strings in the lexer

master
Araozu 2023-03-16 13:56:54 -05:00
parent def93715c7
commit cc4f304d00
3 changed files with 41 additions and 16 deletions

View File

@ -148,6 +148,7 @@ mod tests {
assert_eq!(3, next); assert_eq!(3, next);
assert_eq!(TokenType::VAR, token.token_type); assert_eq!(TokenType::VAR, token.token_type);
assert_eq!("var", token.value); assert_eq!("var", token.value);
assert_eq!(0, token.position);
} else { } else {
panic!() panic!()
} }
@ -158,6 +159,7 @@ mod tests {
assert_eq!(3, next); assert_eq!(3, next);
assert_eq!(TokenType::VAL, token.token_type); assert_eq!(TokenType::VAL, token.token_type);
assert_eq!("val", token.value); assert_eq!("val", token.value);
assert_eq!(0, token.position);
} else { } else {
panic!() panic!()
} }

View File

@ -12,7 +12,13 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
Some(c) if utils::is_operator(*c) => { Some(c) if utils::is_operator(*c) => {
scan_impl(chars, start_pos + 1, utils::str_append(current, *c)) scan_impl(chars, start_pos + 1, utils::str_append(current, *c))
} }
_ => LexResult::Some(token::new_operator(current, start_pos), start_pos), _ => {
// start_pos is the position where the token ENDS, not where it STARTS,
// so this is used to retrieve the original START position of the token
let current_len = current.len();
LexResult::Some(token::new_operator(current, start_pos - current_len), start_pos)
}
} }
} }
@ -41,6 +47,7 @@ mod tests {
assert_eq!(1, next); assert_eq!(1, next);
assert_eq!(TokenType::Operator, token.token_type); assert_eq!(TokenType::Operator, token.token_type);
assert_eq!(op, token.value); assert_eq!(op, token.value);
assert_eq!(0, token.position);
} }
_ => panic!(), _ => panic!(),
} }
@ -63,6 +70,7 @@ mod tests {
assert_eq!(2, next); assert_eq!(2, next);
assert_eq!(TokenType::Operator, token.token_type); assert_eq!(TokenType::Operator, token.token_type);
assert_eq!(op, token.value); assert_eq!(op, token.value);
assert_eq!(0, token.position);
} }
_ => panic!(), _ => panic!(),
} }

View File

@ -13,7 +13,12 @@ pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult { pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
match chars.get(start_pos) { match chars.get(start_pos) {
Some(c) if *c == '"' => { Some(c) if *c == '"' => {
LexResult::Some(token::new_string(current, start_pos), start_pos + 1) // start_pos is the position where the token ENDS, not where it STARTS,
// so this is used to retrieve the original START position of the token
// 1 is added to account for the opening `"`
let current_len = current.len() + 1;
LexResult::Some(token::new_string(current, start_pos - current_len), start_pos + 1)
} }
Some(c) if *c == '\n' => LexResult::Err(LexError { Some(c) if *c == '\n' => LexResult::Err(LexError {
position: start_pos, position: start_pos,
@ -21,10 +26,11 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
}), }),
Some(c) if *c == '\\' => { Some(c) if *c == '\\' => {
if let Some(escape) = test_escape_char(chars, start_pos + 1) { if let Some(escape) = test_escape_char(chars, start_pos + 1) {
scan_impl(chars, start_pos + 2, utils::str_append(current, escape)) // This should only detect an escaped `"`
scan_impl(chars, start_pos + 2, format!("{}{}", current, escape))
} else { } else {
// Ignore the backslash // Ignore the backslash
scan_impl(chars, start_pos + 1, current) scan_impl(chars, start_pos + 1, utils::str_append(current, *c))
} }
} }
Some(c) => scan_impl(chars, start_pos + 1, utils::str_append(current, *c)), Some(c) => scan_impl(chars, start_pos + 1, utils::str_append(current, *c)),
@ -36,14 +42,15 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
} }
/// Checks if the char at `start_pos` is a escape character /// Checks if the char at `start_pos` is a escape character
fn test_escape_char(chars: &Vec<char>, start_pos: usize) -> Option<char> { fn test_escape_char(chars: &Vec<char>, start_pos: usize) -> Option<String> {
if let Some(c) = chars.get(start_pos) { if let Some(c) = chars.get(start_pos) {
match *c { match *c {
'n' => Some('\n'), // Escape sequences ignored: They are passed as is to JS
'"' => Some('"'), 'n' => Some(String::from("\\n")),
'r' => Some('\r'), '"' => Some(String::from("\\\"")),
'\\' => Some('\\'), 'r' => Some(String::from("\\r")),
't' => Some('\t'), '\\' => Some(String::from("\\\\")),
't' => Some(String::from("\\t")),
_ => None, _ => None,
} }
} else { } else {
@ -69,6 +76,7 @@ mod tests {
assert_eq!(2, next); assert_eq!(2, next);
assert_eq!(TokenType::String, token.token_type); assert_eq!(TokenType::String, token.token_type);
assert_eq!("", token.value); assert_eq!("", token.value);
assert_eq!(0, token.position);
} else { } else {
panic!() panic!()
} }
@ -82,6 +90,7 @@ mod tests {
assert_eq!(15, next); assert_eq!(15, next);
assert_eq!(TokenType::String, token.token_type); assert_eq!(TokenType::String, token.token_type);
assert_eq!("Hello, world!", token.value); assert_eq!("Hello, world!", token.value);
assert_eq!(0, token.position);
} else { } else {
panic!() panic!()
} }
@ -105,7 +114,8 @@ mod tests {
if let LexResult::Some(token, next) = scan(&input, start_pos) { if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(14, next); assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type); assert_eq!(TokenType::String, token.token_type);
assert_eq!("Sample\ntext", token.value); assert_eq!("Sample\\ntext", token.value);
assert_eq!(0, token.position);
} else { } else {
panic!() panic!()
} }
@ -115,7 +125,8 @@ mod tests {
if let LexResult::Some(token, next) = scan(&input, start_pos) { if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(14, next); assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type); assert_eq!(TokenType::String, token.token_type);
assert_eq!("Sample\"text", token.value); assert_eq!("Sample\\\"text", token.value);
assert_eq!(0, token.position);
} else { } else {
panic!() panic!()
} }
@ -125,7 +136,8 @@ mod tests {
if let LexResult::Some(token, next) = scan(&input, start_pos) { if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(14, next); assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type); assert_eq!(TokenType::String, token.token_type);
assert_eq!("Sample\rtext", token.value); assert_eq!("Sample\\rtext", token.value);
assert_eq!(0, token.position);
} else { } else {
panic!() panic!()
} }
@ -135,7 +147,8 @@ mod tests {
if let LexResult::Some(token, next) = scan(&input, start_pos) { if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(14, next); assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type); assert_eq!(TokenType::String, token.token_type);
assert_eq!("Sample\\text", token.value); assert_eq!("Sample\\\\text", token.value);
assert_eq!(0, token.position);
} else { } else {
panic!() panic!()
} }
@ -145,7 +158,8 @@ mod tests {
if let LexResult::Some(token, next) = scan(&input, start_pos) { if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(14, next); assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type); assert_eq!(TokenType::String, token.token_type);
assert_eq!("Sample\ttext", token.value); assert_eq!("Sample\\ttext", token.value);
assert_eq!(0, token.position);
} else { } else {
panic!() panic!()
} }
@ -155,7 +169,8 @@ mod tests {
if let LexResult::Some(token, next) = scan(&input, start_pos) { if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(14, next); assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type); assert_eq!(TokenType::String, token.token_type);
assert_eq!("Sample text", token.value); assert_eq!("Sample\\ text", token.value);
assert_eq!(0, token.position);
} else { } else {
panic!() panic!()
} }