refactor: Don't store quotes inside string tokens

This commit is contained in:
Araozu 2024-07-31 10:54:17 -05:00
parent a62d08455b
commit c0e20ad283
7 changed files with 48 additions and 28 deletions

View File

@ -22,6 +22,7 @@ pub enum MistiError {
pub struct LexError {
pub position: usize,
// TODO: Add an end position
pub end_position: usize,
pub reason: String,
}

View File

@ -152,6 +152,7 @@ fn next_token(
.unwrap_or_else(|| {
let error = LexError {
position: current_pos,
end_position: current_pos + 1,
reason: format!(
"Illegal character `{}` (escaped: {})",
next_char,
@ -196,6 +197,7 @@ fn handle_indentation(
// Illegal state: Indentation error
let error = LexError {
position: current_pos,
end_position: current_pos + 1,
reason: format!(
"Indentation error: expected {} spaces, found {}",
new_top, spaces

View File

@ -34,22 +34,27 @@ fn scan_any_except_new_line(
/// and the character at `start_pos + 1` is '*'
pub fn scan_multiline(chars: &Vec<char>, start_pos: usize) -> LexResult {
match multiline_impl(chars, start_pos + 2) {
Some((value, next_position)) => LexResult::Some(
Ok((value, next_position)) => LexResult::Some(
Token::new_multiline_comment(value.iter().collect(), start_pos),
next_position,
),
None => {
Err(last_position) => {
// Throw an error: Incomplete multiline comment
LexResult::Err(LexError {
position: start_pos,
// TODO: add an end_position
end_position: last_position,
reason: "Unfinished multiline commend".into(),
})
}
}
}
fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(Vec<char>, usize)> {
/// Implementation that scans the multiline comment.
///
/// May only error if EOF is found before the comment is finished.
/// If Err, returns the last position where a char was available.
fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Result<(Vec<char>, usize), usize> {
let mut current_position = start_pos;
let mut result = Vec::<char>::new();
@ -61,10 +66,10 @@ fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(Vec<char>, usi
// Scan nested comment
let (mut nested, next_position) =
match multiline_impl(chars, current_position + 2) {
Some(v) => v,
None => {
Ok(v) => v,
Err(pos) => {
// The nested comment is not closed.
return None;
return Err(pos);
}
};
result.push('/');
@ -79,7 +84,7 @@ fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(Vec<char>, usi
result.push('/');
result.push(*c);
}
None => return None,
None => return Err(current_position),
}
}
Some('*') => {
@ -88,7 +93,7 @@ fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(Vec<char>, usi
Some('/') => {
// Create and return the token,
// ignoring the `*/`
return Some((result, current_position + 2));
return Ok((result, current_position + 2));
}
Some(c) => {
// Append both and continue
@ -98,7 +103,7 @@ fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(Vec<char>, usi
}
None => {
// Throw an error
return None;
return Err(current_position);
}
}
}
@ -108,10 +113,7 @@ fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(Vec<char>, usi
current_position += 1;
}
None => {
// TODO: Also return the position where this token ends,
// to display better error messages.
// Requires LexError to implement an end_position field
return None;
return Err(current_position);
}
}
}

View File

@ -53,6 +53,7 @@ fn scan_hex(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
}
_ => LexResult::Err(LexError {
position: start_pos,
end_position: start_pos + 1,
reason: String::from("Tried to scan an incomplete hex value"),
}),
}
@ -69,12 +70,14 @@ fn scan_double(chars: &Vec<char>, start_pos: usize, current: String) -> LexResul
Some(c) if utils::is_digit(*c) => scan_double_impl(chars, start_pos, current),
Some(_) => LexResult::Err(LexError {
position: start_pos,
end_position: start_pos + 1,
reason: String::from(
"The character after the dot when scanning a double is not a number.",
),
}),
_ => LexResult::Err(LexError {
position: start_pos,
end_position: start_pos + 1,
reason: String::from("EOF when scanning a double number."),
}),
}
@ -122,6 +125,7 @@ fn scan_scientific(chars: &Vec<char>, start_pos: usize, current: String) -> LexR
}
_ => LexResult::Err(LexError {
position: start_pos,
end_position: start_pos + 1,
reason: String::from(
"The characters after 'e' are not + or -, or are not followed by a number",
),

View File

@ -7,9 +7,11 @@ use crate::lexic::{utils, LexResult};
/// This function assumes that `start_pos` is after the first double quote,
/// e.g. if the input is `"hello"`, `start_pos == 1`
pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
scan_impl(chars, start_pos, String::from("\""))
scan_impl(chars, start_pos, String::from(""))
}
// TODO: This can be iterative instead of recursive
/// Recursive function that does the scanning
pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
match chars.get(start_pos) {
@ -17,16 +19,16 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
// start_pos is the position where the token ENDS, not where it STARTS,
// so this is used to retrieve the original START position of the token
// 1 is added to account for the opening `"`
let current_len = current.len();
let current_len = current.len() + 1;
let final_str = format!("{}\"", current);
LexResult::Some(
Token::new_string(final_str, start_pos - current_len),
Token::new_string(current, start_pos - current_len),
start_pos + 1,
)
}
Some(c) if *c == '\n' => LexResult::Err(LexError {
position: start_pos,
end_position: start_pos + 1,
reason: String::from("Unexpected new line inside a string."),
}),
Some(c) if *c == '\\' => {
@ -40,6 +42,7 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
Some(c) => scan_impl(chars, start_pos + 1, utils::str_append(current, *c)),
None => LexResult::Err(LexError {
position: start_pos,
end_position: start_pos + 1,
reason: String::from("Incomplete string found"),
}),
}
@ -79,7 +82,7 @@ mod tests {
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(2, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("\"\"", token.value);
assert_eq!("", token.value);
assert_eq!(0, token.position);
} else {
panic!()
@ -93,7 +96,7 @@ mod tests {
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(15, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("\"Hello, world!\"", token.value);
assert_eq!("Hello, world!", token.value);
assert_eq!(0, token.position);
} else {
panic!()
@ -118,7 +121,7 @@ mod tests {
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("\"Sample\\ntext\"", token.value);
assert_eq!("Sample\\ntext", token.value);
assert_eq!(0, token.position);
} else {
panic!()
@ -129,7 +132,7 @@ mod tests {
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("\"Sample\\\"text\"", token.value);
assert_eq!("Sample\\\"text", token.value);
assert_eq!(0, token.position);
} else {
panic!()
@ -140,7 +143,7 @@ mod tests {
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("\"Sample\\rtext\"", token.value);
assert_eq!("Sample\\rtext", token.value);
assert_eq!(0, token.position);
} else {
panic!()
@ -151,7 +154,7 @@ mod tests {
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("\"Sample\\\\text\"", token.value);
assert_eq!("Sample\\\\text", token.value);
assert_eq!(0, token.position);
} else {
panic!()
@ -162,7 +165,7 @@ mod tests {
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("\"Sample\\ttext\"", token.value);
assert_eq!("Sample\\ttext", token.value);
assert_eq!(0, token.position);
} else {
panic!()
@ -173,7 +176,7 @@ mod tests {
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("\"Sample\\ text\"", token.value);
assert_eq!("Sample\\ text", token.value);
assert_eq!(0, token.position);
} else {
panic!()
@ -187,7 +190,7 @@ mod tests {
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("\"Sample\\atext\"", token.value);
assert_eq!("Sample\\atext", token.value);
assert_eq!(0, token.position);
} else {
panic!()

View File

@ -38,7 +38,15 @@ pub struct Token {
impl Token {
pub fn get_end_position(&self) -> usize {
self.position + self.value.len()
match self.token_type {
// 4 extra characters for /* and */
TokenType::MultilineComment => self.position + self.value.len() + 4,
// 2 extra characters for //
TokenType::Comment => self.position + self.value.len() + 2,
// 2 extra characters for ""
TokenType::String => self.position + self.value.len() + 2,
_ => self.position + self.value.len()
}
}
}

View File

@ -66,7 +66,7 @@ mod tests {
match expression {
Ok((Expression::String(value), _)) => {
assert_eq!("\"Hello\"", format!("{}", value))
assert_eq!("Hello", format!("{}", value))
}
_ => panic!(),
}