refactor: Don't store quotes inside string tokens
This commit is contained in:
parent
a62d08455b
commit
c0e20ad283
@ -22,6 +22,7 @@ pub enum MistiError {
|
||||
pub struct LexError {
|
||||
pub position: usize,
|
||||
// TODO: Add an end position
|
||||
pub end_position: usize,
|
||||
pub reason: String,
|
||||
}
|
||||
|
||||
|
@ -152,6 +152,7 @@ fn next_token(
|
||||
.unwrap_or_else(|| {
|
||||
let error = LexError {
|
||||
position: current_pos,
|
||||
end_position: current_pos + 1,
|
||||
reason: format!(
|
||||
"Illegal character `{}` (escaped: {})",
|
||||
next_char,
|
||||
@ -196,6 +197,7 @@ fn handle_indentation(
|
||||
// Illegal state: Indentation error
|
||||
let error = LexError {
|
||||
position: current_pos,
|
||||
end_position: current_pos + 1,
|
||||
reason: format!(
|
||||
"Indentation error: expected {} spaces, found {}",
|
||||
new_top, spaces
|
||||
|
@ -34,22 +34,27 @@ fn scan_any_except_new_line(
|
||||
/// and the character at `start_pos + 1` is '*'
|
||||
pub fn scan_multiline(chars: &Vec<char>, start_pos: usize) -> LexResult {
|
||||
match multiline_impl(chars, start_pos + 2) {
|
||||
Some((value, next_position)) => LexResult::Some(
|
||||
Ok((value, next_position)) => LexResult::Some(
|
||||
Token::new_multiline_comment(value.iter().collect(), start_pos),
|
||||
next_position,
|
||||
),
|
||||
None => {
|
||||
Err(last_position) => {
|
||||
// Throw an error: Incomplete multiline comment
|
||||
LexResult::Err(LexError {
|
||||
position: start_pos,
|
||||
// TODO: add an end_position
|
||||
end_position: last_position,
|
||||
reason: "Unfinished multiline commend".into(),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(Vec<char>, usize)> {
|
||||
/// Implementation that scans the multiline comment.
|
||||
///
|
||||
/// May only error if EOF is found before the comment is finished.
|
||||
/// If Err, returns the last position where a char was available.
|
||||
fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Result<(Vec<char>, usize), usize> {
|
||||
let mut current_position = start_pos;
|
||||
let mut result = Vec::<char>::new();
|
||||
|
||||
@ -61,10 +66,10 @@ fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(Vec<char>, usi
|
||||
// Scan nested comment
|
||||
let (mut nested, next_position) =
|
||||
match multiline_impl(chars, current_position + 2) {
|
||||
Some(v) => v,
|
||||
None => {
|
||||
Ok(v) => v,
|
||||
Err(pos) => {
|
||||
// The nested comment is not closed.
|
||||
return None;
|
||||
return Err(pos);
|
||||
}
|
||||
};
|
||||
result.push('/');
|
||||
@ -79,7 +84,7 @@ fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(Vec<char>, usi
|
||||
result.push('/');
|
||||
result.push(*c);
|
||||
}
|
||||
None => return None,
|
||||
None => return Err(current_position),
|
||||
}
|
||||
}
|
||||
Some('*') => {
|
||||
@ -88,7 +93,7 @@ fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(Vec<char>, usi
|
||||
Some('/') => {
|
||||
// Create and return the token,
|
||||
// ignoring the `*/`
|
||||
return Some((result, current_position + 2));
|
||||
return Ok((result, current_position + 2));
|
||||
}
|
||||
Some(c) => {
|
||||
// Append both and continue
|
||||
@ -98,7 +103,7 @@ fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(Vec<char>, usi
|
||||
}
|
||||
None => {
|
||||
// Throw an error
|
||||
return None;
|
||||
return Err(current_position);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -108,10 +113,7 @@ fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(Vec<char>, usi
|
||||
current_position += 1;
|
||||
}
|
||||
None => {
|
||||
// TODO: Also return the position where this token ends,
|
||||
// to display better error messages.
|
||||
// Requires LexError to implement an end_position field
|
||||
return None;
|
||||
return Err(current_position);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -53,6 +53,7 @@ fn scan_hex(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
|
||||
}
|
||||
_ => LexResult::Err(LexError {
|
||||
position: start_pos,
|
||||
end_position: start_pos + 1,
|
||||
reason: String::from("Tried to scan an incomplete hex value"),
|
||||
}),
|
||||
}
|
||||
@ -69,12 +70,14 @@ fn scan_double(chars: &Vec<char>, start_pos: usize, current: String) -> LexResul
|
||||
Some(c) if utils::is_digit(*c) => scan_double_impl(chars, start_pos, current),
|
||||
Some(_) => LexResult::Err(LexError {
|
||||
position: start_pos,
|
||||
end_position: start_pos + 1,
|
||||
reason: String::from(
|
||||
"The character after the dot when scanning a double is not a number.",
|
||||
),
|
||||
}),
|
||||
_ => LexResult::Err(LexError {
|
||||
position: start_pos,
|
||||
end_position: start_pos + 1,
|
||||
reason: String::from("EOF when scanning a double number."),
|
||||
}),
|
||||
}
|
||||
@ -122,6 +125,7 @@ fn scan_scientific(chars: &Vec<char>, start_pos: usize, current: String) -> LexR
|
||||
}
|
||||
_ => LexResult::Err(LexError {
|
||||
position: start_pos,
|
||||
end_position: start_pos + 1,
|
||||
reason: String::from(
|
||||
"The characters after 'e' are not + or -, or are not followed by a number",
|
||||
),
|
||||
|
@ -7,9 +7,11 @@ use crate::lexic::{utils, LexResult};
|
||||
/// This function assumes that `start_pos` is after the first double quote,
|
||||
/// e.g. if the input is `"hello"`, `start_pos == 1`
|
||||
pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
|
||||
scan_impl(chars, start_pos, String::from("\""))
|
||||
scan_impl(chars, start_pos, String::from(""))
|
||||
}
|
||||
|
||||
// TODO: This can be iterative instead of recursive
|
||||
|
||||
/// Recursive function that does the scanning
|
||||
pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
|
||||
match chars.get(start_pos) {
|
||||
@ -17,16 +19,16 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
|
||||
// start_pos is the position where the token ENDS, not where it STARTS,
|
||||
// so this is used to retrieve the original START position of the token
|
||||
// 1 is added to account for the opening `"`
|
||||
let current_len = current.len();
|
||||
let current_len = current.len() + 1;
|
||||
|
||||
let final_str = format!("{}\"", current);
|
||||
LexResult::Some(
|
||||
Token::new_string(final_str, start_pos - current_len),
|
||||
Token::new_string(current, start_pos - current_len),
|
||||
start_pos + 1,
|
||||
)
|
||||
}
|
||||
Some(c) if *c == '\n' => LexResult::Err(LexError {
|
||||
position: start_pos,
|
||||
end_position: start_pos + 1,
|
||||
reason: String::from("Unexpected new line inside a string."),
|
||||
}),
|
||||
Some(c) if *c == '\\' => {
|
||||
@ -40,6 +42,7 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
|
||||
Some(c) => scan_impl(chars, start_pos + 1, utils::str_append(current, *c)),
|
||||
None => LexResult::Err(LexError {
|
||||
position: start_pos,
|
||||
end_position: start_pos + 1,
|
||||
reason: String::from("Incomplete string found"),
|
||||
}),
|
||||
}
|
||||
@ -79,7 +82,7 @@ mod tests {
|
||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||
assert_eq!(2, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("\"\"", token.value);
|
||||
assert_eq!("", token.value);
|
||||
assert_eq!(0, token.position);
|
||||
} else {
|
||||
panic!()
|
||||
@ -93,7 +96,7 @@ mod tests {
|
||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||
assert_eq!(15, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("\"Hello, world!\"", token.value);
|
||||
assert_eq!("Hello, world!", token.value);
|
||||
assert_eq!(0, token.position);
|
||||
} else {
|
||||
panic!()
|
||||
@ -118,7 +121,7 @@ mod tests {
|
||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||
assert_eq!(14, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("\"Sample\\ntext\"", token.value);
|
||||
assert_eq!("Sample\\ntext", token.value);
|
||||
assert_eq!(0, token.position);
|
||||
} else {
|
||||
panic!()
|
||||
@ -129,7 +132,7 @@ mod tests {
|
||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||
assert_eq!(14, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("\"Sample\\\"text\"", token.value);
|
||||
assert_eq!("Sample\\\"text", token.value);
|
||||
assert_eq!(0, token.position);
|
||||
} else {
|
||||
panic!()
|
||||
@ -140,7 +143,7 @@ mod tests {
|
||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||
assert_eq!(14, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("\"Sample\\rtext\"", token.value);
|
||||
assert_eq!("Sample\\rtext", token.value);
|
||||
assert_eq!(0, token.position);
|
||||
} else {
|
||||
panic!()
|
||||
@ -151,7 +154,7 @@ mod tests {
|
||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||
assert_eq!(14, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("\"Sample\\\\text\"", token.value);
|
||||
assert_eq!("Sample\\\\text", token.value);
|
||||
assert_eq!(0, token.position);
|
||||
} else {
|
||||
panic!()
|
||||
@ -162,7 +165,7 @@ mod tests {
|
||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||
assert_eq!(14, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("\"Sample\\ttext\"", token.value);
|
||||
assert_eq!("Sample\\ttext", token.value);
|
||||
assert_eq!(0, token.position);
|
||||
} else {
|
||||
panic!()
|
||||
@ -173,7 +176,7 @@ mod tests {
|
||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||
assert_eq!(14, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("\"Sample\\ text\"", token.value);
|
||||
assert_eq!("Sample\\ text", token.value);
|
||||
assert_eq!(0, token.position);
|
||||
} else {
|
||||
panic!()
|
||||
@ -187,7 +190,7 @@ mod tests {
|
||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||
assert_eq!(14, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("\"Sample\\atext\"", token.value);
|
||||
assert_eq!("Sample\\atext", token.value);
|
||||
assert_eq!(0, token.position);
|
||||
} else {
|
||||
panic!()
|
||||
|
@ -38,7 +38,15 @@ pub struct Token {
|
||||
|
||||
impl Token {
|
||||
pub fn get_end_position(&self) -> usize {
|
||||
self.position + self.value.len()
|
||||
match self.token_type {
|
||||
// 4 extra characters for /* and */
|
||||
TokenType::MultilineComment => self.position + self.value.len() + 4,
|
||||
// 2 extra characters for //
|
||||
TokenType::Comment => self.position + self.value.len() + 2,
|
||||
// 2 extra characters for ""
|
||||
TokenType::String => self.position + self.value.len() + 2,
|
||||
_ => self.position + self.value.len()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -66,7 +66,7 @@ mod tests {
|
||||
|
||||
match expression {
|
||||
Ok((Expression::String(value), _)) => {
|
||||
assert_eq!("\"Hello\"", format!("{}", value))
|
||||
assert_eq!("Hello", format!("{}", value))
|
||||
}
|
||||
_ => panic!(),
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user