refactor: Dont store quotes inside string tokens
Commit c0e20ad283 (parent a62d08455b)
@@ -22,6 +22,7 @@ pub enum MistiError {
 pub struct LexError {
     pub position: usize,
     // TODO: Add and end position
+    pub end_position: usize,
     pub reason: String,
 }
 
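
For context, a minimal, self-contained sketch (not code from this repository) of how a consumer of LexError could use the new field. It assumes the span is half-open, i.e. `position..end_position`, which is what the `current_pos + 1` defaults in the hunks below suggest.

// Hypothetical reporter, for illustration only; the real LexError lives in this
// crate and has exactly the fields shown in the hunk above.
struct LexError {
    position: usize,
    end_position: usize,
    reason: String,
}

fn report(source: &str, err: &LexError) {
    // Assumes a half-open range: position..end_position.
    let offending: String = source
        .chars()
        .skip(err.position)
        .take(err.end_position - err.position)
        .collect();
    println!(
        "error at {}..{}: {} (offending text: {:?})",
        err.position, err.end_position, err.reason, offending
    );
}

fn main() {
    let source = "val x = 3 $ 4";
    let err = LexError {
        position: 10,
        end_position: 11,
        reason: String::from("Illegal character `$`"),
    };
    report(source, &err); // error at 10..11: Illegal character `$` (offending text: "$")
}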
@@ -152,6 +152,7 @@ fn next_token(
         .unwrap_or_else(|| {
             let error = LexError {
                 position: current_pos,
+                end_position: current_pos + 1,
                 reason: format!(
                     "Illegal character `{}` (escaped: {})",
                     next_char,
@@ -196,6 +197,7 @@ fn handle_indentation(
         // Illegal state: Indentation error
         let error = LexError {
             position: current_pos,
+            end_position: current_pos + 1,
             reason: format!(
                 "Indentation error: expected {} spaces, found {}",
                 new_top, spaces
@@ -34,22 +34,27 @@ fn scan_any_except_new_line(
 /// and the character at `start_pos + 1` is '*'
 pub fn scan_multiline(chars: &Vec<char>, start_pos: usize) -> LexResult {
     match multiline_impl(chars, start_pos + 2) {
-        Some((value, next_position)) => LexResult::Some(
+        Ok((value, next_position)) => LexResult::Some(
             Token::new_multiline_comment(value.iter().collect(), start_pos),
             next_position,
         ),
-        None => {
+        Err(last_position) => {
             // Throw an error: Incomplete multiline comment
             LexResult::Err(LexError {
                 position: start_pos,
                 // TODO: add an end_position
+                end_position: last_position,
                 reason: "Unfinished multiline commend".into(),
             })
         }
     }
 }
 
-fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(Vec<char>, usize)> {
+/// Implementation that scans the multiline comment.
+///
+/// May only error if EOF is found before the comment is finished.
+/// If Err, returns the last position where a char was available.
+fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Result<(Vec<char>, usize), usize> {
     let mut current_position = start_pos;
     let mut result = Vec::<char>::new();
 
@@ -61,10 +66,10 @@ fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(Vec<char>, usi
                 // Scan nested comment
                 let (mut nested, next_position) =
                     match multiline_impl(chars, current_position + 2) {
-                        Some(v) => v,
-                        None => {
+                        Ok(v) => v,
+                        Err(pos) => {
                             // The nested comment is not closed.
-                            return None;
+                            return Err(pos);
                         }
                     };
                 result.push('/');
@@ -79,7 +84,7 @@ fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(Vec<char>, usi
                         result.push('/');
                         result.push(*c);
                     }
-                    None => return None,
+                    None => return Err(current_position),
                 }
             }
             Some('*') => {
@@ -88,7 +93,7 @@ fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(Vec<char>, usi
                     Some('/') => {
                         // Create and return the token,
                         // ignoring the `*/`
-                        return Some((result, current_position + 2));
+                        return Ok((result, current_position + 2));
                     }
                     Some(c) => {
                         // Append both and continue
@@ -98,7 +103,7 @@ fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(Vec<char>, usi
                     }
                     None => {
                         // Throw an error
-                        return None;
+                        return Err(current_position);
                     }
                 }
             }
@@ -108,10 +113,7 @@ fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(Vec<char>, usi
                 current_position += 1;
             }
             None => {
-                // TODO: Also return the position where this token ends,
-                // to display better error messages.
-                // Requires LexError to implement an end_position field
-                return None;
+                return Err(current_position);
             }
         }
     }
 }
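
The Ok/Err convention above can be exercised in isolation. Below is a simplified, self-contained sketch of the same idea, not the crate's multiline_impl: on success it returns the comment body plus the next position, and on EOF it returns the position where scanning stopped so the caller can fill end_position. Nested `/* */` comments and the real LexResult/Token types are left out.

// Simplified stand-in for illustration: scans from just after the opening `/*`.
fn scan_body(chars: &[char], mut pos: usize) -> Result<(Vec<char>, usize), usize> {
    let mut body = Vec::new();
    loop {
        match (chars.get(pos), chars.get(pos + 1)) {
            // Closing `*/` found: return the body and the position after `*/`.
            (Some('*'), Some('/')) => return Ok((body, pos + 2)),
            // Any other character is part of the comment body.
            (Some(c), _) => {
                body.push(*c);
                pos += 1;
            }
            // EOF before `*/`: report how far the scan got.
            (None, _) => return Err(pos),
        }
    }
}

fn main() {
    let ok: Vec<char> = "/* hi */".chars().collect();
    let bad: Vec<char> = "/* oops".chars().collect();
    assert_eq!(scan_body(&ok, 2), Ok((" hi ".chars().collect::<Vec<char>>(), 8)));
    assert_eq!(scan_body(&bad, 2), Err(7));
}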
@@ -53,6 +53,7 @@ fn scan_hex(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
         }
         _ => LexResult::Err(LexError {
             position: start_pos,
+            end_position: start_pos + 1,
             reason: String::from("Tried to scan an incomplete hex value"),
         }),
     }
@@ -69,12 +70,14 @@ fn scan_double(chars: &Vec<char>, start_pos: usize, current: String) -> LexResul
         Some(c) if utils::is_digit(*c) => scan_double_impl(chars, start_pos, current),
         Some(_) => LexResult::Err(LexError {
             position: start_pos,
+            end_position: start_pos + 1,
             reason: String::from(
                 "The character after the dot when scanning a double is not a number.",
             ),
         }),
         _ => LexResult::Err(LexError {
             position: start_pos,
+            end_position: start_pos + 1,
             reason: String::from("EOF when scanning a double number."),
         }),
     }
@@ -122,6 +125,7 @@ fn scan_scientific(chars: &Vec<char>, start_pos: usize, current: String) -> LexR
         }
         _ => LexResult::Err(LexError {
             position: start_pos,
+            end_position: start_pos + 1,
             reason: String::from(
                 "The characters after 'e' are not + or -, or are not followed by a number",
             ),
@ -7,9 +7,11 @@ use crate::lexic::{utils, LexResult};
|
|||||||
/// This function assumes that `start_pos` is after the first double quote,
|
/// This function assumes that `start_pos` is after the first double quote,
|
||||||
/// e.g. if the input is `"hello"`, `start_pos == 1`
|
/// e.g. if the input is `"hello"`, `start_pos == 1`
|
||||||
pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
|
pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
|
||||||
scan_impl(chars, start_pos, String::from("\""))
|
scan_impl(chars, start_pos, String::from(""))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: This can be iterative instead of recursive
|
||||||
|
|
||||||
/// Recursive function that does the scanning
|
/// Recursive function that does the scanning
|
||||||
pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
|
pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
|
||||||
match chars.get(start_pos) {
|
match chars.get(start_pos) {
|
||||||
@ -17,16 +19,16 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
|
|||||||
// start_pos is the position where the token ENDS, not where it STARTS,
|
// start_pos is the position where the token ENDS, not where it STARTS,
|
||||||
// so this is used to retrieve the original START position of the token
|
// so this is used to retrieve the original START position of the token
|
||||||
// 1 is added to account for the opening `"`
|
// 1 is added to account for the opening `"`
|
||||||
let current_len = current.len();
|
let current_len = current.len() + 1;
|
||||||
|
|
||||||
let final_str = format!("{}\"", current);
|
|
||||||
LexResult::Some(
|
LexResult::Some(
|
||||||
Token::new_string(final_str, start_pos - current_len),
|
Token::new_string(current, start_pos - current_len),
|
||||||
start_pos + 1,
|
start_pos + 1,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
Some(c) if *c == '\n' => LexResult::Err(LexError {
|
Some(c) if *c == '\n' => LexResult::Err(LexError {
|
||||||
position: start_pos,
|
position: start_pos,
|
||||||
|
end_position: start_pos + 1,
|
||||||
reason: String::from("Unexpected new line inside a string."),
|
reason: String::from("Unexpected new line inside a string."),
|
||||||
}),
|
}),
|
||||||
Some(c) if *c == '\\' => {
|
Some(c) if *c == '\\' => {
|
||||||
@ -40,6 +42,7 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
|
|||||||
Some(c) => scan_impl(chars, start_pos + 1, utils::str_append(current, *c)),
|
Some(c) => scan_impl(chars, start_pos + 1, utils::str_append(current, *c)),
|
||||||
None => LexResult::Err(LexError {
|
None => LexResult::Err(LexError {
|
||||||
position: start_pos,
|
position: start_pos,
|
||||||
|
end_position: start_pos + 1,
|
||||||
reason: String::from("Incomplete string found"),
|
reason: String::from("Incomplete string found"),
|
||||||
}),
|
}),
|
||||||
}
|
}
|
||||||
@ -79,7 +82,7 @@ mod tests {
|
|||||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||||
assert_eq!(2, next);
|
assert_eq!(2, next);
|
||||||
assert_eq!(TokenType::String, token.token_type);
|
assert_eq!(TokenType::String, token.token_type);
|
||||||
assert_eq!("\"\"", token.value);
|
assert_eq!("", token.value);
|
||||||
assert_eq!(0, token.position);
|
assert_eq!(0, token.position);
|
||||||
} else {
|
} else {
|
||||||
panic!()
|
panic!()
|
||||||
@ -93,7 +96,7 @@ mod tests {
|
|||||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||||
assert_eq!(15, next);
|
assert_eq!(15, next);
|
||||||
assert_eq!(TokenType::String, token.token_type);
|
assert_eq!(TokenType::String, token.token_type);
|
||||||
assert_eq!("\"Hello, world!\"", token.value);
|
assert_eq!("Hello, world!", token.value);
|
||||||
assert_eq!(0, token.position);
|
assert_eq!(0, token.position);
|
||||||
} else {
|
} else {
|
||||||
panic!()
|
panic!()
|
||||||
@ -118,7 +121,7 @@ mod tests {
|
|||||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||||
assert_eq!(14, next);
|
assert_eq!(14, next);
|
||||||
assert_eq!(TokenType::String, token.token_type);
|
assert_eq!(TokenType::String, token.token_type);
|
||||||
assert_eq!("\"Sample\\ntext\"", token.value);
|
assert_eq!("Sample\\ntext", token.value);
|
||||||
assert_eq!(0, token.position);
|
assert_eq!(0, token.position);
|
||||||
} else {
|
} else {
|
||||||
panic!()
|
panic!()
|
||||||
@ -129,7 +132,7 @@ mod tests {
|
|||||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||||
assert_eq!(14, next);
|
assert_eq!(14, next);
|
||||||
assert_eq!(TokenType::String, token.token_type);
|
assert_eq!(TokenType::String, token.token_type);
|
||||||
assert_eq!("\"Sample\\\"text\"", token.value);
|
assert_eq!("Sample\\\"text", token.value);
|
||||||
assert_eq!(0, token.position);
|
assert_eq!(0, token.position);
|
||||||
} else {
|
} else {
|
||||||
panic!()
|
panic!()
|
||||||
@ -140,7 +143,7 @@ mod tests {
|
|||||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||||
assert_eq!(14, next);
|
assert_eq!(14, next);
|
||||||
assert_eq!(TokenType::String, token.token_type);
|
assert_eq!(TokenType::String, token.token_type);
|
||||||
assert_eq!("\"Sample\\rtext\"", token.value);
|
assert_eq!("Sample\\rtext", token.value);
|
||||||
assert_eq!(0, token.position);
|
assert_eq!(0, token.position);
|
||||||
} else {
|
} else {
|
||||||
panic!()
|
panic!()
|
||||||
@ -151,7 +154,7 @@ mod tests {
|
|||||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||||
assert_eq!(14, next);
|
assert_eq!(14, next);
|
||||||
assert_eq!(TokenType::String, token.token_type);
|
assert_eq!(TokenType::String, token.token_type);
|
||||||
assert_eq!("\"Sample\\\\text\"", token.value);
|
assert_eq!("Sample\\\\text", token.value);
|
||||||
assert_eq!(0, token.position);
|
assert_eq!(0, token.position);
|
||||||
} else {
|
} else {
|
||||||
panic!()
|
panic!()
|
||||||
@ -162,7 +165,7 @@ mod tests {
|
|||||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||||
assert_eq!(14, next);
|
assert_eq!(14, next);
|
||||||
assert_eq!(TokenType::String, token.token_type);
|
assert_eq!(TokenType::String, token.token_type);
|
||||||
assert_eq!("\"Sample\\ttext\"", token.value);
|
assert_eq!("Sample\\ttext", token.value);
|
||||||
assert_eq!(0, token.position);
|
assert_eq!(0, token.position);
|
||||||
} else {
|
} else {
|
||||||
panic!()
|
panic!()
|
||||||
@ -173,7 +176,7 @@ mod tests {
|
|||||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||||
assert_eq!(14, next);
|
assert_eq!(14, next);
|
||||||
assert_eq!(TokenType::String, token.token_type);
|
assert_eq!(TokenType::String, token.token_type);
|
||||||
assert_eq!("\"Sample\\ text\"", token.value);
|
assert_eq!("Sample\\ text", token.value);
|
||||||
assert_eq!(0, token.position);
|
assert_eq!(0, token.position);
|
||||||
} else {
|
} else {
|
||||||
panic!()
|
panic!()
|
||||||
@ -187,7 +190,7 @@ mod tests {
|
|||||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||||
assert_eq!(14, next);
|
assert_eq!(14, next);
|
||||||
assert_eq!(TokenType::String, token.token_type);
|
assert_eq!(TokenType::String, token.token_type);
|
||||||
assert_eq!("\"Sample\\atext\"", token.value);
|
assert_eq!("Sample\\atext", token.value);
|
||||||
assert_eq!(0, token.position);
|
assert_eq!(0, token.position);
|
||||||
} else {
|
} else {
|
||||||
panic!()
|
panic!()
|
||||||
|
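
To make the new bookkeeping concrete: the token value now holds only the contents between the quotes, while the reported position still points at the opening `"` (hence `current.len() + 1`) and the returned next position is the index just past the closing `"`. A stripped-down, self-contained scanner showing the same arithmetic (escape sequences omitted; this is not the crate's scan_impl):

// `start_pos` is the index *after* the opening quote, as in scan() above.
fn scan_string(chars: &[char], start_pos: usize) -> Option<(String, usize, usize)> {
    let mut value = String::new();
    let mut pos = start_pos;
    while let Some(c) = chars.get(pos) {
        if *c == '"' {
            // +1 accounts for the opening quote, which is no longer stored in `value`.
            let start = pos - (value.len() + 1);
            return Some((value, start, pos + 1));
        }
        value.push(*c);
        pos += 1;
    }
    None // unterminated string
}

fn main() {
    let input: Vec<char> = "\"Hello, world!\"".chars().collect();
    let (value, start, next) = scan_string(&input, 1).unwrap();
    assert_eq!(value, "Hello, world!"); // quotes are not part of the token value
    assert_eq!(start, 0);               // still the position of the opening quote
    assert_eq!(next, 15);               // index just past the closing quote
}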
@@ -38,7 +38,15 @@ pub struct Token {
 
 impl Token {
     pub fn get_end_position(&self) -> usize {
-        self.position + self.value.len()
+        match self.token_type {
+            // 4 extra characters for /* and */
+            TokenType::MultilineComment => self.position + self.value.len() + 4,
+            // 2 extra characters for //
+            TokenType::Comment => self.position + self.value.len() + 2,
+            // 2 extra characters for ""
+            TokenType::String => self.position + self.value.len() + 2,
+            _ => self.position + self.value.len()
+        }
     }
 }
 
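
Because the delimiters are no longer stored in value, get_end_position compensates per token type so the reported span still covers the full source text. A reduced, runnable mirror of the match above (the real Token and TokenType have more fields and variants):

#[allow(dead_code)]
enum TokenType {
    String,
    Comment,
    MultilineComment,
    Identifier,
}

struct Token {
    token_type: TokenType,
    value: String,
    position: usize,
}

impl Token {
    fn get_end_position(&self) -> usize {
        match self.token_type {
            // 4 extra characters for /* and */
            TokenType::MultilineComment => self.position + self.value.len() + 4,
            // 2 extra characters for //
            TokenType::Comment => self.position + self.value.len() + 2,
            // 2 extra characters for ""
            TokenType::String => self.position + self.value.len() + 2,
            _ => self.position + self.value.len(),
        }
    }
}

fn main() {
    // `"hi"` spans indices 0..4 in the source even though `value` is just "hi".
    let token = Token {
        token_type: TokenType::String,
        value: String::from("hi"),
        position: 0,
    };
    assert_eq!(token.get_end_position(), 4);
}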
@@ -66,7 +66,7 @@ mod tests {
 
         match expression {
             Ok((Expression::String(value), _)) => {
-                assert_eq!("\"Hello\"", format!("{}", value))
+                assert_eq!("Hello", format!("{}", value))
             }
             _ => panic!(),
         }