Emit a DEDENT for each indentation level decreased

This commit is contained in:
Araozu 2023-09-10 20:10:34 -05:00
parent 19474eb85e
commit bd9e418ddd
2 changed files with 99 additions and 14 deletions

View File

@ -28,6 +28,8 @@ pub enum LexResult {
/// - `10` is the position where the token ends, and from where the next token /// - `10` is the position where the token ends, and from where the next token
/// should be scanned /// should be scanned
Some(Token, usize), Some(Token, usize),
/// Multiple tokens
Multiple(Vec<Token>, usize),
/// No token was found. This indicates that EOF has been reached. /// No token was found. This indicates that EOF has been reached.
/// ///
/// Contains the last position, which should be the input length - 1 /// Contains the last position, which should be the input length - 1
@ -53,6 +55,12 @@ pub fn get_tokens(input: &String) -> Result<Vec<Token>, MistiError> {
results.push(token); results.push(token);
current_pos = next_pos; current_pos = next_pos;
} }
LexResult::Multiple(tokens, next_pos) => {
at_new_line = tokens.last().unwrap().token_type == TokenType::NewLine;
results.extend(tokens);
current_pos = next_pos;
}
LexResult::None(next_pos) => { LexResult::None(next_pos) => {
current_pos = next_pos; current_pos = next_pos;
} }
@ -79,16 +87,14 @@ fn next_token(
) -> LexResult { ) -> LexResult {
let mut current_pos = current_pos; let mut current_pos = current_pos;
// Handle whitespace if at_new_line {
if peek(chars, current_pos) == ' ' { return handle_indentation(chars, current_pos, indentation_stack);
if at_new_line { }
return handle_indentation(chars, current_pos, indentation_stack); else if !at_new_line && peek(chars, current_pos) == ' ' {
} else { // Consume whitespace
// Consume whitespace current_pos += 1;
while peek(chars, current_pos) == ' ' {
current_pos += 1; current_pos += 1;
while peek(chars, current_pos) == ' ' {
current_pos += 1;
}
} }
} }
@ -134,19 +140,48 @@ fn handle_indentation(
sub_pos += 1; sub_pos += 1;
} }
// TODO: should emit a DEDENT for every single entry decreased in the stack
// Compare the number of spaces with the top of the stack // Compare the number of spaces with the top of the stack
let top = indentation_stack.last().unwrap_or(&0); let top = indentation_stack.last().unwrap_or(&0);
if spaces > *top { if spaces > *top {
// Push the new indentation level // Push the new indentation level
indentation_stack.push(spaces); indentation_stack.push(spaces);
return LexResult::Some(Token::new_indent(current_pos), current_pos + spaces); return LexResult::Some(Token::new_indent(current_pos), current_pos + spaces);
} else if spaces < *top { } else if spaces < *top {
// Pop the indentation level // Emit a DEDENT token for each indentation level that is decreased
indentation_stack.pop(); let mut dedent_tokens = Vec::<Token>::new();
return LexResult::Some(Token::new_dedent(current_pos), current_pos + spaces);
while let Some(new_top) = indentation_stack.last() {
if spaces < *new_top {
indentation_stack.pop();
dedent_tokens.push(Token::new_dedent(current_pos));
}
else if spaces == *new_top {
break;
}
else {
// Illegal state: Indentation error
let error = LexError {
position: current_pos,
reason: format!(
"Indentation error: expected {} spaces, found {}",
new_top,
spaces
),
};
return LexResult::Err(error);
}
}
return LexResult::Multiple(
dedent_tokens,
current_pos + spaces
);
} else { } else {
// Same indentation level // Same indentation level
return next_token(chars, current_pos + spaces, indentation_stack, true); return next_token(chars, current_pos + spaces, indentation_stack, false);
} }
} }
@ -335,4 +370,54 @@ mod tests {
assert_eq!(TokenType::NewLine, tokens[4].token_type); assert_eq!(TokenType::NewLine, tokens[4].token_type);
assert_eq!(TokenType::Number, tokens[5].token_type); assert_eq!(TokenType::Number, tokens[5].token_type);
} }
// Checks the token types produced for an input whose last line is less
// indented than the line before it. The asserted sequence is:
// Number, NewLine, INDENT, Number, NewLine, DEDENT, Number.
#[test]
fn should_emit_dedent() {
// NOTE(review): the expected INDENT/DEDENT positions depend on how many spaces
// follow each `\n` in this literal — confirm the spacing against the real file.
let input = String::from("3\n \n 22\n111");
// `get_tokens` returns a Result; `unwrap` fails the test if lexing errors.
let tokens = get_tokens(&input).unwrap();
assert_eq!(TokenType::Number, tokens[0].token_type);
assert_eq!(TokenType::NewLine, tokens[1].token_type);
// No token is asserted for the whitespace-only second line: the INDENT is
// expected immediately after the first NewLine.
assert_eq!(TokenType::INDENT, tokens[2].token_type);
assert_eq!(TokenType::Number, tokens[3].token_type);
assert_eq!(TokenType::NewLine, tokens[4].token_type);
// A single DEDENT is expected before the final number.
assert_eq!(TokenType::DEDENT, tokens[5].token_type);
assert_eq!(TokenType::Number, tokens[6].token_type);
}
// Checks an input that yields two INDENT tokens and, later, two DEDENT tokens
// that are separated by a Number and a NewLine (i.e. the DEDENTs are expected
// on different lines). Only the first twelve tokens are asserted.
#[test]
fn should_emit_multiple_dedents() {
// NOTE(review): the expected INDENT/DEDENT positions depend on how many spaces
// follow each `\n` in this literal — confirm the spacing against the real file.
let input = String::from("1\n 2\n 3\n 4\n5");
// `get_tokens` returns a Result; `unwrap` fails the test if lexing errors.
let tokens = get_tokens(&input).unwrap();
assert_eq!(TokenType::Number, tokens[0].token_type);
assert_eq!(TokenType::NewLine, tokens[1].token_type);
// First INDENT.
assert_eq!(TokenType::INDENT, tokens[2].token_type);
assert_eq!(TokenType::Number, tokens[3].token_type);
assert_eq!(TokenType::NewLine, tokens[4].token_type);
// Second INDENT.
assert_eq!(TokenType::INDENT, tokens[5].token_type);
assert_eq!(TokenType::Number, tokens[6].token_type);
assert_eq!(TokenType::NewLine, tokens[7].token_type);
// First DEDENT, followed by a number on the same line.
assert_eq!(TokenType::DEDENT, tokens[8].token_type);
assert_eq!(TokenType::Number, tokens[9].token_type);
assert_eq!(TokenType::NewLine, tokens[10].token_type);
// Second DEDENT; the tokens after it are not checked by this test.
assert_eq!(TokenType::DEDENT, tokens[11].token_type);
}
// Checks an input that yields two INDENT tokens and then two consecutive
// DEDENT tokens (indices 8 and 9) with nothing asserted between them.
// Only the first ten tokens are asserted.
#[test]
fn should_emit_multiple_dedents_2() {
// NOTE(review): the expected INDENT/DEDENT positions depend on how many spaces
// follow each `\n` in this literal — confirm the spacing against the real file.
let input = String::from("1\n 2\n 3\n4");
// `get_tokens` returns a Result; `unwrap` fails the test if lexing errors.
let tokens = get_tokens(&input).unwrap();
assert_eq!(TokenType::Number, tokens[0].token_type);
assert_eq!(TokenType::NewLine, tokens[1].token_type);
// First INDENT.
assert_eq!(TokenType::INDENT, tokens[2].token_type);
assert_eq!(TokenType::Number, tokens[3].token_type);
assert_eq!(TokenType::NewLine, tokens[4].token_type);
// Second INDENT.
assert_eq!(TokenType::INDENT, tokens[5].token_type);
assert_eq!(TokenType::Number, tokens[6].token_type);
assert_eq!(TokenType::NewLine, tokens[7].token_type);
// Two DEDENT tokens are expected back to back; the tokens after them are
// not checked by this test.
assert_eq!(TokenType::DEDENT, tokens[8].token_type);
assert_eq!(TokenType::DEDENT, tokens[9].token_type);
}
} }

View File

@ -120,7 +120,7 @@ impl Token {
pub fn new_dedent(position: usize) -> Token { pub fn new_dedent(position: usize) -> Token {
Token { Token {
token_type: TokenType::INDENT, token_type: TokenType::DEDENT,
value: String::from(""), value: String::from(""),
position, position,
} }