Emit a DEDENT for each indentation level decreased
This commit is contained in:
parent
19474eb85e
commit
bd9e418ddd
111
src/lexic/mod.rs
111
src/lexic/mod.rs
@ -28,6 +28,8 @@ pub enum LexResult {
|
|||||||
/// - `10` is the position where the token ends, and from where the next token
|
/// - `10` is the position where the token ends, and from where the next token
|
||||||
/// should be scanned
|
/// should be scanned
|
||||||
Some(Token, usize),
|
Some(Token, usize),
|
||||||
|
/// Multiple tokens
|
||||||
|
Multiple(Vec<Token>, usize),
|
||||||
/// No token was found. This indicates that EOF has been reached.
|
/// No token was found. This indicates that EOF has been reached.
|
||||||
///
|
///
|
||||||
/// Contains the last position, which should be the input length - 1
|
/// Contains the last position, which should be the input length - 1
|
||||||
@ -53,6 +55,12 @@ pub fn get_tokens(input: &String) -> Result<Vec<Token>, MistiError> {
|
|||||||
results.push(token);
|
results.push(token);
|
||||||
current_pos = next_pos;
|
current_pos = next_pos;
|
||||||
}
|
}
|
||||||
|
LexResult::Multiple(tokens, next_pos) => {
|
||||||
|
at_new_line = tokens.last().unwrap().token_type == TokenType::NewLine;
|
||||||
|
|
||||||
|
results.extend(tokens);
|
||||||
|
current_pos = next_pos;
|
||||||
|
}
|
||||||
LexResult::None(next_pos) => {
|
LexResult::None(next_pos) => {
|
||||||
current_pos = next_pos;
|
current_pos = next_pos;
|
||||||
}
|
}
|
||||||
@ -79,16 +87,14 @@ fn next_token(
|
|||||||
) -> LexResult {
|
) -> LexResult {
|
||||||
let mut current_pos = current_pos;
|
let mut current_pos = current_pos;
|
||||||
|
|
||||||
// Handle whitespace
|
if at_new_line {
|
||||||
if peek(chars, current_pos) == ' ' {
|
return handle_indentation(chars, current_pos, indentation_stack);
|
||||||
if at_new_line {
|
}
|
||||||
return handle_indentation(chars, current_pos, indentation_stack);
|
else if !at_new_line && peek(chars, current_pos) == ' ' {
|
||||||
} else {
|
// Consume whitespace
|
||||||
// Consume whitespace
|
current_pos += 1;
|
||||||
|
while peek(chars, current_pos) == ' ' {
|
||||||
current_pos += 1;
|
current_pos += 1;
|
||||||
while peek(chars, current_pos) == ' ' {
|
|
||||||
current_pos += 1;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -134,19 +140,48 @@ fn handle_indentation(
|
|||||||
sub_pos += 1;
|
sub_pos += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: should emit a DEDENT for every single entry decreased in the stack
|
||||||
|
|
||||||
// Compare the number of spaces with the top of the stack
|
// Compare the number of spaces with the top of the stack
|
||||||
let top = indentation_stack.last().unwrap_or(&0);
|
let top = indentation_stack.last().unwrap_or(&0);
|
||||||
|
|
||||||
if spaces > *top {
|
if spaces > *top {
|
||||||
// Push the new indentation level
|
// Push the new indentation level
|
||||||
indentation_stack.push(spaces);
|
indentation_stack.push(spaces);
|
||||||
return LexResult::Some(Token::new_indent(current_pos), current_pos + spaces);
|
return LexResult::Some(Token::new_indent(current_pos), current_pos + spaces);
|
||||||
} else if spaces < *top {
|
} else if spaces < *top {
|
||||||
// Pop the indentation level
|
// Emit a DEDENT token for each indentation level that is decreased
|
||||||
indentation_stack.pop();
|
let mut dedent_tokens = Vec::<Token>::new();
|
||||||
return LexResult::Some(Token::new_dedent(current_pos), current_pos + spaces);
|
|
||||||
|
while let Some(new_top) = indentation_stack.last() {
|
||||||
|
if spaces < *new_top {
|
||||||
|
indentation_stack.pop();
|
||||||
|
dedent_tokens.push(Token::new_dedent(current_pos));
|
||||||
|
}
|
||||||
|
else if spaces == *new_top {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Illegal state: Indentation error
|
||||||
|
let error = LexError {
|
||||||
|
position: current_pos,
|
||||||
|
reason: format!(
|
||||||
|
"Indentation error: expected {} spaces, found {}",
|
||||||
|
new_top,
|
||||||
|
spaces
|
||||||
|
),
|
||||||
|
};
|
||||||
|
return LexResult::Err(error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return LexResult::Multiple(
|
||||||
|
dedent_tokens,
|
||||||
|
current_pos + spaces
|
||||||
|
);
|
||||||
} else {
|
} else {
|
||||||
// Same indentation level
|
// Same indentation level
|
||||||
return next_token(chars, current_pos + spaces, indentation_stack, true);
|
return next_token(chars, current_pos + spaces, indentation_stack, false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -335,4 +370,54 @@ mod tests {
|
|||||||
assert_eq!(TokenType::NewLine, tokens[4].token_type);
|
assert_eq!(TokenType::NewLine, tokens[4].token_type);
|
||||||
assert_eq!(TokenType::Number, tokens[5].token_type);
|
assert_eq!(TokenType::Number, tokens[5].token_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn should_emit_dedent() {
|
||||||
|
let input = String::from("3\n \n 22\n111");
|
||||||
|
let tokens = get_tokens(&input).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(TokenType::Number, tokens[0].token_type);
|
||||||
|
assert_eq!(TokenType::NewLine, tokens[1].token_type);
|
||||||
|
assert_eq!(TokenType::INDENT, tokens[2].token_type);
|
||||||
|
assert_eq!(TokenType::Number, tokens[3].token_type);
|
||||||
|
assert_eq!(TokenType::NewLine, tokens[4].token_type);
|
||||||
|
assert_eq!(TokenType::DEDENT, tokens[5].token_type);
|
||||||
|
assert_eq!(TokenType::Number, tokens[6].token_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn should_emit_multiple_dedents() {
|
||||||
|
let input = String::from("1\n 2\n 3\n 4\n5");
|
||||||
|
let tokens = get_tokens(&input).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(TokenType::Number, tokens[0].token_type);
|
||||||
|
assert_eq!(TokenType::NewLine, tokens[1].token_type);
|
||||||
|
assert_eq!(TokenType::INDENT, tokens[2].token_type);
|
||||||
|
assert_eq!(TokenType::Number, tokens[3].token_type);
|
||||||
|
assert_eq!(TokenType::NewLine, tokens[4].token_type);
|
||||||
|
assert_eq!(TokenType::INDENT, tokens[5].token_type);
|
||||||
|
assert_eq!(TokenType::Number, tokens[6].token_type);
|
||||||
|
assert_eq!(TokenType::NewLine, tokens[7].token_type);
|
||||||
|
assert_eq!(TokenType::DEDENT, tokens[8].token_type);
|
||||||
|
assert_eq!(TokenType::Number, tokens[9].token_type);
|
||||||
|
assert_eq!(TokenType::NewLine, tokens[10].token_type);
|
||||||
|
assert_eq!(TokenType::DEDENT, tokens[11].token_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn should_emit_multiple_dedents_2() {
|
||||||
|
let input = String::from("1\n 2\n 3\n4");
|
||||||
|
let tokens = get_tokens(&input).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(TokenType::Number, tokens[0].token_type);
|
||||||
|
assert_eq!(TokenType::NewLine, tokens[1].token_type);
|
||||||
|
assert_eq!(TokenType::INDENT, tokens[2].token_type);
|
||||||
|
assert_eq!(TokenType::Number, tokens[3].token_type);
|
||||||
|
assert_eq!(TokenType::NewLine, tokens[4].token_type);
|
||||||
|
assert_eq!(TokenType::INDENT, tokens[5].token_type);
|
||||||
|
assert_eq!(TokenType::Number, tokens[6].token_type);
|
||||||
|
assert_eq!(TokenType::NewLine, tokens[7].token_type);
|
||||||
|
assert_eq!(TokenType::DEDENT, tokens[8].token_type);
|
||||||
|
assert_eq!(TokenType::DEDENT, tokens[9].token_type);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -120,7 +120,7 @@ impl Token {
|
|||||||
|
|
||||||
pub fn new_dedent(position: usize) -> Token {
|
pub fn new_dedent(position: usize) -> Token {
|
||||||
Token {
|
Token {
|
||||||
token_type: TokenType::INDENT,
|
token_type: TokenType::DEDENT,
|
||||||
value: String::from(""),
|
value: String::from(""),
|
||||||
position,
|
position,
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user