Fix errors in lexical analyzer

Branch: master
Araozu 2022-11-28 19:16:55 -05:00
parent 0bb71c6822
commit 2c17557aad
6 changed files with 76 additions and 27 deletions

.gitignore

@@ -1 +1,2 @@
 /target
+.vscode


@@ -5,44 +5,55 @@ use super::token::{self, Token};

 type Chars = Vec<char>;

 /// Scans and returns all the tokens in the input String
-pub fn get_tokens(input: &String) -> Vec<Token> {
+pub fn get_tokens(input: &String) -> Result<Vec<Token>, String> {
     let chars: Vec<char> = input.chars().into_iter().collect();
     let mut results = Vec::new();
     let mut current_pos: usize = 0;

     while has_input(&chars, current_pos) {
-        let (possible_token, next_pos) = next_token(&chars, current_pos);
-        current_pos = next_pos;
-
-        if let Some(token) = possible_token {
-            results.push(token);
+        match next_token(&chars, current_pos) {
+            Ok((Some(token), next_pos)) => {
+                results.push(token);
+                current_pos = next_pos;
+            },
+            Ok((None, next_pos)) => {
+                current_pos = next_pos;
+            },
+            Err(reason) => return Err(reason),
         }
     }

     results.push(token::new_eof(0));
-    results
+    Ok(results)
 }

-fn next_token(chars: &Chars, current_pos: usize) -> (Option<Token>, usize) {
+fn next_token(chars: &Chars, current_pos: usize) -> Result<(Option<Token>, usize), String> {
     let next_char = peek(chars, current_pos);

-    // Handle whitespace
+    // If EOF is reached return nothing
+    if next_char == '\0' {
+        return Ok((None, current_pos))
+    }
+
+    // Handle whitespace recursively
     if next_char == ' ' {
         return next_token(chars, current_pos + 1)
     }

     // Test number
     if utils::is_digit(next_char) {
-        let (token, next_pos) = scanner::number(chars, current_pos).unwrap();
-        (Some(token), next_pos)
+        match scanner::number(chars, current_pos) {
+            Ok((token, next_pos)) => Ok((Some(token), next_pos)),
+            Err(reason) => Err(reason),
+        }
     }
     // Test operator
     else if utils::is_operator(next_char) {
         let (token, next_pos) = scanner::operator(chars, current_pos);
-        (Some(token), next_pos)
+        Ok((Some(token), next_pos))
     }
     else {
-        (None, current_pos)
+        Err(format!("Unrecognized character: {}", next_char))
     }
 }
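
With this change, scanning failures propagate to the caller as an Err instead of panicking inside next_token via unwrap(). A minimal caller-side sketch; only `lexic::get_tokens` and the `Result<Vec<Token>, String>` signature come from this commit, while the function name and the printing are illustrative:

    // Hypothetical caller module; mirrors the repl's `use super::lexic;`.
    use super::lexic;

    fn tokenize_or_report(input: &String) {
        match lexic::get_tokens(input) {
            // Ok: the token list, always terminated by an EOF token.
            Ok(tokens) => println!("scanned {} tokens", tokens.len()),
            // Err: the reason string produced in next_token,
            // e.g. "Unrecognized character: $".
            Err(reason) => eprintln!("lexical error: {}", reason),
        }
    }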
@@ -52,7 +63,7 @@ fn peek(input: &Chars, pos: usize) -> char {
 }

 fn has_input(input: &Chars, current_pos: usize) -> bool {
-    input.len() < current_pos
+    current_pos < input.len()
 }
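
This one-line change is a central fix in the commit: the old predicate `input.len() < current_pos` is false for every position from 0 up to the input length, so has_input never reported remaining input and the scanning loop in get_tokens never executed, yielding only the EOF token. A standalone sketch of the fixed behavior, for illustration only:

    fn has_input(input: &Vec<char>, current_pos: usize) -> bool {
        // True while there are unread characters at or after current_pos.
        current_pos < input.len()
    }

    fn main() {
        let chars: Vec<char> = "126".chars().collect();
        assert!(has_input(&chars, 0));   // characters remain
        assert!(!has_input(&chars, 3));  // past the end: no input left
        // With the old `input.len() < current_pos`, the first assert
        // would have failed, since 3 < 0 is false.
        println!("ok");
    }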
@@ -60,37 +71,65 @@ fn has_input(input: &Chars, current_pos: usize) -> bool {

 #[cfg(test)]
 mod tests {
     use super::*;
-    use token::{Token, TokenType};
+    use token::TokenType;

     /// Should return an EOF token if the input has no tokens
     #[test]
     fn test1() {
         let input = String::from("");
-        let tokens = get_tokens(&input);
+        let tokens = get_tokens(&input).unwrap();
         assert_eq!(1, tokens.len());
         let first = tokens.get(0).unwrap();
         assert_eq!(TokenType::EOF, first.token_type);

         let input = String::from(" ");
-        let tokens = get_tokens(&input);
+        let tokens = get_tokens(&input).unwrap();
         assert_eq!(1, tokens.len());
         let first = tokens.get(0).unwrap();
         assert_eq!(TokenType::EOF, first.token_type);

-        let input = String::from(" \n ");
-        let tokens = get_tokens(&input);
+        let input = String::from(" ");
+        let tokens = get_tokens(&input).unwrap();
         assert_eq!(1, tokens.len());
         let first = tokens.get(0).unwrap();
         assert_eq!(TokenType::EOF, first.token_type);
     }

+    #[test]
+    fn t() {
+        let input = String::from("126 ");
+        let chars: Vec<char> = input.chars().into_iter().collect();
+
+        assert_eq!(4, chars.len());
+        assert!(has_input(&chars, 0));
+
+        match next_token(&chars, 0).unwrap() {
+            (Some(t), _) => {
+                assert_eq!("126", t.value)
+            },
+            (None, _) => {
+                panic!()
+            }
+        }
+    }
+
     /// Should scan numbers
     #[test]
     fn number_test() {
-        let input = String::from("126 278.98 0.282398 1798e+1 239.3298e-103");
-        let tokens = get_tokens(&input);
+        let input = String::from("126 278.98 0.282398");
+        let tokens = get_tokens(&input).unwrap();

-        // assert_eq!("126", tokens.get(0).unwrap().value);
+        let t1 = tokens.get(0).unwrap();
+        assert_eq!(TokenType::Number, t1.token_type);
+        assert_eq!("126", t1.value);
+
+        let t2 = tokens.get(1).unwrap();
+        assert_eq!(TokenType::Number, t2.token_type);
+        assert_eq!("278.98", t2.value);
+
+        let t3 = tokens.get(2).unwrap();
+        assert_eq!(TokenType::Number, t3.token_type);
+        assert_eq!("0.282398", t3.value);
+
         /*
         assert_eq!("278.98", tokens.get(1).unwrap().value);
         assert_eq!("0.282398", tokens.get(2).unwrap().value);


@@ -154,6 +154,18 @@ mod tests {
         assert_eq!("123456", token.value);
     }

+    // Should not scan whitespace after the number
+    #[test]
+    fn test_int_2() {
+        let input = str_to_vec("123 ");
+        let start_pos = 0;
+        let (token, next) = scan(&input, start_pos).unwrap();
+
+        assert_eq!(3, next);
+        assert_eq!(TokenType::Number, token.token_type);
+        assert_eq!("123", token.value);
+    }
+
     #[test]
     fn test_hex() {
         let input = str_to_vec("0x20 ");


@@ -1,5 +1,3 @@
-use core::panic;
-
 use crate::lexic::{token::{Token, self}, utils};


@@ -1,10 +1,9 @@
 use std::io::{self, Write};

 use super::lexic;
-use super::syntax;

 fn compile(input: &String) {
-    let tokens = lexic::get_tokens(input);
+    let _tokens = lexic::get_tokens(input);
 }

 pub fn run() -> io::Result<()> {


@@ -2,6 +2,6 @@
 use super::token::Token;

 /// Constructs the Misti AST from a vector of tokens
-pub fn construct_ast(tokens: Vec<Token>) -> Result<(), String> {
+pub fn _construct_ast(_tokens: Vec<Token>) -> Result<(), String> {
     Err(String::from("NOT IMPLEMENTED"))
 }