From 2c17557aad5d638bd853820cd965e34737a5a440 Mon Sep 17 00:00:00 2001
From: Araozu
Date: Mon, 28 Nov 2022 19:16:55 -0500
Subject: [PATCH] Fix errors in lexical analyzer

---
 .gitignore                    |  1 +
 src/lexic/mod.rs              | 83 +++++++++++++++++++++++++----------
 src/lexic/scanner/number.rs   | 12 +++++
 src/lexic/scanner/operator.rs |  2 -
 src/repl/mod.rs               |  3 +-
 src/syntax/mod.rs             |  2 +-
 6 files changed, 76 insertions(+), 27 deletions(-)

diff --git a/.gitignore b/.gitignore
index ea8c4bf..9026c77 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 /target
+.vscode
diff --git a/src/lexic/mod.rs b/src/lexic/mod.rs
index 4551e69..b8cd36a 100644
--- a/src/lexic/mod.rs
+++ b/src/lexic/mod.rs
@@ -5,44 +5,55 @@ use super::token::{self, Token};
 type Chars = Vec<char>;
 
 /// Scans and returns all the tokens in the input String
-pub fn get_tokens(input: &String) -> Vec<Token> {
+pub fn get_tokens(input: &String) -> Result<Vec<Token>, String> {
     let chars: Vec<char> = input.chars().into_iter().collect();
     let mut results = Vec::new();
     let mut current_pos: usize = 0;
 
     while has_input(&chars, current_pos) {
-        let (possible_token, next_pos) = next_token(&chars, current_pos);
-        current_pos = next_pos;
-
-        if let Some(token) = possible_token {
-            results.push(token);
+        match next_token(&chars, current_pos) {
+            Ok((Some(token), next_pos)) => {
+                results.push(token);
+                current_pos = next_pos;
+            },
+            Ok((None, next_pos)) => {
+                current_pos = next_pos;
+            },
+            Err(reason) => return Err(reason),
         }
     }
 
     results.push(token::new_eof(0));
-    results
+    Ok(results)
 }
 
-fn next_token(chars: &Chars, current_pos: usize) -> (Option<Token>, usize) {
+fn next_token(chars: &Chars, current_pos: usize) -> Result<(Option<Token>, usize),String> {
     let next_char = peek(chars, current_pos);
 
-    // Handle whitespace
+    // If EOF is reached return nothing
+    if next_char == '\0' {
+        return Ok((None, current_pos))
+    }
+
+    // Handle whitespace recursively
     if next_char == ' ' {
         return next_token(chars, current_pos + 1)
     }
 
     // Test number
     if utils::is_digit(next_char) {
-        let (token, next_pos) = scanner::number(chars, current_pos).unwrap();
-        (Some(token), next_pos)
+        match scanner::number(chars, current_pos) {
+            Ok((token, next_pos)) => Ok((Some(token), next_pos)),
+            Err(reason) => Err(reason),
+        }
     }
     // Test operator
     else if utils::is_operator(next_char) {
         let (token, next_pos) = scanner::operator(chars, current_pos);
-        (Some(token), next_pos)
+        Ok((Some(token), next_pos))
     }
     else {
-        (None, current_pos)
+        Err(format!("Unrecognized character: {}", next_char))
     }
 }
 
@@ -52,7 +63,7 @@ fn peek(input: &Chars, pos: usize) -> char {
 }
 
 fn has_input(input: &Chars, current_pos: usize) -> bool {
-    input.len() < current_pos
+    current_pos < input.len()
 }
 
 
@@ -60,37 +71,65 @@ fn has_input(input: &Chars, current_pos: usize) -> bool {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use token::{Token, TokenType};
+    use token::TokenType;
 
     /// Should return an EOF token if the input has no tokens
     #[test]
     fn test1() {
         let input = String::from("");
-        let tokens = get_tokens(&input);
+        let tokens = get_tokens(&input).unwrap();
         assert_eq!(1, tokens.len());
         let first = tokens.get(0).unwrap();
         assert_eq!(TokenType::EOF, first.token_type);
 
         let input = String::from(" ");
-        let tokens = get_tokens(&input);
+        let tokens = get_tokens(&input).unwrap();
         assert_eq!(1, tokens.len());
         let first = tokens.get(0).unwrap();
         assert_eq!(TokenType::EOF, first.token_type);
 
-        let input = String::from(" \n ");
-        let tokens = get_tokens(&input);
+        let input = String::from("  ");
+        let tokens = get_tokens(&input).unwrap();
         assert_eq!(1, tokens.len());
         let first = tokens.get(0).unwrap();
         assert_eq!(TokenType::EOF, first.token_type);
     }
 
+    #[test]
+    fn t() {
+        let input = String::from("126 ");
+        let chars: Vec<char> = input.chars().into_iter().collect();
+
+        assert_eq!(4, chars.len());
+        assert!(has_input(&chars, 0));
+
+        match next_token(&chars, 0).unwrap() {
+            (Some(t), _) => {
+                assert_eq!("126", t.value)
+            },
+            (None, _) => {
+                panic!()
+            }
+        }
+    }
+
     /// Should scan numbers
     #[test]
     fn number_test() {
-        let input = String::from("126 278.98 0.282398 1798e+1 239.3298e-103");
-        let tokens = get_tokens(&input);
+        let input = String::from("126 278.98 0.282398");
+        let tokens = get_tokens(&input).unwrap();
 
-        // assert_eq!("126", tokens.get(0).unwrap().value);
+        let t1 = tokens.get(0).unwrap();
+        assert_eq!(TokenType::Number, t1.token_type);
+        assert_eq!("126", t1.value);
+
+        let t2 = tokens.get(1).unwrap();
+        assert_eq!(TokenType::Number, t2.token_type);
+        assert_eq!("278.98", t2.value);
+
+        let t3 = tokens.get(2).unwrap();
+        assert_eq!(TokenType::Number, t3.token_type);
+        assert_eq!("0.282398", t3.value);
         /*
         assert_eq!("278.98", tokens.get(1).unwrap().value);
         assert_eq!("0.282398", tokens.get(2).unwrap().value);
diff --git a/src/lexic/scanner/number.rs b/src/lexic/scanner/number.rs
index 3b584f2..d2974e2 100644
--- a/src/lexic/scanner/number.rs
+++ b/src/lexic/scanner/number.rs
@@ -154,6 +154,18 @@ mod tests {
         assert_eq!("123456", token.value);
     }
 
+    // Should not scan whitespace after the number
+    #[test]
+    fn test_int_2() {
+        let input = str_to_vec("123 ");
+        let start_pos = 0;
+
+        let (token, next) = scan(&input, start_pos).unwrap();
+        assert_eq!(3, next);
+        assert_eq!(TokenType::Number, token.token_type);
+        assert_eq!("123", token.value);
+    }
+
     #[test]
     fn test_hex() {
         let input = str_to_vec("0x20 ");
diff --git a/src/lexic/scanner/operator.rs b/src/lexic/scanner/operator.rs
index 835a7b0..b9d3052 100644
--- a/src/lexic/scanner/operator.rs
+++ b/src/lexic/scanner/operator.rs
@@ -1,5 +1,3 @@
-use core::panic;
-
 use crate::lexic::{token::{Token, self}, utils};
 
 
diff --git a/src/repl/mod.rs b/src/repl/mod.rs
index 525aad3..244b771 100644
--- a/src/repl/mod.rs
+++ b/src/repl/mod.rs
@@ -1,10 +1,9 @@
 use std::io::{self, Write};
 
 use super::lexic;
-use super::syntax;
 
 fn compile(input: &String) {
-    let tokens = lexic::get_tokens(input);
+    let _tokens = lexic::get_tokens(input);
 }
 
 pub fn run() -> io::Result<()> {
diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs
index aea8e49..2816caf 100644
--- a/src/syntax/mod.rs
+++ b/src/syntax/mod.rs
@@ -2,6 +2,6 @@
 use super::token::Token;
 
 /// Constructs the Misti AST from a vector of tokens
-pub fn construct_ast(tokens: Vec<Token>) -> Result<(), String> {
+pub fn _construct_ast(_tokens: Vec<Token>) -> Result<(), String> {
     Err(String::from("NOT IMPLEMENTED"))
 }
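
Note on consuming the new API: with this patch, lexic::get_tokens returns Result<Vec<Token>, String> instead of Vec<Token>, and the REPL's compile simply binds the result to _tokens. Below is a minimal sketch of how a caller in src/repl/mod.rs might surface lexical errors once that wiring is added; the match structure follows the new signature from the patch, but the printed messages and the token-count output are illustrative assumptions, not part of this change.

    use super::lexic;

    fn compile(input: &String) {
        // get_tokens now reports failures such as "Unrecognized character: ..."
        // as Err(String) instead of panicking inside the scanner.
        match lexic::get_tokens(input) {
            Ok(tokens) => {
                // Placeholder: the syntax stage (_construct_ast) is still a stub,
                // so just report how many tokens were scanned.
                println!("scanned {} tokens", tokens.len());
            }
            Err(reason) => eprintln!("lexical error: {}", reason),
        }
    }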