From 0bb71c6822c9a6203e48103278d96e6c499cc468 Mon Sep 17 00:00:00 2001
From: Araozu
Date: Mon, 28 Nov 2022 18:33:34 -0500
Subject: [PATCH] Scan operators

---
 src/lexic/mod.rs                        | 110 +++++++++++++++++++++++
 src/{syntax => lexic}/scanner/mod.rs    |   4 +
 src/{syntax => lexic}/scanner/number.rs |   4 +-
 src/lexic/scanner/operator.rs           | 103 ++++++++++++++++++++++++
 src/lexic/utils.rs                      |  20 +++++
 src/main.rs                             |   2 +
 src/repl/mod.rs                         |   3 +-
 src/syntax/mod.rs                       |  98 +---------------------
 src/syntax/utils.rs                     |  12 ---
 src/{syntax => }/token.rs               |   8 ++
 10 files changed, 255 insertions(+), 109 deletions(-)
 create mode 100644 src/lexic/mod.rs
 rename src/{syntax => lexic}/scanner/mod.rs (56%)
 rename src/{syntax => lexic}/scanner/number.rs (98%)
 create mode 100644 src/lexic/scanner/operator.rs
 create mode 100644 src/lexic/utils.rs
 delete mode 100644 src/syntax/utils.rs
 rename src/{syntax => }/token.rs (83%)

diff --git a/src/lexic/mod.rs b/src/lexic/mod.rs
new file mode 100644
index 0000000..4551e69
--- /dev/null
+++ b/src/lexic/mod.rs
@@ -0,0 +1,110 @@
+mod utils;
+mod scanner;
+use super::token::{self, Token};
+
+type Chars = Vec<char>;
+
+/// Scans and returns all the tokens in the input String
+pub fn get_tokens(input: &String) -> Vec<Token> {
+    let chars: Vec<char> = input.chars().into_iter().collect();
+    let mut results = Vec::new();
+    let mut current_pos: usize = 0;
+
+    while has_input(&chars, current_pos) {
+        let (possible_token, next_pos) = next_token(&chars, current_pos);
+        current_pos = next_pos;
+
+        if let Some(token) = possible_token {
+            results.push(token);
+        }
+    }
+
+    results.push(token::new_eof(0));
+    results
+}
+
+/// Scans the next token, starting at `current_pos`
+///
+/// Returns the token found (if any) and the position where scanning
+/// should resume. Characters that no scanner recognizes are skipped
+/// one at a time, so the loop in `get_tokens` cannot stall on them.
+fn next_token(chars: &Chars, current_pos: usize) -> (Option<Token>, usize) {
+    let next_char = peek(chars, current_pos);
+
+    // Handle whitespace
+    if next_char == ' ' {
+        return next_token(chars, current_pos + 1)
+    }
+
+    // Test number
+    if utils::is_digit(next_char) {
+        let (token, next_pos) = scanner::number(chars, current_pos).unwrap();
+        (Some(token), next_pos)
+    }
+    // Test operator
+    else if utils::is_operator(next_char) {
+        let (token, next_pos) = scanner::operator(chars, current_pos);
+        (Some(token), next_pos)
+    }
+    // Unknown character (or end of input): skip it
+    else {
+        (None, current_pos + 1)
+    }
+}
+
+/// Returns the character at `pos`, or `'\0'` if `pos` is out of bounds
+fn peek(input: &Chars, pos: usize) -> char {
+    let result = input.get(pos).unwrap_or(&'\0');
+    *result
+}
+
+/// Whether there are characters left to scan at `current_pos`
+fn has_input(input: &Chars, current_pos: usize) -> bool {
+    current_pos < input.len()
+}
+
+
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use token::{Token, TokenType};
+
+    /// Should return an EOF token if the input has no tokens
+    #[test]
+    fn test1() {
+        let input = String::from("");
+        let tokens = get_tokens(&input);
+        assert_eq!(1, tokens.len());
+        let first = tokens.get(0).unwrap();
+        assert_eq!(TokenType::EOF, first.token_type);
+
+        let input = String::from(" ");
+        let tokens = get_tokens(&input);
+        assert_eq!(1, tokens.len());
+        let first = tokens.get(0).unwrap();
+        assert_eq!(TokenType::EOF, first.token_type);
+
+        let input = String::from(" \n ");
+        let tokens = get_tokens(&input);
+        assert_eq!(1, tokens.len());
+        let first = tokens.get(0).unwrap();
+        assert_eq!(TokenType::EOF, first.token_type);
+    }
+
+    /// Should scan numbers
+    #[test]
+    fn number_test() {
+        let input = String::from("126 278.98 0.282398 1798e+1 239.3298e-103");
+        let tokens = get_tokens(&input);
+
+        // assert_eq!("126", tokens.get(0).unwrap().value);
+        /*
+        assert_eq!("278.98", tokens.get(1).unwrap().value);
+        assert_eq!("0.282398", tokens.get(2).unwrap().value);
+        assert_eq!("1798e+1", tokens.get(3).unwrap().value);
+        assert_eq!("239.3298e-103", tokens.get(4).unwrap().value);
+        assert_eq!(TokenType::EOF, tokens.get(5).unwrap().token_type);
+        */
+    }
+}
diff --git a/src/syntax/scanner/mod.rs b/src/lexic/scanner/mod.rs
similarity index 56%
rename from src/syntax/scanner/mod.rs
rename to src/lexic/scanner/mod.rs
index 8e41174..cc90d1c 100644
--- a/src/syntax/scanner/mod.rs
+++ b/src/lexic/scanner/mod.rs
@@ -1,8 +1,12 @@
 use super::token::Token;
 
 mod number;
+mod operator;
 
 pub fn number(chars: &Vec<char>, start_pos: usize) -> Result<(Token, usize), String> {
     number::scan(chars, start_pos)
 }
 
+pub fn operator(chars: &Vec<char>, start_pos: usize) -> (Token, usize) {
+    operator::scan(chars, start_pos)
+}
diff --git a/src/syntax/scanner/number.rs b/src/lexic/scanner/number.rs
similarity index 98%
rename from src/syntax/scanner/number.rs
rename to src/lexic/scanner/number.rs
index 358518b..3b584f2 100644
--- a/src/syntax/scanner/number.rs
+++ b/src/lexic/scanner/number.rs
@@ -1,4 +1,4 @@
-use crate::syntax::{token::{Token, self}, utils};
+use crate::lexic::{token::{Token, self}, utils};
 
 /// Function to scan a number
 ///
@@ -117,7 +117,7 @@ fn scan_double_impl(chars: &Vec<char>, start_pos: usize, current: String) -> (To
 
 #[cfg(test)]
 mod tests {
-    use crate::syntax::token::TokenType;
+    use crate::lexic::token::TokenType;
 
     use super::*;
 
diff --git a/src/lexic/scanner/operator.rs b/src/lexic/scanner/operator.rs
new file mode 100644
index 0000000..835a7b0
--- /dev/null
+++ b/src/lexic/scanner/operator.rs
@@ -0,0 +1,103 @@
+use core::panic;
+
+use crate::lexic::{token::{Token, self}, utils};
+
+
+/// Function to scan an operator
+///
+/// This function assumes the character at `start_pos` is an operator
+pub fn scan(chars: &Vec<char>, start_pos: usize) -> (Token, usize) {
+    scan_impl(chars, start_pos, String::from(""))
+}
+
+pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> (Token, usize) {
+    let next_char = chars.get(start_pos);
+
+    if let Some(c) = next_char {
+        if utils::is_operator(*c) {
+            return scan_impl(chars, start_pos + 1, utils::str_append(current, *c))
+        }
+    }
+
+    // Return current value
+    (token::new_operator(current, start_pos as i32), start_pos)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::lexic::token::TokenType;
+
+    fn str_to_vec(s: &str) -> Vec<char> {
+        s.chars().collect()
+    }
+
+    // Should scan operators of length 1
+    #[test]
+    fn test_1() {
+        let operators = vec![
+            "+",
+            "-",
+            "=",
+            "*",
+            "!",
+            "\\",
+            "/",
+            "|",
+            "@",
+            "#",
+            "$",
+            "~",
+            "%",
+            "&",
+            "?",
+            "<",
+            ">",
+            "^",
+            ".",
+            ":",
+        ];
+
+        for op in operators {
+            let input = str_to_vec(op);
+            let start_pos = 0;
+            let (token, next) = scan(&input, start_pos);
+
+            assert_eq!(1, next);
+            assert_eq!(TokenType::Operator, token.token_type);
+            assert_eq!(op, token.value);
+        }
+    }
+
+    // Should scan operators of length 2
+    #[test]
+    fn test_2() {
+        let operators = vec![
+            "<<",
+            ">>",
+            "<|",
+            "|>",
+            "+>",
+            "<+",
+            "+=",
+            "-=",
+            "?.",
+            "??",
+            "?:",
+            "*=",
+            "/=",
+            "==",
+            "!=",
+        ];
+
+        for op in operators {
+            let input = str_to_vec(op);
+            let start_pos = 0;
+            let (token, next) = scan(&input, start_pos);
+
+            assert_eq!(2, next);
+            assert_eq!(TokenType::Operator, token.token_type);
+            assert_eq!(op, token.value);
+        }
+    }
+}
diff --git a/src/lexic/utils.rs b/src/lexic/utils.rs
new file mode 100644
index 0000000..bd74331
--- /dev/null
+++ b/src/lexic/utils.rs
@@ -0,0 +1,20 @@
+
+pub fn is_digit(c: char) -> bool {
+    '0' <= c && c <= '9'
+}
+
+pub fn is_hex_digit(c: char) -> bool {
+    is_digit(c) || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
+}
+
+pub fn str_append(current: String, c: char) -> String {
+    format!("{}{}", current, c)
+}
+
+pub fn is_operator(c: char) -> bool {
+    c == '+' || c == '-' || c == '=' || c == '*' || c == '!'
+        || c == '\\' || c == '/' || c == '|' || c == '@'
+        || c == '#' || c == '$' || c == '~' || c == '%'
+        || c == '&' || c == '?' || c == '<' || c == '>'
+        || c == '^' || c == '.' || c == ':'
+}
diff --git a/src/main.rs b/src/main.rs
index 16946af..d9e79af 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -3,6 +3,8 @@ use chrono::{prelude::Utc, Datelike};
 
 mod repl;
 mod syntax;
+mod lexic;
+mod token;
 
 const VERSION: &str = "0.0.1";
 
diff --git a/src/repl/mod.rs b/src/repl/mod.rs
index 1cc43af..525aad3 100644
--- a/src/repl/mod.rs
+++ b/src/repl/mod.rs
@@ -1,9 +1,10 @@
 use std::io::{self, Write};
 
+use super::lexic;
 use super::syntax;
 
 fn compile(input: &String) {
-    let tokens = syntax::get_tokens(input);
+    let tokens = lexic::get_tokens(input);
 }
 
 pub fn run() -> io::Result<()> {
diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs
index 2eaba82..aea8e49 100644
--- a/src/syntax/mod.rs
+++ b/src/syntax/mod.rs
@@ -1,97 +1,7 @@
-mod utils;
-mod scanner;
-mod token;
-use token::{Token, TokenType};
 
-type Chars = Vec<char>;
+use super::token::Token;
 
-/// Scans and returns all the tokens in the input String
-pub fn get_tokens(input: &String) -> Vec<Token> {
-    let chars: Vec<char> = input.chars().into_iter().collect();
-    let mut results = Vec::new();
-    let mut current_pos: usize = 0;
-
-    while has_input(&chars, current_pos) {
-        let (possible_token, next_pos) = next_token(&chars, current_pos);
-        current_pos = next_pos;
-
-        if let Some(token) = possible_token {
-            results.push(token);
-        }
-    }
-
-    results.push(token::new_eof(0));
-    results
-}
-
-fn next_token(chars: &Chars, current_pos: usize) -> (Option<Token>, usize) {
-    let next_char = peek(chars, current_pos);
-
-    // Handle whitespace
-    if next_char == ' ' {
-        return next_token(chars, current_pos + 1)
-    }
-
-    // Test number
-    if utils::is_digit(next_char) {
-        let (token, next_pos) = scanner::number(chars, current_pos).unwrap();
-        (Some(token), next_pos)
-    } else {
-        (None, current_pos)
-    }
-}
-
-fn peek(input: &Chars, pos: usize) -> char {
-    let result = input.get(pos).unwrap_or(&'\0');
-    *result
-}
-
-fn has_input(input: &Vec<char>, current_pos: usize) -> bool {
-    input.len() < current_pos
-}
-
-
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use token::{Token, TokenType};
-
-    /// Should return an EOF token if the input has no tokens
-    #[test]
-    fn test1() {
-        let input = String::from("");
-        let tokens = get_tokens(&input);
-        assert_eq!(1, tokens.len());
-        let first = tokens.get(0).unwrap();
-        assert_eq!(TokenType::EOF, first.token_type);
-
-        let input = String::from(" ");
-        let tokens = get_tokens(&input);
-        assert_eq!(1, tokens.len());
-        let first = tokens.get(0).unwrap();
-        assert_eq!(TokenType::EOF, first.token_type);
-
-        let input = String::from(" \n ");
-        let tokens = get_tokens(&input);
-        assert_eq!(1, tokens.len());
-        let first = tokens.get(0).unwrap();
-        assert_eq!(TokenType::EOF, first.token_type);
-    }
-
-    /// Should scan numbers
-    #[test]
-    fn number_test() {
-        let input = String::from("126 278.98 0.282398 1798e+1 239.3298e-103");
-        let tokens = get_tokens(&input);
-
-        // assert_eq!("126", tokens.get(0).unwrap().value);
-        /*
-        assert_eq!("278.98", tokens.get(1).unwrap().value);
-        assert_eq!("0.282398", tokens.get(2).unwrap().value);
-        assert_eq!("1798e+1", tokens.get(3).unwrap().value);
-        assert_eq!("239.3298e-103", tokens.get(4).unwrap().value);
-        assert_eq!(TokenType::EOF, tokens.get(5).unwrap().token_type);
-        */
-    }
+/// Constructs the Misti AST from a vector of tokens
+pub fn construct_ast(tokens: Vec<Token>) -> Result<(), String> {
+    Err(String::from("NOT IMPLEMENTED"))
 }
diff --git a/src/syntax/utils.rs b/src/syntax/utils.rs
deleted file mode 100644
index c8e5abb..0000000
--- a/src/syntax/utils.rs
+++ /dev/null
@@ -1,12 +0,0 @@
-
-pub fn is_digit(c: char) -> bool {
-    '0' <= c && c <= '9'
-}
-
-pub fn is_hex_digit(c: char) -> bool {
-    is_digit(c) || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
-}
-
-pub fn str_append(current: String, c: char) -> String {
-    format!("{}{}", current, c)
-}
diff --git a/src/syntax/token.rs b/src/token.rs
similarity index 83%
rename from src/syntax/token.rs
rename to src/token.rs
index 9599537..ee08d85 100644
--- a/src/syntax/token.rs
+++ b/src/token.rs
@@ -44,3 +44,11 @@ pub fn new_number(value: String, position: i32) -> Token {
         position
     }
 }
+
+pub fn new_operator(value: String, position: i32) -> Token {
+    Token {
+        token_type: TokenType::Operator,
+        value,
+        position
+    }
+}