Scan operators

parent de34dd0b71
commit 0bb71c6822
src/lexic/mod.rs (new file, 102 lines)
@@ -0,0 +1,102 @@
mod utils;
mod scanner;
use super::token::{self, Token};

type Chars = Vec<char>;

/// Scans and returns all the tokens in the input String
pub fn get_tokens(input: &String) -> Vec<Token> {
    let chars: Vec<char> = input.chars().into_iter().collect();
    let mut results = Vec::new();
    let mut current_pos: usize = 0;

    while has_input(&chars, current_pos) {
        let (possible_token, next_pos) = next_token(&chars, current_pos);
        current_pos = next_pos;

        if let Some(token) = possible_token {
            results.push(token);
        }
    }

    results.push(token::new_eof(0));
    results
}

fn next_token(chars: &Chars, current_pos: usize) -> (Option<Token>, usize) {
    let next_char = peek(chars, current_pos);

    // Handle whitespace
    if next_char == ' ' {
        return next_token(chars, current_pos + 1)
    }

    // Test number
    if utils::is_digit(next_char) {
        let (token, next_pos) = scanner::number(chars, current_pos).unwrap();
        (Some(token), next_pos)
    }
    // Test operator
    else if utils::is_operator(next_char) {
        let (token, next_pos) = scanner::operator(chars, current_pos);
        (Some(token), next_pos)
    }
    else {
        (None, current_pos)
    }
}

fn peek(input: &Chars, pos: usize) -> char {
    let result = input.get(pos).unwrap_or(&'\0');
    *result
}

fn has_input(input: &Chars, current_pos: usize) -> bool {
    input.len() < current_pos
}


#[cfg(test)]
mod tests {
    use super::*;
    use token::{Token, TokenType};

    /// Should return an EOF token if the input has no tokens
    #[test]
    fn test1() {
        let input = String::from("");
        let tokens = get_tokens(&input);
        assert_eq!(1, tokens.len());
        let first = tokens.get(0).unwrap();
        assert_eq!(TokenType::EOF, first.token_type);

        let input = String::from(" ");
        let tokens = get_tokens(&input);
        assert_eq!(1, tokens.len());
        let first = tokens.get(0).unwrap();
        assert_eq!(TokenType::EOF, first.token_type);

        let input = String::from(" \n ");
        let tokens = get_tokens(&input);
        assert_eq!(1, tokens.len());
        let first = tokens.get(0).unwrap();
        assert_eq!(TokenType::EOF, first.token_type);
    }

    /// Should scan numbers
    #[test]
    fn number_test() {
        let input = String::from("126 278.98 0.282398 1798e+1 239.3298e-103");
        let tokens = get_tokens(&input);

        // assert_eq!("126", tokens.get(0).unwrap().value);
        /*
        assert_eq!("278.98", tokens.get(1).unwrap().value);
        assert_eq!("0.282398", tokens.get(2).unwrap().value);
        assert_eq!("1798e+1", tokens.get(3).unwrap().value);
        assert_eq!("239.3298e-103", tokens.get(4).unwrap().value);
        assert_eq!(TokenType::EOF, tokens.get(5).unwrap().token_type);
        */
    }
}
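
A note on `peek`: it returns the NUL character `'\0'` instead of panicking when `pos` is past the end of the input, and `next_token` relies on that sentinel to fall through to the `(None, current_pos)` branch. A minimal sketch of that behaviour, written as a hypothetical extra case inside the `mod tests` block above (not part of the commit):

    #[test]
    fn peek_returns_nul_past_the_end() {
        // In-bounds positions return the character itself...
        let chars: Vec<char> = "ab".chars().collect();
        assert_eq!('b', peek(&chars, 1));
        // ...while an out-of-bounds position yields the '\0' sentinel instead of panicking.
        assert_eq!('\0', peek(&chars, 2));
    }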
@@ -1,8 +1,12 @@
use super::token::Token;

mod number;
mod operator;

pub fn number(chars: &Vec<char>, start_pos: usize) -> Result<(Token, usize), String> {
    number::scan(chars, start_pos)
}

pub fn operator(chars: &Vec<char>, start_pos: usize) -> (Token, usize) {
    operator::scan(chars, start_pos)
}
@@ -1,4 +1,4 @@
-use crate::syntax::{token::{Token, self}, utils};
+use crate::lexic::{token::{Token, self}, utils};

/// Function to scan a number
///
@@ -117,7 +117,7 @@ fn scan_double_impl(chars: &Vec<char>, start_pos: usize, current: String) -> (To

#[cfg(test)]
mod tests {
-    use crate::syntax::token::TokenType;
+    use crate::lexic::token::TokenType;

    use super::*;

src/lexic/scanner/operator.rs (new file, 103 lines)
@@ -0,0 +1,103 @@
use core::panic;

use crate::lexic::{token::{Token, self}, utils};


/// Function to scan an operator
///
/// This function assumes the character at `start_pos` is an operator
pub fn scan(chars: &Vec<char>, start_pos: usize) -> (Token, usize) {
    scan_impl(chars, start_pos, String::from(""))
}

pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> (Token, usize) {
    let next_char = chars.get(start_pos);

    if let Some(c) = next_char {
        if utils::is_operator(*c) {
            return scan_impl(chars, start_pos + 1, utils::str_append(current, *c))
        }
    }

    // Return current value
    (token::new_operator(current, start_pos as i32), start_pos)
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::lexic::token::TokenType;

    fn str_to_vec(s: &str) -> Vec<char> {
        s.chars().collect()
    }

    // Should scan operators of length 1
    #[test]
    fn test_1() {
        let operators = vec![
            "+",
            "-",
            "=",
            "*",
            "!",
            "\\",
            "/",
            "|",
            "@",
            "#",
            "$",
            "~",
            "%",
            "&",
            "?",
            "<",
            ">",
            "^",
            ".",
            ":",
        ];

        for op in operators {
            let input = str_to_vec(op);
            let start_pos = 0;
            let (token, next) = scan(&input, start_pos);

            assert_eq!(1, next);
            assert_eq!(TokenType::Operator, token.token_type);
            assert_eq!(op, token.value);
        }
    }

    // Should scan operators of length 2
    #[test]
    fn test_2() {
        let operators = vec![
            "<<",
            ">>",
            "<|",
            "|>",
            "+>",
            "<+",
            "+=",
            "-=",
            "?.",
            "??",
            "?:",
            "*=",
            "/=",
            "==",
            "!=",
        ];

        for op in operators {
            let input = str_to_vec(op);
            let start_pos = 0;
            let (token, next) = scan(&input, start_pos);

            assert_eq!(2, next);
            assert_eq!(TokenType::Operator, token.token_type);
            assert_eq!(op, token.value);
        }
    }
}
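
One behaviour worth spelling out: `scan_impl` keeps recursing while the next character satisfies `utils::is_operator`, so a run of adjacent operator characters always becomes a single token rather than several. A minimal sketch of that, as a hypothetical extra test against the `scan` function above (not part of the commit):

    #[test]
    fn merges_adjacent_operator_characters() {
        // '+' and '=' are both operator characters, so "+=" is scanned as one token;
        // scanning stops at the '1', and 2 is reported as the next position.
        let input: Vec<char> = "+=1".chars().collect();
        let (token, next) = scan(&input, 0);
        assert_eq!("+=", token.value);
        assert_eq!(2, next);
    }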
src/lexic/utils.rs (new file, 20 lines)
@@ -0,0 +1,20 @@

pub fn is_digit(c: char) -> bool {
    '0' <= c && c <= '9'
}

pub fn is_hex_digit(c: char) -> bool {
    is_digit(c) || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
}

pub fn str_append(current: String, c: char) -> String {
    format!("{}{}", current, c)
}

pub fn is_operator(c: char) -> bool {
    c == '+' || c == '-' || c == '=' || c == '*' || c == '!'
        || c == '\\' || c == '/' || c == '|' || c == '@'
        || c == '#' || c == '$' || c == '~' || c == '%'
        || c == '&' || c == '?' || c == '<' || c == '>'
        || c == '^' || c == '.' || c == ':'
}
@@ -3,6 +3,8 @@ use chrono::{prelude::Utc, Datelike};

mod repl;
mod syntax;
+mod lexic;
+mod token;

const VERSION: &str = "0.0.1";

@@ -1,9 +1,10 @@
use std::io::{self, Write};

+use super::lexic;
use super::syntax;

fn compile(input: &String) {
-    let tokens = syntax::get_tokens(input);
+    let tokens = lexic::get_tokens(input);
}

pub fn run() -> io::Result<()> {
@@ -1,97 +1,7 @@
-mod utils;
-mod scanner;
-mod token;
-use token::{Token, TokenType};
-
-type Chars = Vec<char>;
+use super::token::Token;

-/// Scans and returns all the tokens in the input String
-pub fn get_tokens(input: &String) -> Vec<Token> {
-    let chars: Vec<char> = input.chars().into_iter().collect();
-    let mut results = Vec::new();
-    let mut current_pos: usize = 0;
-
-    while has_input(&chars, current_pos) {
-        let (possible_token, next_pos) = next_token(&chars, current_pos);
-        current_pos = next_pos;
-
-        if let Some(token) = possible_token {
-            results.push(token);
-        }
-    }
-
-    results.push(token::new_eof(0));
-    results
-}
-
-fn next_token(chars: &Chars, current_pos: usize) -> (Option<Token>, usize) {
-    let next_char = peek(chars, current_pos);
-
-    // Handle whitespace
-    if next_char == ' ' {
-        return next_token(chars, current_pos + 1)
-    }
-
-    // Test number
-    if utils::is_digit(next_char) {
-        let (token, next_pos) = scanner::number(chars, current_pos).unwrap();
-        (Some(token), next_pos)
-    } else {
-        (None, current_pos)
-    }
-}
-
-fn peek(input: &Chars, pos: usize) -> char {
-    let result = input.get(pos).unwrap_or(&'\0');
-    *result
-}
-
-fn has_input(input: &Vec<char>, current_pos: usize) -> bool {
-    input.len() < current_pos
-}
-
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use token::{Token, TokenType};
-
-    /// Should return an EOF token if the input has no tokens
-    #[test]
-    fn test1() {
-        let input = String::from("");
-        let tokens = get_tokens(&input);
-        assert_eq!(1, tokens.len());
-        let first = tokens.get(0).unwrap();
-        assert_eq!(TokenType::EOF, first.token_type);
-
-        let input = String::from(" ");
-        let tokens = get_tokens(&input);
-        assert_eq!(1, tokens.len());
-        let first = tokens.get(0).unwrap();
-        assert_eq!(TokenType::EOF, first.token_type);
-
-        let input = String::from(" \n ");
-        let tokens = get_tokens(&input);
-        assert_eq!(1, tokens.len());
-        let first = tokens.get(0).unwrap();
-        assert_eq!(TokenType::EOF, first.token_type);
-    }
-
-    /// Should scan numbers
-    #[test]
-    fn number_test() {
-        let input = String::from("126 278.98 0.282398 1798e+1 239.3298e-103");
-        let tokens = get_tokens(&input);
-
-        // assert_eq!("126", tokens.get(0).unwrap().value);
-        /*
-        assert_eq!("278.98", tokens.get(1).unwrap().value);
-        assert_eq!("0.282398", tokens.get(2).unwrap().value);
-        assert_eq!("1798e+1", tokens.get(3).unwrap().value);
-        assert_eq!("239.3298e-103", tokens.get(4).unwrap().value);
-        assert_eq!(TokenType::EOF, tokens.get(5).unwrap().token_type);
-        */
-    }
-}
+
+/// Constructs the Misti AST from a vector of tokens
+pub fn construct_ast(tokens: Vec<Token>) -> Result<(), String> {
+    Err(String::from("NOT IMPLEMENTED"))
+}
@@ -1,12 +0,0 @@
-
-pub fn is_digit(c: char) -> bool {
-    '0' <= c && c <= '9'
-}
-
-pub fn is_hex_digit(c: char) -> bool {
-    is_digit(c) || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
-}
-
-pub fn str_append(current: String, c: char) -> String {
-    format!("{}{}", current, c)
-}
@@ -44,3 +44,11 @@ pub fn new_number(value: String, position: i32) -> Token {
        position
    }
}
+
+pub fn new_operator(value: String, position: i32) -> Token {
+    Token {
+        token_type: TokenType::Operator,
+        value,
+        position
+    }
+}
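
For context, the `Token` type itself is not modified by this commit. Judging from the constructors (`new_eof`, `new_number`, `new_operator`) and the fields the tests read (`token_type`, `value`, `position`), the token module presumably contains something along these lines; this is an assumed sketch, not code from the diff:

    // Assumed shape of the token module (not shown in this diff).
    #[derive(Debug, PartialEq)]
    pub enum TokenType {
        Number,
        Operator,
        EOF,
    }

    #[derive(Debug)]
    pub struct Token {
        pub token_type: TokenType,
        pub value: String,
        pub position: i32,
    }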