From 2b23e36955bbd674c32652cbe5ac4a3b5f73cafc Mon Sep 17 00:00:00 2001 From: Araozu Date: Tue, 13 Aug 2024 15:05:28 -0500 Subject: [PATCH] feat: lex octal and binary --- src/lexic/mod.rs | 1 + src/lexic/scanner/number.rs | 134 ++++++++++++++++++- src/semantic/checks/top_level_declaration.rs | 10 +- 3 files changed, 135 insertions(+), 10 deletions(-) diff --git a/src/lexic/mod.rs b/src/lexic/mod.rs index 7e02080..bb5db37 100755 --- a/src/lexic/mod.rs +++ b/src/lexic/mod.rs @@ -11,6 +11,7 @@ use self::token::TokenType; type Chars = Vec; /// Represents the result of scanning a single token from the input +#[derive(Debug)] pub enum LexResult { /// A token was found. The first element is the token, and the /// second element is the position in the input after the token. diff --git a/src/lexic/scanner/number.rs b/src/lexic/scanner/number.rs index c3605c4..1186d38 100755 --- a/src/lexic/scanner/number.rs +++ b/src/lexic/scanner/number.rs @@ -11,9 +11,17 @@ pub fn scan(chars: &Vec, start_pos: usize) -> LexResult { match (next_char_1, next_char_2) { // Test if the input contains a hex number - (Some(c1), Some(c2)) if *c1 == '0' && (*c2 == 'x' || *c2 == 'X') => { + (Some('0'), Some('x'|'X')) => { scan_hex(chars, start_pos + 2, String::from("0x")) } + (Some('0'), Some('o'|'O')) => { + // octal + scan_octal(chars, start_pos + 2) + } + (Some('0'), Some('b')) => { + // binary + scan_binary(chars, start_pos + 2) + } // Scan decimal/double/scientific otherwise _ => scan_decimal(chars, start_pos, String::from("")), } @@ -45,7 +53,7 @@ fn scan_decimal(chars: &Vec, start_pos: usize, current: String) -> LexResu /// This function expects the following on the first call: /// - The char at `start_pos` is a value between [0-9a-fA-F]. If not, will return an error. /// - `current == "0x"`. If not will return an incorrect value, or panic. -fn scan_hex(chars: &Vec, start_pos: usize, current: String) -> LexResult { +fn scan_hex(chars: &[char], start_pos: usize, current: String) -> LexResult { match chars.get(start_pos) { Some(c) if utils::is_hex_digit(*c) => { let (t, next) = scan_hex_digits(chars, start_pos + 1, utils::str_append(current, *c)); @@ -59,6 +67,67 @@ fn scan_hex(chars: &Vec, start_pos: usize, current: String) -> LexResult { } } +fn scan_octal(chars: &[char], start_pos: usize) -> LexResult { + let mut token_vec = vec![]; + let mut current_pos = start_pos; + let input_len = chars.len(); + + while current_pos < input_len { + match chars.get(current_pos) { + Some(c) if *c >= '0' && *c <= '7' => { + token_vec.push(*c); + } + _ => break, + } + + current_pos += 1; + } + + if token_vec.is_empty() { + LexResult::Err(LexError { + // minus 2 to account for the opening '0o' + position: start_pos - 2, + end_position: current_pos, + reason: String::from("Found an incomplete octal number"), + }) + } else { + let octal_numbers = format!("0o{}", token_vec.iter().collect::()); + let new_token = Token::new_int(octal_numbers, start_pos - 2); + LexResult::Some(new_token, current_pos) + } +} + +// TODO: Unify this, octal and hex in a single macro +fn scan_binary(chars: &[char], start_pos: usize) -> LexResult { + let mut token_vec = vec![]; + let mut current_pos = start_pos; + let input_len = chars.len(); + + while current_pos < input_len { + match chars.get(current_pos) { + Some(c) if *c == '0' || *c == '1' => { + token_vec.push(*c); + } + _ => break, + } + + current_pos += 1; + } + + if token_vec.is_empty() { + LexResult::Err(LexError { + // minus 2 to account for the opening '0b' + position: start_pos - 2, + end_position: current_pos, + reason: String::from("Found an incomplete binary number"), + }) + } else { + let octal_numbers = format!("0b{}", token_vec.iter().collect::()); + let new_token = Token::new_int(octal_numbers, start_pos - 2); + LexResult::Some(new_token, current_pos) + } +} + /// Scans a floating point number, with or without an exponent /// /// This function expects the following: @@ -153,7 +222,7 @@ fn scan_digits(chars: &Vec, start_pos: usize, current: String) -> (Token, } /// Scans chars between [0-9a-fA-F], returns when none is found -fn scan_hex_digits(chars: &Vec, start_pos: usize, current: String) -> (Token, usize) { +fn scan_hex_digits(chars: &[char], start_pos: usize, current: String) -> (Token, usize) { match chars.get(start_pos) { Some(c) if utils::is_hex_digit(*c) => { scan_hex_digits(chars, start_pos + 1, utils::str_append(current, *c)) @@ -295,6 +364,65 @@ mod tests { } } + #[test] + fn test_octal_1() { + let input = str_to_vec("0o20 "); + match scan(&input, 0) { + LexResult::Some(t, next) => { + assert_eq!(t.token_type, TokenType::Int); + assert_eq!(t.value, "0o20"); + assert_eq!(t.position, 0); + assert_eq!(t.get_end_position(), 4); + assert_eq!(next, 4); + } + _ => panic!("Expected a token") + } + } + + #[test] + fn test_octal_2() { + let input = str_to_vec("0o "); + let result = scan(&input, 0); + match result { + LexResult::Err(error) => { + assert_eq!(error.position, 0); + assert_eq!(error.end_position, 2); + assert_eq!(error.reason, "Found an incomplete octal number"); + } + _ => panic!("Expected an error, got {:?}", result) + } + } + + #[test] + fn test_binary_1() { + let input = str_to_vec("0b1011 "); + match scan(&input, 0) { + LexResult::Some(t, next) => { + assert_eq!(t.token_type, TokenType::Int); + assert_eq!(t.value, "0b1011"); + assert_eq!(t.position, 0); + assert_eq!(t.get_end_position(), 6); + assert_eq!(next, 6); + } + _ => panic!("Expected a token") + } + } + + #[test] + fn test_binary_2() { + let input = str_to_vec("0b "); + let result = scan(&input, 0); + match result { + LexResult::Err(error) => { + assert_eq!(error.position, 0); + assert_eq!(error.end_position, 2); + assert_eq!(error.reason, "Found an incomplete binary number"); + } + _ => panic!("Expected an error, got {:?}", result) + } + } + + // Should scan a double #[test] fn test_double_1() { diff --git a/src/semantic/checks/top_level_declaration.rs b/src/semantic/checks/top_level_declaration.rs index 1d04f84..80d52ca 100644 --- a/src/semantic/checks/top_level_declaration.rs +++ b/src/semantic/checks/top_level_declaration.rs @@ -1,11 +1,7 @@ use crate::{ - error_handling::{semantic_error::SemanticError, MistiError}, - semantic::{ - impls::SemanticCheck, - symbol_table::SymbolTable, - types::{Type, Typed}, - }, - syntax::ast::{Expression, ModuleMembers, Statement}, + error_handling::MistiError, + semantic::{impls::SemanticCheck, symbol_table::SymbolTable}, + syntax::ast::{ModuleMembers, Statement}, }; impl SemanticCheck for ModuleMembers<'_> {