From e7459ab441cbf886a4b8971684caa24a5a76cbe9 Mon Sep 17 00:00:00 2001 From: Araozu Date: Sat, 16 Nov 2024 18:34:36 -0500 Subject: [PATCH] feat: parse hex numbers --- src/01_lexic/number.zig | 95 ++++++++++++++++++++++++++++++++++++++--- src/01_lexic/root.zig | 11 +++-- src/01_lexic/token.zig | 5 +++ src/01_lexic/utils.zig | 4 ++ 4 files changed, 105 insertions(+), 10 deletions(-) diff --git a/src/01_lexic/number.zig b/src/01_lexic/number.zig index f36a849..4df12bd 100644 --- a/src/01_lexic/number.zig +++ b/src/01_lexic/number.zig @@ -4,6 +4,7 @@ const utils = @import("./utils.zig"); const Token = token.Token; const TokenType = token.TokenType; +const LexError = token.LexError; const is_decimal_digit = utils.is_decimal_digit; @@ -14,20 +15,22 @@ const LexReturn = struct { Token, usize }; /// A number is either an Int or a Float. /// No number can have a leading zero. That is an error to /// avoid confussion with PHP literal octals. -fn lex(input: []const u8, cap: usize, start: usize) !?LexReturn { +pub fn lex(input: []const u8, cap: usize, start: usize) LexError!?LexReturn { const first_char = input[start]; // Attempt to lex a hex, octal or binary number if (first_char == '0' and cap > start + 1) { const second_char = input[start + 1]; switch (second_char) { - 'x', 'X' => return hex(), + 'x', 'X' => return hex(input, cap, start), 'o', 'O' => return octal(), 'b', 'B' => return binary(), + else => { + // Leading zero found. Throw an error. + // TODO: throw an error :c + return LexError.LeadingZero; + }, } - - // Leading zero found. Throw an error. - // TODO: throw an error :c } // Attempt to lex an integer. @@ -35,8 +38,28 @@ fn lex(input: []const u8, cap: usize, start: usize) !?LexReturn { return integer(input, cap, start); } -fn hex() !?LexReturn { - return null; +/// Lexes a hexadecimal number. +/// Allows 0-9a-fA-F +/// Assumes that `start` is the position of the initial zero +fn hex(input: []const u8, cap: usize, start: usize) LexError!?LexReturn { + var end_position = start + 2; + + // There should be at least 1 hex digit + if (end_position >= cap or !utils.is_hex_digit(input[end_position])) { + return LexError.Incomplete; + } + + // loop through all chars + end_position += 1; + + while (end_position < cap and utils.is_hex_digit(input[end_position])) { + end_position += 1; + } + + return .{ + Token.init(input[start..end_position], TokenType.Int, start), + end_position, + }; } fn octal() !?LexReturn { @@ -114,3 +137,61 @@ test "should return null if not an integer" { try std.testing.expect(result == null); } + +test "should lex hex number" { + const input = "0xa"; + const result = try lex(input, input.len, 0); + + if (result) |tuple| { + const r = tuple[0]; + try std.testing.expectEqualDeep("0xa", r.value); + } else { + try std.testing.expect(false); + } +} + +test "should lex hex number 2" { + const input = " 0Xff00AA "; + const result = try lex(input, input.len, 2); + + if (result) |tuple| { + const r = tuple[0]; + try std.testing.expectEqualDeep("0Xff00AA", r.value); + } else { + try std.testing.expect(false); + } +} + +test "shouldnt parse incomplete hex number" { + const input = "0xZZ"; + const result = lex(input, input.len, 0) catch |err| { + try std.testing.expect(err == token.LexError.Incomplete); + return; + }; + + if (result) |tuple| { + const r = tuple[0]; + std.debug.print("{s}\n", .{r.value}); + } else { + std.debug.print("nil returned", .{}); + } + + try std.testing.expect(false); +} + +test "shouldnt parse incomplete hex number 2" { + const input = "0x"; + const result = lex(input, input.len, 0) catch |err| { + try std.testing.expect(err == token.LexError.Incomplete); + return; + }; + + if (result) |tuple| { + const r = tuple[0]; + std.debug.print("{s}\n", .{r.value}); + } else { + std.debug.print("nil returned", .{}); + } + + try std.testing.expect(false); +} diff --git a/src/01_lexic/root.zig b/src/01_lexic/root.zig index 1c41159..ed5921c 100644 --- a/src/01_lexic/root.zig +++ b/src/01_lexic/root.zig @@ -7,8 +7,13 @@ const Token = token.Token; pub fn tokenize(input: []const u8) !void { const input_len = input.len; - const next_token = try number(input, input_len, 0); - _ = next_token; + const next_token = try number.lex(input, input_len, 0); - std.debug.print("tokenize :D {s}\n", .{input}); + if (next_token) |tuple| { + const t = tuple[0]; + + std.debug.print("{s}\n", .{t.value}); + } else { + std.debug.print("no token found :c", .{}); + } } diff --git a/src/01_lexic/token.zig b/src/01_lexic/token.zig index b154c05..dfdd355 100644 --- a/src/01_lexic/token.zig +++ b/src/01_lexic/token.zig @@ -16,3 +16,8 @@ pub const Token = struct { }; } }; + +pub const LexError = error{ + LeadingZero, + Incomplete, +}; diff --git a/src/01_lexic/utils.zig b/src/01_lexic/utils.zig index a23eaa8..0a4760d 100644 --- a/src/01_lexic/utils.zig +++ b/src/01_lexic/utils.zig @@ -1,3 +1,7 @@ pub fn is_decimal_digit(c: u8) bool { return '0' <= c and c <= '9'; } + +pub fn is_hex_digit(c: u8) bool { + return ('0' <= c and c <= '9') or ('a' <= c and c <= 'f') or ('A' <= c and c <= 'F'); +}