From 00597752da401393a6228db85861c7e82520c776 Mon Sep 17 00:00:00 2001 From: Araozu Date: Sun, 24 Nov 2024 06:53:58 -0500 Subject: [PATCH] feat: lex operators --- build.zig | 1 + src/01_lexic/identifier.zig | 6 +-- src/01_lexic/number.zig | 3 ++ src/01_lexic/operator.zig | 73 +++++++++++++++++++++++++++++++++++++ src/01_lexic/root.zig | 14 +++++++ src/01_lexic/token.zig | 1 + src/01_lexic/utils.zig | 6 ++- 7 files changed, 99 insertions(+), 5 deletions(-) create mode 100644 src/01_lexic/operator.zig diff --git a/build.zig b/build.zig index 81a2adf..cbc5822 100644 --- a/build.zig +++ b/build.zig @@ -95,6 +95,7 @@ pub fn build(b: *std.Build) void { "src/01_lexic/number.zig", "src/01_lexic/identifier.zig", "src/01_lexic/datatype.zig", + "src/01_lexic/operator.zig", "src/01_lexic/token.zig", "src/01_lexic/utils.zig", }; diff --git a/src/01_lexic/identifier.zig b/src/01_lexic/identifier.zig index 019eb37..18b1064 100644 --- a/src/01_lexic/identifier.zig +++ b/src/01_lexic/identifier.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const assert = std.debug.assert; const token = @import("./token.zig"); const utils = @import("./utils.zig"); @@ -11,10 +12,7 @@ const LexReturn = token.LexReturn; pub fn lex(input: []const u8, start: usize) LexError!?LexReturn { const cap = input.len; var final_pos = start; - - if (start >= cap) { - return null; - } + assert(start < cap); // lex lowercase or underscore if (!utils.is_lowercase_underscore(input[start])) { diff --git a/src/01_lexic/number.zig b/src/01_lexic/number.zig index fc9dcf8..7ac1e90 100644 --- a/src/01_lexic/number.zig +++ b/src/01_lexic/number.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const assert = std.debug.assert; const token = @import("./token.zig"); const utils = @import("./utils.zig"); @@ -13,6 +14,7 @@ const is_decimal_digit = utils.is_decimal_digit; /// /// A number is either an Int or a Float. pub fn lex(input: []const u8, cap: usize, start: usize) LexError!?LexReturn { + assert(start < cap); const first_char = input[start]; // Attempt to lex a hex, octal or binary number @@ -74,6 +76,7 @@ fn prefixed(comptime prefix: u8, input: []const u8, cap: usize, start: usize) !? /// avoid confussion with PHP literal octals. /// Floating point numbers can. fn integer(input: []const u8, cap: usize, start: usize) LexError!?LexReturn { + assert(start < cap); const first_char = input[start]; if (!is_decimal_digit(first_char)) { return null; diff --git a/src/01_lexic/operator.zig b/src/01_lexic/operator.zig new file mode 100644 index 0000000..bc53119 --- /dev/null +++ b/src/01_lexic/operator.zig @@ -0,0 +1,73 @@ +const std = @import("std"); +const assert = std.debug.assert; +const token = @import("./token.zig"); +const utils = @import("./utils.zig"); + +const Token = token.Token; +const TokenType = token.TokenType; +const LexError = token.LexError; +const LexReturn = token.LexReturn; + +// lex an operator +pub fn lex(input: []const u8, start: usize) LexError!?LexReturn { + const cap = input.len; + assert(start < cap); + + // lex operator + if (utils.lex_many_1(utils.is_operator_char, input, start)) |final_pos| { + return .{ + Token.init(input[start..final_pos], TokenType.Operator, start), + final_pos, + }; + } + // no operator found + else { + return null; + } +} + +test "should lex single operator" { + const input = "="; + const output = try lex(input, 0); + + if (output) |tuple| { + const t = tuple[0]; + try std.testing.expectEqualDeep("=", t.value); + try std.testing.expectEqual(1, tuple[1]); + } else { + try std.testing.expect(false); + } +} + +test "should lex operator of len 2" { + const input = "+="; + const output = try lex(input, 0); + + if (output) |tuple| { + const t = tuple[0]; + try std.testing.expectEqualDeep("+=", t.value); + try std.testing.expectEqual(2, tuple[1]); + } else { + try std.testing.expect(false); + } +} + +test "should lex operator of len 3" { + const input = " >>= "; + const output = try lex(input, 1); + + if (output) |tuple| { + const t = tuple[0]; + try std.testing.expectEqualDeep(">>=", t.value); + try std.testing.expectEqual(4, tuple[1]); + } else { + try std.testing.expect(false); + } +} + +test "should not lex something else" { + const input = "322"; + const output = try lex(input, 0); + + try std.testing.expect(output == null); +} diff --git a/src/01_lexic/root.zig b/src/01_lexic/root.zig index 5732ba2..a3f3fe1 100644 --- a/src/01_lexic/root.zig +++ b/src/01_lexic/root.zig @@ -1,8 +1,10 @@ const std = @import("std"); +const assert = std.debug.assert; const number = @import("./number.zig"); const identifier = @import("./identifier.zig"); const datatype = @import("./datatype.zig"); const token = @import("./token.zig"); +const operator = @import("./operator.zig"); const TokenType = token.TokenType; const Token = token.Token; @@ -16,9 +18,11 @@ pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !void { while (current_pos < input_len) { const actual_next_pos = ignore_whitespace(input, current_pos); + assert(current_pos <= actual_next_pos); // attempt to lex a number if (try number.lex(input, input_len, actual_next_pos)) |tuple| { + assert(tuple[1] > current_pos); const t = tuple[0]; current_pos = tuple[1]; @@ -26,6 +30,7 @@ pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !void { } // attempt to lex an identifier else if (try identifier.lex(input, actual_next_pos)) |tuple| { + assert(tuple[1] > current_pos); const t = tuple[0]; current_pos = tuple[1]; @@ -33,6 +38,15 @@ pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !void { } // attempt to lex a datatype else if (try datatype.lex(input, actual_next_pos)) |tuple| { + assert(tuple[1] > current_pos); + const t = tuple[0]; + current_pos = tuple[1]; + + try tokens.append(t); + } + // attempt to lex an operator + else if (try operator.lex(input, actual_next_pos)) |tuple| { + assert(tuple[1] > current_pos); const t = tuple[0]; current_pos = tuple[1]; diff --git a/src/01_lexic/token.zig b/src/01_lexic/token.zig index 7e7f68c..18a8284 100644 --- a/src/01_lexic/token.zig +++ b/src/01_lexic/token.zig @@ -2,6 +2,7 @@ pub const TokenType = enum { Int, Float, Identifier, + Operator, }; pub const Token = struct { diff --git a/src/01_lexic/utils.zig b/src/01_lexic/utils.zig index 98b2820..13bee33 100644 --- a/src/01_lexic/utils.zig +++ b/src/01_lexic/utils.zig @@ -35,6 +35,10 @@ pub fn is_identifier_char(c: u8) bool { return c == '_' or ('a' <= c and c <= 'z') or ('A' <= c and c <= 'Z') or ('0' <= c and c <= '9'); } +pub fn is_operator_char(c: u8) bool { + return c == '+' or c == '-' or c == '=' or c == '*' or c == '!' or c == '/' or c == '|' or c == '@' or c == '#' or c == '$' or c == '~' or c == '%' or c == '&' or c == '?' or c == '<' or c == '>' or c == '^' or c == '.' or c == ':'; +} + /// Runs a discriminator function at least once, /// and returns the end position of the lex. /// @@ -44,7 +48,7 @@ pub fn lex_many_1( comptime lex_fun: fn (c: u8) bool, input: []const u8, start: usize, -) usize { +) ?usize { // assert that there is input left const cap = input.len; var current_pos = start;