feat: integrate identifier/datatype lexer

feat: lex datatype
2024-11-18 21:14:09 -05:00 · 2024-11-18 21:08:37 -05:00
5 changed files with 107 additions and 5 deletions
--- a/build.zig
+++ b/build.zig
@ -94,6 +94,7 @@ pub fn build(b: *std.Build) void {
        "src/01_lexic/root.zig",
        "src/01_lexic/number.zig",
        "src/01_lexic/identifier.zig",
+        "src/01_lexic/datatype.zig",
        "src/01_lexic/token.zig",
        "src/01_lexic/utils.zig",
    };
--- a/src/01_lexic/datatype.zig
+++ b/src/01_lexic/datatype.zig
@ -0,0 +1,65 @@
+const std = @import("std");
+const token = @import("./token.zig");
+const utils = @import("./utils.zig");
+
+const Token = token.Token;
+const TokenType = token.TokenType;
+const LexError = token.LexError;
+const LexReturn = token.LexReturn;
+
+/// Lexes a datatype starting at byte offset `start` of `input`.
+///
+/// A match is one uppercase ASCII letter, optionally followed by whatever
+/// `utils.lex_many` consumes using `utils.is_identifier_char`.
+///
+/// Returns `null` when `start` is not inside `input` or the byte at `start`
+/// is not uppercase. Otherwise returns a tuple of the token and the position
+/// where the match ends.
+pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
+    // Guard: there must be a byte to read, and it must be uppercase.
+    if (start >= input.len or !utils.is_uppercase(input[start])) {
+        return null;
+    }
+
+    // The leading letter is always part of the match. If `lex_many` reports
+    // a new position the match extends to it; otherwise it is one byte long.
+    const after_first = start + 1;
+    const end = utils.lex_many(utils.is_identifier_char, input, after_first) orelse after_first;
+
+    // NOTE(review): the token is tagged `TokenType.Identifier` even though
+    // this is the datatype lexer; confirm whether a dedicated tag is intended.
+    return .{
+        Token.init(input[start..end], TokenType.Identifier, start),
+        end,
+    };
+}
+
+test "should lex datatype" {
+    // A capitalized word must be consumed whole; a null result fails the test.
+    const result = (try lex("MyType", 0)) orelse return error.TestUnexpectedResult;
+
+    try std.testing.expectEqualDeep("MyType", result[0].value);
+}
+
+test "should lex datatype 2" {
+    // Trailing digits belong to the same token; a null result fails the test.
+    const result = (try lex("MyTypeWith322", 0)) orelse return error.TestUnexpectedResult;
+
+    try std.testing.expectEqualDeep("MyTypeWith322", result[0].value);
+}
+
+test "shouldnt lex identifier" {
+    // Input starting with a lowercase letter must be rejected with null.
+    const rejected = (try lex("myDatatype", 0)) == null;
+
+    try std.testing.expect(rejected);
+}
--- a/src/01_lexic/identifier.zig
+++ b/src/01_lexic/identifier.zig
@ -7,6 +7,7 @@ const TokenType = token.TokenType;
 const LexError = token.LexError;
 const LexReturn = token.LexReturn;

+/// Lexes an identifier
 pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
    const cap = input.len;
    var final_pos = start;
@ -92,6 +93,18 @@ test "should lex identifier 3" {
    }
 }

+test "should lex identifier 4" {
+    // Underscores and digits after the first letter stay in the same token;
+    // a null result fails the test.
+    const result = (try lex("identifier_number_3", 0)) orelse return error.TestUnexpectedResult;
+
+    try std.testing.expectEqualDeep("identifier_number_3", result[0].value);
+}
+
 test "shouldnt lex datatype" {
    const input = "MyDatatype";
    const output = try lex(input, 0);
--- a/src/01_lexic/root.zig
+++ b/src/01_lexic/root.zig
@ -1,5 +1,7 @@
 const std = @import("std");
 const number = @import("./number.zig");
+const identifier = @import("./identifier.zig");
+const datatype = @import("./datatype.zig");
 const token = @import("./token.zig");

 const TokenType = token.TokenType;
@ -15,20 +17,37 @@ pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !void {
    while (current_pos < input_len) {
        const actual_next_pos = ignore_whitespace(input, current_pos);

-        const next_token = try number.lex(input, input_len, actual_next_pos);
-        if (next_token) |tuple| {
+        // attempt to lex a number
+        if (try number.lex(input, input_len, actual_next_pos)) |tuple| {
            const t = tuple[0];
            current_pos = tuple[1];

            try tokens.append(t);
-        } else {
+        }
+        // attempt to lex an identifier
+        else if (try identifier.lex(input, actual_next_pos)) |tuple| {
+            const t = tuple[0];
+            current_pos = tuple[1];
+
+            try tokens.append(t);
+        }
+        // attempt to lex a datatype
+        else if (try datatype.lex(input, actual_next_pos)) |tuple| {
+            const t = tuple[0];
+            current_pos = tuple[1];
+
+            try tokens.append(t);
+        }
+        // nothing was matched. fail
+        // TODO: instead of failing add an error, ignore all chars
+        // until next whitespace, and continue lexing
+        // TODO: check if this is a good error recovery strategy
+        else {
            // no lexer matched
            std.debug.print("unmatched args: anytype:c\n", .{});
            break;
        }
    }
-
-    std.debug.print("array list len: {d}", .{tokens.items.len});
 }

 /// Ignores all whitespace on `input` since `start`
--- a/src/01_lexic/utils.zig
+++ b/src/01_lexic/utils.zig
@ -22,6 +22,10 @@ pub fn is_lowercase(c: u8) bool {
    return 'a' <= c and c <= 'z';
 }

+/// Reports whether `c` is an ASCII uppercase letter (`'A'` through `'Z'`).
+pub fn is_uppercase(c: u8) bool {
+    return switch (c) {
+        'A'...'Z' => true,
+        else => false,
+    };
+}
+
 /// Reports whether `c` is an underscore or an ASCII lowercase letter.
 pub fn is_lowercase_underscore(c: u8) bool {
    return switch (c) {
        '_', 'a'...'z' => true,
        else => false,
    };
 }
Author	SHA1	Message	Date
Araozu	4e1b2c3cab	feat: integrate identifier/datatype lexer	2024-11-18 21:14:09 -05:00
Araozu	15a66ebc3d	feat: lex datatype	2024-11-18 21:08:37 -05:00