feat: lex grouping signs

2024-11-27 20:41:42 -05:00 · 2024-11-27 20:41:42 -05:00 · 882d123fe0
commit 882d123fe0
parent 71d617928a
6 changed files with 154 additions and 4 deletions
--- a/build.zig
+++ b/build.zig
@ -100,6 +100,7 @@ pub fn build(b: *std.Build) void {
        "src/01_lexic/string.zig",
        "src/01_lexic/token.zig",
        "src/01_lexic/utils.zig",
+        "src/01_lexic/grouping.zig",
    };
    for (files) |file| {
        const file_unit_test = b.addTest(.{
--- a/src/01_lexic/datatype.zig
+++ b/src/01_lexic/datatype.zig
@ -28,7 +28,7 @@ pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
    }

    return .{
-        Token.init(input[start..final_pos], TokenType.Identifier, start),
+        Token.init(input[start..final_pos], TokenType.Datatype, start),
        final_pos,
    };
 }
--- a/src/01_lexic/grouping.zig
+++ b/src/01_lexic/grouping.zig
@ -0,0 +1,121 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const token = @import("./token.zig");
+const utils = @import("./utils.zig");
+
+const Token = token.Token;
+const TokenType = token.TokenType;
+const LexError = token.LexError;
+const LexReturn = token.LexReturn;
+
+/// Lex a single grouping sign located at `input[start]`.
+///
+/// Returns a tuple holding the token built from the one-byte slice
+/// `input[start .. start + 1]` and the position right after it
+/// (`start + 1`), or `null` when the byte at `start` is not one of
+/// `(`, `)`, `[`, `]`, `{`, `}`.
+///
+/// Asserts that `start` indexes an existing byte of `input`.
+pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
+    // the caller must point at an existing byte
+    assert(start < input.len);
+
+    const kind: TokenType = switch (input[start]) {
+        '(' => .LeftParen,
+        ')' => .RightParen,
+        '[' => .LeftBracket,
+        ']' => .RightBracket,
+        '{' => .LeftBrace,
+        '}' => .RightBrace,
+        // any other byte is not a grouping sign
+        else => return null,
+    };
+
+    // grouping signs are always exactly one byte wide
+    const end = start + 1;
+    return .{ Token.init(input[start..end], kind, start), end };
+}
+
+test "shouldnt lex other things" {
+    // a digit is not a grouping sign, so the lexer must decline it
+    const declined = (try lex("322", 0)) == null;
+    try std.testing.expect(declined);
+}
+
+test "should lex opening paren" {
+    // a null result means the sign was not recognised: fail the test
+    const result = (try lex("( hello )", 0)) orelse return error.TestUnexpectedResult;
+    const tok = result[0];
+    const next_pos = result[1];
+
+    try std.testing.expectEqualDeep("(", tok.value);
+    try std.testing.expectEqual(TokenType.LeftParen, tok.token_type);
+    try std.testing.expectEqual(1, next_pos);
+}
+
+test "should lex closing paren" {
+    // a null result means the sign was not recognised: fail the test
+    const result = (try lex("( hello )", 8)) orelse return error.TestUnexpectedResult;
+    const tok = result[0];
+    const next_pos = result[1];
+
+    try std.testing.expectEqualDeep(")", tok.value);
+    try std.testing.expectEqual(TokenType.RightParen, tok.token_type);
+    try std.testing.expectEqual(9, next_pos);
+}
+
+test "should lex opening bracket" {
+    // a null result means the sign was not recognised: fail the test
+    const result = (try lex("[ hello ]", 0)) orelse return error.TestUnexpectedResult;
+    const tok = result[0];
+    const next_pos = result[1];
+
+    try std.testing.expectEqualDeep("[", tok.value);
+    try std.testing.expectEqual(TokenType.LeftBracket, tok.token_type);
+    try std.testing.expectEqual(1, next_pos);
+}
+
+test "should lex closing bracket" {
+    // a null result means the sign was not recognised: fail the test
+    const result = (try lex("[ hello ]", 8)) orelse return error.TestUnexpectedResult;
+    const tok = result[0];
+    const next_pos = result[1];
+
+    try std.testing.expectEqualDeep("]", tok.value);
+    try std.testing.expectEqual(TokenType.RightBracket, tok.token_type);
+    try std.testing.expectEqual(9, next_pos);
+}
+
+test "should lex opening brace" {
+    // a null result means the sign was not recognised: fail the test
+    const result = (try lex("{ hello }", 0)) orelse return error.TestUnexpectedResult;
+    const tok = result[0];
+    const next_pos = result[1];
+
+    try std.testing.expectEqualDeep("{", tok.value);
+    try std.testing.expectEqual(TokenType.LeftBrace, tok.token_type);
+    try std.testing.expectEqual(1, next_pos);
+}
+
+test "should lex closing brace" {
+    // a null result means the sign was not recognised: fail the test
+    const result = (try lex("{ hello }", 8)) orelse return error.TestUnexpectedResult;
+    const tok = result[0];
+    const next_pos = result[1];
+
+    try std.testing.expectEqualDeep("}", tok.value);
+    try std.testing.expectEqual(TokenType.RightBrace, tok.token_type);
+    try std.testing.expectEqual(9, next_pos);
+}
--- a/src/01_lexic/root.zig
+++ b/src/01_lexic/root.zig
@ -6,6 +6,8 @@ const datatype = @import("./datatype.zig");
 const token = @import("./token.zig");
 const operator = @import("./operator.zig");
 const comment = @import("./comment.zig");
+const string = @import("./string.zig");
+const grouping = @import("./grouping.zig");

 const TokenType = token.TokenType;
 const Token = token.Token;
@ -37,6 +39,14 @@ pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !void {

            try tokens.append(t);
        }
+        // attempt to lex a string
+        else if (try string.lex(input, actual_next_pos)) |tuple| {
+            assert(tuple[1] > current_pos);
+            const t = tuple[0];
+            current_pos = tuple[1];
+
+            try tokens.append(t);
+        }
        // attempt to lex a datatype
        else if (try datatype.lex(input, actual_next_pos)) |tuple| {
            assert(tuple[1] > current_pos);
@ -61,6 +71,14 @@ pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !void {

            try tokens.append(t);
        }
+        // attempt to lex grouping signs
+        else if (try grouping.lex(input, actual_next_pos)) |tuple| {
+            assert(tuple[1] > current_pos);
+            const t = tuple[0];
+            current_pos = tuple[1];
+
+            try tokens.append(t);
+        }
        // nothing was matched. fail
        // TODO: instead of failing add an error, ignore all chars
        // until next whitespace, and continue lexing
--- a/src/01_lexic/string.zig
+++ b/src/01_lexic/string.zig
@ -18,10 +18,7 @@ pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
    }

    // lex everything but quote and newline
-    // TODO: escape characters
-
    var current_pos = start + 1;
-
    while (current_pos < cap) {
        const next_char = input[current_pos];
        // string is finished, return it
--- a/src/01_lexic/token.zig
+++ b/src/01_lexic/token.zig
@ -2,9 +2,22 @@ pub const TokenType = enum {
    Int,
    Float,
    Identifier,
+    Datatype,
    Operator,
    Comment,
    String,
+    // grouping signs
+    LeftParen,
+    RightParen,
+    LeftBracket,
+    RightBracket,
+    LeftBrace,
+    RightBrace,
+    // punctuation that carries special meaning
+    Comma,
+    Newline,
+    // Others
+    Keyword,
 };

 pub const Token = struct {