chore: update files

feat: lex punctuation
2024-11-29 06:13:02 -05:00 · 2024-11-27 21:06:51 -05:00
6 changed files with 89 additions and 15 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -24,7 +24,13 @@ Now in Zig!
 ## v0.0.1
- [x] Lex integers & floating point numbers
+- [x] Lex numbers
 - [x] Lex identifier
 - [x] Lex datatypes
 - [x] Lex operators
 - [x] Lex single line comments
 - [x] Lex strings
 - [x] Lex grouping signs
 - [ ]
--- a/build.zig
+++ b/build.zig
@ -101,6 +101,7 @@ pub fn build(b: *std.Build) void {
        "src/01_lexic/token.zig",
        "src/01_lexic/utils.zig",
        "src/01_lexic/grouping.zig",
        "src/01_lexic/punctiation.zig",
    };
    for (files) |file| {
        const file_unit_test = b.addTest(.{
--- a/src/01_lexic/punctiation.zig
+++ b/src/01_lexic/punctiation.zig
@ -0,0 +1,60 @@
 const std = @import("std");
 const assert = std.debug.assert;
 const token = @import("./token.zig");
 const utils = @import("./utils.zig");
 const Token = token.Token;
 const TokenType = token.TokenType;
 const LexError = token.LexError;
 const LexReturn = token.LexReturn;
 /// Lexes a single punctuation character located at `input[start]`.
 ///
 /// Returns the token paired with the position immediately after it when the
 /// character is a comma or a newline, and `null` for any other character.
 /// Asserts that `start` is a valid index into `input`.
 pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
    // the caller must hand us at least one character to inspect
    assert(start < input.len);
    const next_pos = start + 1;
    const kind: TokenType = switch (input[start]) {
        ',' => .Comma,
        '\n' => .Newline,
        // anything else is not punctuation: report "no match"
        else => return null,
    };
    return .{ Token.init(input[start..next_pos], kind, start), next_pos };
 }
 test "shouldnt lex other things" {
    // a digit string does not start with punctuation, so no token is expected
    const result = try lex("322", 0);
    try std.testing.expect(result == null);
 }
 test "should lex comma" {
    const result = try lex(",", 0);
    // a missing token fails the test with the same error `expect(false)` yields
    const pair = result orelse return error.TestUnexpectedResult;
    const tok = pair[0];
    try std.testing.expectEqualDeep(",", tok.value);
    try std.testing.expectEqual(TokenType.Comma, tok.token_type);
    // the next position is right after the single consumed character
    try std.testing.expectEqual(1, pair[1]);
 }
 test "should lex new line" {
    const result = try lex("\n", 0);
    // a missing token fails the test with the same error `expect(false)` yields
    const pair = result orelse return error.TestUnexpectedResult;
    const tok = pair[0];
    try std.testing.expectEqualDeep("\n", tok.value);
    try std.testing.expectEqual(TokenType.Newline, tok.token_type);
    // the next position is right after the single consumed character
    try std.testing.expectEqual(1, pair[1]);
 }
--- a/src/01_lexic/root.zig
+++ b/src/01_lexic/root.zig
@ -8,16 +8,18 @@ const operator = @import("./operator.zig");
 const comment = @import("./comment.zig");
 const string = @import("./string.zig");
 const grouping = @import("./grouping.zig");
 const punctuation = @import("./punctiation.zig");
 const TokenType = token.TokenType;
 const Token = token.Token;
-pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !void {
+// Creates an array list of tokens. The caller is responsible for
 // calling `deinit` to free the array list
 pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !std.ArrayList(Token) {
    const input_len = input.len;
    var current_pos: usize = 0;
    var tokens = std.ArrayList(Token).init(alloc);
    // Release the list only when an error is returned. On success the list
    // is handed to the caller, who calls `deinit`; a plain `defer` here
    // would free it before the caller ever sees it.
    errdefer tokens.deinit();
    while (current_pos < input_len) {
        const actual_next_pos = ignore_whitespace(input, current_pos);
@ -79,6 +81,14 @@ pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !void {
            try tokens.append(t);
        }
        // lex punctuation
        else if (try punctuation.lex(input, actual_next_pos)) |tuple| {
            assert(tuple[1] > current_pos);
            const t = tuple[0];
            current_pos = tuple[1];
            try tokens.append(t);
        }
        // nothing was matched. fail
        // TODO: instead of failing add an error, ignore all chars
        // until next whitespace, and continue lexing
@ -89,6 +99,8 @@ pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !void {
            break;
        }
    }
    return tokens;
 }
 /// Ignores all whitespace on `input` since `start`
@ -108,10 +120,12 @@ pub fn ignore_whitespace(input: []const u8, start: usize) usize {
 test "should insert 1 item" {
    const input = "322";
-    try tokenize(input, std.testing.allocator);
+    const arrl = try tokenize(input, std.testing.allocator);
    arrl.deinit();
 }
 test "should insert 2 item" {
    const input = "322 644";
-    try tokenize(input, std.testing.allocator);
+    const arrl = try tokenize(input, std.testing.allocator);
    arrl.deinit();
 }
--- a/src/01_lexic/token.zig
+++ b/src/01_lexic/token.zig
@ -16,8 +16,7 @@ pub const TokenType = enum {
    // punctuation that carries special meaning
    Comma,
    Newline,
-    // Others
+    // Each keyword will have its own token
    Keyword,
 };
 pub const Token = struct {
--- a/src/main.zig
+++ b/src/main.zig
@ -27,14 +27,8 @@ fn repl() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    const alloc = gpa.allocator();
-    try lexic.tokenize(line, alloc);
+    const tokens = try lexic.tokenize(line, alloc);
    defer tokens.deinit();
    try bw.flush();
 }
 test "simple test" {
    var numbers = std.ArrayList(i32).init(std.testing.allocator);
    // Remove this line to see whether zig detects the resulting memory leak.
    defer numbers.deinit();
    try numbers.append(42);
    // the only element pushed must be the one popped
    const popped = numbers.pop();
    try std.testing.expectEqual(@as(i32, 42), popped);
 }
Author	SHA1	Message	Date
Araozu	1bd463998c	chore: update files	2024-11-29 06:13:02 -05:00
Araozu	b71cfe4370	feat: lex punctuation	2024-11-27 21:06:51 -05:00