feat: lex comments, forbid CR

Araozu 2024-11-24 07:32:37 -05:00
parent 00597752da
commit 3f95515964
4 changed files with 89 additions and 0 deletions

build.zig

@@ -96,6 +96,7 @@ pub fn build(b: *std.Build) void {
         "src/01_lexic/identifier.zig",
         "src/01_lexic/datatype.zig",
         "src/01_lexic/operator.zig",
+        "src/01_lexic/comment.zig",
         "src/01_lexic/token.zig",
         "src/01_lexic/utils.zig",
     };

src/01_lexic/comment.zig (new file, 77 lines)

@@ -0,0 +1,77 @@
const std = @import("std");
const assert = std.debug.assert;
const token = @import("./token.zig");
const utils = @import("./utils.zig");

const Token = token.Token;
const TokenType = token.TokenType;
const LexError = token.LexError;
const LexReturn = token.LexReturn;

/// Lexes a single-line comment (`// ...`). Returns the comment token and the
/// position after it, null if the input does not start a comment, or
/// LexError.CRLF if a carriage return is found inside the comment.
pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
    const cap = input.len;
    assert(start < cap);

    // there should be at least 2 characters ("//")
    if (start + 1 >= cap) {
        return null;
    }

    if (input[start] == '/' and input[start + 1] == '/') {
        var current_pos = start + 2;

        // consume all bytes until a newline (LF) is found
        while (current_pos < cap and input[current_pos] != '\n') {
            // CR is forbidden: return an error
            if (input[current_pos] == '\r') {
                return LexError.CRLF;
            }

            current_pos += 1;
        }

        return .{ Token.init(input[start..current_pos], TokenType.Comment, start), current_pos };
    } else {
        return null;
    }
}

test "should lex comment until EOF" {
const input = "// aea";
const output = try lex(input, 0);
if (output) |tuple| {
const t = tuple[0];
try std.testing.expectEqualDeep("// aea", t.value);
try std.testing.expectEqual(6, tuple[1]);
} else {
try std.testing.expect(false);
}
}
test "should lex comment until newline (LF)" {
const input = "// my comment\n// other comment";
const output = try lex(input, 0);
if (output) |tuple| {
const t = tuple[0];
try std.testing.expectEqualDeep("// my comment", t.value);
try std.testing.expectEqual(13, tuple[1]);
} else {
try std.testing.expect(false);
}
}
test "shouldn lex incomplete comment" {
const input = "/aa";
const output = try lex(input, 0);
try std.testing.expect(output == null);
}
test "should fail on CRLF" {
const input = "// my comment\x0D\x0A// other comment";
_ = lex(input, 0) catch |err| {
try std.testing.expectEqual(LexError.CRLF, err);
return;
};
try std.testing.expect(false);
}
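
As a quick reading aid (not part of this commit), here is a hypothetical caller that exercises the three possible outcomes of comment.lex: a (token, next position) tuple, null when the input does not start a comment, and LexError.CRLF when a carriage return appears inside one. The file name, helper function, and print messages are made up for illustration; the actual integration into the tokenizer is in the next file of this diff.

// hypothetical_caller.zig - illustration only, not in the commit
const std = @import("std");
const comment = @import("./comment.zig");
const token = @import("./token.zig");
const LexError = token.LexError;

fn describe(input: []const u8, start: usize) void {
    // lex returns LexError!?LexReturn: handle the error first, then the optional
    const result = comment.lex(input, start) catch |err| {
        if (err == LexError.CRLF) {
            std.debug.print("carriage return inside a comment is forbidden\n", .{});
        }
        return;
    };

    if (result) |tuple| {
        std.debug.print("comment '{s}', next position {d}\n", .{ tuple[0].value, tuple[1] });
    } else {
        std.debug.print("not a comment\n", .{});
    }
}

test "describe the three outcomes" {
    describe("// ok", 0); // comment token, next position 5
    describe("1 + 2", 0); // not a comment
    describe("// bad\r\n", 0); // CRLF error
}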


@@ -5,6 +5,7 @@ const identifier = @import("./identifier.zig");
 const datatype = @import("./datatype.zig");
 const token = @import("./token.zig");
 const operator = @import("./operator.zig");
+const comment = @import("./comment.zig");
 
 const TokenType = token.TokenType;
 const Token = token.Token;
@@ -44,6 +45,14 @@ pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !void {
             try tokens.append(t);
         }
+        // attempt to lex a comment
+        else if (try comment.lex(input, actual_next_pos)) |tuple| {
+            assert(tuple[1] > current_pos);
+            const t = tuple[0];
+            current_pos = tuple[1];
+            try tokens.append(t);
+        }
         // attempt to lex an operator
         else if (try operator.lex(input, actual_next_pos)) |tuple| {
             assert(tuple[1] > current_pos);

src/01_lexic/token.zig

@@ -3,6 +3,7 @@ pub const TokenType = enum {
     Float,
     Identifier,
     Operator,
+    Comment,
 };
 
 pub const Token = struct {
@@ -24,6 +25,7 @@ pub const LexError = error{
     Incomplete,
     IncompleteFloatingNumber,
     IncompleteScientificNumber,
+    CRLF,
 };
 
 /// Contains the lexed token and the next position
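
For reference, the Token, LexError, and LexReturn definitions that comment.zig relies on are only partially visible in this diff. Judging from how they are used above (Token.init(value, token_type, start), t.value, tuple[0]/tuple[1]), they presumably look roughly like the sketch below; the field names other than value, the init helper, and any members before the ones shown are assumptions, not the actual contents of token.zig.

// Sketch of src/01_lexic/token.zig as implied by this diff; details are assumed.
pub const TokenType = enum {
    // ...earlier variants not shown in this diff...
    Float,
    Identifier,
    Operator,
    Comment,
};

pub const Token = struct {
    value: []const u8,
    token_type: TokenType, // field name assumed
    start: usize, // field name assumed; possibly more fields

    pub fn init(value: []const u8, token_type: TokenType, start: usize) Token {
        return Token{ .value = value, .token_type = token_type, .start = start };
    }
};

pub const LexError = error{
    // ...earlier members not shown in this diff...
    Incomplete,
    IncompleteFloatingNumber,
    IncompleteScientificNumber,
    CRLF,
};

/// Contains the lexed token and the next position
pub const LexReturn = struct { Token, usize };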