feat: add error handling to string lexing

fix: test
2025-01-05 12:43:28 -05:00 · 2024-12-29 13:13:44 -05:00
3 changed files with 71 additions and 27 deletions
--- a/src/01_lexic/number.zig
+++ b/src/01_lexic/number.zig
@ -264,7 +264,7 @@ fn scientific(
 test "int lexer 1" {
    const input = "322   ";
-    const result = try integer(input, input.len, 0);
+    const result = try integer(input, input.len, 0, undefined, std.heap.page_allocator);
    if (result) |tuple| {
        const r = tuple[0];
@ -276,7 +276,7 @@ test "int lexer 1" {
 test "int lexer 2" {
    const input = "   644   ";
-    const result = try integer(input, input.len, 3);
+    const result = try integer(input, input.len, 3, undefined, std.heap.page_allocator);
    if (result) |tuple| {
        const r = tuple[0];
@ -288,7 +288,7 @@ test "int lexer 2" {
 test "int lexer 3" {
    const input = "4";
-    const result = try integer(input, input.len, 0);
+    const result = try integer(input, input.len, 0, undefined, std.heap.page_allocator);
    if (result) |tuple| {
        const r = tuple[0];
@ -300,7 +300,7 @@ test "int lexer 3" {
 test "should return null if not an integer" {
    const input = "prosor prosor";
-    const result = try integer(input, input.len, 0);
+    const result = try integer(input, input.len, 0, undefined, std.heap.page_allocator);
    try std.testing.expect(result == null);
 }
@ -323,6 +323,7 @@ test "should fail on integer with leading zero" {
    defer std.testing.allocator.destroy(errdata);
    const result = lex(input, input.len, 0, errdata, std.testing.allocator) catch |err| {
        try std.testing.expect(err == token.LexError.LeadingZero);
        defer errdata.deinit();
        return;
    };
@ -508,6 +509,7 @@ test "should fail on incomplete fp number" {
    defer std.testing.allocator.destroy(errdata);
    const result = lex(input, input.len, 0, errdata, std.testing.allocator) catch |err| {
        try std.testing.expect(err == token.LexError.IncompleteFloatingNumber);
        errdata.deinit();
        return;
    };
@ -539,6 +541,7 @@ test "should fail on incomplete scientific number" {
    defer std.testing.allocator.destroy(errdata);
    const result = lex(input, input.len, 0, errdata, std.testing.allocator) catch |err| {
        try std.testing.expect(err == token.LexError.IncompleteScientificNumber);
        defer errdata.deinit();
        return;
    };
@ -558,6 +561,7 @@ test "should fail on incomplete scientific number 2" {
    defer std.testing.allocator.destroy(errdata);
    const result = lex(input, input.len, 0, errdata, std.testing.allocator) catch |err| {
        try std.testing.expect(err == token.LexError.IncompleteScientificNumber);
        defer errdata.deinit();
        return;
    };
--- a/src/01_lexic/root.zig
+++ b/src/01_lexic/root.zig
@ -41,8 +41,9 @@ pub fn tokenize(
            break;
        }
        // attempt to lex a number
        var current_error: errors.ErrorData = undefined;
        // attempt to lex a number
        const number_lex = number.lex(input, input_len, actual_next_pos, &current_error, alloc) catch |e| switch (e) {
            // recoverable errors
            LexError.Incomplete, LexError.LeadingZero, LexError.IncompleteFloatingNumber, LexError.IncompleteScientificNumber => {
@ -51,7 +52,6 @@ pub fn tokenize(
                // ignore everything until whitespace and loop
                current_pos = ignore_until_whitespace(input, actual_next_pos);
                assert(current_pos > actual_next_pos);
                continue;
            },
            // just throw unrecoverable errors
@ -66,24 +66,36 @@ pub fn tokenize(
            continue;
        }
-        // attempt to lex an identifier
+        // attempt to lex an identifier. identifier parsing has no errors
-        else if (try identifier.lex(input, actual_next_pos)) |tuple| {
+        if (try identifier.lex(input, actual_next_pos)) |tuple| {
            assert(tuple[1] > current_pos);
            const t = tuple[0];
            current_pos = tuple[1];
            try tokens.append(t);
            continue;
        }
        // attempt to lex a string
-        else if (try string.lex(input, actual_next_pos)) |tuple| {
+        const str_lex = string.lex(input, actual_next_pos, &current_error, alloc) catch |e| switch (e) {
            LexError.IncompleteString => {
                try err_arrl.append(current_error);
                current_pos = ignore_until_whitespace(input, actual_next_pos);
                continue;
            },
            else => return e,
        };
        if (str_lex) |tuple| {
            assert(tuple[1] > current_pos);
            const t = tuple[0];
            current_pos = tuple[1];
            try tokens.append(t);
            continue;
        }
        // attempt to lex a datatype
-        else if (try datatype.lex(input, actual_next_pos)) |tuple| {
+        if (try datatype.lex(input, actual_next_pos)) |tuple| {
            assert(tuple[1] > current_pos);
            const t = tuple[0];
            current_pos = tuple[1];
@ -190,17 +202,17 @@ test "should insert an item, fail, and not leak" {
    const input = "322 \"hello";
    var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
    defer error_list.deinit();
    defer for (error_list.items) |*i| {
        i.deinit();
    };
    const arrl = tokenize(input, std.testing.allocator, &error_list) catch |e| switch (e) {
        error.IncompleteString => {
            return;
        },
        else => {
            try std.testing.expect(false);
            return;
        },
    };
-    try std.testing.expect(false);
+    defer arrl.deinit();
    arrl.deinit();
 }
 test "shouldnt leak" {
--- a/src/01_lexic/string.zig
+++ b/src/01_lexic/string.zig
@ -2,13 +2,19 @@ const std = @import("std");
 const assert = std.debug.assert;
 const token = @import("./token.zig");
 const utils = @import("./utils.zig");
 const errors = @import("errors");
 const Token = token.Token;
 const TokenType = token.TokenType;
 const LexError = token.LexError;
 const LexReturn = token.LexReturn;
-pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
+pub fn lex(
    input: []const u8,
    start: usize,
    err: *errors.ErrorData,
    alloc: std.mem.Allocator,
 ) LexError!?LexReturn {
    const cap = input.len;
    assert(start < cap);
@ -28,18 +34,28 @@ pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
                current_pos + 1,
            };
        }
-        // new line, return error
+        // new line, initialize and return error
        else if (next_char == '\n') {
            try err.init("Incomplete String", current_pos, current_pos + 1, alloc);
            try err.add_label("Found a new line here", current_pos, current_pos + 1);
            err.set_help("Strings must always end on the same line that they start.");
            return LexError.IncompleteString;
        }
        // lex escape characters
        else if (next_char == '\\') {
            // if next char is EOF, return error
            if (current_pos + 1 == cap) {
                try err.init("Incomplete String", current_pos, current_pos + 1, alloc);
                try err.add_label("Found EOF here", current_pos, current_pos + 1);
                err.set_help("Strings must always end on the same line that they start.");
                return LexError.IncompleteString;
            }
            // if next char is newline, return error
            else if (input[current_pos + 1] == '\n') {
                try err.init("Incomplete String", current_pos, current_pos + 1, alloc);
                try err.add_label("Found a new line here", current_pos, current_pos + 1);
                err.set_help("Strings must always end on the same line that they start.");
                return LexError.IncompleteString;
            }
            // here just consume whatever char is after
@ -52,12 +68,16 @@ pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
    }
    // this could only reach when EOF is hit, return error
    try err.init("Incomplete String", current_pos, current_pos + 1, alloc);
    try err.add_label("Found EOF here", current_pos, current_pos + 1);
    err.set_help("Strings must always end on the same line that they start.");
    return LexError.IncompleteString;
 }
 test "should lex empty string" {
    const input = "\"\"";
-    const output = try lex(input, 0);
+    const output = try lex(input, 0, undefined, std.testing.allocator);
    if (output) |tuple| {
        const t = tuple[0];
@ -70,7 +90,7 @@ test "should lex empty string" {
 test "should lex string with 1 char" {
    const input = "\"a\"";
-    const output = try lex(input, 0);
+    const output = try lex(input, 0, undefined, std.testing.allocator);
    if (output) |tuple| {
        const t = tuple[0];
@ -83,7 +103,7 @@ test "should lex string with 1 char" {
 test "should lex string with unicode" {
    const input = "\"😭\"";
-    const output = try lex(input, 0);
+    const output = try lex(input, 0, undefined, std.testing.allocator);
    if (output) |tuple| {
        const t = tuple[0];
@ -96,14 +116,16 @@ test "should lex string with unicode" {
 test "shouldnt lex other things" {
    const input = "322";
-    const output = try lex(input, 0);
+    const output = try lex(input, 0, undefined, std.testing.allocator);
    try std.testing.expect(output == null);
 }
 test "should fail on EOF before closing string" {
    const input = "\"hello";
-    _ = lex(input, 0) catch |err| {
+    var errdata: errors.ErrorData = undefined;
    _ = lex(input, 0, &errdata, std.testing.allocator) catch |err| {
        try std.testing.expectEqual(LexError.IncompleteString, err);
        defer errdata.deinit();
        return;
    };
@ -112,8 +134,10 @@ test "should fail on EOF before closing string" {
 test "should fail on newline before closing string" {
    const input = "\"hello\n";
-    _ = lex(input, 0) catch |err| {
+    var errdata: errors.ErrorData = undefined;
    _ = lex(input, 0, &errdata, std.testing.allocator) catch |err| {
        try std.testing.expectEqual(LexError.IncompleteString, err);
        defer errdata.deinit();
        return;
    };
@ -122,7 +146,7 @@ test "should fail on newline before closing string" {
 test "should lex string with escape character 1" {
    const input = "\"test\\\"string\"";
-    const output = try lex(input, 0);
+    const output = try lex(input, 0, undefined, std.testing.allocator);
    if (output) |tuple| {
        const t = tuple[0];
@ -135,7 +159,7 @@ test "should lex string with escape character 1" {
 test "should lex string with escape character 2" {
    const input = "\"test\\\\string\"";
-    const output = try lex(input, 0);
+    const output = try lex(input, 0, undefined, std.testing.allocator);
    if (output) |tuple| {
        const t = tuple[0];
@ -148,7 +172,9 @@ test "should lex string with escape character 2" {
 test "should fail on EOF after backslash" {
    const input = "\"hello \\";
-    _ = lex(input, 0) catch |err| {
+    var errdata: errors.ErrorData = undefined;
    _ = lex(input, 0, &errdata, std.testing.allocator) catch |err| {
        defer errdata.deinit();
        try std.testing.expectEqual(LexError.IncompleteString, err);
        return;
    };
@ -158,7 +184,9 @@ test "should fail on EOF after backslash" {
 test "should fail on newline after backslash" {
    const input = "\"hello \\\n";
-    _ = lex(input, 0) catch |err| {
+    var errdata: errors.ErrorData = undefined;
    _ = lex(input, 0, &errdata, std.testing.allocator) catch |err| {
        defer errdata.deinit();
        try std.testing.expectEqual(LexError.IncompleteString, err);
        return;
    };
Author	SHA1	Message	Date
Fernando Araoz	9d89f2fad0	feat: add error handling to string lexing	2025-01-05 12:43:28 -05:00
Fernando Araoz	344ec18863	fix: test	2024-12-29 13:13:44 -05:00