From 9d89f2fad04d1f1f2f4eb090b3784463d1298f68 Mon Sep 17 00:00:00 2001 From: Fernando Araoz Date: Sun, 5 Jan 2025 12:43:28 -0500 Subject: [PATCH] feat: add error handling to string lexing --- src/01_lexic/root.zig | 34 ++++++++++++++++++--------- src/01_lexic/string.zig | 52 +++++++++++++++++++++++++++++++---------- 2 files changed, 63 insertions(+), 23 deletions(-) diff --git a/src/01_lexic/root.zig b/src/01_lexic/root.zig index 5f9d793..06deea1 100644 --- a/src/01_lexic/root.zig +++ b/src/01_lexic/root.zig @@ -41,8 +41,9 @@ pub fn tokenize( break; } - // attempt to lex a number var current_error: errors.ErrorData = undefined; + + // attempt to lex a number const number_lex = number.lex(input, input_len, actual_next_pos, &current_error, alloc) catch |e| switch (e) { // recoverable errors LexError.Incomplete, LexError.LeadingZero, LexError.IncompleteFloatingNumber, LexError.IncompleteScientificNumber => { @@ -51,7 +52,6 @@ pub fn tokenize( // ignore everything until whitespace and loop current_pos = ignore_until_whitespace(input, actual_next_pos); - assert(current_pos > actual_next_pos); continue; }, // just throw unrecoverable errors @@ -66,24 +66,36 @@ pub fn tokenize( continue; } - // attempt to lex an identifier - else if (try identifier.lex(input, actual_next_pos)) |tuple| { + // attempt to lex an identifier. 
identifier parsing has no errors + if (try identifier.lex(input, actual_next_pos)) |tuple| { assert(tuple[1] > current_pos); const t = tuple[0]; current_pos = tuple[1]; try tokens.append(t); + continue; } + // attempt to lex a string - else if (try string.lex(input, actual_next_pos)) |tuple| { + const str_lex = string.lex(input, actual_next_pos, &current_error, alloc) catch |e| switch (e) { + LexError.IncompleteString => { + try err_arrl.append(current_error); + current_pos = ignore_until_whitespace(input, actual_next_pos); + continue; + }, + else => return e, + }; + if (str_lex) |tuple| { assert(tuple[1] > current_pos); const t = tuple[0]; current_pos = tuple[1]; try tokens.append(t); + continue; } + // attempt to lex a datatype - else if (try datatype.lex(input, actual_next_pos)) |tuple| { + if (try datatype.lex(input, actual_next_pos)) |tuple| { assert(tuple[1] > current_pos); const t = tuple[0]; current_pos = tuple[1]; @@ -190,17 +202,17 @@ test "should insert an item, fail, and not leak" { const input = "322 \"hello"; var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator); defer error_list.deinit(); + defer for (error_list.items) |*i| { + i.deinit(); + }; + const arrl = tokenize(input, std.testing.allocator, &error_list) catch |e| switch (e) { - error.IncompleteString => { - return; - }, else => { try std.testing.expect(false); return; }, }; - try std.testing.expect(false); - arrl.deinit(); + defer arrl.deinit(); } test "shouldnt leak" { diff --git a/src/01_lexic/string.zig b/src/01_lexic/string.zig index 3d12f56..3dcb2ed 100644 --- a/src/01_lexic/string.zig +++ b/src/01_lexic/string.zig @@ -2,13 +2,19 @@ const std = @import("std"); const assert = std.debug.assert; const token = @import("./token.zig"); const utils = @import("./utils.zig"); +const errors = @import("errors"); const Token = token.Token; const TokenType = token.TokenType; const LexError = token.LexError; const LexReturn = token.LexReturn; -pub fn lex(input: []const u8, start: 
usize) LexError!?LexReturn { +pub fn lex( + input: []const u8, + start: usize, + err: *errors.ErrorData, + alloc: std.mem.Allocator, +) LexError!?LexReturn { const cap = input.len; assert(start < cap); @@ -28,18 +34,28 @@ pub fn lex(input: []const u8, start: usize) LexError!?LexReturn { current_pos + 1, }; } - // new line, return error + // new line, initialize and return error else if (next_char == '\n') { + try err.init("Incomplete String", current_pos, current_pos + 1, alloc); + try err.add_label("Found a new line here", current_pos, current_pos + 1); + err.set_help("Strings must always end on the same line that they start."); + return LexError.IncompleteString; } // lex escape characters else if (next_char == '\\') { // if next char is EOF, return error if (current_pos + 1 == cap) { + try err.init("Incomplete String", current_pos, current_pos + 1, alloc); + try err.add_label("Found EOF here", current_pos, current_pos + 1); + err.set_help("Strings must always end on the same line that they start."); return LexError.IncompleteString; } // if next char is newline, return error else if (input[current_pos + 1] == '\n') { + try err.init("Incomplete String", current_pos, current_pos + 1, alloc); + try err.add_label("Found a new line here", current_pos, current_pos + 1); + err.set_help("Strings must always end on the same line that they start."); return LexError.IncompleteString; } // here just consume whatever char is after @@ -52,12 +68,16 @@ pub fn lex(input: []const u8, start: usize) LexError!?LexReturn { } // this could only reach when EOF is hit, return error + try err.init("Incomplete String", current_pos, current_pos + 1, alloc); + try err.add_label("Found EOF here", current_pos, current_pos + 1); + err.set_help("Strings must always end on the same line that they start."); + return LexError.IncompleteString; } test "should lex empty string" { const input = "\"\""; - const output = try lex(input, 0); + const output = try lex(input, 0, undefined, 
std.testing.allocator); if (output) |tuple| { const t = tuple[0]; @@ -70,7 +90,7 @@ test "should lex empty string" { test "should lex string with 1 char" { const input = "\"a\""; - const output = try lex(input, 0); + const output = try lex(input, 0, undefined, std.testing.allocator); if (output) |tuple| { const t = tuple[0]; @@ -83,7 +103,7 @@ test "should lex string with 1 char" { test "should lex string with unicode" { const input = "\"😭\""; - const output = try lex(input, 0); + const output = try lex(input, 0, undefined, std.testing.allocator); if (output) |tuple| { const t = tuple[0]; @@ -96,14 +116,16 @@ test "should lex string with unicode" { test "shouldnt lex other things" { const input = "322"; - const output = try lex(input, 0); + const output = try lex(input, 0, undefined, std.testing.allocator); try std.testing.expect(output == null); } test "should fail on EOF before closing string" { const input = "\"hello"; - _ = lex(input, 0) catch |err| { + var errdata: errors.ErrorData = undefined; + _ = lex(input, 0, &errdata, std.testing.allocator) catch |err| { try std.testing.expectEqual(LexError.IncompleteString, err); + defer errdata.deinit(); return; }; @@ -112,8 +134,10 @@ test "should fail on EOF before closing string" { test "should fail on newline before closing string" { const input = "\"hello\n"; - _ = lex(input, 0) catch |err| { + var errdata: errors.ErrorData = undefined; + _ = lex(input, 0, &errdata, std.testing.allocator) catch |err| { try std.testing.expectEqual(LexError.IncompleteString, err); + defer errdata.deinit(); return; }; @@ -122,7 +146,7 @@ test "should fail on newline before closing string" { test "should lex string with escape character 1" { const input = "\"test\\\"string\""; - const output = try lex(input, 0); + const output = try lex(input, 0, undefined, std.testing.allocator); if (output) |tuple| { const t = tuple[0]; @@ -135,7 +159,7 @@ test "should lex string with escape character 1" { test "should lex string with escape 
character 2" { const input = "\"test\\\\string\""; - const output = try lex(input, 0); + const output = try lex(input, 0, undefined, std.testing.allocator); if (output) |tuple| { const t = tuple[0]; @@ -148,7 +172,9 @@ test "should lex string with escape character 2" { test "should fail on EOF after backslash" { const input = "\"hello \\"; - _ = lex(input, 0) catch |err| { + var errdata: errors.ErrorData = undefined; + _ = lex(input, 0, &errdata, std.testing.allocator) catch |err| { + defer errdata.deinit(); try std.testing.expectEqual(LexError.IncompleteString, err); return; }; @@ -158,7 +184,9 @@ test "should fail on EOF after backslash" { test "should fail on newline after backslash" { const input = "\"hello \\\n"; - _ = lex(input, 0) catch |err| { + var errdata: errors.ErrorData = undefined; + _ = lex(input, 0, &errdata, std.testing.allocator) catch |err| { + defer errdata.deinit(); try std.testing.expectEqual(LexError.IncompleteString, err); return; };