diff --git a/CHANGELOG.md b/CHANGELOG.md
index 934677c..882fdb8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -35,7 +35,5 @@ Now in Zig!
 - [x] Parse minimal variable binding
 - [x] Parse minimal statement
 - [x] Parse minimal module
-- [x] Generate error messages on number lexing
-- [x] Generate error messages on string lexing
-
+- [x] Recover from errors & generate error messages for the lexer
 
diff --git a/src/01_lexic/comment.zig b/src/01_lexic/comment.zig
index d0c3fcb..9432720 100644
--- a/src/01_lexic/comment.zig
+++ b/src/01_lexic/comment.zig
@@ -2,13 +2,19 @@ const std = @import("std");
 const assert = std.debug.assert;
 const token = @import("./token.zig");
 const utils = @import("./utils.zig");
+const errors = @import("errors");
 
 const Token = token.Token;
 const TokenType = token.TokenType;
 const LexError = token.LexError;
 const LexReturn = token.LexReturn;
 
-pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
+pub fn lex(
+    input: []const u8,
+    start: usize,
+    err: *errors.ErrorData,
+    alloc: std.mem.Allocator,
+) LexError!?LexReturn {
     const cap = input.len;
     assert(start < cap);
 
@@ -24,6 +30,10 @@ pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
         while (current_pos < cap and input[current_pos] != '\n') {
             // check for CR, and throw error
             if (input[current_pos] == '\r') {
+                try err.init("Usage of CRLF", current_pos, current_pos + 1, alloc);
+                try err.add_label("There is a carriage return (CR) here", current_pos, current_pos + 1);
+                err.set_help("All THP code must use LF newline delimiters.");
+
                 return LexError.CRLF;
             }
             current_pos += 1;
@@ -37,7 +47,7 @@
 
 test "should lex comment until EOF" {
     const input = "// aea";
-    const output = try lex(input, 0);
+    const output = try lex(input, 0, undefined, std.testing.allocator);
 
     if (output) |tuple| {
         const t = tuple[0];
@@ -50,7 +60,7 @@
 
 test "should lex comment until newline (LF)" {
     const input = "// my comment\n// other comment";
-    const output = try lex(input, 0);
+    const output = try lex(input, 0, undefined, std.testing.allocator);
 
     if (output) |tuple| {
         const t = tuple[0];
@@ -63,13 +73,15 @@
 test "shouldn lex incomplete comment" {
     const input = "/aa";
-    const output = try lex(input, 0);
+    const output = try lex(input, 0, undefined, std.testing.allocator);
 
     try std.testing.expect(output == null);
 }
 
 test "should fail on CRLF" {
     const input = "// my comment\x0D\x0A// other comment";
-    _ = lex(input, 0) catch |err| {
+    var errdata: errors.ErrorData = undefined;
+    _ = lex(input, 0, &errdata, std.testing.allocator) catch |err| {
+        defer errdata.deinit();
         try std.testing.expectEqual(LexError.CRLF, err);
         return;
     };
diff --git a/src/01_lexic/root.zig b/src/01_lexic/root.zig
index 06deea1..ac408fd 100644
--- a/src/01_lexic/root.zig
+++ b/src/01_lexic/root.zig
@@ -101,17 +101,29 @@ pub fn tokenize(
             current_pos = tuple[1];
 
             try tokens.append(t);
+            continue;
         }
+
         // attempt to lex a comment
-        else if (try comment.lex(input, actual_next_pos)) |tuple| {
+        const comment_lex = comment.lex(input, actual_next_pos, &current_error, alloc) catch |e| switch (e) {
+            LexError.CRLF => {
+                try err_arrl.append(current_error);
+                current_pos = ignore_until_whitespace(input, actual_next_pos);
+                continue;
+            },
+            else => return e,
+        };
+        if (comment_lex) |tuple| {
             assert(tuple[1] > current_pos);
             const t = tuple[0];
             current_pos = tuple[1];
 
             try tokens.append(t);
+            continue;
         }
+
         // attempt to lex an operator
-        else if (try operator.lex(input, actual_next_pos)) |tuple| {
+        if (try operator.lex(input, actual_next_pos)) |tuple| {
             assert(tuple[1] > current_pos);
             const t = tuple[0];
             current_pos = tuple[1];
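
As a rough illustration of the recovery strategy introduced above (not the repository's API; `lexOne` and `ignoreUntilWhitespace` are hypothetical stand-ins), the `catch |e| switch (e)` pattern lets the caller record a lexer error, skip past the offending bytes, and keep lexing instead of bailing out on the first failure:

```zig
const std = @import("std");

const LexError = error{CRLF};

// Toy lexer: fails on a carriage return, otherwise consumes one byte.
fn lexOne(input: []const u8, pos: usize) LexError!usize {
    if (input[pos] == '\r') return LexError.CRLF;
    return pos + 1;
}

// Skip ahead so lexing can resume after the offending span.
fn ignoreUntilWhitespace(input: []const u8, start: usize) usize {
    var pos = start;
    while (pos < input.len and input[pos] != ' ' and input[pos] != '\n') : (pos += 1) {}
    return pos;
}

test "record the error and keep lexing" {
    const input = "ok\r\nstill ok";
    var error_count: usize = 0;
    var pos: usize = 0;

    while (pos < input.len) {
        pos = lexOne(input, pos) catch |e| switch (e) {
            // Analogous to tokenize(): record the failure, skip past it, resume.
            LexError.CRLF => blk: {
                error_count += 1;
                break :blk ignoreUntilWhitespace(input, pos);
            },
        };
    }

    try std.testing.expectEqual(@as(usize, 1), error_count);
}
```

The real `tokenize` does the same thing, except it stores a fully populated `errors.ErrorData` (message, labels, help text) in `err_arrl` before resuming.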