refactor: change way of storing errors in lexer

This commit is contained in:
Fernando Araoz 2024-12-24 07:11:10 -05:00
parent 92cefe7b0b
commit 1495e4ccad
6 changed files with 54 additions and 34 deletions

View File

@ -24,7 +24,7 @@ const LexError = token.LexError;
pub fn tokenize( pub fn tokenize(
input: []const u8, input: []const u8,
alloc: std.mem.Allocator, alloc: std.mem.Allocator,
err_arrl: *std.ArrayList(*errors.ErrorData), err_arrl: *std.ArrayList(errors.ErrorData),
) !std.ArrayList(Token) { ) !std.ArrayList(Token) {
const input_len = input.len; const input_len = input.len;
var current_pos: usize = 0; var current_pos: usize = 0;
@ -32,9 +32,6 @@ pub fn tokenize(
var tokens = std.ArrayList(Token).init(alloc); var tokens = std.ArrayList(Token).init(alloc);
errdefer tokens.deinit(); errdefer tokens.deinit();
var current_error = try alloc.create(errors.ErrorData);
defer alloc.destroy(current_error);
while (current_pos < input_len) { while (current_pos < input_len) {
const actual_next_pos = ignore_whitespace(input, current_pos); const actual_next_pos = ignore_whitespace(input, current_pos);
assert(current_pos <= actual_next_pos); assert(current_pos <= actual_next_pos);
@ -45,13 +42,12 @@ pub fn tokenize(
} }
// attempt to lex a number // attempt to lex a number
const number_lex = number.lex(input, input_len, actual_next_pos, current_error) catch |e| switch (e) { var current_error: errors.ErrorData = undefined;
const number_lex = number.lex(input, input_len, actual_next_pos, &current_error) catch |e| switch (e) {
// recoverable errors // recoverable errors
LexError.Incomplete => { LexError.Incomplete => {
// add to list of errors // add to list of errors
try err_arrl.append(current_error); try err_arrl.append(current_error);
// refresh the previous error pointer
current_error = try alloc.create(errors.ErrorData);
// ignore everything until whitespace and loop // ignore everything until whitespace and loop
current_pos = ignore_until_whitespace(input, actual_next_pos); current_pos = ignore_until_whitespace(input, actual_next_pos);
@ -127,13 +123,11 @@ pub fn tokenize(
} }
// nothing was matched. fail // nothing was matched. fail
// TODO: instead of failing add an error, ignore all chars
// until next whitespace, and continue lexing
// TODO: check if this is a good error recovery strategy
else { else {
// Create an error "nothing matched" and continue lexing // Create an error "nothing matched" and continue lexing
// after the whitespace // after the whitespace
current_error.init("Unrecognized character", actual_next_pos, actual_next_pos + 1); current_error.init("Unrecognized character", actual_next_pos, actual_next_pos + 1);
try err_arrl.append(current_error);
current_pos = ignore_until_whitespace(input, actual_next_pos); current_pos = ignore_until_whitespace(input, actual_next_pos);
continue; continue;
} }
@ -177,7 +171,7 @@ test {
test "should insert 1 item" { test "should insert 1 item" {
const input = "322"; const input = "322";
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator); var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
defer error_list.deinit(); defer error_list.deinit();
const arrl = try tokenize(input, std.testing.allocator, &error_list); const arrl = try tokenize(input, std.testing.allocator, &error_list);
arrl.deinit(); arrl.deinit();
@ -185,7 +179,7 @@ test "should insert 1 item" {
test "should insert 2 item" { test "should insert 2 item" {
const input = "322 644"; const input = "322 644";
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator); var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
defer error_list.deinit(); defer error_list.deinit();
const arrl = try tokenize(input, std.testing.allocator, &error_list); const arrl = try tokenize(input, std.testing.allocator, &error_list);
arrl.deinit(); arrl.deinit();
@ -193,7 +187,7 @@ test "should insert 2 item" {
test "should insert an item, fail, and not leak" { test "should insert an item, fail, and not leak" {
const input = "322 \"hello"; const input = "322 \"hello";
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator); var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
defer error_list.deinit(); defer error_list.deinit();
const arrl = tokenize(input, std.testing.allocator, &error_list) catch |e| switch (e) { const arrl = tokenize(input, std.testing.allocator, &error_list) catch |e| switch (e) {
error.IncompleteString => { error.IncompleteString => {
@ -207,3 +201,26 @@ test "should insert an item, fail, and not leak" {
try std.testing.expect(false); try std.testing.expect(false);
arrl.deinit(); arrl.deinit();
} }
// Tokenizes an empty input and releases everything it obtained.
// `std.testing.allocator` reports any allocation still alive when the test ends.
test "shouldnt leak" {
    const source = "";

    var lex_errors = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
    defer lex_errors.deinit();

    const token_list = try tokenize(source, std.testing.allocator, &lex_errors);
    defer token_list.deinit();
}
// Lexes an input whose middle fragment (`0b`, bytes 4..6) is expected to be
// reported through the error list while lexing continues with the rest.
test "should handle recoverable errors" {
    const source = "322 0b 644";

    var lex_errors = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
    defer lex_errors.deinit();

    const token_list = try tokenize(source, std.testing.allocator, &lex_errors);
    defer token_list.deinit();

    // One error recorded, and two tokens still produced around it.
    try std.testing.expectEqual(@as(usize, 1), lex_errors.items.len);
    try std.testing.expectEqual(@as(usize, 2), token_list.items.len);

    // The recorded error carries the reason and the span of the bad fragment.
    const reported = lex_errors.items[0];
    try std.testing.expectEqualStrings("Invalid prefix passed to `prefixed` function.", reported.reason);
    try std.testing.expectEqual(@as(usize, 4), reported.start_position);
    try std.testing.expectEqual(@as(usize, 6), reported.end_position);
}

View File

@ -27,7 +27,7 @@ pub const Expression = union(enum) {
test "should parse expression" { test "should parse expression" {
const input = "322"; const input = "322";
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator); var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
defer error_list.deinit(); defer error_list.deinit();
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list); const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
defer tokens.deinit(); defer tokens.deinit();
@ -40,7 +40,7 @@ test "should parse expression" {
test "should fail on non expression" { test "should fail on non expression" {
const input = "identifier"; const input = "identifier";
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator); var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
defer error_list.deinit(); defer error_list.deinit();
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list); const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
defer tokens.deinit(); defer tokens.deinit();

View File

@ -84,7 +84,7 @@ test {
test "should parse a single statement" { test "should parse a single statement" {
const input = "var my_variable = 322"; const input = "var my_variable = 322";
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator); var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
defer error_list.deinit(); defer error_list.deinit();
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list); const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
defer tokens.deinit(); defer tokens.deinit();
@ -100,7 +100,7 @@ test "should parse a single statement" {
test "should clean memory if a statement parsing fails after one item has been inserted" { test "should clean memory if a statement parsing fails after one item has been inserted" {
const input = "var my_variable = 322 unrelated()"; const input = "var my_variable = 322 unrelated()";
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator); var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
defer error_list.deinit(); defer error_list.deinit();
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list); const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
defer tokens.deinit(); defer tokens.deinit();

View File

@ -59,7 +59,7 @@ pub const Statement = struct {
test "should parse a variable declaration statement" { test "should parse a variable declaration statement" {
const input = "var my_variable = 322"; const input = "var my_variable = 322";
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator); var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
defer error_list.deinit(); defer error_list.deinit();
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list); const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
defer tokens.deinit(); defer tokens.deinit();
@ -78,7 +78,7 @@ test "should parse a variable declaration statement" {
test "should fail on other constructs" { test "should fail on other constructs" {
const input = "a_function_call(322)"; const input = "a_function_call(322)";
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator); var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
defer error_list.deinit(); defer error_list.deinit();
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list); const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
defer tokens.deinit(); defer tokens.deinit();

View File

@ -71,7 +71,7 @@ pub const VariableBinding = struct {
test "should parse a minimal var" { test "should parse a minimal var" {
const input = "var my_variable = 322"; const input = "var my_variable = 322";
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator); var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
defer error_list.deinit(); defer error_list.deinit();
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list); const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
defer tokens.deinit(); defer tokens.deinit();
@ -93,7 +93,7 @@ test "should parse a minimal var" {
test "should fail is it doesnt start with var" { test "should fail is it doesnt start with var" {
const input = "different_token_stream()"; const input = "different_token_stream()";
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator); var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
defer error_list.deinit(); defer error_list.deinit();
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list); const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
defer tokens.deinit(); defer tokens.deinit();
@ -109,7 +109,7 @@ test "should fail is it doesnt start with var" {
test "should fail if the identifier is missing" { test "should fail if the identifier is missing" {
const input = "var "; const input = "var ";
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator); var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
defer error_list.deinit(); defer error_list.deinit();
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list); const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
defer tokens.deinit(); defer tokens.deinit();
@ -125,7 +125,7 @@ test "should fail if the identifier is missing" {
test "should fail if there is not an identifier after var" { test "should fail if there is not an identifier after var" {
const input = "var 322"; const input = "var 322";
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator); var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
defer error_list.deinit(); defer error_list.deinit();
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list); const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
defer tokens.deinit(); defer tokens.deinit();
@ -141,7 +141,7 @@ test "should fail if there is not an identifier after var" {
test "should fail if the equal sign is missing" { test "should fail if the equal sign is missing" {
const input = "var my_id "; const input = "var my_id ";
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator); var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
defer error_list.deinit(); defer error_list.deinit();
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list); const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
defer tokens.deinit(); defer tokens.deinit();
@ -157,7 +157,7 @@ test "should fail if the equal sign is missing" {
test "should fail if the equal sign is not found" { test "should fail if the equal sign is not found" {
const input = "var my_id is string"; const input = "var my_id is string";
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator); var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
defer error_list.deinit(); defer error_list.deinit();
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list); const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
defer tokens.deinit(); defer tokens.deinit();
@ -173,7 +173,7 @@ test "should fail if the equal sign is not found" {
test "should fail if the expression parsing fails" { test "should fail if the expression parsing fails" {
const input = "var my_id = ehhh"; const input = "var my_id = ehhh";
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator); var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
defer error_list.deinit(); defer error_list.deinit();
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list); const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
defer tokens.deinit(); defer tokens.deinit();

View File

@ -51,19 +51,16 @@ fn repl() !void {
// //
// Tokenize // Tokenize
// //
const tokens = lexic.tokenize(line, alloc) catch |e| switch (e) { var error_array = std.ArrayList(errors.ErrorData).init(alloc);
defer error_array.deinit();
const tokens = lexic.tokenize(line, alloc, &error_array) catch |e| switch (e) {
error.OutOfMemory => { error.OutOfMemory => {
try stdout.print("FATAL ERROR: System Out of Memory!", .{}); try stdout.print("FATAL ERROR: System Out of Memory!", .{});
try bw.flush(); try bw.flush();
return e; return e;
}, },
else => { else => return e,
// TODO: implement error handling in the lexer,
// and print those errors here
try stdout.print("Unknown error while lexing :c\n", .{});
try bw.flush();
continue;
},
}; };
defer tokens.deinit(); defer tokens.deinit();
@ -78,6 +75,12 @@ fn repl() !void {
} }
} }
// Print errors
for (error_array.items) |err| {
try stdout.print("Lex error: {s} at pos {d}\n", .{ err.reason, err.start_position });
try bw.flush();
}
// next repl line // next repl line
} }