refactor: change way of storing errors in lexer
This commit is contained in:
parent
92cefe7b0b
commit
1495e4ccad
@ -24,7 +24,7 @@ const LexError = token.LexError;
|
|||||||
pub fn tokenize(
|
pub fn tokenize(
|
||||||
input: []const u8,
|
input: []const u8,
|
||||||
alloc: std.mem.Allocator,
|
alloc: std.mem.Allocator,
|
||||||
err_arrl: *std.ArrayList(*errors.ErrorData),
|
err_arrl: *std.ArrayList(errors.ErrorData),
|
||||||
) !std.ArrayList(Token) {
|
) !std.ArrayList(Token) {
|
||||||
const input_len = input.len;
|
const input_len = input.len;
|
||||||
var current_pos: usize = 0;
|
var current_pos: usize = 0;
|
||||||
@ -32,9 +32,6 @@ pub fn tokenize(
|
|||||||
var tokens = std.ArrayList(Token).init(alloc);
|
var tokens = std.ArrayList(Token).init(alloc);
|
||||||
errdefer tokens.deinit();
|
errdefer tokens.deinit();
|
||||||
|
|
||||||
var current_error = try alloc.create(errors.ErrorData);
|
|
||||||
defer alloc.destroy(current_error);
|
|
||||||
|
|
||||||
while (current_pos < input_len) {
|
while (current_pos < input_len) {
|
||||||
const actual_next_pos = ignore_whitespace(input, current_pos);
|
const actual_next_pos = ignore_whitespace(input, current_pos);
|
||||||
assert(current_pos <= actual_next_pos);
|
assert(current_pos <= actual_next_pos);
|
||||||
@ -45,13 +42,12 @@ pub fn tokenize(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// attempt to lex a number
|
// attempt to lex a number
|
||||||
const number_lex = number.lex(input, input_len, actual_next_pos, current_error) catch |e| switch (e) {
|
var current_error: errors.ErrorData = undefined;
|
||||||
|
const number_lex = number.lex(input, input_len, actual_next_pos, &current_error) catch |e| switch (e) {
|
||||||
// recoverable errors
|
// recoverable errors
|
||||||
LexError.Incomplete => {
|
LexError.Incomplete => {
|
||||||
// add to list of errors
|
// add to list of errors
|
||||||
try err_arrl.append(current_error);
|
try err_arrl.append(current_error);
|
||||||
// refresh the previous error pointer
|
|
||||||
current_error = try alloc.create(errors.ErrorData);
|
|
||||||
|
|
||||||
// ignore everything until whitespace and loop
|
// ignore everything until whitespace and loop
|
||||||
current_pos = ignore_until_whitespace(input, actual_next_pos);
|
current_pos = ignore_until_whitespace(input, actual_next_pos);
|
||||||
@ -127,13 +123,11 @@ pub fn tokenize(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// nothing was matched. fail
|
// nothing was matched. fail
|
||||||
// TODO: instead of failing add an error, ignore all chars
|
|
||||||
// until next whitespace, and continue lexing
|
|
||||||
// TODO: check if this is a good error recovery strategy
|
|
||||||
else {
|
else {
|
||||||
// Create an error "nothing matched" and continue lexing
|
// Create an error "nothing matched" and continue lexing
|
||||||
// after the whitespace
|
// after the whitespace
|
||||||
current_error.init("Unrecognized character", actual_next_pos, actual_next_pos + 1);
|
current_error.init("Unrecognized character", actual_next_pos, actual_next_pos + 1);
|
||||||
|
try err_arrl.append(current_error);
|
||||||
current_pos = ignore_until_whitespace(input, actual_next_pos);
|
current_pos = ignore_until_whitespace(input, actual_next_pos);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -177,7 +171,7 @@ test {
|
|||||||
|
|
||||||
test "should insert 1 item" {
|
test "should insert 1 item" {
|
||||||
const input = "322";
|
const input = "322";
|
||||||
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
|
var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
|
||||||
defer error_list.deinit();
|
defer error_list.deinit();
|
||||||
const arrl = try tokenize(input, std.testing.allocator, &error_list);
|
const arrl = try tokenize(input, std.testing.allocator, &error_list);
|
||||||
arrl.deinit();
|
arrl.deinit();
|
||||||
@ -185,7 +179,7 @@ test "should insert 1 item" {
|
|||||||
|
|
||||||
test "should insert 2 item" {
|
test "should insert 2 item" {
|
||||||
const input = "322 644";
|
const input = "322 644";
|
||||||
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
|
var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
|
||||||
defer error_list.deinit();
|
defer error_list.deinit();
|
||||||
const arrl = try tokenize(input, std.testing.allocator, &error_list);
|
const arrl = try tokenize(input, std.testing.allocator, &error_list);
|
||||||
arrl.deinit();
|
arrl.deinit();
|
||||||
@ -193,7 +187,7 @@ test "should insert 2 item" {
|
|||||||
|
|
||||||
test "should insert an item, fail, and not leak" {
|
test "should insert an item, fail, and not leak" {
|
||||||
const input = "322 \"hello";
|
const input = "322 \"hello";
|
||||||
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
|
var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
|
||||||
defer error_list.deinit();
|
defer error_list.deinit();
|
||||||
const arrl = tokenize(input, std.testing.allocator, &error_list) catch |e| switch (e) {
|
const arrl = tokenize(input, std.testing.allocator, &error_list) catch |e| switch (e) {
|
||||||
error.IncompleteString => {
|
error.IncompleteString => {
|
||||||
@ -207,3 +201,26 @@ test "should insert an item, fail, and not leak" {
|
|||||||
try std.testing.expect(false);
|
try std.testing.expect(false);
|
||||||
arrl.deinit();
|
arrl.deinit();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test "shouldnt leak" {
|
||||||
|
const input = "";
|
||||||
|
var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
|
||||||
|
defer error_list.deinit();
|
||||||
|
const arrl = try tokenize(input, std.testing.allocator, &error_list);
|
||||||
|
arrl.deinit();
|
||||||
|
}
|
||||||
|
|
||||||
|
test "should handle recoverable errors" {
|
||||||
|
const input = "322 0b 644";
|
||||||
|
var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
|
||||||
|
defer error_list.deinit();
|
||||||
|
const arrl = try tokenize(input, std.testing.allocator, &error_list);
|
||||||
|
defer arrl.deinit();
|
||||||
|
|
||||||
|
try std.testing.expectEqual(@as(usize, 1), error_list.items.len);
|
||||||
|
try std.testing.expectEqual(@as(usize, 2), arrl.items.len);
|
||||||
|
|
||||||
|
try std.testing.expectEqualStrings("Invalid prefix passed to `prefixed` function.", error_list.items[0].reason);
|
||||||
|
try std.testing.expectEqual(@as(usize, 4), error_list.items[0].start_position);
|
||||||
|
try std.testing.expectEqual(@as(usize, 6), error_list.items[0].end_position);
|
||||||
|
}
|
||||||
|
@ -27,7 +27,7 @@ pub const Expression = union(enum) {
|
|||||||
|
|
||||||
test "should parse expression" {
|
test "should parse expression" {
|
||||||
const input = "322";
|
const input = "322";
|
||||||
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
|
var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
|
||||||
defer error_list.deinit();
|
defer error_list.deinit();
|
||||||
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
||||||
defer tokens.deinit();
|
defer tokens.deinit();
|
||||||
@ -40,7 +40,7 @@ test "should parse expression" {
|
|||||||
|
|
||||||
test "should fail on non expression" {
|
test "should fail on non expression" {
|
||||||
const input = "identifier";
|
const input = "identifier";
|
||||||
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
|
var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
|
||||||
defer error_list.deinit();
|
defer error_list.deinit();
|
||||||
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
||||||
defer tokens.deinit();
|
defer tokens.deinit();
|
||||||
|
@ -84,7 +84,7 @@ test {
|
|||||||
|
|
||||||
test "should parse a single statement" {
|
test "should parse a single statement" {
|
||||||
const input = "var my_variable = 322";
|
const input = "var my_variable = 322";
|
||||||
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
|
var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
|
||||||
defer error_list.deinit();
|
defer error_list.deinit();
|
||||||
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
||||||
defer tokens.deinit();
|
defer tokens.deinit();
|
||||||
@ -100,7 +100,7 @@ test "should parse a single statement" {
|
|||||||
|
|
||||||
test "should clean memory if a statement parsing fails after one item has been inserted" {
|
test "should clean memory if a statement parsing fails after one item has been inserted" {
|
||||||
const input = "var my_variable = 322 unrelated()";
|
const input = "var my_variable = 322 unrelated()";
|
||||||
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
|
var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
|
||||||
defer error_list.deinit();
|
defer error_list.deinit();
|
||||||
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
||||||
defer tokens.deinit();
|
defer tokens.deinit();
|
||||||
|
@ -59,7 +59,7 @@ pub const Statement = struct {
|
|||||||
|
|
||||||
test "should parse a variable declaration statement" {
|
test "should parse a variable declaration statement" {
|
||||||
const input = "var my_variable = 322";
|
const input = "var my_variable = 322";
|
||||||
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
|
var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
|
||||||
defer error_list.deinit();
|
defer error_list.deinit();
|
||||||
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
||||||
defer tokens.deinit();
|
defer tokens.deinit();
|
||||||
@ -78,7 +78,7 @@ test "should parse a variable declaration statement" {
|
|||||||
|
|
||||||
test "should fail on other constructs" {
|
test "should fail on other constructs" {
|
||||||
const input = "a_function_call(322)";
|
const input = "a_function_call(322)";
|
||||||
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
|
var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
|
||||||
defer error_list.deinit();
|
defer error_list.deinit();
|
||||||
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
||||||
defer tokens.deinit();
|
defer tokens.deinit();
|
||||||
|
@ -71,7 +71,7 @@ pub const VariableBinding = struct {
|
|||||||
|
|
||||||
test "should parse a minimal var" {
|
test "should parse a minimal var" {
|
||||||
const input = "var my_variable = 322";
|
const input = "var my_variable = 322";
|
||||||
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
|
var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
|
||||||
defer error_list.deinit();
|
defer error_list.deinit();
|
||||||
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
||||||
defer tokens.deinit();
|
defer tokens.deinit();
|
||||||
@ -93,7 +93,7 @@ test "should parse a minimal var" {
|
|||||||
|
|
||||||
test "should fail is it doesnt start with var" {
|
test "should fail is it doesnt start with var" {
|
||||||
const input = "different_token_stream()";
|
const input = "different_token_stream()";
|
||||||
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
|
var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
|
||||||
defer error_list.deinit();
|
defer error_list.deinit();
|
||||||
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
||||||
defer tokens.deinit();
|
defer tokens.deinit();
|
||||||
@ -109,7 +109,7 @@ test "should fail is it doesnt start with var" {
|
|||||||
|
|
||||||
test "should fail if the identifier is missing" {
|
test "should fail if the identifier is missing" {
|
||||||
const input = "var ";
|
const input = "var ";
|
||||||
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
|
var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
|
||||||
defer error_list.deinit();
|
defer error_list.deinit();
|
||||||
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
||||||
defer tokens.deinit();
|
defer tokens.deinit();
|
||||||
@ -125,7 +125,7 @@ test "should fail if the identifier is missing" {
|
|||||||
|
|
||||||
test "should fail if there is not an identifier after var" {
|
test "should fail if there is not an identifier after var" {
|
||||||
const input = "var 322";
|
const input = "var 322";
|
||||||
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
|
var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
|
||||||
defer error_list.deinit();
|
defer error_list.deinit();
|
||||||
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
||||||
defer tokens.deinit();
|
defer tokens.deinit();
|
||||||
@ -141,7 +141,7 @@ test "should fail if there is not an identifier after var" {
|
|||||||
|
|
||||||
test "should fail if the equal sign is missing" {
|
test "should fail if the equal sign is missing" {
|
||||||
const input = "var my_id ";
|
const input = "var my_id ";
|
||||||
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
|
var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
|
||||||
defer error_list.deinit();
|
defer error_list.deinit();
|
||||||
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
||||||
defer tokens.deinit();
|
defer tokens.deinit();
|
||||||
@ -157,7 +157,7 @@ test "should fail if the equal sign is missing" {
|
|||||||
|
|
||||||
test "should fail if the equal sign is not found" {
|
test "should fail if the equal sign is not found" {
|
||||||
const input = "var my_id is string";
|
const input = "var my_id is string";
|
||||||
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
|
var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
|
||||||
defer error_list.deinit();
|
defer error_list.deinit();
|
||||||
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
||||||
defer tokens.deinit();
|
defer tokens.deinit();
|
||||||
@ -173,7 +173,7 @@ test "should fail if the equal sign is not found" {
|
|||||||
|
|
||||||
test "should fail if the expression parsing fails" {
|
test "should fail if the expression parsing fails" {
|
||||||
const input = "var my_id = ehhh";
|
const input = "var my_id = ehhh";
|
||||||
var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
|
var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
|
||||||
defer error_list.deinit();
|
defer error_list.deinit();
|
||||||
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
|
||||||
defer tokens.deinit();
|
defer tokens.deinit();
|
||||||
|
19
src/main.zig
19
src/main.zig
@ -51,19 +51,16 @@ fn repl() !void {
|
|||||||
//
|
//
|
||||||
// Tokenize
|
// Tokenize
|
||||||
//
|
//
|
||||||
const tokens = lexic.tokenize(line, alloc) catch |e| switch (e) {
|
var error_array = std.ArrayList(errors.ErrorData).init(alloc);
|
||||||
|
defer error_array.deinit();
|
||||||
|
|
||||||
|
const tokens = lexic.tokenize(line, alloc, &error_array) catch |e| switch (e) {
|
||||||
error.OutOfMemory => {
|
error.OutOfMemory => {
|
||||||
try stdout.print("FATAL ERROR: System Out of Memory!", .{});
|
try stdout.print("FATAL ERROR: System Out of Memory!", .{});
|
||||||
try bw.flush();
|
try bw.flush();
|
||||||
return e;
|
return e;
|
||||||
},
|
},
|
||||||
else => {
|
else => return e,
|
||||||
// TODO: implement error handling in the lexer,
|
|
||||||
// and print those errors here
|
|
||||||
try stdout.print("Unknown error while lexing :c\n", .{});
|
|
||||||
try bw.flush();
|
|
||||||
continue;
|
|
||||||
},
|
|
||||||
};
|
};
|
||||||
defer tokens.deinit();
|
defer tokens.deinit();
|
||||||
|
|
||||||
@ -78,6 +75,12 @@ fn repl() !void {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Print errors
|
||||||
|
for (error_array.items) |err| {
|
||||||
|
try stdout.print("Lex error: {s} at pos {d}\n", .{ err.reason, err.start_position });
|
||||||
|
try bw.flush();
|
||||||
|
}
|
||||||
|
|
||||||
// next repl line
|
// next repl line
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user