refactor: use context in lexer

This commit is contained in:
Fernando Araoz 2025-01-30 20:01:22 -05:00
parent 988bcbc243
commit 8c7640be4d
6 changed files with 100 additions and 33 deletions

View File

@ -3,6 +3,7 @@ const assert = std.debug.assert;
const token = @import("./token.zig");
const utils = @import("./utils.zig");
const errors = @import("errors");
const context = @import("context");
const Token = token.Token;
const TokenType = token.TokenType;
@ -12,8 +13,7 @@ const LexReturn = token.LexReturn;
pub fn lex(
input: []const u8,
start: usize,
err: *errors.ErrorData,
alloc: std.mem.Allocator,
ctx: *context.CompilerContext,
) LexError!?LexReturn {
const cap = input.len;
assert(start < cap);
@ -30,8 +30,9 @@ pub fn lex(
while (current_pos < cap and input[current_pos] != '\n') {
// check for CR, and throw error
if (input[current_pos] == '\r') {
try err.init("Usage of CRLF", current_pos, current_pos + 1, alloc);
try err.add_label("There is a line feed (CR) here", current_pos, current_pos + 1);
var err = try ctx.create_and_append_error("Usage of CRLF", current_pos, current_pos + 1);
var label = ctx.create_error_label("There is a line feed (CR) here", current_pos, current_pos + 1);
try err.add_label(&label);
err.set_help("All THP code must use LF newline delimiters.");
return LexError.CRLF;

View File

@ -67,7 +67,7 @@ fn prefixed(
// populate error information
var new_error = try ctx.create_and_append_error("Incomplete number", start, end_position);
var new_label = ctx.create_error_label("Expected a valid digit after the '" ++ [_]u8{prefix} ++ "'", start, end_position);
new_error.add_label(&new_label);
try new_error.add_label(&new_label);
switch (prefix) {
'x' => new_error.set_help("Hex numbers should have at least one 0-9a-fA-F after the x"),
@ -128,7 +128,8 @@ fn integer(
// - a single zero. valid
if (first_char == '0' and last_pos > start + 1) {
var err = try ctx.create_and_append_error("Leading zero", start, start + 1);
try err.add_label("This decimal number has a leading zero.", start, last_pos);
var label = ctx.create_error_label("This decimal number has a leading zero.", start, last_pos);
try err.add_label(&label);
err.set_help("If you want an octal number use '0o', otherwise remove the leading zero");
return LexError.LeadingZero;
@ -156,7 +157,8 @@ fn integer(
// leading zero on an integer, throw an error
if (first_char == '0') {
var err = try ctx.create_and_append_error("Leading zero", start, start + 1);
try err.add_label("This decimal number has a leading zero.", start, last_pos);
var label = ctx.create_error_label("This decimal number has a leading zero.", start, last_pos);
try err.add_label(&label);
err.set_help("If you want an octal number use '0o', otherwise remove the leading zero");
return LexError.LeadingZero;
@ -187,7 +189,8 @@ fn floating_point(
if (current_pos >= cap or !utils.is_decimal_digit(input[current_pos])) {
// This is an error
var err = try ctx.create_and_append_error("Incomplete floating point number", token_start, current_pos);
try err.add_label("This number is incomplete", token_start, current_pos);
var label = ctx.create_error_label("This number is incomplete", token_start, current_pos);
try err.add_label(&label);
err.set_help("Add a number after the period");
return LexError.IncompleteFloatingNumber;
@ -225,7 +228,8 @@ fn scientific(
// expect `+` or `-`
if (current_pos >= cap) {
var err = try ctx.create_and_append_error("Incomplete scientific point number", token_start, current_pos);
try err.add_label("Expected a '+' or '-' after the exponent", token_start, current_pos);
var label = ctx.create_error_label("Expected a '+' or '-' after the exponent", token_start, current_pos);
try err.add_label(&label);
err.set_help("Add a sign and a digit to complete the scientific number");
return LexError.IncompleteScientificNumber;
@ -233,7 +237,8 @@ fn scientific(
const sign_char = input[current_pos];
if (sign_char != '+' and sign_char != '-') {
var err = try ctx.create_and_append_error("Incomplete scientific point number", current_pos, current_pos + 1);
try err.add_label("Expected a '+' or '-' here, found another char", current_pos, current_pos + 1);
var label = ctx.create_error_label("Expected a '+' or '-' here, found another char", current_pos, current_pos + 1);
try err.add_label(&label);
err.set_help("Add a sign and a digit after the first 'e' to complete the scientific number");
return LexError.IncompleteScientificNumber;
@ -249,7 +254,8 @@ fn scientific(
// if there is no difference, no extra digits were lexed.
if (digits_start == current_pos) {
var err = try ctx.create_and_append_error("Incomplete scientific point number", current_pos - 1, current_pos);
try err.add_label("Expected at least one digit after this sign", current_pos - 1, current_pos);
var label = ctx.create_error_label("Expected at least one digit after this sign", current_pos - 1, current_pos);
try err.add_label(&label);
err.set_help("Add a digit after the sign to complit the scientific number");
return LexError.IncompleteScientificNumber;

View File

@ -11,6 +11,7 @@ const grouping = @import("grouping.zig");
const punctuation = @import("punctiation.zig");
const errors = @import("errors");
const context = @import("context");
pub const TokenType = token.TokenType;
pub const Token = token.Token;
@ -24,7 +25,7 @@ const LexError = token.LexError;
pub fn tokenize(
input: []const u8,
alloc: std.mem.Allocator,
err_arrl: *std.ArrayList(errors.ErrorData),
ctx: *context.CompilerContext,
) !std.ArrayList(Token) {
const input_len = input.len;
var current_pos: usize = 0;
@ -41,19 +42,12 @@ pub fn tokenize(
break;
}
// FIXME: should defer deinit, otherwise we leak memory?
var current_error: errors.ErrorData = undefined;
// attempt to lex a number
const number_lex = number.lex(input, input_len, actual_next_pos, &current_error, alloc) catch |e| switch (e) {
// the lexer adds any errors to the context as necessary
const number_lex = number.lex(input, input_len, actual_next_pos, ctx) catch |e| switch (e) {
// recoverable errors
LexError.Incomplete, LexError.LeadingZero, LexError.IncompleteFloatingNumber, LexError.IncompleteScientificNumber => {
// add to list of errors
try err_arrl.append(current_error);
// FIXME: should deinit current_error now that its been allocated, otherwise we leak memory?
// ignore everything until whitespace and loop
// move to next synchronization point (whitespace) to recover lexing
current_pos = ignore_until_whitespace(input, actual_next_pos);
continue;
},
@ -80,9 +74,8 @@ pub fn tokenize(
}
// attempt to lex a string
const str_lex = string.lex(input, actual_next_pos, &current_error, alloc) catch |e| switch (e) {
const str_lex = string.lex(input, actual_next_pos, ctx) catch |e| switch (e) {
LexError.IncompleteString => {
try err_arrl.append(current_error);
current_pos = ignore_until_whitespace(input, actual_next_pos);
continue;
},
@ -108,9 +101,8 @@ pub fn tokenize(
}
// attempt to lex a comment
const comment_lex = comment.lex(input, actual_next_pos, &current_error, alloc) catch |e| switch (e) {
const comment_lex = comment.lex(input, actual_next_pos, ctx) catch |e| switch (e) {
LexError.CRLF => {
try err_arrl.append(current_error);
current_pos = ignore_until_whitespace(input, actual_next_pos);
continue;
},
@ -154,8 +146,7 @@ pub fn tokenize(
else {
// Create an error "nothing matched" and continue lexing
// after the whitespace
try current_error.init("Unrecognized character", actual_next_pos, actual_next_pos + 1, alloc);
try err_arrl.append(current_error);
_ = try ctx.create_and_append_error("Unrecognized character", actual_next_pos, actual_next_pos + 1);
current_pos = ignore_until_whitespace(input, actual_next_pos);
continue;
}

View File

@ -37,7 +37,8 @@ pub fn lex(
// new line, initialize and return error
else if (next_char == '\n') {
var err = try ctx.create_and_append_error("Incomplete String", current_pos, current_pos + 1);
try err.add_label("Found a new line here", current_pos, current_pos + 1);
var label = ctx.create_error_label("Found a new line here", current_pos, current_pos + 1);
try err.add_label(&label);
err.set_help("Strings must always end on the same line that they start.");
return LexError.IncompleteString;
@ -47,14 +48,16 @@ pub fn lex(
// if next char is EOF, return error
if (current_pos + 1 == cap) {
var err = try ctx.create_and_append_error("Incomplete String", current_pos, current_pos + 1);
try err.add_label("Found EOF here", current_pos, current_pos + 1);
var label = ctx.create_error_label("Found EOF here", current_pos, current_pos + 1);
try err.add_label(&label);
err.set_help("Strings must always end on the same line that they start.");
return LexError.IncompleteString;
}
// if next char is newline, return error
else if (input[current_pos + 1] == '\n') {
var err = try ctx.create_and_append_error("Incomplete String", current_pos, current_pos + 1);
try err.add_label("Found a new line here", current_pos, current_pos + 1);
var label = ctx.create_error_label("Found a new line here", current_pos, current_pos + 1);
try err.add_label(&label);
err.set_help("Strings must always end on the same line that they start.");
return LexError.IncompleteString;
}
@ -69,7 +72,8 @@ pub fn lex(
// this could only reach when EOF is hit, return error
var err = try ctx.create_and_append_error("Incomplete String", current_pos, current_pos + 1);
try err.add_label("Found EOF here", current_pos, current_pos + 1);
var label = ctx.create_error_label("Found EOF here", current_pos, current_pos + 1);
try err.add_label(&label);
err.set_help("Strings must always end on the same line that they start.");
return LexError.IncompleteString;

View File

@ -13,7 +13,47 @@ pub const CompilerContext = struct {
};
}
/// Appends a new error to the compiler context and returns a handle
/// to the error as stored inside `self.errors`.
///
/// The new error starts with an empty label list (backed by
/// `self.allocator`) and no help message.
///
/// The returned pointer refers to an element of `self.errors.items`,
/// so labels/help set through it are visible on the stored error.
/// It is invalidated by the next append to `self.errors` (the list may
/// reallocate), so finish populating the error before creating another.
///
/// Returns an error if growing `self.errors` fails.
pub fn create_and_append_error(
    self: *CompilerContext,
    reason: []const u8,
    start_position: usize,
    end_position: usize,
) !*ErrorData {
    try self.errors.append(.{
        .reason = reason,
        .start_position = start_position,
        .end_position = end_position,
        .labels = std.ArrayList(ErrorLabel).init(self.allocator),
        .help = null,
    });
    // Hand back the element that lives in the list, never the address of
    // a local: a pointer to a stack value dangles once this function
    // returns, and the list would only hold an unrelated copy of it.
    return &self.errors.items[self.errors.items.len - 1];
}
/// Builds an `ErrorLabel` covering `start..end` whose message is stored
/// as the `.static` variant (the slice is kept as-is, not copied).
/// The label is returned by value and is meant to be handed to
/// `ErrorData.add_label`. `self` is currently unused.
pub fn create_error_label(
    self: *CompilerContext,
    message: []const u8,
    start: usize,
    end: usize,
) ErrorLabel {
    _ = self;
    const label: ErrorLabel = .{
        .message = .{ .static = message },
        .start = start,
        .end = end,
    };
    return label;
}
/// Releases every error stored in this context, then the error list
/// itself. Each error is deinitialized with the context's allocator.
pub fn deinit(self: *CompilerContext) void {
    const allocator = self.allocator;
    for (self.errors.items) |*stored_error| stored_error.deinit(allocator);
    self.errors.deinit();
}
};
@ -29,6 +69,24 @@ pub const ErrorData = struct {
end_position: usize,
/// A list of detailed messages about the error
labels: std.ArrayList(ErrorLabel),
/// Appends a copy of `label` to this error's list of labels.
/// Fails if the label list cannot grow.
pub fn add_label(self: *ErrorData, label: *ErrorLabel) !void {
    const label_copy = label.*;
    try self.labels.append(label_copy);
}
/// Replaces this error's help message with `help`.
/// Only the slice is stored; the text itself is not copied.
pub fn set_help(self: *ErrorData, help: []const u8) void {
    self.help = help;
}
/// Releases the resources held by this error: every label is
/// deinitialized with `allocator`, then the label list is freed.
/// `allocator` is expected to be the one the labels were created with.
pub fn deinit(self: *ErrorData, allocator: std.mem.Allocator) void {
    for (self.labels.items) |*owned_label| owned_label.deinit(allocator);
    self.labels.deinit();
}
};
pub const ErrorLabel = struct {
@ -38,4 +96,11 @@ pub const ErrorLabel = struct {
},
start: usize,
end: usize,
/// Frees the label's message when it is the `.dynamic` variant;
/// a `.static` message is left untouched.
pub fn deinit(self: *ErrorLabel, allocator: std.mem.Allocator) void {
    switch (self.message) {
        .dynamic => |owned_message| allocator.free(owned_message),
        .static => {},
    }
}
};

View File

@ -84,7 +84,7 @@ fn repl() !void {
i.deinit();
};
const tokens = lexic.tokenize(line, alloc, &error_array) catch |e| switch (e) {
const tokens = lexic.tokenize(line, alloc, &ctx) catch |e| switch (e) {
error.OutOfMemory => {
try stdout.print("FATAL ERROR: System Out of Memory!", .{});
try bw.flush();