refactor: use context in lexer

This commit is contained in:
Fernando Araoz 2025-01-30 20:01:22 -05:00
parent 988bcbc243
commit 8c7640be4d
6 changed files with 100 additions and 33 deletions

View File

@ -3,6 +3,7 @@ const assert = std.debug.assert;
const token = @import("./token.zig"); const token = @import("./token.zig");
const utils = @import("./utils.zig"); const utils = @import("./utils.zig");
const errors = @import("errors"); const errors = @import("errors");
const context = @import("context");
const Token = token.Token; const Token = token.Token;
const TokenType = token.TokenType; const TokenType = token.TokenType;
@ -12,8 +13,7 @@ const LexReturn = token.LexReturn;
pub fn lex( pub fn lex(
input: []const u8, input: []const u8,
start: usize, start: usize,
err: *errors.ErrorData, ctx: *context.CompilerContext,
alloc: std.mem.Allocator,
) LexError!?LexReturn { ) LexError!?LexReturn {
const cap = input.len; const cap = input.len;
assert(start < cap); assert(start < cap);
@ -30,8 +30,9 @@ pub fn lex(
while (current_pos < cap and input[current_pos] != '\n') { while (current_pos < cap and input[current_pos] != '\n') {
// check for CR, and throw error // check for CR, and throw error
if (input[current_pos] == '\r') { if (input[current_pos] == '\r') {
try err.init("Usage of CRLF", current_pos, current_pos + 1, alloc); var err = try ctx.create_and_append_error("Usage of CRLF", current_pos, current_pos + 1);
try err.add_label("There is a line feed (CR) here", current_pos, current_pos + 1); var label = ctx.create_error_label("There is a line feed (CR) here", current_pos, current_pos + 1);
try err.add_label(&label);
err.set_help("All THP code must use LF newline delimiters."); err.set_help("All THP code must use LF newline delimiters.");
return LexError.CRLF; return LexError.CRLF;

View File

@ -67,7 +67,7 @@ fn prefixed(
// populate error information // populate error information
var new_error = try ctx.create_and_append_error("Incomplete number", start, end_position); var new_error = try ctx.create_and_append_error("Incomplete number", start, end_position);
var new_label = ctx.create_error_label("Expected a valid digit after the '" ++ [_]u8{prefix} ++ "'", start, end_position); var new_label = ctx.create_error_label("Expected a valid digit after the '" ++ [_]u8{prefix} ++ "'", start, end_position);
new_error.add_label(&new_label); try new_error.add_label(&new_label);
switch (prefix) { switch (prefix) {
'x' => new_error.set_help("Hex numbers should have at least one 0-9a-fA-F after the x"), 'x' => new_error.set_help("Hex numbers should have at least one 0-9a-fA-F after the x"),
@ -128,7 +128,8 @@ fn integer(
// - a single zero. valid // - a single zero. valid
if (first_char == '0' and last_pos > start + 1) { if (first_char == '0' and last_pos > start + 1) {
var err = try ctx.create_and_append_error("Leading zero", start, start + 1); var err = try ctx.create_and_append_error("Leading zero", start, start + 1);
try err.add_label("This decimal number has a leading zero.", start, last_pos); var label = ctx.create_error_label("This decimal number has a leading zero.", start, last_pos);
try err.add_label(&label);
err.set_help("If you want an octal number use '0o', otherwise remove the leading zero"); err.set_help("If you want an octal number use '0o', otherwise remove the leading zero");
return LexError.LeadingZero; return LexError.LeadingZero;
@ -156,7 +157,8 @@ fn integer(
// leading zero on an integer, throw an error // leading zero on an integer, throw an error
if (first_char == '0') { if (first_char == '0') {
var err = try ctx.create_and_append_error("Leading zero", start, start + 1); var err = try ctx.create_and_append_error("Leading zero", start, start + 1);
try err.add_label("This decimal number has a leading zero.", start, last_pos); var label = ctx.create_error_label("This decimal number has a leading zero.", start, last_pos);
try err.add_label(&label);
err.set_help("If you want an octal number use '0o', otherwise remove the leading zero"); err.set_help("If you want an octal number use '0o', otherwise remove the leading zero");
return LexError.LeadingZero; return LexError.LeadingZero;
@ -187,7 +189,8 @@ fn floating_point(
if (current_pos >= cap or !utils.is_decimal_digit(input[current_pos])) { if (current_pos >= cap or !utils.is_decimal_digit(input[current_pos])) {
// This is an error // This is an error
var err = try ctx.create_and_append_error("Incomplete floating point number", token_start, current_pos); var err = try ctx.create_and_append_error("Incomplete floating point number", token_start, current_pos);
try err.add_label("This number is incomplete", token_start, current_pos); var label = ctx.create_error_label("This number is incomplete", token_start, current_pos);
try err.add_label(&label);
err.set_help("Add a number after the period"); err.set_help("Add a number after the period");
return LexError.IncompleteFloatingNumber; return LexError.IncompleteFloatingNumber;
@ -225,7 +228,8 @@ fn scientific(
// expect `+` or `-` // expect `+` or `-`
if (current_pos >= cap) { if (current_pos >= cap) {
var err = try ctx.create_and_append_error("Incomplete scientific point number", token_start, current_pos); var err = try ctx.create_and_append_error("Incomplete scientific point number", token_start, current_pos);
try err.add_label("Expected a '+' or '-' after the exponent", token_start, current_pos); var label = ctx.create_error_label("Expected a '+' or '-' after the exponent", token_start, current_pos);
try err.add_label(&label);
err.set_help("Add a sign and a digit to complete the scientific number"); err.set_help("Add a sign and a digit to complete the scientific number");
return LexError.IncompleteScientificNumber; return LexError.IncompleteScientificNumber;
@ -233,7 +237,8 @@ fn scientific(
const sign_char = input[current_pos]; const sign_char = input[current_pos];
if (sign_char != '+' and sign_char != '-') { if (sign_char != '+' and sign_char != '-') {
var err = try ctx.create_and_append_error("Incomplete scientific point number", current_pos, current_pos + 1); var err = try ctx.create_and_append_error("Incomplete scientific point number", current_pos, current_pos + 1);
try err.add_label("Expected a '+' or '-' here, found another char", current_pos, current_pos + 1); var label = ctx.create_error_label("Expected a '+' or '-' here, found another char", current_pos, current_pos + 1);
try err.add_label(&label);
err.set_help("Add a sign and a digit after the first 'e' to complete the scientific number"); err.set_help("Add a sign and a digit after the first 'e' to complete the scientific number");
return LexError.IncompleteScientificNumber; return LexError.IncompleteScientificNumber;
@ -249,7 +254,8 @@ fn scientific(
// if there is no difference, no extra digits were lexed. // if there is no difference, no extra digits were lexed.
if (digits_start == current_pos) { if (digits_start == current_pos) {
var err = try ctx.create_and_append_error("Incomplete scientific point number", current_pos - 1, current_pos); var err = try ctx.create_and_append_error("Incomplete scientific point number", current_pos - 1, current_pos);
try err.add_label("Expected at least one digit after this sign", current_pos - 1, current_pos); var label = ctx.create_error_label("Expected at least one digit after this sign", current_pos - 1, current_pos);
try err.add_label(&label);
err.set_help("Add a digit after the sign to complit the scientific number"); err.set_help("Add a digit after the sign to complit the scientific number");
return LexError.IncompleteScientificNumber; return LexError.IncompleteScientificNumber;

View File

@ -11,6 +11,7 @@ const grouping = @import("grouping.zig");
const punctuation = @import("punctiation.zig"); const punctuation = @import("punctiation.zig");
const errors = @import("errors"); const errors = @import("errors");
const context = @import("context");
pub const TokenType = token.TokenType; pub const TokenType = token.TokenType;
pub const Token = token.Token; pub const Token = token.Token;
@ -24,7 +25,7 @@ const LexError = token.LexError;
pub fn tokenize( pub fn tokenize(
input: []const u8, input: []const u8,
alloc: std.mem.Allocator, alloc: std.mem.Allocator,
err_arrl: *std.ArrayList(errors.ErrorData), ctx: *context.CompilerContext,
) !std.ArrayList(Token) { ) !std.ArrayList(Token) {
const input_len = input.len; const input_len = input.len;
var current_pos: usize = 0; var current_pos: usize = 0;
@ -41,19 +42,12 @@ pub fn tokenize(
break; break;
} }
// FIXME: should defer deinit, otherwise we leak memory?
var current_error: errors.ErrorData = undefined;
// attempt to lex a number // attempt to lex a number
const number_lex = number.lex(input, input_len, actual_next_pos, &current_error, alloc) catch |e| switch (e) { // the lexer adds any errors to the context as neccesary
const number_lex = number.lex(input, input_len, actual_next_pos, ctx) catch |e| switch (e) {
// recoverable errors // recoverable errors
LexError.Incomplete, LexError.LeadingZero, LexError.IncompleteFloatingNumber, LexError.IncompleteScientificNumber => { LexError.Incomplete, LexError.LeadingZero, LexError.IncompleteFloatingNumber, LexError.IncompleteScientificNumber => {
// add to list of errors // move to next synchronization point (whitespace) to recover lexing
try err_arrl.append(current_error);
// FIXME: should deinit current_error now that its been allocated, otherwise we leak memory?
// ignore everything until whitespace and loop
current_pos = ignore_until_whitespace(input, actual_next_pos); current_pos = ignore_until_whitespace(input, actual_next_pos);
continue; continue;
}, },
@ -80,9 +74,8 @@ pub fn tokenize(
} }
// attempt to lex a string // attempt to lex a string
const str_lex = string.lex(input, actual_next_pos, &current_error, alloc) catch |e| switch (e) { const str_lex = string.lex(input, actual_next_pos, ctx) catch |e| switch (e) {
LexError.IncompleteString => { LexError.IncompleteString => {
try err_arrl.append(current_error);
current_pos = ignore_until_whitespace(input, actual_next_pos); current_pos = ignore_until_whitespace(input, actual_next_pos);
continue; continue;
}, },
@ -108,9 +101,8 @@ pub fn tokenize(
} }
// attempt to lex a comment // attempt to lex a comment
const comment_lex = comment.lex(input, actual_next_pos, &current_error, alloc) catch |e| switch (e) { const comment_lex = comment.lex(input, actual_next_pos, ctx) catch |e| switch (e) {
LexError.CRLF => { LexError.CRLF => {
try err_arrl.append(current_error);
current_pos = ignore_until_whitespace(input, actual_next_pos); current_pos = ignore_until_whitespace(input, actual_next_pos);
continue; continue;
}, },
@ -154,8 +146,7 @@ pub fn tokenize(
else { else {
// Create an error "nothing matched" and continue lexing // Create an error "nothing matched" and continue lexing
// after the whitespace // after the whitespace
try current_error.init("Unrecognized character", actual_next_pos, actual_next_pos + 1, alloc); _ = try ctx.create_and_append_error("Unrecognized character", actual_next_pos, actual_next_pos + 1);
try err_arrl.append(current_error);
current_pos = ignore_until_whitespace(input, actual_next_pos); current_pos = ignore_until_whitespace(input, actual_next_pos);
continue; continue;
} }

View File

@ -37,7 +37,8 @@ pub fn lex(
// new line, initialize and return error // new line, initialize and return error
else if (next_char == '\n') { else if (next_char == '\n') {
var err = try ctx.create_and_append_error("Incomplete String", current_pos, current_pos + 1); var err = try ctx.create_and_append_error("Incomplete String", current_pos, current_pos + 1);
try err.add_label("Found a new line here", current_pos, current_pos + 1); var label = ctx.create_error_label("Found a new line here", current_pos, current_pos + 1);
try err.add_label(&label);
err.set_help("Strings must always end on the same line that they start."); err.set_help("Strings must always end on the same line that they start.");
return LexError.IncompleteString; return LexError.IncompleteString;
@ -47,14 +48,16 @@ pub fn lex(
// if next char is EOF, return error // if next char is EOF, return error
if (current_pos + 1 == cap) { if (current_pos + 1 == cap) {
var err = try ctx.create_and_append_error("Incomplete String", current_pos, current_pos + 1); var err = try ctx.create_and_append_error("Incomplete String", current_pos, current_pos + 1);
try err.add_label("Found EOF here", current_pos, current_pos + 1); var label = ctx.create_error_label("Found EOF here", current_pos, current_pos + 1);
try err.add_label(&label);
err.set_help("Strings must always end on the same line that they start."); err.set_help("Strings must always end on the same line that they start.");
return LexError.IncompleteString; return LexError.IncompleteString;
} }
// if next char is newline, return error // if next char is newline, return error
else if (input[current_pos + 1] == '\n') { else if (input[current_pos + 1] == '\n') {
var err = try ctx.create_and_append_error("Incomplete String", current_pos, current_pos + 1); var err = try ctx.create_and_append_error("Incomplete String", current_pos, current_pos + 1);
try err.add_label("Found a new line here", current_pos, current_pos + 1); var label = ctx.create_error_label("Found a new line here", current_pos, current_pos + 1);
try err.add_label(&label);
err.set_help("Strings must always end on the same line that they start."); err.set_help("Strings must always end on the same line that they start.");
return LexError.IncompleteString; return LexError.IncompleteString;
} }
@ -69,7 +72,8 @@ pub fn lex(
// this could only reach when EOF is hit, return error // this could only reach when EOF is hit, return error
var err = try ctx.create_and_append_error("Incomplete String", current_pos, current_pos + 1); var err = try ctx.create_and_append_error("Incomplete String", current_pos, current_pos + 1);
try err.add_label("Found EOF here", current_pos, current_pos + 1); var label = ctx.create_error_label("Found EOF here", current_pos, current_pos + 1);
try err.add_label(&label);
err.set_help("Strings must always end on the same line that they start."); err.set_help("Strings must always end on the same line that they start.");
return LexError.IncompleteString; return LexError.IncompleteString;

View File

@ -13,7 +13,47 @@ pub const CompilerContext = struct {
}; };
} }
/// Appends a new error to the compiler context and returns a handle
/// to the entry stored inside `self.errors`.
///
/// The handle points into the list's backing storage, so it is only
/// valid until the next append to `self.errors`; finish populating the
/// error (labels, help) before creating another one.
///
/// `reason` is stored as-is (the slice is not copied).
/// Returns an error only if growing the error list fails.
pub fn create_and_append_error(
    self: *CompilerContext,
    reason: []const u8,
    start_position: usize,
    end_position: usize,
) !*ErrorData {
    // Reserve the slot first and build the error in place. Returning the
    // address of a local copy would hand callers a pointer to dead stack
    // memory, and any label/help they set would never reach the stored error.
    const slot = try self.errors.addOne();
    slot.* = .{
        .reason = reason,
        .start_position = start_position,
        .end_position = end_position,
        .labels = std.ArrayList(ErrorLabel).init(self.allocator),
        .help = null,
    };
    return slot;
}
/// Builds an `ErrorLabel` carrying a static message together with the
/// given `start` and `end` positions.
/// The label is returned by value and is meant to be attached to an
/// `ErrorData`; a static message owns no memory, so there is nothing
/// to free for it afterwards.
pub fn create_error_label(
    self: *CompilerContext,
    message: []const u8,
    start: usize,
    end: usize,
) ErrorLabel {
    // The context is not needed to build a static label.
    _ = self;
    const static_label = ErrorLabel{
        .start = start,
        .end = end,
        .message = .{ .static = message },
    };
    return static_label;
}
/// Releases everything owned by the context's error list: each stored
/// error is deinitialized with the context allocator, then the list
/// itself is freed.
pub fn deinit(self: *CompilerContext) void { pub fn deinit(self: *CompilerContext) void {
// Errors own their label lists, so they must be cleaned up before
// the list that holds them is released.
for (self.errors.items) |*error_item| {
error_item.deinit(self.allocator);
}
self.errors.deinit(); self.errors.deinit();
} }
}; };
@ -29,6 +69,24 @@ pub const ErrorData = struct {
end_position: usize, end_position: usize,
/// A list of detailed messages about the error /// A list of detailed messages about the error
labels: std.ArrayList(ErrorLabel), labels: std.ArrayList(ErrorLabel),
/// Attaches a label to this error.
/// The label is copied into `self.labels`, so the pointed-to value does
/// not have to outlive this call. Fails if the label list cannot grow.
pub fn add_label(self: *ErrorData, label: *ErrorLabel) !void {
    const label_copy = label.*;
    try self.labels.append(label_copy);
}
/// Stores `help` as the help message of this error, replacing any
/// previous one. Only the slice is stored; the text is not copied.
pub fn set_help(self: *ErrorData, help: []const u8) void {
    self.*.help = help;
}
/// Releases the resources held by this error.
/// Every label is deinitialized with `allocator` before the label list
/// itself is freed; labels are assumed to have been created with that
/// same allocator.
pub fn deinit(self: *ErrorData, allocator: std.mem.Allocator) void {
    var idx: usize = 0;
    while (idx < self.labels.items.len) : (idx += 1) {
        self.labels.items[idx].deinit(allocator);
    }
    self.labels.deinit();
}
}; };
pub const ErrorLabel = struct { pub const ErrorLabel = struct {
@ -38,4 +96,11 @@ pub const ErrorLabel = struct {
}, },
start: usize, start: usize,
end: usize, end: usize,
/// Frees the memory owned by this label's message, if any.
/// A `.dynamic` message is freed with `allocator`; a `.static` message
/// is left untouched.
pub fn deinit(self: *ErrorLabel, allocator: std.mem.Allocator) void {
    switch (self.message) {
        .dynamic => |owned_text| allocator.free(owned_text),
        .static => {},
    }
}
}; };

View File

@ -84,7 +84,7 @@ fn repl() !void {
i.deinit(); i.deinit();
}; };
const tokens = lexic.tokenize(line, alloc, &error_array) catch |e| switch (e) { const tokens = lexic.tokenize(line, alloc, &ctx) catch |e| switch (e) {
error.OutOfMemory => { error.OutOfMemory => {
try stdout.print("FATAL ERROR: System Out of Memory!", .{}); try stdout.print("FATAL ERROR: System Out of Memory!", .{});
try bw.flush(); try bw.flush();