refactor: use context in lexer
This commit is contained in:
parent
988bcbc243
commit
8c7640be4d
@ -3,6 +3,7 @@ const assert = std.debug.assert;
|
||||
const token = @import("./token.zig");
|
||||
const utils = @import("./utils.zig");
|
||||
const errors = @import("errors");
|
||||
const context = @import("context");
|
||||
|
||||
const Token = token.Token;
|
||||
const TokenType = token.TokenType;
|
||||
@ -12,8 +13,7 @@ const LexReturn = token.LexReturn;
|
||||
pub fn lex(
|
||||
input: []const u8,
|
||||
start: usize,
|
||||
err: *errors.ErrorData,
|
||||
alloc: std.mem.Allocator,
|
||||
ctx: *context.CompilerContext,
|
||||
) LexError!?LexReturn {
|
||||
const cap = input.len;
|
||||
assert(start < cap);
|
||||
@ -30,8 +30,9 @@ pub fn lex(
|
||||
while (current_pos < cap and input[current_pos] != '\n') {
|
||||
// check for CR, and throw error
|
||||
if (input[current_pos] == '\r') {
|
||||
try err.init("Usage of CRLF", current_pos, current_pos + 1, alloc);
|
||||
try err.add_label("There is a line feed (CR) here", current_pos, current_pos + 1);
|
||||
var err = try ctx.create_and_append_error("Usage of CRLF", current_pos, current_pos + 1);
|
||||
var label = ctx.create_error_label("There is a line feed (CR) here", current_pos, current_pos + 1);
|
||||
try err.add_label(&label);
|
||||
err.set_help("All THP code must use LF newline delimiters.");
|
||||
|
||||
return LexError.CRLF;
|
||||
|
@ -67,7 +67,7 @@ fn prefixed(
|
||||
// populate error information
|
||||
var new_error = try ctx.create_and_append_error("Incomplete number", start, end_position);
|
||||
var new_label = ctx.create_error_label("Expected a valid digit after the '" ++ [_]u8{prefix} ++ "'", start, end_position);
|
||||
new_error.add_label(&new_label);
|
||||
try new_error.add_label(&new_label);
|
||||
|
||||
switch (prefix) {
|
||||
'x' => new_error.set_help("Hex numbers should have at least one 0-9a-fA-F after the x"),
|
||||
@ -128,7 +128,8 @@ fn integer(
|
||||
// - a single zero. valid
|
||||
if (first_char == '0' and last_pos > start + 1) {
|
||||
var err = try ctx.create_and_append_error("Leading zero", start, start + 1);
|
||||
try err.add_label("This decimal number has a leading zero.", start, last_pos);
|
||||
var label = ctx.create_error_label("This decimal number has a leading zero.", start, last_pos);
|
||||
try err.add_label(&label);
|
||||
err.set_help("If you want an octal number use '0o', otherwise remove the leading zero");
|
||||
|
||||
return LexError.LeadingZero;
|
||||
@ -156,7 +157,8 @@ fn integer(
|
||||
// leading zero on an integer, throw an error
|
||||
if (first_char == '0') {
|
||||
var err = try ctx.create_and_append_error("Leading zero", start, start + 1);
|
||||
try err.add_label("This decimal number has a leading zero.", start, last_pos);
|
||||
var label = ctx.create_error_label("This decimal number has a leading zero.", start, last_pos);
|
||||
try err.add_label(&label);
|
||||
err.set_help("If you want an octal number use '0o', otherwise remove the leading zero");
|
||||
|
||||
return LexError.LeadingZero;
|
||||
@ -187,7 +189,8 @@ fn floating_point(
|
||||
if (current_pos >= cap or !utils.is_decimal_digit(input[current_pos])) {
|
||||
// This is an error
|
||||
var err = try ctx.create_and_append_error("Incomplete floating point number", token_start, current_pos);
|
||||
try err.add_label("This number is incomplete", token_start, current_pos);
|
||||
var label = ctx.create_error_label("This number is incomplete", token_start, current_pos);
|
||||
try err.add_label(&label);
|
||||
err.set_help("Add a number after the period");
|
||||
|
||||
return LexError.IncompleteFloatingNumber;
|
||||
@ -225,7 +228,8 @@ fn scientific(
|
||||
// expect `+` or `-`
|
||||
if (current_pos >= cap) {
|
||||
var err = try ctx.create_and_append_error("Incomplete scientific point number", token_start, current_pos);
|
||||
try err.add_label("Expected a '+' or '-' after the exponent", token_start, current_pos);
|
||||
var label = ctx.create_error_label("Expected a '+' or '-' after the exponent", token_start, current_pos);
|
||||
try err.add_label(&label);
|
||||
err.set_help("Add a sign and a digit to complete the scientific number");
|
||||
|
||||
return LexError.IncompleteScientificNumber;
|
||||
@ -233,7 +237,8 @@ fn scientific(
|
||||
const sign_char = input[current_pos];
|
||||
if (sign_char != '+' and sign_char != '-') {
|
||||
var err = try ctx.create_and_append_error("Incomplete scientific point number", current_pos, current_pos + 1);
|
||||
try err.add_label("Expected a '+' or '-' here, found another char", current_pos, current_pos + 1);
|
||||
var label = ctx.create_error_label("Expected a '+' or '-' here, found another char", current_pos, current_pos + 1);
|
||||
try err.add_label(&label);
|
||||
err.set_help("Add a sign and a digit after the first 'e' to complete the scientific number");
|
||||
|
||||
return LexError.IncompleteScientificNumber;
|
||||
@ -249,7 +254,8 @@ fn scientific(
|
||||
// if there is no difference, no extra digits were lexed.
|
||||
if (digits_start == current_pos) {
|
||||
var err = try ctx.create_and_append_error("Incomplete scientific point number", current_pos - 1, current_pos);
|
||||
try err.add_label("Expected at least one digit after this sign", current_pos - 1, current_pos);
|
||||
var label = ctx.create_error_label("Expected at least one digit after this sign", current_pos - 1, current_pos);
|
||||
try err.add_label(&label);
|
||||
err.set_help("Add a digit after the sign to complit the scientific number");
|
||||
|
||||
return LexError.IncompleteScientificNumber;
|
||||
|
@ -11,6 +11,7 @@ const grouping = @import("grouping.zig");
|
||||
const punctuation = @import("punctiation.zig");
|
||||
|
||||
const errors = @import("errors");
|
||||
const context = @import("context");
|
||||
|
||||
pub const TokenType = token.TokenType;
|
||||
pub const Token = token.Token;
|
||||
@ -24,7 +25,7 @@ const LexError = token.LexError;
|
||||
pub fn tokenize(
|
||||
input: []const u8,
|
||||
alloc: std.mem.Allocator,
|
||||
err_arrl: *std.ArrayList(errors.ErrorData),
|
||||
ctx: *context.CompilerContext,
|
||||
) !std.ArrayList(Token) {
|
||||
const input_len = input.len;
|
||||
var current_pos: usize = 0;
|
||||
@ -41,19 +42,12 @@ pub fn tokenize(
|
||||
break;
|
||||
}
|
||||
|
||||
// FIXME: should defer deinit, otherwise we leak memory?
|
||||
var current_error: errors.ErrorData = undefined;
|
||||
|
||||
// attempt to lex a number
|
||||
const number_lex = number.lex(input, input_len, actual_next_pos, &current_error, alloc) catch |e| switch (e) {
|
||||
// the lexer adds any errors to the context as necessary
|
||||
const number_lex = number.lex(input, input_len, actual_next_pos, ctx) catch |e| switch (e) {
|
||||
// recoverable errors
|
||||
LexError.Incomplete, LexError.LeadingZero, LexError.IncompleteFloatingNumber, LexError.IncompleteScientificNumber => {
|
||||
// add to list of errors
|
||||
try err_arrl.append(current_error);
|
||||
|
||||
// FIXME: should deinit current_error now that its been allocated, otherwise we leak memory?
|
||||
|
||||
// ignore everything until whitespace and loop
|
||||
// move to the next synchronization point (whitespace) to recover lexing
|
||||
current_pos = ignore_until_whitespace(input, actual_next_pos);
|
||||
continue;
|
||||
},
|
||||
@ -80,9 +74,8 @@ pub fn tokenize(
|
||||
}
|
||||
|
||||
// attempt to lex a string
|
||||
const str_lex = string.lex(input, actual_next_pos, &current_error, alloc) catch |e| switch (e) {
|
||||
const str_lex = string.lex(input, actual_next_pos, ctx) catch |e| switch (e) {
|
||||
LexError.IncompleteString => {
|
||||
try err_arrl.append(current_error);
|
||||
current_pos = ignore_until_whitespace(input, actual_next_pos);
|
||||
continue;
|
||||
},
|
||||
@ -108,9 +101,8 @@ pub fn tokenize(
|
||||
}
|
||||
|
||||
// attempt to lex a comment
|
||||
const comment_lex = comment.lex(input, actual_next_pos, &current_error, alloc) catch |e| switch (e) {
|
||||
const comment_lex = comment.lex(input, actual_next_pos, ctx) catch |e| switch (e) {
|
||||
LexError.CRLF => {
|
||||
try err_arrl.append(current_error);
|
||||
current_pos = ignore_until_whitespace(input, actual_next_pos);
|
||||
continue;
|
||||
},
|
||||
@ -154,8 +146,7 @@ pub fn tokenize(
|
||||
else {
|
||||
// Create an error "nothing matched" and continue lexing
|
||||
// after the whitespace
|
||||
try current_error.init("Unrecognized character", actual_next_pos, actual_next_pos + 1, alloc);
|
||||
try err_arrl.append(current_error);
|
||||
_ = try ctx.create_and_append_error("Unrecognized character", actual_next_pos, actual_next_pos + 1);
|
||||
current_pos = ignore_until_whitespace(input, actual_next_pos);
|
||||
continue;
|
||||
}
|
||||
|
@ -37,7 +37,8 @@ pub fn lex(
|
||||
// new line, initialize and return error
|
||||
else if (next_char == '\n') {
|
||||
var err = try ctx.create_and_append_error("Incomplete String", current_pos, current_pos + 1);
|
||||
try err.add_label("Found a new line here", current_pos, current_pos + 1);
|
||||
var label = ctx.create_error_label("Found a new line here", current_pos, current_pos + 1);
|
||||
try err.add_label(&label);
|
||||
err.set_help("Strings must always end on the same line that they start.");
|
||||
|
||||
return LexError.IncompleteString;
|
||||
@ -47,14 +48,16 @@ pub fn lex(
|
||||
// if next char is EOF, return error
|
||||
if (current_pos + 1 == cap) {
|
||||
var err = try ctx.create_and_append_error("Incomplete String", current_pos, current_pos + 1);
|
||||
try err.add_label("Found EOF here", current_pos, current_pos + 1);
|
||||
var label = ctx.create_error_label("Found EOF here", current_pos, current_pos + 1);
|
||||
try err.add_label(&label);
|
||||
err.set_help("Strings must always end on the same line that they start.");
|
||||
return LexError.IncompleteString;
|
||||
}
|
||||
// if next char is newline, return error
|
||||
else if (input[current_pos + 1] == '\n') {
|
||||
var err = try ctx.create_and_append_error("Incomplete String", current_pos, current_pos + 1);
|
||||
try err.add_label("Found a new line here", current_pos, current_pos + 1);
|
||||
var label = ctx.create_error_label("Found a new line here", current_pos, current_pos + 1);
|
||||
try err.add_label(&label);
|
||||
err.set_help("Strings must always end on the same line that they start.");
|
||||
return LexError.IncompleteString;
|
||||
}
|
||||
@ -69,7 +72,8 @@ pub fn lex(
|
||||
|
||||
// this could only reach when EOF is hit, return error
|
||||
var err = try ctx.create_and_append_error("Incomplete String", current_pos, current_pos + 1);
|
||||
try err.add_label("Found EOF here", current_pos, current_pos + 1);
|
||||
var label = ctx.create_error_label("Found EOF here", current_pos, current_pos + 1);
|
||||
try err.add_label(&label);
|
||||
err.set_help("Strings must always end on the same line that they start.");
|
||||
|
||||
return LexError.IncompleteString;
|
||||
|
@ -13,7 +13,47 @@ pub const CompilerContext = struct {
|
||||
};
|
||||
}
|
||||
|
||||
/// Appends a new error to the compiler context and returns a handle to
/// the element stored in the context's error list.
///
/// The new error starts with an empty label list (backed by the context's
/// allocator) and no help message.
///
/// The returned pointer refers to storage owned by `self.errors`; it may be
/// invalidated when that list grows, so use it before appending another
/// error.
///
/// Propagates the allocation error if the error list cannot grow.
pub fn create_and_append_error(
    self: *CompilerContext,
    reason: []const u8,
    start_position: usize,
    end_position: usize,
) !*ErrorData {
    // Reserve the slot inside the list and initialize it in place.
    // Returning the address of a local copy would hand callers a pointer
    // into this function's stack frame, and any label or help message set
    // through it would never reach the error stored in the context.
    const stored = try self.errors.addOne();
    stored.* = ErrorData{
        .reason = reason,
        .start_position = start_position,
        .end_position = end_position,
        .labels = std.ArrayList(ErrorLabel).init(self.allocator),
        .help = null,
    };

    return stored;
}
|
||||
|
||||
/// Builds an `ErrorLabel` whose message is stored as the `.static` variant,
/// covering the range `start`..`end`.
///
/// The label is returned by value and is meant to be attached to an
/// `ErrorData`. The context itself is not read or modified.
pub fn create_error_label(
    self: *CompilerContext,
    message: []const u8,
    start: usize,
    end: usize,
) ErrorLabel {
    // The receiver is unused here.
    _ = self;

    const label: ErrorLabel = .{
        .message = .{ .static = message },
        .start = start,
        .end = end,
    };
    return label;
}
|
||||
|
||||
/// Tears down the context: deinitializes every recorded error using the
/// context's allocator, then releases the error list itself.
pub fn deinit(self: *CompilerContext) void {
    const allocator = self.allocator;
    for (self.errors.items) |*recorded| recorded.deinit(allocator);
    self.errors.deinit();
}
|
||||
};
|
||||
@ -29,6 +69,24 @@ pub const ErrorData = struct {
|
||||
end_position: usize,
|
||||
/// A list of detailed messages about the error
|
||||
labels: std.ArrayList(ErrorLabel),
|
||||
|
||||
/// Appends a copy of the pointed-to label to this error's label list.
/// Propagates any error returned by the underlying append.
pub fn add_label(self: *ErrorData, label: *ErrorLabel) !void {
    const copied: ErrorLabel = label.*;
    try self.labels.append(copied);
}
|
||||
|
||||
/// Stores `help` as this error's help message, replacing any previous one.
/// Only the slice is stored; the text is not copied.
pub fn set_help(self: *ErrorData, help: []const u8) void {
    self.*.help = help;
}
|
||||
|
||||
/// Releases every label attached to this error, then the label list.
///
/// `allocator` is forwarded to each label's `deinit`; labels are assumed
/// to have been initialized with this same allocator.
pub fn deinit(self: *ErrorData, allocator: std.mem.Allocator) void {
    for (self.labels.items) |*attached| attached.deinit(allocator);
    self.labels.deinit();
}
|
||||
};
|
||||
|
||||
pub const ErrorLabel = struct {
|
||||
@ -38,4 +96,11 @@ pub const ErrorLabel = struct {
|
||||
},
|
||||
start: usize,
|
||||
end: usize,
|
||||
|
||||
/// Frees the label's message when it is the `.dynamic` variant.
/// A `.static` message is left untouched.
pub fn deinit(self: *ErrorLabel, allocator: std.mem.Allocator) void {
    switch (self.message) {
        .dynamic => |owned_text| allocator.free(owned_text),
        .static => {},
    }
}
|
||||
};
|
||||
|
@ -84,7 +84,7 @@ fn repl() !void {
|
||||
i.deinit();
|
||||
};
|
||||
|
||||
const tokens = lexic.tokenize(line, alloc, &error_array) catch |e| switch (e) {
|
||||
const tokens = lexic.tokenize(line, alloc, &ctx) catch |e| switch (e) {
|
||||
error.OutOfMemory => {
|
||||
try stdout.print("FATAL ERROR: System Out of Memory!", .{});
|
||||
try bw.flush();
|
||||
|
Loading…
Reference in New Issue
Block a user