refactor: use context in lexer

2025-01-30 20:01:22 -05:00 · 2025-01-30 20:01:22 -05:00 · 8c7640be4d
commit 8c7640be4d
parent 988bcbc243
6 changed files with 100 additions and 33 deletions
--- a/src/01_lexic/comment.zig
+++ b/src/01_lexic/comment.zig
@ -3,6 +3,7 @@ const assert = std.debug.assert;
 const token = @import("./token.zig");
 const utils = @import("./utils.zig");
 const errors = @import("errors");
+const context = @import("context");

 const Token = token.Token;
 const TokenType = token.TokenType;
@ -12,8 +13,7 @@ const LexReturn = token.LexReturn;
 pub fn lex(
    input: []const u8,
    start: usize,
-    err: *errors.ErrorData,
-    alloc: std.mem.Allocator,
+    ctx: *context.CompilerContext,
 ) LexError!?LexReturn {
    const cap = input.len;
    assert(start < cap);
@ -30,8 +30,9 @@ pub fn lex(
        while (current_pos < cap and input[current_pos] != '\n') {
            // check for CR, and throw error
            if (input[current_pos] == '\r') {
-                try err.init("Usage of CRLF", current_pos, current_pos + 1, alloc);
-                try err.add_label("There is a line feed (CR) here", current_pos, current_pos + 1);
+                var err = try ctx.create_and_append_error("Usage of CRLF", current_pos, current_pos + 1);
+                var label = ctx.create_error_label("There is a line feed (CR) here", current_pos, current_pos + 1);
+                try err.add_label(&label);
                err.set_help("All THP code must use LF newline delimiters.");

                return LexError.CRLF;
--- a/src/01_lexic/number.zig
+++ b/src/01_lexic/number.zig
@ -67,7 +67,7 @@ fn prefixed(
        // populate error information
        var new_error = try ctx.create_and_append_error("Incomplete number", start, end_position);
        var new_label = ctx.create_error_label("Expected a valid digit after the '" ++ [_]u8{prefix} ++ "'", start, end_position);
-        new_error.add_label(&new_label);
+        try new_error.add_label(&new_label);

        switch (prefix) {
            'x' => new_error.set_help("Hex numbers should have at least one 0-9a-fA-F after the x"),
@ -128,7 +128,8 @@ fn integer(
        // - a single zero. valid
        if (first_char == '0' and last_pos > start + 1) {
            var err = try ctx.create_and_append_error("Leading zero", start, start + 1);
-            try err.add_label("This decimal number has a leading zero.", start, last_pos);
+            var label = ctx.create_error_label("This decimal number has a leading zero.", start, last_pos);
+            try err.add_label(&label);
            err.set_help("If you want an octal number use '0o', otherwise remove the leading zero");

            return LexError.LeadingZero;
@ -156,7 +157,8 @@ fn integer(
            // leading zero on an integer, throw an error
            if (first_char == '0') {
                var err = try ctx.create_and_append_error("Leading zero", start, start + 1);
-                try err.add_label("This decimal number has a leading zero.", start, last_pos);
+                var label = ctx.create_error_label("This decimal number has a leading zero.", start, last_pos);
+                try err.add_label(&label);
                err.set_help("If you want an octal number use '0o', otherwise remove the leading zero");

                return LexError.LeadingZero;
@ -187,7 +189,8 @@ fn floating_point(
    if (current_pos >= cap or !utils.is_decimal_digit(input[current_pos])) {
        // This is an error
        var err = try ctx.create_and_append_error("Incomplete floating point number", token_start, current_pos);
-        try err.add_label("This number is incomplete", token_start, current_pos);
+        var label = ctx.create_error_label("This number is incomplete", token_start, current_pos);
+        try err.add_label(&label);
        err.set_help("Add a number after the period");

        return LexError.IncompleteFloatingNumber;
@ -225,7 +228,8 @@ fn scientific(
    // expect `+` or `-`
    if (current_pos >= cap) {
        var err = try ctx.create_and_append_error("Incomplete scientific point number", token_start, current_pos);
-        try err.add_label("Expected a '+' or '-' after the exponent", token_start, current_pos);
+        var label = ctx.create_error_label("Expected a '+' or '-' after the exponent", token_start, current_pos);
+        try err.add_label(&label);
        err.set_help("Add a sign and a digit to complete the scientific number");

        return LexError.IncompleteScientificNumber;
@ -233,7 +237,8 @@ fn scientific(
    const sign_char = input[current_pos];
    if (sign_char != '+' and sign_char != '-') {
        var err = try ctx.create_and_append_error("Incomplete scientific point number", current_pos, current_pos + 1);
-        try err.add_label("Expected a '+' or '-' here, found another char", current_pos, current_pos + 1);
+        var label = ctx.create_error_label("Expected a '+' or '-' here, found another char", current_pos, current_pos + 1);
+        try err.add_label(&label);
        err.set_help("Add a sign and a digit after the first 'e' to complete the scientific number");

        return LexError.IncompleteScientificNumber;
@ -249,7 +254,8 @@ fn scientific(
    // if there is no difference, no extra digits were lexed.
    if (digits_start == current_pos) {
        var err = try ctx.create_and_append_error("Incomplete scientific point number", current_pos - 1, current_pos);
-        try err.add_label("Expected at least one digit after this sign", current_pos - 1, current_pos);
+        var label = ctx.create_error_label("Expected at least one digit after this sign", current_pos - 1, current_pos);
+        try err.add_label(&label);
        err.set_help("Add a digit after the sign to complit the scientific number");

        return LexError.IncompleteScientificNumber;
--- a/src/01_lexic/root.zig
+++ b/src/01_lexic/root.zig
@ -11,6 +11,7 @@ const grouping = @import("grouping.zig");
 const punctuation = @import("punctiation.zig");

 const errors = @import("errors");
+const context = @import("context");

 pub const TokenType = token.TokenType;
 pub const Token = token.Token;
@ -24,7 +25,7 @@ const LexError = token.LexError;
 pub fn tokenize(
    input: []const u8,
    alloc: std.mem.Allocator,
-    err_arrl: *std.ArrayList(errors.ErrorData),
+    ctx: *context.CompilerContext,
 ) !std.ArrayList(Token) {
    const input_len = input.len;
    var current_pos: usize = 0;
@ -41,19 +42,12 @@ pub fn tokenize(
            break;
        }

-        // FIXME: should defer deinit, otherwise we leak memory?
-        var current_error: errors.ErrorData = undefined;
-
        // attempt to lex a number
-        const number_lex = number.lex(input, input_len, actual_next_pos, &current_error, alloc) catch |e| switch (e) {
+        // the lexer adds any errors to the context as necessary
+        const number_lex = number.lex(input, input_len, actual_next_pos, ctx) catch |e| switch (e) {
            // recoverable errors
            LexError.Incomplete, LexError.LeadingZero, LexError.IncompleteFloatingNumber, LexError.IncompleteScientificNumber => {
-                // add to list of errors
-                try err_arrl.append(current_error);
-
-                // FIXME: should deinit current_error now that its been allocated, otherwise we leak memory?
-
-                // ignore everything until whitespace and loop
+                // move to next synchronization point (whitespace) to recover lexing
                current_pos = ignore_until_whitespace(input, actual_next_pos);
                continue;
            },
@ -80,9 +74,8 @@ pub fn tokenize(
        }

        // attempt to lex a string
-        const str_lex = string.lex(input, actual_next_pos, &current_error, alloc) catch |e| switch (e) {
+        const str_lex = string.lex(input, actual_next_pos, ctx) catch |e| switch (e) {
            LexError.IncompleteString => {
-                try err_arrl.append(current_error);
                current_pos = ignore_until_whitespace(input, actual_next_pos);
                continue;
            },
@ -108,9 +101,8 @@ pub fn tokenize(
        }

        // attempt to lex a comment
-        const comment_lex = comment.lex(input, actual_next_pos, &current_error, alloc) catch |e| switch (e) {
+        const comment_lex = comment.lex(input, actual_next_pos, ctx) catch |e| switch (e) {
            LexError.CRLF => {
-                try err_arrl.append(current_error);
                current_pos = ignore_until_whitespace(input, actual_next_pos);
                continue;
            },
@ -154,8 +146,7 @@ pub fn tokenize(
        else {
            // Create an error "nothing matched" and continue lexing
            // after the whitespace
-            try current_error.init("Unrecognized character", actual_next_pos, actual_next_pos + 1, alloc);
-            try err_arrl.append(current_error);
+            _ = try ctx.create_and_append_error("Unrecognized character", actual_next_pos, actual_next_pos + 1);
            current_pos = ignore_until_whitespace(input, actual_next_pos);
            continue;
        }
--- a/src/01_lexic/string.zig
+++ b/src/01_lexic/string.zig
@ -37,7 +37,8 @@ pub fn lex(
        // new line, initialize and return error
        else if (next_char == '\n') {
            var err = try ctx.create_and_append_error("Incomplete String", current_pos, current_pos + 1);
-            try err.add_label("Found a new line here", current_pos, current_pos + 1);
+            var label = ctx.create_error_label("Found a new line here", current_pos, current_pos + 1);
+            try err.add_label(&label);
            err.set_help("Strings must always end on the same line that they start.");

            return LexError.IncompleteString;
@ -47,14 +48,16 @@ pub fn lex(
            // if next char is EOF, return error
            if (current_pos + 1 == cap) {
                var err = try ctx.create_and_append_error("Incomplete String", current_pos, current_pos + 1);
-                try err.add_label("Found EOF here", current_pos, current_pos + 1);
+                var label = ctx.create_error_label("Found EOF here", current_pos, current_pos + 1);
+                try err.add_label(&label);
                err.set_help("Strings must always end on the same line that they start.");
                return LexError.IncompleteString;
            }
            // if next char is newline, return error
            else if (input[current_pos + 1] == '\n') {
                var err = try ctx.create_and_append_error("Incomplete String", current_pos, current_pos + 1);
-                try err.add_label("Found a new line here", current_pos, current_pos + 1);
+                var label = ctx.create_error_label("Found a new line here", current_pos, current_pos + 1);
+                try err.add_label(&label);
                err.set_help("Strings must always end on the same line that they start.");
                return LexError.IncompleteString;
            }
@ -69,7 +72,8 @@ pub fn lex(

    // this could only reach when EOF is hit, return error
    var err = try ctx.create_and_append_error("Incomplete String", current_pos, current_pos + 1);
-    try err.add_label("Found EOF here", current_pos, current_pos + 1);
+    var label = ctx.create_error_label("Found EOF here", current_pos, current_pos + 1);
+    try err.add_label(&label);
    err.set_help("Strings must always end on the same line that they start.");

    return LexError.IncompleteString;
--- a/src/context/root.zig
+++ b/src/context/root.zig
@ -13,7 +13,47 @@ pub const CompilerContext = struct {
        };
    }

+    /// Appends a new error to the compiler context and returns a handle
+    /// to the error as stored inside `self.errors`.
+    ///
+    /// The new error starts with an empty label list and no help message.
+    /// `reason` is stored as given (the slice is not copied).
+    ///
+    /// The returned pointer refers to the list element itself, so labels
+    /// and help text set through it are kept in the context. It is only
+    /// valid until the next append to `self.errors`, which may reallocate
+    /// the backing storage; do not hold it across another call to this
+    /// function.
+    ///
+    /// Returns an error if appending to the error list fails.
+    pub fn create_and_append_error(
+        self: *CompilerContext,
+        reason: []const u8,
+        start_position: usize,
+        end_position: usize,
+    ) !*ErrorData {
+        try self.errors.append(.{
+            .reason = reason,
+            .start_position = start_position,
+            .end_position = end_position,
+            .labels = std.ArrayList(ErrorLabel).init(self.allocator),
+            .help = null,
+        });
+
+        // Return the address of the stored element, not of a local copy:
+        // a pointer to a stack local would dangle as soon as this function
+        // returns, and anything written through it would never reach the list.
+        return &self.errors.items[self.errors.items.len - 1];
+    }
+
+    /// Builds an ErrorLabel whose message is a static string.
+    /// The label is returned by value and is meant to be attached to an
+    /// ErrorData. The context itself is not used.
+    pub fn create_error_label(
+        self: *CompilerContext,
+        message: []const u8,
+        start: usize,
+        end: usize,
+    ) ErrorLabel {
+        _ = self;
+        const label: ErrorLabel = .{
+            .start = start,
+            .end = end,
+            .message = .{ .static = message },
+        };
+        return label;
+    }
+
    pub fn deinit(self: *CompilerContext) void {
+        // Release every stored error first, then the list that held them.
+        for (self.errors.items) |*stored| stored.deinit(self.allocator);
        self.errors.deinit();
    }
 };
@ -29,6 +69,24 @@ pub const ErrorData = struct {
    end_position: usize,
    /// A list of detailed messages about the error
    labels: std.ArrayList(ErrorLabel),
+
+    /// Appends a copy of `label` to this error's list of labels.
+    /// Returns an error if the append fails.
+    pub fn add_label(self: *ErrorData, label: *ErrorLabel) !void {
+        const copy = label.*;
+        try self.labels.append(copy);
+    }
+
+    /// Stores `help` as the help message of this error, replacing any
+    /// previous value. The slice is stored as given, not copied.
+    pub fn set_help(self: *ErrorData, help: []const u8) void {
+        self.*.help = help;
+    }
+
+    /// Releases every label held by this error and then the label list.
+    /// `allocator` is handed to each label's `deinit`; labels are assumed
+    /// to have been initialized with that same allocator.
+    pub fn deinit(self: *ErrorData, allocator: std.mem.Allocator) void {
+        for (self.labels.items) |*entry| entry.deinit(allocator);
+        self.labels.deinit();
+    }
 };

 pub const ErrorLabel = struct {
@ -38,4 +96,11 @@ pub const ErrorLabel = struct {
    },
    start: usize,
    end: usize,
+
+    /// Frees the message with `allocator` when it is the `.dynamic`
+    /// variant; `.static` messages are left untouched.
+    pub fn deinit(self: *ErrorLabel, allocator: std.mem.Allocator) void {
+        switch (self.message) {
+            .dynamic => |owned| allocator.free(owned),
+            .static => {},
+        }
+    }
 };
--- a/src/main.zig
+++ b/src/main.zig
@ -84,7 +84,7 @@ fn repl() !void {
            i.deinit();
        };

-        const tokens = lexic.tokenize(line, alloc, &error_array) catch |e| switch (e) {
+        const tokens = lexic.tokenize(line, alloc, &ctx) catch |e| switch (e) {
            error.OutOfMemory => {
                try stdout.print("FATAL ERROR: System Out of Memory!", .{});
                try bw.flush();