diff --git a/src/01_lexic/number.zig b/src/01_lexic/number.zig
index 7ac1e90..98661b5 100644
--- a/src/01_lexic/number.zig
+++ b/src/01_lexic/number.zig
@@ -2,6 +2,7 @@ const std = @import("std");
 const assert = std.debug.assert;
 const token = @import("./token.zig");
 const utils = @import("./utils.zig");
+const errors = @import("errors");
 
 const Token = token.Token;
 const TokenType = token.TokenType;
@@ -13,7 +14,12 @@ const is_decimal_digit = utils.is_decimal_digit;
 /// Attempts to lex a number, as per the language grammar.
 ///
 /// A number is either an Int or a Float.
-pub fn lex(input: []const u8, cap: usize, start: usize) LexError!?LexReturn {
+pub fn lex(
+    input: []const u8,
+    cap: usize,
+    start: usize,
+    err: *errors.ErrorData,
+) LexError!?LexReturn {
     assert(start < cap);
 
     const first_char = input[start];
@@ -21,9 +27,9 @@ pub fn lex(input: []const u8, cap: usize, start: usize) LexError!?LexReturn {
     if (first_char == '0' and cap > start + 1) {
         const second_char = input[start + 1];
         switch (second_char) {
-            'x', 'X' => return prefixed('x', input, cap, start),
-            'o', 'O' => return prefixed('o', input, cap, start),
-            'b', 'B' => return prefixed('b', input, cap, start),
+            'x', 'X' => return prefixed('x', input, cap, start, err),
+            'o', 'O' => return prefixed('o', input, cap, start, err),
+            'b', 'B' => return prefixed('b', input, cap, start, err),
             else => {
                 // Continue
             },
@@ -39,7 +45,13 @@ pub fn lex(input: []const u8, cap: usize, start: usize) LexError!?LexReturn {
 /// only allowed values for `prefix` are `x`, `o` & `b`.
 /// An adequate validator is choosen based on `prefix`,
 /// that validator will decide which characters to lex.
-fn prefixed(comptime prefix: u8, input: []const u8, cap: usize, start: usize) !?LexReturn {
+fn prefixed(
+    comptime prefix: u8,
+    input: []const u8,
+    cap: usize,
+    start: usize,
+    err: *errors.ErrorData,
+) !?LexReturn {
     const validator = switch (prefix) {
         'x' => utils.is_hex_digit,
         'o' => utils.is_octal_digit,
@@ -51,6 +63,10 @@ fn prefixed(comptime prefix: u8, input: []const u8, cap: usize, start: usize) !?
 
     // There should be at least 1 valid digit
     if (end_position >= cap or !validator(input[end_position])) {
+        // populate error information
+        err.init("Incomplete number", start, end_position);
+
+        // throw error
         return LexError.Incomplete;
     }
 
@@ -238,7 +254,7 @@ test "should return null if not an integer" {
 
 test "should lex hex number" {
     const input = "0xa";
-    const result = try lex(input, input.len, 0);
+    const result = try lex(input, input.len, 0, undefined);
 
     if (result) |tuple| {
         const r = tuple[0];
@@ -250,7 +266,9 @@ test "should lex hex number" {
 
 test "should fail on integer with leading zero" {
     const input = "0322";
-    const result = lex(input, input.len, 0) catch |err| {
+    const errdata = try std.testing.allocator.create(errors.ErrorData);
+    defer std.testing.allocator.destroy(errdata);
+    const result = lex(input, input.len, 0, errdata) catch |err| {
         try std.testing.expect(err == token.LexError.LeadingZero);
         return;
     };
@@ -267,7 +285,7 @@ test "should fail on integer with leading zero" {
 
 test "should lex hex number 2" {
     const input = "  0Xff00AA  ";
-    const result = try lex(input, input.len, 2);
+    const result = try lex(input, input.len, 2, undefined);
 
     if (result) |tuple| {
         const r = tuple[0];
@@ -279,7 +297,9 @@ test "should lex hex number 2" {
 
 test "shouldnt parse incomplete hex number" {
     const input = "0xZZ";
-    const result = lex(input, input.len, 0) catch |err| {
+    const errdata = try std.testing.allocator.create(errors.ErrorData);
+    defer std.testing.allocator.destroy(errdata);
+    const result = lex(input, input.len, 0, errdata) catch |err| {
         try std.testing.expect(err == token.LexError.Incomplete);
         return;
     };
@@ -296,7 +316,9 @@ test "shouldnt parse incomplete hex number" {
 
 test "shouldnt parse incomplete hex number 2" {
     const input = "0x";
-    const result = lex(input, input.len, 0) catch |err| {
+    const errdata = try std.testing.allocator.create(errors.ErrorData);
+    defer std.testing.allocator.destroy(errdata);
+    const result = lex(input, input.len, 0, errdata) catch |err| {
         try std.testing.expect(err == token.LexError.Incomplete);
         return;
     };
@@ -313,7 +335,7 @@ test "shouldnt parse incomplete hex number 2" {
 
 test "should lex octal number" {
     const input = "0o755";
-    const result = try lex(input, input.len, 0);
+    const result = try lex(input, input.len, 0, undefined);
 
     if (result) |tuple| {
         const r = tuple[0];
@@ -325,7 +347,7 @@ test "should lex octal number" {
 
 test "should lex octal number 2" {
     const input = "  0o755  ";
-    const result = try lex(input, input.len, 2);
+    const result = try lex(input, input.len, 2, undefined);
 
     if (result) |tuple| {
         const r = tuple[0];
@@ -337,7 +359,9 @@ test "should lex octal number 2" {
 
 test "shouldnt parse incomplete octal number" {
     const input = "0o8";
-    const result = lex(input, input.len, 0) catch |err| {
+    const errdata = try std.testing.allocator.create(errors.ErrorData);
+    defer std.testing.allocator.destroy(errdata);
+    const result = lex(input, input.len, 0, errdata) catch |err| {
         try std.testing.expect(err == token.LexError.Incomplete);
         return;
     };
@@ -354,7 +378,7 @@ test "shouldnt parse incomplete octal number" {
 
 test "should lex binary number" {
     const input = "0b1011";
-    const result = try lex(input, input.len, 0);
+    const result = try lex(input, input.len, 0, undefined);
 
     if (result) |tuple| {
         const r = tuple[0];
@@ -366,7 +390,9 @@ test "should lex binary number" {
 
 test "shouldnt parse incomplete binary number" {
     const input = "0b2";
-    const result = lex(input, input.len, 0) catch |err| {
+    const errdata = try std.testing.allocator.create(errors.ErrorData);
+    defer std.testing.allocator.destroy(errdata);
+    const result = lex(input, input.len, 0, errdata) catch |err| {
         try std.testing.expect(err == token.LexError.Incomplete);
         return;
     };
@@ -383,7 +409,7 @@ test "shouldnt parse incomplete binary number" {
 
 test "should lex fp number 1" {
     const input = "1.2";
-    const result = try lex(input, input.len, 0);
+    const result = try lex(input, input.len, 0, undefined);
 
     if (result) |tuple| {
         const r = tuple[0];
@@ -395,7 +421,9 @@ test "should lex fp number 1" {
 
 test "should lex fp number 2" {
     const input = "0.1";
-    const result = try lex(input, input.len, 0);
+    const errdata = try std.testing.allocator.create(errors.ErrorData);
+    defer std.testing.allocator.destroy(errdata);
+    const result = try lex(input, input.len, 0, errdata);
 
     if (result) |tuple| {
         const r = tuple[0];
@@ -407,7 +435,7 @@ test "should lex fp number 2" {
 
 test "should lex fp number 3" {
     const input = "123.456";
-    const result = try lex(input, input.len, 0);
+    const result = try lex(input, input.len, 0, undefined);
 
     if (result) |tuple| {
         const r = tuple[0];
@@ -419,7 +447,9 @@ test "should lex fp number 3" {
 
 test "should fail on incomplete fp number" {
     const input = "123.";
-    const result = lex(input, input.len, 0) catch |err| {
+    const errdata = try std.testing.allocator.create(errors.ErrorData);
+    defer std.testing.allocator.destroy(errdata);
+    const result = lex(input, input.len, 0, errdata) catch |err| {
         try std.testing.expect(err == token.LexError.IncompleteFloatingNumber);
         return;
     };
@@ -436,7 +466,7 @@ test "should fail on incomplete fp number" {
 
 test "should lex scientific number" {
     const input = "42e+3";
-    const result = try lex(input, input.len, 0);
+    const result = try lex(input, input.len, 0, undefined);
 
     if (result) |tuple| {
         const r = tuple[0];
@@ -448,7 +478,9 @@ test "should lex scientific number" {
 
 test "should fail on incomplete scientific number" {
     const input = "123e";
-    const result = lex(input, input.len, 0) catch |err| {
+    const errdata = try std.testing.allocator.create(errors.ErrorData);
+    defer std.testing.allocator.destroy(errdata);
+    const result = lex(input, input.len, 0, errdata) catch |err| {
         try std.testing.expect(err == token.LexError.IncompleteScientificNumber);
         return;
     };
@@ -465,7 +497,9 @@ test "should fail on incomplete scientific number" {
 
 test "should fail on incomplete scientific number 2" {
     const input = "123e+";
-    const result = lex(input, input.len, 0) catch |err| {
+    const errdata = try std.testing.allocator.create(errors.ErrorData);
+    defer std.testing.allocator.destroy(errdata);
+    const result = lex(input, input.len, 0, errdata) catch |err| {
         try std.testing.expect(err == token.LexError.IncompleteScientificNumber);
         return;
     };
@@ -482,7 +516,7 @@ test "should fail on incomplete scientific number 2" {
 
 test "should lex floating scientific number" {
     const input = "0.58e+3";
-    const result = try lex(input, input.len, 0);
+    const result = try lex(input, input.len, 0, undefined);
 
     if (result) |tuple| {
         const r = tuple[0];
diff --git a/src/01_lexic/root.zig b/src/01_lexic/root.zig
index 5fab4be..a7c33bd 100644
--- a/src/01_lexic/root.zig
+++ b/src/01_lexic/root.zig
@@ -10,18 +10,31 @@ const string = @import("string.zig");
 const grouping = @import("grouping.zig");
 const punctuation = @import("punctiation.zig");
 
+const errors = @import("errors");
+
 pub const TokenType = token.TokenType;
 pub const Token = token.Token;
+const LexError = token.LexError;
 
 /// Creates an array list of tokens. The caller is responsible of
 /// calling `deinit` to free the array list
-pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !std.ArrayList(Token) {
+///
+/// Also takes an arraylist of errors. This will be populated if any errors are
+/// found while lexing. The caller is responsible for freeing it.
+pub fn tokenize(
+    input: []const u8,
+    alloc: std.mem.Allocator,
+    err_arrl: *std.ArrayList(*errors.ErrorData),
+) !std.ArrayList(Token) {
     const input_len = input.len;
     var current_pos: usize = 0;
 
     var tokens = std.ArrayList(Token).init(alloc);
     errdefer tokens.deinit();
 
+    var current_error = try alloc.create(errors.ErrorData);
+    defer alloc.destroy(current_error);
+
     while (current_pos < input_len) {
         const actual_next_pos = ignore_whitespace(input, current_pos);
         assert(current_pos <= actual_next_pos);
@@ -32,13 +45,30 @@ pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !std.ArrayList(Toke
         }
 
         // attempt to lex a number
-        if (try number.lex(input, input_len, actual_next_pos)) |tuple| {
+        const number_lex = number.lex(input, input_len, actual_next_pos, current_error) catch |e| switch (e) {
+            // recoverable errors
+            LexError.Incomplete => {
+                // add to list of errors
+                try err_arrl.append(current_error);
+                // refresh the previous error pointer
+                current_error = try alloc.create(errors.ErrorData);
+
+                // ignore everything until whitespace and loop
+                current_pos = ignore_until_whitespace(input, actual_next_pos);
+                continue;
+            },
+            // just throw unrecoverable errors
+            else => return e,
+        };
+        if (number_lex) |tuple| {
             assert(tuple[1] > current_pos);
             const t = tuple[0];
             current_pos = tuple[1];
 
             try tokens.append(t);
+            continue;
         }
+
         // attempt to lex an identifier
         else if (try identifier.lex(input, actual_next_pos)) |tuple| {
             assert(tuple[1] > current_pos);
@@ -95,13 +125,17 @@ pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !std.ArrayList(Toke
 
             try tokens.append(t);
         }
+
         // nothing was matched. fail
        // TODO: instead of failing add an error, ignore all chars
        // until next whitespace, and continue lexing
        // TODO: check if this is a good error recovery strategy
         else {
-            // no lexer matched
-            break;
+            // Create an error "nothing matched" and continue lexing
+            // after the whitespace
+            current_error.init("Unrecognized character", actual_next_pos, actual_next_pos + 1);
+            current_pos = ignore_until_whitespace(input, actual_next_pos);
+            continue;
         }
     }
 
@@ -123,25 +157,45 @@ pub fn ignore_whitespace(input: []const u8, start: usize) usize {
     return pos;
 }
 
+/// Ignores all chars in `input` from `start`
+/// and returns the position where the first whitespace/newline
+/// is found.
+inline fn ignore_until_whitespace(input: []const u8, start: usize) usize {
+    const cap = input.len;
+    var pos = start;
+
+    while (pos < cap and (input[pos] != ' ' and input[pos] != '\t' and input[pos] != '\n')) {
+        pos += 1;
+    }
+
+    return pos;
+}
+
 test {
     std.testing.refAllDecls(@This());
 }
 
 test "should insert 1 item" {
     const input = "322";
-    const arrl = try tokenize(input, std.testing.allocator);
+    var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
+    defer error_list.deinit();
+    const arrl = try tokenize(input, std.testing.allocator, &error_list);
     arrl.deinit();
 }
 
 test "should insert 2 item" {
     const input = "322 644";
-    const arrl = try tokenize(input, std.testing.allocator);
+    var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
+    defer error_list.deinit();
+    const arrl = try tokenize(input, std.testing.allocator, &error_list);
     arrl.deinit();
 }
 
 test "should insert an item, fail, and not leak" {
     const input = "322 \"hello";
-    const arrl = tokenize(input, std.testing.allocator) catch |e| switch (e) {
+    var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
+    defer error_list.deinit();
+    const arrl = tokenize(input, std.testing.allocator, &error_list) catch |e| switch (e) {
         error.IncompleteString => {
             return;
         },
diff --git a/src/02_syntax/expression.zig b/src/02_syntax/expression.zig
index aaf5fe2..45ac339 100644
--- a/src/02_syntax/expression.zig
+++ b/src/02_syntax/expression.zig
@@ -1,5 +1,6 @@
 const std = @import("std");
 const lexic = @import("lexic");
+const errors = @import("errors");
 const Token = lexic.Token;
 const TokenType = lexic.TokenType;
 const ParseError = @import("./types.zig").ParseError;
@@ -26,7 +27,9 @@ pub const Expression = union(enum) {
 
 test "should parse expression" {
     const input = "322";
-    const tokens = try lexic.tokenize(input, std.testing.allocator);
+    var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
+    defer error_list.deinit();
+    const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
     defer tokens.deinit();
 
     var expr: Expression = undefined;
@@ -37,7 +40,9 @@ test "should parse expression" {
 
 test "should fail on non expression" {
     const input = "identifier";
-    const tokens = try lexic.tokenize(input, std.testing.allocator);
+    var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
+    defer error_list.deinit();
+    const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
     defer tokens.deinit();
 
     var expr: Expression = undefined;
diff --git a/src/02_syntax/root.zig b/src/02_syntax/root.zig
index 4e7a0d8..99bde53 100644
--- a/src/02_syntax/root.zig
+++ b/src/02_syntax/root.zig
@@ -48,7 +48,7 @@ pub const Module = struct {
                 switch (e) {
                     error.Unmatched => {
                         // create the error value
-                        try error_target.init(
+                        error_target.init(
                            "No statement found",
                            current_pos,
                            current_pos + 1,
@@ -84,7 +84,9 @@ test {
 
 test "should parse a single statement" {
     const input = "var my_variable = 322";
-    const tokens = try lexic.tokenize(input, std.testing.allocator);
+    var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
+    defer error_list.deinit();
+    const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
     defer tokens.deinit();
 
     const error_target = try std.testing.allocator.create(errors.ErrorData);
@@ -98,7 +100,9 @@ test "should parse a single statement" {
 
 test "should clean memory if a statement parsing fails after one item has been inserted" {
     const input = "var my_variable = 322 unrelated()";
-    const tokens = try lexic.tokenize(input, std.testing.allocator);
+    var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
+    defer error_list.deinit();
+    const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
     defer tokens.deinit();
 
     const error_target = try std.testing.allocator.create(errors.ErrorData);
diff --git a/src/02_syntax/statement.zig b/src/02_syntax/statement.zig
index b356c5b..a4082f4 100644
--- a/src/02_syntax/statement.zig
+++ b/src/02_syntax/statement.zig
@@ -4,6 +4,7 @@ const expression = @import("expression.zig");
 const types = @import("./types.zig");
 const utils = @import("./utils.zig");
 const variable = @import("./variable.zig");
+const errors = @import("errors");
 
 const TokenStream = types.TokenStream;
 const ParseError = types.ParseError;
@@ -58,7 +59,9 @@ pub const Statement = struct {
 
 test "should parse a variable declaration statement" {
     const input = "var my_variable = 322";
-    const tokens = try lexic.tokenize(input, std.testing.allocator);
+    var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
+    defer error_list.deinit();
+    const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
     defer tokens.deinit();
 
     var statement: Statement = undefined;
@@ -75,7 +78,9 @@ test "should parse a variable declaration statement" {
 
 test "should fail on other constructs" {
     const input = "a_function_call(322)";
-    const tokens = try lexic.tokenize(input, std.testing.allocator);
+    var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
+    defer error_list.deinit();
+    const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
     defer tokens.deinit();
 
     var statement: Statement = undefined;
diff --git a/src/02_syntax/variable.zig b/src/02_syntax/variable.zig
index 53704ce..710de74 100644
--- a/src/02_syntax/variable.zig
+++ b/src/02_syntax/variable.zig
@@ -3,6 +3,7 @@ const lexic = @import("lexic");
 const expression = @import("expression.zig");
 const types = @import("./types.zig");
 const utils = @import("./utils.zig");
+const errors = @import("errors");
 
 const TokenStream = types.TokenStream;
 const ParseError = types.ParseError;
@@ -70,7 +71,9 @@ pub const VariableBinding = struct {
 
 test "should parse a minimal var" {
     const input = "var my_variable = 322";
-    const tokens = try lexic.tokenize(input, std.testing.allocator);
+    var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
+    defer error_list.deinit();
+    const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
     defer tokens.deinit();
 
     var binding: VariableBinding = undefined;
@@ -90,7 +93,9 @@ test "should parse a minimal var" {
 
 test "should fail is it doesnt start with var" {
     const input = "different_token_stream()";
-    const tokens = try lexic.tokenize(input, std.testing.allocator);
+    var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
+    defer error_list.deinit();
+    const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
     defer tokens.deinit();
 
     var binding: VariableBinding = undefined;
@@ -104,7 +109,9 @@ test "should fail is it doesnt start with var" {
 
 test "should fail if the identifier is missing" {
     const input = "var ";
-    const tokens = try lexic.tokenize(input, std.testing.allocator);
+    var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
+    defer error_list.deinit();
+    const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
     defer tokens.deinit();
 
     var binding: VariableBinding = undefined;
@@ -118,7 +125,9 @@ test "should fail if the identifier is missing" {
 
 test "should fail if there is not an identifier after var" {
     const input = "var 322";
-    const tokens = try lexic.tokenize(input, std.testing.allocator);
+    var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
+    defer error_list.deinit();
+    const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
     defer tokens.deinit();
 
     var binding: VariableBinding = undefined;
@@ -132,7 +141,9 @@ test "should fail if there is not an identifier after var" {
 
 test "should fail if the equal sign is missing" {
     const input = "var my_id ";
-    const tokens = try lexic.tokenize(input, std.testing.allocator);
+    var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
+    defer error_list.deinit();
+    const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
     defer tokens.deinit();
 
     var binding: VariableBinding = undefined;
@@ -146,7 +157,9 @@ test "should fail if the equal sign is missing" {
 
 test "should fail if the equal sign is not found" {
     const input = "var my_id is string";
-    const tokens = try lexic.tokenize(input, std.testing.allocator);
+    var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
+    defer error_list.deinit();
+    const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
     defer tokens.deinit();
 
     var binding: VariableBinding = undefined;
@@ -160,7 +173,9 @@ test "should fail if the equal sign is not found" {
 
 test "should fail if the expression parsing fails" {
     const input = "var my_id = ehhh";
-    const tokens = try lexic.tokenize(input, std.testing.allocator);
+    var error_list = std.ArrayList(*errors.ErrorData).init(std.testing.allocator);
+    defer error_list.deinit();
+    const tokens = try lexic.tokenize(input, std.testing.allocator, &error_list);
     defer tokens.deinit();
 
     var binding: VariableBinding = undefined;
diff --git a/src/errors/root.zig b/src/errors/root.zig
index 8191a29..c2454a5 100644
--- a/src/errors/root.zig
+++ b/src/errors/root.zig
@@ -12,7 +12,7 @@ pub const ErrorData = struct {
         reason: []const u8,
         start_position: usize,
         end_position: usize,
-    ) !void {
+    ) void {
         target.* = .{
             .reason = reason,
             .start_position = start_position,
@@ -24,8 +24,7 @@ pub const ErrorData = struct {
         std.debug.print("Error: {s}\n", .{self.reason});
     }
 
-    /// When called, this struct will clean its resources and then
-    /// clean itself.
+    /// Does nothing at the moment
     pub fn deinit(self: *@This()) void {
         _ = self;
     }
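
Note for reviewers: the sketch below shows how a caller might drive the new `tokenize` signature and report the errors it collects. It is not part of the patch: the `main` entry point, the sample input, and the assumption that a consumer sees the `lexic` and `errors` modules under those names (as the test files do) are illustrative only. Destroying each `ErrorData` with the same allocator is inferred from the fact that `tokenize` allocates them via `alloc.create`.

```zig
const std = @import("std");
const lexic = @import("lexic");
const errors = @import("errors");

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const alloc = gpa.allocator();

    // Hypothetical input: "0xZZ" hits the recoverable LexError.Incomplete path.
    const input = "322 0xZZ 644";

    var error_list = std.ArrayList(*errors.ErrorData).init(alloc);
    // The caller owns the list and every ErrorData appended by tokenize.
    defer {
        for (error_list.items) |e| alloc.destroy(e);
        error_list.deinit();
    }

    const tokens = try lexic.tokenize(input, alloc, &error_list);
    defer tokens.deinit();

    std.debug.print("lexed {d} tokens, {d} recoverable errors\n", .{
        tokens.items.len,
        error_list.items.len,
    });
    for (error_list.items) |e| e.print();
}
```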