Compare commits

..

2 Commits

Author SHA1 Message Date
9d89f2fad0 feat: add error handling to string lexing 2025-01-05 12:43:28 -05:00
344ec18863 fix: test 2024-12-29 13:13:44 -05:00
3 changed files with 71 additions and 27 deletions

View File

@ -264,7 +264,7 @@ fn scientific(
test "int lexer 1" { test "int lexer 1" {
const input = "322 "; const input = "322 ";
const result = try integer(input, input.len, 0); const result = try integer(input, input.len, 0, undefined, std.heap.page_allocator);
if (result) |tuple| { if (result) |tuple| {
const r = tuple[0]; const r = tuple[0];
@ -276,7 +276,7 @@ test "int lexer 1" {
test "int lexer 2" { test "int lexer 2" {
const input = " 644 "; const input = " 644 ";
const result = try integer(input, input.len, 3); const result = try integer(input, input.len, 3, undefined, std.heap.page_allocator);
if (result) |tuple| { if (result) |tuple| {
const r = tuple[0]; const r = tuple[0];
@ -288,7 +288,7 @@ test "int lexer 2" {
test "int lexer 3" { test "int lexer 3" {
const input = "4"; const input = "4";
const result = try integer(input, input.len, 0); const result = try integer(input, input.len, 0, undefined, std.heap.page_allocator);
if (result) |tuple| { if (result) |tuple| {
const r = tuple[0]; const r = tuple[0];
@ -300,7 +300,7 @@ test "int lexer 3" {
test "should return null if not an integer" { test "should return null if not an integer" {
const input = "prosor prosor"; const input = "prosor prosor";
const result = try integer(input, input.len, 0); const result = try integer(input, input.len, 0, undefined, std.heap.page_allocator);
try std.testing.expect(result == null); try std.testing.expect(result == null);
} }
@ -323,6 +323,7 @@ test "should fail on integer with leading zero" {
defer std.testing.allocator.destroy(errdata); defer std.testing.allocator.destroy(errdata);
const result = lex(input, input.len, 0, errdata, std.testing.allocator) catch |err| { const result = lex(input, input.len, 0, errdata, std.testing.allocator) catch |err| {
try std.testing.expect(err == token.LexError.LeadingZero); try std.testing.expect(err == token.LexError.LeadingZero);
defer errdata.deinit();
return; return;
}; };
@ -508,6 +509,7 @@ test "should fail on incomplete fp number" {
defer std.testing.allocator.destroy(errdata); defer std.testing.allocator.destroy(errdata);
const result = lex(input, input.len, 0, errdata, std.testing.allocator) catch |err| { const result = lex(input, input.len, 0, errdata, std.testing.allocator) catch |err| {
try std.testing.expect(err == token.LexError.IncompleteFloatingNumber); try std.testing.expect(err == token.LexError.IncompleteFloatingNumber);
errdata.deinit();
return; return;
}; };
@ -539,6 +541,7 @@ test "should fail on incomplete scientific number" {
defer std.testing.allocator.destroy(errdata); defer std.testing.allocator.destroy(errdata);
const result = lex(input, input.len, 0, errdata, std.testing.allocator) catch |err| { const result = lex(input, input.len, 0, errdata, std.testing.allocator) catch |err| {
try std.testing.expect(err == token.LexError.IncompleteScientificNumber); try std.testing.expect(err == token.LexError.IncompleteScientificNumber);
defer errdata.deinit();
return; return;
}; };
@ -558,6 +561,7 @@ test "should fail on incomplete scientific number 2" {
defer std.testing.allocator.destroy(errdata); defer std.testing.allocator.destroy(errdata);
const result = lex(input, input.len, 0, errdata, std.testing.allocator) catch |err| { const result = lex(input, input.len, 0, errdata, std.testing.allocator) catch |err| {
try std.testing.expect(err == token.LexError.IncompleteScientificNumber); try std.testing.expect(err == token.LexError.IncompleteScientificNumber);
defer errdata.deinit();
return; return;
}; };

View File

@ -41,8 +41,9 @@ pub fn tokenize(
break; break;
} }
// attempt to lex a number
var current_error: errors.ErrorData = undefined; var current_error: errors.ErrorData = undefined;
// attempt to lex a number
const number_lex = number.lex(input, input_len, actual_next_pos, &current_error, alloc) catch |e| switch (e) { const number_lex = number.lex(input, input_len, actual_next_pos, &current_error, alloc) catch |e| switch (e) {
// recoverable errors // recoverable errors
LexError.Incomplete, LexError.LeadingZero, LexError.IncompleteFloatingNumber, LexError.IncompleteScientificNumber => { LexError.Incomplete, LexError.LeadingZero, LexError.IncompleteFloatingNumber, LexError.IncompleteScientificNumber => {
@ -51,7 +52,6 @@ pub fn tokenize(
// ignore everything until whitespace and loop // ignore everything until whitespace and loop
current_pos = ignore_until_whitespace(input, actual_next_pos); current_pos = ignore_until_whitespace(input, actual_next_pos);
assert(current_pos > actual_next_pos);
continue; continue;
}, },
// just throw unrecoverable errors // just throw unrecoverable errors
@ -66,24 +66,36 @@ pub fn tokenize(
continue; continue;
} }
// attempt to lex an identifier // attempt to lex an identifier. identifier parsing has no errors
else if (try identifier.lex(input, actual_next_pos)) |tuple| { if (try identifier.lex(input, actual_next_pos)) |tuple| {
assert(tuple[1] > current_pos); assert(tuple[1] > current_pos);
const t = tuple[0]; const t = tuple[0];
current_pos = tuple[1]; current_pos = tuple[1];
try tokens.append(t); try tokens.append(t);
continue;
} }
// attempt to lex a string // attempt to lex a string
else if (try string.lex(input, actual_next_pos)) |tuple| { const str_lex = string.lex(input, actual_next_pos, &current_error, alloc) catch |e| switch (e) {
LexError.IncompleteString => {
try err_arrl.append(current_error);
current_pos = ignore_until_whitespace(input, actual_next_pos);
continue;
},
else => return e,
};
if (str_lex) |tuple| {
assert(tuple[1] > current_pos); assert(tuple[1] > current_pos);
const t = tuple[0]; const t = tuple[0];
current_pos = tuple[1]; current_pos = tuple[1];
try tokens.append(t); try tokens.append(t);
continue;
} }
// attempt to lex a datatype // attempt to lex a datatype
else if (try datatype.lex(input, actual_next_pos)) |tuple| { if (try datatype.lex(input, actual_next_pos)) |tuple| {
assert(tuple[1] > current_pos); assert(tuple[1] > current_pos);
const t = tuple[0]; const t = tuple[0];
current_pos = tuple[1]; current_pos = tuple[1];
@ -190,17 +202,17 @@ test "should insert an item, fail, and not leak" {
const input = "322 \"hello"; const input = "322 \"hello";
var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator); var error_list = std.ArrayList(errors.ErrorData).init(std.testing.allocator);
defer error_list.deinit(); defer error_list.deinit();
defer for (error_list.items) |*i| {
i.deinit();
};
const arrl = tokenize(input, std.testing.allocator, &error_list) catch |e| switch (e) { const arrl = tokenize(input, std.testing.allocator, &error_list) catch |e| switch (e) {
error.IncompleteString => {
return;
},
else => { else => {
try std.testing.expect(false); try std.testing.expect(false);
return; return;
}, },
}; };
try std.testing.expect(false); defer arrl.deinit();
arrl.deinit();
} }
test "shouldnt leak" { test "shouldnt leak" {

View File

@ -2,13 +2,19 @@ const std = @import("std");
const assert = std.debug.assert; const assert = std.debug.assert;
const token = @import("./token.zig"); const token = @import("./token.zig");
const utils = @import("./utils.zig"); const utils = @import("./utils.zig");
const errors = @import("errors");
const Token = token.Token; const Token = token.Token;
const TokenType = token.TokenType; const TokenType = token.TokenType;
const LexError = token.LexError; const LexError = token.LexError;
const LexReturn = token.LexReturn; const LexReturn = token.LexReturn;
pub fn lex(input: []const u8, start: usize) LexError!?LexReturn { pub fn lex(
input: []const u8,
start: usize,
err: *errors.ErrorData,
alloc: std.mem.Allocator,
) LexError!?LexReturn {
const cap = input.len; const cap = input.len;
assert(start < cap); assert(start < cap);
@ -28,18 +34,28 @@ pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
current_pos + 1, current_pos + 1,
}; };
} }
// new line, return error // new line, initialize and return error
else if (next_char == '\n') { else if (next_char == '\n') {
try err.init("Incomplete String", current_pos, current_pos + 1, alloc);
try err.add_label("Found a new line here", current_pos, current_pos + 1);
err.set_help("Strings must always end on the same line that they start.");
return LexError.IncompleteString; return LexError.IncompleteString;
} }
// lex escape characters // lex escape characters
else if (next_char == '\\') { else if (next_char == '\\') {
// if next char is EOF, return error // if next char is EOF, return error
if (current_pos + 1 == cap) { if (current_pos + 1 == cap) {
try err.init("Incomplete String", current_pos, current_pos + 1, alloc);
try err.add_label("Found EOF here", current_pos, current_pos + 1);
err.set_help("Strings must always end on the same line that they start.");
return LexError.IncompleteString; return LexError.IncompleteString;
} }
// if next char is newline, return error // if next char is newline, return error
else if (input[current_pos + 1] == '\n') { else if (input[current_pos + 1] == '\n') {
try err.init("Incomplete String", current_pos, current_pos + 1, alloc);
try err.add_label("Found a new line here", current_pos, current_pos + 1);
err.set_help("Strings must always end on the same line that they start.");
return LexError.IncompleteString; return LexError.IncompleteString;
} }
// here just consume whatever char is after // here just consume whatever char is after
@ -52,12 +68,16 @@ pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
} }
// this could only reach when EOF is hit, return error // this could only reach when EOF is hit, return error
try err.init("Incomplete String", current_pos, current_pos + 1, alloc);
try err.add_label("Found EOF here", current_pos, current_pos + 1);
err.set_help("Strings must always end on the same line that they start.");
return LexError.IncompleteString; return LexError.IncompleteString;
} }
test "should lex empty string" { test "should lex empty string" {
const input = "\"\""; const input = "\"\"";
const output = try lex(input, 0); const output = try lex(input, 0, undefined, std.testing.allocator);
if (output) |tuple| { if (output) |tuple| {
const t = tuple[0]; const t = tuple[0];
@ -70,7 +90,7 @@ test "should lex empty string" {
test "should lex string with 1 char" { test "should lex string with 1 char" {
const input = "\"a\""; const input = "\"a\"";
const output = try lex(input, 0); const output = try lex(input, 0, undefined, std.testing.allocator);
if (output) |tuple| { if (output) |tuple| {
const t = tuple[0]; const t = tuple[0];
@ -83,7 +103,7 @@ test "should lex string with 1 char" {
test "should lex string with unicode" { test "should lex string with unicode" {
const input = "\"😭\""; const input = "\"😭\"";
const output = try lex(input, 0); const output = try lex(input, 0, undefined, std.testing.allocator);
if (output) |tuple| { if (output) |tuple| {
const t = tuple[0]; const t = tuple[0];
@ -96,14 +116,16 @@ test "should lex string with unicode" {
test "shouldnt lex other things" { test "shouldnt lex other things" {
const input = "322"; const input = "322";
const output = try lex(input, 0); const output = try lex(input, 0, undefined, std.testing.allocator);
try std.testing.expect(output == null); try std.testing.expect(output == null);
} }
test "should fail on EOF before closing string" { test "should fail on EOF before closing string" {
const input = "\"hello"; const input = "\"hello";
_ = lex(input, 0) catch |err| { var errdata: errors.ErrorData = undefined;
_ = lex(input, 0, &errdata, std.testing.allocator) catch |err| {
try std.testing.expectEqual(LexError.IncompleteString, err); try std.testing.expectEqual(LexError.IncompleteString, err);
defer errdata.deinit();
return; return;
}; };
@ -112,8 +134,10 @@ test "should fail on EOF before closing string" {
test "should fail on newline before closing string" { test "should fail on newline before closing string" {
const input = "\"hello\n"; const input = "\"hello\n";
_ = lex(input, 0) catch |err| { var errdata: errors.ErrorData = undefined;
_ = lex(input, 0, &errdata, std.testing.allocator) catch |err| {
try std.testing.expectEqual(LexError.IncompleteString, err); try std.testing.expectEqual(LexError.IncompleteString, err);
defer errdata.deinit();
return; return;
}; };
@ -122,7 +146,7 @@ test "should fail on newline before closing string" {
test "should lex string with escape character 1" { test "should lex string with escape character 1" {
const input = "\"test\\\"string\""; const input = "\"test\\\"string\"";
const output = try lex(input, 0); const output = try lex(input, 0, undefined, std.testing.allocator);
if (output) |tuple| { if (output) |tuple| {
const t = tuple[0]; const t = tuple[0];
@ -135,7 +159,7 @@ test "should lex string with escape character 1" {
test "should lex string with escape character 2" { test "should lex string with escape character 2" {
const input = "\"test\\\\string\""; const input = "\"test\\\\string\"";
const output = try lex(input, 0); const output = try lex(input, 0, undefined, std.testing.allocator);
if (output) |tuple| { if (output) |tuple| {
const t = tuple[0]; const t = tuple[0];
@ -148,7 +172,9 @@ test "should lex string with escape character 2" {
test "should fail on EOF after backslash" { test "should fail on EOF after backslash" {
const input = "\"hello \\"; const input = "\"hello \\";
_ = lex(input, 0) catch |err| { var errdata: errors.ErrorData = undefined;
_ = lex(input, 0, &errdata, std.testing.allocator) catch |err| {
defer errdata.deinit();
try std.testing.expectEqual(LexError.IncompleteString, err); try std.testing.expectEqual(LexError.IncompleteString, err);
return; return;
}; };
@ -158,7 +184,9 @@ test "should fail on EOF after backslash" {
test "should fail on newline after backslash" { test "should fail on newline after backslash" {
const input = "\"hello \\\n"; const input = "\"hello \\\n";
_ = lex(input, 0) catch |err| { var errdata: errors.ErrorData = undefined;
_ = lex(input, 0, &errdata, std.testing.allocator) catch |err| {
defer errdata.deinit();
try std.testing.expectEqual(LexError.IncompleteString, err); try std.testing.expectEqual(LexError.IncompleteString, err);
return; return;
}; };