Compare commits

...

2 Commits

Author SHA1 Message Date
1c8a50f797 feat: lex fp numbers 2024-11-17 07:37:38 -05:00
98bef47824 fix: throw leading zero error only on integer 2024-11-17 07:12:07 -05:00
2 changed files with 144 additions and 17 deletions

View File

@ -13,8 +13,6 @@ const LexReturn = struct { Token, usize };
/// Attempts to lex a number, as per the language grammar.
///
/// A number is either an Int or a Float.
/// No number can have a leading zero. That is an error to
/// avoid confusion with PHP literal octals.
pub fn lex(input: []const u8, cap: usize, start: usize) LexError!?LexReturn {
const first_char = input[start];
@ -26,8 +24,7 @@ pub fn lex(input: []const u8, cap: usize, start: usize) LexError!?LexReturn {
'o', 'O' => return prefixed('o', input, cap, start),
'b', 'B' => return prefixed('b', input, cap, start),
else => {
// Leading zero found. Throw an error.
return LexError.LeadingZero;
// Continue
},
}
}
@ -51,7 +48,7 @@ fn prefixed(comptime prefix: u8, input: []const u8, cap: usize, start: usize) !?
var end_position = start + 2;
// There should be at least 1 hex digit
// There should be at least 1 valid digit
if (end_position >= cap or !validator(input[end_position])) {
return LexError.Incomplete;
}
@ -68,19 +65,16 @@ fn prefixed(comptime prefix: u8, input: []const u8, cap: usize, start: usize) !?
};
}
/// Placeholder lexer that takes no input and always reports "no match".
///
/// Returns `null` unconditionally; it never produces a token or an error.
fn binary() !?LexReturn {
return null;
}
/// Attempts to lex an integer number.
///
/// This function fails if the first digit it encounters is a `0`,
/// this is because it could cause confusion with PHP literal integers,
/// where a number that starts with a `0` is octal, not decimal.
/// This function also attempts to lex a floating point number.
/// If it succeeds, it returns a floating point token.
/// Otherwise, it only returns an integer token.
///
/// For this reason, this function should be called after the lexers
/// for hex, octal and binary have been called.
fn integer(input: []const u8, cap: usize, start: usize) !?LexReturn {
/// An integer cannot have a leading zero. That is an error to
/// avoid confusion with PHP literal octals.
/// Floating point numbers can.
fn integer(input: []const u8, cap: usize, start: usize) LexError!?LexReturn {
const first_char = input[start];
if (!is_decimal_digit(first_char)) {
return null;
@ -91,12 +85,74 @@ fn integer(input: []const u8, cap: usize, start: usize) !?LexReturn {
last_pos += 1;
}
// up to here an integer was lexed.
// now check if a floating point number can be lexed
// if we hit eof, return the current integer
if (last_pos >= cap) {
// leading zero on an integer, throw an error
if (first_char == '0') {
return LexError.LeadingZero;
}
return .{
Token.init(input[start..last_pos], TokenType.Int, start),
last_pos,
};
}
const next_char = input[last_pos];
return switch (next_char) {
// if a dot is found, lex a fp number
'.' => {
return floating_point(input, cap, start, last_pos);
},
// if an `e` (exponential notation) is found, lex that
'e' => {
return null;
},
// otherwise return the current integer
else => {
// leading zero on an integer, throw an error
if (first_char == '0') {
return LexError.LeadingZero;
}
return .{
Token.init(input[start..last_pos], TokenType.Int, start),
last_pos,
};
},
};
}
/// Lexes the fractional part of a floating point number, given that the
/// integer part and the decimal point have already been recognized.
///
/// A period that is not followed by at least one decimal digit (including a
/// period at the end of the input) yields `LexError.IncompleteFloatingNumber`.
///
/// - `input`: the text being lexed
/// - `cap`: upper bound (exclusive) for positions that may be read from `input`
/// - `token_start`: the position the current token started at
/// - `decimal_point`: the position of the decimal point `.`
///
/// On success returns a `Float` token spanning `token_start` up to the last
/// fractional digit, together with the position right after that digit.
fn floating_point(input: []const u8, cap: usize, token_start: usize, decimal_point: usize) LexError!?LexReturn {
    const first_fraction_digit = decimal_point + 1;

    // The period must be immediately followed by a digit that is still in bounds.
    const has_fraction = first_fraction_digit < cap and utils.is_decimal_digit(input[first_fraction_digit]);
    if (!has_fraction) {
        return LexError.IncompleteFloatingNumber;
    }

    // Advance past every remaining digit; `end` stops on the first non-digit or at `cap`.
    var end = first_fraction_digit + 1;
    while (end < cap and utils.is_decimal_digit(input[end])) : (end += 1) {}

    // The token covers the integer part, the period and the fractional digits.
    const float_token = Token.init(input[token_start..end], TokenType.Float, token_start);
    return .{ float_token, end };
}
test "int lexer 1" {
const input = "322 ";
const result = try integer(input, input.len, 0);
@ -152,6 +208,23 @@ test "should lex hex number" {
}
}
test "should fail on integer with leading zero" {
    // "0322" is a decimal integer with a leading zero, so lexing must fail
    // with `LeadingZero` instead of producing a token.
    const input = "0322";

    // `expectError` fails the test (and reports what was found) both when a
    // different error is returned and when lexing unexpectedly succeeds.
    try std.testing.expectError(token.LexError.LeadingZero, lex(input, input.len, 0));
}
test "should lex hex number 2" {
const input = " 0Xff00AA ";
const result = try lex(input, input.len, 2);
@ -267,3 +340,56 @@ test "shouldnt parse incomplete binary number" {
try std.testing.expect(false);
}
test "should lex fp number 1" {
    // Single digit on each side of the decimal point.
    const input = "1.2";
    const result = try lex(input, input.len, 0);

    // A null result means no number was lexed at all: fail the test.
    const tuple = result orelse return error.TestUnexpectedResult;

    // The token text must be the whole literal; `expectEqualStrings`
    // prints a readable diff when it is not.
    try std.testing.expectEqualStrings("1.2", tuple[0].value);
}
test "should lex fp number 2" {
    // A leading zero is allowed when the number is a floating point literal.
    const input = "0.1";
    const result = try lex(input, input.len, 0);

    // A null result means no number was lexed at all: fail the test.
    const tuple = result orelse return error.TestUnexpectedResult;

    // The token text must be the whole literal; `expectEqualStrings`
    // prints a readable diff when it is not.
    try std.testing.expectEqualStrings("0.1", tuple[0].value);
}
test "should lex fp number 3" {
    // Several digits on each side of the decimal point.
    const input = "123.456";
    const result = try lex(input, input.len, 0);

    // A null result means no number was lexed at all: fail the test.
    const tuple = result orelse return error.TestUnexpectedResult;

    // The token text must be the whole literal; `expectEqualStrings`
    // prints a readable diff when it is not.
    try std.testing.expectEqualStrings("123.456", tuple[0].value);
}
test "should fail on incomplete fp number" {
    // The period is the last character, so there is no fractional digit
    // and lexing must fail with `IncompleteFloatingNumber`.
    const input = "123.";

    // `expectError` fails the test (and reports what was found) both when a
    // different error is returned and when lexing unexpectedly succeeds.
    try std.testing.expectError(token.LexError.IncompleteFloatingNumber, lex(input, input.len, 0));
}

View File

@ -20,4 +20,5 @@ pub const Token = struct {
/// Errors the lexer can report while scanning a token.
pub const LexError = error{
/// An integer literal started with `0`; rejected to avoid confusion
/// with PHP octal literals.
LeadingZero,
/// A prefixed number had no valid digit right after its prefix.
Incomplete,
/// A decimal point was not followed by at least one decimal digit.
IncompleteFloatingNumber,
};