Compare commits

...

3 Commits

Author SHA1 Message Date
758f551db0 feat: structure for more number lexers 2024-11-16 17:08:07 -05:00
cbefbe3f68 refactor: organize files 2024-11-16 16:12:15 -05:00
70dce84f05 feat: lex integer 2024-11-16 16:03:24 -05:00
5 changed files with 146 additions and 44 deletions

View File

@ -92,6 +92,9 @@ pub fn build(b: *std.Build) void {
// Add more dependencies for unit testing
const files = [_][]const u8{
"src/01_lexic/root.zig",
"src/01_lexic/number.zig",
"src/01_lexic/token.zig",
"src/01_lexic/utils.zig",
};
for (files) |file| {
const file_unit_test = b.addTest(.{

116
src/01_lexic/number.zig Normal file
View File

@ -0,0 +1,116 @@
const std = @import("std");
const token = @import("./token.zig");
const utils = @import("./utils.zig");
const Token = token.Token;
const TokenType = token.TokenType;
const is_decimal_digit = utils.is_decimal_digit;
/// Result of a successful lex: a tuple of the produced `Token` and a `usize`.
/// `integer` fills the second slot with the index one past the last byte it consumed.
const LexReturn = struct { Token, usize };
/// Attempts to lex a number, as per the language grammar.
///
/// A number is either an Int or a Float.
/// No number can have a leading zero. That is an error, to
/// avoid confusion with PHP literal octals.
///
/// `input` is the source text, `cap` the exclusive upper bound of the
/// readable range and `start` the index at which lexing begins.
///
/// Returns `null` when `start` is not below `cap`, `error.LeadingZero` when
/// a `0` is directly followed by another decimal digit, and otherwise
/// whatever the selected sub-lexer (`hex`, `octal`, `binary` or `integer`)
/// returns.
fn lex(input: []const u8, cap: usize, start: usize) !?LexReturn {
    // Nothing left to read: report "no number" instead of indexing out of bounds.
    if (start >= cap) return null;

    // Attempt to lex a hex, octal or binary number
    if (input[start] == '0' and cap > start + 1) {
        switch (input[start + 1]) {
            'x', 'X' => return hex(),
            'o', 'O' => return octal(),
            'b', 'B' => return binary(),
            // A zero followed by more digits is a leading zero.
            '0'...'9' => return error.LeadingZero,
            // A zero followed by a non-digit: let the integer lexer handle it.
            else => {},
        }
    }

    // Attempt to lex an integer.
    // Floating point numbers are lexed through the int lexer
    return integer(input, cap, start);
}
/// Lexer for hexadecimal literals, selected by `lex` on a `0x`/`0X` prefix.
/// Not implemented yet: takes no input and always yields `null`.
fn hex() !?LexReturn {
    const nothing_lexed: ?LexReturn = null;
    return nothing_lexed;
}
/// Lexer for octal literals, selected by `lex` on a `0o`/`0O` prefix.
/// Not implemented yet: takes no input and always yields `null`.
fn octal() !?LexReturn {
    const nothing_lexed: ?LexReturn = null;
    return nothing_lexed;
}
/// Lexer for binary literals, selected by `lex` on a `0b`/`0B` prefix.
/// Not implemented yet: takes no input and always yields `null`.
fn binary() !?LexReturn {
    const nothing_lexed: ?LexReturn = null;
    return nothing_lexed;
}
/// Attempts to lex a decimal integer starting at `input[start]`.
///
/// Consumes the longest run of decimal digits inside `[start, cap)` and
/// returns the resulting `Int` token together with the index one past the
/// last consumed digit. Returns `null` when `start` is not below `cap` or
/// when the byte at `start` is not a decimal digit.
///
/// A leading `0` is not rejected here. Numbers starting with `0` could be
/// confused with PHP literal integers, where a leading `0` means octal, so
/// this function should be called after the lexers for hex, octal and
/// binary have been called.
fn integer(input: []const u8, cap: usize, start: usize) !?LexReturn {
    // Empty input, or a start offset at/after the end: nothing to lex.
    if (start >= cap) return null;
    if (!is_decimal_digit(input[start])) return null;

    // Advance to the first byte that is not a digit, or to `cap`.
    var last_pos = start + 1;
    while (last_pos < cap and is_decimal_digit(input[last_pos])) : (last_pos += 1) {}

    return .{
        Token.init(input[start..last_pos], TokenType.Int, start),
        last_pos,
    };
}
// An integer at the very start of the input, followed by whitespace.
test "int lexer 1" {
    const source = "322 ";
    const lexed = (try integer(source, source.len, 0)) orelse
        return error.TestUnexpectedResult;
    try std.testing.expectEqualStrings("322", lexed[0].value);
}
// An integer in the middle of the input, lexed from a non-zero offset.
test "int lexer 2" {
    // Three bytes of padding on the left so that offset 3 lands on the `6`;
    // with less padding the lexer would start inside the number.
    const input = "   644   ";
    const result = try integer(input, input.len, 3);
    const tuple = result orelse return error.TestUnexpectedResult;
    try std.testing.expectEqualStrings("644", tuple[0].value);
    // The second slot is the index one past the last digit.
    try std.testing.expectEqual(@as(usize, 6), tuple[1]);
}
// A single digit that is also the whole input.
test "int lexer 3" {
    const source = "4";
    const lexed = (try integer(source, source.len, 0)) orelse
        return error.TestUnexpectedResult;
    try std.testing.expectEqualStrings("4", lexed[0].value);
}
// Input that does not start with a digit must not produce a token.
test "should return null if not an integer" {
    const source = "prosor prosor";
    const lexed = try integer(source, source.len, 0);
    try std.testing.expect(lexed == null);
}

View File

@ -1,52 +1,14 @@
const std = @import("std");
const t = std.testing;
const number = @import("./number.zig");
const token = @import("./token.zig");
/// Kind of a token: `Int` or `Float`.
const TokenType = enum { Int, Float };
/// A lexed token: its text, its kind and the position it was created with.
const Token = struct {
    /// Text of the token.
    value: []const u8,
    /// Kind of the token.
    token_type: TokenType,
    /// The `start` value handed to `init`.
    start_pos: usize,

    /// Builds a `Token` from its three fields; performs no validation.
    pub fn init(value: []const u8, token_type: TokenType, start: usize) Token {
        return .{
            .start_pos = start,
            .token_type = token_type,
            .value = value,
        };
    }
};
const TokenType = token.TokenType;
const Token = token.Token;
/// Tries to lex a number at offset 0 of `input`, discards the result and
/// prints `input` through `std.debug.print`.
pub fn tokenize(input: []const u8) !void {
// NOTE(review): `next_token` is declared twice below, once from a two-argument
// call and once from a three-argument call. This looks like the old and new
// lines of a diff shown together; presumably only the three-argument form
// should remain — confirm against the real file.
// NOTE(review): at file level `number` is also bound to
// `@import("./number.zig")`; verify which `number` this call is meant to reach.
const next_token = try number(input, 0);
const input_len = input.len;
const next_token = try number(input, input_len, 0);
// The lexed token is intentionally unused for now.
_ = next_token;
std.debug.print("tokenize :D {s}\n", .{input});
}
/// Lexes the single byte at `input[start]` into an `Int` token when it is a
/// decimal digit; returns `null` otherwise. Only one byte is ever consumed.
fn number(input: []const u8, start: usize) !?Token {
    if (is_digit(input[start])) {
        const end = start + 1;
        return Token.init(input[start..end], TokenType.Int, start);
    }
    return null;
}
/// Reports whether `c` is an ASCII decimal digit (`'0'` through `'9'`).
fn is_digit(c: u8) bool {
    return switch (c) {
        '0'...'9' => true,
        else => false,
    };
}
// A single digit is lexed into a token whose text is that digit.
test "number lexer" {
    const input = "3";
    const result = try number(input, 0);
    const r = result orelse return error.TestUnexpectedResult;
    // Compare the bytes, not the slice pointer: `expectEqual` on slices checks
    // pointer and length, which only says where the text lives, not what it is.
    try std.testing.expectEqualStrings("3", r.value);
}

18
src/01_lexic/token.zig Normal file
View File

@ -0,0 +1,18 @@
/// Kind of a token: `Int` or `Float`.
pub const TokenType = enum { Int, Float };
/// A lexed token: its text, its kind and the position it was created with.
pub const Token = struct {
    /// Text of the token.
    value: []const u8,
    /// Kind of the token.
    token_type: TokenType,
    /// The `start` value handed to `init`.
    start_pos: usize,

    /// Builds a `Token` from its three fields; performs no validation.
    pub fn init(value: []const u8, token_type: TokenType, start: usize) Token {
        return .{
            .start_pos = start,
            .token_type = token_type,
            .value = value,
        };
    }
};

3
src/01_lexic/utils.zig Normal file
View File

@ -0,0 +1,3 @@
/// Reports whether `c` is an ASCII decimal digit (`'0'` through `'9'`).
pub fn is_decimal_digit(c: u8) bool {
    return switch (c) {
        '0'...'9' => true,
        else => false,
    };
}