Compare commits
3 Commits
86904218d5
...
758f551db0
Author | SHA1 | Date | |
---|---|---|---|
758f551db0 | |||
cbefbe3f68 | |||
70dce84f05 |
@ -92,6 +92,9 @@ pub fn build(b: *std.Build) void {
|
||||
// Add more dependencies for unit testing
|
||||
const files = [_][]const u8{
|
||||
"src/01_lexic/root.zig",
|
||||
"src/01_lexic/number.zig",
|
||||
"src/01_lexic/token.zig",
|
||||
"src/01_lexic/utils.zig",
|
||||
};
|
||||
for (files) |file| {
|
||||
const file_unit_test = b.addTest(.{
|
||||
|
116
src/01_lexic/number.zig
Normal file
116
src/01_lexic/number.zig
Normal file
@ -0,0 +1,116 @@
|
||||
const std = @import("std");
|
||||
const token = @import("./token.zig");
|
||||
const utils = @import("./utils.zig");
|
||||
|
||||
const Token = token.Token;
|
||||
const TokenType = token.TokenType;
|
||||
|
||||
const is_decimal_digit = utils.is_decimal_digit;
|
||||
|
||||
const LexReturn = struct { Token, usize };
|
||||
|
||||
/// Attempts to lex a number, as per the language grammar.
///
/// A number is either an Int or a Float.
/// No number can have a leading zero. That is an error, to
/// avoid confusion with PHP literal octals.
///
/// `input` is the text being lexed, `start` is the position to lex from and
/// `cap` is the exclusive upper bound of the positions that may be read
/// (presumably `input.len` - confirm against callers).
///
/// Returns `null` when `start` is not below `cap`; otherwise returns
/// whatever the selected sub-lexer (`hex`, `octal`, `binary` or `integer`)
/// returns.
fn lex(input: []const u8, cap: usize, start: usize) !?LexReturn {
    // Nothing left to read, so there is no number here.
    if (start >= cap) return null;

    // Attempt to lex a hex, octal or binary number.
    // The second character is only inspected when it is below `cap`.
    if (input[start] == '0' and cap > start + 1) {
        switch (input[start + 1]) {
            'x', 'X' => return hex(),
            'o', 'O' => return octal(),
            'b', 'B' => return binary(),
            // A switch over a `u8` must be exhaustive: any other character
            // means this is not a prefixed literal, so fall through.
            else => {},
        }

        // TODO: report an error when the `0` is followed by another
        // digit (leading zero). For now the input falls through to the
        // integer lexer below.
    }

    // Attempt to lex an integer.
    // Floating point numbers are lexed through the int lexer
    return integer(input, cap, start);
}
|
||||
|
||||
/// Placeholder for the hexadecimal lexer.
///
/// It takes no input yet and always reports "no token" by returning `null`.
fn hex() !?LexReturn {
    return null;
}
|
||||
|
||||
/// Placeholder for the octal lexer.
///
/// It takes no input yet and always reports "no token" by returning `null`.
fn octal() !?LexReturn {
    return null;
}
|
||||
|
||||
/// Placeholder for the binary lexer.
///
/// It takes no input yet and always reports "no token" by returning `null`.
fn binary() !?LexReturn {
    return null;
}
|
||||
|
||||
/// Attempts to lex a decimal integer starting at `input[start]`.
///
/// Returns `null` when `start` is not below `cap`, or when the character at
/// `start` is not a decimal digit. Otherwise it consumes the longest run of
/// decimal digits that ends before `cap` and returns a tuple of the `Int`
/// token and the position right after the last consumed digit.
///
/// NOTE: a leading `0` is currently accepted. Rejecting it (to avoid
/// confusion with PHP literal integers, where a number that starts with a
/// `0` is octal, not decimal) is not implemented yet. For this reason, this
/// function should be called after the lexers for hex, octal and binary
/// have been called.
fn integer(input: []const u8, cap: usize, start: usize) !?LexReturn {
    // Guard the first read: `start` must be a readable position.
    if (start >= cap or !is_decimal_digit(input[start])) return null;

    // Advance `end` past every following decimal digit, never reading at
    // or beyond `cap`.
    var end = start + 1;
    while (end < cap and is_decimal_digit(input[end])) : (end += 1) {}

    return .{
        Token.init(input[start..end], TokenType.Int, start),
        end,
    };
}
|
||||
|
||||
test "int lexer 1" {
    const input = "322 ";

    // A missing token fails the test with the same error `expect(false)` produces.
    const tuple = (try integer(input, input.len, 0)) orelse return error.TestUnexpectedResult;

    // Compare the token text by content, and check where lexing stopped.
    try std.testing.expectEqualStrings("322", tuple[0].value);
    try std.testing.expectEqual(@as(usize, 3), tuple[1]);
}
|
||||
|
||||
test "int lexer 2" {
    // Three leading spaces so that position 3 is the first digit.
    const input = "   644   ";

    // A missing token fails the test with the same error `expect(false)` produces.
    const tuple = (try integer(input, input.len, 3)) orelse return error.TestUnexpectedResult;

    // Compare the token text by content, and check where lexing stopped.
    try std.testing.expectEqualStrings("644", tuple[0].value);
    try std.testing.expectEqual(@as(usize, 6), tuple[1]);
}
|
||||
|
||||
test "int lexer 3" {
    // Single digit that is also the last character of the input.
    const input = "4";

    // A missing token fails the test with the same error `expect(false)` produces.
    const tuple = (try integer(input, input.len, 0)) orelse return error.TestUnexpectedResult;

    // Compare the token text by content, and check where lexing stopped.
    try std.testing.expectEqualStrings("4", tuple[0].value);
    try std.testing.expectEqual(@as(usize, 1), tuple[1]);
}
|
||||
|
||||
test "should return null if not an integer" {
    const input = "prosor prosor";

    // The first character is not a decimal digit, so no token is produced.
    const maybe_token = try integer(input, input.len, 0);
    try std.testing.expect(maybe_token == null);
}
|
@ -1,52 +1,14 @@
|
||||
const std = @import("std");
|
||||
const t = std.testing;
|
||||
const number = @import("./number.zig");
|
||||
const token = @import("./token.zig");
|
||||
|
||||
const TokenType = enum {
|
||||
Int,
|
||||
Float,
|
||||
};
|
||||
|
||||
const Token = struct {
|
||||
value: []const u8,
|
||||
token_type: TokenType,
|
||||
start_pos: usize,
|
||||
|
||||
pub fn init(value: []const u8, token_type: TokenType, start: usize) Token {
|
||||
return Token{
|
||||
.value = value,
|
||||
.token_type = token_type,
|
||||
.start_pos = start,
|
||||
};
|
||||
}
|
||||
};
|
||||
const TokenType = token.TokenType;
|
||||
const Token = token.Token;
|
||||
|
||||
pub fn tokenize(input: []const u8) !void {
|
||||
const next_token = try number(input, 0);
|
||||
const input_len = input.len;
|
||||
const next_token = try number(input, input_len, 0);
|
||||
_ = next_token;
|
||||
|
||||
std.debug.print("tokenize :D {s}\n", .{input});
|
||||
}
|
||||
|
||||
fn number(input: []const u8, start: usize) !?Token {
|
||||
const first_char = input[start];
|
||||
if (!is_digit(first_char)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return Token.init(input[start .. start + 1], TokenType.Int, start);
|
||||
}
|
||||
|
||||
fn is_digit(c: u8) bool {
|
||||
return '0' <= c and c <= '9';
|
||||
}
|
||||
|
||||
test "number lexer" {
|
||||
const input = "3";
|
||||
const result = try number(input, 0);
|
||||
|
||||
if (result) |r| {
|
||||
try std.testing.expectEqual("3", r.value);
|
||||
} else {
|
||||
try std.testing.expect(false);
|
||||
}
|
||||
}
|
||||
|
18
src/01_lexic/token.zig
Normal file
18
src/01_lexic/token.zig
Normal file
@ -0,0 +1,18 @@
|
||||
/// The kinds of token that can be stored in a `Token`.
pub const TokenType = enum {
    /// Integer number.
    Int,
    /// Floating point number.
    Float,
};
|
||||
|
||||
/// A single token: its text, its kind and the position it was given.
pub const Token = struct {
    /// Text of the token. The slice is stored as given, not copied.
    value: []const u8,
    /// Kind of the token.
    token_type: TokenType,
    /// Position handed to `init` as `start`.
    start_pos: usize,

    /// Builds a `Token` from its text, kind and start position.
    pub fn init(value: []const u8, token_type: TokenType, start: usize) Token {
        return .{
            .start_pos = start,
            .token_type = token_type,
            .value = value,
        };
    }
};
|
3
src/01_lexic/utils.zig
Normal file
3
src/01_lexic/utils.zig
Normal file
@ -0,0 +1,3 @@
|
||||
/// Reports whether `c` is one of the ASCII characters `'0'` through `'9'`.
pub fn is_decimal_digit(c: u8) bool {
    return switch (c) {
        '0'...'9' => true,
        else => false,
    };
}
|
Loading…
Reference in New Issue
Block a user