feat: lex identifier. create comptime fn for lexing many, many_1

parent d6a83ff46c
commit 92794cc07a

@@ -93,6 +93,7 @@ pub fn build(b: *std.Build) void {
     const files = [_][]const u8{
         "src/01_lexic/root.zig",
         "src/01_lexic/number.zig",
+        "src/01_lexic/identifier.zig",
         "src/01_lexic/token.zig",
         "src/01_lexic/utils.zig",
     };

src/01_lexic/identifier.zig (new file, 100 lines)
@@ -0,0 +1,100 @@
+const std = @import("std");
+const token = @import("./token.zig");
+const utils = @import("./utils.zig");
+
+const Token = token.Token;
+const TokenType = token.TokenType;
+const LexError = token.LexError;
+const LexReturn = token.LexReturn;
+
+pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
+    const cap = input.len;
+    var final_pos = start;
+
+    if (start >= cap) {
+        return null;
+    }
+
+    // lex lowercase or underscore
+    if (!utils.is_lowercase_underscore(input[start])) {
+        return null;
+    }
+    final_pos += 1;
+
+    // lex many lowercase/uppercase/underscore/number
+    if (utils.lex_many(utils.is_identifier_char, input, final_pos)) |new_pos| {
+        final_pos = new_pos;
+    }
+
+    return .{
+        Token.init(input[start..final_pos], TokenType.Identifier, start),
+        final_pos,
+    };
+}
+
+test "should lex single letter" {
+    const input = "a";
+    const output = try lex(input, 0);
+
+    if (output) |tuple| {
+        const t = tuple[0];
+        try std.testing.expectEqualDeep("a", t.value);
+    } else {
+        try std.testing.expect(false);
+    }
+}
+
+test "should lex single underscore" {
+    const input = "_";
+    const output = try lex(input, 0);
+
+    if (output) |tuple| {
+        const t = tuple[0];
+        try std.testing.expectEqualDeep("_", t.value);
+    } else {
+        try std.testing.expect(false);
+    }
+}
+
+test "should lex identifier 1" {
+    const input = "abc";
+    const output = try lex(input, 0);
+
+    if (output) |tuple| {
+        const t = tuple[0];
+        try std.testing.expectEqualDeep("abc", t.value);
+    } else {
+        try std.testing.expect(false);
+    }
+}
+
+test "should lex identifier 2" {
+    const input = "snake_case";
+    const output = try lex(input, 0);
+
+    if (output) |tuple| {
+        const t = tuple[0];
+        try std.testing.expectEqualDeep("snake_case", t.value);
+    } else {
+        try std.testing.expect(false);
+    }
+}
+
+test "should lex identifier 3" {
+    const input = "camelCase";
+    const output = try lex(input, 0);
+
+    if (output) |tuple| {
+        const t = tuple[0];
+        try std.testing.expectEqualDeep("camelCase", t.value);
+    } else {
+        try std.testing.expect(false);
+    }
+}
+
+test "shouldnt lex datatype" {
+    const input = "MyDatatype";
+    const output = try lex(input, 0);
+
+    try std.testing.expect(output == null);
+}
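
For context, a minimal sketch of how this new lexer could be driven from a caller. `identifier.lex`, the `LexReturn` tuple, and the `Token.value` field are taken from the diff above; the test itself is illustrative and not part of the commit:

const std = @import("std");
const identifier = @import("./identifier.zig");

test "sketch: identifier lexer returns the slice and the next position" {
    const input = "first_word";
    if (try identifier.lex(input, 0)) |result| {
        const t = result[0]; // Token built over input[start..final_pos]
        const next_start = result[1]; // where the next lex should begin
        try std.testing.expectEqualStrings("first_word", t.value);
        try std.testing.expectEqual(@as(usize, input.len), next_start);
    } else {
        try std.testing.expect(false);
    }
}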

@@ -5,11 +5,10 @@ const utils = @import("./utils.zig");
 const Token = token.Token;
 const TokenType = token.TokenType;
 const LexError = token.LexError;
+const LexReturn = token.LexReturn;
 
 const is_decimal_digit = utils.is_decimal_digit;
 
-const LexReturn = struct { Token, usize };
-
 /// Attempts to lex a number, as per the language grammar.
 ///
 /// A number is either an Int or a Float.

@@ -31,7 +31,7 @@ pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !void {
     std.debug.print("array list len: {d}", .{tokens.items.len});
 }
 
-/// Ignores all whitespace from usize,
+/// Ignores all whitespace on `input` since `start`
 /// and returns the position where whitespace ends.
 ///
 /// Whitespace is: tabs, spaces

@@ -1,6 +1,7 @@
 pub const TokenType = enum {
     Int,
     Float,
+    Identifier,
 };
 
 pub const Token = struct {

@@ -23,3 +24,7 @@ pub const LexError = error{
     IncompleteFloatingNumber,
     IncompleteScientificNumber,
 };
+
+/// Contains the lexed token and the next position
+/// from which the next lex should start.
+pub const LexReturn = struct { Token, usize };
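
The second field of `LexReturn` is intended to be fed back in as the next start position. A hypothetical driver (`count_tokens` is illustrative only; the repo's real `tokenize` loop may be wired differently) could look like:

const token = @import("./token.zig");

// Calls any lexer that follows the `LexError!?LexReturn` contract repeatedly,
// resuming each call at the position reported by the previous one.
// Assumes the lexer always advances past whatever it lexed.
fn count_tokens(lex_fn: anytype, input: []const u8) token.LexError!usize {
    var pos: usize = 0;
    var count: usize = 0;
    while (try lex_fn(input, pos)) |result| {
        pos = result[1];
        count += 1;
    }
    return count;
}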

@@ -1,3 +1,7 @@
+const token = @import("./token.zig");
+const LexError = token.LexError;
+const LexReturn = token.LexReturn;
+
 pub fn is_decimal_digit(c: u8) bool {
     return '0' <= c and c <= '9';
 }

@@ -13,3 +17,74 @@ pub fn is_binary_digit(c: u8) bool {
 pub fn is_hex_digit(c: u8) bool {
     return ('0' <= c and c <= '9') or ('a' <= c and c <= 'f') or ('A' <= c and c <= 'F');
 }
+
+pub fn is_lowercase(c: u8) bool {
+    return 'a' <= c and c <= 'z';
+}
+
+pub fn is_lowercase_underscore(c: u8) bool {
+    return c == '_' or ('a' <= c and c <= 'z');
+}
+
+/// identifier_letter = underscore | lowercase | uppercase | digit
+pub fn is_identifier_char(c: u8) bool {
+    return c == '_' or ('a' <= c and c <= 'z') or ('A' <= c and c <= 'Z') or ('0' <= c and c <= '9');
+}
+
+/// Runs a discriminator function at least once,
+/// and returns the end position of the lex.
+///
+/// If there is no more input or the lexer does not match
+/// at least once, returns null.
+pub fn lex_many_1(
+    comptime lex_fun: fn (c: u8) bool,
+    input: []const u8,
+    start: usize,
+) ?usize {
+    // assert that there is input left
+    const cap = input.len;
+    var current_pos = start;
+
+    if (current_pos >= cap) {
+        return null;
+    }
+
+    // run the lexer at least once
+    if (!lex_fun(input[current_pos])) {
+        return null;
+    }
+    current_pos += 1;
+
+    // run the lexer many times
+    while (current_pos < cap and lex_fun(input[current_pos])) {
+        current_pos += 1;
+    }
+
+    return current_pos;
+}
+
+/// Runs a discriminator function zero, one or more times
+/// and returns the end position of the lex.
+///
+/// If there is no more input,
+/// returns null.
+pub fn lex_many(
+    comptime lex_fun: fn (c: u8) bool,
+    input: []const u8,
+    start: usize,
+) ?usize {
+    // assert that there is input left
+    const cap = input.len;
+    var current_pos = start;
+
+    if (current_pos >= cap) {
+        return null;
+    }
+
+    // run the lexer many times
+    while (current_pos < cap and lex_fun(input[current_pos])) {
+        current_pos += 1;
+    }
+
+    return current_pos;
+}
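
As an illustrative sketch (assuming the `?usize` signatures shown above, and reusing the existing `is_decimal_digit` predicate; this test is not part of the commit), the difference between the two combinators is:

const std = @import("std");
const utils = @import("./utils.zig");

test "sketch: lex_many_1 needs at least one match, lex_many does not" {
    // "123abc": digits run up to index 3 for both combinators.
    try std.testing.expectEqual(@as(?usize, 3), utils.lex_many_1(utils.is_decimal_digit, "123abc", 0));
    try std.testing.expectEqual(@as(?usize, 3), utils.lex_many(utils.is_decimal_digit, "123abc", 0));
    // "abc": no leading digit, so the _1 variant gives null while the
    // zero-or-more variant simply reports the unchanged start position.
    try std.testing.expectEqual(@as(?usize, null), utils.lex_many_1(utils.is_decimal_digit, "abc", 0));
    try std.testing.expectEqual(@as(?usize, 0), utils.lex_many(utils.is_decimal_digit, "abc", 0));
}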