From 15a66ebc3dcf7fbebb7c14ec49515228c96de59a Mon Sep 17 00:00:00 2001 From: Araozu Date: Mon, 18 Nov 2024 21:08:37 -0500 Subject: [PATCH] feat: lex datatype --- build.zig | 1 + src/01_lexic/datatype.zig | 53 +++++++++++++++++++++++++++++++++++++ src/01_lexic/identifier.zig | 1 + src/01_lexic/root.zig | 2 -- src/01_lexic/utils.zig | 4 +++ 5 files changed, 59 insertions(+), 2 deletions(-) create mode 100644 src/01_lexic/datatype.zig diff --git a/build.zig b/build.zig index c5fbdea..81a2adf 100644 --- a/build.zig +++ b/build.zig @@ -94,6 +94,7 @@ pub fn build(b: *std.Build) void { "src/01_lexic/root.zig", "src/01_lexic/number.zig", "src/01_lexic/identifier.zig", + "src/01_lexic/datatype.zig", "src/01_lexic/token.zig", "src/01_lexic/utils.zig", }; diff --git a/src/01_lexic/datatype.zig b/src/01_lexic/datatype.zig new file mode 100644 index 0000000..a05433b --- /dev/null +++ b/src/01_lexic/datatype.zig @@ -0,0 +1,53 @@ +const std = @import("std"); +const token = @import("./token.zig"); +const utils = @import("./utils.zig"); + +const Token = token.Token; +const TokenType = token.TokenType; +const LexError = token.LexError; +const LexReturn = token.LexReturn; + +/// Lexes a Datatype: an uppercase ASCII letter followed by any characters accepted by utils.is_identifier_char. Returns null when start is out of bounds or the first character is not uppercase. NOTE(review): the token is tagged TokenType.Identifier; confirm this is intended. +pub fn lex(input: []const u8, start: usize) LexError!?LexReturn { + const cap = input.len; + var final_pos = start; + + if (start >= cap) { + return null; + } + + // lex uppercase + if (!utils.is_uppercase(input[start])) { + return null; + } + final_pos += 1; + + // lex many lowercase/uppercase/underscore/number + if (utils.lex_many(utils.is_identifier_char, input, final_pos)) |new_pos| { + final_pos = new_pos; + } + + return .{ + Token.init(input[start..final_pos], TokenType.Identifier, start), + final_pos, + }; +} + +test "should lex datatype" { + const input = "MyType"; + const output = try lex(input, 0); + + if (output) |tuple| { + const t = tuple[0]; + try std.testing.expectEqualDeep("MyType", t.value); + } else { + try std.testing.expect(false); + } +} + +test "shouldnt lex identifier" 
{ + const input = "myDatatype"; + const output = try lex(input, 0); + + try std.testing.expect(output == null); +} diff --git a/src/01_lexic/identifier.zig b/src/01_lexic/identifier.zig index 7201686..a5509c8 100644 --- a/src/01_lexic/identifier.zig +++ b/src/01_lexic/identifier.zig @@ -7,6 +7,7 @@ const TokenType = token.TokenType; const LexError = token.LexError; const LexReturn = token.LexReturn; +/// Lexes an identifier pub fn lex(input: []const u8, start: usize) LexError!?LexReturn { const cap = input.len; var final_pos = start; diff --git a/src/01_lexic/root.zig b/src/01_lexic/root.zig index 8e0db11..8334fbb 100644 --- a/src/01_lexic/root.zig +++ b/src/01_lexic/root.zig @@ -27,8 +27,6 @@ pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !void { break; } } - - std.debug.print("array list len: {d}", .{tokens.items.len}); } /// Ignores all whitespace on `input` since `start` diff --git a/src/01_lexic/utils.zig b/src/01_lexic/utils.zig index 17ad51e..98b2820 100644 --- a/src/01_lexic/utils.zig +++ b/src/01_lexic/utils.zig @@ -22,6 +22,10 @@ pub fn is_lowercase(c: u8) bool { return 'a' <= c and c <= 'z'; } +pub fn is_uppercase(c: u8) bool { + return 'A' <= c and c <= 'Z'; +} + pub fn is_lowercase_underscore(c: u8) bool { return c == '_' or ('a' <= c and c <= 'z'); }