diff --git a/build.zig b/build.zig
index 7039c1b..7946728 100644
--- a/build.zig
+++ b/build.zig
@@ -101,6 +101,7 @@ pub fn build(b: *std.Build) void {
         "src/01_lexic/token.zig",
         "src/01_lexic/utils.zig",
         "src/01_lexic/grouping.zig",
+        "src/01_lexic/punctuation.zig",
     };
     for (files) |file| {
         const file_unit_test = b.addTest(.{
diff --git a/src/01_lexic/punctuation.zig b/src/01_lexic/punctuation.zig
new file mode 100644
index 0000000..0a3c281
--- /dev/null
+++ b/src/01_lexic/punctuation.zig
@@ -0,0 +1,60 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const token = @import("./token.zig");
+const utils = @import("./utils.zig");
+
+const Token = token.Token;
+const TokenType = token.TokenType;
+const LexError = token.LexError;
+const LexReturn = token.LexReturn;
+
+pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
+    // there should be at least 1 char
+    assert(start < input.len);
+
+    const c = input[start];
+    const token_type = switch (c) {
+        ',' => TokenType.Comma,
+        '\n' => TokenType.Newline,
+        else => {
+            return null;
+        },
+    };
+
+    return .{ Token.init(input[start .. start + 1], token_type, start), start + 1 };
+}
+
+test "shouldnt lex other things" {
+    const input = "322";
+    const output = try lex(input, 0);
+
+    try std.testing.expect(output == null);
+}
+
+test "should lex comma" {
+    const input = ",";
+    const output = try lex(input, 0);
+
+    if (output) |tuple| {
+        const t = tuple[0];
+        try std.testing.expectEqualDeep(",", t.value);
+        try std.testing.expectEqual(TokenType.Comma, t.token_type);
+        try std.testing.expectEqual(1, tuple[1]);
+    } else {
+        try std.testing.expect(false);
+    }
+}
+
+test "should lex new line" {
+    const input = "\n";
+    const output = try lex(input, 0);
+
+    if (output) |tuple| {
+        const t = tuple[0];
+        try std.testing.expectEqualDeep("\n", t.value);
+        try std.testing.expectEqual(TokenType.Newline, t.token_type);
+        try std.testing.expectEqual(1, tuple[1]);
+    } else {
+        try std.testing.expect(false);
+    }
+}
diff --git a/src/01_lexic/root.zig b/src/01_lexic/root.zig
index 7c2337f..48ecdb2 100644
--- a/src/01_lexic/root.zig
+++ b/src/01_lexic/root.zig
@@ -8,6 +8,7 @@ const operator = @import("./operator.zig");
 const comment = @import("./comment.zig");
 const string = @import("./string.zig");
 const grouping = @import("./grouping.zig");
+const punctuation = @import("./punctuation.zig");
 
 const TokenType = token.TokenType;
 const Token = token.Token;
@@ -79,6 +80,14 @@ pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !void {
 
             try tokens.append(t);
         }
+        // lex punctuation
+        else if (try punctuation.lex(input, actual_next_pos)) |tuple| {
+            assert(tuple[1] > current_pos);
+            const t = tuple[0];
+            current_pos = tuple[1];
+
+            try tokens.append(t);
+        }
         // nothing was matched. fail
         // TODO: instead of failing add an error, ignore all chars
         // until next whitespace, and continue lexing