From 882d123fe0c226f3040d510c96fb0e8832443fc1 Mon Sep 17 00:00:00 2001 From: Araozu Date: Wed, 27 Nov 2024 20:41:42 -0500 Subject: [PATCH] feat: lex groupign signs --- build.zig | 1 + src/01_lexic/datatype.zig | 2 +- src/01_lexic/grouping.zig | 121 ++++++++++++++++++++++++++++++++++++++ src/01_lexic/root.zig | 18 ++++++ src/01_lexic/string.zig | 3 - src/01_lexic/token.zig | 13 ++++ 6 files changed, 154 insertions(+), 4 deletions(-) create mode 100644 src/01_lexic/grouping.zig diff --git a/build.zig b/build.zig index 3fd3e05..7039c1b 100644 --- a/build.zig +++ b/build.zig @@ -100,6 +100,7 @@ pub fn build(b: *std.Build) void { "src/01_lexic/string.zig", "src/01_lexic/token.zig", "src/01_lexic/utils.zig", + "src/01_lexic/grouping.zig", }; for (files) |file| { const file_unit_test = b.addTest(.{ diff --git a/src/01_lexic/datatype.zig b/src/01_lexic/datatype.zig index e81f96b..920556d 100644 --- a/src/01_lexic/datatype.zig +++ b/src/01_lexic/datatype.zig @@ -28,7 +28,7 @@ pub fn lex(input: []const u8, start: usize) LexError!?LexReturn { } return .{ - Token.init(input[start..final_pos], TokenType.Identifier, start), + Token.init(input[start..final_pos], TokenType.Datatype, start), final_pos, }; } diff --git a/src/01_lexic/grouping.zig b/src/01_lexic/grouping.zig new file mode 100644 index 0000000..bec440d --- /dev/null +++ b/src/01_lexic/grouping.zig @@ -0,0 +1,121 @@ +const std = @import("std"); +const assert = std.debug.assert; +const token = @import("./token.zig"); +const utils = @import("./utils.zig"); + +const Token = token.Token; +const TokenType = token.TokenType; +const LexError = token.LexError; +const LexReturn = token.LexReturn; + +// lex grouping signs +pub fn lex(input: []const u8, start: usize) LexError!?LexReturn { + // there should be at least 1 char + assert(start < input.len); + + const c = input[start]; + const token_type = switch (c) { + '(' => TokenType.LeftParen, + ')' => TokenType.RightParen, + '[' => TokenType.LeftBracket, + ']' => TokenType.RightBracket, + '{' => TokenType.LeftBrace, + '}' => TokenType.RightBrace, + else => { + return null; + }, + }; + + return .{ Token.init(input[start .. start + 1], token_type, start), start + 1 }; +} + +test "shouldnt lex other things" { + const input = "322"; + const output = try lex(input, 0); + + try std.testing.expect(output == null); +} + +test "should lex opening paren" { + const input = "( hello )"; + const output = try lex(input, 0); + + if (output) |tuple| { + const t = tuple[0]; + try std.testing.expectEqualDeep("(", t.value); + try std.testing.expectEqual(TokenType.LeftParen, t.token_type); + try std.testing.expectEqual(1, tuple[1]); + } else { + try std.testing.expect(false); + } +} + +test "should lex closing paren" { + const input = "( hello )"; + const output = try lex(input, 8); + + if (output) |tuple| { + const t = tuple[0]; + try std.testing.expectEqualDeep(")", t.value); + try std.testing.expectEqual(TokenType.RightParen, t.token_type); + try std.testing.expectEqual(9, tuple[1]); + } else { + try std.testing.expect(false); + } +} + +test "should lex opening bracket" { + const input = "[ hello ]"; + const output = try lex(input, 0); + + if (output) |tuple| { + const t = tuple[0]; + try std.testing.expectEqualDeep("[", t.value); + try std.testing.expectEqual(TokenType.LeftBracket, t.token_type); + try std.testing.expectEqual(1, tuple[1]); + } else { + try std.testing.expect(false); + } +} + +test "should lex closing bracket" { + const input = "[ hello ]"; + const output = try lex(input, 8); + + if (output) |tuple| { + const t = tuple[0]; + try std.testing.expectEqualDeep("]", t.value); + try std.testing.expectEqual(TokenType.RightBracket, t.token_type); + try std.testing.expectEqual(9, tuple[1]); + } else { + try std.testing.expect(false); + } +} + +test "should lex opening brace" { + const input = "{ hello }"; + const output = try lex(input, 0); + + if (output) |tuple| { + const t = tuple[0]; + try std.testing.expectEqualDeep("{", t.value); + try std.testing.expectEqual(TokenType.LeftBrace, t.token_type); + try std.testing.expectEqual(1, tuple[1]); + } else { + try std.testing.expect(false); + } +} + +test "should lex closing brace" { + const input = "{ hello }"; + const output = try lex(input, 8); + + if (output) |tuple| { + const t = tuple[0]; + try std.testing.expectEqualDeep("}", t.value); + try std.testing.expectEqual(TokenType.RightBrace, t.token_type); + try std.testing.expectEqual(9, tuple[1]); + } else { + try std.testing.expect(false); + } +} diff --git a/src/01_lexic/root.zig b/src/01_lexic/root.zig index cee5514..7c2337f 100644 --- a/src/01_lexic/root.zig +++ b/src/01_lexic/root.zig @@ -6,6 +6,8 @@ const datatype = @import("./datatype.zig"); const token = @import("./token.zig"); const operator = @import("./operator.zig"); const comment = @import("./comment.zig"); +const string = @import("./string.zig"); +const grouping = @import("./grouping.zig"); const TokenType = token.TokenType; const Token = token.Token; @@ -37,6 +39,14 @@ pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !void { try tokens.append(t); } + // attempt to lex a string + else if (try string.lex(input, actual_next_pos)) |tuple| { + assert(tuple[1] > current_pos); + const t = tuple[0]; + current_pos = tuple[1]; + + try tokens.append(t); + } // attempt to lex a datatype else if (try datatype.lex(input, actual_next_pos)) |tuple| { assert(tuple[1] > current_pos); @@ -61,6 +71,14 @@ pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !void { try tokens.append(t); } + // attempt to lex grouping signs + else if (try grouping.lex(input, actual_next_pos)) |tuple| { + assert(tuple[1] > current_pos); + const t = tuple[0]; + current_pos = tuple[1]; + + try tokens.append(t); + } // nothing was matched. fail // TODO: instead of failing add an error, ignore all chars // until next whitespace, and continue lexing diff --git a/src/01_lexic/string.zig b/src/01_lexic/string.zig index b5ae7d1..3d12f56 100644 --- a/src/01_lexic/string.zig +++ b/src/01_lexic/string.zig @@ -18,10 +18,7 @@ pub fn lex(input: []const u8, start: usize) LexError!?LexReturn { } // lex everything but quote and newline - // TODO: escape characters - var current_pos = start + 1; - while (current_pos < cap) { const next_char = input[current_pos]; // string is finished, return it diff --git a/src/01_lexic/token.zig b/src/01_lexic/token.zig index 209a96f..7a667e2 100644 --- a/src/01_lexic/token.zig +++ b/src/01_lexic/token.zig @@ -2,9 +2,22 @@ pub const TokenType = enum { Int, Float, Identifier, + Datatype, Operator, Comment, String, + // grouping signs + LeftParen, + RightParen, + LeftBracket, + RightBracket, + LeftBrace, + RightBrace, + // punctiation that carries special meaning + Comma, + Newline, + // Others + Keyword, }; pub const Token = struct {