feat: lex grouping signs

This commit is contained in:
Araozu 2024-11-27 20:41:42 -05:00
parent 71d617928a
commit 882d123fe0
6 changed files with 154 additions and 4 deletions

View File

@ -100,6 +100,7 @@ pub fn build(b: *std.Build) void {
"src/01_lexic/string.zig", "src/01_lexic/string.zig",
"src/01_lexic/token.zig", "src/01_lexic/token.zig",
"src/01_lexic/utils.zig", "src/01_lexic/utils.zig",
"src/01_lexic/grouping.zig",
}; };
for (files) |file| { for (files) |file| {
const file_unit_test = b.addTest(.{ const file_unit_test = b.addTest(.{

View File

@ -28,7 +28,7 @@ pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
} }
return .{ return .{
Token.init(input[start..final_pos], TokenType.Identifier, start), Token.init(input[start..final_pos], TokenType.Datatype, start),
final_pos, final_pos,
}; };
} }

121
src/01_lexic/grouping.zig Normal file
View File

@ -0,0 +1,121 @@
const std = @import("std");
const assert = std.debug.assert;
const token = @import("./token.zig");
const utils = @import("./utils.zig");
const Token = token.Token;
const TokenType = token.TokenType;
const LexError = token.LexError;
const LexReturn = token.LexReturn;
/// Attempts to lex a single grouping sign located at `input[start]`.
///
/// Recognized signs are `(`, `)`, `[`, `]`, `{` and `}`. On a match the
/// result is a tuple holding the one-character token and the position right
/// after it (`start + 1`). Any other character yields `null`.
///
/// `start` must be a valid index into `input`; this is asserted.
pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
    // the caller must leave at least one char to look at
    assert(input.len > start);

    const kind: TokenType = switch (input[start]) {
        '(' => .LeftParen,
        ')' => .RightParen,
        '[' => .LeftBracket,
        ']' => .RightBracket,
        '{' => .LeftBrace,
        '}' => .RightBrace,
        // not a grouping sign
        else => return null,
    };

    // grouping signs are always exactly one char wide
    const next_pos = start + 1;
    const sign = Token.init(input[start..next_pos], kind, start);
    return .{ sign, next_pos };
}
test "shouldnt lex other things" {
    // a digit is not a grouping sign, so no token is expected
    const result = try lex("322", 0);
    try std.testing.expect(result == null);
}
test "should lex opening paren" {
    const source = "( hello )";
    // a missing token fails the test with the same error as `expect(false)`
    const result = (try lex(source, 0)) orelse return error.TestUnexpectedResult;
    const tok = result[0];
    const next_pos = result[1];

    try std.testing.expectEqualDeep("(", tok.value);
    try std.testing.expectEqual(TokenType.LeftParen, tok.token_type);
    try std.testing.expectEqual(1, next_pos);
}
test "should lex closing paren" {
    const source = "( hello )";
    // index 8 is the last char of the input, the closing sign
    const result = (try lex(source, 8)) orelse return error.TestUnexpectedResult;
    const tok = result[0];
    const next_pos = result[1];

    try std.testing.expectEqualDeep(")", tok.value);
    try std.testing.expectEqual(TokenType.RightParen, tok.token_type);
    try std.testing.expectEqual(9, next_pos);
}
test "should lex opening bracket" {
    const source = "[ hello ]";
    // a missing token fails the test with the same error as `expect(false)`
    const result = (try lex(source, 0)) orelse return error.TestUnexpectedResult;
    const tok = result[0];
    const next_pos = result[1];

    try std.testing.expectEqualDeep("[", tok.value);
    try std.testing.expectEqual(TokenType.LeftBracket, tok.token_type);
    try std.testing.expectEqual(1, next_pos);
}
test "should lex closing bracket" {
    const source = "[ hello ]";
    // index 8 is the last char of the input, the closing sign
    const result = (try lex(source, 8)) orelse return error.TestUnexpectedResult;
    const tok = result[0];
    const next_pos = result[1];

    try std.testing.expectEqualDeep("]", tok.value);
    try std.testing.expectEqual(TokenType.RightBracket, tok.token_type);
    try std.testing.expectEqual(9, next_pos);
}
test "should lex opening brace" {
    const source = "{ hello }";
    // a missing token fails the test with the same error as `expect(false)`
    const result = (try lex(source, 0)) orelse return error.TestUnexpectedResult;
    const tok = result[0];
    const next_pos = result[1];

    try std.testing.expectEqualDeep("{", tok.value);
    try std.testing.expectEqual(TokenType.LeftBrace, tok.token_type);
    try std.testing.expectEqual(1, next_pos);
}
test "should lex closing brace" {
    const source = "{ hello }";
    // index 8 is the last char of the input, the closing sign
    const result = (try lex(source, 8)) orelse return error.TestUnexpectedResult;
    const tok = result[0];
    const next_pos = result[1];

    try std.testing.expectEqualDeep("}", tok.value);
    try std.testing.expectEqual(TokenType.RightBrace, tok.token_type);
    try std.testing.expectEqual(9, next_pos);
}

View File

@ -6,6 +6,8 @@ const datatype = @import("./datatype.zig");
const token = @import("./token.zig"); const token = @import("./token.zig");
const operator = @import("./operator.zig"); const operator = @import("./operator.zig");
const comment = @import("./comment.zig"); const comment = @import("./comment.zig");
const string = @import("./string.zig");
const grouping = @import("./grouping.zig");
const TokenType = token.TokenType; const TokenType = token.TokenType;
const Token = token.Token; const Token = token.Token;
@ -37,6 +39,14 @@ pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !void {
try tokens.append(t); try tokens.append(t);
} }
// attempt to lex a string
else if (try string.lex(input, actual_next_pos)) |tuple| {
assert(tuple[1] > current_pos);
const t = tuple[0];
current_pos = tuple[1];
try tokens.append(t);
}
// attempt to lex a datatype // attempt to lex a datatype
else if (try datatype.lex(input, actual_next_pos)) |tuple| { else if (try datatype.lex(input, actual_next_pos)) |tuple| {
assert(tuple[1] > current_pos); assert(tuple[1] > current_pos);
@ -61,6 +71,14 @@ pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !void {
try tokens.append(t); try tokens.append(t);
} }
// attempt to lex grouping signs
else if (try grouping.lex(input, actual_next_pos)) |tuple| {
assert(tuple[1] > current_pos);
const t = tuple[0];
current_pos = tuple[1];
try tokens.append(t);
}
// nothing was matched. fail // nothing was matched. fail
// TODO: instead of failing add an error, ignore all chars // TODO: instead of failing add an error, ignore all chars
// until next whitespace, and continue lexing // until next whitespace, and continue lexing

View File

@ -18,10 +18,7 @@ pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
} }
// lex everything but quote and newline // lex everything but quote and newline
// TODO: escape characters
var current_pos = start + 1; var current_pos = start + 1;
while (current_pos < cap) { while (current_pos < cap) {
const next_char = input[current_pos]; const next_char = input[current_pos];
// string is finished, return it // string is finished, return it

View File

@ -2,9 +2,22 @@ pub const TokenType = enum {
Int, Int,
Float, Float,
Identifier, Identifier,
Datatype,
Operator, Operator,
Comment, Comment,
String, String,
// grouping signs
LeftParen,
RightParen,
LeftBracket,
RightBracket,
LeftBrace,
RightBrace,
// punctuation that carries special meaning
Comma,
Newline,
// Others
Keyword,
}; };
pub const Token = struct { pub const Token = struct {