feat: lex string without escape characters

This commit is contained in:
Araozu 2024-11-27 19:36:45 -05:00
parent 3f95515964
commit b8c542d88e
3 changed files with 105 additions and 0 deletions

View File

@ -97,6 +97,7 @@ pub fn build(b: *std.Build) void {
"src/01_lexic/datatype.zig", "src/01_lexic/datatype.zig",
"src/01_lexic/operator.zig", "src/01_lexic/operator.zig",
"src/01_lexic/comment.zig", "src/01_lexic/comment.zig",
"src/01_lexic/string.zig",
"src/01_lexic/token.zig", "src/01_lexic/token.zig",
"src/01_lexic/utils.zig", "src/01_lexic/utils.zig",
}; };

102
src/01_lexic/string.zig Normal file
View File

@ -0,0 +1,102 @@
const std = @import("std");
const assert = std.debug.assert;
const token = @import("./token.zig");
const utils = @import("./utils.zig");
const Token = token.Token;
const TokenType = token.TokenType;
const LexError = token.LexError;
const LexReturn = token.LexReturn;
/// Attempts to lex a double-quoted string literal beginning at `input[start]`.
///
/// Returns `null` when `input[start]` is not a `"` character. Otherwise scans
/// forward for the closing `"` and, on success, returns a tuple holding a
/// `TokenType.String` token (its text spans both quotes) and the position
/// immediately after the closing quote.
///
/// Returns `LexError.IncompleteString` when a newline or the end of `input`
/// is reached before a closing quote.
///
/// `start` must be a valid index into `input`; this is asserted.
pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
    const cap = input.len;
    assert(start < cap);

    // Anything that does not open with a quote is not a string.
    if (input[start] != '"') return null;

    // Walk the bytes after the opening quote until something ends the string.
    // TODO: escape characters
    var cursor = start + 1;
    while (cursor < cap) : (cursor += 1) {
        switch (input[cursor]) {
            // Closing quote: the token covers both quotes.
            '"' => return .{
                Token.init(input[start .. cursor + 1], TokenType.String, start),
                cursor + 1,
            },
            // A string may not span lines.
            '\n' => return LexError.IncompleteString,
            else => {},
        }
    }

    // Ran out of input without seeing the closing quote.
    return LexError.IncompleteString;
}
test "should lex empty string" {
    // A null result (not a string) fails the test with the same error a
    // failed `expect` would produce.
    const result = (try lex("\"\"", 0)) orelse return error.TestUnexpectedResult;
    const tok = result[0];
    const next_pos = result[1];

    // The token text keeps both quotes; the next position is just past them.
    try std.testing.expectEqualDeep("\"\"", tok.value);
    try std.testing.expectEqual(2, next_pos);
}
test "should lex string with 1 char" {
    // A null result (not a string) fails the test with the same error a
    // failed `expect` would produce.
    const result = (try lex("\"a\"", 0)) orelse return error.TestUnexpectedResult;
    const tok = result[0];
    const next_pos = result[1];

    // Opening quote + one byte + closing quote.
    try std.testing.expectEqualDeep("\"a\"", tok.value);
    try std.testing.expectEqual(3, next_pos);
}
test "should lex string with unicode" {
    // A null result (not a string) fails the test with the same error a
    // failed `expect` would produce.
    const result = (try lex("\"😭\"", 0)) orelse return error.TestUnexpectedResult;
    const tok = result[0];
    const next_pos = result[1];

    // Positions are byte offsets: two quotes plus the 4-byte UTF-8 emoji.
    try std.testing.expectEqualDeep("\"😭\"", tok.value);
    try std.testing.expectEqual(6, next_pos);
}
test "shouldnt lex other things" {
    // Input that does not start with a quote must yield null, not a token.
    if (try lex("322", 0)) |_| {
        return error.TestUnexpectedResult;
    }
}
test "should fail on EOF before closing string" {
    // The input ends without a closing quote, so lexing must report
    // IncompleteString. `expectError` fails the test both when a different
    // error is returned and when no error is returned at all.
    const input = "\"hello";
    try std.testing.expectError(LexError.IncompleteString, lex(input, 0));
}
test "should fail on newline before closing string" {
    // A newline appears before any closing quote, so lexing must report
    // IncompleteString. `expectError` fails the test both when a different
    // error is returned and when no error is returned at all.
    const input = "\"hello\n";
    try std.testing.expectError(LexError.IncompleteString, lex(input, 0));
}

View File

@ -4,6 +4,7 @@ pub const TokenType = enum {
Identifier, Identifier,
Operator, Operator,
Comment, Comment,
String,
}; };
pub const Token = struct { pub const Token = struct {
@ -25,6 +26,7 @@ pub const LexError = error{
Incomplete, Incomplete,
IncompleteFloatingNumber, IncompleteFloatingNumber,
IncompleteScientificNumber, IncompleteScientificNumber,
IncompleteString,
CRLF, CRLF,
}; };