Compare commits

...

2 Commits

Author SHA1 Message Date
71d617928a feat: lex escape characters inside strings 2024-11-27 20:07:01 -05:00
b8c542d88e feat: lex string without escape characters 2024-11-27 19:36:45 -05:00
3 changed files with 173 additions and 0 deletions

View File

@ -97,6 +97,7 @@ pub fn build(b: *std.Build) void {
"src/01_lexic/datatype.zig", "src/01_lexic/datatype.zig",
"src/01_lexic/operator.zig", "src/01_lexic/operator.zig",
"src/01_lexic/comment.zig", "src/01_lexic/comment.zig",
"src/01_lexic/string.zig",
"src/01_lexic/token.zig", "src/01_lexic/token.zig",
"src/01_lexic/utils.zig", "src/01_lexic/utils.zig",
}; };

170
src/01_lexic/string.zig Normal file
View File

@ -0,0 +1,170 @@
const std = @import("std");
const assert = std.debug.assert;
const token = @import("./token.zig");
const utils = @import("./utils.zig");
const Token = token.Token;
const TokenType = token.TokenType;
const LexError = token.LexError;
const LexReturn = token.LexReturn;
/// Attempts to lex a double-quoted string literal that begins at `start`.
///
/// Returns `null` when `input[start]` is not a `"`. On success returns a
/// tuple holding the String token (its value is the slice of `input` that
/// includes both quotes) and the index right after the closing quote.
///
/// Returns `LexError.IncompleteString` when a newline or the end of the
/// input is found before the closing quote, or directly after a backslash.
///
/// Asserts that `start` is a valid index into `input`.
pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
    const cap = input.len;
    assert(start < cap);

    // anything that does not open with a quote is not a string
    if (input[start] != '"') return null;

    var pos = start + 1;
    while (pos < cap) {
        switch (input[pos]) {
            // closing quote: the token spans from the opening quote to here
            '"' => {
                const end = pos + 1;
                return .{
                    Token.init(input[start..end], TokenType.String, start),
                    end,
                };
            },
            // a raw newline ends the line before the string was closed
            '\n' => return LexError.IncompleteString,
            // backslash: the byte after it is consumed without inspection,
            // unless it is missing (EOF) or a newline
            // TODO: if next char is not an escape char, return warning?
            '\\' => {
                const escaped = pos + 1;
                if (escaped == cap or input[escaped] == '\n') {
                    return LexError.IncompleteString;
                }
                pos += 2;
            },
            else => pos += 1,
        }
    }

    // the loop only ends when EOF is hit without a closing quote
    return LexError.IncompleteString;
}
test "should lex empty string" {
    // two quotes with nothing between them
    const source = "\"\"";
    const result = (try lex(source, 0)) orelse return error.TestUnexpectedResult;
    try std.testing.expectEqualDeep("\"\"", result[0].value);
    // the next position is right after the closing quote
    try std.testing.expectEqual(2, result[1]);
}
test "should lex string with 1 char" {
    const source = "\"a\"";
    const result = (try lex(source, 0)) orelse return error.TestUnexpectedResult;
    // the token value keeps the surrounding quotes
    try std.testing.expectEqualDeep("\"a\"", result[0].value);
    try std.testing.expectEqual(3, result[1]);
}
test "should lex string with unicode" {
    const source = "\"😭\"";
    const result = (try lex(source, 0)) orelse return error.TestUnexpectedResult;
    try std.testing.expectEqualDeep("\"😭\"", result[0].value);
    // positions are byte offsets: 2 quotes + a 4-byte UTF-8 sequence
    try std.testing.expectEqual(6, result[1]);
}
test "shouldnt lex other things" {
    // input that does not start with a quote is not a string: no token, no error
    const source = "322";
    try std.testing.expect((try lex(source, 0)) == null);
}
test "should fail on EOF before closing string" {
    // the opening quote is never matched before the input ends
    const input = "\"hello";
    // expectError fails the test both when lex succeeds and when it
    // returns a different error, reporting what was actually produced
    try std.testing.expectError(LexError.IncompleteString, lex(input, 0));
}
test "should fail on newline before closing string" {
    // a raw newline appears before any closing quote
    const input = "\"hello\n";
    // expectError fails the test both when lex succeeds and when it
    // returns a different error, reporting what was actually produced
    try std.testing.expectError(LexError.IncompleteString, lex(input, 0));
}
test "should lex string with escape character 1" {
    // an escaped quote in the middle must not close the string
    const source = "\"test\\\"string\"";
    const result = (try lex(source, 0)) orelse return error.TestUnexpectedResult;
    try std.testing.expectEqualDeep("\"test\\\"string\"", result[0].value);
    try std.testing.expectEqual(14, result[1]);
}
test "should lex string with escape character 2" {
    // an escaped backslash in the middle of the string
    const source = "\"test\\\\string\"";
    const result = (try lex(source, 0)) orelse return error.TestUnexpectedResult;
    try std.testing.expectEqualDeep("\"test\\\\string\"", result[0].value);
    try std.testing.expectEqual(14, result[1]);
}
test "should fail on EOF after backslash" {
    // the input ends right after a backslash, so there is nothing to escape
    const input = "\"hello \\";
    // expectError fails the test both when lex succeeds and when it
    // returns a different error, reporting what was actually produced
    try std.testing.expectError(LexError.IncompleteString, lex(input, 0));
}
test "should fail on newline after backslash" {
    // a backslash immediately followed by a newline
    const input = "\"hello \\\n";
    // expectError fails the test both when lex succeeds and when it
    // returns a different error, reporting what was actually produced
    try std.testing.expectError(LexError.IncompleteString, lex(input, 0));
}

View File

@ -4,6 +4,7 @@ pub const TokenType = enum {
Identifier, Identifier,
Operator, Operator,
Comment, Comment,
String,
}; };
pub const Token = struct { pub const Token = struct {
@ -25,6 +26,7 @@ pub const LexError = error{
Incomplete, Incomplete,
IncompleteFloatingNumber, IncompleteFloatingNumber,
IncompleteScientificNumber, IncompleteScientificNumber,
IncompleteString,
CRLF, CRLF,
}; };