diff --git a/build.zig b/build.zig
index 7678f5d..3fd3e05 100644
--- a/build.zig
+++ b/build.zig
@@ -97,6 +97,7 @@ pub fn build(b: *std.Build) void {
         "src/01_lexic/datatype.zig",
         "src/01_lexic/operator.zig",
         "src/01_lexic/comment.zig",
+        "src/01_lexic/string.zig",
         "src/01_lexic/token.zig",
         "src/01_lexic/utils.zig",
     };
diff --git a/src/01_lexic/string.zig b/src/01_lexic/string.zig
new file mode 100644
index 0000000..1d673f3
--- /dev/null
+++ b/src/01_lexic/string.zig
@@ -0,0 +1,102 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const token = @import("./token.zig");
+const utils = @import("./utils.zig");
+
+const Token = token.Token;
+const TokenType = token.TokenType;
+const LexError = token.LexError;
+const LexReturn = token.LexReturn;
+
+pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
+    const cap = input.len;
+    assert(start < cap);
+
+    // lex starting quote
+    if (input[start] != '"') {
+        return null;
+    }
+
+    // lex everything but quote and newline
+    // TODO: escape characters
+    var current_pos = start + 1;
+    while (current_pos < cap and input[current_pos] != '"' and input[current_pos] != '\n') {
+        current_pos += 1;
+    }
+
+    // expect ending quote
+    if (current_pos == cap or input[current_pos] == '\n') {
+        // Error: EOF or newline before closing the string
+        return LexError.IncompleteString;
+    } else {
+        return .{
+            Token.init(input[start .. current_pos + 1], TokenType.String, start),
+            current_pos + 1,
+        };
+    }
+}
+
+test "should lex empty string" {
+    const input = "\"\"";
+    const output = try lex(input, 0);
+
+    if (output) |tuple| {
+        const t = tuple[0];
+        try std.testing.expectEqualDeep("\"\"", t.value);
+        try std.testing.expectEqual(2, tuple[1]);
+    } else {
+        try std.testing.expect(false);
+    }
+}
+
+test "should lex string with 1 char" {
+    const input = "\"a\"";
+    const output = try lex(input, 0);
+
+    if (output) |tuple| {
+        const t = tuple[0];
+        try std.testing.expectEqualDeep("\"a\"", t.value);
+        try std.testing.expectEqual(3, tuple[1]);
+    } else {
+        try std.testing.expect(false);
+    }
+}
+
+test "should lex string with unicode" {
+    const input = "\"😭\"";
+    const output = try lex(input, 0);
+
+    if (output) |tuple| {
+        const t = tuple[0];
+        try std.testing.expectEqualDeep("\"😭\"", t.value);
+        try std.testing.expectEqual(6, tuple[1]);
+    } else {
+        try std.testing.expect(false);
+    }
+}
+
+test "shouldn't lex other things" {
+    const input = "322";
+    const output = try lex(input, 0);
+    try std.testing.expect(output == null);
+}
+
+test "should fail on EOF before closing string" {
+    const input = "\"hello";
+    _ = lex(input, 0) catch |err| {
+        try std.testing.expectEqual(LexError.IncompleteString, err);
+        return;
+    };
+
+    try std.testing.expect(false);
+}
+
+test "should fail on newline before closing string" {
+    const input = "\"hello\n";
+    _ = lex(input, 0) catch |err| {
+        try std.testing.expectEqual(LexError.IncompleteString, err);
+        return;
+    };
+
+    try std.testing.expect(false);
+}
diff --git a/src/01_lexic/token.zig b/src/01_lexic/token.zig
index 17b176d..209a96f 100644
--- a/src/01_lexic/token.zig
+++ b/src/01_lexic/token.zig
@@ -4,6 +4,7 @@ pub const TokenType = enum {
     Identifier,
     Operator,
     Comment,
+    String,
 };
 
 pub const Token = struct {
@@ -25,6 +26,7 @@ pub const LexError = error{
     Incomplete,
     IncompleteFloatingNumber,
     IncompleteScientificNumber,
+    IncompleteString,
     CRLF,
 };
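
For context, here is a minimal sketch of how the new string sub-lexer might be wired into a main tokenizer loop. This is not part of the diff: the `tokenize` function, the module paths, and the assumption that `LexReturn` is a `.{ token, next_position }` tuple are inferred from the code above, not confirmed by the repository.

const std = @import("std");
const token = @import("./01_lexic/token.zig");
const string = @import("./01_lexic/string.zig");

const Token = token.Token;

// Hypothetical dispatch loop, not part of this change.
pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !std.ArrayList(Token) {
    var tokens = std.ArrayList(Token).init(alloc);
    errdefer tokens.deinit();

    var pos: usize = 0;
    while (pos < input.len) {
        // string.lex returns null when `pos` is not an opening quote,
        // errors on an unterminated string, and otherwise yields the
        // token plus the position right after the closing quote.
        if (try string.lex(input, pos)) |tuple| {
            try tokens.append(tuple[0]);
            pos = tuple[1];
            continue;
        }

        // ...the other sub-lexers (datatype, operator, comment, etc.)
        // would be tried here in the same way...
        pos += 1; // placeholder: skip characters no sub-lexer recognizes
    }

    return tokens;
}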