feat: lex comments, forbid CR
This commit is contained in:
parent
00597752da
commit
3f95515964
@ -96,6 +96,7 @@ pub fn build(b: *std.Build) void {
|
|||||||
"src/01_lexic/identifier.zig",
|
"src/01_lexic/identifier.zig",
|
||||||
"src/01_lexic/datatype.zig",
|
"src/01_lexic/datatype.zig",
|
||||||
"src/01_lexic/operator.zig",
|
"src/01_lexic/operator.zig",
|
||||||
|
"src/01_lexic/comment.zig",
|
||||||
"src/01_lexic/token.zig",
|
"src/01_lexic/token.zig",
|
||||||
"src/01_lexic/utils.zig",
|
"src/01_lexic/utils.zig",
|
||||||
};
|
};
|
||||||
|
77
src/01_lexic/comment.zig
Normal file
77
src/01_lexic/comment.zig
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
const std = @import("std");
|
||||||
|
const assert = std.debug.assert;
|
||||||
|
const token = @import("./token.zig");
|
||||||
|
const utils = @import("./utils.zig");
|
||||||
|
|
||||||
|
const Token = token.Token;
|
||||||
|
const TokenType = token.TokenType;
|
||||||
|
const LexError = token.LexError;
|
||||||
|
const LexReturn = token.LexReturn;
|
||||||
|
|
||||||
|
/// Attempts to lex a `//` line comment beginning at `start`.
///
/// Returns `null` when the bytes at `start` do not open a comment,
/// or a tuple of (comment Token, next position) on success. The
/// terminating LF, if any, is not included in the token's value.
/// Fails with `LexError.CRLF` if a CR byte appears inside the
/// comment — CRLF line endings are forbidden.
///
/// Asserts `start` is inside `input`.
pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
    const cap = input.len;
    assert(start < cap);

    // A comment needs at least its two leading slashes.
    if (start + 1 >= cap) return null;

    // Not a `//` opener: nothing for this lexer to do.
    if (input[start] != '/' or input[start + 1] != '/') return null;

    // Consume every byte up to, but not including, the LF terminator
    // (or until end of input).
    var end_pos = start + 2;
    while (end_pos < cap and input[end_pos] != '\n') : (end_pos += 1) {
        // Reject CR anywhere inside the comment body.
        if (input[end_pos] == '\r') return LexError.CRLF;
    }

    return .{ Token.init(input[start..end_pos], TokenType.Comment, start), end_pos };
}
|
||||||
|
|
||||||
|
test "should lex comment until EOF" {
    // A comment that runs to the end of input (no trailing LF).
    const source = "// aea";

    // Fail the test outright if no comment was recognized.
    const result = (try lex(source, 0)) orelse return std.testing.expect(false);

    // The token spans the whole input and the cursor lands past it.
    try std.testing.expectEqualDeep("// aea", result[0].value);
    try std.testing.expectEqual(6, result[1]);
}
|
||||||
|
|
||||||
|
test "should lex comment until newline (LF)" {
    // Two comments separated by LF; only the first should be lexed.
    const source = "// my comment\n// other comment";

    // Fail the test outright if no comment was recognized.
    const result = (try lex(source, 0)) orelse return std.testing.expect(false);

    // Token stops before the LF; next position points at the LF itself.
    try std.testing.expectEqualDeep("// my comment", result[0].value);
    try std.testing.expectEqual(13, result[1]);
}
|
||||||
|
|
||||||
|
// Fixed typo in the test name: "shouldn" -> "should not".
test "should not lex incomplete comment" {
    // A single slash is not a comment opener; lex must decline with null.
    const input = "/aa";
    const output = try lex(input, 0);

    try std.testing.expect(output == null);
}
|
||||||
|
|
||||||
|
test "should fail on CRLF" {
    // CR (\x0D) inside a comment must be rejected with LexError.CRLF.
    const input = "// my comment\x0D\x0A// other comment";

    // expectError asserts the call fails with exactly this error,
    // replacing the manual catch / expect(false) pattern.
    try std.testing.expectError(LexError.CRLF, lex(input, 0));
}
|
@ -5,6 +5,7 @@ const identifier = @import("./identifier.zig");
|
|||||||
const datatype = @import("./datatype.zig");
|
const datatype = @import("./datatype.zig");
|
||||||
const token = @import("./token.zig");
|
const token = @import("./token.zig");
|
||||||
const operator = @import("./operator.zig");
|
const operator = @import("./operator.zig");
|
||||||
|
const comment = @import("./comment.zig");
|
||||||
|
|
||||||
const TokenType = token.TokenType;
|
const TokenType = token.TokenType;
|
||||||
const Token = token.Token;
|
const Token = token.Token;
|
||||||
@ -44,6 +45,14 @@ pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !void {
|
|||||||
|
|
||||||
try tokens.append(t);
|
try tokens.append(t);
|
||||||
}
|
}
|
||||||
|
// attempt to lex a comment
|
||||||
|
else if (try comment.lex(input, actual_next_pos)) |tuple| {
|
||||||
|
assert(tuple[1] > current_pos);
|
||||||
|
const t = tuple[0];
|
||||||
|
current_pos = tuple[1];
|
||||||
|
|
||||||
|
try tokens.append(t);
|
||||||
|
}
|
||||||
// attempt to lex an operator
|
// attempt to lex an operator
|
||||||
else if (try operator.lex(input, actual_next_pos)) |tuple| {
|
else if (try operator.lex(input, actual_next_pos)) |tuple| {
|
||||||
assert(tuple[1] > current_pos);
|
assert(tuple[1] > current_pos);
|
||||||
|
@ -3,6 +3,7 @@ pub const TokenType = enum {
|
|||||||
Float,
|
Float,
|
||||||
Identifier,
|
Identifier,
|
||||||
Operator,
|
Operator,
|
||||||
|
Comment,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub const Token = struct {
|
pub const Token = struct {
|
||||||
@ -24,6 +25,7 @@ pub const LexError = error{
|
|||||||
Incomplete,
|
Incomplete,
|
||||||
IncompleteFloatingNumber,
|
IncompleteFloatingNumber,
|
||||||
IncompleteScientificNumber,
|
IncompleteScientificNumber,
|
||||||
|
CRLF,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Contains the lexed token and the next position
|
/// Contains the lexed token and the next position
|
||||||
|
Loading…
Reference in New Issue
Block a user