feat: lex operators
This commit is contained in:
parent
4e1b2c3cab
commit
00597752da
@ -95,6 +95,7 @@ pub fn build(b: *std.Build) void {
|
|||||||
"src/01_lexic/number.zig",
|
"src/01_lexic/number.zig",
|
||||||
"src/01_lexic/identifier.zig",
|
"src/01_lexic/identifier.zig",
|
||||||
"src/01_lexic/datatype.zig",
|
"src/01_lexic/datatype.zig",
|
||||||
|
"src/01_lexic/operator.zig",
|
||||||
"src/01_lexic/token.zig",
|
"src/01_lexic/token.zig",
|
||||||
"src/01_lexic/utils.zig",
|
"src/01_lexic/utils.zig",
|
||||||
};
|
};
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
|
const assert = std.debug.assert;
|
||||||
const token = @import("./token.zig");
|
const token = @import("./token.zig");
|
||||||
const utils = @import("./utils.zig");
|
const utils = @import("./utils.zig");
|
||||||
|
|
||||||
@ -11,10 +12,7 @@ const LexReturn = token.LexReturn;
|
|||||||
pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
|
pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
|
||||||
const cap = input.len;
|
const cap = input.len;
|
||||||
var final_pos = start;
|
var final_pos = start;
|
||||||
|
assert(start < cap);
|
||||||
if (start >= cap) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
// lex lowercase or underscore
|
// lex lowercase or underscore
|
||||||
if (!utils.is_lowercase_underscore(input[start])) {
|
if (!utils.is_lowercase_underscore(input[start])) {
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
|
const assert = std.debug.assert;
|
||||||
const token = @import("./token.zig");
|
const token = @import("./token.zig");
|
||||||
const utils = @import("./utils.zig");
|
const utils = @import("./utils.zig");
|
||||||
|
|
||||||
@ -13,6 +14,7 @@ const is_decimal_digit = utils.is_decimal_digit;
|
|||||||
///
|
///
|
||||||
/// A number is either an Int or a Float.
|
/// A number is either an Int or a Float.
|
||||||
pub fn lex(input: []const u8, cap: usize, start: usize) LexError!?LexReturn {
|
pub fn lex(input: []const u8, cap: usize, start: usize) LexError!?LexReturn {
|
||||||
|
assert(start < cap);
|
||||||
const first_char = input[start];
|
const first_char = input[start];
|
||||||
|
|
||||||
// Attempt to lex a hex, octal or binary number
|
// Attempt to lex a hex, octal or binary number
|
||||||
@ -74,6 +76,7 @@ fn prefixed(comptime prefix: u8, input: []const u8, cap: usize, start: usize) !?
|
|||||||
/// avoid confusion with PHP literal octals.
|
/// avoid confusion with PHP literal octals.
|
||||||
/// Floating point numbers can.
|
/// Floating point numbers can.
|
||||||
fn integer(input: []const u8, cap: usize, start: usize) LexError!?LexReturn {
|
fn integer(input: []const u8, cap: usize, start: usize) LexError!?LexReturn {
|
||||||
|
assert(start < cap);
|
||||||
const first_char = input[start];
|
const first_char = input[start];
|
||||||
if (!is_decimal_digit(first_char)) {
|
if (!is_decimal_digit(first_char)) {
|
||||||
return null;
|
return null;
|
||||||
|
73
src/01_lexic/operator.zig
Normal file
73
src/01_lexic/operator.zig
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
const std = @import("std");
|
||||||
|
const assert = std.debug.assert;
|
||||||
|
const token = @import("./token.zig");
|
||||||
|
const utils = @import("./utils.zig");
|
||||||
|
|
||||||
|
const Token = token.Token;
|
||||||
|
const TokenType = token.TokenType;
|
||||||
|
const LexError = token.LexError;
|
||||||
|
const LexReturn = token.LexReturn;
|
||||||
|
|
||||||
|
// lex an operator
|
||||||
|
/// Lexes an operator beginning at `start`.
///
/// Asserts that `start` lies inside `input`, then hands the scan to
/// `utils.lex_many_1` with `utils.is_operator_char` as the predicate.
/// When that yields an end position, returns a tuple holding the
/// `Operator` token (its value is `input[start..end]`, its position is
/// `start`) followed by that end position. Otherwise returns `null`.
pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
    assert(start < input.len);

    // No operator found at `start`: report "nothing lexed".
    const end = utils.lex_many_1(utils.is_operator_char, input, start) orelse return null;

    const operator_token = Token.init(input[start..end], TokenType.Operator, start);
    return .{ operator_token, end };
}
|
||||||
|
|
||||||
|
test "should lex single operator" {
    // A lone `=` must yield a token; a null result fails the test.
    const result = (try lex("=", 0)) orelse return error.TestUnexpectedResult;

    // The token text is the operator itself and the next position is 1.
    try std.testing.expectEqualDeep("=", result[0].value);
    try std.testing.expectEqual(1, result[1]);
}
|
||||||
|
|
||||||
|
test "should lex operator of len 2" {
    // `+=` must yield a token; a null result fails the test.
    const result = (try lex("+=", 0)) orelse return error.TestUnexpectedResult;

    // Both characters belong to one token and the next position is 2.
    try std.testing.expectEqualDeep("+=", result[0].value);
    try std.testing.expectEqual(2, result[1]);
}
|
||||||
|
|
||||||
|
test "should lex operator of len 3" {
    // Lexing starts at index 1, just past the leading space.
    const result = (try lex(" >>= ", 1)) orelse return error.TestUnexpectedResult;

    // The token covers `>>=` only; the next position (4) is the trailing space.
    try std.testing.expectEqualDeep(">>=", result[0].value);
    try std.testing.expectEqual(4, result[1]);
}
|
||||||
|
|
||||||
|
test "should not lex something else" {
    // "322" is expected to produce no operator token; any token is a failure.
    if (try lex("322", 0)) |_| {
        return error.TestUnexpectedResult;
    }
}
|
@ -1,8 +1,10 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
|
const assert = std.debug.assert;
|
||||||
const number = @import("./number.zig");
|
const number = @import("./number.zig");
|
||||||
const identifier = @import("./identifier.zig");
|
const identifier = @import("./identifier.zig");
|
||||||
const datatype = @import("./datatype.zig");
|
const datatype = @import("./datatype.zig");
|
||||||
const token = @import("./token.zig");
|
const token = @import("./token.zig");
|
||||||
|
const operator = @import("./operator.zig");
|
||||||
|
|
||||||
const TokenType = token.TokenType;
|
const TokenType = token.TokenType;
|
||||||
const Token = token.Token;
|
const Token = token.Token;
|
||||||
@ -16,9 +18,11 @@ pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !void {
|
|||||||
|
|
||||||
while (current_pos < input_len) {
|
while (current_pos < input_len) {
|
||||||
const actual_next_pos = ignore_whitespace(input, current_pos);
|
const actual_next_pos = ignore_whitespace(input, current_pos);
|
||||||
|
assert(current_pos <= actual_next_pos);
|
||||||
|
|
||||||
// attempt to lex a number
|
// attempt to lex a number
|
||||||
if (try number.lex(input, input_len, actual_next_pos)) |tuple| {
|
if (try number.lex(input, input_len, actual_next_pos)) |tuple| {
|
||||||
|
assert(tuple[1] > current_pos);
|
||||||
const t = tuple[0];
|
const t = tuple[0];
|
||||||
current_pos = tuple[1];
|
current_pos = tuple[1];
|
||||||
|
|
||||||
@ -26,6 +30,7 @@ pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !void {
|
|||||||
}
|
}
|
||||||
// attempt to lex an identifier
|
// attempt to lex an identifier
|
||||||
else if (try identifier.lex(input, actual_next_pos)) |tuple| {
|
else if (try identifier.lex(input, actual_next_pos)) |tuple| {
|
||||||
|
assert(tuple[1] > current_pos);
|
||||||
const t = tuple[0];
|
const t = tuple[0];
|
||||||
current_pos = tuple[1];
|
current_pos = tuple[1];
|
||||||
|
|
||||||
@ -33,6 +38,15 @@ pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !void {
|
|||||||
}
|
}
|
||||||
// attempt to lex a datatype
|
// attempt to lex a datatype
|
||||||
else if (try datatype.lex(input, actual_next_pos)) |tuple| {
|
else if (try datatype.lex(input, actual_next_pos)) |tuple| {
|
||||||
|
assert(tuple[1] > current_pos);
|
||||||
|
const t = tuple[0];
|
||||||
|
current_pos = tuple[1];
|
||||||
|
|
||||||
|
try tokens.append(t);
|
||||||
|
}
|
||||||
|
// attempt to lex an operator
|
||||||
|
else if (try operator.lex(input, actual_next_pos)) |tuple| {
|
||||||
|
assert(tuple[1] > current_pos);
|
||||||
const t = tuple[0];
|
const t = tuple[0];
|
||||||
current_pos = tuple[1];
|
current_pos = tuple[1];
|
||||||
|
|
||||||
|
@ -2,6 +2,7 @@ pub const TokenType = enum {
|
|||||||
Int,
|
Int,
|
||||||
Float,
|
Float,
|
||||||
Identifier,
|
Identifier,
|
||||||
|
Operator,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub const Token = struct {
|
pub const Token = struct {
|
||||||
|
@ -35,6 +35,10 @@ pub fn is_identifier_char(c: u8) bool {
|
|||||||
return c == '_' or ('a' <= c and c <= 'z') or ('A' <= c and c <= 'Z') or ('0' <= c and c <= '9');
|
return c == '_' or ('a' <= c and c <= 'z') or ('A' <= c and c <= 'Z') or ('0' <= c and c <= '9');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reports whether `c` is one of the characters accepted inside an operator:
/// `+ - = * ! / | @ # $ ~ % & ? < > ^ . :`
pub fn is_operator_char(c: u8) bool {
    return switch (c) {
        '+', '-', '=', '*', '!', '/', '|', '@', '#', '$' => true,
        '~', '%', '&', '?', '<', '>', '^', '.', ':' => true,
        else => false,
    };
}
|
||||||
|
|
||||||
/// Runs a discriminator function at least once,
|
/// Runs a discriminator function at least once,
|
||||||
/// and returns the end position of the lex.
|
/// and returns the end position of the lex.
|
||||||
///
|
///
|
||||||
@ -44,7 +48,7 @@ pub fn lex_many_1(
|
|||||||
comptime lex_fun: fn (c: u8) bool,
|
comptime lex_fun: fn (c: u8) bool,
|
||||||
input: []const u8,
|
input: []const u8,
|
||||||
start: usize,
|
start: usize,
|
||||||
) usize {
|
) ?usize {
|
||||||
// assert that there is input left
|
// assert that there is input left
|
||||||
const cap = input.len;
|
const cap = input.len;
|
||||||
var current_pos = start;
|
var current_pos = start;
|
||||||
|
Loading…
Reference in New Issue
Block a user