Compare commits

...

2 Commits

Author SHA1 Message Date
4e1b2c3cab feat: integrate identifier/datatype lexer 2024-11-18 21:14:09 -05:00
15a66ebc3d feat: lex datatype 2024-11-18 21:08:37 -05:00
5 changed files with 107 additions and 5 deletions

View File

@ -94,6 +94,7 @@ pub fn build(b: *std.Build) void {
"src/01_lexic/root.zig", "src/01_lexic/root.zig",
"src/01_lexic/number.zig", "src/01_lexic/number.zig",
"src/01_lexic/identifier.zig", "src/01_lexic/identifier.zig",
"src/01_lexic/datatype.zig",
"src/01_lexic/token.zig", "src/01_lexic/token.zig",
"src/01_lexic/utils.zig", "src/01_lexic/utils.zig",
}; };

65
src/01_lexic/datatype.zig Normal file
View File

@ -0,0 +1,65 @@
const std = @import("std");
const token = @import("./token.zig");
const utils = @import("./utils.zig");
const Token = token.Token;
const TokenType = token.TokenType;
const LexError = token.LexError;
const LexReturn = token.LexReturn;
/// Lexes a Datatype: an uppercase letter followed by any number of
/// identifier characters (letters, digits, underscore).
/// Returns null when the input at `start` does not begin a datatype,
/// otherwise the token and the position immediately after it.
pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
    // Out of bounds, or first character is not uppercase: no datatype here.
    if (start >= input.len or !utils.is_uppercase(input[start])) {
        return null;
    }

    // Consume the leading uppercase letter, then as many identifier
    // characters as possible; when none follow, the token is one char long.
    const end_pos = utils.lex_many(utils.is_identifier_char, input, start + 1) orelse start + 1;

    // NOTE(review): the token is tagged TokenType.Identifier even though this
    // lexes a datatype — confirm whether a dedicated Datatype tag exists.
    return .{
        Token.init(input[start..end_pos], TokenType.Identifier, start),
        end_pos,
    };
}
test "should lex datatype" {
    // A plain uppercase-led word must be lexed in full.
    const maybe = try lex("MyType", 0);
    try std.testing.expect(maybe != null);
    const t = maybe.?[0];
    try std.testing.expectEqualDeep("MyType", t.value);
}
test "should lex datatype 2" {
    // Digits are valid after the leading uppercase letter.
    const maybe = try lex("MyTypeWith322", 0);
    try std.testing.expect(maybe != null);
    const t = maybe.?[0];
    try std.testing.expectEqualDeep("MyTypeWith322", t.value);
}
test "shouldnt lex identifier" {
    // Lowercase-led words are identifiers, not datatypes: lex must reject them.
    try std.testing.expect((try lex("myDatatype", 0)) == null);
}

View File

@ -7,6 +7,7 @@ const TokenType = token.TokenType;
const LexError = token.LexError; const LexError = token.LexError;
const LexReturn = token.LexReturn; const LexReturn = token.LexReturn;
/// Lexes an identifier
pub fn lex(input: []const u8, start: usize) LexError!?LexReturn { pub fn lex(input: []const u8, start: usize) LexError!?LexReturn {
const cap = input.len; const cap = input.len;
var final_pos = start; var final_pos = start;
@ -92,6 +93,18 @@ test "should lex identifier 3" {
} }
} }
test "should lex identifier 4" {
const input = "identifier_number_3";
const output = try lex(input, 0);
if (output) |tuple| {
const t = tuple[0];
try std.testing.expectEqualDeep("identifier_number_3", t.value);
} else {
try std.testing.expect(false);
}
}
test "shouldnt lex datatype" { test "shouldnt lex datatype" {
const input = "MyDatatype"; const input = "MyDatatype";
const output = try lex(input, 0); const output = try lex(input, 0);

View File

@ -1,5 +1,7 @@
const std = @import("std"); const std = @import("std");
const number = @import("./number.zig"); const number = @import("./number.zig");
const identifier = @import("./identifier.zig");
const datatype = @import("./datatype.zig");
const token = @import("./token.zig"); const token = @import("./token.zig");
const TokenType = token.TokenType; const TokenType = token.TokenType;
@ -15,20 +17,37 @@ pub fn tokenize(input: []const u8, alloc: std.mem.Allocator) !void {
while (current_pos < input_len) { while (current_pos < input_len) {
const actual_next_pos = ignore_whitespace(input, current_pos); const actual_next_pos = ignore_whitespace(input, current_pos);
const next_token = try number.lex(input, input_len, actual_next_pos); // attempt to lex a number
if (next_token) |tuple| { if (try number.lex(input, input_len, actual_next_pos)) |tuple| {
const t = tuple[0]; const t = tuple[0];
current_pos = tuple[1]; current_pos = tuple[1];
try tokens.append(t); try tokens.append(t);
} else { }
// attempt to lex an identifier
else if (try identifier.lex(input, actual_next_pos)) |tuple| {
const t = tuple[0];
current_pos = tuple[1];
try tokens.append(t);
}
// attempt to lex a datatype
else if (try datatype.lex(input, actual_next_pos)) |tuple| {
const t = tuple[0];
current_pos = tuple[1];
try tokens.append(t);
}
// nothing was matched. fail
// TODO: instead of failing add an error, ignore all chars
// until next whitespace, and continue lexing
// TODO: check if this is a good error recovery strategy
else {
// no lexer matched // no lexer matched
std.debug.print("unmatched args: anytype:c\n", .{}); std.debug.print("unmatched args: anytype:c\n", .{});
break; break;
} }
} }
std.debug.print("array list len: {d}", .{tokens.items.len});
} }
/// Ignores all whitespace on `input` since `start` /// Ignores all whitespace on `input` since `start`

View File

@ -22,6 +22,10 @@ pub fn is_lowercase(c: u8) bool {
return 'a' <= c and c <= 'z'; return 'a' <= c and c <= 'z';
} }
pub fn is_uppercase(c: u8) bool {
return 'A' <= c and c <= 'Z';
}
pub fn is_lowercase_underscore(c: u8) bool { pub fn is_lowercase_underscore(c: u8) bool {
return c == '_' or ('a' <= c and c <= 'z'); return c == '_' or ('a' <= c and c <= 'z');
} }