diff --git a/build.zig b/build.zig index 22a2733..1205135 100644 --- a/build.zig +++ b/build.zig @@ -92,6 +92,8 @@ pub fn build(b: *std.Build) void { const files = [_][]const u8{ "src/01_lexic/root.zig", "src/02_syntax/root.zig", + "src/02_syntax/variable.zig", + "src/02_syntax/expression.zig", }; for (files) |file| { const file_unit_test = b.addTest(.{ diff --git a/src/01_lexic/identifier.zig b/src/01_lexic/identifier.zig index 18b1064..35711ed 100644 --- a/src/01_lexic/identifier.zig +++ b/src/01_lexic/identifier.zig @@ -25,8 +25,13 @@ pub fn lex(input: []const u8, start: usize) LexError!?LexReturn { final_pos = new_pos; } + const value = input[start..final_pos]; + + // check for keywords + const new_token_type = if (utils.try_keyword("var", value)) TokenType.K_Var else TokenType.Identifier; + return .{ - Token.init(input[start..final_pos], TokenType.Identifier, start), + Token.init(value, new_token_type, start), final_pos, }; } @@ -38,6 +43,7 @@ test "should lex single letter" { if (output) |tuple| { const t = tuple[0]; try std.testing.expectEqualDeep("a", t.value); + try std.testing.expectEqual(TokenType.Identifier, t.token_type); } else { try std.testing.expect(false); } @@ -50,6 +56,7 @@ test "should lex single underscore" { if (output) |tuple| { const t = tuple[0]; try std.testing.expectEqualDeep("_", t.value); + try std.testing.expectEqual(TokenType.Identifier, t.token_type); } else { try std.testing.expect(false); } @@ -62,6 +69,7 @@ test "should lex identifier 1" { if (output) |tuple| { const t = tuple[0]; try std.testing.expectEqualDeep("abc", t.value); + try std.testing.expectEqual(TokenType.Identifier, t.token_type); } else { try std.testing.expect(false); } @@ -74,6 +82,7 @@ test "should lex identifier 2" { if (output) |tuple| { const t = tuple[0]; try std.testing.expectEqualDeep("snake_case", t.value); + try std.testing.expectEqual(TokenType.Identifier, t.token_type); } else { try std.testing.expect(false); } @@ -86,6 +95,7 @@ test "should lex 
identifier 3" { if (output) |tuple| { const t = tuple[0]; try std.testing.expectEqualDeep("camelCase", t.value); + try std.testing.expectEqual(TokenType.Identifier, t.token_type); } else { try std.testing.expect(false); } @@ -98,6 +108,7 @@ test "should lex identifier 4" { if (output) |tuple| { const t = tuple[0]; try std.testing.expectEqualDeep("identifier_number_3", t.value); + try std.testing.expectEqual(TokenType.Identifier, t.token_type); } else { try std.testing.expect(false); } @@ -109,3 +120,16 @@ test "shouldnt lex datatype" { try std.testing.expect(output == null); } + +test "should lex var keyword" { + const input = "var"; + const output = try lex(input, 0); + + if (output) |tuple| { + const t = tuple[0]; + try std.testing.expectEqualDeep("var", t.value); + try std.testing.expectEqual(TokenType.K_Var, t.token_type); + } else { + try std.testing.expect(false); + } +} diff --git a/src/01_lexic/token.zig b/src/01_lexic/token.zig index bebd2ad..bf1aae1 100644 --- a/src/01_lexic/token.zig +++ b/src/01_lexic/token.zig @@ -17,6 +17,7 @@ pub const TokenType = enum { Comma, Newline, // Each keyword will have its own token + K_Var, }; pub const Token = struct { diff --git a/src/01_lexic/utils.zig b/src/01_lexic/utils.zig index 13bee33..2bc297e 100644 --- a/src/01_lexic/utils.zig +++ b/src/01_lexic/utils.zig @@ -1,3 +1,4 @@ +const std = @import("std"); const token = @import("./token.zig"); const LexError = token.LexError; const LexReturn = token.LexReturn; @@ -96,3 +97,7 @@ pub fn lex_many( return current_pos; } + +pub inline fn try_keyword(comptime expected: []const u8, actual: []const u8) bool { + return std.mem.eql(u8, expected, actual); +} diff --git a/src/02_syntax/expression.zig b/src/02_syntax/expression.zig index 34a7cfa..2f22e6e 100644 --- a/src/02_syntax/expression.zig +++ b/src/02_syntax/expression.zig @@ -4,7 +4,7 @@ const Token = lexic.Token; const TokenType = lexic.TokenType; const ParseError = @import("./types.zig").ParseError; -const Expression = 
union(enum) { +pub const Expression = union(enum) { number: *const Token, /// Attempts to parse an expression from a token stream. diff --git a/src/02_syntax/root.zig b/src/02_syntax/root.zig index 67a2728..7696b57 100644 --- a/src/02_syntax/root.zig +++ b/src/02_syntax/root.zig @@ -1,21 +1,21 @@ const std = @import("std"); const lexic = @import("lexic"); const expression = @import("./expression.zig"); +const variable = @import("./variable.zig"); +const types = @import("./types.zig"); + const Token = lexic.Token; const TokenType = lexic.TokenType; -const ParseError = @import("./types.zig").ParseError; +const ParseError = types.ParseError; const Statement = union(enum) { VariableBinding: u8, -}; -const VariableBinding = struct { - is_mutable: bool, - datatype: ?*Token, - identifier: *Token, - expression: expression.Expression, - - fn parse() !@This() {} + fn parse(tokens: *const std.ArrayList(Token), pos: usize) ParseError!@This() { + _ = tokens; + _ = pos; + return ParseError.Error; + } }; test { diff --git a/src/02_syntax/types.zig b/src/02_syntax/types.zig index d0e469f..ae1199c 100644 --- a/src/02_syntax/types.zig +++ b/src/02_syntax/types.zig @@ -1,4 +1,15 @@ +const std = @import("std"); +const lexic = @import("lexic"); + +/// Represents a failure of parsing. pub const ParseError = error{ + /// The parse operation failed, but it is recoverable. + /// Other parsers should be considered. Unmatched, + /// The parse operation failed after a point of no return. + /// For example, a `var` keyword was found, but then no identifier. + /// The parsing should stop. Error, }; + +pub const TokenStream = std.ArrayList(lexic.Token); diff --git a/src/02_syntax/utils.zig b/src/02_syntax/utils.zig new file mode 100644 index 0000000..262ba87 --- /dev/null +++ b/src/02_syntax/utils.zig @@ -0,0 +1,21 @@ +const std = @import("std"); +const lexic = @import("lexic"); + +/// Expects that the given token `t` has type `value`. 
+/// If it fails returns `error.Unmatched`, otherwise +/// returns the same token passed (`t`) +pub inline fn expect_token_type(comptime value: lexic.TokenType, t: *lexic.Token) error{Unmatched}!*lexic.Token { + if (t.token_type == value) { + return t; + } else { + return error.Unmatched; + } +} + +pub inline fn expect_operator(comptime value: []const u8, t: *lexic.Token) error{Unmatched}!*lexic.Token { + if (t.token_type == lexic.TokenType.Operator and std.mem.eql(u8, value, t.value)) { + return t; + } else { + return error.Unmatched; + } +} diff --git a/src/02_syntax/variable.zig b/src/02_syntax/variable.zig new file mode 100644 index 0000000..f8e7358 --- /dev/null +++ b/src/02_syntax/variable.zig @@ -0,0 +1,67 @@ +const std = @import("std"); +const lexic = @import("lexic"); +const expression = @import("expression.zig"); +const types = @import("./types.zig"); +const utils = @import("./utils.zig"); + +const TokenStream = types.TokenStream; +const ParseError = types.ParseError; + +const VariableBinding = struct { + is_mutable: bool, + datatype: ?*lexic.Token, + identifier: *lexic.Token, + expression: expression.Expression, + alloc: std.mem.Allocator, + + fn init(tokens: *const TokenStream, pos: usize, allocator: std.mem.Allocator) ParseError!@This() { + std.debug.assert(pos < tokens.items.len); + + _ = allocator; + + // try to parse a var keyword + const var_keyword = try utils.expect_token_type(lexic.TokenType.K_Var, &tokens.items[pos]); + _ = var_keyword; + + // check there is still input + if (pos + 1 >= tokens.items.len) { + // return error + return ParseError.Error; + } + + // try to parse an identifier + const identifier = utils.expect_token_type(lexic.TokenType.Identifier, &tokens.items[pos + 1]) catch { + return ParseError.Error; + }; + _ = identifier; + + // parse equal sign + if (pos + 2 >= tokens.items.len) return ParseError.Error; + const equal_sign = utils.expect_operator("=", &tokens.items[pos + 2]) catch { + return ParseError.Error; + }; + _ = 
equal_sign; + + // parse expression + + // provisional good return value + return ParseError.Unmatched; + } + + fn deinit(self: *@This()) void { + _ = self; + } +}; + +test "should parse a minimal var" { + const input = "var my_variable ="; + const tokens = try lexic.tokenize(input, std.testing.allocator); + defer tokens.deinit(); + + const binding = VariableBinding.init(&tokens, 0, std.testing.allocator) catch |err| { + try std.testing.expectEqual(ParseError.Unmatched, err); + return; + }; + + try std.testing.expectEqual(false, binding.is_mutable); +}