From 4665d87b5f8044d9d3507c60a4ce5f92cfdc2a0d Mon Sep 17 00:00:00 2001 From: Araozu Date: Wed, 27 Mar 2024 08:12:32 -0500 Subject: [PATCH] scan identifiers --- lexer/identifier_lexer.test.ts | 48 ++++++++++++++++++++++++++++++++++ lexer/identifier_lexer.ts | 35 +++++++++++++++++++++++++ lexer/lexer.ts | 21 ++++++++++++--- lexer/number_lexer.test.ts | 6 ++--- lexer/number_lexer.ts | 2 +- lexer/utils.ts | 14 +++++++++- md/learn/index.md | 2 +- 7 files changed, 119 insertions(+), 9 deletions(-) create mode 100644 lexer/identifier_lexer.test.ts create mode 100644 lexer/identifier_lexer.ts diff --git a/lexer/identifier_lexer.test.ts b/lexer/identifier_lexer.test.ts new file mode 100644 index 0000000..6016882 --- /dev/null +++ b/lexer/identifier_lexer.test.ts @@ -0,0 +1,48 @@ +import { expect, test, describe } from "bun:test"; +import { scan_identifier } from "./identifier_lexer"; + + +describe("Identifier Lexer", () => { + test("should return an identifier token", () => { + const code = "a"; + const token = scan_identifier(code, 0); + + expect(token).toEqual([{ v: "a", token_type: "identifier" }, 1]); + }); + + test("should scan an underscore", () => { + const code = "_"; + const token = scan_identifier(code, 0); + + expect(token).toEqual([{ v: "_", token_type: "identifier" }, 1]); + }); + + test("should scan an identifier with an underscore", () => { + const code = "a_"; + const token = scan_identifier(code, 0); + + expect(token).toEqual([{ v: "a_", token_type: "identifier" }, 2]); + }); + + test("should scan an identifier that starts with an underscore", () => { + const code = "_a"; + const token = scan_identifier(code, 0); + + expect(token).toEqual([{ v: "_a", token_type: "identifier" }, 2]); + }); + + test("should scan an identifier with numbers and uppercase letters", () => { + const code = "aA1"; + const token = scan_identifier(code, 0); + + expect(token).toEqual([{ v: "aA1", token_type: "identifier" }, 3]); + }); + + test("should scan a keyword", () => { + const code = "val"; + const token = scan_identifier(code, 0); + + expect(token).toEqual([{ v: "val", token_type: "keyword" }, 3]); + }); +}); + diff --git a/lexer/identifier_lexer.ts b/lexer/identifier_lexer.ts new file mode 100644 index 0000000..086740e --- /dev/null +++ b/lexer/identifier_lexer.ts @@ -0,0 +1,35 @@ +import type { Token } from "./lexer.ts"; +import { is_identifier_char } from "./utils.ts"; + +/** + * Scans an identifier, at the given position in the input string. + * This function assumes that the character at the given position is a letter. + */ +export function scan_identifier(input: string, starting_position: number): [Token, number] { + let value = input[starting_position]; + let pos = starting_position + 1; + + while (pos < input.length) { + const c = input[pos]; + + if (is_identifier_char(c)) { + pos += 1; + value += c; + } + else { + break; + } + } + + return [{ v: value, token_type: check_keyword(value) }, pos]; +} + +function check_keyword(value: string): string { + const keywords = ["case", "static", "const", "enum", "loop", "use", "break", "catch", "continue", "do", "else", "finally", "for", "fun", "if", "in", "fn", "nil", "return", "throw", "try", "while", "type", "match", "with", "of", "abstract", "class", "interface", "private", "pub", "map", "override", "open", "init", "val", "var", "mut", "clone"]; + + if (keywords.includes(value)) { + return "keyword"; + } + return "identifier"; +} + diff --git a/lexer/lexer.ts b/lexer/lexer.ts index aa3f2af..a62cb57 100644 --- a/lexer/lexer.ts +++ b/lexer/lexer.ts @@ -1,5 +1,6 @@ -import { lex_number } from "./number_lexer.ts"; -import { is_digit } from "./utils.ts"; +import { scan_identifier } from "./identifier_lexer.ts"; +import { scan_number } from "./number_lexer.ts"; +import { is_digit, is_lowercase } from "./utils.ts"; export type Token = { v: string, @@ -37,6 +38,7 @@ export function lex(code: string): Array { let next_token: Token | null = null; let next_position: number | null = null; + // try to scan a number if (is_digit(c)) { // if the current default token is not empty, push it to the tokens array if (current_default_token !== "") { @@ -45,7 +47,20 @@ export function lex(code: string): Array { } // lex a number - const [token, next] = lex_number(code, current_pos); + const [token, next] = scan_number(code, current_pos); + current_pos = next; + tokens.push(token); + continue; + } + // try to scan an identifier/keyword + else if (is_lowercase(c) || c === "_") { + // if the current default token is not empty, push it to the tokens array + if (current_default_token !== "") { + tokens.push({ v: current_default_token, token_type: "" }); + current_default_token = ""; + } + + const [token, next] = scan_identifier(code, current_pos); current_pos = next; tokens.push(token); continue; diff --git a/lexer/number_lexer.test.ts b/lexer/number_lexer.test.ts index 4c1da1d..f51bdfc 100644 --- a/lexer/number_lexer.test.ts +++ b/lexer/number_lexer.test.ts @@ -1,17 +1,17 @@ import { expect, test, describe } from "bun:test"; -import { lex_number } from "./number_lexer"; +import { scan_number } from "./number_lexer"; describe("Number Lexer", () => { test("should return a whole number token", () => { const code = "1"; - const token = lex_number(code, 0); + const token = scan_number(code, 0); expect(token).toEqual([{ v: "1", token_type: "number" }, 1]); }); test("should return a whole number token pt 2", () => { const code = "12345"; - const token = lex_number(code, 0); + const token = scan_number(code, 0); expect(token).toEqual([{ v: "12345", token_type: "number" }, 5]); }); diff --git a/lexer/number_lexer.ts b/lexer/number_lexer.ts index 6f86e9e..e5875e8 100644 --- a/lexer/number_lexer.ts +++ b/lexer/number_lexer.ts @@ -10,7 +10,7 @@ import { is_digit } from "./utils.ts"; * @param pos the position to start scanning from * @returns */ -export function lex_number(input: string, pos: number): [Token, number] { +export function scan_number(input: string, pos: number): [Token, number] { const [token_value, next] = scan_decimal(input, pos); return [{ v: token_value, token_type: "number" }, next]; diff --git a/lexer/utils.ts b/lexer/utils.ts index cc221eb..be87f71 100644 --- a/lexer/utils.ts +++ b/lexer/utils.ts @@ -1,3 +1,15 @@ export function is_digit(c: string): boolean { return c >= '0' && c <= '9'; -} \ No newline at end of file +} + +export function is_lowercase(c: string): boolean { + return c >= 'a' && c <= 'z'; +} + +export function is_uppercase(c: string): boolean { + return c >= 'A' && c <= 'Z'; +} + +export function is_identifier_char(c: string): boolean { + return is_lowercase(c) || is_uppercase(c) || is_digit(c) || c === '_'; +} diff --git a/md/learn/index.md b/md/learn/index.md index 10573db..bac43e4 100644 --- a/md/learn/index.md +++ b/md/learn/index.md @@ -76,7 +76,7 @@ val has_key = haystack.contains("needle") ] // THP -Obj { +.{ names: #("Toni", "Stark"), // Tuple age: 33, numbers: [32, 64, 128]