diff --git a/lexer/lexer.ts b/lexer/lexer.ts index ff1de07..982f30f 100644 --- a/lexer/lexer.ts +++ b/lexer/lexer.ts @@ -1,5 +1,6 @@ import { scan_identifier } from "./identifier_lexer.ts"; import { scan_number } from "./number_lexer.ts"; +import { scan_string } from "./string_lexer.ts"; import { is_digit, is_lowercase, is_uppercase } from "./utils.ts"; export type Token = { @@ -78,6 +79,45 @@ export function lex(code: string): Array { tokens.push(token); continue; } + // try to scan a string + else if (c === "\"") { + // if the current default token is not empty, push it to the tokens array + if (current_default_token !== "") { + tokens.push({ v: current_default_token, token_type: "" }); + current_default_token = ""; + } + + const [token, next] = scan_string(code, current_pos); + current_pos = next; + tokens.push(token); + continue; + } + // try to scan a comment + else if (c === "/" && code[current_pos + 1] === "/") { + // if the current default token is not empty, push it to the tokens array + if (current_default_token !== "") { + tokens.push({ v: current_default_token, token_type: "" }); + current_default_token = ""; + } + + let comment = ""; + let pos = current_pos; + + while (pos < code_len) { + const char = code[pos]; + + if (char === "\n") { + break; + } + + comment += char; + pos++; + } + + tokens.push({ v: comment, token_type: "comment" }); + current_pos = pos; + continue; + } // here, check if a token was found if (next_token !== null && next_position !== null) { diff --git a/lexer/string_lexer.test.ts b/lexer/string_lexer.test.ts new file mode 100644 index 0000000..79ce9c9 --- /dev/null +++ b/lexer/string_lexer.test.ts @@ -0,0 +1,32 @@ +import { expect, test, describe } from "bun:test"; +import { scan_string } from "./string_lexer"; + +describe("String Lexer", () => { + test("should scan an empty string", () => { + const code = "\"\""; + const token = scan_string(code, 0); + + expect(token).toEqual([{ v: "\"\"", token_type: "string" }, 2]); + }); + + 
import type { Token } from "./lexer.ts";

/**
 * Scans a double-quoted string literal from `input`.
 *
 * The character at `starting_position` is taken to be the opening quote and
 * is not re-checked. The token value is the raw source text of the literal:
 * both quotes are included and escape sequences are kept verbatim (a
 * backslash followed by the escaped character), not decoded.
 *
 * Scanning stops at the first of:
 * - an unescaped closing quote, which is consumed and included in the value;
 * - a line feed, which is neither consumed nor included (unterminated string);
 * - the end of `input` (unterminated string).
 *
 * @param input full source text being lexed
 * @param starting_position index of the opening `"` in `input`
 * @returns a tuple of the `"string"` token and the index of the first
 *   character after the scanned text; the index never exceeds `input.length`
 */
export function scan_string(input: string, starting_position: number): [Token, number] {
  let value = "\"";
  // skip the opening quote
  let pos = starting_position + 1;

  while (pos < input.length) {
    const c = input[pos];

    if (c === "\"") {
      // closing quote: part of the token, and scanning ends after it
      value += c;
      pos += 1;
      break;
    }
    if (c === "\n") {
      // todo: error handling, return an error indicator and the caller should render a red wavy underline
      break;
    }
    if (c === "\\") {
      const next_char = input[pos + 1];
      if (next_char === undefined) {
        // A lone backslash at the very end of the input has nothing to
        // escape. Keep it as-is and stop, instead of appending the text
        // "undefined" and stepping past the end of the input.
        value += c;
        pos += 1;
        break;
      }
      // Consume the backslash and the escaped character together so that an
      // escaped quote does not terminate the string.
      value += handle_escape_char(next_char);
      pos += 2;
      continue;
    }

    value += c;
    pos += 1;
  }

  return [{ v: value, token_type: "string" }, pos];
}

/**
 * Returns the source text of an escape sequence given the character that
 * follows the backslash.
 *
 * Known escapes (`n`, `t`, `r`, `"`, `\`) and unknown ones are treated alike:
 * the sequence is preserved verbatim as a backslash plus `next_char`.
 *
 * @param next_char the character immediately after the backslash
 * @returns the two-character escape sequence as written in the source
 */
function handle_escape_char(next_char: string): string {
  return "\\" + next_char;
}