scan strings and comments

2024-03-27 08:36:34 -05:00 · 2024-03-27 08:36:34 -05:00 · d098b60a38
commit d098b60a38
parent 2e93df0fd8
3 changed files with 121 additions and 0 deletions
--- a/lexer/lexer.ts
+++ b/lexer/lexer.ts
@ -1,5 +1,6 @@
 import { scan_identifier } from "./identifier_lexer.ts";
 import { scan_number } from "./number_lexer.ts";
 import { scan_string } from "./string_lexer.ts";
 import { is_digit, is_lowercase, is_uppercase } from "./utils.ts";
 export type Token = {
@ -78,6 +79,45 @@ export function lex(code: string): Array<Token> {
            tokens.push(token);
            continue;
        }
        // try to scan a string
        else if (c === "\"") {
            // if the current default token is not empty, push it to the tokens array
            if (current_default_token !== "") {
                tokens.push({ v: current_default_token, token_type: "" });
                current_default_token = "";
            }
            const [token, next] = scan_string(code, current_pos);
            current_pos = next;
            tokens.push(token);
            continue;
        }
        // try to scan a comment
        else if (c === "/" && code[current_pos + 1] === "/") {
            // if the current default token is not empty, push it to the tokens array
            if (current_default_token !== "") {
                tokens.push({ v: current_default_token, token_type: "" });
                current_default_token = "";
            }
            let comment = "";
            let pos = current_pos;
            while (pos < code_len) {
                const char = code[pos];
                if (char === "\n") {
                    break;
                }
                comment += char;
                pos++;
            }
            tokens.push({ v: comment, token_type: "comment" });
            current_pos = pos;
            continue;
        }
        // here, check if a token was found
        if (next_token !== null && next_position !== null) {
--- a/lexer/string_lexer.test.ts
+++ b/lexer/string_lexer.test.ts
@ -0,0 +1,32 @@
 import { expect, test, describe } from "bun:test";
 import { scan_string } from "./string_lexer";
 describe("String Lexer", () => {
    // One row per scenario: the test title, the source text handed to the
    // scanner, and the [token, next position] pair it must return when
    // scanning starts at index 0 (the opening quote).
    const scenarios = [
        {
            title: "should scan an empty string",
            source: "\"\"",
            expected: [{ v: "\"\"", token_type: "string" }, 2],
        },
        {
            title: "should scan a string with a single character",
            source: "\"a\"",
            expected: [{ v: "\"a\"", token_type: "string" }, 3],
        },
        {
            title: "should scan a string with multiple characters",
            source: "\"hello\"",
            expected: [{ v: "\"hello\"", token_type: "string" }, 7],
        },
        {
            // The escape sequence is kept verbatim (backslash + n), not decoded.
            title: "should scan a string with an escape character",
            source: "\"\\n\"",
            expected: [{ v: "\"\\n\"", token_type: "string" }, 4],
        },
    ];

    for (const { title, source, expected } of scenarios) {
        test(title, () => {
            const result = scan_string(source, 0);
            expect(result).toEqual(expected);
        });
    }
 });
--- a/lexer/string_lexer.ts
+++ b/lexer/string_lexer.ts
@ -0,0 +1,49 @@
 import type { Token } from "./lexer.ts";
 /**
  * Scans a double-quoted string literal out of `input`.
  *
  * The character at `starting_position` is not inspected; it is assumed to be
  * the opening quote (the caller only dispatches here after seeing `"`).
  * Scanning stops at the first of:
  *   - an unescaped closing quote (included in the token),
  *   - a newline (not consumed; the string is unterminated),
  *   - the end of the input.
  *
  * Escape sequences are not decoded: the token value is the raw source text,
  * backslashes included.
  *
  * @param input the full source text being lexed
  * @param starting_position index of the opening quote in `input`
  * @returns a tuple of the `"string"` token and the index of the first
  *          character after it; the index never exceeds `input.length`
  */
 export function scan_string(input: string, starting_position: number): [Token, number] {
    let value = "\"";
    let pos = starting_position + 1;
    while (pos < input.length) {
        const c = input[pos];
        if (c === "\"") {
            // closing quote: part of the token, and scanning is done
            value += c;
            pos += 1;
            break;
        }
        if (c === "\n") {
            // todo: error handling, return an error indicator and the caller should render a red wavy underline
            break;
        }
        if (c === "\\") {
            const next_char = input[pos + 1];
            if (next_char === undefined) {
                // Dangling backslash at the very end of the input: there is no
                // character to escape. Keep the backslash itself and stop, so
                // the value never contains the text "undefined" and the
                // returned position stays within the input.
                value += c;
                pos += 1;
                break;
            }
            // consume the backslash and the escaped character as one unit, so
            // an escaped quote does not terminate the string
            value += handle_escape_char(next_char);
            pos += 2;
            continue;
        }
        value += c;
        pos += 1;
    }
    return [{ v: value, token_type: "string" }, pos];
 }

 /**
  * Returns the source text of an escape sequence given the character that
  * follows the backslash.
  *
  * Every escape (recognised or not) is kept verbatim as backslash + character;
  * nothing is decoded into the character it denotes.
  *
  * @param next_char the single character immediately after the backslash
  * @returns the two-character sequence `\` + `next_char`
  */
 function handle_escape_char(next_char: string): string {
    return "\\" + next_char;
 }