use the new highlighter in all the pages

scan strings and comments
scan datatypes
2024-03-27 12:01:14 -05:00 · 2024-03-27 08:36:34 -05:00 · 2024-03-27 08:18:31 -05:00 · 2024-03-27 08:12:32 -05:00 · 2024-03-26 18:29:52 -05:00 · 2024-03-26 18:05:58 -05:00
21 changed files with 566 additions and 1304 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,4 @@
 node_modules
 static/css/out.css
 static/learn
 static/js/highlighter.js
--- a/bun.lockb
+++ b/bun.lockb
--- a/lexer/highlighter.ts
+++ b/lexer/highlighter.ts
@ -0,0 +1,21 @@
 import {lex} from "./lexer.ts";
 import { CodeJar } from "codejar"
 /**
 * Highlight callback for the code editor: reads the plain text of `editor`,
 * lexes it, and replaces the editor's content with one
 * `<span class="token TYPE">` element per token.
 *
 * @param editor the editor element; its `textContent` is read and its
 *        `innerHTML` is overwritten
 */
 function thp_highlighter(editor: any) {
    const code: string = editor.textContent;
    const tokens = lex(code);
    let highlighted_code = "";
    for (const token of tokens) {
        // The token text ends up in innerHTML, so a raw `<` (e.g. inside a string
        // or a comment) would be parsed as markup. Default tokens already carry
        // `&lt;` from the lexer, so this replacement is a no-op for them.
        // `&` is deliberately left alone: escaping it would double-escape
        // the `&lt;` entities the lexer emits.
        const safe_value = token.v.replace(/</g, "&lt;");
        highlighted_code += `<span class="token ${token.token_type}">${safe_value}</span>`;
    }
    editor.innerHTML = highlighted_code;
 }
 // Expose the highlighter and CodeJar as globals, so that the inline <script>
 // blocks of the pages can call `CodeJar(element, thp_highlighter, ...)` after
 // loading the bundled file.
 // The ignores are presumably needed because `window` is not typed with these
 // properties - TODO confirm.
 // @ts-ignore
 window.thp_highlighter = thp_highlighter;
 // @ts-ignore
 window.CodeJar = CodeJar;
--- a/lexer/identifier_lexer.test.ts
+++ b/lexer/identifier_lexer.test.ts
@ -0,0 +1,55 @@
 import { expect, test, describe } from "bun:test";
 import { scan_identifier } from "./identifier_lexer";
 describe("Identifier Lexer", () => {
    // Each row: test title, source text, expected token type.
    // In every case the whole input must be consumed as a single token.
    const cases: Array<[string, string, string]> = [
        ["should return an identifier token", "a", "identifier"],
        ["should scan an underscore", "_", "identifier"],
        ["should scan an identifier with an underscore", "a_", "identifier"],
        ["should scan an identifier that starts with an underscore", "_a", "identifier"],
        ["should scan an identifier with numbers and uppercase letters", "aA1", "identifier"],
        ["should scan a keyword", "val", "keyword"],
    ];
    for (const [title, code, token_type] of cases) {
        test(title, () => {
            const result = scan_identifier(code, 0);
            expect(result).toEqual([{ v: code, token_type }, code.length]);
        });
    }
    // Datatypes are requested explicitly through the third argument
    test("should scan a datatype", () => {
        const result = scan_identifier("Int", 0, true);
        expect(result).toEqual([{ v: "Int", token_type: "class-name" }, 3]);
    });
 });
--- a/lexer/identifier_lexer.ts
+++ b/lexer/identifier_lexer.ts
@ -0,0 +1,44 @@
 import type { Token } from "./lexer.ts";
 import { is_identifier_char } from "./utils.ts";
 /**
 * Reads one identifier-like token from `input`, beginning at `starting_position`.
 * The first character is taken as-is (the caller is expected to have checked
 * it); the following characters are consumed for as long as
 * `is_identifier_char` accepts them.
 *
 * @param input the input string
 * @param starting_position index of the first character of the identifier
 * @param is_datatype when true the token is tagged "class-name"; otherwise the
 *        tag is whatever `check_keyword` returns for the scanned text
 * @returns the token, and the index of the first character after it
 */
 export function scan_identifier(input: string, starting_position: number, is_datatype = false): [Token, number] {
    let text = input[starting_position];
    let end = starting_position + 1;
    for (; end < input.length && is_identifier_char(input[end]); end += 1) {
        text += input[end];
    }
    const token_type = is_datatype ? "class-name" : check_keyword(text);
    return [{ v: text, token_type: token_type }, end];
 }
 // Words that are highlighted as keywords. Built once when the module loads,
 // instead of allocating and linearly searching an array for every identifier.
 const KEYWORDS: ReadonlySet<string> = new Set(["case", "static", "const", "enum", "loop", "use", "break", "catch", "continue", "do", "else", "finally", "for", "fun", "if", "in", "fn", "nil", "return", "throw", "try", "while", "type", "match", "with", "of", "abstract", "class", "interface", "private", "pub", "map", "override", "open", "init", "val", "var", "mut", "clone"]);
 /**
 * Classifies an already scanned identifier.
 *
 * @param value the text of the identifier
 * @returns "keyword" if `value` is exactly one of the known keywords
 *          (case sensitive), "identifier" otherwise
 */
 function check_keyword(value: string): string {
    return KEYWORDS.has(value) ? "keyword" : "identifier";
 }
--- a/lexer/lexer.test.ts
+++ b/lexer/lexer.test.ts
@ -0,0 +1,45 @@
 import { expect, test, describe } from "bun:test";
 import { lex } from "./lexer";
 describe("Lexer", () => {
    // Each row: test title, source text, expected list of tokens.
    const cases: Array<[string, string, Array<{ v: string, token_type: string }>]> = [
        ["empty program should return no tokens", "", []],
        ["program with whitespace should return a single token", " ", [{ v: " ", token_type: "" }]],
        ["program with newlines should return a single token", "\n", [{ v: "\n", token_type: "" }]],
        ["program with random unicode should return the same unicode", "🍕", [{ v: "🍕", token_type: "" }]],
        ["should scan integers", "12345", [{ v: "12345", token_type: "number" }]],
        ["should scan integers and whitespace around", "   12345  \n  ", [
            { v: "   ", token_type: "" },
            { v: "12345", token_type: "number" },
            { v: "  \n  ", token_type: "" },
        ]],
    ];
    for (const [title, code, expected] of cases) {
        test(title, () => {
            expect(lex(code)).toEqual(expected);
        });
    }
 });
--- a/lexer/lexer.ts
+++ b/lexer/lexer.ts
@ -0,0 +1,138 @@
 import { scan_identifier } from "./identifier_lexer.ts";
 import { scan_number } from "./number_lexer.ts";
 import { scan_string } from "./string_lexer.ts";
 import { is_digit, is_lowercase, is_uppercase } from "./utils.ts";
 /**
 * A fragment of the lexed source code, ready to be rendered by the highlighter.
 */
 export type Token = {
    // The text of the token. For default tokens `<` is stored as `&lt;`.
    v: string,
    // The kind of token, used by the highlighter as a CSS class name.
    // It is "" for default tokens (whitespace and anything not recognized);
    // the scanners produce "number", "identifier", "keyword", "class-name",
    // "string" and "comment".
    token_type: string,
 };
 /**
 * Lexes a string of THP code, and returns an array of tokens. Unlike a regular
 * lexer, whitespace and other characters are not ignored, and are instead
 * accumulated into "default" tokens (token_type "").
 *
 * This lexer implements a subset of the grammar defined in the THP language
 * specification, only recognizing the following tokens:
 * - Identifier and Keywords (start with a lowercase letter or `_`)
 * - Datatype (starts with an uppercase letter)
 * - String (starts with `"`)
 * - Number (starts with a digit)
 * - Single line comment (`//` up to, not including, the end of the line)
 *
 * Multi line comments are not recognized yet.
 *
 * The token values are meant to be inserted as HTML by the highlighter, so
 * every `<` in default, comment and string tokens is emitted as `&lt;`.
 *
 * @param code Code to lex
 * @returns An array of all the tokens found, in source order
 */
 export function lex(code: string): Array<Token> {
    const code_len = code.length;
    const tokens: Array<Token> = [];
    let current_pos = 0;
    // Text that belongs to no recognized token, accumulated until the next
    // recognized token or the end of the input
    let current_default_token = "";

    // Pushes the pending default token, if there is one
    const flush_default_token = () => {
        if (current_default_token !== "") {
            tokens.push({ v: current_default_token, token_type: "" });
            current_default_token = "";
        }
    };
    // Makes a piece of source text safe to be placed inside HTML
    const escape_lt = (text: string) => text.replace(/</g, "&lt;");

    while (current_pos < code_len) {
        const c = code[current_pos];

        // The token that starts at this position and where it ends, if any
        let scanned: [Token, number] | null = null;
        if (is_digit(c)) {
            scanned = scan_number(code, current_pos);
        }
        else if (is_lowercase(c) || c === "_") {
            scanned = scan_identifier(code, current_pos);
        }
        else if (is_uppercase(c)) {
            scanned = scan_identifier(code, current_pos, true);
        }
        else if (c === "\"") {
            const [token, next] = scan_string(code, current_pos);
            scanned = [{ ...token, v: escape_lt(token.v) }, next];
        }
        else if (c === "/" && code[current_pos + 1] === "/") {
            // the comment runs until the end of the line, or of the input
            const newline_pos = code.indexOf("\n", current_pos);
            const end = newline_pos === -1 ? code_len : newline_pos;
            const comment = escape_lt(code.slice(current_pos, end));
            scanned = [{ v: comment, token_type: "comment" }, end];
        }

        if (scanned !== null) {
            // the default token comes before the token just scanned
            flush_default_token();
            const [token, next] = scanned;
            tokens.push(token);
            current_pos = next;
            continue;
        }

        // anything else is part of a default token; replace < with &lt;
        current_default_token += c === "<" ? "&lt;" : c;
        current_pos++;
    }

    // if there was a default token, push it to the tokens array
    flush_default_token();
    return tokens;
 }
--- a/lexer/number_lexer.test.ts
+++ b/lexer/number_lexer.test.ts
@ -0,0 +1,19 @@
 import { expect, test, describe } from "bun:test";
 import { scan_number } from "./number_lexer";
 describe("Number Lexer", () => {
    test("should return a whole number token", () => {
        const code = "1";
        const token = scan_number(code, 0);
        expect(token).toEqual([{ v: "1", token_type: "number" }, 1]);
    });
    test("should return a whole number token pt 2", () => {
        const code = "12345";
        const token = scan_number(code, 0);
        expect(token).toEqual([{ v: "12345", token_type: "number" }, 5]);
    });
 });
--- a/lexer/number_lexer.ts
+++ b/lexer/number_lexer.ts
@ -0,0 +1,47 @@
 import type { Token } from "./lexer.ts";
 import { is_digit } from "./utils.ts";
 /**
 * Scans a number, at the given position in the input string.
 * This function assumes that the character at the given position is a digit.
 * It follows this grammar:
 *
 *     number = digits [ "." digits ] [ ( "e" | "E" ) [ "+" | "-" ] digits ]
 *     digits = digit { digit }
 *
 * A "." or an exponent marker that is not followed by the digits it requires
 * is not part of the number: scanning stops right before it, so `12.foo`
 * yields `12` and `7em` yields `7`.
 *
 * @param input the input string
 * @param pos the position to start scanning from
 * @returns the number token, and the position of the first character after it
 */
 export function scan_number(input: string, pos: number): [Token, number] {
    const [token_value, next] = scan_decimal(input, pos);
    return [{ v: token_value, token_type: "number" }, next];
 }
 // Returns the position of the first non-digit character at or after `pos`
 function scan_digits(input: string, pos: number): number {
    let end = pos;
    while (end < input.length && is_digit(input[end])) {
        end += 1;
    }
    return end;
 }
 // Scans a decimal number (integer part, optional fraction, optional exponent)
 // and returns its text along with the position right after it.
 // If there is no digit at `starting_position` it returns an empty text.
 function scan_decimal(input: string, starting_position: number): [string, number] {
    let pos = scan_digits(input, starting_position);
    if (pos === starting_position) {
        return ["", starting_position];
    }
    // fraction: only if there is at least one digit after the dot
    if (input[pos] === "." && pos + 1 < input.length && is_digit(input[pos + 1])) {
        pos = scan_digits(input, pos + 1);
    }
    // exponent: only if there is at least one digit after the (optional) sign
    if (input[pos] === "e" || input[pos] === "E") {
        let exponent_start = pos + 1;
        if (input[exponent_start] === "+" || input[exponent_start] === "-") {
            exponent_start += 1;
        }
        if (exponent_start < input.length && is_digit(input[exponent_start])) {
            pos = scan_digits(input, exponent_start);
        }
    }
    return [input.slice(starting_position, pos), pos];
 }
--- a/lexer/string_lexer.test.ts
+++ b/lexer/string_lexer.test.ts
@ -0,0 +1,32 @@
 import { expect, test, describe } from "bun:test";
 import { scan_string } from "./string_lexer";
 describe("String Lexer", () => {
    test("should scan an empty string", () => {
        const code = "\"\"";
        const token = scan_string(code, 0);
        expect(token).toEqual([{ v: "\"\"", token_type: "string" }, 2]);
    });
    test("should scan a string with a single character", () => {
        const code = "\"a\"";
        const token = scan_string(code, 0);
        expect(token).toEqual([{ v: "\"a\"", token_type: "string" }, 3]);
    });
    test("should scan a string with multiple characters", () => {
        const code = "\"hello\"";
        const token = scan_string(code, 0);
        expect(token).toEqual([{ v: "\"hello\"", token_type: "string" }, 7]);
    });
    test("should scan a string with an escape character", () => {
        const code = "\"\\n\"";
        const token = scan_string(code, 0);
        expect(token).toEqual([{ v: "\"\\n\"", token_type: "string" }, 4]);
    });
 });
--- a/lexer/string_lexer.ts
+++ b/lexer/string_lexer.ts
@ -0,0 +1,49 @@
 import type { Token } from "./lexer.ts";
 /**
 * Scans a double quoted string, at the given position in the input string.
 * The character at `starting_position` is not inspected: it is assumed to be
 * the opening quote, and the token text always starts with `"`.
 *
 * Scanning ends after the closing quote, or before a newline / at the end of
 * the input if the string is not terminated. Escape sequences (a backslash and
 * the character after it) are consumed as a unit, so an escaped quote does not
 * close the string.
 *
 * @param input the input string
 * @param starting_position the position of the opening quote
 * @returns the string token, and the position of the first character after it
 */
 export function scan_string(input: string, starting_position: number): [Token, number] {
    let value = "\"";
    let pos = starting_position + 1;
    while (pos < input.length) {
        const c = input[pos];
        if (c === "\"") {
            value += c;
            pos += 1;
            break;
        }
        if (c === "\n") {
            // todo: error handling, return an error indicator and the caller should render a red wavy underline
            break;
        }
        if (c === "\\") {
            if (pos + 1 >= input.length) {
                // A backslash that is the last character of the input has nothing
                // to escape: keep it as-is and stop, without reading (or skipping)
                // past the end of the input.
                value += c;
                pos += 1;
                break;
            }
            value += handle_escape_char(input[pos + 1]);
            pos += 2;
            continue;
        }
        value += c;
        pos += 1;
    }
    return [{ v: value, token_type: "string" }, pos];
 }
 /**
 * Returns the source text of the escape sequence formed by a backslash and
 * `next_char`. Known escapes (n, t, r, ", \) and unknown ones are rendered
 * the same way: the backslash followed by the character, unchanged.
 *
 * @param next_char the character right after the backslash
 */
 function handle_escape_char(next_char: string): string {
    return `\\${next_char}`;
 }
--- a/lexer/utils.ts
+++ b/lexer/utils.ts
@ -0,0 +1,15 @@
 /**
 * Tells whether `c` sorts between '0' and '9' (inclusive) in string order.
 * For a single character that means it is a decimal digit.
 */
 export function is_digit(c: string): boolean {
    const not_below = '0' <= c;
    return not_below && c <= '9';
 }
 /**
 * Tells whether `c` sorts between 'a' and 'z' (inclusive) in string order.
 * For a single character that means it is an unaccented lowercase latin letter.
 */
 export function is_lowercase(c: string): boolean {
    const not_below = 'a' <= c;
    return not_below && c <= 'z';
 }
 /**
 * Tells whether `c` sorts between 'A' and 'Z' (inclusive) in string order.
 * For a single character that means it is an unaccented uppercase latin letter.
 */
 export function is_uppercase(c: string): boolean {
    const not_below = 'A' <= c;
    return not_below && c <= 'Z';
 }
 /**
 * Tells whether `c` may appear inside an identifier: an underscore, or
 * anything accepted by `is_digit`, `is_uppercase` or `is_lowercase`.
 */
 export function is_identifier_char(c: string): boolean {
    if (c === '_') {
        return true;
    }
    return is_digit(c) || is_uppercase(c) || is_lowercase(c);
 }
--- a/md/learn/index.md
+++ b/md/learn/index.md
@ -76,7 +76,7 @@ val has_key = haystack.contains("needle")
 ]
 // THP
-Obj {
+.{
    names: #("Toni", "Stark"), // Tuple
    age: 33,
    numbers: [32, 64, 128]
--- a/package.json
+++ b/package.json
@ -5,7 +5,9 @@
  "main": "index.js",
  "scripts": {
    "generate": "md-docs",
-    "dev": "concurrently -k \"pnpm tailwind:watch\" \"serve ./static/ -l 3333\"",
+    "bundle": "bun build ./lexer/highlighter.ts --outdir ./static/js/ --format esm --minify",
    "dev": "concurrently -k \"tailwindcss -i ./tailwind.css -o ./static/css/out.css --watch\" \"serve ./static/ -l 3333\"",
    "codemirror": "esbuild --bundle ./static/js/codemirror.js  --outfile=./static/js/codemirror.min.js --minify --sourcemap",
    "tailwind:watch": "tailwindcss -i ./tailwind.css -o ./static/css/out.css --watch",
    "tailwind:build": "tailwindcss -i ./tailwind.css -o ./static/css/out.css --minify"
  },
@ -13,10 +15,16 @@
  "author": "",
  "license": "ISC",
  "dependencies": {
    "@types/bun": "^1.0.10",
    "codejar": "^4.2.0",
    "tailwindcss": "^3.2.7"
  },
  "devDependencies": {
    "concurrently": "^8.2.0",
-    "serve": "^14.2.0"
+    "serve": "^14.2.1",
    "bun-types": "latest"
  },
  "peerDependencies": {
    "typescript": "^5.0.0"
  }
 }
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
--- a/static/index.html
+++ b/static/index.html
@ -13,7 +13,7 @@
    <link rel="preconnect" href="https://fonts.googleapis.com">
    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
    <link
-        href="https://fonts.googleapis.com/css2?family=Fira+Sans:wght@400;500;600;700;800;900&family=Fira+Code&family=Inter:ital,wght@0,400;1,700&display=swap"
+        href="https://fonts.googleapis.com/css2?family=Fira+Sans:wght@400;500;600;700;800;900&family=Fira+Code&display=swap"
        rel="stylesheet">
 </head>
@ -26,7 +26,7 @@
        </div>
    </nav>
-    <div class="container mx-auto py-16 grid grid-cols-[5fr_4fr] gap-4 px-10">
+    <div class="container mx-auto py-16 grid grid-cols-[auto_32rem] gap-4 px-10">
        <div class="pl-10 table">
            <div class="table-cell align-middle">
                <h1 class="font-display font-bold text-5xl leading-tight">
@ -43,19 +43,13 @@
                <br>
                <br>
                <div class="text-center">
-                    <a 
+                    <a class="inline-block font-display text-lg border-2 border-pink-400 hover:bg-pink-400 transition-colors
-            class="inline-block font-display text-lg border-2 border-pink-400 hover:bg-pink-400 transition-colors
+                hover:text-c-bg py-3 px-8 mx-6 rounded" href="/learn/">
                hover:text-c-bg py-3 px-8 mx-6 rounded"
            href="/learn/"
        >
                        Learn
                    </a>
-        <a 
+                    <a class="inline-block font-display text-lg border-2 border-sky-400 py-3 px-8 mx-6 rounded
-            class="inline-block font-display text-lg border-2 border-sky-400 py-3 px-8 mx-6 rounded
+                transition-colors hover:text-black hover:bg-sky-400" href="/install/">
                transition-colors hover:text-black hover:bg-sky-400" 
            href="/install/"
        >
                        Install
                    </a>
                </div>
@ -76,9 +70,18 @@
                </g>
            </svg>
            <div class="h-1"></div>
-            <pre style="padding: 0 !important; border: none !important;">
+            <div id="editor" class="font-mono language-thp"></div>
-                <code class="language-thp">
+        </div>
-                    // Actual generics & sum types
+    </div>
    <script src="/js/highlighter.js"></script>
    <script>
        let jar = CodeJar(document.getElementById("editor"), thp_highlighter, {
            tab: "    ",
        });
        jar.updateCode(
            `// Actual generics & sum types
 fun find_person(Int person_id) -> Result[String, String] {
    // Easy, explicit error bubbling
    try Person::find_by_id(person_id)
@ -93,18 +96,13 @@
 // First class HTML-like templates & components
 print(
-                        &lt;a href="/person/reports/{person.id}"&gt;
+    <a href="/person/reports/{person.id}">
        welcome, {person.name}
-                        &lt;/a&gt;
+    </a>
 )
-                    // And more!
+// And more!`
-                </code>
+        )
-            </pre>
+    </script>
        </div>
    </div>
    <script src="/js/prism.min.js"></script>
    <script src="/js/prism.thp.js"></script>
 </body>
 </html>
--- a/static/js/prism.min.js
+++ b/static/js/prism.min.js
--- a/static/js/prism.thp.js
+++ b/static/js/prism.thp.js
@ -1,25 +0,0 @@
 Prism.languages.thp = {
    "comment": [
        {
            pattern: /(^|[^\\])\/\*[\s\S]*?(?:\*\/|$)/,
            lookbehind: true,
            greedy: true,
        },
        {
            pattern: /(^|[^\\:])\/\/.*/,
            lookbehind: true,
            greedy: true,
        },
    ],
    "string": {
        pattern: /(["])(?:\\(?:\r\n|[\s\S])|(?!\1)[^\\\r\n])*\1/,
        greedy: true,
    },
    "keyword": /\b(?:case|static|const|enum|loop|use|break|catch|continue|do|else|finally|for|fun|if|in|fn|nil|return|throw|try|while|type|match|with|of|abstract|class|interface|private|pub|map|override|open|init|val|var|mut|clone)\b/,
    "number": /\b0x[\da-f]+\b|(?:\b\d+(?:\.\d*)?|\B\.\d+)(?:e[+-]?\d+)?/i,
    "operator": /[<>]=?|[!=]=?=?|--?|\$|\+\+?|&&?|\|\|?|[?*/~^%]/,
    "punctuation": /[{}[\];(),.]/,
    "boolean": /\b(?:false|true)\b/,
    "class-name": /\b[A-Z][a-zA-Z_0-9]*\b/,
    "variable": /\b[a-z_0-9][a-zA-Z_0-9]+:/,
 };
--- a/static/template.html
+++ b/static/template.html
@ -49,8 +49,25 @@
        </main>
    </div>
-    <script src="/js/prism.min.js"></script>
+    <script src="/js/highlighter.js"></script>
-    <script src="/js/prism.thp.js"></script>
+    <script>
-</body>
+        // Add an editor to all code samples
        const code_elements = document.querySelectorAll(".language-thp");
        for (const el of [...code_elements]) {
            const pre_parent = el.parentElement;
            const new_div = document.createElement("div");
            const code = el.innerHTML;
            el.parentElement.classList.add("language-thp");
            pre_parent.removeChild(el);
            pre_parent.appendChild(new_div);
            CodeJar(new_div, thp_highlighter, {
                tab: "    "
            }).updateCode(code);
        }
    </script>
 </body>
 </html>
--- a/tailwind.config.js
+++ b/tailwind.config.js
@ -17,7 +17,7 @@ module.exports = {
            }
        },
        fontFamily: {
-            "mono": ["'Fira Code'", "Inconsolata", "Iosevka", "monospace"],
+            "mono": ["Iosevka", "monospace"],
            "display": ["Inter", "'Josefin Sans'", "'Fugaz One'", "sans-serif"],
            "body": ["'Fira Sans'", "Inter", "sans-serif"],
        },
--- a/tsconfig.json
+++ b/tsconfig.json
@ -0,0 +1,27 @@
 {
  "compilerOptions": {
    // Enable latest features
    "lib": ["ESNext"],
    "target": "ESNext",
    "module": "ESNext",
    "moduleDetection": "force",
    "jsx": "react-jsx",
    "allowJs": true,
    // Bundler mode
    "moduleResolution": "bundler",
    "allowImportingTsExtensions": true,
    "verbatimModuleSyntax": true,
    "noEmit": true,
    // Best practices
    "strict": true,
    "skipLibCheck": true,
    "noFallthroughCasesInSwitch": true,
    // Some stricter flags
    "noUnusedLocals": true,
    "noUnusedParameters": true,
    "noPropertyAccessFromIndexSignature": true
  }
 }
Author	SHA1	Message	Date
Araozu	b78b30a34d	use the new highlighter in all the pages	2024-03-27 12:01:14 -05:00
Araozu	d098b60a38	scan strings and comments	2024-03-27 08:36:34 -05:00
Araozu	2e93df0fd8	scan datatypes	2024-03-27 08:18:31 -05:00
Araozu	4665d87b5f	scan identifiers	2024-03-27 08:12:32 -05:00
Araozu	6490e8dbaa	Complete minimal flow for editor highlighting	2024-03-26 18:29:52 -05:00
Araozu	184ed14435	write a custom highlighter for codejar	2024-03-26 18:05:58 -05:00