refactor: use a highlight level to emit errors

2024-08-26 08:43:36 -05:00 · 2024-08-26 08:43:36 -05:00 · 980b92f631
commit 980b92f631
parent 25a5b20d5f
15 changed files with 125 additions and 937 deletions
--- a/src/components/Code.astro
+++ b/src/components/Code.astro
@ -1,10 +1,11 @@
 ---
 import { native_highlighter } from "../lexer/highlighter";
+import type { HighlightLevel } from "../lexer/types";
 import CodeError from "./docs/CodeError.astro";

-const { thpcode, no_warnings } = Astro.props;
+const { thpcode, no_warnings, level } = Astro.props;

-const [native_html, error_type, error_message] = await native_highlighter(thpcode);
+const [native_html, error_type, error_message] = await native_highlighter(thpcode, level as HighlightLevel);
 ---

 <pre
--- a/src/components/InteractiveCode.astro
+++ b/src/components/InteractiveCode.astro
@ -1,163 +1,7 @@
 ---
-import { lex } from "../lexer/lexer";
-import type { Instruction } from "../thp_machine/machine_parser";
-import { parse_str } from "../thp_machine/machine_parser";
-import { leftTrimDedent } from "./utils";
-const { code, steps } = Astro.props;
-
-function highlightCode(lines: Array<string>): string {
-    let outLines: Array<string> = [];
-
-    for (const [idx, line] of lines.entries()) {
-        const tokens = lex(line);
-        const lineArray = [
-            `<div class=\"inline-block w-full\" :class=\"line === ${idx + 1}? 'bg-green-200 dark:bg-green-900': ''\">`,
-        ];
-
-        for (const token of tokens) {
-            if (token.token_type !== "") {
-                lineArray.push(
-                    `<span class="token ${token.token_type}">${token.v}</span>`,
-                );
-            } else {
-                lineArray.push(token.v);
-            }
-        }
-        lineArray.push("</div>");
-
-        outLines.push(lineArray.join(""));
-    }
-
-    return outLines.join("\n");
-}
-
-const codeHtml = highlightCode(leftTrimDedent(code));
-let instructionSet: Array<Array<Instruction>>;
-try {
-    instructionSet = parse_str(steps);
-} catch (e) {
-    console.error(Astro.url);
-    throw e;
-}
-
-const serialized_inst = JSON.stringify(instructionSet);
+import Code from "./Code.astro";
+const { code } = Astro.props;
+// TODO: Delete this component, replace with Code
 ---

-<div
-    class="bg-black text-white rounded px-1"
-    x-data={`{
-        line: 0,
-        stdout: "",
-        ip: 0,
-        inst: ${serialized_inst},
-        done: false,
-        state: {},
-    }`}
->
-    <span
-        class="inline-block bg-[var(--code-theme-bg-acolor)] px-2 rounded-tl rounded-tr font-mono text-sm"
-        >thp code</span
-    >
-    <pre
-        class="language-thp"
-        style="margin: 0;"
-        data-disabled><code set:html={codeHtml} /></pre>
-    <div class="grid grid-cols-2 font-mono text-sm">
-        <div>
-            <div class="p-1 border-b border-r border-white">stdout</div>
-            <div class="h-24 p-1 border-r border-white">
-                <pre><code class="bg-black" x-text="stdout" /></pre>
-            </div>
-        </div>
-        <div>
-            <div class="p-1 border-b border-white">state</div>
-            <div class="h-24 p-1 overflow-y-scroll">
-                <template x-for="(value, key) in state">
-                    <div x-text="key.replaceAll(' ', ' ') + ' = ' + value">
-                    </div>
-                </template>
-            </div>
-        </div>
-    </div>
-    <div class="border-t border-white p-1">
-        <button
-            class="font-mono px-1 rounded bg-pink-200 dark:bg-pink-950 text-black dark:text-white disabled:opacity-50 disabled:cursor-not-allowed"
-            @click="alpineNext($data)"
-            :disabled="done && 'true'"
-        >
-            Step: <span x-text="ip"></span>
-        </button>
-        <button
-            class="font-mono px-1 rounded bg-pink-200 dark:bg-pink-950 text-black dark:text-white"
-            @click="alpineReset($data)"
-        >
-            Reset
-        </button>
-    </div>
-</div>
-
-<script>
-    import {
-        InstructionType,
-        type Instruction,
-    } from "../thp_machine/machine_parser";
-
-    type AlpineState = {
-        line: number;
-        stdout: string;
-        ip: number;
-        inst: Array<Array<Instruction>>;
-        done: boolean;
-        state: { [key: string]: string };
-    };
-
-    /// Executes the instruction following the state of the machine.
-    function alpineNext(data: AlpineState) {
-        const len = data.inst.length;
-        const ip = data.ip;
-        data.ip += 1;
-
-        const instructions = data.inst[ip]!;
-        for (const instructionSet of instructions) {
-            const i = instructionSet;
-
-            switch (i.t) {
-                case InstructionType.Line: {
-                    data.line = Number(i.v0);
-                    break;
-                }
-                case InstructionType.Out: {
-                    data.stdout += i.v0.slice(1, -1) + "\n";
-                    break;
-                }
-                case InstructionType.Set: {
-                    const i_key = i.v0.slice(1, -1);
-                    const i_value = i.v1!.slice(1, -1);
-                    data.state[i_key] = i_value;
-                    break;
-                }
-                case InstructionType.Unset: {
-                    delete data.state[i.v0.slice(1, -1)];
-                    break;
-                }
-            }
-        }
-
-        if (data.ip >= len) {
-            data.done = true;
-            return;
-        }
-    }
-    // @ts-ignore
-    window.alpineNext = alpineNext;
-
-    function alpineReset(data: AlpineState) {
-        data.line = 0;
-        data.stdout = "";
-        data.ip = 0;
-        data.done = false;
-        data.state = {};
-    }
-    // @ts-ignore
-    window.alpineReset = alpineReset;
-</script>
+<Code thpcode={code} />
--- a/src/components/Navbar.astro
+++ b/src/components/Navbar.astro
@ -30,12 +30,6 @@ const { showSidebarButton = true } = Astro.props;
        >
            Learn
        </a>
-        <a
-            href="/how-to/"
-            class="hidden lg:inline-block px-4 font-display font-bold-text-xl hover:underline"
-        >
-            How to guides
-        </a>
        <a
            href="/spec/"
            class="hidden lg:inline-block px-4 font-display font-bold-text-xl hover:underline"
--- a/src/lexer/highlighter.ts
+++ b/src/lexer/highlighter.ts
@ -1,105 +1,54 @@
 import { spawn } from "node:child_process";
 import { leftTrimDedent } from "../components/utils";
-
-export type ReferenceItem = {
-    symbol_start: number
-    symbol_end: number
-    reference: string
-}
-
-export interface Token {
-    token_type: TokenType
-    value: string
-    position: number
-}
-
-type TokenType =
-    "Identifier" |
-    "Datatype" |
-    "Int" |
-    "Float" |
-    "String" |
-    "Operator" |
-    "LeftParen" |
-    "RightParen" |
-    "LeftBracket" |
-    "RightBracket" |
-    "LeftBrace" |
-    "RightBrace" |
-    "NewLine" |
-    "Comment" |
-    "MultilineComment" |
-    "Comma" |
-    "INDENT" |
-    "DEDENT" |
-    "VAL" |
-    "VAR" |
-    "EOF" |
-    "FUN";
-
-export interface Err {
-    Lex?: LexError
-    Syntax?: SyntaxError
-    Semantic?: SemanticError
-}
-
-export interface LexError {
-    position: number
-    reason: string
-}
-
-export interface SyntaxError {
-    error_start: number
-    error_end: number
-    reason: string
-}
-
-export interface SemanticError {
-    error_start: number
-    error_end: number
-    reason: string
-}
-
-export interface TokenizeResult {
-    Ok?: [Array<Token>, Array<ReferenceItem>],
-    SyntaxOnly?: [Token[], Err],
-    TokensOnly?: [Token[], Err],
-    Err?: Err,
-}
+import { HighlightLevel } from "./types";
+import type { LexError, SyntaxError, SemanticError, Token, TokenizeResult, TokenType } from "./types";

 const error_classes = "underline underline-offset-4 decoration-wavy decoration-red-500";

-export async function native_highlighter(code: string): Promise<[string, string, string | null]> {
+export async function native_highlighter(code: string, level = HighlightLevel.Lexic): Promise<[string, string, string | null]> {
    let formatted_code = leftTrimDedent(code).join("\n");

-    let result: TokenizeResult;
    try {
-        result = await native_lex(formatted_code);
+        let result = await native_lex(formatted_code);
+        return highlight_syntax(formatted_code, result, level);
    } catch (error) {
        return compiler_error(formatted_code, error as Error);
    }
-
-    if (result.Err) {
-        return lex_error_highlighter(formatted_code, result.Err!.Lex!);
-    }
-    else if (result.TokensOnly) {
-        const [tokens, error] = result.TokensOnly!;
-        return syntax_error_highlighter(formatted_code, tokens, error.Syntax!);
-    }
-    else if (result.SyntaxOnly) {
-        const [tokens, error] = result.SyntaxOnly!;
-        return semantic_error_highlighter(formatted_code, tokens, error.Semantic!);
 }

-    const tokens = result.Ok! as unknown as Array<Token>;
-    // TODO: this is disable because the compiler has not
-    // implemented this feature yet
-    // const [tokens, references] = result.Ok!;
-    // console.log("refs:");
-    // console.log(references);
+function highlight_syntax(code: string, result: TokenizeResult, level: HighlightLevel): [string, string, string | null] {
+    let tokens_final: Array<Token>;

-    const output = highlight_tokens(formatted_code, tokens);
+    if (result.SemanticError) {
+        const [tokens, semanticError] = result.SemanticError;

+        if (level === HighlightLevel.Semantic) {
+            return semantic_error_highlighter(code, tokens, semanticError.Semantic!);
+        } else {
+            tokens_final = tokens;
+        }
+    } else if (result.SyntaxError) {
+        const [tokens, syntaxError] = result.SyntaxError;
+
+        if (level === HighlightLevel.Semantic || level === HighlightLevel.Syntactic) {
+            return syntax_error_highlighter(code, tokens, syntaxError.Syntax!);
+        } else {
+            tokens_final = tokens;
+        }
+    } else if (result.LexError) {
+        // Lex errors are always reported: no highlight level bypasses them
+        return lex_error_highlighter(code, result.LexError!.Lex!);
+    } else if (result.Ok) {
+        tokens_final = result.Ok;
+    } else {
+        console.error(result);
+        throw new Error("Web page error: The compiler returned a case that wasn't handled.");
+    }
+
+    // At this point any error was either reported through an early return
+    // or ignored because of the highlight level; tokens_final holds the tokens to render.
+
+    const output = highlight_tokens(code, tokens_final);
    return [output, "", null];
 }

--- a/src/lexer/identifier_lexer.test.ts
+++ b/src/lexer/identifier_lexer.test.ts
@ -1,55 +0,0 @@
-import { expect, test, describe } from "vitest";
-import { scan_identifier } from "./identifier_lexer";
-
-
-describe("Identifier Lexer", () => {
-    test("should return an identifier token", () => {
-        const code = "a";
-        const token = scan_identifier(code, 0);
-
-        expect(token).toEqual([{ v: "a", token_type: "identifier" }, 1]);
-    });
-
-    test("should scan an underscore", () => {
-        const code = "_";
-        const token = scan_identifier(code, 0);
-
-        expect(token).toEqual([{ v: "_", token_type: "identifier" }, 1]);
-    });
-
-    test("should scan an identifier with an underscore", () => {
-        const code = "a_";
-        const token = scan_identifier(code, 0);
-
-        expect(token).toEqual([{ v: "a_", token_type: "identifier" }, 2]);
-    });
-
-    test("should scan an identifier that starts with an underscore", () => {
-        const code = "_a";
-        const token = scan_identifier(code, 0);
-
-        expect(token).toEqual([{ v: "_a", token_type: "identifier" }, 2]);
-    });
-
-    test("should scan an identifier with numbers and uppercase letters", () => {
-        const code = "aA1";
-        const token = scan_identifier(code, 0);
-
-        expect(token).toEqual([{ v: "aA1", token_type: "identifier" }, 3]);
-    });
-
-    test("should scan a keyword", () => {
-        const code = "val";
-        const token = scan_identifier(code, 0);
-
-        expect(token).toEqual([{ v: "val", token_type: "keyword" }, 3]);
-    });
-
-    test("should scan a datatype", () => {
-        const code = "Int";
-        const token = scan_identifier(code, 0, true);
-
-        expect(token).toEqual([{ v: "Int", token_type: "class-name" }, 3]);
-    });
-});
-
--- a/src/lexer/identifier_lexer.ts
+++ b/src/lexer/identifier_lexer.ts
@ -1,44 +0,0 @@
-import type { Token } from "./lexer";
-import { is_identifier_char } from "./utils";
-
-/**
- * Scans an identifier, at the given position in the input string.
- * This function assumes that the character at the given position is a letter.
- * 
- * @param input the input string
- * @param starting_position the position to start scanning from
- * @param is_datatype whether the identifier is a datatype
- */
-export function scan_identifier(input: string, starting_position: number, is_datatype = false): [Token, number] {
-    let value = input[starting_position]!;
-    let pos = starting_position + 1;
-
-    while (pos < input.length) {
-        const c = input[pos]!;
-
-        if (is_identifier_char(c)) {
-            pos += 1;
-            value += c;
-        }
-        else {
-            break;
-        }
-    }
-
-    if (is_datatype) {
-        return [{ v: value, token_type: "class-name" }, pos];
-    }
-    else {
-        return [{ v: value, token_type: check_keyword(value) }, pos];
-    }
-}
-
-function check_keyword(value: string): string {
-    const keywords = ["throws", "extends", "constructor", "case", "static", "const", "enum", "union", "loop", "use", "break", "catch", "continue", "as", "do", "else", "finally", "for", "fun", "if", "in", "fn", "nil", "return", "throw", "try", "while", "type", "match", "with", "of", "abstract", "class", "interface", "private", "pub", "override", "open", "init", "val", "var", "mut", "clone"];
-
-    if (keywords.includes(value)) {
-        return "keyword";
-    }
-    return "identifier";
-}
-
--- a/src/lexer/lexer.test.ts
+++ b/src/lexer/lexer.test.ts
@ -1,45 +0,0 @@
-import { expect, test, describe } from "vitest";
-import { lex } from "./lexer";
-
-describe("Lexer", () => {
-    test("empty program should return no tokens", () => {
-        const code = "";
-        const tokens = lex(code);
-        expect(tokens).toEqual([]);
-    });
-
-    test("program with whitespace should return a single token", () => {
-        const code = " ";
-        const tokens = lex(code);
-        expect(tokens).toEqual([{v: " ", token_type: ""}]);
-    })
-
-    test("program with newlines should return a single token", () => {
-        const code = "\n";
-        const tokens = lex(code);
-        expect(tokens).toEqual([{v: "\n", token_type: ""}]);
-    });
-
-    test("program with random unicode should return the same unicode", () => {
-        const code = "🍕";
-        const tokens = lex(code);
-        expect(tokens).toEqual([{v: "🍕", token_type: ""}]);
-    });
-
-    test("should scan integers", () => {
-        const code = "12345";
-        const tokens = lex(code);
-        expect(tokens).toEqual([{v: "12345", token_type: "number"}]);
-    });
-
-    test("should scan integers and whitespace around", () => {
-        const code = "   12345  \n  ";
-        const tokens = lex(code);
-        expect(tokens).toEqual([
-            {v: "   ", token_type: ""},
-            {v: "12345", token_type: "number"},
-            {v: "  \n  ", token_type: ""},
-        ]);
-    });
-});
-
--- a/src/lexer/lexer.ts
+++ b/src/lexer/lexer.ts
@ -1,166 +0,0 @@
-import { scan_identifier } from "./identifier_lexer";
-import { scan_number } from "./number_lexer";
-import { scan_string } from "./string_lexer";
-import { is_digit, is_lowercase, is_uppercase } from "./utils";
-
-export type Token = {
-    v: string,
-    token_type: string,
-};
-
-/**
- * Lexes a string of THP code, and returns an array of tokens. Unlike a regular
- * lexer, whitespace and other characters are not ignored, and are instead treated
- * as a default token.
- * 
- * This lexer implements a subset of the grammar defined in the THP language specification,
- * only recognizing the following tokens:
- * - Identifier
- * - Datatype
- * - String
- * - Number
- * - Single line comment
- * - Multi line comment
- * - Keywords
- * 
- * @param code Code to lex
- * @returns An array of all the tokens found
- */
-export function lex(code: string, start = 0): Array<Token> {
-    const code_len = code.length;
-    const tokens: Array<Token> = [];
-
-    let current_pos = start;
-    let current_default_token = "";
-
-    while (current_pos < code_len) {
-        const c = code[current_pos]!;
-
-        // try to scan a number
-        if (is_digit(c)) {
-            // if the current default token is not empty, push it to the tokens array
-            if (current_default_token !== "") {
-                tokens.push({ v: current_default_token, token_type: "" });
-                current_default_token = "";
-            }
-
-            // lex a number
-            const [token, next] = scan_number(code, current_pos);
-            current_pos = next;
-            tokens.push(token);
-            continue;
-        }
-        // try to scan an identifier/keyword
-        else if (is_lowercase(c) || c === "_") {
-            // if the current default token is not empty, push it to the tokens array
-            if (current_default_token !== "") {
-                tokens.push({ v: current_default_token, token_type: "" });
-                current_default_token = "";
-            }
-
-            const [token, next] = scan_identifier(code, current_pos);
-            current_pos = next;
-            tokens.push(token);
-            continue;
-        }
-        // try to scan a datatype
-        else if (is_uppercase(c)) {
-            // if the current default token is not empty, push it to the tokens array
-            if (current_default_token !== "") {
-                tokens.push({ v: current_default_token, token_type: "" });
-                current_default_token = "";
-            }
-
-            const [token, next] = scan_identifier(code, current_pos, true);
-            current_pos = next;
-            tokens.push(token);
-            continue;
-        }
-        // try to scan a string
-        else if (c === "\"") {
-            // if the current default token is not empty, push it to the tokens array
-            if (current_default_token !== "") {
-                tokens.push({ v: current_default_token, token_type: "" });
-                current_default_token = "";
-            }
-
-            const [token, next] = scan_string(code, current_pos);
-            current_pos = next;
-            tokens.push(token);
-            continue;
-        }
-        // try to scan a comment
-        else if (c === "/" && code[current_pos + 1] === "/") {
-            // if the current default token is not empty, push it to the tokens array
-            if (current_default_token !== "") {
-                tokens.push({ v: current_default_token, token_type: "" });
-                current_default_token = "";
-            }
-
-            let comment = "";
-            let pos = current_pos;
-
-            while (pos < code_len) {
-                const char = code[pos];
-
-                if (char === "\n") {
-                    break;
-                }
-
-                comment += char;
-                pos++;
-            }
-
-            tokens.push({ v: comment, token_type: "comment" });
-            current_pos = pos;
-            continue;
-        }
-        // try to scan a multiline comment
-        else if (c === "/" && code[current_pos + 1] === "*") {
-            // if the current default token is not empty, push it to the tokens array
-            if (current_default_token !== "") {
-                tokens.push({ v: current_default_token, token_type: "" });
-                current_default_token = "";
-            }
-
-            let comment = "";
-            let pos = current_pos;
-
-            while (pos < code_len) {
-                const char = code[pos];
-
-                if (char === "*" && code[pos + 1] === "/") {
-                    pos += 2;
-                    comment += "*/";
-                    break;
-                }
-
-                comment += char;
-                pos++;
-            }
-
-            tokens.push({ v: comment, token_type: "comment" });
-            current_pos = pos;
-            continue;
-        }
-        // replace < with &lt;
-        else if (c === "<") {
-            current_default_token += "&lt;";
-            current_pos++;
-            continue;
-        }
-
-        current_default_token += c;
-        current_pos++;
-    }
-
-    // if there was a default token, push it to the tokens array
-    if (current_default_token !== "") {
-        tokens.push({ v: current_default_token, token_type: "" });
-        current_default_token = "";
-    }
-
-    return tokens;
-}
-
-
--- a/src/lexer/number_lexer.test.ts
+++ b/src/lexer/number_lexer.test.ts
@ -1,19 +0,0 @@
-import { expect, test, describe } from "vitest";
-import { scan_number } from "./number_lexer";
-
-describe("Number Lexer", () => {
-    test("should return a whole number token", () => {
-        const code = "1";
-        const token = scan_number(code, 0);
-
-        expect(token).toEqual([{ v: "1", token_type: "number" }, 1]);
-    });
-
-    test("should return a whole number token pt 2", () => {
-        const code = "12345";
-        const token = scan_number(code, 0);
-
-        expect(token).toEqual([{ v: "12345", token_type: "number" }, 5]);
-    });
-});
-
--- a/src/lexer/number_lexer.ts
+++ b/src/lexer/number_lexer.ts
@ -1,47 +0,0 @@
-import type { Token } from "./lexer";
-import { is_digit } from "./utils";
-
-/**
- * Scans a number, at the given position in the input string.
- * This function assumes that the character at the given position is a digit.
- * It follows this grammar:
- * 
- * @param input the input string
- * @param pos the position to start scanning from
- * @returns 
- */
-export function scan_number(input: string, pos: number): [Token, number] {
-    const [token_value, next] = scan_decimal(input, pos);
-
-    return [{ v: token_value, token_type: "number" }, next];
-}
-
-function scan_decimal(input: string, starting_position: number): [string, number] {
-    let current_value = "";
-    let pos = starting_position;
-
-    while (pos < input.length) {
-        const c = input[pos]!;
-
-        if (c === ".") {
-            // todo
-            return [current_value, pos];
-        }
-        else if (c == "e" || c == "E") {
-            // todo
-            return [current_value, pos];
-        }
-        else if (is_digit(c)) {
-            current_value += c;
-            pos += 1;
-        }
-        else {
-            break;
-        }
-
-    }
-
-    return [current_value, pos];
-}
-
-
--- a/src/lexer/string_lexer.test.ts
+++ b/src/lexer/string_lexer.test.ts
@ -1,32 +0,0 @@
-import { expect, test, describe } from "vitest";
-import { scan_string } from "./string_lexer";
-
-describe("String Lexer", () => {
-    test("should scan an empty string", () => {
-        const code = "\"\"";
-        const token = scan_string(code, 0);
-
-        expect(token).toEqual([{ v: "\"\"", token_type: "string" }, 2]);
-    });
-
-    test("should scan a string with a single character", () => {
-        const code = "\"a\"";
-        const token = scan_string(code, 0);
-
-        expect(token).toEqual([{ v: "\"a\"", token_type: "string" }, 3]);
-    });
-
-    test("should scan a string with multiple characters", () => {
-        const code = "\"hello\"";
-        const token = scan_string(code, 0);
-
-        expect(token).toEqual([{ v: "\"hello\"", token_type: "string" }, 7]);
-    });
-    
-    test("should scan a string with an escape character", () => {
-        const code = "\"\\n\"";
-        const token = scan_string(code, 0);
-
-        expect(token).toEqual([{ v: "\"\\n\"", token_type: "string" }, 4]);
-    });
-});
--- a/src/lexer/string_lexer.ts
+++ b/src/lexer/string_lexer.ts
@ -1,49 +0,0 @@
-import type { Token } from "./lexer";
-
-export function scan_string(input: string, starting_position: number): [Token, number] {
-    let value = "\"";
-    let pos = starting_position + 1;
-
-    while (pos < input.length) {
-        const c = input[pos];
-
-        if (c === "\"") {
-            value += c;
-            pos += 1;
-            break;
-        }
-        if (c === "\n") {
-            // todo: error handling, return an error indicator and the caller should render a red wavy underline
-            break;
-        }
-        if (c === "\\") {
-            const next_char = input[pos + 1];
-            value += handle_escape_char(next_char);
-            pos += 2;
-            continue;
-        }
-
-        value += c;
-        pos += 1;
-    }
-
-    return [{ v: value, token_type: "string" }, pos];
-}
-
-function handle_escape_char(next_char: string): string {
-    switch (next_char) {
-        case "n":
-            return "\\n"
-        case "t":
-            return "\\t"
-        case "r":
-            return "\\r"
-        case "\"":
-            return "\\\""
-        case "\\":
-            return "\\\\"
-        default:
-            return "\\" + next_char
-    }
-}
-
--- a/src/lexer/types.ts
+++ b/src/lexer/types.ts
@ -0,0 +1,75 @@
+export type ReferenceItem = {
+    symbol_start: number
+    symbol_end: number
+    reference: string
+}
+
+export interface Token {
+    token_type: TokenType
+    value: string
+    position: number
+}
+
+export type TokenType =
+    "Identifier" |
+    "Datatype" |
+    "Int" |
+    "Float" |
+    "String" |
+    "Operator" |
+    "LeftParen" |
+    "RightParen" |
+    "LeftBracket" |
+    "RightBracket" |
+    "LeftBrace" |
+    "RightBrace" |
+    "NewLine" |
+    "Comment" |
+    "MultilineComment" |
+    "Comma" |
+    "INDENT" |
+    "DEDENT" |
+    "VAL" |
+    "VAR" |
+    "EOF" |
+    "FUN";
+
+export interface Err {
+    Lex?: LexError
+    Syntax?: SyntaxError
+    Semantic?: SemanticError
+}
+
+export interface LexError {
+    position: number
+    reason: string
+}
+
+export interface SyntaxError {
+    error_start: number
+    error_end: number
+    reason: string
+}
+
+export interface SemanticError {
+    error_start: number
+    error_end: number
+    reason: string
+}
+
+export interface TokenizeResult {
+    /** All checks passed */
+    Ok?: Array<Token>,
+    /** There were semantic errors; the tokens are still provided alongside the error */
+    SemanticError?: [Array<Token>, Err],
+    /** There were syntax errors; the tokens are still provided alongside the error */
+    SyntaxError?: [Array<Token>, Err],
+    /** Lexing failed: only the error is available, no tokens */
+    LexError?: Err,
+}
+
+export enum HighlightLevel {
+    Lexic = 0,
+    Syntactic = 1,
+    Semantic = 2,
+}
--- a/src/pages/learn/basics/variables.mdx
+++ b/src/pages/learn/basics/variables.mdx
@ -24,7 +24,7 @@ As a regex: `[a-z_][a-zA-Z0-9_]*`

 Defined with `val`, followed by a variable name and a value.

-<Code thpcode={`
+<Code level={2} thpcode={`
 val surname = "Doe"
 val year_of_birth = 1984
 `} />
@ -33,14 +33,14 @@ val year_of_birth = 1984

 Written after the `val` keyword but before the variable name.

-<Code thpcode={`
+<Code level={2} thpcode={`
 val String surname = "Doe"
 val Int year_of_birth = 1984
 `} />

 When annotating an immutable variable the `val` keyword is optional

-<Code thpcode={`
+<Code level={2} thpcode={`
 // Equivalent to the previous code
 String surname = "Doe"
 Int year_of_birth = 1984
@ -51,7 +51,7 @@ This means that if a variable only has a datatype, it is immutable.
 It is a compile error to declare a variable of a datatype,
 but use another.

-<Code thpcode={`
+<Code level={2} thpcode={`
 // Declare the variable as a String, but use a Float as its value
 String capital = 123.456
 `} />
@ -61,7 +61,7 @@ String capital = 123.456

 Defined with `var`, followed by a variable name and a value.

-<Code thpcode={`
+<Code level={2} thpcode={`
 var name = "John"
 var age = 32
 `} />
@ -70,14 +70,14 @@ var age = 32

 Written after the `var` keywords but before the variable name.

-<Code thpcode={`
+<Code level={2} thpcode={`
 var String name = "John"
 var Int age = 32
 `} />

 When annotating a mutable variable the keyword `var` is still **required**.

-<Code thpcode={`
+<Code level={2} thpcode={`
 // Equivalent to the previous code
 var String name = "John"
 var Int age = 32
--- a/src/thp_machine/machine_parser.ts
+++ b/src/thp_machine/machine_parser.ts
@ -1,218 +0,0 @@
-/*
-step {
-    line 1
-    set "a" "b"
-    unset "a"
-}
-*/
-
-import { scan_number } from "../lexer/number_lexer";
-import { scan_string } from "../lexer/string_lexer";
-import { is_digit, is_lowercase, is_uppercase } from "../lexer/utils";
-
-enum TokenType {
-    Step,
-    Line,
-    Set,
-    Out,
-    Number,
-    String,
-    Unset,
-    BraceOpen,
-    BraceClose,
-};
-
-type Token = [TokenType, string | undefined];
-
-// Creates a stream of tokens
-function lex(input: string): Array<Token> {
-    const characters = input.split("");
-    const characters_len = characters.length;
-    let next_p = 0;
-
-    const tokens: Array<Token> = [];
-
-    while (next_p < characters_len)
-    {
-        const c = characters[next_p]!;
-
-        // word
-        if (is_lowercase(c) || is_uppercase(c))
-        {
-            const [token, next] = lex_word(characters, next_p);
-            tokens.push(token);
-            next_p = next;
-        }
-        // number
-        else if (is_digit(c))
-        {
-            const [token, next] = scan_number(input, next_p);
-            tokens.push([TokenType.Number, token.v]);
-            next_p = next;
-        }
-        // string
-        else if (c === "\"")
-        {
-            const [token, next] = scan_string(input, next_p);
-            tokens.push([TokenType.String, token.v]);
-            next_p = next;
-        }
-        else if (c === "{")
-        {
-            tokens.push([TokenType.BraceOpen, undefined]);
-            next_p += 1;
-        }
-        else if (c === "}")
-        {
-            tokens.push([TokenType.BraceClose, undefined]);
-            next_p += 1;
-        }
-        else if (c === " " || c === "\n")
-        {
-            next_p += 1;
-        }
-        else
-        {
-            throw new Error(`Invalid character: \`${c}\``);
-        }
-    }
-
-    return tokens;
-}
-
-function lex_word(input: Array<string>, pos: number): [Token, number] {
-    let next_p = pos;
-    let value = "";
-
-    let c = input[next_p];
-    while (c !== undefined && (is_lowercase(c) || is_uppercase(c) || is_digit(c) || c === "_"))
-    {
-        value += c;
-        next_p += 1;
-        c = input[next_p];
-    }
-
-    let token_type;
-    if (value === "step")      { token_type = TokenType.Step; }
-    else if (value === "line") { token_type = TokenType.Line; }
-    else if (value === "set")  { token_type = TokenType.Set; }
-    else if (value === "unset"){ token_type = TokenType.Unset; }
-    else if (value === "out")  { token_type = TokenType.Out; }
-    else
-    {
-        throw new Error(`Invalid word: ${value}`);
-    }
-
-    return [[token_type, value], next_p]
-}
-
-export enum InstructionType {
-    Line,
-    Set,
-    Unset,
-    Out,
-}
-
-export type Instruction = {
-    t: InstructionType,
-    v0: string,
-    v1?: string,
-}
-
-export function parse_str(input: string): Array<Array<Instruction>> {
-    return parse(lex(input));
-}
-
-// Parses the tokens into a instruction set
-function parse(tokens: Array<Token>): Array<Array<Instruction>> {
-    let pos = 0;
-    let max = tokens.length;
-
-    const ret = [];
-
-    while (pos < max) {
-        const [steps, next_pos] = parse_step(tokens, pos);
-        pos = next_pos;
-        ret.push(steps);
-    }
-
-    return ret;
-}
-
-function parse_step(tokens: Array<Token>, _pos: number): [Array<Instruction>, number] {
-    let pos = _pos;
-
-    expect(tokens, pos, TokenType.Step, "expected step");
-    pos += 1;
-    expect(tokens, pos, TokenType.BraceOpen, "expected opening brace");
-    pos += 1;
-
-    const instructions = [];
-
-    while (true) {
-        const [inst, next] = parse_instruction(tokens, pos);
-        if (inst === null) {
-            break;
-        }
-        instructions.push(inst);
-        pos = next;
-    }
-
-    expect(tokens, pos, TokenType.BraceClose, "expected closing brace");
-    pos += 1
-
-    return [instructions, pos];
-}
-
-function parse_instruction(tokens: Array<Token>, _pos: number): [Instruction|null, number] {
-    let pos = _pos;
-
-    let instruction_type = tokens[pos]![0];
-    if (instruction_type === TokenType.Line) {
-        pos += 1;
-        expect(tokens, pos, TokenType.Number, "expected a number after the `line` instruction");
-        return [{
-            t: InstructionType.Line,
-            v0: tokens[pos]![1]!,
-        }, pos + 1]
-    }
-    else if (instruction_type === TokenType.Set) {
-        pos += 1;
-        expect(tokens, pos, TokenType.String, "expected a string after the `set` instruction");
-        pos += 1;
-        expect(tokens, pos, TokenType.String, "expected a second string after the `set` instruction");
-
-        return [{
-            t: InstructionType.Set,
-            v0: tokens[pos - 1]![1]!,
-            v1: tokens[pos]![1]!,
-        }, pos + 1]
-    }
-    else if (instruction_type === TokenType.Unset) {
-        expect(tokens, pos + 1, TokenType.String, "expected a a string after the `unset` instruction");
-
-        return [{
-            t: InstructionType.Unset,
-            v0: tokens[pos + 1]![1]!,
-        }, pos + 2]
-    }
-    else if (instruction_type === TokenType.Out) {
-        expect(tokens, pos + 1, TokenType.String, "expected a a string after the `unset` instruction");
-
-        return [{
-            t: InstructionType.Out,
-            v0: tokens[pos + 1]![1]!,
-        }, pos + 2]
-    }
-
-    return [null, pos];
-}
-
-function expect(t: Array<Token>, pos: number, type: TokenType, err: string) {
-    const [t_type] = t[pos]!;
-    if (t_type !== type) {
-        console.error("`" + String(t[pos]) + "`");
-        throw new Error(err + " , got " + t[pos]);
-    }
-}
-