refactor: use a highlight level to emit errors
This commit is contained in:
parent
25a5b20d5f
commit
980b92f631
@ -1,10 +1,11 @@
|
||||
---
|
||||
import { native_highlighter } from "../lexer/highlighter";
|
||||
import type { HighlightLevel } from "../lexer/types";
|
||||
import CodeError from "./docs/CodeError.astro";
|
||||
|
||||
const { thpcode, no_warnings } = Astro.props;
|
||||
const { thpcode, no_warnings, level } = Astro.props;
|
||||
|
||||
const [native_html, error_type, error_message] = await native_highlighter(thpcode);
|
||||
const [native_html, error_type, error_message] = await native_highlighter(thpcode, level as HighlightLevel);
|
||||
---
|
||||
|
||||
<pre
|
||||
|
@ -1,163 +1,7 @@
|
||||
---
|
||||
import { lex } from "../lexer/lexer";
|
||||
import type { Instruction } from "../thp_machine/machine_parser";
|
||||
import { parse_str } from "../thp_machine/machine_parser";
|
||||
import { leftTrimDedent } from "./utils";
|
||||
const { code, steps } = Astro.props;
|
||||
|
||||
/**
 * Renders every source line as one `<div>` of highlighted tokens.
 *
 * Each line is lexed on its own with `lex`. Tokens that carry a non-empty
 * `token_type` are wrapped in a `<span class="token …">`; tokens with an
 * empty type are emitted verbatim. The wrapping `<div>` has an Alpine
 * `:class` binding that compares `line` with the 1-based line number.
 *
 * @param lines the source code, already split into lines
 * @returns the HTML of all lines joined with `\n`
 */
function highlightCode(lines: Array<string>): string {
    const outLines = Array.from(lines.entries(), ([idx, line]) => {
        const opening = `<div class=\"inline-block w-full\" :class=\"line === ${idx + 1}? 'bg-green-200 dark:bg-green-900': ''\">`;

        const body = lex(line).map((token) =>
            token.token_type !== ""
                ? `<span class="token ${token.token_type}">${token.v}</span>`
                : token.v,
        );

        return [opening, ...body, "</div>"].join("");
    });

    return outLines.join("\n");
}
|
||||
|
||||
const codeHtml = highlightCode(leftTrimDedent(code));
|
||||
let instructionSet: Array<Array<Instruction>>;
|
||||
try {
|
||||
instructionSet = parse_str(steps);
|
||||
} catch (e) {
|
||||
console.error(Astro.url);
|
||||
throw e;
|
||||
}
|
||||
|
||||
const serialized_inst = JSON.stringify(instructionSet);
|
||||
import Code from "./Code.astro";
|
||||
const { code } = Astro.props;
|
||||
// TODO: Delete this component, replace with Code
|
||||
---
|
||||
|
||||
<div
|
||||
class="bg-black text-white rounded px-1"
|
||||
x-data={`{
|
||||
line: 0,
|
||||
stdout: "",
|
||||
ip: 0,
|
||||
inst: ${serialized_inst},
|
||||
done: false,
|
||||
state: {},
|
||||
}`}
|
||||
>
|
||||
<span
|
||||
class="inline-block bg-[var(--code-theme-bg-acolor)] px-2 rounded-tl rounded-tr font-mono text-sm"
|
||||
>thp code</span
|
||||
>
|
||||
<pre
|
||||
class="language-thp"
|
||||
style="margin: 0;"
|
||||
data-disabled><code set:html={codeHtml} /></pre>
|
||||
<div class="grid grid-cols-2 font-mono text-sm">
|
||||
<div>
|
||||
<div class="p-1 border-b border-r border-white">stdout</div>
|
||||
<div class="h-24 p-1 border-r border-white">
|
||||
<pre><code class="bg-black" x-text="stdout" /></pre>
|
||||
</div>
|
||||
</div>
|
||||
<div>
|
||||
<div class="p-1 border-b border-white">state</div>
|
||||
<div class="h-24 p-1 overflow-y-scroll">
|
||||
<template x-for="(value, key) in state">
|
||||
<div x-text="key.replaceAll(' ', ' ') + ' = ' + value">
|
||||
</div>
|
||||
</template>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="border-t border-white p-1">
|
||||
<button
|
||||
class="font-mono px-1 rounded bg-pink-200 dark:bg-pink-950 text-black dark:text-white disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
@click="alpineNext($data)"
|
||||
:disabled="done && 'true'"
|
||||
>
|
||||
Step: <span x-text="ip"></span>
|
||||
</button>
|
||||
<button
|
||||
class="font-mono px-1 rounded bg-pink-200 dark:bg-pink-950 text-black dark:text-white"
|
||||
@click="alpineReset($data)"
|
||||
>
|
||||
Reset
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
import {
|
||||
InstructionType,
|
||||
type Instruction,
|
||||
} from "../thp_machine/machine_parser";
|
||||
|
||||
/** Shape of the Alpine `x-data` object that `alpineNext` and `alpineReset` operate on. */
type AlpineState = {
    /** Value of the last executed `Line` instruction; 0 after a reset. */
    line: number;
    /** Accumulated text of all executed `Out` instructions. */
    stdout: string;
    /** Index into `inst` of the next step to execute. */
    ip: number;
    /** All steps; each step is a list of instructions. */
    inst: Array<Array<Instruction>>;
    /** Set to true once the last step has been executed. */
    done: boolean;
    /** Key/value pairs written by `Set` and removed by `Unset`. */
    state: { [key: string]: string };
};
|
||||
|
||||
/// Executes the step at the current instruction pointer and advances the
/// pointer by one.
///
/// Every instruction of the step is applied to `data` in order:
/// - `Line`  stores the numeric value of `v0` in `data.line`
/// - `Out`   appends `v0` (without its first and last character) plus a
///           newline to `data.stdout`
/// - `Set`   stores `v1` under the key `v0` (both without their first and
///           last character) in `data.state`
/// - `Unset` deletes the key `v0` (same trimming) from `data.state`
///
/// `data.done` becomes true once the pointer reaches the end of `data.inst`.
/// Calling the function when there is no step left (empty program, or
/// already finished) only marks the machine as done instead of throwing.
function alpineNext(data: AlpineState) {
    const len = data.inst.length;
    const ip = data.ip;
    const instructions = data.inst[ip];

    // Nothing left to execute: do not advance the pointer, just flag completion.
    if (instructions === undefined) {
        data.done = true;
        return;
    }

    data.ip = ip + 1;

    for (const i of instructions) {
        switch (i.t) {
            case InstructionType.Line: {
                data.line = Number(i.v0);
                break;
            }
            case InstructionType.Out: {
                // slice(1, -1) drops the first and last character of the operand
                data.stdout += i.v0.slice(1, -1) + "\n";
                break;
            }
            case InstructionType.Set: {
                const i_key = i.v0.slice(1, -1);
                const i_value = i.v1!.slice(1, -1);
                data.state[i_key] = i_value;
                break;
            }
            case InstructionType.Unset: {
                delete data.state[i.v0.slice(1, -1)];
                break;
            }
        }
    }

    if (data.ip >= len) {
        data.done = true;
    }
}
|
||||
// @ts-ignore
|
||||
window.alpineNext = alpineNext;
|
||||
|
||||
/// Puts the machine back into its initial state: line 0, empty stdout,
/// instruction pointer 0, not done, and no stored variables.
/// The instruction list (`data.inst`) is left untouched.
function alpineReset(data: AlpineState) {
    data.line = 0;
    data.stdout = "";
    data.ip = 0;
    data.done = false;
    data.state = {};
}
|
||||
// @ts-ignore
|
||||
window.alpineReset = alpineReset;
|
||||
</script>
|
||||
<Code thpcode={code} />
|
||||
|
@ -30,12 +30,6 @@ const { showSidebarButton = true } = Astro.props;
|
||||
>
|
||||
Learn
|
||||
</a>
|
||||
<a
|
||||
href="/how-to/"
|
||||
class="hidden lg:inline-block px-4 font-display font-bold-text-xl hover:underline"
|
||||
>
|
||||
How to guides
|
||||
</a>
|
||||
<a
|
||||
href="/spec/"
|
||||
class="hidden lg:inline-block px-4 font-display font-bold-text-xl hover:underline"
|
||||
|
@ -1,105 +1,54 @@
|
||||
import { spawn } from "node:child_process";
|
||||
import { leftTrimDedent } from "../components/utils";
|
||||
|
||||
export type ReferenceItem = {
|
||||
symbol_start: number
|
||||
symbol_end: number
|
||||
reference: string
|
||||
}
|
||||
|
||||
export interface Token {
|
||||
token_type: TokenType
|
||||
value: string
|
||||
position: number
|
||||
}
|
||||
|
||||
type TokenType =
|
||||
"Identifier" |
|
||||
"Datatype" |
|
||||
"Int" |
|
||||
"Float" |
|
||||
"String" |
|
||||
"Operator" |
|
||||
"LeftParen" |
|
||||
"RightParen" |
|
||||
"LeftBracket" |
|
||||
"RightBracket" |
|
||||
"LeftBrace" |
|
||||
"RightBrace" |
|
||||
"NewLine" |
|
||||
"Comment" |
|
||||
"MultilineComment" |
|
||||
"Comma" |
|
||||
"INDENT" |
|
||||
"DEDENT" |
|
||||
"VAL" |
|
||||
"VAR" |
|
||||
"EOF" |
|
||||
"FUN";
|
||||
|
||||
export interface Err {
|
||||
Lex?: LexError
|
||||
Syntax?: SyntaxError
|
||||
Semantic?: SemanticError
|
||||
}
|
||||
|
||||
export interface LexError {
|
||||
position: number
|
||||
reason: string
|
||||
}
|
||||
|
||||
export interface SyntaxError {
|
||||
error_start: number
|
||||
error_end: number
|
||||
reason: string
|
||||
}
|
||||
|
||||
export interface SemanticError {
|
||||
error_start: number
|
||||
error_end: number
|
||||
reason: string
|
||||
}
|
||||
|
||||
export interface TokenizeResult {
|
||||
Ok?: [Array<Token>, Array<ReferenceItem>],
|
||||
SyntaxOnly?: [Token[], Err],
|
||||
TokensOnly?: [Token[], Err],
|
||||
Err?: Err,
|
||||
}
|
||||
import { HighlightLevel } from "./types";
|
||||
import type { LexError, SyntaxError, SemanticError, Token, TokenizeResult, TokenType } from "./types";
|
||||
|
||||
const error_classes = "underline underline-offset-4 decoration-wavy decoration-red-500";
|
||||
|
||||
/**
 * Highlights THP code using the native lexer.
 *
 * The code is dedented with `leftTrimDedent`, sent to `native_lex`, and the
 * result is rendered by `highlight_syntax` according to `level`. If the
 * native call throws, the failure is rendered with `compiler_error` instead
 * of propagating.
 *
 * @param code the raw THP source
 * @param level which classes of compiler errors are rendered; defaults to
 *              `HighlightLevel.Lexic`
 * @returns a 3-tuple; callers destructure it as
 *          `[html, error_type, error_message]`
 */
export async function native_highlighter(code: string, level = HighlightLevel.Lexic): Promise<[string, string, string | null]> {
    const formatted_code = leftTrimDedent(code).join("\n");

    try {
        const result = await native_lex(formatted_code);
        return highlight_syntax(formatted_code, result, level);
    } catch (error) {
        return compiler_error(formatted_code, error as Error);
    }
}
||||
/**
 * Turns a tokenize result into highlighted HTML, reporting an error only
 * when the requested `level` asks for that class of error.
 *
 * - Semantic errors are rendered only at `HighlightLevel.Semantic`.
 * - Syntax errors are rendered at `Syntactic` and `Semantic`.
 * - Lex errors are always rendered.
 *
 * When an error is present but not requested by `level`, the tokens that
 * came with it are highlighted as if no error had occurred.
 *
 * @param code the (already dedented) source code
 * @param result the result returned by the native lexer
 * @param level which classes of errors should be rendered
 * @returns the same 3-tuple as `native_highlighter`; on success it is
 *          `[html, "", null]`
 * @throws Error if `result` has none of the known variants set
 */
function highlight_syntax(code: string, result: TokenizeResult, level: HighlightLevel): [string, string, string | null] {
    let tokens_final: Array<Token>;

    if (result.SemanticError) {
        const [tokens, semanticError] = result.SemanticError;

        if (level === HighlightLevel.Semantic) {
            return semantic_error_highlighter(code, tokens, semanticError.Semantic!);
        }
        tokens_final = tokens;
    } else if (result.SyntaxError) {
        const [tokens, syntaxError] = result.SyntaxError;

        if (level === HighlightLevel.Semantic || level === HighlightLevel.Syntactic) {
            return syntax_error_highlighter(code, tokens, syntaxError.Syntax!);
        }
        tokens_final = tokens;
    } else if (result.LexError) {
        // There is no error level that bypasses a lex error
        return lex_error_highlighter(code, result.LexError.Lex!);
    } else if (result.Ok) {
        tokens_final = result.Ok;
    } else {
        console.error(result);
        throw new Error("Web page error: The compiler returned a case that wasn't handled.");
    }

    // At this point all error cases have been handled
    // and tokens_final contains valid tokens.
    const output = highlight_tokens(code, tokens_final);
    return [output, "", null];
}
|
||||
|
||||
|
@ -1,55 +0,0 @@
|
||||
import { expect, test, describe } from "vitest";
|
||||
import { scan_identifier } from "./identifier_lexer";
|
||||
|
||||
|
||||
// Unit tests for `scan_identifier`: plain identifiers, underscores,
// keywords and datatypes.
describe("Identifier Lexer", () => {
    test("should return an identifier token", () => {
        const code = "a";
        const token = scan_identifier(code, 0);

        expect(token).toEqual([{ v: "a", token_type: "identifier" }, 1]);
    });

    test("should scan an underscore", () => {
        const code = "_";
        const token = scan_identifier(code, 0);

        expect(token).toEqual([{ v: "_", token_type: "identifier" }, 1]);
    });

    test("should scan an identifier with an underscore", () => {
        const code = "a_";
        const token = scan_identifier(code, 0);

        expect(token).toEqual([{ v: "a_", token_type: "identifier" }, 2]);
    });

    test("should scan an identifier that starts with an underscore", () => {
        const code = "_a";
        const token = scan_identifier(code, 0);

        expect(token).toEqual([{ v: "_a", token_type: "identifier" }, 2]);
    });

    test("should scan an identifier with numbers and uppercase letters", () => {
        const code = "aA1";
        const token = scan_identifier(code, 0);

        expect(token).toEqual([{ v: "aA1", token_type: "identifier" }, 3]);
    });

    test("should scan a keyword", () => {
        const code = "val";
        const token = scan_identifier(code, 0);

        expect(token).toEqual([{ v: "val", token_type: "keyword" }, 3]);
    });

    test("should scan a datatype", () => {
        const code = "Int";
        const token = scan_identifier(code, 0, true);

        expect(token).toEqual([{ v: "Int", token_type: "class-name" }, 3]);
    });
});
|
||||
|
@ -1,44 +0,0 @@
|
||||
import type { Token } from "./lexer";
|
||||
import { is_identifier_char } from "./utils";
|
||||
|
||||
/**
 * Scans an identifier, at the given position in the input string.
 * This function assumes that the character at the given position is a valid
 * first character of an identifier; it is consumed without being checked.
 *
 * Scanning continues while `is_identifier_char` accepts the next character.
 *
 * @param input the input string
 * @param starting_position the position to start scanning from
 * @param is_datatype whether the identifier is a datatype. Datatypes get the
 *        token type `"class-name"`; anything else is classified by
 *        `check_keyword`.
 * @returns the scanned token and the position right after it
 */
export function scan_identifier(input: string, starting_position: number, is_datatype = false): [Token, number] {
    let pos = starting_position + 1;

    while (pos < input.length && is_identifier_char(input[pos]!)) {
        pos += 1;
    }

    const value = input.slice(starting_position, pos);
    const token_type = is_datatype ? "class-name" : check_keyword(value);

    return [{ v: value, token_type }, pos];
}
|
||||
|
||||
/** Every word that is highlighted as a keyword. Built once, not on every call. */
const THP_KEYWORDS: ReadonlySet<string> = new Set(["throws", "extends", "constructor", "case", "static", "const", "enum", "union", "loop", "use", "break", "catch", "continue", "as", "do", "else", "finally", "for", "fun", "if", "in", "fn", "nil", "return", "throw", "try", "while", "type", "match", "with", "of", "abstract", "class", "interface", "private", "pub", "override", "open", "init", "val", "var", "mut", "clone"]);

/**
 * Classifies a scanned word.
 *
 * @param value the word to classify (matched case-sensitively)
 * @returns `"keyword"` if the word is a known keyword, `"identifier"` otherwise
 */
function check_keyword(value: string): string {
    return THP_KEYWORDS.has(value) ? "keyword" : "identifier";
}
|
||||
|
@ -1,45 +0,0 @@
|
||||
import { expect, test, describe } from "vitest";
|
||||
import { lex } from "./lexer";
|
||||
|
||||
// Unit tests for `lex`: empty input, whitespace, unknown characters and
// integers.
describe("Lexer", () => {
    test("empty program should return no tokens", () => {
        const code = "";
        const tokens = lex(code);
        expect(tokens).toEqual([]);
    });

    test("program with whitespace should return a single token", () => {
        const code = " ";
        const tokens = lex(code);
        expect(tokens).toEqual([{v: " ", token_type: ""}]);
    });

    test("program with newlines should return a single token", () => {
        const code = "\n";
        const tokens = lex(code);
        expect(tokens).toEqual([{v: "\n", token_type: ""}]);
    });

    test("program with random unicode should return the same unicode", () => {
        const code = "🍕";
        const tokens = lex(code);
        expect(tokens).toEqual([{v: "🍕", token_type: ""}]);
    });

    test("should scan integers", () => {
        const code = "12345";
        const tokens = lex(code);
        expect(tokens).toEqual([{v: "12345", token_type: "number"}]);
    });

    test("should scan integers and whitespace around", () => {
        const code = " 12345 \n ";
        const tokens = lex(code);
        expect(tokens).toEqual([
            {v: " ", token_type: ""},
            {v: "12345", token_type: "number"},
            {v: " \n ", token_type: ""},
        ]);
    });
});
|
||||
|
@ -1,166 +0,0 @@
|
||||
import { scan_identifier } from "./identifier_lexer";
|
||||
import { scan_number } from "./number_lexer";
|
||||
import { scan_string } from "./string_lexer";
|
||||
import { is_digit, is_lowercase, is_uppercase } from "./utils";
|
||||
|
||||
/** A highlighted fragment of source code. */
export type Token = {
    /** The text of the fragment. */
    v: string,
    /** Highlight class of the fragment; empty for default (unclassified) text. */
    token_type: string,
};
|
||||
|
||||
/**
 * Lexes a string of THP code, and returns an array of tokens. Unlike a regular
 * lexer, whitespace and other characters are not ignored, and are instead
 * accumulated into a default token (a token with an empty `token_type`).
 *
 * This lexer implements a subset of the grammar defined in the THP language
 * specification, only recognizing the following tokens:
 * - Identifier
 * - Datatype
 * - String
 * - Number
 * - Single line comment
 * - Multi line comment
 * - Keywords
 *
 * A `<` inside default text is emitted as the HTML entity `&lt;`.
 *
 * @param code Code to lex
 * @param start Position in `code` where lexing begins (defaults to 0)
 * @returns An array of all the tokens found
 */
export function lex(code: string, start = 0): Array<Token> {
    const code_len = code.length;
    const tokens: Array<Token> = [];

    let current_pos = start;
    let current_default_token = "";

    // Pushes the pending default token, if any, and starts a new one.
    const flush_default_token = (): void => {
        if (current_default_token !== "") {
            tokens.push({ v: current_default_token, token_type: "" });
            current_default_token = "";
        }
    };

    while (current_pos < code_len) {
        const c = code[current_pos]!;
        const following = code[current_pos + 1];

        // Pick the scanner that matches the current character, if any.
        let scanned: [Token, number] | null = null;

        if (is_digit(c)) {
            scanned = scan_number(code, current_pos);
        } else if (is_lowercase(c) || c === "_") {
            // identifier or keyword
            scanned = scan_identifier(code, current_pos);
        } else if (is_uppercase(c)) {
            // datatype
            scanned = scan_identifier(code, current_pos, true);
        } else if (c === "\"") {
            scanned = scan_string(code, current_pos);
        } else if (c === "/" && following === "/") {
            scanned = scan_line_comment(code, current_pos);
        } else if (c === "/" && following === "*") {
            scanned = scan_block_comment(code, current_pos);
        }

        if (scanned !== null) {
            // A real token ends whatever default text came before it.
            flush_default_token();

            const [token, next] = scanned;
            tokens.push(token);
            current_pos = next;
            continue;
        }

        // Default text. `<` is replaced with its HTML entity.
        current_default_token += c === "<" ? "&lt;" : c;
        current_pos += 1;
    }

    // if there was a default token, push it to the tokens array
    flush_default_token();

    return tokens;
}

/** Scans a `//` comment starting at `start`, up to (not including) the next newline. */
function scan_line_comment(code: string, start: number): [Token, number] {
    const newline = code.indexOf("\n", start);
    const end = newline === -1 ? code.length : newline;

    return [{ v: code.slice(start, end), token_type: "comment" }, end];
}

/**
 * Scans a multiline comment starting at `start`, up to and including the
 * first `*` + `/` pair. An unterminated comment runs to the end of the input.
 */
function scan_block_comment(code: string, start: number): [Token, number] {
    const terminator = code.indexOf("*/", start);
    const end = terminator === -1 ? code.length : terminator + 2;

    return [{ v: code.slice(start, end), token_type: "comment" }, end];
}
|
||||
|
||||
|
@ -1,19 +0,0 @@
|
||||
import { expect, test, describe } from "vitest";
|
||||
import { scan_number } from "./number_lexer";
|
||||
|
||||
// Unit tests for `scan_number`: whole numbers only.
describe("Number Lexer", () => {
    test("should return a whole number token", () => {
        const code = "1";
        const token = scan_number(code, 0);

        expect(token).toEqual([{ v: "1", token_type: "number" }, 1]);
    });

    test("should return a whole number token pt 2", () => {
        const code = "12345";
        const token = scan_number(code, 0);

        expect(token).toEqual([{ v: "12345", token_type: "number" }, 5]);
    });
});
|
||||
|
@ -1,47 +0,0 @@
|
||||
import type { Token } from "./lexer";
|
||||
import { is_digit } from "./utils";
|
||||
|
||||
/**
 * Scans a number, at the given position in the input string.
 * This function assumes that the character at the given position is a digit.
 * The digits are read by `scan_decimal`.
 *
 * @param input the input string
 * @param pos the position to start scanning from
 * @returns a token of type `"number"` and the position right after it
 */
export function scan_number(input: string, pos: number): [Token, number] {
    const [token_value, next] = scan_decimal(input, pos);

    return [{ v: token_value, token_type: "number" }, next];
}
|
||||
|
||||
/**
 * Reads a run of consecutive digits starting at `starting_position`.
 *
 * Scanning stops at the first character rejected by `is_digit`, or at the
 * end of the input.
 *
 * TODO: fractional parts (`.`) and exponents (`e` / `E`) are not scanned
 * yet; scanning simply stops in front of them.
 *
 * @param input the input string
 * @param starting_position the position to start scanning from
 * @returns the digits read and the position right after them
 */
function scan_decimal(input: string, starting_position: number): [string, number] {
    let pos = starting_position;

    while (pos < input.length && is_digit(input[pos]!)) {
        pos += 1;
    }

    return [input.slice(starting_position, pos), pos];
}
|
||||
|
||||
|
@ -1,32 +0,0 @@
|
||||
import { expect, test, describe } from "vitest";
|
||||
import { scan_string } from "./string_lexer";
|
||||
|
||||
// Unit tests for `scan_string`: empty strings, regular characters and
// escape sequences.
describe("String Lexer", () => {
    test("should scan an empty string", () => {
        const code = "\"\"";
        const token = scan_string(code, 0);

        expect(token).toEqual([{ v: "\"\"", token_type: "string" }, 2]);
    });

    test("should scan a string with a single character", () => {
        const code = "\"a\"";
        const token = scan_string(code, 0);

        expect(token).toEqual([{ v: "\"a\"", token_type: "string" }, 3]);
    });

    test("should scan a string with multiple characters", () => {
        const code = "\"hello\"";
        const token = scan_string(code, 0);

        expect(token).toEqual([{ v: "\"hello\"", token_type: "string" }, 7]);
    });

    test("should scan a string with an escape character", () => {
        const code = "\"\\n\"";
        const token = scan_string(code, 0);

        expect(token).toEqual([{ v: "\"\\n\"", token_type: "string" }, 4]);
    });
});
|
@ -1,49 +0,0 @@
|
||||
import type { Token } from "./lexer";
|
||||
|
||||
/**
 * Scans a double-quoted string, at the given position in the input string.
 * The character at `starting_position` is taken to be the opening quote and
 * is not checked.
 *
 * Scanning ends after the closing quote, in front of an unescaped newline,
 * or at the end of the input. Escape sequences are kept exactly as written
 * (backslash plus the escaped character). A lone backslash at the very end
 * of the input is kept as-is.
 *
 * @param input the input string
 * @param starting_position the position of the opening quote
 * @returns a token of type `"string"` and the position right after it
 */
export function scan_string(input: string, starting_position: number): [Token, number] {
    let value = "\"";
    let pos = starting_position + 1;

    while (pos < input.length) {
        const c = input[pos]!;

        if (c === "\"") {
            value += c;
            pos += 1;
            break;
        }

        if (c === "\n") {
            // todo: error handling, return an error indicator and the caller should render a red wavy underline
            break;
        }

        if (c === "\\") {
            const next_char = input[pos + 1];

            if (next_char === undefined) {
                // Backslash is the last character of the input: there is
                // nothing to escape, so keep it and stop at the end.
                value += c;
                pos += 1;
                break;
            }

            value += handle_escape_char(next_char);
            pos += 2;
            continue;
        }

        value += c;
        pos += 1;
    }

    return [{ v: value, token_type: "string" }, pos];
}

/**
 * Returns the source text of an escape sequence: a backslash followed by
 * the escaped character. No escape is interpreted; it is reproduced verbatim.
 *
 * @param next_char the character that follows the backslash
 */
function handle_escape_char(next_char: string): string {
    return "\\" + next_char;
}
|
||||
|
75
src/lexer/types.ts
Normal file
75
src/lexer/types.ts
Normal file
@ -0,0 +1,75 @@
|
||||
/** A reference attached to a span of the source code. */
export type ReferenceItem = {
    symbol_start: number
    symbol_end: number
    reference: string
}

/** A token as returned by the native lexer. */
export interface Token {
    token_type: TokenType
    value: string
    position: number
}

/** Every kind of token the native lexer can return. */
export type TokenType =
    "Identifier" |
    "Datatype" |
    "Int" |
    "Float" |
    "String" |
    "Operator" |
    "LeftParen" |
    "RightParen" |
    "LeftBracket" |
    "RightBracket" |
    "LeftBrace" |
    "RightBrace" |
    "NewLine" |
    "Comment" |
    "MultilineComment" |
    "Comma" |
    "INDENT" |
    "DEDENT" |
    "VAL" |
    "VAR" |
    "EOF" |
    "FUN";

/**
 * An error reported by the compiler. Each field is optional; which one is
 * set tells which stage failed.
 */
export interface Err {
    Lex?: LexError
    Syntax?: SyntaxError
    Semantic?: SemanticError
}

/** A lexical error, located at a single position. */
export interface LexError {
    position: number
    reason: string
}

/**
 * A syntax error, located by a start and an end position.
 * NOTE: inside this module the name shadows the global `SyntaxError` class.
 */
export interface SyntaxError {
    error_start: number
    error_end: number
    reason: string
}

/** A semantic error, located by a start and an end position. */
export interface SemanticError {
    error_start: number
    error_end: number
    reason: string
}

/** Result of tokenizing a piece of code. Each variant is an optional field. */
export interface TokenizeResult {
    /** All checks passed */
    Ok?: Array<Token>,
    /** There were semantic errors */
    SemanticError?: [Array<Token>, Err],
    /** There were syntax errors */
    SyntaxError?: [Array<Token>, Err],
    /** No checks passed */
    LexError?: Err,
}

/**
 * How many classes of compiler errors the highlighter renders.
 * The numeric values are part of the interface: documentation pages pass
 * them directly (e.g. `level={2}`).
 */
export enum HighlightLevel {
    /** Only lex errors are rendered. */
    Lexic = 0,
    /** Lex and syntax errors are rendered. */
    Syntactic = 1,
    /** Lex, syntax and semantic errors are rendered. */
    Semantic = 2,
}
|
@ -24,7 +24,7 @@ As a regex: `[a-z_][a-zA-Z0-9_]*`
|
||||
|
||||
Defined with `val`, followed by a variable name and a value.
|
||||
|
||||
<Code thpcode={`
|
||||
<Code level={2} thpcode={`
|
||||
val surname = "Doe"
|
||||
val year_of_birth = 1984
|
||||
`} />
|
||||
@ -33,14 +33,14 @@ val year_of_birth = 1984
|
||||
|
||||
Written after the `val` keyword but before the variable name.
|
||||
|
||||
<Code thpcode={`
|
||||
<Code level={2} thpcode={`
|
||||
val String surname = "Doe"
|
||||
val Int year_of_birth = 1984
|
||||
`} />
|
||||
|
||||
When annotating an immutable variable the `val` keyword is optional
|
||||
|
||||
<Code thpcode={`
|
||||
<Code level={2} thpcode={`
|
||||
// Equivalent to the previous code
|
||||
String surname = "Doe"
|
||||
Int year_of_birth = 1984
|
||||
@ -51,7 +51,7 @@ This means that if a variable only has a datatype, it is immutable.
|
||||
It is a compile error to declare a variable of a datatype,
|
||||
but use another.
|
||||
|
||||
<Code thpcode={`
|
||||
<Code level={2} thpcode={`
|
||||
// Declare the variable as a String, but use a Float as its value
|
||||
String capital = 123.456
|
||||
`} />
|
||||
@ -61,7 +61,7 @@ String capital = 123.456
|
||||
|
||||
Defined with `var`, followed by a variable name and a value.
|
||||
|
||||
<Code thpcode={`
|
||||
<Code level={2} thpcode={`
|
||||
var name = "John"
|
||||
var age = 32
|
||||
`} />
|
||||
@ -70,14 +70,14 @@ var age = 32
|
||||
|
||||
Written after the `var` keyword but before the variable name.
|
||||
|
||||
<Code thpcode={`
|
||||
<Code level={2} thpcode={`
|
||||
var String name = "John"
|
||||
var Int age = 32
|
||||
`} />
|
||||
|
||||
When annotating a mutable variable the keyword `var` is still **required**.
|
||||
|
||||
<Code thpcode={`
|
||||
<Code level={2} thpcode={`
|
||||
// Equivalent to the previous code
|
||||
var String name = "John"
|
||||
var Int age = 32
|
||||
|
@ -1,218 +0,0 @@
|
||||
/*
|
||||
step {
|
||||
line 1
|
||||
set "a" "b"
|
||||
unset "a"
|
||||
}
|
||||
*/
|
||||
|
||||
import { scan_number } from "../lexer/number_lexer";
|
||||
import { scan_string } from "../lexer/string_lexer";
|
||||
import { is_digit, is_lowercase, is_uppercase } from "../lexer/utils";
|
||||
|
||||
// Kinds of tokens of the step-machine language.
enum TokenType {
    Step,
    Line,
    Set,
    Out,
    Number,
    String,
    Unset,
    BraceOpen,
    BraceClose,
}
|
||||
|
||||
type Token = [TokenType, string | undefined];
|
||||
|
||||
// Creates a stream of tokens from the text of a step program.
//
// Recognizes words (see `lex_word`), numbers, double-quoted strings and
// braces. Spaces, tabs, carriage returns and newlines separate tokens and
// are skipped. Any other character throws an `Error`.
function lex(input: string): Array<Token> {
    const characters = input.split("");
    const characters_len = characters.length;
    const tokens: Array<Token> = [];

    let next_p = 0;

    while (next_p < characters_len) {
        const c = characters[next_p]!;

        // whitespace (including tabs and the `\r` of Windows line endings)
        if (c === " " || c === "\n" || c === "\t" || c === "\r") {
            next_p += 1;
        }
        // word
        else if (is_lowercase(c) || is_uppercase(c)) {
            const [token, next] = lex_word(characters, next_p);
            tokens.push(token);
            next_p = next;
        }
        // number
        else if (is_digit(c)) {
            const [token, next] = scan_number(input, next_p);
            tokens.push([TokenType.Number, token.v]);
            next_p = next;
        }
        // string
        else if (c === "\"") {
            const [token, next] = scan_string(input, next_p);
            tokens.push([TokenType.String, token.v]);
            next_p = next;
        }
        else if (c === "{") {
            tokens.push([TokenType.BraceOpen, undefined]);
            next_p += 1;
        }
        else if (c === "}") {
            tokens.push([TokenType.BraceClose, undefined]);
            next_p += 1;
        }
        else {
            throw new Error(`Invalid character: \`${c}\``);
        }
    }

    return tokens;
}
|
||||
|
||||
// Reads a word (letters, digits and underscores) starting at `pos` and maps
// it to its instruction token. Only `step`, `line`, `set`, `unset` and `out`
// are valid words; anything else throws an `Error`.
//
// Returns the token and the position right after the word.
function lex_word(input: Array<string>, pos: number): [Token, number] {
    let next_p = pos;
    let value = "";

    for (
        let c = input[next_p];
        c !== undefined && (is_lowercase(c) || is_uppercase(c) || is_digit(c) || c === "_");
        c = input[next_p]
    ) {
        value += c;
        next_p += 1;
    }

    let token_type: TokenType;
    switch (value) {
        case "step":
            token_type = TokenType.Step;
            break;
        case "line":
            token_type = TokenType.Line;
            break;
        case "set":
            token_type = TokenType.Set;
            break;
        case "unset":
            token_type = TokenType.Unset;
            break;
        case "out":
            token_type = TokenType.Out;
            break;
        default:
            throw new Error(`Invalid word: ${value}`);
    }

    return [[token_type, value], next_p];
}
|
||||
|
||||
// Kinds of instructions a step can contain.
export enum InstructionType {
    Line,
    Set,
    Unset,
    Out,
}
|
||||
|
||||
// A single parsed instruction.
export type Instruction = {
    // kind of instruction
    t: InstructionType,
    // first operand, exactly as lexed (string operands keep their quotes)
    v0: string,
    // second operand; only produced for `set`
    v1?: string,
}
|
||||
|
||||
// Lexes and parses the text of a step program.
// Returns one list of instructions per `step { ... }` block.
// Throws an `Error` if the text cannot be lexed or parsed.
export function parse_str(input: string): Array<Array<Instruction>> {
    const tokens = lex(input);
    return parse(tokens);
}
|
||||
|
||||
// Parses the tokens into an instruction set: a list of steps, each step
// being the list of its instructions. Steps are parsed one after another
// until all tokens are consumed.
function parse(tokens: Array<Token>): Array<Array<Instruction>> {
    const max = tokens.length;
    const ret: Array<Array<Instruction>> = [];

    let pos = 0;
    while (pos < max) {
        const [steps, next_pos] = parse_step(tokens, pos);
        ret.push(steps);
        pos = next_pos;
    }

    return ret;
}
|
||||
|
||||
// Parses one `step { <instructions> }` block starting at `_pos`.
// Returns the instructions of the step and the position right after the
// closing brace. Throws (through `expect`) if the `step` word or either
// brace is missing.
function parse_step(tokens: Array<Token>, _pos: number): [Array<Instruction>, number] {
    let pos = _pos;

    expect(tokens, pos, TokenType.Step, "expected step");
    pos += 1;
    expect(tokens, pos, TokenType.BraceOpen, "expected opening brace");
    pos += 1;

    const instructions: Array<Instruction> = [];

    // Collect instructions until something that is not an instruction shows up.
    for (;;) {
        const [inst, next] = parse_instruction(tokens, pos);
        if (inst === null) {
            break;
        }

        instructions.push(inst);
        pos = next;
    }

    expect(tokens, pos, TokenType.BraceClose, "expected closing brace");
    pos += 1;

    return [instructions, pos];
}
|
||||
|
||||
// Parses a single instruction starting at `_pos`.
//
// Returns the instruction and the position right after it, or `[null, _pos]`
// when the token at `_pos` does not start an instruction (this includes
// running out of tokens). Throws (through `expect`) when an instruction is
// missing one of its operands.
function parse_instruction(tokens: Array<Token>, _pos: number): [Instruction|null, number] {
    const pos = _pos;

    const current = tokens[pos];
    if (current === undefined) {
        // End of input: not an instruction; the caller reports what it expected.
        return [null, pos];
    }

    switch (current[0]) {
        case TokenType.Line: {
            expect(tokens, pos + 1, TokenType.Number, "expected a number after the `line` instruction");

            return [{
                t: InstructionType.Line,
                v0: tokens[pos + 1]![1]!,
            }, pos + 2];
        }
        case TokenType.Set: {
            expect(tokens, pos + 1, TokenType.String, "expected a string after the `set` instruction");
            expect(tokens, pos + 2, TokenType.String, "expected a second string after the `set` instruction");

            return [{
                t: InstructionType.Set,
                v0: tokens[pos + 1]![1]!,
                v1: tokens[pos + 2]![1]!,
            }, pos + 3];
        }
        case TokenType.Unset: {
            expect(tokens, pos + 1, TokenType.String, "expected a a string after the `unset` instruction");

            return [{
                t: InstructionType.Unset,
                v0: tokens[pos + 1]![1]!,
            }, pos + 2];
        }
        case TokenType.Out: {
            expect(tokens, pos + 1, TokenType.String, "expected a string after the `out` instruction");

            return [{
                t: InstructionType.Out,
                v0: tokens[pos + 1]![1]!,
            }, pos + 2];
        }
        default:
            return [null, pos];
    }
}
|
||||
|
||||
// Asserts that the token at `pos` has the given type.
//
// Throws an `Error` whose message starts with `err` when the token has
// another type, or when there is no token at `pos` (end of input).
function expect(t: Array<Token>, pos: number, type: TokenType, err: string) {
    const token = t[pos];

    if (token === undefined) {
        throw new Error(err + " , got end of input");
    }

    if (token[0] !== type) {
        console.error("`" + String(token) + "`");
        throw new Error(err + " , got " + token);
    }
}
|
||||
|
Loading…
Reference in New Issue
Block a user