refactor: use a highlight level to emit errors

This commit is contained in:
Araozu 2024-08-26 08:43:36 -05:00
parent 25a5b20d5f
commit 980b92f631
15 changed files with 125 additions and 937 deletions

View File

@ -1,10 +1,11 @@
--- ---
import { native_highlighter } from "../lexer/highlighter"; import { native_highlighter } from "../lexer/highlighter";
import type { HighlightLevel } from "../lexer/types";
import CodeError from "./docs/CodeError.astro"; import CodeError from "./docs/CodeError.astro";
const { thpcode, no_warnings } = Astro.props; const { thpcode, no_warnings, level } = Astro.props;
const [native_html, error_type, error_message] = await native_highlighter(thpcode); const [native_html, error_type, error_message] = await native_highlighter(thpcode, level as HighlightLevel);
--- ---
<pre <pre

View File

@ -1,163 +1,7 @@
--- ---
import { lex } from "../lexer/lexer"; import Code from "./Code.astro";
import type { Instruction } from "../thp_machine/machine_parser"; const { code } = Astro.props;
import { parse_str } from "../thp_machine/machine_parser"; // TODO: Delete this component, replace with Code
import { leftTrimDedent } from "./utils";
const { code, steps } = Astro.props;
/**
 * Renders already-dedented source lines as highlighted HTML.
 *
 * Every line becomes a full-width `<div>` whose Alpine `:class` binding
 * highlights it when the machine's `line` equals its 1-based index. Inside the
 * div, tokens with a non-empty `token_type` are wrapped in a
 * `<span class="token …">`; untyped tokens are emitted as-is.
 *
 * @param lines the source code, one entry per line
 * @returns the rendered lines joined with `\n`
 */
function highlightCode(lines: Array<string>): string {
    return lines
        .map((text, index) => {
            const opening = `<div class=\"inline-block w-full\" :class=\"line === ${index + 1}? 'bg-green-200 dark:bg-green-900': ''\">`;
            const body = lex(text)
                .map((tok) =>
                    tok.token_type === ""
                        ? tok.v
                        : `<span class="token ${tok.token_type}">${tok.v}</span>`,
                )
                .join("");
            return opening + body + "</div>";
        })
        .join("\n");
}
const codeHtml = highlightCode(leftTrimDedent(code));
let instructionSet: Array<Array<Instruction>>;
try {
instructionSet = parse_str(steps);
} catch (e) {
console.error(Astro.url);
throw e;
}
const serialized_inst = JSON.stringify(instructionSet);
--- ---
<div <Code thpcode={code} />
class="bg-black text-white rounded px-1"
x-data={`{
line: 0,
stdout: "",
ip: 0,
inst: ${serialized_inst},
done: false,
state: {},
}`}
>
<span
class="inline-block bg-[var(--code-theme-bg-acolor)] px-2 rounded-tl rounded-tr font-mono text-sm"
>thp code</span
>
<pre
class="language-thp"
style="margin: 0;"
data-disabled><code set:html={codeHtml} /></pre>
<div class="grid grid-cols-2 font-mono text-sm">
<div>
<div class="p-1 border-b border-r border-white">stdout</div>
<div class="h-24 p-1 border-r border-white">
<pre><code class="bg-black" x-text="stdout" /></pre>
</div>
</div>
<div>
<div class="p-1 border-b border-white">state</div>
<div class="h-24 p-1 overflow-y-scroll">
<template x-for="(value, key) in state">
<div x-text="key.replaceAll(' ', ' ') + ' = ' + value">
</div>
</template>
</div>
</div>
</div>
<div class="border-t border-white p-1">
<button
class="font-mono px-1 rounded bg-pink-200 dark:bg-pink-950 text-black dark:text-white disabled:opacity-50 disabled:cursor-not-allowed"
@click="alpineNext($data)"
:disabled="done && 'true'"
>
Step: <span x-text="ip"></span>
</button>
<button
class="font-mono px-1 rounded bg-pink-200 dark:bg-pink-950 text-black dark:text-white"
@click="alpineReset($data)"
>
Reset
</button>
</div>
</div>
<script>
import {
InstructionType,
type Instruction,
} from "../thp_machine/machine_parser";
// Shape of the Alpine `x-data` object that drives the step-by-step machine.
type AlpineState = {
// Value of the last executed `Line` instruction (0 after a reset).
line: number;
// Text accumulated from `Out` instructions, one entry per line.
stdout: string;
// Index into `inst` of the next step to execute.
ip: number;
// The program: a list of steps, each step being a list of instructions.
inst: Array<Array<Instruction>>;
// Set to true once `ip` has reached the end of `inst`.
done: boolean;
// Key/value pairs written by `Set` and removed by `Unset` instructions.
state: { [key: string]: string };
};
/**
 * Executes the step at the machine's instruction pointer and advances it.
 *
 * All instructions of the current step are applied in order:
 * - `Line` stores its numeric operand in `data.line`
 * - `Out` appends its operand (without the surrounding quotes) plus a newline to `data.stdout`
 * - `Set` / `Unset` add or remove an entry of `data.state` (quotes stripped from operands)
 *
 * After the step runs, `data.done` is set when no steps remain. Calling this
 * with no step left to run (an empty program, or `ip` already past the end)
 * only marks the machine as done instead of throwing.
 *
 * @param data the Alpine state object; it is mutated in place
 */
function alpineNext(data: AlpineState) {
    const len = data.inst.length;
    const ip = data.ip;
    const instructions = data.inst[ip];

    // Nothing to execute: avoid iterating `undefined` and leave `ip` untouched.
    if (instructions === undefined) {
        data.done = true;
        return;
    }

    data.ip = ip + 1;

    for (const i of instructions) {
        switch (i.t) {
            case InstructionType.Line: {
                data.line = Number(i.v0);
                break;
            }
            case InstructionType.Out: {
                // Operands are quoted string tokens; drop the quotes.
                data.stdout += i.v0.slice(1, -1) + "\n";
                break;
            }
            case InstructionType.Set: {
                const i_key = i.v0.slice(1, -1);
                const i_value = i.v1!.slice(1, -1);
                data.state[i_key] = i_value;
                break;
            }
            case InstructionType.Unset: {
                delete data.state[i.v0.slice(1, -1)];
                break;
            }
        }
    }

    if (data.ip >= len) {
        data.done = true;
    }
}
// @ts-ignore
window.alpineNext = alpineNext;
/**
 * Puts the machine back into its initial state: no highlighted line, empty
 * stdout, instruction pointer at the first step, not done, and no variables.
 * The program itself (`data.inst`) is left untouched.
 *
 * @param data the Alpine state object; it is mutated in place
 */
function alpineReset(data: AlpineState) {
    Object.assign(data, {
        line: 0,
        stdout: "",
        ip: 0,
        done: false,
        state: {},
    });
}
// @ts-ignore
window.alpineReset = alpineReset;
</script>

View File

@ -30,12 +30,6 @@ const { showSidebarButton = true } = Astro.props;
> >
Learn Learn
</a> </a>
<a
href="/how-to/"
class="hidden lg:inline-block px-4 font-display font-bold-text-xl hover:underline"
>
How to guides
</a>
<a <a
href="/spec/" href="/spec/"
class="hidden lg:inline-block px-4 font-display font-bold-text-xl hover:underline" class="hidden lg:inline-block px-4 font-display font-bold-text-xl hover:underline"

View File

@ -1,105 +1,54 @@
import { spawn } from "node:child_process"; import { spawn } from "node:child_process";
import { leftTrimDedent } from "../components/utils"; import { leftTrimDedent } from "../components/utils";
import { HighlightLevel } from "./types";
export type ReferenceItem = { import type { LexError, SyntaxError, SemanticError, Token, TokenizeResult, TokenType } from "./types";
symbol_start: number
symbol_end: number
reference: string
}
export interface Token {
token_type: TokenType
value: string
position: number
}
type TokenType =
"Identifier" |
"Datatype" |
"Int" |
"Float" |
"String" |
"Operator" |
"LeftParen" |
"RightParen" |
"LeftBracket" |
"RightBracket" |
"LeftBrace" |
"RightBrace" |
"NewLine" |
"Comment" |
"MultilineComment" |
"Comma" |
"INDENT" |
"DEDENT" |
"VAL" |
"VAR" |
"EOF" |
"FUN";
export interface Err {
Lex?: LexError
Syntax?: SyntaxError
Semantic?: SemanticError
}
export interface LexError {
position: number
reason: string
}
export interface SyntaxError {
error_start: number
error_end: number
reason: string
}
export interface SemanticError {
error_start: number
error_end: number
reason: string
}
export interface TokenizeResult {
Ok?: [Array<Token>, Array<ReferenceItem>],
SyntaxOnly?: [Token[], Err],
TokensOnly?: [Token[], Err],
Err?: Err,
}
const error_classes = "underline underline-offset-4 decoration-wavy decoration-red-500"; const error_classes = "underline underline-offset-4 decoration-wavy decoration-red-500";
export async function native_highlighter(code: string): Promise<[string, string, string | null]> { export async function native_highlighter(code: string, level = HighlightLevel.Lexic): Promise<[string, string, string | null]> {
let formatted_code = leftTrimDedent(code).join("\n"); let formatted_code = leftTrimDedent(code).join("\n");
let result: TokenizeResult;
try { try {
result = await native_lex(formatted_code); let result = await native_lex(formatted_code);
return highlight_syntax(formatted_code, result, level);
} catch (error) { } catch (error) {
return compiler_error(formatted_code, error as Error); return compiler_error(formatted_code, error as Error);
} }
if (result.Err) {
return lex_error_highlighter(formatted_code, result.Err!.Lex!);
}
else if (result.TokensOnly) {
const [tokens, error] = result.TokensOnly!;
return syntax_error_highlighter(formatted_code, tokens, error.Syntax!);
}
else if (result.SyntaxOnly) {
const [tokens, error] = result.SyntaxOnly!;
return semantic_error_highlighter(formatted_code, tokens, error.Semantic!);
} }
const tokens = result.Ok! as unknown as Array<Token>; function highlight_syntax(code: string, result: TokenizeResult, level: HighlightLevel): [string, string, string | null] {
// TODO: this is disable because the compiler has not let tokens_final: Array<Token>;
// implemented this feature yet
// const [tokens, references] = result.Ok!;
// console.log("refs:");
// console.log(references);
const output = highlight_tokens(formatted_code, tokens); if (result.SemanticError) {
const [tokens, semanticError] = result.SemanticError;
if (level === HighlightLevel.Semantic) {
return semantic_error_highlighter(code, tokens, semanticError.Semantic!);
} else {
tokens_final = tokens;
}
} else if (result.SyntaxError) {
const [tokens, syntaxError] = result.SyntaxError;
if (level === HighlightLevel.Semantic || level === HighlightLevel.Syntactic) {
return syntax_error_highlighter(code, tokens, syntaxError.Syntax!);
} else {
tokens_final = tokens;
}
} else if (result.LexError) {
// There is no error level that bypasses a lex error
return lex_error_highlighter(code, result.LexError!.Lex!);
} else if (result.Ok) {
tokens_final = result.Ok;
} else {
console.error(result);
throw new Error("Web page error: The compiler returned a case that wasn't handled.");
}
// At this point all error cases have been handled
// and tokens_final contains valid tokens.
const output = highlight_tokens(code, tokens_final);
return [output, "", null]; return [output, "", null];
} }

View File

@ -1,55 +0,0 @@
import { expect, test, describe } from "vitest";
import { scan_identifier } from "./identifier_lexer";
// Unit tests for `scan_identifier`. Every case scans from position 0 and
// checks both the produced token and the returned next position.
describe("Identifier Lexer", () => {
test("should return an identifier token", () => {
const code = "a";
const token = scan_identifier(code, 0);
expect(token).toEqual([{ v: "a", token_type: "identifier" }, 1]);
});
test("should scan an underscore", () => {
const code = "_";
const token = scan_identifier(code, 0);
expect(token).toEqual([{ v: "_", token_type: "identifier" }, 1]);
});
test("should scan an identifier with an underscore", () => {
const code = "a_";
const token = scan_identifier(code, 0);
expect(token).toEqual([{ v: "a_", token_type: "identifier" }, 2]);
});
test("should scan an identifier that starts with an underscore", () => {
const code = "_a";
const token = scan_identifier(code, 0);
expect(token).toEqual([{ v: "_a", token_type: "identifier" }, 2]);
});
test("should scan an identifier with numbers and uppercase letters", () => {
const code = "aA1";
const token = scan_identifier(code, 0);
expect(token).toEqual([{ v: "aA1", token_type: "identifier" }, 3]);
});
// Reserved words are reported with the "keyword" token type.
test("should scan a keyword", () => {
const code = "val";
const token = scan_identifier(code, 0);
expect(token).toEqual([{ v: "val", token_type: "keyword" }, 3]);
});
// Passing `true` as the third argument marks the identifier as a datatype,
// which is reported with the "class-name" token type.
test("should scan a datatype", () => {
const code = "Int";
const token = scan_identifier(code, 0, true);
expect(token).toEqual([{ v: "Int", token_type: "class-name" }, 3]);
});
});

View File

@ -1,44 +0,0 @@
import type { Token } from "./lexer";
import { is_identifier_char } from "./utils";
/**
 * Scans one identifier beginning at `starting_position`.
 *
 * The first character is taken unconditionally (the caller is expected to have
 * checked that it can start an identifier); following characters are consumed
 * for as long as `is_identifier_char` accepts them.
 *
 * @param input the input string
 * @param starting_position index of the identifier's first character
 * @param is_datatype when true the token is tagged "class-name" and no keyword lookup is done
 * @returns the token and the index of the first character after it
 */
export function scan_identifier(input: string, starting_position: number, is_datatype = false): [Token, number] {
    let text = input[starting_position]!;
    let cursor = starting_position + 1;

    for (; cursor < input.length; cursor += 1) {
        const ch = input[cursor]!;
        if (!is_identifier_char(ch)) {
            break;
        }
        text += ch;
    }

    const token_type = is_datatype ? "class-name" : check_keyword(text);
    return [{ v: text, token_type }, cursor];
}
// Reserved words, built once at module load instead of on every lookup.
const KEYWORDS: ReadonlySet<string> = new Set(["throws", "extends", "constructor", "case", "static", "const", "enum", "union", "loop", "use", "break", "catch", "continue", "as", "do", "else", "finally", "for", "fun", "if", "in", "fn", "nil", "return", "throw", "try", "while", "type", "match", "with", "of", "abstract", "class", "interface", "private", "pub", "override", "open", "init", "val", "var", "mut", "clone"]);

/**
 * Classifies an already-scanned word.
 *
 * @param value the identifier text
 * @returns "keyword" when `value` is a reserved word, "identifier" otherwise
 */
function check_keyword(value: string): string {
    return KEYWORDS.has(value) ? "keyword" : "identifier";
}

View File

@ -1,45 +0,0 @@
import { expect, test, describe } from "vitest";
import { lex } from "./lexer";
// Unit tests for `lex`. Text that matches no token rule is expected back as a
// token with an empty `token_type`, so no input characters are dropped.
describe("Lexer", () => {
test("empty program should return no tokens", () => {
const code = "";
const tokens = lex(code);
expect(tokens).toEqual([]);
});
test("program with whitespace should return a single token", () => {
const code = " ";
const tokens = lex(code);
expect(tokens).toEqual([{v: " ", token_type: ""}]);
})
test("program with newlines should return a single token", () => {
const code = "\n";
const tokens = lex(code);
expect(tokens).toEqual([{v: "\n", token_type: ""}]);
});
test("program with random unicode should return the same unicode", () => {
const code = "🍕";
const tokens = lex(code);
expect(tokens).toEqual([{v: "🍕", token_type: ""}]);
});
test("should scan integers", () => {
const code = "12345";
const tokens = lex(code);
expect(tokens).toEqual([{v: "12345", token_type: "number"}]);
});
// Whitespace on either side of a number is kept as separate untyped tokens.
test("should scan integers and whitespace around", () => {
const code = " 12345 \n ";
const tokens = lex(code);
expect(tokens).toEqual([
{v: " ", token_type: ""},
{v: "12345", token_type: "number"},
{v: " \n ", token_type: ""},
]);
});
});

View File

@ -1,166 +0,0 @@
import { scan_identifier } from "./identifier_lexer";
import { scan_number } from "./number_lexer";
import { scan_string } from "./string_lexer";
import { is_digit, is_lowercase, is_uppercase } from "./utils";
/**
 * A highlighted fragment of source text produced by `lex`.
 */
export type Token = {
// The text of the fragment.
v: string,
// Category of the fragment (e.g. "comment"); the empty string marks text
// that matched no token rule.
token_type: string,
};
/**
 * Lexes a string of THP code, and returns an array of tokens. Unlike a regular
 * lexer, whitespace and other characters are not ignored: everything that
 * matches no rule is accumulated into a "default" token with an empty
 * `token_type`, so concatenating the tokens reproduces the input (with `<`
 * replaced by `&lt;` inside default tokens).
 *
 * This lexer implements a subset of the grammar defined in the THP language
 * specification, only recognizing the following tokens:
 * - Identifier
 * - Datatype
 * - String
 * - Number
 * - Single line comment
 * - Multi line comment
 * - Keywords
 *
 * @param code Code to lex
 * @param start index in `code` at which lexing begins
 * @returns An array of all the tokens found
 */
export function lex(code: string, start = 0): Array<Token> {
    const code_len = code.length;
    const tokens: Array<Token> = [];
    let current_pos = start;
    // Text seen since the last real token that matched no rule.
    let pending_default = "";

    // Emits the accumulated default text, if any, as an untyped token.
    const flush_default = () => {
        if (pending_default !== "") {
            tokens.push({ v: pending_default, token_type: "" });
            pending_default = "";
        }
    };

    while (current_pos < code_len) {
        const c = code[current_pos]!;
        const following = code[current_pos + 1];

        // Pick the scanner responsible for the token starting at `c`, if any.
        let scanner: ((input: string, pos: number) => [Token, number]) | undefined;
        if (is_digit(c)) {
            scanner = scan_number;
        } else if (is_lowercase(c) || c === "_") {
            scanner = scan_identifier;
        } else if (is_uppercase(c)) {
            // an uppercase first letter denotes a datatype
            scanner = (input, pos) => scan_identifier(input, pos, true);
        } else if (c === "\"") {
            scanner = scan_string;
        } else if (c === "/" && following === "/") {
            scanner = scan_line_comment;
        } else if (c === "/" && following === "*") {
            scanner = scan_block_comment;
        }

        if (scanner !== undefined) {
            flush_default();
            const [token, next] = scanner(code, current_pos);
            tokens.push(token);
            current_pos = next;
            continue;
        }

        // No rule matched: keep the character as default text, escaping `<`
        // so it cannot open an HTML tag when rendered.
        pending_default += c === "<" ? "&lt;" : c;
        current_pos++;
    }

    flush_default();
    return tokens;
}

// Scans a `//` comment starting at `start`, up to (not including) the next newline.
function scan_line_comment(input: string, start: number): [Token, number] {
    const newline = input.indexOf("\n", start);
    const end = newline === -1 ? input.length : newline;
    return [{ v: input.slice(start, end), token_type: "comment" }, end];
}

// Scans a `/* … */` comment starting at `start`; an unterminated comment runs to the end of input.
function scan_block_comment(input: string, start: number): [Token, number] {
    // Search after the opening `/*` so that `/*/` is not mistaken for a closed comment.
    const close = input.indexOf("*/", start + 2);
    const end = close === -1 ? input.length : close + 2;
    return [{ v: input.slice(start, end), token_type: "comment" }, end];
}

View File

@ -1,19 +0,0 @@
import { expect, test, describe } from "vitest";
import { scan_number } from "./number_lexer";
// Unit tests for `scan_number`: whole numbers are returned as a "number"
// token together with the position right after the last digit.
describe("Number Lexer", () => {
test("should return a whole number token", () => {
const code = "1";
const token = scan_number(code, 0);
expect(token).toEqual([{ v: "1", token_type: "number" }, 1]);
});
test("should return a whole number token pt 2", () => {
const code = "12345";
const token = scan_number(code, 0);
expect(token).toEqual([{ v: "12345", token_type: "number" }, 5]);
});
});

View File

@ -1,47 +0,0 @@
import type { Token } from "./lexer";
import { is_digit } from "./utils";
/**
 * Scans a number at the given position in the input string and wraps it in a
 * "number" token. The digits themselves are read by `scan_decimal`.
 * The caller is expected to have checked that the character at `pos` is a digit.
 *
 * @param input the input string
 * @param pos the position to start scanning from
 * @returns the number token and the position right after it
 */
export function scan_number(input: string, pos: number): [Token, number] {
    const scanned = scan_decimal(input, pos);
    const token: Token = { v: scanned[0], token_type: "number" };
    return [token, scanned[1]];
}
/**
 * Reads a run of decimal digits starting at `starting_position`.
 *
 * Scanning stops at the first character that is not a digit. A `.` or an
 * `e`/`E` also ends the number: fractional and exponent parts are not
 * implemented yet (todo), so those characters are left for the caller.
 *
 * @param input the input string
 * @param starting_position index of the first digit
 * @returns the digits read and the index of the first unread character
 */
function scan_decimal(input: string, starting_position: number): [string, number] {
    let digits = "";
    let cursor = starting_position;

    for (; cursor < input.length; cursor += 1) {
        const ch = input[cursor]!;
        // todo: `.` should start a fractional part, `e`/`E` an exponent
        const ends_number = ch === "." || ch === "e" || ch === "E" || !is_digit(ch);
        if (ends_number) {
            break;
        }
        digits += ch;
    }

    return [digits, cursor];
}

View File

@ -1,32 +0,0 @@
import { expect, test, describe } from "vitest";
import { scan_string } from "./string_lexer";
// Unit tests for `scan_string`: the token value keeps the surrounding quotes
// and the returned position points right after the closing quote.
describe("String Lexer", () => {
test("should scan an empty string", () => {
const code = "\"\"";
const token = scan_string(code, 0);
expect(token).toEqual([{ v: "\"\"", token_type: "string" }, 2]);
});
test("should scan a string with a single character", () => {
const code = "\"a\"";
const token = scan_string(code, 0);
expect(token).toEqual([{ v: "\"a\"", token_type: "string" }, 3]);
});
test("should scan a string with multiple characters", () => {
const code = "\"hello\"";
const token = scan_string(code, 0);
expect(token).toEqual([{ v: "\"hello\"", token_type: "string" }, 7]);
});
// The escape sequence is kept verbatim (backslash + letter), not decoded.
test("should scan a string with an escape character", () => {
const code = "\"\\n\"";
const token = scan_string(code, 0);
expect(token).toEqual([{ v: "\"\\n\"", token_type: "string" }, 4]);
});
});

View File

@ -1,49 +0,0 @@
import type { Token } from "./lexer";
/**
 * Scans a double-quoted string whose opening quote is at `starting_position`.
 *
 * The token value keeps both quotes and every escape sequence verbatim.
 * Scanning stops after the closing quote, or early (without consuming it) at a
 * newline, or at the end of the input for an unterminated string. A lone
 * backslash at the very end of the input is kept as-is.
 *
 * @param input the input string
 * @param starting_position index of the opening quote
 * @returns the string token and the index of the first character after it
 */
export function scan_string(input: string, starting_position: number): [Token, number] {
    let value = "\"";
    let pos = starting_position + 1;

    while (pos < input.length) {
        const c = input[pos]!;

        if (c === "\"") {
            value += c;
            pos += 1;
            break;
        }

        if (c === "\n") {
            // todo: error handling, return an error indicator and the caller should render a red wavy underline
            break;
        }

        if (c === "\\") {
            const next_char = input[pos + 1];
            if (next_char === undefined) {
                // Backslash is the last character of the input: there is
                // nothing to escape, so keep just the backslash and stop.
                value += c;
                pos += 1;
                break;
            }
            value += handle_escape_char(next_char);
            pos += 2;
            continue;
        }

        value += c;
        pos += 1;
    }

    return [{ v: value, token_type: "string" }, pos];
}
/**
 * Produces the text to emit for an escape sequence inside a string.
 *
 * Escapes are shown exactly as written, so every sequence (`\n`, `\t`, `\r`,
 * `\"`, `\\` and any other character) maps to a backslash followed by the
 * escaped character.
 *
 * @param next_char the character that follows the backslash
 * @returns the two-character escape sequence as source text
 */
function handle_escape_char(next_char: string): string {
    return "\\" + next_char;
}

75
src/lexer/types.ts Normal file
View File

@ -0,0 +1,75 @@
/**
 * Links a span of the source to a reference string.
 * NOTE(review): `symbol_start`/`symbol_end` are presumably offsets into the
 * source code, and `reference` a link target — confirm against the compiler.
 */
export type ReferenceItem = {
symbol_start: number
symbol_end: number
reference: string
}
/**
 * A token as reported by the native compiler.
 */
export interface Token {
/** Category of the token. */
token_type: TokenType
/** Text of the token. */
value: string
/** Location of the token; presumably an offset into the source — TODO confirm the unit. */
position: number
}
/**
 * Every token category a `Token` can carry, spelled exactly as the string
 * found in `Token.token_type`.
 */
export type TokenType =
"Identifier" |
"Datatype" |
"Int" |
"Float" |
"String" |
"Operator" |
"LeftParen" |
"RightParen" |
"LeftBracket" |
"RightBracket" |
"LeftBrace" |
"RightBrace" |
"NewLine" |
"Comment" |
"MultilineComment" |
"Comma" |
"INDENT" |
"DEDENT" |
"VAL" |
"VAR" |
"EOF" |
"FUN";
/**
 * Wrapper around an error from one of the compiler stages. All fields are
 * optional; presumably only the one matching the failing stage is present —
 * verify against the compiler output.
 */
export interface Err {
Lex?: LexError
Syntax?: SyntaxError
Semantic?: SemanticError
}
/**
 * An error found while lexing: a single position plus a description.
 */
export interface LexError {
/** Where the error was found; presumably an offset into the source — TODO confirm. */
position: number
/** Description of the problem. */
reason: string
}
/**
 * An error found while parsing: a start/end range plus a description.
 * Note that inside modules importing this type the name shadows the built-in
 * `SyntaxError`.
 */
export interface SyntaxError {
/** Start of the offending range; presumably an offset into the source — TODO confirm. */
error_start: number
/** End of the offending range (inclusive/exclusive not visible here — TODO confirm). */
error_end: number
/** Description of the problem. */
reason: string
}
/**
 * An error found during semantic analysis: a start/end range plus a
 * description. Same shape as `SyntaxError`.
 */
export interface SemanticError {
/** Start of the offending range; presumably an offset into the source — TODO confirm. */
error_start: number
/** End of the offending range (inclusive/exclusive not visible here — TODO confirm). */
error_end: number
/** Description of the problem. */
reason: string
}
/**
 * Result of running the compiler over a piece of code. Every field is
 * optional; consumers are expected to check which one is present
 * (presumably exactly one — verify against the compiler output).
 */
export interface TokenizeResult {
/** All checks passed */
Ok?: Array<Token>,
/** There were semantic errors */
SemanticError?: [Array<Token>, Err],
/** There were syntax errors */
SyntaxError?: [Array<Token>, Err],
/** No checks passed */
LexError?: Err,
}
/**
 * Selects which compiler stages are allowed to report errors when
 * highlighting code. The numeric values are explicit so that a level can be
 * given as a plain number.
 */
export enum HighlightLevel {
/** Lowest level. */
Lexic = 0,
/** Middle level. */
Syntactic = 1,
/** Highest level. */
Semantic = 2,
}

View File

@ -24,7 +24,7 @@ As a regex: `[a-z_][a-zA-Z0-9_]*`
Defined with `val`, followed by a variable name and a value. Defined with `val`, followed by a variable name and a value.
<Code thpcode={` <Code level={2} thpcode={`
val surname = "Doe" val surname = "Doe"
val year_of_birth = 1984 val year_of_birth = 1984
`} /> `} />
@ -33,14 +33,14 @@ val year_of_birth = 1984
Written after the `val` keyword but before the variable name. Written after the `val` keyword but before the variable name.
<Code thpcode={` <Code level={2} thpcode={`
val String surname = "Doe" val String surname = "Doe"
val Int year_of_birth = 1984 val Int year_of_birth = 1984
`} /> `} />
When annotating an immutable variable the `val` keyword is optional When annotating an immutable variable the `val` keyword is optional
<Code thpcode={` <Code level={2} thpcode={`
// Equivalent to the previous code // Equivalent to the previous code
String surname = "Doe" String surname = "Doe"
Int year_of_birth = 1984 Int year_of_birth = 1984
@ -51,7 +51,7 @@ This means that if a variable only has a datatype, it is immutable.
It is a compile error to declare a variable of a datatype, It is a compile error to declare a variable of a datatype,
but use another. but use another.
<Code thpcode={` <Code level={2} thpcode={`
// Declare the variable as a String, but use a Float as its value // Declare the variable as a String, but use a Float as its value
String capital = 123.456 String capital = 123.456
`} /> `} />
@ -61,7 +61,7 @@ String capital = 123.456
Defined with `var`, followed by a variable name and a value. Defined with `var`, followed by a variable name and a value.
<Code thpcode={` <Code level={2} thpcode={`
var name = "John" var name = "John"
var age = 32 var age = 32
`} /> `} />
@ -70,14 +70,14 @@ var age = 32
Written after the `var` keyword but before the variable name. Written after the `var` keyword but before the variable name.
<Code thpcode={` <Code level={2} thpcode={`
var String name = "John" var String name = "John"
var Int age = 32 var Int age = 32
`} /> `} />
When annotating a mutable variable the keyword `var` is still **required**. When annotating a mutable variable the keyword `var` is still **required**.
<Code thpcode={` <Code level={2} thpcode={`
// Equivalent to the previous code // Equivalent to the previous code
var String name = "John" var String name = "John"
var Int age = 32 var Int age = 32

View File

@ -1,218 +0,0 @@
/*
step {
line 1
set "a" "b"
unset "a"
}
*/
import { scan_number } from "../lexer/number_lexer";
import { scan_string } from "../lexer/string_lexer";
import { is_digit, is_lowercase, is_uppercase } from "../lexer/utils";
// Kinds of tokens in the machine-step script: the five instruction/keyword
// words, the two literal kinds, and the braces that delimit a step.
enum TokenType {
Step,
Line,
Set,
Out,
Number,
String,
Unset,
BraceOpen,
BraceClose,
};
// A token is its kind plus its source text; braces carry `undefined` as text.
type Token = [TokenType, string | undefined];
/**
 * Creates a stream of tokens from a machine-step script.
 *
 * Words (`step`, `line`, `set`, `unset`, `out`), numbers, double-quoted
 * strings and braces become tokens. Whitespace between tokens (spaces, tabs,
 * and both `\n` and `\r\n` line endings) is skipped.
 *
 * @param input the script source
 * @returns the tokens in source order
 * @throws Error on any character that cannot start a token
 */
function lex(input: string): Array<Token> {
    const characters = input.split("");
    const characters_len = characters.length;
    const tokens: Array<Token> = [];
    let next_p = 0;

    while (next_p < characters_len) {
        const c = characters[next_p]!;

        if (is_lowercase(c) || is_uppercase(c)) {
            // word
            const [token, next] = lex_word(characters, next_p);
            tokens.push(token);
            next_p = next;
        } else if (is_digit(c)) {
            // number
            const [token, next] = scan_number(input, next_p);
            tokens.push([TokenType.Number, token.v]);
            next_p = next;
        } else if (c === "\"") {
            // string
            const [token, next] = scan_string(input, next_p);
            tokens.push([TokenType.String, token.v]);
            next_p = next;
        } else if (c === "{") {
            tokens.push([TokenType.BraceOpen, undefined]);
            next_p += 1;
        } else if (c === "}") {
            tokens.push([TokenType.BraceClose, undefined]);
            next_p += 1;
        } else if (c === " " || c === "\n" || c === "\t" || c === "\r") {
            // insignificant whitespace, including tabs and Windows line endings
            next_p += 1;
        } else {
            throw new Error(`Invalid character: \`${c}\``);
        }
    }

    return tokens;
}
/**
 * Reads one word (letters, digits and underscores) starting at `pos` and maps
 * it to its keyword token.
 *
 * @param input the script, one character per element
 * @param pos index of the word's first character
 * @returns the keyword token and the index of the first character after it
 * @throws Error when the word is not `step`, `line`, `set`, `unset` or `out`
 */
function lex_word(input: Array<string>, pos: number): [Token, number] {
    const is_word_char = (ch: string) =>
        is_lowercase(ch) || is_uppercase(ch) || is_digit(ch) || ch === "_";

    let word = "";
    let cursor = pos;
    for (let ch = input[cursor]; ch !== undefined && is_word_char(ch); ch = input[++cursor]) {
        word += ch;
    }

    const keywords = new Map<string, TokenType>([
        ["step", TokenType.Step],
        ["line", TokenType.Line],
        ["set", TokenType.Set],
        ["unset", TokenType.Unset],
        ["out", TokenType.Out],
    ]);

    // Compare against undefined explicitly: enum members may be 0.
    const token_type = keywords.get(word);
    if (token_type === undefined) {
        throw new Error(`Invalid word: ${word}`);
    }

    return [[token_type, word], cursor];
}
// Kinds of instructions that can appear inside a step.
export enum InstructionType {
Line,
Set,
Unset,
Out,
}
// A single parsed instruction. Operands are stored as their raw token text.
export type Instruction = {
// which instruction this is
t: InstructionType,
// first operand (the number for `Line`, a string token otherwise)
v0: string,
// second operand; only filled in for `Set`
v1?: string,
}
/**
 * Parses a machine-step script: tokenizes `input`, then groups the tokens
 * into steps.
 *
 * @param input the script source
 * @returns one array of instructions per `step { … }` block, in source order
 */
export function parse_str(input: string): Array<Array<Instruction>> {
    const tokens = lex(input);
    return parse(tokens);
}
/**
 * Parses the tokens into an instruction set: consecutive `step` blocks are
 * read until every token has been consumed.
 *
 * @param tokens the full token stream
 * @returns the instructions of each step, in source order
 */
function parse(tokens: Array<Token>): Array<Array<Instruction>> {
    const steps: Array<Array<Instruction>> = [];

    for (let cursor = 0; cursor < tokens.length; ) {
        const [step, after] = parse_step(tokens, cursor);
        steps.push(step);
        cursor = after;
    }

    return steps;
}
/**
 * Parses one `step { … }` block starting at `_pos`.
 *
 * @param tokens the full token stream
 * @param _pos index of the `step` keyword
 * @returns the step's instructions and the index right after the closing brace
 * @throws Error when `step`, the opening brace or the closing brace is missing
 */
function parse_step(tokens: Array<Token>, _pos: number): [Array<Instruction>, number] {
    expect(tokens, _pos, TokenType.Step, "expected step");
    expect(tokens, _pos + 1, TokenType.BraceOpen, "expected opening brace");

    const instructions: Array<Instruction> = [];
    let cursor = _pos + 2;

    // Collect instructions until something that is not an instruction shows up.
    let [inst, next] = parse_instruction(tokens, cursor);
    while (inst !== null) {
        instructions.push(inst);
        cursor = next;
        [inst, next] = parse_instruction(tokens, cursor);
    }

    expect(tokens, cursor, TokenType.BraceClose, "expected closing brace");
    return [instructions, cursor + 1];
}
/**
 * Tries to parse a single instruction starting at `_pos`.
 *
 * Recognized forms:
 * - `line <number>`
 * - `set <string> <string>`
 * - `unset <string>`
 * - `out <string>`
 *
 * @param tokens the full token stream
 * @param _pos index where the instruction is expected to start
 * @returns the instruction and the index right after it, or `[null, _pos]`
 *          when the token at `_pos` does not start an instruction (this
 *          includes running past the end of the stream)
 * @throws Error when an instruction keyword is not followed by its operands
 */
function parse_instruction(tokens: Array<Token>, _pos: number): [Instruction|null, number] {
    const current = tokens[_pos];
    // End of input: report "no instruction" so the caller can raise its own
    // error about the missing closing brace.
    if (current === undefined) {
        return [null, _pos];
    }

    switch (current[0]) {
        case TokenType.Line: {
            expect(tokens, _pos + 1, TokenType.Number, "expected a number after the `line` instruction");
            return [{
                t: InstructionType.Line,
                v0: tokens[_pos + 1]![1]!,
            }, _pos + 2];
        }
        case TokenType.Set: {
            expect(tokens, _pos + 1, TokenType.String, "expected a string after the `set` instruction");
            expect(tokens, _pos + 2, TokenType.String, "expected a second string after the `set` instruction");
            return [{
                t: InstructionType.Set,
                v0: tokens[_pos + 1]![1]!,
                v1: tokens[_pos + 2]![1]!,
            }, _pos + 3];
        }
        case TokenType.Unset: {
            expect(tokens, _pos + 1, TokenType.String, "expected a string after the `unset` instruction");
            return [{
                t: InstructionType.Unset,
                v0: tokens[_pos + 1]![1]!,
            }, _pos + 2];
        }
        case TokenType.Out: {
            expect(tokens, _pos + 1, TokenType.String, "expected a string after the `out` instruction");
            return [{
                t: InstructionType.Out,
                v0: tokens[_pos + 1]![1]!,
            }, _pos + 2];
        }
        default:
            // Not an instruction keyword (e.g. the closing brace).
            return [null, _pos];
    }
}
/**
 * Asserts that the token at `pos` has the given type.
 *
 * @param t the full token stream
 * @param pos index of the token to check
 * @param type the token type required at `pos`
 * @param err message prefix used for the thrown error
 * @throws Error when the stream ends before `pos`, or when the token at `pos`
 *         has a different type (the offending token is also logged)
 */
function expect(t: Array<Token>, pos: number, type: TokenType, err: string) {
    const found = t[pos];

    // Ran out of tokens: report it as a parse error rather than failing while
    // destructuring `undefined`.
    if (found === undefined) {
        throw new Error(err + " , got end of input");
    }

    if (found[0] !== type) {
        console.error("`" + String(found) + "`");
        throw new Error(err + " , got " + found);
    }
}