import { spawn } from "node:child_process";
import { leftTrimDedent } from "../components/utils";

export interface Token {
    token_type: TokenType
    value: string
    position: number
}

type TokenType =
    "Identifier" |
    "Datatype" |
    "Int" |
    "Float" |
    "String" |
    "Operator" |
    "LeftParen" |
    "RightParen" |
    "LeftBracket" |
    "RightBracket" |
    "LeftBrace" |
    "RightBrace" |
    "NewLine" |
    "Comment" |
    "MultilineComment" |
    "Comma" |
    "INDENT" |
    "DEDENT" |
    "VAL" |
    "VAR" |
    "EOF" |
    "FUN";

export interface Err {
    Lex?: LexError
    Syntax?: SyntaxError
}

export interface LexError {
    position: number
    reason: string
}

export interface SyntaxError {
    error_start: number
    error_end: number
    reason: string
}

export interface TokenizeResult {
    Ok?: Token[],
    TokensOnly?: [Token[], Err],
    Err?: Err,
}
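
// A sketch of the three mutually exclusive shapes this type models.
// The values below are illustrative only; the real payload is the JSON
// emitted by the THP compiler:
//
// { Ok: [{ token_type: "VAL", value: "val", position: 0 }] }
// { TokensOnly: [[/* tokens */], { Syntax: { error_start: 4, error_end: 9, reason: "..." } }] }
// { Err: { Lex: { position: 2, reason: "Unexpected character" } } }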

// Tailwind classes used to underline errored tokens with a red wavy line
const error_classes = "underline decoration-wavy decoration-red-500";

/**
 * Lexes THP code with the native compiler binary and renders it as
 * highlighted HTML.
 * @returns A tuple [html, error_type, error_message]; error_message is
 * null when highlighting succeeds.
 */
export async function native_highlighter(code: string): Promise<[string, string, string | null]> {
    const formatted_code = leftTrimDedent(code).join("\n");

    let result: TokenizeResult;
    try {
        result = await native_lex(formatted_code);
    } catch (error) {
        return compiler_error(formatted_code, error as Error);
    }

    if (result.Err) {
        return lex_error_highlighter(formatted_code, result.Err.Lex!);
    } else if (result.TokensOnly) {
        const [tokens, error] = result.TokensOnly;
        return syntax_error_highlighter(formatted_code, tokens, error.Syntax!);
    }

    const tokens = result.Ok!;
    const output = highlight_tokens(formatted_code, tokens);

    return [output, "", null];
}
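
// Usage sketch (hypothetical caller; the names below are assumptions,
// e.g. an Astro component rendering a THP code block on the server):
//
// const [html, error_type, error_message] = await native_highlighter(rawCode);
// if (error_message !== null) {
//     console.error(`${error_type} error: ${error_message}`);
// }
// // `html` is then injected into the page, e.g. via Astro's set:html.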

/**
 * Highlights code that contains a lexical error
 */
function lex_error_highlighter(code: string, error: LexError): [string, string, string] {
    // Create a single error token around the offending character
    const err_pos = error.position;
    const before_err = code.substring(0, err_pos);
    const err_str = code[err_pos];
    const after_err = code.substring(err_pos + 1);

    const token = `<span class="token ${error_classes}">${err_str}</span>`;
    const all = `${before_err}${token}${after_err}`;

    // TODO: Transform the absolute position (error.position) into line:column
    return [all, "Lexical", `${error.reason} at position ${error.position}`];
}
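
// For example (hypothetical input), highlighting `val 1x = 2` with a
// LexError at position 4 wraps only the offending character:
// val <span class="token underline decoration-wavy decoration-red-500">1</span>x = 2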

function syntax_error_highlighter(code: string, tokens: Array<Token>, error: SyntaxError): [string, string, string] {
    const highlighted = highlight_tokens(code, tokens, error.error_start, error.error_end);
    const error_message = `${error.reason} from position ${error.error_start} to ${error.error_end}`;
    return [highlighted, "Syntax", error_message];
}

function compiler_error(code: string, error: Error): [string, string, string] {
    return [code, "Fatal Compiler", error.message];
}

/**
 * Transforms a list of tokens into colored HTML, and underlines errors
 * if present
 * @param input The original source code
 * @param tokens The list of tokens
 * @param error_start Absolute position from where the error starts.
 * @param error_end Absolute position to where the error ends.
 * @returns An HTML string of the highlighted code
 */
function highlight_tokens(input: string, tokens: Array<Token>, error_start = -1, error_end = -1): string {
    const input_chars = input.split("");
    let output = "";

    let current_pos = 0;
    for (let i = 0; i < tokens.length; i += 1) {
        const t = tokens[i]!;
        const token_start = t.position;
        const token_end = t.position + t.value.length;

        // Underline every token that falls inside the reported error range
        const is_errored = token_start >= error_start && token_end <= error_end;

        // There are some tokens that are empty, ignore them
        if (t.value === "") {
            continue;
        }

        // Append all characters before the token
        output += input_chars.slice(current_pos, token_start).join("");

        // Append the token. Some tokens (like multiline comments) require
        // processing to recover their source representation.
        const [token_value, new_token_end] = process_token_value_and_end(t.value, t.token_type, token_end);
        const token_type = translate_token_type(t.token_type, token_value);
        output += `<span class="token ${token_type} ${is_errored ? error_classes : ""}">${token_value}</span>`;

        current_pos = new_token_end;
    }

    // Append any remaining characters after the last token
    output += input_chars.slice(current_pos).join("");

    return output;
}
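
// For example (hypothetical token), { token_type: "VAL", value: "val", position: 0 }
// with no error range becomes:
// <span class="token keyword ">val</span>
// (the trailing space comes from the empty error-class slot).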

/**
 * Certain tokens store values that differ from the source code representation.
 * For example, the multiline comment token stores the content of the comment
 * without `/*` and `* /`; this function handles those cases.
 *
 * @param value The value of the token
 * @param token_type The type of the token, used to know if it needs preprocessing
 * @param first_end The position where the token ends according to the token value
 * @returns The token value as written in the source, and its real end position
 */
function process_token_value_and_end(value: string, token_type: TokenType, first_end: number): [string, number] {
    let token_value = value;
    let new_end = first_end;
    if (token_type === "MultilineComment") {
        // Restore the comment delimiters stripped by the lexer
        token_value = `/*${token_value}*/`;
        new_end += 4;
    }

    // Escape HTML and return
    return [
        token_value.replaceAll(/</g, "&lt;").replaceAll(/>/g, "&gt;"),
        new_end
    ];
}
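
// For example (hypothetical values): a MultilineComment token whose value
// is " docs " and whose computed end is 8 comes back as ["/* docs */", 12],
// restoring the 2-character delimiters on each side.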

function translate_token_type(tt: TokenType, value: string): string {
    const keywords = ["throws", "extends", "constructor", "case", "static", "const",
        "enum", "union", "loop", "use", "break", "catch", "continue", "as", "do",
        "else", "finally", "for", "fun", "if", "in", "fn", "nil", "return", "throw",
        "try", "while", "type", "match", "with", "of", "abstract", "class", "interface",
        "private", "protected", "pub", "override", "open", "init", "val", "var", "mut", "clone"];

    switch (tt) {
        case "Datatype":
            return "class-name";
        case "Identifier": {
            if (keywords.includes(value)) {
                return "keyword";
            }

            return "identifier";
        }
        case "Int":
        case "Float":
            return "number";
        case "String":
            return "string";
        case "Comment":
        case "MultilineComment":
            return "comment";
        // keywords:
        case "VAL":
        case "VAR":
        case "FUN":
            return "keyword";
        default:
            return tt;
    }
}
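
// For example, translate_token_type("Identifier", "while") yields "keyword"
// because `while` is in the keyword list, whereas
// translate_token_type("Identifier", "counter") yields "identifier".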

/**
 * Runs the native THP binary with the `tokenize` subcommand, piping `code`
 * through stdin and parsing the JSON it writes to stdout. A non-zero exit
 * code rejects the promise with whatever was written to stderr.
 */
const native_lex = (code: string) => new Promise<TokenizeResult>((resolve, reject) => {
    // Get binary path from .env
    const binary = import.meta.env.THP_BINARY;
    if (!binary) {
        // Throwing inside the executor rejects the promise
        throw new Error("THP_BINARY not set in .env");
    }

    const subprocess = spawn(binary, ["tokenize"]);
    let response = "";
    let error = "";

    subprocess.stdin.write(code);
    subprocess.stdin.end();

    subprocess.stdout.on("data", (data) => {
        response += data.toString();
    });

    subprocess.stderr.on("data", (data) => {
        error += data.toString();
    });

    subprocess.on("close", (exit_code) => {
        if (exit_code === 0) {
            // Guard against malformed output: a throw from JSON.parse here
            // would otherwise escape the promise
            try {
                resolve(JSON.parse(response));
            } catch (e) {
                reject(e);
            }
        } else {
            reject(new Error(error));
        }
    });
});