Compare commits

..

6 Commits

Author SHA1 Message Date
b78b30a34d use the new highlighter in all the pages 2024-03-27 12:01:14 -05:00
d098b60a38 scan strings and comments 2024-03-27 08:36:34 -05:00
2e93df0fd8 scan datatypes 2024-03-27 08:18:31 -05:00
4665d87b5f scan identifiers 2024-03-27 08:12:32 -05:00
6490e8dbaa Complete minimal flow for editor highlighting 2024-03-26 18:29:52 -05:00
184ed14435 write a custom highlighter for codejar 2024-03-26 18:05:58 -05:00
21 changed files with 566 additions and 1304 deletions

1
.gitignore vendored
View File

@ -1,3 +1,4 @@
node_modules node_modules
static/css/out.css static/css/out.css
static/learn static/learn
static/js/highlighter.js

BIN
bun.lockb Executable file

Binary file not shown.

21
lexer/highlighter.ts Normal file
View File

@ -0,0 +1,21 @@
import {lex} from "./lexer.ts";
import { CodeJar } from "codejar"
/**
 * Escapes the characters that are significant to the HTML parser, so the
 * given text can be placed inside element content or a quoted attribute.
 *
 * @param text raw text
 * @returns the text with `&`, `<`, `>` and `"` replaced by entities
 */
function escape_html(text: string): string {
    return text
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;");
}

/**
 * Highlight callback for CodeJar: reads the plain text of the editor, lexes
 * it, and replaces the editor contents with one `<span class="token …">`
 * per token.
 *
 * All token text is HTML-escaped before it is written to `innerHTML`;
 * otherwise `<`, `>` or `&` inside strings and comments would be parsed as
 * markup instead of being displayed.
 *
 * @param editor the editor element; only `textContent` is read and only
 *               `innerHTML` is written
 */
function thp_highlighter(editor: { textContent: string | null, innerHTML: string }) {
    const code = editor.textContent ?? "";
    const tokens = lex(code);

    let highlighted_code = "";
    for (const token of tokens) {
        // `lex` stores `<` as `&lt;` inside default (untyped) tokens only.
        // Default tokens never contain letters (a letter always starts an
        // identifier token), so any `&lt;` found there comes from the lexer
        // and can be undone before escaping everything uniformly.
        const raw = token.token_type === ""
            ? token.v.replace(/&lt;/g, "<")
            : token.v;

        highlighted_code += `<span class="token ${escape_html(token.token_type)}">${escape_html(raw)}</span>`;
    }

    editor.innerHTML = highlighted_code;
}
// Expose the highlighter as a global, so that plain inline <script> tags in the
// pages can pass it to CodeJar after loading the bundled highlighter.js.
// @ts-ignore
window.thp_highlighter = thp_highlighter;
// Expose CodeJar as a global as well; the pages call `CodeJar(...)` directly.
// @ts-ignore
window.CodeJar = CodeJar;

View File

@ -0,0 +1,55 @@
import { expect, test, describe } from "bun:test";
import { scan_identifier } from "./identifier_lexer";
describe("Identifier Lexer", () => {
    // Each row: test name, input code, whether to scan as a datatype,
    // expected token and expected position after the token.
    const cases: Array<[string, string, boolean, { v: string, token_type: string }, number]> = [
        ["should return an identifier token", "a", false, { v: "a", token_type: "identifier" }, 1],
        ["should scan an underscore", "_", false, { v: "_", token_type: "identifier" }, 1],
        ["should scan an identifier with an underscore", "a_", false, { v: "a_", token_type: "identifier" }, 2],
        ["should scan an identifier that starts with an underscore", "_a", false, { v: "_a", token_type: "identifier" }, 2],
        ["should scan an identifier with numbers and uppercase letters", "aA1", false, { v: "aA1", token_type: "identifier" }, 3],
        ["should scan a keyword", "val", false, { v: "val", token_type: "keyword" }, 3],
        ["should scan a datatype", "Int", true, { v: "Int", token_type: "class-name" }, 3],
    ];

    for (const [name, code, as_datatype, expected_token, expected_next] of cases) {
        test(name, () => {
            // Only pass the third argument when the case asks for a datatype,
            // so the default parameter is exercised by every other case.
            const result = as_datatype
                ? scan_identifier(code, 0, true)
                : scan_identifier(code, 0);

            expect(result).toEqual([expected_token, expected_next]);
        });
    }
});

44
lexer/identifier_lexer.ts Normal file
View File

@ -0,0 +1,44 @@
import type { Token } from "./lexer.ts";
import { is_identifier_char } from "./utils.ts";
/**
 * Scans an identifier that begins at `starting_position`.
 *
 * The first character is taken as-is and is expected to be a letter or an
 * underscore (the caller decides that); every following identifier character
 * is then consumed.
 *
 * @param input the input string
 * @param starting_position index of the first character of the identifier
 * @param is_datatype when true the token is tagged "class-name"; otherwise it
 *                    is tagged "keyword" or "identifier" via `check_keyword`
 * @returns the scanned token, and the position right after it
 */
export function scan_identifier(input: string, starting_position: number, is_datatype = false): [Token, number] {
    // Find the end of the run of identifier characters after the first one.
    let end = starting_position + 1;
    while (end < input.length && is_identifier_char(input[end])) {
        end += 1;
    }

    // First character is accepted unconditionally, the rest is the run found above.
    let text = input[starting_position];
    if (end > starting_position + 1) {
        text += input.slice(starting_position + 1, end);
    }

    const token_type = is_datatype ? "class-name" : check_keyword(text);
    return [{ v: text, token_type }, end];
}
/**
 * Reserved words of the language. Built once at module load instead of on
 * every call, since `check_keyword` runs for every identifier scanned.
 */
const KEYWORDS: ReadonlySet<string> = new Set([
    "case", "static", "const", "enum", "loop", "use", "break", "catch",
    "continue", "do", "else", "finally", "for", "fun", "if", "in", "fn", "nil",
    "return", "throw", "try", "while", "type", "match", "with", "of",
    "abstract", "class", "interface", "private", "pub", "map", "override",
    "open", "init", "val", "var", "mut", "clone",
]);

/**
 * Classifies an already scanned identifier.
 *
 * @param value the identifier text (the match is case sensitive)
 * @returns "keyword" if the text is a reserved word, "identifier" otherwise
 */
function check_keyword(value: string): string {
    return KEYWORDS.has(value) ? "keyword" : "identifier";
}

45
lexer/lexer.test.ts Normal file
View File

@ -0,0 +1,45 @@
import { expect, test, describe } from "bun:test";
import { lex } from "./lexer";
describe("Lexer", () => {
    // Each row: test name, input code, expected list of tokens.
    const cases: Array<[string, string, Array<{ v: string, token_type: string }>]> = [
        ["empty program should return no tokens", "", []],
        ["program with whitespace should return a single token", " ", [{v: " ", token_type: ""}]],
        ["program with newlines should return a single token", "\n", [{v: "\n", token_type: ""}]],
        ["program with random unicode should return the same unicode", "🍕", [{v: "🍕", token_type: ""}]],
        ["should scan integers", "12345", [{v: "12345", token_type: "number"}]],
        [
            "should scan integers and whitespace around",
            " 12345 \n ",
            [
                {v: " ", token_type: ""},
                {v: "12345", token_type: "number"},
                {v: " \n ", token_type: ""},
            ],
        ],
    ];

    for (const [name, code, expected] of cases) {
        test(name, () => {
            expect(lex(code)).toEqual(expected);
        });
    }
});

138
lexer/lexer.ts Normal file
View File

@ -0,0 +1,138 @@
import { scan_identifier } from "./identifier_lexer.ts";
import { scan_number } from "./number_lexer.ts";
import { scan_string } from "./string_lexer.ts";
import { is_digit, is_lowercase, is_uppercase } from "./utils.ts";
/**
 * A piece of the source text together with its classification, as produced
 * by the lexer and consumed by the highlighter.
 */
export type Token = {
// Text of the token. For default tokens produced by `lex`, every `<` of the
// source is stored as `&lt;`.
v: string,
// Classification of the token. The highlighter emits it as a CSS class next to
// `token`. Values produced by the lexers: "number", "identifier", "keyword",
// "class-name", "string", "comment", and "" for default (unclassified) text.
token_type: string,
};
/**
 * Lexes a string of THP code, and returns an array of tokens. Unlike a regular
 * lexer, whitespace and other characters are not ignored, and are instead
 * collected into default tokens (tokens whose `token_type` is `""`), so that
 * concatenating all the tokens gives back the whole input.
 *
 * This lexer implements a subset of the grammar defined in the THP language
 * specification, only recognizing the following tokens:
 * - Identifier
 * - Datatype
 * - String
 * - Number
 * - Single line comment (`// ...` up to, not including, the line break)
 * - Multi line comment (`/* ... *` + `/`, not nested; an unterminated comment
 *   runs to the end of the input)
 * - Keywords
 *
 * Inside default tokens every `<` is stored as `&lt;`. No other token text is
 * escaped by this function.
 *
 * @param code Code to lex
 * @returns An array of all the tokens found
 */
export function lex(code: string): Array<Token> {
    const code_len = code.length;
    const tokens: Array<Token> = [];
    let current_pos = 0;
    // Text that matched no token rule, accumulated until the next real token.
    let current_default_token = "";

    // Pushes the accumulated default text (if any) as a default token.
    const flush_default_token = (): void => {
        if (current_default_token !== "") {
            tokens.push({ v: current_default_token, token_type: "" });
            current_default_token = "";
        }
    };

    while (current_pos < code_len) {
        const scanned = scan_token_at(code, current_pos);

        if (scanned === null) {
            // Not the start of any known token: keep it as default text,
            // replacing < with &lt;
            const c = code[current_pos];
            current_default_token += c === "<" ? "&lt;" : c;
            current_pos++;
            continue;
        }

        // The default text seen so far comes before the token just scanned.
        flush_default_token();
        const [token, next] = scanned;
        tokens.push(token);
        current_pos = next;
    }

    // if there was a default token, push it to the tokens array
    flush_default_token();
    return tokens;
}

/**
 * Tries to scan one non-default token starting at `pos`.
 *
 * @param code the whole input
 * @param pos position to scan from; must be inside the input
 * @returns the token and the position after it, or `null` if no token rule
 *          starts with the character at `pos`
 */
function scan_token_at(code: string, pos: number): [Token, number] | null {
    const c = code[pos];

    if (is_digit(c)) {
        return scan_number(code, pos);
    }
    if (is_lowercase(c) || c === "_") {
        return scan_identifier(code, pos);
    }
    if (is_uppercase(c)) {
        return scan_identifier(code, pos, true);
    }
    if (c === "\"") {
        return scan_string(code, pos);
    }
    if (c === "/") {
        const following = code[pos + 1];

        if (following === "/") {
            // Single line comment: the line break itself is left out of the token.
            const line_break = code.indexOf("\n", pos);
            const end = line_break === -1 ? code.length : line_break;
            return [{ v: code.slice(pos, end), token_type: "comment" }, end];
        }
        if (following === "*") {
            // Multi line comment: search for the terminator after the opening
            // two characters, so that `/*/` is not treated as already closed.
            const terminator = code.indexOf("*/", pos + 2);
            const end = terminator === -1 ? code.length : terminator + 2;
            return [{ v: code.slice(pos, end), token_type: "comment" }, end];
        }
    }

    return null;
}

View File

@ -0,0 +1,19 @@
import { expect, test, describe } from "bun:test";
import { scan_number } from "./number_lexer";
describe("Number Lexer", () => {
    // Each row: test name, input code, expected token text, expected next position.
    const cases: Array<[string, string, string, number]> = [
        ["should return a whole number token", "1", "1", 1],
        ["should return a whole number token pt 2", "12345", "12345", 5],
    ];

    for (const [name, code, expected_value, expected_next] of cases) {
        test(name, () => {
            const result = scan_number(code, 0);
            expect(result).toEqual([{ v: expected_value, token_type: "number" }, expected_next]);
        });
    }
});

47
lexer/number_lexer.ts Normal file
View File

@ -0,0 +1,47 @@
import type { Token } from "./lexer.ts";
import { is_digit } from "./utils.ts";
/**
 * Scans a number, at the given position in the input string.
 * This function assumes that the character at the given position is a digit.
 * The actual scanning is delegated to `scan_decimal`; this function only
 * wraps the scanned text in a token.
 *
 * @param input the input string
 * @param pos the position to start scanning from
 * @returns a token of type "number" holding the scanned text, and the
 *          position right after it
 */
export function scan_number(input: string, pos: number): [Token, number] {
    const scanned = scan_decimal(input, pos);
    const token: Token = { v: scanned[0], token_type: "number" };
    return [token, scanned[1]];
}
/**
 * Scans a decimal number: one or more digits, optionally followed by a
 * fraction (`.` and one or more digits) and/or an exponent (`e` or `E`, an
 * optional sign, and one or more digits).
 *
 * A `.`, `e` or `E` that is not followed by the required digits is NOT
 * consumed and does not raise an error: the number ends before it, and the
 * caller lexes that character on its own (e.g. `1.foo` gives the number `1`).
 *
 * @param input the input string
 * @param starting_position the position to start scanning from
 * @returns the text of the number and the position right after it. If there
 *          is no digit at `starting_position`, the text is empty and the
 *          position is `starting_position`.
 */
function scan_decimal(input: string, starting_position: number): [string, number] {
    // Integer part
    let pos = skip_digits(input, starting_position);

    // Fraction and exponent only make sense after an integer part.
    if (pos > starting_position) {
        // Fraction: the dot must be directly followed by a digit
        if (input[pos] === "." && pos + 1 < input.length && is_digit(input[pos + 1])) {
            pos = skip_digits(input, pos + 1);
        }

        // Exponent: e/E, optional sign, and at least one digit
        if (input[pos] === "e" || input[pos] === "E") {
            let exponent_start = pos + 1;
            if (input[exponent_start] === "+" || input[exponent_start] === "-") {
                exponent_start += 1;
            }
            if (exponent_start < input.length && is_digit(input[exponent_start])) {
                pos = skip_digits(input, exponent_start);
            }
        }
    }

    return [input.slice(starting_position, pos), pos];
}

/**
 * Returns the position of the first non-digit character at or after `pos`,
 * or the input length if only digits remain.
 */
function skip_digits(input: string, pos: number): number {
    while (pos < input.length && is_digit(input[pos])) {
        pos += 1;
    }
    return pos;
}

View File

@ -0,0 +1,32 @@
import { expect, test, describe } from "bun:test";
import { scan_string } from "./string_lexer";
describe("String Lexer", () => {
    // Each row: test name, input code, expected token text, expected next position.
    const cases: Array<[string, string, string, number]> = [
        ["should scan an empty string", "\"\"", "\"\"", 2],
        ["should scan a string with a single character", "\"a\"", "\"a\"", 3],
        ["should scan a string with multiple characters", "\"hello\"", "\"hello\"", 7],
        ["should scan a string with an escape character", "\"\\n\"", "\"\\n\"", 4],
    ];

    for (const [name, code, expected_value, expected_next] of cases) {
        test(name, () => {
            const result = scan_string(code, 0);
            expect(result).toEqual([{ v: expected_value, token_type: "string" }, expected_next]);
        });
    }
});

49
lexer/string_lexer.ts Normal file
View File

@ -0,0 +1,49 @@
import type { Token } from "./lexer.ts";
/**
 * Scans a double quoted string that starts at `starting_position`.
 * This function assumes that the character at the given position is `"`.
 *
 * Scanning stops after the closing quote, or before a line break or at the
 * end of the input when the string is not terminated. A backslash together
 * with the character after it is kept in the token and never ends the
 * string. A backslash that is the very last character of the input is kept
 * on its own.
 *
 * @param input the input string
 * @param starting_position the position of the opening quote
 * @returns a token of type "string" with the scanned text, and the position
 *          right after it (never past the end of the input)
 */
export function scan_string(input: string, starting_position: number): [Token, number] {
    let value = "\"";
    let pos = starting_position + 1;

    while (pos < input.length) {
        const c = input[pos];

        if (c === "\"") {
            value += c;
            pos += 1;
            break;
        }

        if (c === "\n") {
            // todo: error handling, return an error indicator and the caller should render a red wavy underline
            break;
        }

        if (c === "\\") {
            if (pos + 1 >= input.length) {
                // Backslash at the end of the input: there is no character
                // to escape. Keep the backslash alone instead of reading
                // past the end, which would add the text "undefined".
                value += c;
                pos += 1;
                break;
            }

            value += handle_escape_char(input[pos + 1]);
            pos += 2;
            continue;
        }

        value += c;
        pos += 1;
    }

    return [{ v: value, token_type: "string" }, pos];
}
/**
 * Returns the text to store in a string token for an escape sequence.
 *
 * The sequence is kept exactly as written: a backslash followed by the
 * escaped character, whichever character that is.
 *
 * @param next_char the character that follows the backslash
 * @returns a backslash followed by `next_char`
 */
function handle_escape_char(next_char: string): string {
    const backslash = "\\";
    return backslash + next_char;
}

15
lexer/utils.ts Normal file
View File

@ -0,0 +1,15 @@
/**
 * Tells whether `c` sorts between '0' and '9' (inclusive) in string order.
 * For a single character that means an ASCII digit.
 *
 * @param c the text to check; callers in this project pass one character
 */
export function is_digit(c: string): boolean {
    const not_below_zero = '0' <= c;
    return not_below_zero && c <= '9';
}
/**
 * Tells whether `c` sorts between 'a' and 'z' (inclusive) in string order.
 * For a single character that means an ASCII lowercase letter.
 *
 * @param c the text to check; callers in this project pass one character
 */
export function is_lowercase(c: string): boolean {
    const not_below_a = 'a' <= c;
    return not_below_a && c <= 'z';
}
/**
 * Tells whether `c` sorts between 'A' and 'Z' (inclusive) in string order.
 * For a single character that means an ASCII uppercase letter.
 *
 * @param c the text to check; callers in this project pass one character
 */
export function is_uppercase(c: string): boolean {
    const not_below_a = 'A' <= c;
    return not_below_a && c <= 'Z';
}
/**
 * Tells whether `c` may appear inside an identifier: an underscore, or
 * anything accepted by `is_digit`, `is_uppercase` or `is_lowercase`.
 *
 * @param c the text to check; callers in this project pass one character
 */
export function is_identifier_char(c: string): boolean {
    if (c === '_') {
        return true;
    }
    return is_digit(c) || is_uppercase(c) || is_lowercase(c);
}

View File

@ -76,7 +76,7 @@ val has_key = haystack.contains("needle")
] ]
// THP // THP
Obj { .{
names: #("Toni", "Stark"), // Tuple names: #("Toni", "Stark"), // Tuple
age: 33, age: 33,
numbers: [32, 64, 128] numbers: [32, 64, 128]

View File

@ -5,7 +5,9 @@
"main": "index.js", "main": "index.js",
"scripts": { "scripts": {
"generate": "md-docs", "generate": "md-docs",
"dev": "concurrently -k \"pnpm tailwind:watch\" \"serve ./static/ -l 3333\"", "bundle": "bun build ./lexer/highlighter.ts --outdir ./static/js/ --format esm --minify",
"dev": "concurrently -k \"tailwindcss -i ./tailwind.css -o ./static/css/out.css --watch\" \"serve ./static/ -l 3333\"",
"codemirror": "esbuild --bundle ./static/js/codemirror.js --outfile=./static/js/codemirror.min.js --minify --sourcemap",
"tailwind:watch": "tailwindcss -i ./tailwind.css -o ./static/css/out.css --watch", "tailwind:watch": "tailwindcss -i ./tailwind.css -o ./static/css/out.css --watch",
"tailwind:build": "tailwindcss -i ./tailwind.css -o ./static/css/out.css --minify" "tailwind:build": "tailwindcss -i ./tailwind.css -o ./static/css/out.css --minify"
}, },
@ -13,10 +15,16 @@
"author": "", "author": "",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"@types/bun": "^1.0.10",
"codejar": "^4.2.0",
"tailwindcss": "^3.2.7" "tailwindcss": "^3.2.7"
}, },
"devDependencies": { "devDependencies": {
"concurrently": "^8.2.0", "concurrently": "^8.2.0",
"serve": "^14.2.0" "serve": "^14.2.1",
"bun-types": "latest"
},
"peerDependencies": {
"typescript": "^5.0.0"
} }
} }

File diff suppressed because it is too large Load Diff

View File

@ -13,7 +13,7 @@
<link rel="preconnect" href="https://fonts.googleapis.com"> <link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link <link
href="https://fonts.googleapis.com/css2?family=Fira+Sans:wght@400;500;600;700;800;900&family=Fira+Code&family=Inter:ital,wght@0,400;1,700&display=swap" href="https://fonts.googleapis.com/css2?family=Fira+Sans:wght@400;500;600;700;800;900&family=Fira+Code&display=swap"
rel="stylesheet"> rel="stylesheet">
</head> </head>
@ -26,7 +26,7 @@
</div> </div>
</nav> </nav>
<div class="container mx-auto py-16 grid grid-cols-[5fr_4fr] gap-4 px-10"> <div class="container mx-auto py-16 grid grid-cols-[auto_32rem] gap-4 px-10">
<div class="pl-10 table"> <div class="pl-10 table">
<div class="table-cell align-middle"> <div class="table-cell align-middle">
<h1 class="font-display font-bold text-5xl leading-tight"> <h1 class="font-display font-bold text-5xl leading-tight">
@ -43,19 +43,13 @@
<br> <br>
<br> <br>
<div class="text-center"> <div class="text-center">
<a <a class="inline-block font-display text-lg border-2 border-pink-400 hover:bg-pink-400 transition-colors
class="inline-block font-display text-lg border-2 border-pink-400 hover:bg-pink-400 transition-colors hover:text-c-bg py-3 px-8 mx-6 rounded" href="/learn/">
hover:text-c-bg py-3 px-8 mx-6 rounded"
href="/learn/"
>
Learn Learn
</a> </a>
<a <a class="inline-block font-display text-lg border-2 border-sky-400 py-3 px-8 mx-6 rounded
class="inline-block font-display text-lg border-2 border-sky-400 py-3 px-8 mx-6 rounded transition-colors hover:text-black hover:bg-sky-400" href="/install/">
transition-colors hover:text-black hover:bg-sky-400"
href="/install/"
>
Install Install
</a> </a>
</div> </div>
@ -76,9 +70,18 @@
</g> </g>
</svg> </svg>
<div class="h-1"></div> <div class="h-1"></div>
<pre style="padding: 0 !important; border: none !important;"> <div id="editor" class="font-mono language-thp"></div>
<code class="language-thp"> </div>
// Actual generics & sum types </div>
<script src="/js/highlighter.js"></script>
<script>
let jar = CodeJar(document.getElementById("editor"), thp_highlighter, {
tab: " ",
});
jar.updateCode(
`// Actual generics & sum types
fun find_person(Int person_id) -> Result[String, String] { fun find_person(Int person_id) -> Result[String, String] {
// Easy, explicit error bubbling // Easy, explicit error bubbling
try Person::find_by_id(person_id) try Person::find_by_id(person_id)
@ -93,18 +96,13 @@
// First class HTML-like templates & components // First class HTML-like templates & components
print( print(
&lt;a href="/person/reports/{person.id}"&gt; <a href="/person/reports/{person.id}">
welcome, {person.name} welcome, {person.name}
&lt;/a&gt; </a>
) )
// And more! // And more!`
</code> )
</pre> </script>
</div>
</div>
<script src="/js/prism.min.js"></script>
<script src="/js/prism.thp.js"></script>
</body> </body>
</html> </html>

File diff suppressed because one or more lines are too long

View File

@ -1,25 +0,0 @@
// Prism grammar for THP: maps token names to the patterns that match them.
Prism.languages.thp = {
// Comments: two alternative patterns, block comments first.
"comment": [
{
// `/* ... */`, or an unterminated `/*` running to the end of the input.
// The leading group requires the start of input or a character other than a backslash.
pattern: /(^|[^\\])\/\*[\s\S]*?(?:\*\/|$)/,
lookbehind: true,
greedy: true,
},
{
// `//` up to the end of the line.
// The leading group requires the start of input or a character other than a backslash or a colon.
pattern: /(^|[^\\:])\/\/.*/,
lookbehind: true,
greedy: true,
},
],
// Double quoted strings: a backslash escapes the next character (including a
// line break); otherwise the string cannot contain a raw line break.
"string": {
pattern: /(["])(?:\\(?:\r\n|[\s\S])|(?!\1)[^\\\r\n])*\1/,
greedy: true,
},
// Reserved words, matched as whole words.
"keyword": /\b(?:case|static|const|enum|loop|use|break|catch|continue|do|else|finally|for|fun|if|in|fn|nil|return|throw|try|while|type|match|with|of|abstract|class|interface|private|pub|map|override|open|init|val|var|mut|clone)\b/,
// Hexadecimal literals (`0x...`), or decimal literals with an optional
// fraction and an optional exponent. Case insensitive.
"number": /\b0x[\da-f]+\b|(?:\b\d+(?:\.\d*)?|\B\.\d+)(?:e[+-]?\d+)?/i,
"operator": /[<>]=?|[!=]=?=?|--?|\$|\+\+?|&&?|\|\|?|[?*/~^%]/,
"punctuation": /[{}[\];(),.]/,
"boolean": /\b(?:false|true)\b/,
// A word that starts with an uppercase letter.
"class-name": /\b[A-Z][a-zA-Z_0-9]*\b/,
// A name of at least two characters immediately followed by a colon.
"variable": /\b[a-z_0-9][a-zA-Z_0-9]+:/,
};

View File

@ -49,8 +49,25 @@
</main> </main>
</div> </div>
<script src="/js/prism.min.js"></script> <script src="/js/highlighter.js"></script>
<script src="/js/prism.thp.js"></script> <script>
</body> // Add an editor to all code samples
const code_elements = document.querySelectorAll(".language-thp");
for (const el of [...code_elements]) {
const pre_parent = el.parentElement;
const new_div = document.createElement("div");
const code = el.innerHTML;
el.parentElement.classList.add("language-thp");
pre_parent.removeChild(el);
pre_parent.appendChild(new_div);
CodeJar(new_div, thp_highlighter, {
tab: " "
}).updateCode(code);
}
</script>
</body>
</html> </html>

View File

@ -17,7 +17,7 @@ module.exports = {
} }
}, },
fontFamily: { fontFamily: {
"mono": ["'Fira Code'", "Inconsolata", "Iosevka", "monospace"], "mono": ["Iosevka", "monospace"],
"display": ["Inter", "'Josefin Sans'", "'Fugaz One'", "sans-serif"], "display": ["Inter", "'Josefin Sans'", "'Fugaz One'", "sans-serif"],
"body": ["'Fira Sans'", "Inter", "sans-serif"], "body": ["'Fira Sans'", "Inter", "sans-serif"],
}, },

27
tsconfig.json Normal file
View File

@ -0,0 +1,27 @@
{
"compilerOptions": {
// Enable latest features
"lib": ["ESNext"],
"target": "ESNext",
"module": "ESNext",
"moduleDetection": "force",
"jsx": "react-jsx",
"allowJs": true,
// Bundler mode
"moduleResolution": "bundler",
"allowImportingTsExtensions": true,
"verbatimModuleSyntax": true,
"noEmit": true,
// Best practices
"strict": true,
"skipLibCheck": true,
"noFallthroughCasesInSwitch": true,
// Some stricter flags
"noUnusedLocals": true,
"noUnusedParameters": true,
"noPropertyAccessFromIndexSignature": true
}
}