Compare commits

..

No commits in common. "27bfca8880810b34bba2f8b818efb91c88eea027" and "771a4b70447b3e2357d4e58394ad534a45c7eb7d" have entirely different histories.

12 changed files with 56 additions and 131 deletions

View File

@ -1,17 +1,10 @@
--- ---
import { native_highlighter } from "../lexer/highlighter"; import { native_highlighter } from "../lexer/highlighter";
import CodeError from "./docs/CodeError.astro";
const { thpcode } = Astro.props; const { thpcode } = Astro.props;
const [native_html, error_type, error_message] = await native_highlighter(thpcode); const native_html = await native_highlighter(thpcode);
--- ---
<pre <pre
class="language-thp"><code class="language-thp" set:html={native_html} /><span class="absolute top-2 right-2 inline-block text-sm select-none opacity-75">thp class="language-thp"><code class="language-thp" set:html={native_html} /><span class="absolute top-2 right-2 inline-block text-sm select-none opacity-75">thp</span></pre>
</span></pre>
{
error_message !== null && (
<CodeError error_type={error_type}>{error_message}</CodeError>
)
}

View File

@ -1,8 +0,0 @@
---
const { error_type = "Unknown" } = Astro.props;
---
<div class="px-4 py-2 rounded bg-red-200 dark:bg-red-950">
<span class="inline-block font-bold">{error_type} error:</span>
<slot />
</div>

View File

@ -4,7 +4,7 @@ import PagesLayout from "./PagesLayout.astro";
const { frontmatter, headings } = Astro.props; const { frontmatter, headings } = Astro.props;
const posts = await Astro.glob("../pages/spec/**/*.{md,mdx}"); const posts = await Astro.glob("../pages/spec/**/*.{md,mdx}");
const indexSubpath = `/spec/index.mdx`; const indexSubpath = `/spec/index.md`;
--- ---
<PagesLayout <PagesLayout

View File

@ -1,6 +1,11 @@
import { spawn } from "node:child_process"; import { spawn } from "node:child_process";
import { leftTrimDedent } from "../components/utils"; import { leftTrimDedent } from "../components/utils";
export interface LexResult {
Ok?: Token[]
Err?: Err
}
export interface Token { export interface Token {
token_type: TokenType token_type: TokenType
value: string value: string
@ -31,8 +36,7 @@ type TokenType =
"FUN"; "FUN";
export interface Err { export interface Err {
Lex?: LexError Lex: LexError
Syntax?: SyntaxError
} }
export interface LexError { export interface LexError {
@ -40,72 +44,23 @@ export interface LexError {
reason: string reason: string
} }
export interface SyntaxError {
error_start: number
error_end: number
reason: string
}
export interface TokenizeResult { export async function native_highlighter(code: string): Promise<string> {
Ok?: Token[],
TokensOnly?: [Token[], Err],
Err?: Err,
}
export async function native_highlighter(code: string): Promise<[string, string, string | null]> {
let formatted_code = leftTrimDedent(code).join("\n"); let formatted_code = leftTrimDedent(code).join("\n");
const result = await native_lex(formatted_code); const result = await native_lex(formatted_code);
if (result.Err) { if (result.Err) {
return lex_error_highlighter(formatted_code, result.Err!.Lex!); throw new Error(JSON.stringify(result.Err.Lex) + "\n" + code);
}
else if (result.TokensOnly) {
// TODO
const [tokens, error] = result.TokensOnly!;
return syntax_error_highlighter(formatted_code, tokens, error.Syntax!);
} }
const tokens = result.Ok!; const tokens = result.Ok!;
const output = highlight_tokens(formatted_code, tokens); const input_chars = formatted_code.split("");
return [output, "", null];
}
/**
* Highlights code that has a lexical error
*/
function lex_error_highlighter(code: string, error: LexError): [string, string, string] {
// Create a single error token
const err_pos = error.position;
const before_err = code.substring(0, err_pos);
const err_str = code[err_pos];
const after_err = code.substring(err_pos + 1);
const token = `<span class="token underline decoration-wavy decoration-red-500">${err_str}</span>`;
const all = `${before_err}${token}${after_err}`;
// TODO: Transform absolute position (error.position) into line:column
return [all, "Lexical", error.reason + " at position " + error.position]
}
function syntax_error_highlighter(code: string, tokens: Array<Token>, error: SyntaxError): [string, string, string] {
const highlighted = highlight_tokens(code, tokens);
const error_message = `${error.reason} from position ${error.error_start} to ${error.error_end}`;
return [highlighted, "Syntax", error_message];
}
function highlight_tokens(input: string, tokens: Array<Token>): string {
const input_chars = input.split("");
let output = ""; let output = "";
let current_pos = 0; let current_pos = 0;
for (let i = 0; i < tokens.length; i += 1) { for (let i = 0; i < tokens.length; i += 1) {
const t = tokens[i]!; const t = tokens[i]!;
const token_start = t.position; const token_start = t.position;
@ -130,7 +85,6 @@ function highlight_tokens(input: string, tokens: Array<Token>): string {
return output; return output;
} }
function translate_token_type(tt: TokenType, value: string): string { function translate_token_type(tt: TokenType, value: string): string {
const keywords = ["throws", "extends", "constructor", "case", "static", "const", const keywords = ["throws", "extends", "constructor", "case", "static", "const",
"enum", "union", "loop", "use", "break", "catch", "continue", "as", "do", "enum", "union", "loop", "use", "break", "catch", "continue", "as", "do",
@ -166,7 +120,7 @@ function translate_token_type(tt: TokenType, value: string): string {
} }
} }
const native_lex = (code: string) => new Promise<TokenizeResult>((resolve, reject) => { const native_lex = (code: string) => new Promise<LexResult>((resolve, reject) => {
// Get binary path from .env // Get binary path from .env
const binary = import.meta.env.THP_BINARY; const binary = import.meta.env.THP_BINARY;
if (!binary) { if (!binary) {

View File

@ -5,11 +5,3 @@ title: Readonly
import Code from "../../../components/Code.astro" import Code from "../../../components/Code.astro"
# Readonly # Readonly
<Code thpcode={`
class Caño
{
}
`} />

View File

@ -57,11 +57,11 @@ fun UserDetail(User user) -> HTML
{ {
<div> <div>
@match user.type @match user.type
@case ::Admin case ::Admin
{ {
<button>Delete resource</button> <button>Delete resource</button>
} }
@case ::User case ::User
{ {
<button disable>Not allowed</button> <button disable>Not allowed</button>
} }

View File

@ -20,7 +20,6 @@ pagesLayout:
- path: ast - path: ast
- path: expression - path: expression
--- ---
import Code from "../../components/Code.astro"
# The THP Language Specification # The THP Language Specification
@ -101,11 +100,11 @@ greater than before, it emits an Indent token. If it's lower, emits a Dedent toke
if it's the same it does nothing. if it's the same it does nothing.
<Code thpcode={` ```thp
1 + 2 1 + 2
+ 3 + 3
+ 4 + 4
`} /> ```
The previous code would emit the following tokens: `1` `+` `2` `NewLine` `Indent` `+` `3` `NewLine` The previous code would emit the following tokens: `1` `+` `2` `NewLine` `Indent` `+` `3` `NewLine`
`+` `4` `Dedent` `+` `4` `Dedent`
@ -115,12 +114,12 @@ Additionally, it is a lexical error to have wrong indentation. The lexer stores a
previous indentation levels in a stack, and reports an error if a decrease in indentation previous indentation levels in a stack, and reports an error if a decrease in indentation
doesn't match a previous level. doesn't match a previous level.
<Code thpcode={` ```thp
if true { // 0 indentation if true { // 0 indentation
// print() // 4 indentation print() // 4 indentation
// print() // 2 indentation. Error. There is no 2-indentation level print() // 2 indentation. Error. There is no 2-indentation level
} }
`} /> ```
All productions of the grammar ignore whitespace/indentation, except those involved in All productions of the grammar ignore whitespace/indentation, except those involved in
semicolon inference. semicolon inference.
@ -135,26 +134,26 @@ Statements in THP end when a new line is encountered:
<Code thpcode={` ```thp
// The statement ends | here, on the newline // The statement ends | here, on the newline
val value = (123 + 456) * 0.75 val value = (123 + 456) * 0.75
`} /> ```
<Code thpcode={` ```thp
// Each line contains a different statement. They all end on their new lines // Each line contains a different statement. They all end on their new lines
var a = 1 + 2 // a = 3 var a = 1 + 2 // a = 3
+ 3 // this is not part of \`a\`, this is a different statement + 3 // this is not part of `a`, this is a different statement
`} /> ```
This is true even if the line ends with an operator: This is true even if the line ends with an operator:
<Code thpcode={` ```thp
// These are still different statements // These are still different statements
var a = 1 + 2 + // This is now a compile error, there is a hanging `+` var a = 1 + 2 + // This is now a compile error, there is a hanging `+`
3 // This is still a different statement 3 // This is still a different statement
`} /> ```
### Parenthesis ### Parenthesis
@ -162,16 +161,16 @@ var a = 1 + 2 + // This is now a compile error, there is a hanging `+`
Exception 1: When a parenthesis is open, all following whitespace is ignored Exception 1: When a parenthesis is open, all following whitespace is ignored
until the closing parenthesis. until the closing parenthesis.
<Code thpcode={` ```thp
// open parenthesis found, all whitespace is ignored until the closing // open parenthesis found, all whitespace is ignored until the closing
name.contains( name.contains(
"weird" "weird"
) )
`} /> ```
However, for a parenthesis to begin to act, it needs to be open on the same line. However, for a parenthesis to begin to act, it needs to be open on the same line.
<Code thpcode={` ```thp
// Still 2 statements, because the parenthesis is in a new line // Still 2 statements, because the parenthesis is in a new line
print print
( (
@ -182,7 +181,7 @@ print
print( print(
"hello" "hello"
) )
`} /> ```
### Indented binary operator ### Indented binary operator
@ -190,22 +189,22 @@ Exception 2:
- When a binary operator is followed by indentation: - When a binary operator is followed by indentation:
<Code thpcode={` ```thp
val sum = 1 + 2 + // The line ends with a binary operator val sum = 1 + 2 + // The line ends with a binary operator
3 // There is indentation 3 // There is indentation
`} /> ```
- Or when indentation is followed by a binary operator: - Or when indentation is followed by a binary operator:
<Code thpcode={` ```thp
val sum = 1 + 2 val sum = 1 + 2
+ 3 // Indentation and a binary operator + 3 // Indentation and a binary operator
`} /> ```
In these cases, all whitespace will be ignored In these cases, all whitespace will be ignored
until the indentation returns to the initial level. until the indentation returns to the initial level.
<Code thpcode={` ```thp
// This method chain is a single statement because of the indentation // This method chain is a single statement because of the indentation
val person = PersonBuilder() val person = PersonBuilder()
.set_name("john") .set_name("john")
@ -216,6 +215,6 @@ val person = PersonBuilder()
// Here indentation returns, and a new statement begins // Here indentation returns, and a new statement begins
print(person) print(person)
`} /> ```

View File

@ -2,7 +2,6 @@
layout: ../../../layouts/SpecLayout.astro layout: ../../../layouts/SpecLayout.astro
title: Comment title: Comment
--- ---
import Code from "../../../components/Code.astro"
# Comment # Comment
@ -10,8 +9,8 @@ import Code from "../../../components/Code.astro"
Comment = "//", any_except_new_line Comment = "//", any_except_new_line
``` ```
<Code thpcode={` ```thp
// This is a comment // This is a comment
// //
// Another // comment // Another // comment
`} /> ```

View File

@ -2,7 +2,6 @@
layout: ../../../layouts/SpecLayout.astro layout: ../../../layouts/SpecLayout.astro
title: Identifiers & Datatypes title: Identifiers & Datatypes
--- ---
import Code from "../../../components/Code.astro"
# Identifiers & Datatypes # Identifiers & Datatypes
@ -19,13 +18,13 @@ Identifier = (underscore | lowercase_letter), identifier_letter*
identifier_letter = underscore | lowercase_letter | uppercase_letter | decimal_digit identifier_letter = underscore | lowercase_letter | uppercase_letter | decimal_digit
``` ```
<Code thpcode={` ```thp
identifier identifier
_identifier _identifier
_123 _123
_many_letters _many_letters
camelCase camelCase
`} /> ```
## Datatype ## Datatype
@ -34,20 +33,20 @@ camelCase
Datatype = uppercase_letter, identifier_letter* Datatype = uppercase_letter, identifier_letter*
``` ```
<Code thpcode={` ```thp
Datatype Datatype
PDO PDO
WEIRD_DATATYPE WEIRD_DATATYPE
`} /> ```
## Keywords ## Keywords
The following are (currently) THP keywords: The following are (currently) THP keywords:
<Code thpcode={` ```thp
val var fun val var fun
`} /> ```
Keywords are scanned first as identifiers, then transformed Keywords are scanned first as identifiers, then transformed
to their respective tokens. to their respective tokens.

View File

@ -2,7 +2,6 @@
layout: ../../../layouts/SpecLayout.astro layout: ../../../layouts/SpecLayout.astro
title: Numbers title: Numbers
--- ---
import Code from "../../../components/Code.astro"
# Numbers # Numbers
@ -16,12 +15,12 @@ hexadecimal_number = "0", ("x" | "X"), hexadecimal_digit+
decimal_number = decimal_digit+ decimal_number = decimal_digit+
``` ```
<Code thpcode={` ```thp
12345 12345
01234 // This is a decimal number, not an octal number 01234 // This is a decimal number, not an octal number
0xff25 0xff25
0XFfaA 0XFfaA
`} /> ```
`TODO`: Implement octal `0o777` and binary `0b0110`. `TODO`: Implement octal `0o777` and binary `0b0110`.
@ -37,14 +36,14 @@ Float = decimal_number, ".", decimal_number+, scientific_notation?
scientific_notation = "e", ("+" | "-"), decimal_number scientific_notation = "e", ("+" | "-"), decimal_number
``` ```
<Code thpcode={` ```thp
123.456 123.456
123.456e+4 123.456e+4
123.456e-2 123.456e-2
123e+10 123e+10
123e-3 123e-3
`} /> ```
All floating point numbers must start with at least 1 digit. All floating point numbers must start with at least 1 digit.

View File

@ -2,7 +2,6 @@
layout: ../../../layouts/SpecLayout.astro layout: ../../../layouts/SpecLayout.astro
title: Operator title: Operator
--- ---
import Code from "../../../components/Code.astro"
# Operator # Operator
@ -15,9 +14,9 @@ operator_char = "+" | "-" | "=" | "*" | "!" | "/" | "|"
| "<" | ">" | "^" | "." | ":" | "<" | ">" | "^" | "." | ":"
``` ```
<Code thpcode={` ```thp
+ - / * % < > <= >= -> => + - / * % < > <= >= -> =>
`} /> ```
These are all the characters that can make an operator. These are all the characters that can make an operator.

View File

@ -2,7 +2,6 @@
layout: ../../../layouts/SpecLayout.astro layout: ../../../layouts/SpecLayout.astro
title: String title: String
--- ---
import Code from "../../../components/Code.astro"
# String # String
@ -20,11 +19,11 @@ escape_seq = "\n"
string_char = any_unicode_except_newline_and_double_quote string_char = any_unicode_except_newline_and_double_quote
``` ```
<Code thpcode={` ```thp
"hello" "hello"
"" ""
"it's me" "it's me"
"\\"Mario\\"" "\"Mario\""
`} /> ```
`TODO`: String interpolation `TODO`: String interpolation