Add docs for lang spec

master
Araozu 2024-05-30 20:03:54 -05:00
parent 670364e386
commit 088ec3f867
42 changed files with 490 additions and 37 deletions

1
public/js/alpine-3.14.0.min.js vendored Normal file

File diff suppressed because one or more lines are too long

View File

@ -37,10 +37,10 @@ const { showSidebarButton = true } = Astro.props;
How to guides
</a>
<a
href="/reference/"
href="/spec/"
class="hidden lg:inline-block px-4 font-display font-bold-text-xl hover:underline"
>
Language reference
Language spec
</a>
<a
href="/api/std/"

View File

@ -36,6 +36,6 @@ const { title } = Astro.props;
<body class="bg-c-bg text-c-text">
<slot />
<script src="//unpkg.com/alpinejs" defer></script>
<script src="/js/alpine-3.14.0.min.js" defer></script>
</body>
</html>

View File

@ -0,0 +1,18 @@
---
import PagesLayout from "./PagesLayout.astro";
const { frontmatter, headings } = Astro.props;
const posts = await Astro.glob("../pages/learn/**/*.{md,mdx}");
const indexSubpath = `/learn/index.mdx`;
---
<PagesLayout
frontmatter={frontmatter}
headings={headings}
posts={posts}
indexSubpath={indexSubpath}
basePath="/learn/"
>
<slot />
</PagesLayout>

View File

@ -4,12 +4,8 @@ import BaseLayout from "./BaseLayout.astro";
import TOC from "../components/TOC.astro";
import Sidebar from "../components/Sidebar.astro";
const { frontmatter, headings } = Astro.props;
const posts = await Astro.glob("../pages/learn/**/*.{md,mdx}");
// The index.md page must have a `pagesLayout` frontmatter, which declares the order of all the pages.
const indexSubpath = `/learn/index.mdx`;
const { frontmatter, headings, posts: _posts, indexSubpath, basePath } = Astro.props;
const posts: Record<string, any>[] = _posts;
const indexPage = posts.find((post) => post.file.endsWith(indexSubpath));
@ -66,7 +62,7 @@ function validateEntry(entry: PageEntry, basePath: string) {
}
for (const entry of pagesIndex) {
validateEntry(entry, `/learn/`);
validateEntry(entry, basePath);
}
---
@ -82,7 +78,7 @@ for (const entry of pagesIndex) {
<nav class="py-4 pr-2 overflow-x-scroll h-[calc(100vh-3rem)]">
{
pagesIndex.map((entry) => (
<Sidebar entry={entry} basePath="/learn/" />
<Sidebar entry={entry} basePath={basePath} />
))
}
</nav>

View File

@ -0,0 +1,18 @@
---
import PagesLayout from "./PagesLayout.astro";
const { frontmatter, headings } = Astro.props;
const posts = await Astro.glob("../pages/spec/**/*.{md,mdx}");
const indexSubpath = `/spec/index.md`;
---
<PagesLayout
frontmatter={frontmatter}
headings={headings}
posts={posts}
indexSubpath={indexSubpath}
basePath="/spec/"
>
<slot />
</PagesLayout>

View File

@ -1,5 +1,5 @@
---
layout: ../../../layouts/PagesLayout.astro
layout: ../../../layouts/DocsLayout.astro
title: Comments
---

View File

@ -1,5 +1,5 @@
---
layout: ../../../layouts/PagesLayout.astro
layout: ../../../layouts/DocsLayout.astro
title: Datatypes
---

View File

@ -1,5 +1,5 @@
---
layout: ../../../layouts/PagesLayout.astro
layout: ../../../layouts/DocsLayout.astro
title: Hello world
---
import InteractiveCode from "../../../components/InteractiveCode.astro";

View File

@ -1,5 +1,5 @@
---
layout: ../../../layouts/PagesLayout.astro
layout: ../../../layouts/DocsLayout.astro
title: Operators
---

View File

@ -1,5 +1,5 @@
---
layout: ../../../layouts/PagesLayout.astro
layout: ../../../layouts/DocsLayout.astro
title: Variables
---

View File

@ -1,5 +1,5 @@
---
layout: ../../../layouts/PagesLayout.astro
layout: ../../../layouts/DocsLayout.astro
title: Anonymous classes
---
# Anonymous classes

View File

@ -1,5 +1,5 @@
---
layout: ../../../layouts/PagesLayout.astro
layout: ../../../layouts/DocsLayout.astro
title: Classes
---

View File

@ -1,5 +1,5 @@
---
layout: ../../../layouts/PagesLayout.astro
layout: ../../../layouts/DocsLayout.astro
title: Interfaces
---

View File

@ -1,5 +1,5 @@
---
layout: ../../../layouts/PagesLayout.astro
layout: ../../../layouts/DocsLayout.astro
title: Magic methods
---

View File

@ -1,5 +1,5 @@
---
layout: ../../../layouts/PagesLayout.astro
layout: ../../../layouts/DocsLayout.astro
title: Static
---

View File

@ -1,5 +1,5 @@
---
layout: ../../../layouts/PagesLayout.astro
layout: ../../../layouts/DocsLayout.astro
title: Arrays
---

View File

@ -1,5 +1,5 @@
---
layout: ../../../layouts/PagesLayout.astro
layout: ../../../layouts/DocsLayout.astro
title: Enums
---

View File

@ -1,5 +1,5 @@
---
layout: ../../../layouts/PagesLayout.astro
layout: ../../../layouts/DocsLayout.astro
title: Maps
---

View File

@ -1,5 +1,5 @@
---
layout: ../../../layouts/PagesLayout.astro
layout: ../../../layouts/DocsLayout.astro
title: Tuples
---

View File

@ -1,5 +1,5 @@
---
layout: ../../../layouts/PagesLayout.astro
layout: ../../../layouts/DocsLayout.astro
title: Nullable types
---

View File

@ -1,5 +1,5 @@
---
layout: ../../../layouts/PagesLayout.astro
layout: ../../../layouts/DocsLayout.astro
title: Try/Exceptions
---
import InteractiveCode from "../../../components/InteractiveCode.astro";

View File

@ -1,5 +1,5 @@
---
layout: ../../../layouts/PagesLayout.astro
layout: ../../../layouts/DocsLayout.astro
title: Blocks
---

View File

@ -1,5 +1,5 @@
---
layout: ../../../layouts/PagesLayout.astro
layout: ../../../layouts/DocsLayout.astro
title: Conditionals
---

View File

@ -1,5 +1,5 @@
---
layout: ../../../layouts/PagesLayout.astro
layout: ../../../layouts/DocsLayout.astro
title: Loops
---

View File

@ -1,5 +1,5 @@
---
layout: ../../../layouts/PagesLayout.astro
layout: ../../../layouts/DocsLayout.astro
title: Match
---

View File

@ -1,5 +1,5 @@
---
layout: ../../../layouts/PagesLayout.astro
layout: ../../../layouts/DocsLayout.astro
title: Declaration
---

View File

@ -1,5 +1,5 @@
---
layout: ../../../layouts/PagesLayout.astro
layout: ../../../layouts/DocsLayout.astro
title: Higher Order Functions
---

View File

@ -1,5 +1,5 @@
---
layout: ../../../layouts/PagesLayout.astro
layout: ../../../layouts/DocsLayout.astro
title: Lambdas
---

View File

@ -1,5 +1,5 @@
---
layout: ../../../layouts/PagesLayout.astro
layout: ../../../layouts/DocsLayout.astro
title: Function parameters
---

View File

@ -1,5 +1,5 @@
---
layout: ../../layouts/PagesLayout.astro
layout: ../../layouts/DocsLayout.astro
title: Welcome
pagesLayout:
- path: index

View File

@ -1,5 +1,5 @@
---
layout: ../../layouts/PagesLayout.astro
layout: ../../layouts/DocsLayout.astro
title: Install
---

36
src/pages/spec/ast/ast.md Normal file
View File

@ -0,0 +1,36 @@
---
layout: ../../../layouts/SpecLayout.astro
title: AST
---
# THP AST
Created during the syntax analysis phase, from the stream of
tokens produced by the lexical analysis phase.
## File and modules
Every file has its own AST, and every file is a module.
```ebnf
AST = Module
Module = (Statement | Expression)*
```
## Statement
(At the moment) a statement is either a variable binding or a function declaration
```ebnf
Statement = VariableBinding
| FunctionDeclaration
```
## Expression
See the Expression section

129
src/pages/spec/index.md Normal file
View File

@ -0,0 +1,129 @@
---
layout: ../../layouts/SpecLayout.astro
title: Welcome
pagesLayout:
- path: index
- path: tokens
title: Tokens
children:
- path: tokens
- path: numbers
- path: identifier
- path: string
- path: comments
- path: operator
- path: grouping
- path: newline
- path: ast
title: THP AST
children:
- path: ast
---
# The THP Language Specification
This series of pages defines the THP Programming Language.
THP's grammar is context-dependent.
The syntax is specified using a weird mix of Extended Backus Naur Form
and RegExp:
```abnf
; comments
syntax = concatenation
concatenation = alternation grouping
alternation = "a" | "b"
| "c"
grouping = ("a", "b")
optional = "a"?
one_or_more = "a"+
zero_or_more = "a"*
range = "1".."9"
literal = "a"
```
## Compiler architecture
The compiler consists of 5 common phases:
- **Lexical Analysis**: Transforms the source code into tokens
- **Syntactic Analysis**: Parses the tokens and generates an AST
- **Semantic Analysis**: Checks the AST structure and performs type checking
- **IR**: Transforms the THP AST into a PHP AST
- **Codegen**: Generates PHP source code from the PHP AST
## Source Code representation
Source code is encoded in UTF-8, and a single Unicode code point is
a single character. As THP is implemented using the Rust programming
language, rules around Rust's UTF-8 usage are followed.
## Basic characters
Although the source code must be encoded in UTF-8, most of the actual
source code will use only the basic 128 ASCII characters. String contents may
contain any Unicode code point.
```abnf
underscore = "_"
decimal_digit = "0".."9"
binary_digit = "0" | "1"
octal_digit = "0".."7"
hex_digit = decimal_digit | "a".."f" | "A".."F"
lowercase_letter = "a".."z"
uppercase_letter = "A".."Z"
```
## Whitespace
THP is partially whitespace sensitive. It uses the following tokens: Indent, Dedent & NewLine
to determine when an expression spans multiple lines.
The lexer stores the indentation level of every line, and when scanning the next line,
compares the previous indentation to the new one. If the amount of whitespace is
greater than before, it emits an Indent token. If it's lower, it emits a Dedent token, and
if it's the same it does nothing.
```thp
1 + 2
+ 3
+ 4
```
The previous code would emit the following tokens: `1` `+` `2` `NewLine` `Indent` `+` `3` `NewLine`
`+` `4` `Dedent`
Additionally, it is a lexical error to have wrong indentation. The lexer stores all
previous indentation levels in a stack, and reports an error if a decrease in indentation
doesn't match a previous level.
```thp
if true { // 0 indentation
print() // 4 indentation
print() // 2 indentation. Error. There is no 2-indentation level
}
```
These tokens are used to detect when an expression is done, instead of relying on
semicolons. This is performed by the parser.
Every other production of the grammar doesn't care about whitespace/indentation, so
those ignore whitespace.

View File

@ -0,0 +1,16 @@
---
layout: ../../../layouts/SpecLayout.astro
title: Comment
---
# Comment
```ebnf
Comment = "//", any_except_new_line
```
```thp
// This is a comment
//
// Another // comment
```

View File

@ -0,0 +1,17 @@
---
layout: ../../../layouts/SpecLayout.astro
title: Grouping signs
---
# Grouping signs
Each grouping sign has its own token:
```ebnf
LeftParen = "("
RightParen = ")"
LeftBracket = "["
RightBracket = "]"
LeftBrace = "{"
RightBrace = "}"
```

View File

@ -0,0 +1,54 @@
---
layout: ../../../layouts/SpecLayout.astro
title: Identifiers & Datatypes
---
# Identifiers & Datatypes
Upper and lowercase letters carry different meaning when at the start of a word.
A Datatype must always start with an uppercase letter, and an identifier must start
with either a lowercase letter or an underscore.
## Identifier
```ebnf
Identifier = (underscore | lowercase_letter), identifier_letter*
identifier_letter = underscore | lowercase_letter | uppercase_letter | decimal_digit
```
```thp
identifier
_identifier
_123
_many_letters
camelCase
```
## Datatype
```ebnf
Datatype = uppercase_letter, identifier_letter*
```
```thp
Datatype
PDO
WEIRD_DATATYPE
```
## Keywords
The following are (currently) THP keywords:
```thp
val var fun
```
Keywords are scanned first as identifiers, then transformed
to their respective tokens.

View File

@ -0,0 +1,15 @@
---
layout: ../../../layouts/SpecLayout.astro
title: New line
---
# New line
When there are multiple empty lines, only a single NewLine token
is emitted.
```ebnf
NewLine = "\n", empty_line*
empty_line = " "*, "\n"
```

View File

@ -0,0 +1,56 @@
---
layout: ../../../layouts/SpecLayout.astro
title: Numbers
---
# Numbers
## Int
```ebnf
Int = hexadecimal_number
| decimal_number
hexadecimal_number = "0", ("x" | "X"), hex_digit+
decimal_number = decimal_digit+
```
```thp
12345
01234 // This is a decimal number, not an octal number
0xff25
0XFfaA
```
`TODO`: Implement octal `0o777` and binary `0b0110`.
`TODO`: Allow underscores `_` between any number: `1_000_000`.
## Float
```ebnf
Float = decimal_number, ".", decimal_number+, scientific_notation?
| decimal_number, scientific_notation
scientific_notation = "e", ("+" | "-"), decimal_number
```
```thp
123.456
123.456e+4
123.456e-2
123e+10
123e-3
```
All floating point numbers must start with at least 1 digit.
`.5` is not a valid floating point number.
`TODO`: Allow scientific notation to omit the `+`/`-`: `10e4`.

View File

@ -0,0 +1,30 @@
---
layout: ../../../layouts/SpecLayout.astro
title: Operator
---
# Operator
```ebnf
Operator = operator_char+
operator_char = "+" | "-" | "=" | "*" | "!" | "/" | "|"
| "@" | "#" | "$" | "~" | "%" | "&" | "?"
| "<" | ">" | "^" | "." | ":"
```
```thp
+ - / * % < > <= >= -> =>
```
These are all the characters that can make an operator.
The lexer doesn't know about any operator in particular.
In other languages something like `+-1` would be interpreted
as `+` `-` `1`. In THP, this is always `+-` `1`, and that
would throw an error because the operator `+-` doesn't exist.
## Comma
Comma is its own token: `,`.

View File

@ -0,0 +1,29 @@
---
layout: ../../../layouts/SpecLayout.astro
title: String
---
# String
A string is single line, delimited by double quotes `"` only.
```ebnf
String = double_quote, (escape_seq | string_char)*, double_quote
double_quote = '"'
escape_seq = "\n"
| '\"'
| "\r"
| "\\"
| "\t"
string_char = any_unicode_except_newline_and_double_quote
```
```thp
"hello"
""
"it's me"
"\"Mario\""
```
`TODO`: String interpolation

View File

@ -0,0 +1,38 @@
---
layout: ../../../layouts/SpecLayout.astro
title: Index
---
# Tokens index
These are all the THP tokens:
```rust
pub enum TokenType {
Identifier,
Datatype,
Int,
Float,
String,
Operator,
LeftParen,
RightParen,
LeftBracket,
RightBracket,
LeftBrace,
RightBrace,
NewLine,
Comment,
Comma,
INDENT,
DEDENT,
VAL,
VAR,
EOF,
FUN,
}
```
Every keyword has its own token.