From 838bd233ac9b0d45b1f21435a7b0119f20c6521a Mon Sep 17 00:00:00 2001 From: Araozu Date: Mon, 20 May 2024 09:21:51 -0500 Subject: [PATCH] Create parse for thp state machine dsl --- src/components/InteractiveCode.astro | 6 +- src/lexer/identifier_lexer.ts | 4 +- src/lexer/lexer.ts | 2 +- src/thp_machine/machine_parser.ts | 188 +++++++++++++++++++++++++++ 4 files changed, 194 insertions(+), 6 deletions(-) create mode 100644 src/thp_machine/machine_parser.ts diff --git a/src/components/InteractiveCode.astro b/src/components/InteractiveCode.astro index 822f0cc..4838f51 100644 --- a/src/components/InteractiveCode.astro +++ b/src/components/InteractiveCode.astro @@ -120,7 +120,7 @@ const codeHtml = highlightCode(trimAndDedent(code));
state
-
+
@@ -128,10 +128,10 @@ const codeHtml = highlightCode(trimAndDedent(code));
- -
// ---------------------------------------------------------------------------
// Patch context carried over from the original diff so that no information is
// lost. The patch text was whitespace-collapsed, so line breaks/indentation in
// the hunks below are reconstructed; the tokens themselves are unchanged.
//
// diff --git a/src/lexer/identifier_lexer.ts b/src/lexer/identifier_lexer.ts
// index cb9e7cd..3390f93 100644
// --- a/src/lexer/identifier_lexer.ts
// +++ b/src/lexer/identifier_lexer.ts
// @@ -10,11 +10,11 @@ import { is_identifier_char } from "./utils";
//   * @param is_datatype whether the identifier is a datatype
//   */
//  export function scan_identifier(input: string, starting_position: number, is_datatype = false): [Token, number] {
// -    let value = input[starting_position];
// +    let value = input[starting_position]!;
//      let pos = starting_position + 1;
//
//      while (pos < input.length) {
// -        const c = input[pos];
// +        const c = input[pos]!;
//
//          if (is_identifier_char(c)) {
//              pos += 1;
//
// diff --git a/src/lexer/lexer.ts b/src/lexer/lexer.ts
// index b683c16..5c1b131 100644
// --- a/src/lexer/lexer.ts
// +++ b/src/lexer/lexer.ts
// @@ -34,7 +34,7 @@ export function lex(code: string): Array {
//      let current_default_token = "";
//
//      while (current_pos < code_len) {
// -        const c = code[current_pos];
// +        const c = code[current_pos]!;
//
//          // try to scan a number
//          if (is_digit(c)) {
//
// Original patch header of the file whose (revised) contents follow:
//
// diff --git a/src/thp_machine/machine_parser.ts b/src/thp_machine/machine_parser.ts
// new file mode 100644
// index 0000000..56c7404
// --- /dev/null
// +++ b/src/thp_machine/machine_parser.ts
// @@ -0,0 +1,188 @@
// ---------------------------------------------------------------------------

/*
 * Lexer and parser for the THP state machine DSL.
 *
 * A program is a sequence of `step` blocks; each block holds zero or more
 * instructions:
 *
 * step {
 *     line 1
 *     set "a" "b"
 *     unset "a"
 * }
 */

import { scan_number } from "../lexer/number_lexer";
import { scan_string } from "../lexer/string_lexer";
import { is_digit, is_lowercase, is_uppercase } from "../lexer/utils";

/** Kinds of tokens produced by `lex`. Member order is kept stable. */
enum TokenType {
    Step,
    Line,
    Set,
    Number,
    String,
    Unset,
    BraceOpen,
    BraceClose,
}

/**
 * A token is a `[type, value]` pair. Braces carry no value (`undefined`);
 * keywords carry their own spelling; numbers and strings carry the `v` field
 * of the token returned by `scan_number` / `scan_string`.
 */
type Token = [TokenType, string | undefined];

/** The only words the DSL accepts, mapped to their token type. */
const KEYWORDS: ReadonlyMap<string, TokenType> = new Map<string, TokenType>([
    ["step", TokenType.Step],
    ["line", TokenType.Line],
    ["set", TokenType.Set],
    ["unset", TokenType.Unset],
]);

/**
 * Creates a stream of tokens from the DSL source text.
 *
 * @param input the DSL source
 * @returns the tokens in source order
 * @throws Error on a character that cannot start any token, or on a word that
 *         is not one of the DSL keywords
 */
function lex(input: string): Array<Token> {
    const characters = input.split("");
    const characters_len = characters.length;
    const tokens: Array<Token> = [];
    let next_p = 0;

    while (next_p < characters_len) {
        const c = characters[next_p]!;

        if (is_lowercase(c) || is_uppercase(c)) {
            // word (keyword)
            const [token, next] = lex_word(characters, next_p);
            tokens.push(token);
            next_p = next;
        } else if (is_digit(c)) {
            // number: delegated to the main lexer, which works on the raw string
            const [token, next] = scan_number(input, next_p);
            tokens.push([TokenType.Number, token.v]);
            next_p = next;
        } else if (c === "\"") {
            // string: delegated to the main lexer as well
            const [token, next] = scan_string(input, next_p);
            tokens.push([TokenType.String, token.v]);
            next_p = next;
        } else if (c === "{") {
            tokens.push([TokenType.BraceOpen, undefined]);
            next_p += 1;
        } else if (c === "}") {
            tokens.push([TokenType.BraceClose, undefined]);
            next_p += 1;
        } else if (c === " " || c === "\n" || c === "\t" || c === "\r") {
            // Whitespace separates tokens and is otherwise ignored. Tabs and
            // carriage returns are accepted so that tab-indented sources and
            // CRLF line endings do not fail with "Invalid character".
            next_p += 1;
        } else {
            throw new Error(`Invalid character: \`${c}\``);
        }
    }

    return tokens;
}

/**
 * Scans a word (letters, digits and `_`) starting at `pos` and turns it into
 * a keyword token.
 *
 * @param input the source, one character per element
 * @param pos   index of the first character of the word
 * @returns the keyword token and the index right after the word
 * @throws Error if the word is not a DSL keyword
 */
function lex_word(input: Array<string>, pos: number): [Token, number] {
    let next_p = pos;
    let value = "";

    let c = input[next_p];
    while (c !== undefined && (is_lowercase(c) || is_uppercase(c) || is_digit(c) || c === "_")) {
        value += c;
        next_p += 1;
        c = input[next_p];
    }

    const token_type = KEYWORDS.get(value);
    if (token_type === undefined) {
        throw new Error(`Invalid word: ${value}`);
    }

    return [[token_type, value], next_p];
}

/** Kinds of instructions that can appear inside a `step` block. */
enum InstructionType {
    Line,
    Set,
    Unset,
}

/**
 * A parsed instruction.
 *
 * - `Line`:  `v0` is the number token's value
 * - `Set`:   `v0` and `v1` are the first and second string token values
 * - `Unset`: `v0` is the string token's value
 */
type Instruction = {
    t: InstructionType,
    v0: string,
    v1?: string,
}

/**
 * Parses the tokens into an instruction set: one array of instructions per
 * `step { ... }` block, in source order.
 *
 * @param tokens tokens produced by `lex`
 * @returns the instructions of every step; empty if there are no tokens
 * @throws Error if the tokens are not a sequence of well-formed steps
 */
function parse(tokens: Array<Token>): Array<Array<Instruction>> {
    const steps: Array<Array<Instruction>> = [];
    let pos = 0;

    while (pos < tokens.length) {
        const [instructions, next] = parse_step_at(tokens, pos);
        steps.push(instructions);
        pos = next;
    }

    return steps;
}

/**
 * Parses a single `step { ... }` block starting at `_pos`.
 *
 * @param tokens tokens produced by `lex`
 * @param _pos   index of the `step` token
 * @returns the instructions inside the block
 * @throws Error if the block is malformed
 */
function parse_step(tokens: Array<Token>, _pos: number): Array<Instruction> {
    return parse_step_at(tokens, _pos)[0];
}

/**
 * Same as `parse_step`, but also reports the index of the token that follows
 * the closing brace so that `parse` can continue with the next step.
 */
function parse_step_at(tokens: Array<Token>, start: number): [Array<Instruction>, number] {
    let pos = start;

    expect(tokens, pos, TokenType.Step, "expected step");
    pos += 1;
    expect(tokens, pos, TokenType.BraceOpen, "expected opening brace");
    pos += 1;

    const instructions: Array<Instruction> = [];

    // Consume instructions until something that is not an instruction shows
    // up; that token must then be the closing brace.
    for (;;) {
        const [inst, next] = parse_instruction(tokens, pos);
        if (inst === null) {
            break;
        }

        instructions.push(inst);
        pos = next;
    }

    expect(tokens, pos, TokenType.BraceClose, "expected closing brace");

    return [instructions, pos + 1];
}

/**
 * Tries to parse one instruction at `_pos`.
 *
 * @param tokens tokens produced by `lex`
 * @param _pos   index where the instruction should start
 * @returns `[instruction, next_position]`, or `[null, _pos]` when the token at
 *          `_pos` does not start an instruction (including end of input)
 * @throws Error if an instruction keyword is not followed by its operands
 */
function parse_instruction(tokens: Array<Token>, _pos: number): [Instruction | null, number] {
    const head = tokens[_pos];
    if (head === undefined) {
        // End of input: let the caller report the missing closing brace.
        return [null, _pos];
    }

    switch (head[0]) {
        case TokenType.Line: {
            const v0 = expect_value(tokens, _pos + 1, TokenType.Number, "expected a number after the `line` instruction");
            return [{ t: InstructionType.Line, v0 }, _pos + 2];
        }
        case TokenType.Set: {
            const v0 = expect_value(tokens, _pos + 1, TokenType.String, "expected a string after the `set` instruction");
            const v1 = expect_value(tokens, _pos + 2, TokenType.String, "expected a second string after the `set` instruction");
            return [{ t: InstructionType.Set, v0, v1 }, _pos + 3];
        }
        case TokenType.Unset: {
            const v0 = expect_value(tokens, _pos + 1, TokenType.String, "expected a string after the `unset` instruction");
            return [{ t: InstructionType.Unset, v0 }, _pos + 2];
        }
        default:
            return [null, _pos];
    }
}

/**
 * Asserts that the token at `pos` exists and has the given type.
 *
 * @param t    the token list
 * @param pos  index of the token to check
 * @param type the required token type
 * @param err  message of the error thrown on mismatch
 * @throws Error with message `err` if the token is missing or of another type;
 *         the offending token (or `undefined`) is logged first
 */
function expect(t: Array<Token>, pos: number, type: TokenType, err: string): void {
    const token = t[pos];
    if (token === undefined || token[0] !== type) {
        console.error(token);
        throw new Error(err);
    }
}

/**
 * Like `expect`, but additionally returns the token's value.
 *
 * @throws Error with message `err` if the token is missing, of another type,
 *         or carries no value
 */
function expect_value(t: Array<Token>, pos: number, type: TokenType, err: string): string {
    expect(t, pos, type, err);

    const value = t[pos]?.[1];
    if (value === undefined) {
        throw new Error(err);
    }

    return value;
}

// NOTE(review): smoke test that runs every time this module is loaded; kept
// because it is existing module-level behavior. Consider moving it to a test.
console.log(parse_step(lex(" step { line 20 set \"a\" \"b\" }"), 0))