scan identifiers
This commit is contained in:
parent
6490e8dbaa
commit
4665d87b5f
48
lexer/identifier_lexer.test.ts
Normal file
48
lexer/identifier_lexer.test.ts
Normal file
@ -0,0 +1,48 @@
|
||||
import { expect, test, describe } from "bun:test";
|
||||
import { scan_identifier } from "./identifier_lexer";
|
||||
|
||||
|
||||
// Table-driven suite for `scan_identifier`: every case scans the whole input
// from position 0 and expects a single token spanning the entire string.
describe("Identifier Lexer", () => {
    // [test name, source text, expected token type]
    const cases: Array<[string, string, string]> = [
        ["should return an identifier token", "a", "identifier"],
        ["should scan an underscore", "_", "identifier"],
        ["should scan an identifier with an underscore", "a_", "identifier"],
        ["should scan an identifier that starts with an underscore", "_a", "identifier"],
        ["should scan an identifier with numbers and uppercase letters", "aA1", "identifier"],
        ["should scan a keyword", "val", "keyword"],
    ];

    for (const [name, code, expected_type] of cases) {
        test(name, () => {
            const token = scan_identifier(code, 0);

            // The token value is the full input and the next position is its length.
            expect(token).toEqual([{ v: code, token_type: expected_type }, code.length]);
        });
    }
});
|
||||
|
35
lexer/identifier_lexer.ts
Normal file
35
lexer/identifier_lexer.ts
Normal file
@ -0,0 +1,35 @@
|
||||
import type { Token } from "./lexer.ts";
|
||||
import { is_identifier_char } from "./utils.ts";
|
||||
|
||||
/**
 * Scans an identifier or keyword starting at `starting_position` in `input`.
 *
 * The character at `starting_position` is consumed without being checked: the
 * caller is expected to have verified that it can start an identifier (the
 * lexer dispatches here on a lowercase letter or an underscore). Scanning then
 * continues for as long as `is_identifier_char` accepts the next character.
 *
 * @param input the full source text being lexed
 * @param starting_position index of the first character of the identifier
 * @returns a tuple with the scanned token (its type is decided by
 * `check_keyword`) and the position right after the last consumed character
 */
export function scan_identifier(input: string, starting_position: number): [Token, number] {
    // The first character is taken unconditionally, see the contract above.
    let end = starting_position + 1;

    while (end < input.length && is_identifier_char(input.charAt(end))) {
        end += 1;
    }

    // Slice once instead of appending character by character. This also yields
    // "" (not `undefined`) when `starting_position` is at or past the end of input.
    const value = input.slice(starting_position, end);

    return [{ v: value, token_type: check_keyword(value) }, end];
}
|
||||
|
||||
/**
 * Words that are lexed as keywords instead of identifiers.
 * Built once at module load so lookups do not re-allocate the list per call.
 */
const KEYWORDS: ReadonlySet<string> = new Set([
    "case", "static", "const", "enum", "loop", "use", "break", "catch",
    "continue", "do", "else", "finally", "for", "fun", "if", "in", "fn",
    "nil", "return", "throw", "try", "while", "type", "match", "with", "of",
    "abstract", "class", "interface", "private", "pub", "map", "override",
    "open", "init", "val", "var", "mut", "clone",
]);

/**
 * Classifies a scanned word.
 *
 * @param value the exact text of the scanned word (matching is case-sensitive)
 * @returns "keyword" if `value` is one of the reserved words, "identifier" otherwise
 */
function check_keyword(value: string): string {
    return KEYWORDS.has(value) ? "keyword" : "identifier";
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
import { lex_number } from "./number_lexer.ts";
|
||||
import { is_digit } from "./utils.ts";
|
||||
import { scan_identifier } from "./identifier_lexer.ts";
|
||||
import { scan_number } from "./number_lexer.ts";
|
||||
import { is_digit, is_lowercase } from "./utils.ts";
|
||||
|
||||
export type Token = {
|
||||
v: string,
|
||||
@ -37,6 +38,7 @@ export function lex(code: string): Array<Token> {
|
||||
let next_token: Token | null = null;
|
||||
let next_position: number | null = null;
|
||||
|
||||
// try to scan a number
|
||||
if (is_digit(c)) {
|
||||
// if the current default token is not empty, push it to the tokens array
|
||||
if (current_default_token !== "") {
|
||||
@ -45,7 +47,20 @@ export function lex(code: string): Array<Token> {
|
||||
}
|
||||
|
||||
// lex a number
|
||||
const [token, next] = lex_number(code, current_pos);
|
||||
const [token, next] = scan_number(code, current_pos);
|
||||
current_pos = next;
|
||||
tokens.push(token);
|
||||
continue;
|
||||
}
|
||||
// try to scan an identifier/keyword
|
||||
else if (is_lowercase(c) || c === "_") {
|
||||
// if the current default token is not empty, push it to the tokens array
|
||||
if (current_default_token !== "") {
|
||||
tokens.push({ v: current_default_token, token_type: "" });
|
||||
current_default_token = "";
|
||||
}
|
||||
|
||||
const [token, next] = scan_identifier(code, current_pos);
|
||||
current_pos = next;
|
||||
tokens.push(token);
|
||||
continue;
|
||||
|
@ -1,17 +1,17 @@
|
||||
import { expect, test, describe } from "bun:test";
|
||||
import { lex_number } from "./number_lexer";
|
||||
import { scan_number } from "./number_lexer";
|
||||
|
||||
describe("Number Lexer", () => {
|
||||
test("should return a whole number token", () => {
|
||||
const code = "1";
|
||||
const token = lex_number(code, 0);
|
||||
const token = scan_number(code, 0);
|
||||
|
||||
expect(token).toEqual([{ v: "1", token_type: "number" }, 1]);
|
||||
});
|
||||
|
||||
test("should return a whole number token pt 2", () => {
|
||||
const code = "12345";
|
||||
const token = lex_number(code, 0);
|
||||
const token = scan_number(code, 0);
|
||||
|
||||
expect(token).toEqual([{ v: "12345", token_type: "number" }, 5]);
|
||||
});
|
||||
|
@ -10,7 +10,7 @@ import { is_digit } from "./utils.ts";
|
||||
* @param pos the position to start scanning from
|
||||
* @returns
|
||||
*/
|
||||
export function lex_number(input: string, pos: number): [Token, number] {
|
||||
export function scan_number(input: string, pos: number): [Token, number] {
|
||||
const [token_value, next] = scan_decimal(input, pos);
|
||||
|
||||
return [{ v: token_value, token_type: "number" }, next];
|
||||
|
@ -1,3 +1,15 @@
|
||||
/**
 * Tells whether `c` falls in the range '0'..'9'.
 *
 * Intended for single-character strings; the check is an ordinary string
 * comparison against both bounds.
 */
export function is_digit(c: string): boolean {
    const at_least_zero = '0' <= c;
    const at_most_nine = c <= '9';

    return at_least_zero && at_most_nine;
}
|
||||
}
|
||||
|
||||
/**
 * Tells whether `c` falls in the range 'a'..'z'.
 *
 * Intended for single-character strings; the check is an ordinary string
 * comparison against both bounds.
 */
export function is_lowercase(c: string): boolean {
    const at_least_a = 'a' <= c;
    const at_most_z = c <= 'z';

    return at_least_a && at_most_z;
}
|
||||
|
||||
/**
 * Tells whether `c` falls in the range 'A'..'Z'.
 *
 * Intended for single-character strings; the check is an ordinary string
 * comparison against both bounds.
 */
export function is_uppercase(c: string): boolean {
    const at_least_a = 'A' <= c;
    const at_most_z = c <= 'Z';

    return at_least_a && at_most_z;
}
|
||||
|
||||
/**
 * Tells whether `c` may appear inside an identifier: an underscore, or any
 * character accepted by `is_lowercase`, `is_uppercase` or `is_digit`.
 */
export function is_identifier_char(c: string): boolean {
    if (c === '_') {
        return true;
    }

    if (is_digit(c)) {
        return true;
    }

    return is_lowercase(c) || is_uppercase(c);
}
|
||||
|
@ -76,7 +76,7 @@ val has_key = haystack.contains("needle")
|
||||
]
|
||||
|
||||
// THP
|
||||
Obj {
|
||||
.{
|
||||
names: #("Toni", "Stark"), // Tuple
|
||||
age: 33,
|
||||
numbers: [32, 64, 128]
|
||||
|
Loading…
Reference in New Issue
Block a user