scan strings and comments
This commit is contained in:
parent
2e93df0fd8
commit
d098b60a38
@ -1,5 +1,6 @@
|
||||
import { scan_identifier } from "./identifier_lexer.ts";
|
||||
import { scan_number } from "./number_lexer.ts";
|
||||
import { scan_string } from "./string_lexer.ts";
|
||||
import { is_digit, is_lowercase, is_uppercase } from "./utils.ts";
|
||||
|
||||
export type Token = {
|
||||
@ -78,6 +79,45 @@ export function lex(code: string): Array<Token> {
|
||||
tokens.push(token);
|
||||
continue;
|
||||
}
|
||||
// try to scan a string
|
||||
else if (c === "\"") {
|
||||
// if the current default token is not empty, push it to the tokens array
|
||||
if (current_default_token !== "") {
|
||||
tokens.push({ v: current_default_token, token_type: "" });
|
||||
current_default_token = "";
|
||||
}
|
||||
|
||||
const [token, next] = scan_string(code, current_pos);
|
||||
current_pos = next;
|
||||
tokens.push(token);
|
||||
continue;
|
||||
}
|
||||
// try to scan a comment
|
||||
else if (c === "/" && code[current_pos + 1] === "/") {
|
||||
// if the current default token is not empty, push it to the tokens array
|
||||
if (current_default_token !== "") {
|
||||
tokens.push({ v: current_default_token, token_type: "" });
|
||||
current_default_token = "";
|
||||
}
|
||||
|
||||
let comment = "";
|
||||
let pos = current_pos;
|
||||
|
||||
while (pos < code_len) {
|
||||
const char = code[pos];
|
||||
|
||||
if (char === "\n") {
|
||||
break;
|
||||
}
|
||||
|
||||
comment += char;
|
||||
pos++;
|
||||
}
|
||||
|
||||
tokens.push({ v: comment, token_type: "comment" });
|
||||
current_pos = pos;
|
||||
continue;
|
||||
}
|
||||
|
||||
// here, check if a token was found
|
||||
if (next_token !== null && next_position !== null) {
|
||||
|
32
lexer/string_lexer.test.ts
Normal file
32
lexer/string_lexer.test.ts
Normal file
@ -0,0 +1,32 @@
|
||||
import { expect, test, describe } from "bun:test";
|
||||
import { scan_string } from "./string_lexer";
|
||||
|
||||
// Table-driven tests for scan_string: every case scans from position 0 and
// checks both the produced token and the position returned after the literal.
describe("String Lexer", () => {
  // [test name, source text, expected token text, expected next position]
  const cases: Array<[string, string, string, number]> = [
    ["should scan an empty string", "\"\"", "\"\"", 2],
    ["should scan a string with a single character", "\"a\"", "\"a\"", 3],
    ["should scan a string with multiple characters", "\"hello\"", "\"hello\"", 7],
    ["should scan a string with an escape character", "\"\\n\"", "\"\\n\"", 4],
  ];

  for (const [name, code, expected_value, expected_next] of cases) {
    test(name, () => {
      const result = scan_string(code, 0);

      expect(result).toEqual([{ v: expected_value, token_type: "string" }, expected_next]);
    });
  }
});
|
49
lexer/string_lexer.ts
Normal file
49
lexer/string_lexer.ts
Normal file
@ -0,0 +1,49 @@
|
||||
import type { Token } from "./lexer.ts";
|
||||
|
||||
/**
 * Scans a double-quoted string literal.
 *
 * The character at `starting_position` is treated as the opening quote and is
 * not inspected. The token value is the raw source text of the literal: the
 * quotes and every escape sequence are kept verbatim, nothing is decoded.
 *
 * Scanning ends:
 * - right after the closing quote (the quote is part of the token), or
 * - at an unescaped line feed (unterminated literal; the line feed is neither
 *   added to the token nor consumed), or
 * - at the end of the input (unterminated literal).
 *
 * A backslash always consumes the character that follows it, so an escaped
 * quote does not close the literal and an escaped line feed does not end it.
 *
 * @param input - The source text being lexed.
 * @param starting_position - Index of the opening quote in `input`.
 * @returns The string token and the index of the first character that was not
 *   consumed. When `starting_position` is a valid index this never exceeds
 *   `input.length`.
 */
export function scan_string(input: string, starting_position: number): [Token, number] {
  let value = "\"";
  let pos = starting_position + 1;

  while (pos < input.length) {
    const c = input[pos];

    if (c === "\"") {
      // Closing quote: include it in the token and step past it.
      value += c;
      pos += 1;
      break;
    }

    if (c === "\n") {
      // todo: error handling, return an error indicator and the caller should render a red wavy underline
      break;
    }

    if (c === "\\") {
      const next_char = input[pos + 1];

      if (next_char === undefined) {
        // The input ends right after the backslash. Keep the lone backslash
        // and stop at the end of the input, instead of appending the text
        // "undefined" and returning a position past the end.
        value += c;
        pos += 1;
        break;
      }

      value += handle_escape_char(next_char);
      pos += 2;
      continue;
    }

    value += c;
    pos += 1;
  }

  return [{ v: value, token_type: "string" }, pos];
}

/**
 * Returns the source text of an escape sequence given the character that
 * follows the backslash.
 *
 * The sequence is kept exactly as written (backslash + character) for every
 * character, whether or not it is a recognised escape such as `n` or `t`.
 *
 * @param next_char - The character immediately after the backslash.
 * @returns The two-character escape sequence as it appears in the source.
 */
function handle_escape_char(next_char: string): string {
  return "\\" + next_char;
}
|
||||
|
Loading…
Reference in New Issue
Block a user