scan strings and comments

This commit is contained in:
Araozu 2024-03-27 08:36:34 -05:00
parent 2e93df0fd8
commit d098b60a38
3 changed files with 121 additions and 0 deletions

View File

@ -1,5 +1,6 @@
import { scan_identifier } from "./identifier_lexer.ts"; import { scan_identifier } from "./identifier_lexer.ts";
import { scan_number } from "./number_lexer.ts"; import { scan_number } from "./number_lexer.ts";
import { scan_string } from "./string_lexer.ts";
import { is_digit, is_lowercase, is_uppercase } from "./utils.ts"; import { is_digit, is_lowercase, is_uppercase } from "./utils.ts";
export type Token = { export type Token = {
@ -78,6 +79,45 @@ export function lex(code: string): Array<Token> {
tokens.push(token); tokens.push(token);
continue; continue;
} }
// try to scan a string
else if (c === "\"") {
// if the current default token is not empty, push it to the tokens array
if (current_default_token !== "") {
tokens.push({ v: current_default_token, token_type: "" });
current_default_token = "";
}
const [token, next] = scan_string(code, current_pos);
current_pos = next;
tokens.push(token);
continue;
}
// try to scan a comment
else if (c === "/" && code[current_pos + 1] === "/") {
// if the current default token is not empty, push it to the tokens array
if (current_default_token !== "") {
tokens.push({ v: current_default_token, token_type: "" });
current_default_token = "";
}
let comment = "";
let pos = current_pos;
while (pos < code_len) {
const char = code[pos];
if (char === "\n") {
break;
}
comment += char;
pos++;
}
tokens.push({ v: comment, token_type: "comment" });
current_pos = pos;
continue;
}
// here, check if a token was found // here, check if a token was found
if (next_token !== null && next_position !== null) { if (next_token !== null && next_position !== null) {

View File

@ -0,0 +1,32 @@
import { expect, test, describe } from "bun:test";
import { scan_string } from "./string_lexer";
// Table-driven checks for `scan_string`: every case scans from position 0 and
// expects a "string" token holding the raw source text (quotes included),
// together with the position just past the closing quote.
describe("String Lexer", () => {
    // [test name, source text, expected token text, expected next position]
    const cases: Array<[string, string, string, number]> = [
        ["should scan an empty string", "\"\"", "\"\"", 2],
        ["should scan a string with a single character", "\"a\"", "\"a\"", 3],
        ["should scan a string with multiple characters", "\"hello\"", "\"hello\"", 7],
        // The source here is the four characters: quote, backslash, n, quote.
        ["should scan a string with an escape character", "\"\\n\"", "\"\\n\"", 4],
    ];

    for (const [name, source, expected_text, expected_next] of cases) {
        test(name, () => {
            const result = scan_string(source, 0);
            expect(result).toEqual([{ v: expected_text, token_type: "string" }, expected_next]);
        });
    }
});

49
lexer/string_lexer.ts Normal file
View File

@ -0,0 +1,49 @@
import type { Token } from "./lexer.ts";
/**
 * Scans a double-quoted string literal out of `input`.
 *
 * The character at `starting_position` is assumed to be the opening quote; it
 * is not inspected, and the token text always begins with `"`.
 *
 * Scanning stops at the first of:
 * - an unescaped closing quote, which is consumed and included in the token;
 * - a newline, which is NOT consumed (the string is left unterminated);
 * - the end of the input.
 *
 * @param input The full source text being lexed.
 * @param starting_position Index of the opening quote in `input`.
 * @returns A tuple of the `"string"` token (raw text, quotes and escape
 *          sequences preserved as written) and the index of the first
 *          character that was not consumed. The index never exceeds
 *          `input.length`.
 */
export function scan_string(input: string, starting_position: number): [Token, number] {
    let value = "\"";
    let pos = starting_position + 1;

    while (pos < input.length) {
        const c = input[pos];

        if (c === "\"") {
            // Closing quote: include it and stop right after it.
            value += c;
            pos += 1;
            break;
        }

        if (c === "\n") {
            // todo: error handling, return an error indicator and the caller should render a red wavy underline
            break;
        }

        if (c === "\\") {
            const next_char = input[pos + 1];

            if (next_char === undefined) {
                // A backslash is the last character of the input: there is
                // nothing to escape. Keep only the backslash so the token text
                // does not gain a literal "undefined" and the returned
                // position does not run past the end of the input.
                value += c;
                pos += 1;
                break;
            }

            // Consume the backslash and the escaped character as a pair, so an
            // escaped quote does not terminate the string.
            value += handle_escape_char(next_char);
            pos += 2;
            continue;
        }

        value += c;
        pos += 1;
    }

    return [{ v: value, token_type: "string" }, pos];
}

/**
 * Returns the source text of an escape sequence: a backslash followed by the
 * escaped character.
 *
 * Recognised escapes (`n`, `t`, `r`, `"`, `\`) and unrecognised ones are all
 * reproduced verbatim, so no per-character dispatch is needed.
 *
 * @param next_char The character that follows the backslash.
 * @returns The two-character escape sequence as written in the source.
 */
function handle_escape_char(next_char: string): string {
    return "\\" + next_char;
}