feat: Add interface to tokenize code from STDIN and output JSON
This commit is contained in:
parent
3f892e91c2
commit
5102d25676
18
CHANGELOG.md
18
CHANGELOG.md
@ -4,16 +4,9 @@
|
|||||||
|
|
||||||
- Test correct operator precedence
|
- Test correct operator precedence
|
||||||
- Implement functions as first class citizens
|
- Implement functions as first class citizens
|
||||||
- Implement AST transformation before codegen:
|
|
||||||
Create a new AST to represent PHP source code
|
|
||||||
and a THP ast -> PHP ast process, so that the
|
|
||||||
codegen section can focus only on codegen, not on
|
|
||||||
translation of thp->php.
|
|
||||||
- Ignore indentation where it doesn't matter
|
|
||||||
- Parse __more__ binary operators
|
- Parse __more__ binary operators
|
||||||
- Store tokens for the semantic analysis phase, to have actual error reporting
|
- Store tokens for the semantic analysis phase, to have actual error reporting
|
||||||
- Parse more complex bindings
|
- Parse more complex bindings
|
||||||
- Watch mode
|
|
||||||
- Rework error messages
|
- Rework error messages
|
||||||
- Parse other language constructions
|
- Parse other language constructions
|
||||||
- Type checking
|
- Type checking
|
||||||
@ -28,7 +21,16 @@
|
|||||||
- Not ignore comments & whitespace, for code formatting
|
- Not ignore comments & whitespace, for code formatting
|
||||||
- Abstract the parsing of datatypes, such that in the future generics can be implemented in a single place
|
- Abstract the parsing of datatypes, such that in the future generics can be implemented in a single place
|
||||||
- Include the original tokens in the AST
|
- Include the original tokens in the AST
|
||||||
- Finish the workflow for a hello world
|
|
||||||
|
|
||||||
|
## v0.0.14
|
||||||
|
|
||||||
|
- [ ] Define a minimal PHP AST
|
||||||
|
- [ ] Transform THP AST into PHP AST
|
||||||
|
- [ ] Implement minimal codegen for the PHP AST
|
||||||
|
- [ ] Remove old codegen
|
||||||
|
- [ ] Finish the workflow for a hello world
|
||||||
|
|
||||||
|
|
||||||
## v0.0.13
|
## v0.0.13
|
||||||
|
|
||||||
|
80
Cargo.lock
generated
80
Cargo.lock
generated
@ -12,19 +12,99 @@ dependencies = [
|
|||||||
"windows-sys",
|
"windows-sys",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "itoa"
|
||||||
|
version = "1.0.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lazy_static"
|
name = "lazy_static"
|
||||||
version = "1.4.0"
|
version = "1.4.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proc-macro2"
|
||||||
|
version = "1.0.86"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quote"
|
||||||
|
version = "1.0.36"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ryu"
|
||||||
|
version = "1.0.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde"
|
||||||
|
version = "1.0.203"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094"
|
||||||
|
dependencies = [
|
||||||
|
"serde_derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde_derive"
|
||||||
|
version = "1.0.203"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde_json"
|
||||||
|
version = "1.0.120"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5"
|
||||||
|
dependencies = [
|
||||||
|
"itoa",
|
||||||
|
"ryu",
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "syn"
|
||||||
|
version = "2.0.68"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "901fa70d88b9d6c98022e23b4136f9f3e54e4662c3bc1bd1d84a42a9a0f0c1e9"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "thp"
|
name = "thp"
|
||||||
version = "0.0.13"
|
version = "0.0.13"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"colored",
|
"colored",
|
||||||
|
"serde",
|
||||||
|
"serde_json",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-ident"
|
||||||
|
version = "1.0.12"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-sys"
|
name = "windows-sys"
|
||||||
version = "0.48.0"
|
version = "0.48.0"
|
||||||
|
@ -8,3 +8,5 @@ edition = "2021"
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
colored = "2.1.0"
|
colored = "2.1.0"
|
||||||
|
serde = { version = "1.0.203", features = ["derive"] }
|
||||||
|
serde_json = "1.0.120"
|
||||||
|
@ -12,6 +12,7 @@ init Initializes a new project in the current directory
|
|||||||
build, b Builds the project
|
build, b Builds the project
|
||||||
fmt Formats all files in the project
|
fmt Formats all files in the project
|
||||||
watch, w Starts compilation of the project in watch mode
|
watch, w Starts compilation of the project in watch mode
|
||||||
|
tokenize Tokenize code from STDIN and output tokens as JSON to STDOUT
|
||||||
|
|
||||||
|
|
||||||
help, h Print this message & exit
|
help, h Print this message & exit
|
||||||
|
@ -2,6 +2,7 @@ mod compile;
|
|||||||
mod empty;
|
mod empty;
|
||||||
mod help;
|
mod help;
|
||||||
mod repl;
|
mod repl;
|
||||||
|
mod tokenize;
|
||||||
mod types;
|
mod types;
|
||||||
|
|
||||||
use types::CommandType;
|
use types::CommandType;
|
||||||
@ -23,6 +24,7 @@ Commands
|
|||||||
build Builds the project
|
build Builds the project
|
||||||
fmt Formats all files in the project
|
fmt Formats all files in the project
|
||||||
watch, w Starts compilation of the project in watch mode
|
watch, w Starts compilation of the project in watch mode
|
||||||
|
tokenize Tokenize code from STDIN and output tokens as JSON to STDOUT
|
||||||
|
|
||||||
help, h Print this message & exit
|
help, h Print this message & exit
|
||||||
|
|
||||||
@ -67,6 +69,7 @@ fn parse_args() -> Result<(CommandType, Vec<String>), String> {
|
|||||||
"init" => CommandType::Init,
|
"init" => CommandType::Init,
|
||||||
"build" => CommandType::Build,
|
"build" => CommandType::Build,
|
||||||
"fmt" => CommandType::Fmt,
|
"fmt" => CommandType::Fmt,
|
||||||
|
"tokenize" => CommandType::Tokenize,
|
||||||
"watch" | "w" => CommandType::Watch,
|
"watch" | "w" => CommandType::Watch,
|
||||||
"help" | "h" => CommandType::Help,
|
"help" | "h" => CommandType::Help,
|
||||||
_ => return Err(format!("Unknown command `{}`", command)),
|
_ => return Err(format!("Unknown command `{}`", command)),
|
||||||
|
28
src/cli/tokenize.rs
Normal file
28
src/cli/tokenize.rs
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
use std::io::{self, BufRead};
|
||||||
|
use crate::lexic::get_tokens;
|
||||||
|
|
||||||
|
pub fn tokenize_command(_options: Vec<String>) -> Result<(), ()> {
|
||||||
|
// Get the input from stdin
|
||||||
|
let stdin = io::stdin();
|
||||||
|
|
||||||
|
let mut lines = Vec::new();
|
||||||
|
for line in stdin.lock().lines() {
|
||||||
|
match line {
|
||||||
|
Ok(line) => {
|
||||||
|
lines.push(line)
|
||||||
|
}
|
||||||
|
Err(reason) => {
|
||||||
|
eprintln!("Error reading input: {}", reason);
|
||||||
|
return Err(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let input_code = lines.join("\n");
|
||||||
|
let tokens = get_tokens(&input_code);
|
||||||
|
|
||||||
|
let json = serde_json::to_string(&tokens).unwrap();
|
||||||
|
println!("{}", json);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
@ -8,6 +8,7 @@ pub enum CommandType {
|
|||||||
Fmt,
|
Fmt,
|
||||||
Watch,
|
Watch,
|
||||||
Help,
|
Help,
|
||||||
|
Tokenize,
|
||||||
None,
|
None,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -18,6 +19,7 @@ impl CommandType {
|
|||||||
CommandType::Compile => super::compile::compile_command(options),
|
CommandType::Compile => super::compile::compile_command(options),
|
||||||
CommandType::Repl => super::repl::repl_command(options),
|
CommandType::Repl => super::repl::repl_command(options),
|
||||||
CommandType::None => super::empty::empty_command(options),
|
CommandType::None => super::empty::empty_command(options),
|
||||||
|
CommandType::Tokenize => super::tokenize::tokenize_command(options),
|
||||||
_ => {
|
_ => {
|
||||||
eprintln!("Not implemented yet! {:?} {:?}", self, options);
|
eprintln!("Not implemented yet! {:?} {:?}", self, options);
|
||||||
Err(())
|
Err(())
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
use serde::Serialize;
|
||||||
|
|
||||||
use self::semantic_error::SemanticError;
|
use self::semantic_error::SemanticError;
|
||||||
|
|
||||||
mod lex_error;
|
mod lex_error;
|
||||||
@ -9,20 +11,20 @@ pub trait PrintableError {
|
|||||||
fn get_error_str(&self, chars: &Vec<char>) -> String;
|
fn get_error_str(&self, chars: &Vec<char>) -> String;
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Serialize, Debug)]
|
||||||
pub enum MistiError {
|
pub enum MistiError {
|
||||||
Lex(LexError),
|
Lex(LexError),
|
||||||
Syntax(SyntaxError),
|
Syntax(SyntaxError),
|
||||||
Semantic(SemanticError),
|
Semantic(SemanticError),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Serialize, Debug)]
|
||||||
pub struct LexError {
|
pub struct LexError {
|
||||||
pub position: usize,
|
pub position: usize,
|
||||||
pub reason: String,
|
pub reason: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Serialize, Debug)]
|
||||||
pub struct SyntaxError {
|
pub struct SyntaxError {
|
||||||
pub error_start: usize,
|
pub error_start: usize,
|
||||||
pub error_end: usize,
|
pub error_end: usize,
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
|
use serde::Serialize;
|
||||||
|
|
||||||
use super::utils::{get_line, get_line_number};
|
use super::utils::{get_line, get_line_number};
|
||||||
use super::PrintableError;
|
use super::PrintableError;
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Serialize, Debug)]
|
||||||
pub struct SemanticError {
|
pub struct SemanticError {
|
||||||
pub error_start: usize,
|
pub error_start: usize,
|
||||||
pub error_end: usize,
|
pub error_end: usize,
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
#[derive(PartialEq, Debug, Clone)]
|
use serde::Serialize;
|
||||||
|
|
||||||
|
#[derive(Serialize, PartialEq, Debug, Clone)]
|
||||||
pub enum TokenType {
|
pub enum TokenType {
|
||||||
Identifier,
|
Identifier,
|
||||||
Datatype,
|
Datatype,
|
||||||
@ -23,7 +25,7 @@ pub enum TokenType {
|
|||||||
FUN,
|
FUN,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
#[derive(Serialize, Debug, Clone, PartialEq)]
|
||||||
pub struct Token {
|
pub struct Token {
|
||||||
pub token_type: TokenType,
|
pub token_type: TokenType,
|
||||||
// The token as a raw string
|
// The token as a raw string
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
// Follows https://phplang.org/spec/09-lexical-structure.html
|
// Follows https://phplang.org/spec/19-grammar.html#syntactic-grammar
|
||||||
|
|
||||||
struct PhpAst {}
|
struct PhpAst {}
|
||||||
|
@ -14,7 +14,6 @@ impl<'a> Parseable<'a> for Expression<'a> {
|
|||||||
type Item = Expression<'a>;
|
type Item = Expression<'a>;
|
||||||
|
|
||||||
fn try_parse(tokens: &'a Vec<Token>, current_pos: usize) -> ParsingResult<'a, Self::Item> {
|
fn try_parse(tokens: &'a Vec<Token>, current_pos: usize) -> ParsingResult<'a, Self::Item> {
|
||||||
// TODO: This must be newline/indentation aware
|
|
||||||
equality::try_parse(tokens, current_pos)
|
equality::try_parse(tokens, current_pos)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -42,6 +42,7 @@ impl<'a> Parseable<'a> for ModuleAST<'a> {
|
|||||||
Ok((prod, next_pos)) => {
|
Ok((prod, next_pos)) => {
|
||||||
productions.push(ModuleMembers::Expr(prod));
|
productions.push(ModuleMembers::Expr(prod));
|
||||||
current_pos = next_pos;
|
current_pos = next_pos;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
Err(ParsingError::Err(error)) => {
|
Err(ParsingError::Err(error)) => {
|
||||||
// TODO: Better error handling, write a better error message
|
// TODO: Better error handling, write a better error message
|
||||||
@ -92,4 +93,13 @@ mod test {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parsing the source `1` as a module must succeed and leave the parser
/// at position 1.
#[test]
fn should_parse_expression() {
    let tokens = get_tokens(&String::from("1")).unwrap();

    // Only the returned position is checked; the parsed module itself is
    // not inspected, hence the underscore-prefixed binding.
    let (_module, next) = ModuleAST::try_parse(&tokens, 0).unwrap();

    assert_eq!(next, 1);
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user