[Compiler] Begin refactor of type checking

This commit is contained in:
Araozu 2023-04-14 10:17:03 -05:00
parent 9a3460d176
commit ccfb95956c
15 changed files with 186 additions and 410 deletions

View File

@ -15,7 +15,7 @@
- Scan single line comments
- Refactor String token to include double quotes (") in its content
## v0.0.4
- Explicit datatype of variables

256
compiler/Cargo.lock generated
View File

@ -2,15 +2,6 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]]
name = "atty"
version = "0.2.14"
@ -22,51 +13,18 @@ dependencies = [
"winapi",
]
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bumpalo"
version = "3.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba"
[[package]]
name = "cc"
version = "1.0.76"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76a284da2e6fe2092f2353e51713435363112dfd60030e22add80be333fb928f"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16b0a3d9ed01224b22057780a37bb8c5dbfe1be8ba48678e7bf57ec4b385411f"
dependencies = [
"iana-time-zone",
"js-sys",
"num-integer",
"num-traits",
"time",
"wasm-bindgen",
"winapi",
]
[[package]]
name = "clap"
version = "4.1.3"
@ -104,16 +62,6 @@ dependencies = [
"os_str_bytes",
]
[[package]]
name = "codespan-reporting"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e"
dependencies = [
"termcolor",
"unicode-width",
]
[[package]]
name = "colored"
version = "2.0.0"
@ -125,56 +73,6 @@ dependencies = [
"winapi",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc"
[[package]]
name = "cxx"
version = "1.0.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4a41a86530d0fe7f5d9ea779916b7cadd2d4f9add748b99c2c029cbbdfaf453"
dependencies = [
"cc",
"cxxbridge-flags",
"cxxbridge-macro",
"link-cplusplus",
]
[[package]]
name = "cxx-build"
version = "1.0.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06416d667ff3e3ad2df1cd8cd8afae5da26cf9cec4d0825040f88b5ca659a2f0"
dependencies = [
"cc",
"codespan-reporting",
"once_cell",
"proc-macro2",
"quote",
"scratch",
"syn",
]
[[package]]
name = "cxxbridge-flags"
version = "1.0.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "820a9a2af1669deeef27cb271f476ffd196a2c4b6731336011e0ba63e2c7cf71"
[[package]]
name = "cxxbridge-macro"
version = "1.0.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a08a6e2fcc370a089ad3b4aaf54db3b1b4cee38ddabce5896b33eb693275f470"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "errno"
version = "0.2.8"
@ -220,30 +118,6 @@ dependencies = [
"libc",
]
[[package]]
name = "iana-time-zone"
version = "0.1.53"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64c122667b287044802d6ce17ee2ddf13207ed924c712de9a66a5814d5b64765"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"wasm-bindgen",
"winapi",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca"
dependencies = [
"cxx",
"cxx-build",
]
[[package]]
name = "io-lifetimes"
version = "1.0.4"
@ -266,15 +140,6 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "js-sys"
version = "0.3.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47"
dependencies = [
"wasm-bindgen",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
@ -287,58 +152,20 @@ version = "0.2.137"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc7fcc620a3bff7cdd7a365be3376c97191aeaccc2a603e600951e452615bf89"
[[package]]
name = "link-cplusplus"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9272ab7b96c9046fbc5bc56c06c117cb639fe2d509df0c421cad82d2915cf369"
dependencies = [
"cc",
]
[[package]]
name = "linux-raw-sys"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4"
[[package]]
name = "log"
version = "0.4.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
dependencies = [
"cfg-if",
]
[[package]]
name = "misti"
version = "0.0.4"
dependencies = [
"chrono",
"clap",
"colored",
]
[[package]]
name = "num-integer"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9"
dependencies = [
"autocfg",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.16.0"
@ -407,12 +234,6 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "scratch"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8132065adcfd6e02db789d9285a0deb2f3fcb04002865ab67d5fb103533898"
[[package]]
name = "strsim"
version = "0.10.0"
@ -439,95 +260,18 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "time"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"
dependencies = [
"libc",
"wasi",
"winapi",
]
[[package]]
name = "unicode-ident"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3"
[[package]]
name = "unicode-width"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
[[package]]
name = "wasm-bindgen"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f"
[[package]]
name = "winapi"
version = "0.3.9"

View File

@ -12,6 +12,5 @@ test = false
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
chrono = "0.4.23"
clap = { version = "4.1.3", features = ["derive"] }
colored = "2.0.0"

View File

@ -94,12 +94,12 @@ fn get_line(
mod tests {
use super::*;
use crate::{
error_handling::{PrintableError, SyntaxError},
error_handling::{PrintableError, SyntaxError, MistiError},
lexic::get_tokens,
syntax::construct_ast,
};
fn get_error_data(input: String) -> (Vec<char>, SyntaxError) {
fn get_error_data(input: String) -> (Vec<char>, MistiError) {
let tokens = get_tokens(&input).unwrap();
let error_holder = construct_ast(&tokens);

View File

@ -26,3 +26,7 @@ pub use token::TokenType;
pub fn tokenize(input: &String) -> Result<Vec<Token>, MistiError> {
lexic::get_tokens(input)
}
/// Runs the interactive REPL.
///
/// The `Result` returned by `repl::run` is deliberately discarded, so a failure
/// inside the REPL is not reported to the caller.
pub fn repl() {
let _ = repl::run();
}

View File

@ -1,4 +1,3 @@
use chrono::{prelude::Utc, Datelike};
use clap::{Parser, Subcommand};
// Module to handle the repl and its compilation
@ -45,11 +44,9 @@ enum Commands {
const VERSION: &str = "0.0.1";
fn get_copyright() -> String {
let year = Utc::now().year();
format!(
"Misti {}\nCopyright (c) {} Fernando Enrique Araoz Morales\n",
VERSION, year
"Misti {}\nCopyright (c) 2023 Fernando Enrique Araoz Morales\n",
VERSION,
)
}

View File

@ -32,8 +32,9 @@ fn build_ast(input: &String, tokens: Vec<Token>) {
match ast {
Ok(mut ast) => {
let mut table = SymbolTable::new();
semantic::check_ast(&mut ast, &mut table);
let mut symbol_table = SymbolTable::new();
semantic::check_ast(&mut ast, &mut symbol_table);
let js_code = codegen::codegen(&ast);
println!("{}", js_code)
}
@ -44,6 +45,8 @@ fn build_ast(input: &String, tokens: Vec<Token>) {
}
}
/// Executes the REPL, reading from stdin, compiling and emitting JS to stdout
pub fn run() -> io::Result<()> {
let stdin = io::stdin();

View File

@ -0,0 +1,59 @@
/// Represents a qualified datatype of the compiler.
///
/// A datatype is composed of a path, e.g. `base.Str`, `base.Num`.
///
/// `Clone` is derived instead of being hand-written as an inherent `clone`
/// method, so `Datatype` satisfies `T: Clone` bounds while `datatype.clone()`
/// call sites keep working unchanged. `Debug` lets values show up in
/// `assert_eq!` failures, and `Eq`/`Hash` allow using a datatype as a map or
/// set key.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Datatype {
    /// Fully qualified path of the datatype, e.g. `base.Str`.
    t: String,
}

impl Datatype {
    /// Creates a datatype from its fully qualified path.
    pub fn new(t: String) -> Datatype {
        Datatype { t }
    }

    /// The primitive string datatype, `base.Str`.
    pub fn str() -> Datatype {
        Datatype::new(String::from("base.Str"))
    }

    /// The primitive number datatype, `base.Num`.
    pub fn num() -> Datatype {
        Datatype::new(String::from("base.Num"))
    }

    /// The primitive boolean datatype, `base.Bool`.
    pub fn bool() -> Datatype {
        Datatype::new(String::from("base.Bool"))
    }
}
#[cfg(test)]
mod tests {
    use super::Datatype;

    /// A datatype built from an explicit path stores that path.
    #[test]
    fn should_create_datatype() {
        let datatype = Datatype::new(String::from("base.Str"));

        assert_eq!(datatype.t, "base.Str");
    }

    /// Every primitive constructor yields its `base.*` path.
    #[test]
    fn should_create_primitive_datatypes() {
        let primitives = [
            (Datatype::str(), "base.Str"),
            (Datatype::num(), "base.Num"),
            (Datatype::bool(), "base.Bool"),
        ];

        for (datatype, expected_path) in primitives {
            assert_eq!(datatype.t, expected_path);
        }
    }

    /// Two datatypes with the same path compare equal.
    #[test]
    fn should_compare() {
        let first = Datatype::str();
        let second = Datatype::str();

        assert!(first == second);
    }
}

View File

@ -0,0 +1,15 @@
# Semantic analysis
## Label checking
- Over all the bindings:
- Resolve references with the Symbol table
- If valid, insert reference into Symbol table
## Type checking
TODO
## Flow control check
TODO

View File

@ -1,42 +1,17 @@
use super::ast_types::{Binding, Expression, ModuleAST};
use super::symbol_table::{SymbolTable, _BOOLEAN, _NUMBER, _STRING};
mod datatype;
mod type_check;
pub use datatype::Datatype;
/// Checks the AST. In the future should return a list of errors.
pub fn check_ast<'a>(ast: &'a mut ModuleAST, symbol_table: &'a mut SymbolTable) {
for binding in &ast.bindings {
match binding {
Binding::Val(binding) => {
symbol_table.add(
binding.identifier,
get_expression_type(&binding.expression, symbol_table).as_str(),
);
}
Binding::Var(binding) => {
symbol_table.add(
binding.identifier,
get_expression_type(&binding.expression, symbol_table).as_str(),
);
}
}
}
}
fn get_expression_type(exp: &Expression, symbol_table: &SymbolTable) -> String {
match exp {
Expression::Number(_) => String::from(_NUMBER),
Expression::String(_) => String::from(_STRING),
Expression::Boolean(_) => String::from(_BOOLEAN),
Expression::Identifier(id) => {
match symbol_table.get_type(*id) {
Some(datatype) => datatype,
None => {
// Should add an error to the list instead of panicking
panic!("Semantic analysis: identifier {} not found", id);
}
}
}
}
}
#[cfg(test)]
mod tests {
@ -47,57 +22,4 @@ mod tests {
use super::*;
fn test_type(input: String, datatype: &str) -> bool {
let tokens = lexic::get_tokens(&input).unwrap();
let mut table = SymbolTable::new();
let mut ast = syntax::construct_ast(&tokens).unwrap();
check_ast(&mut ast, &mut table);
table.check_type("a", datatype)
}
#[test]
fn should_update_symbol_table() {
let tokens = lexic::get_tokens(&String::from("val identifier = 20")).unwrap();
let mut table = SymbolTable::new();
let mut ast = syntax::construct_ast(&tokens).unwrap();
check_ast(&mut ast, &mut table);
let result = table.test("identifier");
assert_eq!(true, result);
}
#[test]
fn should_get_correct_type() {
assert!(test_type(String::from("val a = 322"), _NUMBER));
assert!(test_type(String::from("var a = 322"), _NUMBER));
assert!(test_type(String::from("val a = \"str\" "), _STRING));
assert!(test_type(String::from("var a = \"str\" "), _STRING));
assert!(test_type(String::from("val a = false"), _BOOLEAN));
assert!(test_type(String::from("var a = true"), _BOOLEAN));
}
#[test]
fn should_get_type_from_identifier() {
let mut table = SymbolTable::new();
let tokens = lexic::get_tokens(&String::from("val identifier = 20")).unwrap();
let mut ast = syntax::construct_ast(&tokens).unwrap();
// Add an identifier
check_ast(&mut ast, &mut table);
let tokens = lexic::get_tokens(&String::from("val newValue = identifier")).unwrap();
let mut ast = syntax::construct_ast(&tokens).unwrap();
// Add a new value that references an identifier
check_ast(&mut ast, &mut table);
// The type should be Num
let current_type = table.get_type("newValue").unwrap();
assert_eq!(_NUMBER, current_type);
}
}

View File

@ -0,0 +1,62 @@
use crate::{ast_types::Expression, symbol_table::SymbolTable};
use super::datatype::Datatype;
/// Something whose datatype can be computed during semantic analysis.
trait Typed {
    /// Returns the datatype of `self`, resolving identifiers through
    /// `symbol_table`.
    ///
    /// The table is only read, so a shared borrow is enough; callers that hold
    /// a `&mut SymbolTable` can still pass it as is.
    ///
    /// # Panics
    ///
    /// Panics if `self` is an identifier that is not present in `symbol_table`.
    fn t(&self, symbol_table: &SymbolTable) -> Datatype;
}

impl<'a> Typed for Expression<'a> {
    fn t(&self, symbol_table: &SymbolTable) -> Datatype {
        match self {
            // Literals carry their datatype in the variant itself.
            Expression::Number(_) => Datatype::num(),
            Expression::String(_) => Datatype::str(),
            Expression::Boolean(_) => Datatype::bool(),
            // Identifiers take the datatype recorded in the symbol table.
            Expression::Identifier(id) => match symbol_table.get_type(id) {
                Some(datatype) => datatype.clone(),
                // TODO: add an error to a list instead of panicking.
                None => panic!("Semantic analysis: identifier {} not found", id),
            },
        }
    }
}
#[cfg(test)]
mod t {
    use super::*;

    /// Literal expressions resolve to the matching primitive datatype without
    /// needing any entry in the symbol table.
    #[test]
    fn should_get_type_of_primitives() {
        let mut table = SymbolTable::new();

        let number = String::from("322");
        let exp = Expression::Number(&number);
        assert!(exp.t(&mut table) == Datatype::num());

        let text = String::from("hello");
        let exp = Expression::String(&text);
        assert!(exp.t(&mut table) == Datatype::str());

        let exp = Expression::Boolean(true);
        assert!(exp.t(&mut table) == Datatype::bool());
    }

    /// An identifier resolves to the datatype stored for it in the symbol table.
    #[test]
    fn should_get_type_of_existing_id() {
        let mut table = SymbolTable::new();
        table.insert("my_number", Datatype::num());

        let id = String::from("my_number");
        let exp = Expression::Identifier(&id);

        assert!(exp.t(&mut table) == Datatype::num());
    }
}

View File

@ -1,37 +1,40 @@
use std::collections::HashMap;
use crate::semantic::Datatype;
// Primitive datatypes
pub const _NUMBER: &str = "Num";
pub const _STRING: &str = "Str";
pub const _BOOLEAN: &str = "Bool";
pub struct SymbolTable {
table: HashMap<String, String>,
/// For now just stores identifiers and datatypes
table: HashMap<String, Datatype>,
}
impl SymbolTable {
pub fn new() -> SymbolTable {
let symbol_table = HashMap::<String, String>::new();
let symbol_table = HashMap::<String, Datatype>::new();
SymbolTable {
table: symbol_table,
}
}
pub fn add(&mut self, identifier: &str, datatype: &str) {
pub fn insert(&mut self, identifier: &str, datatype: Datatype) {
self.table
.insert(String::from(identifier), String::from(datatype));
.insert(String::from(identifier), datatype);
}
pub fn test(&self, identifier: &str) -> bool {
return self.table.contains_key::<String>(&String::from(identifier));
pub fn has_id(&self, identifier: &String) -> bool {
return self.table.contains_key::<String>(identifier);
}
pub fn check_type(&self, identifier: &str, datatype: &str) -> bool {
pub fn check_type(&self, identifier: &String, datatype: Datatype) -> bool {
self.table
.get_key_value(&String::from(identifier))
.and_then(|(_, value)| {
if value == &String::from(datatype) {
.get(identifier)
.and_then(|value| {
if *value == datatype {
Some(true)
} else {
Some(false)
@ -40,10 +43,11 @@ impl SymbolTable {
.unwrap_or(false)
}
pub fn get_type(&self, identifier: &str) -> Option<String> {
/// Returns the Datatype of a given identifier
pub fn get_type(&self, identifier: &String) -> Option<&Datatype> {
self.table
.get_key_value(&String::from(identifier))
.and_then(|(_, value)| Some(String::from(value)))
.get(identifier)
.and_then(|value| Some(value))
}
}
@ -59,15 +63,18 @@ mod tests {
#[test]
fn should_add_identifier() {
let mut table = SymbolTable::new();
table.add("identifier", _NUMBER);
assert_eq!(true, table.test("identifier"))
table.insert("identifier", Datatype::num());
let s = String::from("identifier");
assert_eq!(true, table.has_id(&s))
}
#[test]
fn should_check_type() {
let mut table = SymbolTable::new();
table.add("firstNumber", _NUMBER);
table.insert("firstNumber", Datatype::num());
assert!(table.check_type("firstNumber", _NUMBER));
let s = String::from("firstNumber");
assert!(table.check_type(&s, Datatype::num()));
}
}

View File

@ -1,5 +1,5 @@
use crate::ast_types::Binding;
use crate::error_handling::SyntaxError;
use crate::error_handling::{SyntaxError, MistiError};
use super::token::Token;
@ -22,7 +22,7 @@ pub enum SyntaxResult<'a> {
}
/// Constructs the Misti AST from a vector of tokens
pub fn construct_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST<'a>, SyntaxError> {
pub fn construct_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST<'a>, MistiError> {
let _token_amount = tokens.len();
let mut current_pos = 0;
@ -30,13 +30,13 @@ pub fn construct_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST<'a>, Syntax
SyntaxResult::Ok(module) => Ok(ModuleAST {
bindings: vec![module],
}),
SyntaxResult::None => Err(SyntaxError {
SyntaxResult::None => Err(MistiError::Syntax(SyntaxError {
reason: String::from("PARSER couldn't parse any construction"),
// FIXME: This should get the position of the _token_ that current_pos points to
error_start: current_pos,
error_end: current_pos,
}),
SyntaxResult::Err(err) => Err(err),
})),
SyntaxResult::Err(err) => Err(MistiError::Syntax(err)),
}
}

View File

@ -1,29 +0,0 @@
entry-point = "index"
[basics]
section-name = "Basics"
children = [
"variables-and-constants",
"simple-datatypes",
"function-calls",
"operators",
"tuples",
"indentation-rules",
]
[flow-control]
section-name = "Flow control"
children = [
"conditionals",
"arrays",
"loops",
]
[functions]
section-name = "Functions"
children = [
"definition",
"lambdas",
"parameters",
]

View File

@ -1,7 +0,0 @@
entry-point = "index"
[prelude]
section-name = "Prelude"
children = [
"String"
]