[Compiler] Refactor string lexer to include starting and closing double quotes

This commit is contained in:
Araozu 2023-04-15 17:17:27 -05:00
parent ccfb95956c
commit c445f8bb00
11 changed files with 131 additions and 69 deletions

View File

@ -15,6 +15,7 @@
- Scan single line comments
- Refactor String token to include double quotes (") in its content
- Refactor datatype checking of semantic analysis
## v0.0.4

View File

@ -94,7 +94,7 @@ fn get_line(
mod tests {
use super::*;
use crate::{
error_handling::{PrintableError, SyntaxError, MistiError},
error_handling::{MistiError, PrintableError, SyntaxError},
lexic::get_tokens,
syntax::construct_ast,
};

View File

@ -6,7 +6,7 @@ use crate::lexic::{token, utils, LexResult};
/// This function assumes that `start_pos` is after the first double quote,
/// e.g. if the input is `"hello"`, `start_pos == 1`
pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
scan_impl(chars, start_pos, String::from(""))
scan_impl(chars, start_pos, String::from("\""))
}
/// Recursive function that does the scanning
@ -16,10 +16,11 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
// start_pos is the position where the token ENDS, not where it STARTS,
// so this is used to retrieve the original START position of the token
// the opening `"` is already part of `current`, so its length accounts for it
let current_len = current.len() + 1;
let current_len = current.len();
let final_str = format!("{}\"", current);
LexResult::Some(
token::new_string(current, start_pos - current_len),
token::new_string(final_str, start_pos - current_len),
start_pos + 1,
)
}
@ -77,7 +78,7 @@ mod tests {
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(2, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("", token.value);
assert_eq!("\"\"", token.value);
assert_eq!(0, token.position);
} else {
panic!()
@ -91,7 +92,7 @@ mod tests {
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(15, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("Hello, world!", token.value);
assert_eq!("\"Hello, world!\"", token.value);
assert_eq!(0, token.position);
} else {
panic!()
@ -116,7 +117,7 @@ mod tests {
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("Sample\\ntext", token.value);
assert_eq!("\"Sample\\ntext\"", token.value);
assert_eq!(0, token.position);
} else {
panic!()
@ -127,7 +128,7 @@ mod tests {
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("Sample\\\"text", token.value);
assert_eq!("\"Sample\\\"text\"", token.value);
assert_eq!(0, token.position);
} else {
panic!()
@ -138,7 +139,7 @@ mod tests {
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("Sample\\rtext", token.value);
assert_eq!("\"Sample\\rtext\"", token.value);
assert_eq!(0, token.position);
} else {
panic!()
@ -149,7 +150,7 @@ mod tests {
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("Sample\\\\text", token.value);
assert_eq!("\"Sample\\\\text\"", token.value);
assert_eq!(0, token.position);
} else {
panic!()
@ -160,7 +161,7 @@ mod tests {
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("Sample\\ttext", token.value);
assert_eq!("\"Sample\\ttext\"", token.value);
assert_eq!(0, token.position);
} else {
panic!()
@ -171,7 +172,7 @@ mod tests {
if let LexResult::Some(token, next) = scan(&input, start_pos) {
assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("Sample\\ text", token.value);
assert_eq!("\"Sample\\ text\"", token.value);
assert_eq!(0, token.position);
} else {
panic!()

View File

@ -10,12 +10,12 @@ use super::semantic;
use super::syntax;
/// Executes Lexical analysis, handles errors and calls build_ast for the next phase
fn compile(input: &String) {
fn compile(input: &String, symbol_table: &mut SymbolTable) {
let tokens = lexic::get_tokens(input);
match tokens {
Ok(tokens) => {
build_ast(input, tokens);
build_ast(input, tokens, symbol_table);
}
Err(error) => {
let chars: Vec<char> = input.chars().into_iter().collect();
@ -27,13 +27,12 @@ fn compile(input: &String) {
/// Executes Syntax analysis, and for now, Semantic analysis and Code generation.
///
/// Prints the generated code to stdout
fn build_ast(input: &String, tokens: Vec<Token>) {
fn build_ast(input: &String, tokens: Vec<Token>, symbol_table: &mut SymbolTable) {
let ast = syntax::construct_ast(&tokens);
match ast {
Ok(mut ast) => {
let mut symbol_table = SymbolTable::new();
semantic::check_ast(&mut ast, &mut symbol_table);
Ok( ast) => {
semantic::check_ast(& ast, symbol_table);
let js_code = codegen::codegen(&ast);
println!("{}", js_code)
@ -45,17 +44,16 @@ fn build_ast(input: &String, tokens: Vec<Token>) {
}
}
/// Executes the REPL, reading from stdin, compiling and emitting JS to stdout
pub fn run() -> io::Result<()> {
let stdin = io::stdin();
let mut buffer = String::new();
let mut repl_symbol_table = SymbolTable::new();
println!("REPL: Enter expressions to evaluate. Type Ctrl-D to exit.");
loop {
print!("> ");
let _ = io::stdout().flush();
io::stdout().flush()?;
buffer.clear();
let read = stdin.read_line(&mut buffer);
@ -65,7 +63,7 @@ pub fn run() -> io::Result<()> {
break Ok(());
}
Ok(_) => {
compile(&buffer);
compile(&buffer, &mut repl_symbol_table);
}
Err(error) => {
eprintln!("Error reading stdin.");

View File

@ -1,4 +1,3 @@
/// Represents a qualified datatype of the compiler.
///
/// A datatype is composed of a path, e.g. `base.Str`, `base.Num`
@ -7,22 +6,27 @@ pub struct Datatype {
t: String,
}
impl Datatype {
pub fn new(t: String) -> Datatype {
Datatype { t }
}
pub fn str() -> Datatype {
Datatype { t: String::from("base.Str") }
Datatype {
t: String::from("base.Str"),
}
}
pub fn num() -> Datatype {
Datatype { t: String::from("base.Num") }
Datatype {
t: String::from("base.Num"),
}
}
pub fn bool() -> Datatype {
Datatype { t: String::from("base.Bool") }
Datatype {
t: String::from("base.Bool"),
}
}
pub fn clone(&self) -> Datatype {
@ -30,8 +34,6 @@ impl Datatype {
}
}
#[cfg(test)]
mod tests {
use super::Datatype;

View File

@ -1,25 +1,101 @@
use super::ast_types::{Binding, Expression, ModuleAST};
use super::symbol_table::{SymbolTable, _BOOLEAN, _NUMBER, _STRING};
use super::ast_types::{Binding, ModuleAST};
use super::symbol_table::SymbolTable;
mod datatype;
mod type_check;
use type_check::Typed;
pub use datatype::Datatype;
/// Checks the AST. In the future should return a list of errors.
pub fn check_ast<'a>(ast: &'a mut ModuleAST, symbol_table: &'a mut SymbolTable) {
/// Walks every binding of the module, computes the datatype of its
/// expression and records `identifier -> datatype` in the symbol table.
///
/// In the future this should return a list of errors.
pub fn check_ast<'a>(ast: &'a ModuleAST, symbol_table: &'a mut SymbolTable) {
    for binding in &ast.bindings {
        // `val` and `var` bindings are registered in exactly the same way.
        // TODO: check the datatype against an explicit datatype, e.g. `Str val x = 322`
        let (identifier, datatype) = match binding {
            Binding::Val(val_binding) => (
                val_binding.identifier,
                val_binding.expression.t(symbol_table),
            ),
            Binding::Var(var_binding) => (
                var_binding.identifier,
                var_binding.expression.t(symbol_table),
            ),
        };

        symbol_table.insert(identifier.as_str(), datatype);
    }
}
#[cfg(test)]
mod tests {
use crate::lexic;
use crate::symbol_table::_BOOLEAN;
use crate::symbol_table::_STRING;
use crate::syntax;
mod t {
    use crate::ast_types::Expression;

    use super::*;

    /// A `val` bound to a number literal ends up in the table as `base.Num`.
    #[test]
    fn should_insert_into_symbol_table() {
        let name = String::from("id");
        let literal = String::from("322");
        let module = ModuleAST {
            bindings: vec![Binding::Val(crate::ast_types::ValBinding {
                datatype: None,
                identifier: &name,
                expression: Expression::Number(&literal),
            })],
        };
        let mut symbols = SymbolTable::new();

        check_ast(&module, &mut symbols);

        let expected_id = String::from("id");
        assert!(symbols.has_id(&expected_id));
        assert!(symbols.check_type(&expected_id, Datatype::num()));
    }

    /// A `val` bound to an existing identifier takes that identifier's datatype.
    #[test]
    fn should_insert_id_reference() {
        let mut symbols = SymbolTable::new();

        // First module: declares `id` as a number.
        let number_name = String::from("id");
        let number_literal = String::from("322");
        let number_module = ModuleAST {
            bindings: vec![Binding::Val(crate::ast_types::ValBinding {
                datatype: None,
                identifier: &number_name,
                expression: Expression::Number(&number_literal),
            })],
        };
        check_ast(&number_module, &mut symbols);

        // Second module: declares `id2` as a reference to `id`.
        let alias_name = String::from("id2");
        let alias_target = String::from("id");
        let alias_module = ModuleAST {
            bindings: vec![Binding::Val(crate::ast_types::ValBinding {
                datatype: None,
                identifier: &alias_name,
                expression: Expression::Identifier(&alias_target),
            })],
        };
        check_ast(&alias_module, &mut symbols);

        let expected_id = String::from("id2");
        assert!(symbols.has_id(&expected_id));
        assert!(symbols.check_type(&expected_id, Datatype::num()));
    }
}

View File

@ -2,26 +2,27 @@ use crate::{ast_types::Expression, symbol_table::SymbolTable};
use super::datatype::Datatype;
trait Typed<'a> {
/// Implemented by AST nodes that can report their [`Datatype`].
pub trait Typed<'a> {
/// Returns the datatype of `self`. The symbol table is passed in so that
/// implementations can look up identifiers that were registered earlier.
fn t(&self, symbol_table: &'a mut SymbolTable) -> Datatype;
}
impl<'a> Typed<'a> for Expression<'a> {
/// Returns the Datatype of this Expression
fn t(&self, symbol_table: &'a mut SymbolTable) -> Datatype {
match self {
Expression::Number(_) => Datatype::num(),
Expression::String(_) => Datatype::str(),
Expression::Boolean(_) => Datatype::bool(),
Expression::Identifier(id) => {
let res = symbol_table.get_type(id).unwrap();
let res = symbol_table
.get_type(id)
.expect("SEMANTIC: identifier doesn't exist in Symbol table");
res.clone()
}
}
}
}
#[cfg(test)]
mod t {
use super::*;
@ -59,4 +60,3 @@ mod t {
assert!(exp.t(&mut table) == Datatype::num());
}
}

View File

@ -2,11 +2,6 @@ use std::collections::HashMap;
use crate::semantic::Datatype;
// Primitive datatypes
pub const _NUMBER: &str = "Num";
pub const _STRING: &str = "Str";
pub const _BOOLEAN: &str = "Bool";
pub struct SymbolTable {
/// For now just stores identifiers and datatypes
table: HashMap<String, Datatype>,
@ -22,8 +17,7 @@ impl SymbolTable {
}
pub fn insert(&mut self, identifier: &str, datatype: Datatype) {
self.table
.insert(String::from(identifier), datatype);
self.table.insert(String::from(identifier), datatype);
}
pub fn has_id(&self, identifier: &String) -> bool {
@ -45,9 +39,7 @@ impl SymbolTable {
/// Returns the Datatype of a given identifier
pub fn get_type(&self, identifier: &String) -> Option<&Datatype> {
self.table
.get(identifier)
.and_then(|value| Some(value))
self.table.get(identifier).and_then(|value| Some(value))
}
}

View File

@ -41,7 +41,7 @@ mod tests {
let expression = try_parse(&tokens, 0).unwrap();
match expression {
Expression::String(value) => assert_eq!("Hello", value),
Expression::String(value) => assert_eq!("\"Hello\"", value),
_ => panic!(),
}
}

View File

@ -1,5 +1,5 @@
use crate::ast_types::Binding;
use crate::error_handling::{SyntaxError, MistiError};
use crate::error_handling::{MistiError, SyntaxError};
use super::token::Token;
@ -41,13 +41,8 @@ pub fn construct_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST<'a>, MistiE
}
fn next_construct<'a>(tokens: &'a Vec<Token>, current_pos: usize) -> SyntaxResult {
None.or_else(|| binding::try_parse(tokens, 0))
None.or_else(|| binding::try_parse(tokens, current_pos))
.unwrap_or_else(|| {
SyntaxResult::Err(SyntaxError {
reason: String::from("Unrecognized token"),
// FIXME: This should get the position of the _token_ that current_pos points to
error_start: current_pos,
error_end: current_pos,
})
SyntaxResult::None
})
}

View File

@ -30,10 +30,7 @@ pub struct Token {
impl Token {
pub fn get_end_position(&self) -> usize {
match self.token_type {
TokenType::String => self.position + self.value.len() + 2,
_ => self.position + self.value.len(),
}
self.position + self.value.len()
}
}