[Compiler] Refactor string lexer to include starting and closing double quotes
This commit is contained in:
parent
ccfb95956c
commit
c445f8bb00
@ -15,6 +15,7 @@
|
||||
|
||||
- Scan single line comments
|
||||
- Refactor String token to include double quotes (") in its content
|
||||
- Refactor datatype checking of semantic analysis
|
||||
|
||||
## v0.0.4
|
||||
|
||||
|
@ -94,7 +94,7 @@ fn get_line(
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::{
|
||||
error_handling::{PrintableError, SyntaxError, MistiError},
|
||||
error_handling::{MistiError, PrintableError, SyntaxError},
|
||||
lexic::get_tokens,
|
||||
syntax::construct_ast,
|
||||
};
|
||||
|
@ -6,7 +6,7 @@ use crate::lexic::{token, utils, LexResult};
|
||||
/// This function assumes that `start_pos` is after the first double quote,
|
||||
/// e.g. if the input is `"hello"`, `start_pos == 1`
|
||||
pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
|
||||
scan_impl(chars, start_pos, String::from(""))
|
||||
scan_impl(chars, start_pos, String::from("\""))
|
||||
}
|
||||
|
||||
/// Recursive function that does the scanning
|
||||
@ -16,10 +16,11 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
|
||||
// start_pos is the position where the token ENDS, not where it STARTS,
|
||||
// so this is used to retrieve the original START position of the token
|
||||
// `current` already includes the opening `"`, so no extra 1 needs to be added
|
||||
let current_len = current.len() + 1;
|
||||
let current_len = current.len();
|
||||
|
||||
let final_str = format!("{}\"", current);
|
||||
LexResult::Some(
|
||||
token::new_string(current, start_pos - current_len),
|
||||
token::new_string(final_str, start_pos - current_len),
|
||||
start_pos + 1,
|
||||
)
|
||||
}
|
||||
@ -77,7 +78,7 @@ mod tests {
|
||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||
assert_eq!(2, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("", token.value);
|
||||
assert_eq!("\"\"", token.value);
|
||||
assert_eq!(0, token.position);
|
||||
} else {
|
||||
panic!()
|
||||
@ -91,7 +92,7 @@ mod tests {
|
||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||
assert_eq!(15, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("Hello, world!", token.value);
|
||||
assert_eq!("\"Hello, world!\"", token.value);
|
||||
assert_eq!(0, token.position);
|
||||
} else {
|
||||
panic!()
|
||||
@ -116,7 +117,7 @@ mod tests {
|
||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||
assert_eq!(14, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("Sample\\ntext", token.value);
|
||||
assert_eq!("\"Sample\\ntext\"", token.value);
|
||||
assert_eq!(0, token.position);
|
||||
} else {
|
||||
panic!()
|
||||
@ -127,7 +128,7 @@ mod tests {
|
||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||
assert_eq!(14, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("Sample\\\"text", token.value);
|
||||
assert_eq!("\"Sample\\\"text\"", token.value);
|
||||
assert_eq!(0, token.position);
|
||||
} else {
|
||||
panic!()
|
||||
@ -138,7 +139,7 @@ mod tests {
|
||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||
assert_eq!(14, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("Sample\\rtext", token.value);
|
||||
assert_eq!("\"Sample\\rtext\"", token.value);
|
||||
assert_eq!(0, token.position);
|
||||
} else {
|
||||
panic!()
|
||||
@ -149,7 +150,7 @@ mod tests {
|
||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||
assert_eq!(14, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("Sample\\\\text", token.value);
|
||||
assert_eq!("\"Sample\\\\text\"", token.value);
|
||||
assert_eq!(0, token.position);
|
||||
} else {
|
||||
panic!()
|
||||
@ -160,7 +161,7 @@ mod tests {
|
||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||
assert_eq!(14, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("Sample\\ttext", token.value);
|
||||
assert_eq!("\"Sample\\ttext\"", token.value);
|
||||
assert_eq!(0, token.position);
|
||||
} else {
|
||||
panic!()
|
||||
@ -171,7 +172,7 @@ mod tests {
|
||||
if let LexResult::Some(token, next) = scan(&input, start_pos) {
|
||||
assert_eq!(14, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("Sample\\ text", token.value);
|
||||
assert_eq!("\"Sample\\ text\"", token.value);
|
||||
assert_eq!(0, token.position);
|
||||
} else {
|
||||
panic!()
|
||||
|
@ -10,12 +10,12 @@ use super::semantic;
|
||||
use super::syntax;
|
||||
|
||||
/// Executes Lexical analysis, handles errors and calls build_ast for the next phase
|
||||
fn compile(input: &String) {
|
||||
fn compile(input: &String, symbol_table: &mut SymbolTable) {
|
||||
let tokens = lexic::get_tokens(input);
|
||||
|
||||
match tokens {
|
||||
Ok(tokens) => {
|
||||
build_ast(input, tokens);
|
||||
build_ast(input, tokens, symbol_table);
|
||||
}
|
||||
Err(error) => {
|
||||
let chars: Vec<char> = input.chars().into_iter().collect();
|
||||
@ -27,13 +27,12 @@ fn compile(input: &String) {
|
||||
/// Executes Syntax analysis, and for now, Semantic analysis and Code generation.
|
||||
///
|
||||
/// Prints the generated code to stdout
|
||||
fn build_ast(input: &String, tokens: Vec<Token>) {
|
||||
fn build_ast(input: &String, tokens: Vec<Token>, symbol_table: &mut SymbolTable) {
|
||||
let ast = syntax::construct_ast(&tokens);
|
||||
|
||||
match ast {
|
||||
Ok(mut ast) => {
|
||||
let mut symbol_table = SymbolTable::new();
|
||||
semantic::check_ast(&mut ast, &mut symbol_table);
|
||||
Ok( ast) => {
|
||||
semantic::check_ast(& ast, symbol_table);
|
||||
|
||||
let js_code = codegen::codegen(&ast);
|
||||
println!("{}", js_code)
|
||||
@ -45,17 +44,16 @@ fn build_ast(input: &String, tokens: Vec<Token>) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Executes the REPL, reading from stdin, compiling and emitting JS to stdout
|
||||
pub fn run() -> io::Result<()> {
|
||||
let stdin = io::stdin();
|
||||
let mut buffer = String::new();
|
||||
let mut repl_symbol_table = SymbolTable::new();
|
||||
|
||||
println!("REPL: Enter expressions to evaluate. Type Ctrl-D to exit.");
|
||||
loop {
|
||||
print!("> ");
|
||||
let _ = io::stdout().flush();
|
||||
io::stdout().flush()?;
|
||||
buffer.clear();
|
||||
let read = stdin.read_line(&mut buffer);
|
||||
|
||||
@ -65,7 +63,7 @@ pub fn run() -> io::Result<()> {
|
||||
break Ok(());
|
||||
}
|
||||
Ok(_) => {
|
||||
compile(&buffer);
|
||||
compile(&buffer, &mut repl_symbol_table);
|
||||
}
|
||||
Err(error) => {
|
||||
eprintln!("Error reading stdin.");
|
||||
|
@ -1,4 +1,3 @@
|
||||
|
||||
/// Represents a qualified datatype of the compiler.
|
||||
///
|
||||
/// A datatype is composed of a path, e.g. `base.Str`, `base.Num`
|
||||
@ -7,22 +6,27 @@ pub struct Datatype {
|
||||
t: String,
|
||||
}
|
||||
|
||||
|
||||
impl Datatype {
|
||||
pub fn new(t: String) -> Datatype {
|
||||
Datatype { t }
|
||||
}
|
||||
|
||||
pub fn str() -> Datatype {
|
||||
Datatype { t: String::from("base.Str") }
|
||||
Datatype {
|
||||
t: String::from("base.Str"),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn num() -> Datatype {
|
||||
Datatype { t: String::from("base.Num") }
|
||||
Datatype {
|
||||
t: String::from("base.Num"),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn bool() -> Datatype {
|
||||
Datatype { t: String::from("base.Bool") }
|
||||
Datatype {
|
||||
t: String::from("base.Bool"),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn clone(&self) -> Datatype {
|
||||
@ -30,8 +34,6 @@ impl Datatype {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::Datatype;
|
||||
|
@ -1,25 +1,101 @@
|
||||
use super::ast_types::{Binding, Expression, ModuleAST};
|
||||
use super::symbol_table::{SymbolTable, _BOOLEAN, _NUMBER, _STRING};
|
||||
use super::ast_types::{Binding, ModuleAST};
|
||||
use super::symbol_table::SymbolTable;
|
||||
|
||||
mod datatype;
|
||||
mod type_check;
|
||||
|
||||
use type_check::Typed;
|
||||
|
||||
pub use datatype::Datatype;
|
||||
|
||||
/// Checks the AST. In the future should return a list of errors.
|
||||
pub fn check_ast<'a>(ast: &'a mut ModuleAST, symbol_table: &'a mut SymbolTable) {
|
||||
pub fn check_ast<'a>(ast: &'a ModuleAST, symbol_table: &'a mut SymbolTable) {
|
||||
for binding in ast.bindings.iter() {
|
||||
match binding {
|
||||
Binding::Val(b) => {
|
||||
let datatype = b.expression.t(symbol_table);
|
||||
let identifier = b.identifier;
|
||||
|
||||
// TODO: check the expression's datatype against an explicit datatype annotation, e.g. `Str val x = 322`
|
||||
|
||||
symbol_table.insert(identifier.as_str(), datatype);
|
||||
}
|
||||
Binding::Var(b) => {
|
||||
let datatype = b.expression.t(symbol_table);
|
||||
let identifier = b.identifier;
|
||||
|
||||
// TODO: check the expression's datatype against an explicit datatype annotation, e.g. `Str val x = 322`
|
||||
|
||||
symbol_table.insert(identifier.as_str(), datatype);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::lexic;
|
||||
use crate::symbol_table::_BOOLEAN;
|
||||
use crate::symbol_table::_STRING;
|
||||
use crate::syntax;
|
||||
mod t {
|
||||
use crate::ast_types::Expression;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn should_insert_into_symbol_table() {
|
||||
let s1 = String::from("id");
|
||||
let s2 = String::from("322");
|
||||
let binding = Binding::Val(crate::ast_types::ValBinding {
|
||||
datatype: None,
|
||||
identifier: &s1,
|
||||
expression: Expression::Number(&s2),
|
||||
});
|
||||
|
||||
let mut table = SymbolTable::new();
|
||||
|
||||
check_ast(
|
||||
&ModuleAST {
|
||||
bindings: vec![binding],
|
||||
},
|
||||
&mut table,
|
||||
);
|
||||
|
||||
assert!(table.has_id(&String::from("id")));
|
||||
assert!(table.check_type(&String::from("id"), Datatype::num()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_insert_id_reference() {
|
||||
let s1 = String::from("id");
|
||||
let s2 = String::from("322");
|
||||
let binding = Binding::Val(crate::ast_types::ValBinding {
|
||||
datatype: None,
|
||||
identifier: &s1,
|
||||
expression: Expression::Number(&s2),
|
||||
});
|
||||
|
||||
let mut table = SymbolTable::new();
|
||||
|
||||
check_ast(
|
||||
&ModuleAST {
|
||||
bindings: vec![binding],
|
||||
},
|
||||
&mut table,
|
||||
);
|
||||
|
||||
let s1 = String::from("id2");
|
||||
let s2 = String::from("id");
|
||||
let binding = Binding::Val(crate::ast_types::ValBinding {
|
||||
datatype: None,
|
||||
identifier: &s1,
|
||||
expression: Expression::Identifier(&s2),
|
||||
});
|
||||
|
||||
check_ast(
|
||||
&ModuleAST {
|
||||
bindings: vec![binding],
|
||||
},
|
||||
&mut table,
|
||||
);
|
||||
|
||||
assert!(table.has_id(&String::from("id2")));
|
||||
assert!(table.check_type(&String::from("id2"), Datatype::num()));
|
||||
}
|
||||
}
|
||||
|
@ -2,26 +2,27 @@ use crate::{ast_types::Expression, symbol_table::SymbolTable};
|
||||
|
||||
use super::datatype::Datatype;
|
||||
|
||||
trait Typed<'a> {
|
||||
pub trait Typed<'a> {
|
||||
fn t(&self, symbol_table: &'a mut SymbolTable) -> Datatype;
|
||||
}
|
||||
|
||||
impl<'a> Typed<'a> for Expression<'a> {
|
||||
/// Returns the Datatype of this Expression
|
||||
fn t(&self, symbol_table: &'a mut SymbolTable) -> Datatype {
|
||||
match self {
|
||||
Expression::Number(_) => Datatype::num(),
|
||||
Expression::String(_) => Datatype::str(),
|
||||
Expression::Boolean(_) => Datatype::bool(),
|
||||
Expression::Identifier(id) => {
|
||||
let res = symbol_table.get_type(id).unwrap();
|
||||
let res = symbol_table
|
||||
.get_type(id)
|
||||
.expect("SEMANTIC: identifier doesn't exist in Symbol table");
|
||||
res.clone()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod t {
|
||||
use super::*;
|
||||
@ -59,4 +60,3 @@ mod t {
|
||||
assert!(exp.t(&mut table) == Datatype::num());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2,11 +2,6 @@ use std::collections::HashMap;
|
||||
|
||||
use crate::semantic::Datatype;
|
||||
|
||||
// Primitive datatypes
|
||||
pub const _NUMBER: &str = "Num";
|
||||
pub const _STRING: &str = "Str";
|
||||
pub const _BOOLEAN: &str = "Bool";
|
||||
|
||||
pub struct SymbolTable {
|
||||
/// For now just stores identifiers and datatypes
|
||||
table: HashMap<String, Datatype>,
|
||||
@ -22,8 +17,7 @@ impl SymbolTable {
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, identifier: &str, datatype: Datatype) {
|
||||
self.table
|
||||
.insert(String::from(identifier), datatype);
|
||||
self.table.insert(String::from(identifier), datatype);
|
||||
}
|
||||
|
||||
pub fn has_id(&self, identifier: &String) -> bool {
|
||||
@ -45,9 +39,7 @@ impl SymbolTable {
|
||||
|
||||
/// Returns the Datatype of a given identifier
|
||||
pub fn get_type(&self, identifier: &String) -> Option<&Datatype> {
|
||||
self.table
|
||||
.get(identifier)
|
||||
.and_then(|value| Some(value))
|
||||
self.table.get(identifier).and_then(|value| Some(value))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -41,7 +41,7 @@ mod tests {
|
||||
let expression = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
match expression {
|
||||
Expression::String(value) => assert_eq!("Hello", value),
|
||||
Expression::String(value) => assert_eq!("\"Hello\"", value),
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
use crate::ast_types::Binding;
|
||||
use crate::error_handling::{SyntaxError, MistiError};
|
||||
use crate::error_handling::{MistiError, SyntaxError};
|
||||
|
||||
use super::token::Token;
|
||||
|
||||
@ -41,13 +41,8 @@ pub fn construct_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST<'a>, MistiE
|
||||
}
|
||||
|
||||
fn next_construct<'a>(tokens: &'a Vec<Token>, current_pos: usize) -> SyntaxResult {
|
||||
None.or_else(|| binding::try_parse(tokens, 0))
|
||||
None.or_else(|| binding::try_parse(tokens, current_pos))
|
||||
.unwrap_or_else(|| {
|
||||
SyntaxResult::Err(SyntaxError {
|
||||
reason: String::from("Unrecognized token"),
|
||||
// FIXME: This should get the position of the _token_ that current_pos points to
|
||||
error_start: current_pos,
|
||||
error_end: current_pos,
|
||||
})
|
||||
SyntaxResult::None
|
||||
})
|
||||
}
|
||||
|
@ -30,10 +30,7 @@ pub struct Token {
|
||||
|
||||
impl Token {
|
||||
pub fn get_end_position(&self) -> usize {
|
||||
match self.token_type {
|
||||
TokenType::String => self.position + self.value.len() + 2,
|
||||
_ => self.position + self.value.len(),
|
||||
}
|
||||
self.position + self.value.len()
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user