Add minimal error reporting for syntax analysis
This commit is contained in:
parent
a88c8e9216
commit
cdafc40ff7
@ -1,11 +1,10 @@
|
||||
|
||||
pub struct ModuleAST<'a> {
|
||||
pub bindings: Vec<Binding<'a>>,
|
||||
}
|
||||
|
||||
pub enum Binding<'a> {
|
||||
Val(ValBinding<'a>),
|
||||
Var(VarBinding<'a>)
|
||||
Var(VarBinding<'a>),
|
||||
}
|
||||
|
||||
pub struct ValBinding<'a> {
|
||||
|
@ -1,5 +1,5 @@
|
||||
use crate::ast_types::Binding;
|
||||
use super::Transpilable;
|
||||
use crate::ast_types::Binding;
|
||||
|
||||
impl Transpilable for Binding<'_> {
|
||||
/// Transpiles val and var bindings into JS.
|
||||
@ -19,12 +19,10 @@ impl Transpilable for Binding<'_> {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::ast_types::{Expression, Binding, ValBinding};
|
||||
use crate::ast_types::{Binding, Expression, ValBinding};
|
||||
|
||||
#[test]
|
||||
fn binding_should_transpile() {
|
||||
@ -37,7 +35,7 @@ mod tests {
|
||||
});
|
||||
|
||||
let result = binding.transpile();
|
||||
|
||||
|
||||
assert_eq!("const identifier = 322;", result);
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
use crate::ast_types::Expression;
|
||||
use super::Transpilable;
|
||||
use crate::ast_types::Expression;
|
||||
|
||||
impl Transpilable for Expression<'_> {
|
||||
/// Transpiles an Expression to JS
|
||||
@ -11,23 +11,16 @@ impl Transpilable for Expression<'_> {
|
||||
/// - Identifier
|
||||
fn transpile(&self) -> String {
|
||||
match self {
|
||||
Expression::Number(value) => {
|
||||
String::from(*value)
|
||||
}
|
||||
Expression::Number(value) => String::from(*value),
|
||||
Expression::String(value) => {
|
||||
format!("\"{}\"", *value)
|
||||
}
|
||||
Expression::Boolean(value) => {
|
||||
String::from(if *value {"true"} else {"false"})
|
||||
}
|
||||
Expression::Identifier(value) => {
|
||||
String::from(*value)
|
||||
}
|
||||
Expression::Boolean(value) => String::from(if *value { "true" } else { "false" }),
|
||||
Expression::Identifier(value) => String::from(*value),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@ -41,7 +34,7 @@ mod tests {
|
||||
|
||||
assert_eq!("42", result);
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn should_transpile_string() {
|
||||
let str = String::from("Hello world");
|
||||
@ -50,21 +43,21 @@ mod tests {
|
||||
|
||||
assert_eq!("\"Hello world\"", result);
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn should_transpile_boolean() {
|
||||
let exp = Expression::Boolean(true);
|
||||
let result = exp.transpile();
|
||||
|
||||
|
||||
assert_eq!("true", result);
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn should_transpile_identifier() {
|
||||
let s = String::from("newValue");
|
||||
let exp = Expression::Identifier(&s);
|
||||
let result = exp.transpile();
|
||||
|
||||
|
||||
assert_eq!("newValue", result);
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
use super::ast_types::ModuleAST;
|
||||
|
||||
mod expression;
|
||||
mod binding;
|
||||
mod expression;
|
||||
mod module_ast;
|
||||
|
||||
/// Trait that the AST and its nodes implement to support transformation to JavaScript
|
||||
@ -15,13 +15,9 @@ pub fn codegen<'a>(ast: &'a ModuleAST) -> String {
|
||||
ast.transpile()
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::{lexic, syntax, semantic, symbol_table::SymbolTable};
|
||||
use crate::{lexic, semantic, symbol_table::SymbolTable, syntax};
|
||||
|
||||
use super::*;
|
||||
|
||||
@ -38,4 +34,3 @@ mod tests {
|
||||
assert_eq!("const id = 322;", out_str);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,21 +1,24 @@
|
||||
use crate::ast_types::ModuleAST;
|
||||
use super::Transpilable;
|
||||
use crate::ast_types::ModuleAST;
|
||||
|
||||
impl Transpilable for ModuleAST<'_> {
|
||||
/// Transpiles the whole AST into JS, using this same trait on the
|
||||
/// nodes and leaves of the AST
|
||||
fn transpile(&self) -> String {
|
||||
let bindings_str: Vec::<String> = self.bindings.iter().map(|binding| binding.transpile()).collect();
|
||||
let bindings_str: Vec<String> = self
|
||||
.bindings
|
||||
.iter()
|
||||
.map(|binding| binding.transpile())
|
||||
.collect();
|
||||
|
||||
bindings_str.join("\n")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::ast_types::{Expression, ValBinding, Binding};
|
||||
use crate::ast_types::{Binding, Expression, ValBinding};
|
||||
|
||||
#[test]
|
||||
fn module_ast_should_transpile() {
|
||||
@ -34,5 +37,5 @@ mod tests {
|
||||
let result = module.transpile();
|
||||
|
||||
assert_eq!("const identifier = 322;", result);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,10 +1,9 @@
|
||||
use std::{collections::VecDeque};
|
||||
use super::{PrintableError, LexError};
|
||||
use super::{LexError, PrintableError};
|
||||
use std::collections::VecDeque;
|
||||
|
||||
impl PrintableError for LexError {
|
||||
// TODO: Count and show line number
|
||||
fn get_error_str(&self, chars: &Vec<char>) -> String {
|
||||
|
||||
let (erroneous_code, back_count) = get_line(chars, self.position);
|
||||
|
||||
let mut whitespace = Vec::<char>::new();
|
||||
@ -14,9 +13,9 @@ impl PrintableError for LexError {
|
||||
let whitespace = whitespace.iter().collect::<String>();
|
||||
|
||||
format!(
|
||||
"\n{}\n{}^\n\n{}{}\n{}",
|
||||
erroneous_code,
|
||||
whitespace,
|
||||
"\n{}\n{}^\n\n{}{}\n{}",
|
||||
erroneous_code,
|
||||
whitespace,
|
||||
"Invalid character at pos ",
|
||||
self.position + 1,
|
||||
self.reason,
|
||||
@ -26,11 +25,11 @@ impl PrintableError for LexError {
|
||||
|
||||
/// Extracts a line of code from `chars` and the number of characters in the back.
|
||||
/// `pos` indicates a position, from where to extract the line.
|
||||
///
|
||||
///
|
||||
/// Ex. Given:
|
||||
/// - `input = "first line\nsecond line\nthird line"`
|
||||
/// - `pos = 15`
|
||||
///
|
||||
///
|
||||
/// this function should return `("second line", 4)`
|
||||
fn get_line(chars: &Vec<char>, pos: usize) -> (String, usize) {
|
||||
let mut result_chars = VecDeque::<char>::new();
|
||||
@ -72,12 +71,10 @@ fn get_line(chars: &Vec<char>, pos: usize) -> (String, usize) {
|
||||
(result_chars.iter().collect::<String>(), pos - before_pos)
|
||||
}
|
||||
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::lexic;
|
||||
use super::*;
|
||||
use crate::lexic;
|
||||
|
||||
#[test]
|
||||
fn test_error_msg() {
|
||||
@ -92,15 +89,11 @@ mod tests {
|
||||
|
||||
// TODO: check for line number
|
||||
let expected_str = format!(
|
||||
"\n{}\n{}^\n\nInvalid character at pos 9",
|
||||
"val name' = 20",
|
||||
" "
|
||||
"\n{}\n{}^\n\nInvalid character at pos 9\n{}",
|
||||
"val name' = 20", " ", "Unrecognized character `'` (escaped: `\\'`)"
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
expected_str,
|
||||
err_str,
|
||||
);
|
||||
assert_eq!(expected_str, err_str,);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -115,7 +108,6 @@ mod tests {
|
||||
assert_eq!("second line", result);
|
||||
assert_eq!(4, back_count);
|
||||
|
||||
|
||||
let input = String::from("val binding = 322");
|
||||
let chars: Vec<char> = input.chars().into_iter().collect();
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
mod lex_error;
|
||||
mod syntax_error;
|
||||
|
||||
pub trait PrintableError {
|
||||
fn get_error_str(&self, chars: &Vec<char>) -> String;
|
||||
@ -6,7 +7,8 @@ pub trait PrintableError {
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum MistiError {
|
||||
Lex(LexError)
|
||||
Lex(LexError),
|
||||
Syntax(SyntaxError),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@ -15,13 +17,16 @@ pub struct LexError {
|
||||
pub reason: String,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct SyntaxError {
|
||||
pub reason: String,
|
||||
}
|
||||
|
||||
impl PrintableError for MistiError {
|
||||
fn get_error_str(&self, chars: &Vec<char>) -> String {
|
||||
match self {
|
||||
Self::Lex(err) => err.get_error_str(chars)
|
||||
Self::Lex(err) => err.get_error_str(chars),
|
||||
Self::Syntax(err) => err.get_error_str(chars),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
7
src/error_handling/syntax_error.rs
Normal file
7
src/error_handling/syntax_error.rs
Normal file
@ -0,0 +1,7 @@
|
||||
use super::{PrintableError, SyntaxError};
|
||||
|
||||
impl PrintableError for SyntaxError {
|
||||
fn get_error_str(&self, chars: &Vec<char>) -> String {
|
||||
String::from("Syntax error: NOT IMPLEMENTED")
|
||||
}
|
||||
}
|
@ -1,8 +1,8 @@
|
||||
mod utils;
|
||||
mod scanner;
|
||||
mod utils;
|
||||
|
||||
use super::token::{self, Token};
|
||||
use crate::error_handling::{MistiError, LexError};
|
||||
use crate::error_handling::{LexError, MistiError};
|
||||
|
||||
type Chars = Vec<char>;
|
||||
|
||||
@ -11,7 +11,7 @@ pub enum LexResult {
|
||||
/// A token was found. The first element is the token, and the
|
||||
/// second element is the position in the input after the token.
|
||||
///
|
||||
/// E.g., given an input
|
||||
/// E.g., given an input
|
||||
///
|
||||
/// "`identifier 55`"
|
||||
///
|
||||
@ -32,7 +32,6 @@ pub enum LexResult {
|
||||
Err(LexError),
|
||||
}
|
||||
|
||||
|
||||
/// Scans and returns all the tokens in the input String
|
||||
pub fn get_tokens(input: &String) -> Result<Vec<Token>, MistiError> {
|
||||
let chars: Vec<char> = input.chars().into_iter().collect();
|
||||
@ -44,10 +43,10 @@ pub fn get_tokens(input: &String) -> Result<Vec<Token>, MistiError> {
|
||||
LexResult::Some(token, next_pos) => {
|
||||
results.push(token);
|
||||
current_pos = next_pos;
|
||||
},
|
||||
}
|
||||
LexResult::None(next_pos) => {
|
||||
current_pos = next_pos;
|
||||
},
|
||||
}
|
||||
LexResult::Err(error_info) => {
|
||||
return Err(MistiError::Lex(error_info));
|
||||
}
|
||||
@ -65,17 +64,16 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
|
||||
|
||||
// If EOF is reached return nothing but the current position
|
||||
if next_char == '\0' {
|
||||
return LexResult::None(current_pos)
|
||||
return LexResult::None(current_pos);
|
||||
}
|
||||
|
||||
// Handle whitespace recursively.
|
||||
if next_char == ' ' {
|
||||
return next_token(chars, current_pos + 1)
|
||||
return next_token(chars, current_pos + 1);
|
||||
}
|
||||
|
||||
// Scanners
|
||||
None
|
||||
.or_else(|| scanner::number(next_char, chars, current_pos))
|
||||
None.or_else(|| scanner::number(next_char, chars, current_pos))
|
||||
.or_else(|| scanner::identifier(next_char, chars, current_pos))
|
||||
.or_else(|| scanner::datatype(next_char, chars, current_pos))
|
||||
.or_else(|| scanner::string(next_char, chars, current_pos))
|
||||
@ -87,7 +85,7 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
|
||||
position: current_pos,
|
||||
reason: format!(
|
||||
"Unrecognized character `{}` (escaped: `{}`)",
|
||||
next_char,
|
||||
next_char,
|
||||
next_char.escape_default().to_string(),
|
||||
),
|
||||
};
|
||||
@ -106,8 +104,6 @@ fn has_input(input: &Chars, current_pos: usize) -> bool {
|
||||
current_pos < input.len()
|
||||
}
|
||||
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@ -149,7 +145,7 @@ mod tests {
|
||||
match next_token(&chars, 0) {
|
||||
LexResult::Some(t, _) => {
|
||||
assert_eq!("126", t.value)
|
||||
},
|
||||
}
|
||||
_ => {
|
||||
panic!()
|
||||
}
|
||||
@ -173,7 +169,7 @@ mod tests {
|
||||
let t3 = tokens.get(2).unwrap();
|
||||
assert_eq!(TokenType::Number, t3.token_type);
|
||||
assert_eq!("0.282398", t3.value);
|
||||
|
||||
|
||||
assert_eq!("1789e+1", tokens.get(3).unwrap().value);
|
||||
assert_eq!("239.3298e-103", tokens.get(4).unwrap().value);
|
||||
assert_eq!(TokenType::Semicolon, tokens.get(5).unwrap().token_type);
|
||||
@ -209,7 +205,7 @@ mod tests {
|
||||
assert_eq!(TokenType::RightBracket, t.token_type);
|
||||
assert_eq!("]", t.value);
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn should_scan_datatype() {
|
||||
let input = String::from("Num");
|
||||
@ -217,7 +213,7 @@ mod tests {
|
||||
|
||||
assert_eq!(TokenType::Datatype, tokens[0].token_type);
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn should_scan_new_line() {
|
||||
let input = String::from("3\n22");
|
||||
@ -225,7 +221,7 @@ mod tests {
|
||||
|
||||
assert_eq!(TokenType::Semicolon, tokens[1].token_type);
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn should_scan_multiple_new_lines() {
|
||||
let input = String::from("3\n\n\n22");
|
||||
@ -234,7 +230,7 @@ mod tests {
|
||||
assert_eq!(TokenType::Semicolon, tokens[1].token_type);
|
||||
assert_eq!(TokenType::Number, tokens[2].token_type);
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn should_scan_multiple_new_lines_with_whitespace_in_between() {
|
||||
let input = String::from("3\n \n \n22");
|
||||
|
@ -1,4 +1,7 @@
|
||||
use crate::{lexic::{token, utils, LexResult}, token::TokenType};
|
||||
use crate::{
|
||||
lexic::{token, utils, LexResult},
|
||||
token::TokenType,
|
||||
};
|
||||
|
||||
/// Checks if a String is a keyword, and returns its TokenType
|
||||
fn str_is_keyword(s: &String) -> Option<TokenType> {
|
||||
@ -24,31 +27,24 @@ pub fn scan(start_char: char, chars: &Vec<char>, start_pos: usize) -> LexResult
|
||||
/// Recursive function that scans the identifier
|
||||
fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String, is_datatype: bool) -> LexResult {
|
||||
match chars.get(start_pos) {
|
||||
Some(c) if utils::is_identifier_char(*c) => {
|
||||
scan_impl(
|
||||
chars,
|
||||
start_pos + 1,
|
||||
utils::str_append(current, *c),
|
||||
is_datatype,
|
||||
)
|
||||
},
|
||||
Some(c) if utils::is_identifier_char(*c) => scan_impl(
|
||||
chars,
|
||||
start_pos + 1,
|
||||
utils::str_append(current, *c),
|
||||
is_datatype,
|
||||
),
|
||||
_ => {
|
||||
if let Some(token_type) = str_is_keyword(&current) {
|
||||
LexResult::Some(token::new(current, start_pos as i32, token_type), start_pos)
|
||||
}
|
||||
else if is_datatype {
|
||||
} else if is_datatype {
|
||||
LexResult::Some(token::new_datatype(current, start_pos as i32), start_pos)
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
LexResult::Some(token::new_identifier(current, start_pos as i32), start_pos)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::lexic::token::TokenType;
|
||||
@ -69,8 +65,8 @@ mod tests {
|
||||
assert_eq!(1, next);
|
||||
assert_eq!(TokenType::Identifier, token.token_type);
|
||||
assert_eq!("_", token.value);
|
||||
},
|
||||
_ => panic!()
|
||||
}
|
||||
_ => panic!(),
|
||||
}
|
||||
|
||||
let input = str_to_vec("i");
|
||||
@ -80,8 +76,8 @@ mod tests {
|
||||
assert_eq!(1, next);
|
||||
assert_eq!(TokenType::Identifier, token.token_type);
|
||||
assert_eq!("i", token.value);
|
||||
},
|
||||
_ => panic!()
|
||||
}
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
|
||||
@ -89,27 +85,8 @@ mod tests {
|
||||
#[test]
|
||||
fn test_2() {
|
||||
let operators = vec![
|
||||
"_a",
|
||||
"_z",
|
||||
"_A",
|
||||
"_Z",
|
||||
"__",
|
||||
"_0",
|
||||
"_9",
|
||||
"aa",
|
||||
"az",
|
||||
"aA",
|
||||
"aZ",
|
||||
"a_",
|
||||
"a0",
|
||||
"a9",
|
||||
"za",
|
||||
"zz",
|
||||
"zA",
|
||||
"zZ",
|
||||
"z_",
|
||||
"z0",
|
||||
"z9",
|
||||
"_a", "_z", "_A", "_Z", "__", "_0", "_9", "aa", "az", "aA", "aZ", "a_", "a0", "a9",
|
||||
"za", "zz", "zA", "zZ", "z_", "z0", "z9",
|
||||
];
|
||||
|
||||
for op in operators {
|
||||
@ -120,13 +97,12 @@ mod tests {
|
||||
assert_eq!(2, next);
|
||||
assert_eq!(TokenType::Identifier, token.token_type);
|
||||
assert_eq!(op, token.value);
|
||||
},
|
||||
_ => panic!()
|
||||
}
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Should scan long identifiers
|
||||
#[test]
|
||||
fn test_3() {
|
||||
@ -145,8 +121,8 @@ mod tests {
|
||||
assert_eq!(input.len(), next);
|
||||
assert_eq!(TokenType::Identifier, token.token_type);
|
||||
assert_eq!(op, token.value);
|
||||
},
|
||||
_ => panic!()
|
||||
}
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -156,19 +132,22 @@ mod tests {
|
||||
fn test_4() {
|
||||
let input = str_to_vec("var");
|
||||
let start_pos = 0;
|
||||
if let LexResult::Some(token, next) = scan(*input.get(0).unwrap(), &input, start_pos) {
|
||||
if let LexResult::Some(token, next) = scan(*input.get(0).unwrap(), &input, start_pos) {
|
||||
assert_eq!(3, next);
|
||||
assert_eq!(TokenType::VAR, token.token_type);
|
||||
assert_eq!("var", token.value);
|
||||
} else {panic!()}
|
||||
|
||||
} else {
|
||||
panic!()
|
||||
}
|
||||
|
||||
let input = str_to_vec("val");
|
||||
let start_pos = 0;
|
||||
if let LexResult::Some(token, next) = scan(*input.get(0).unwrap(), &input, start_pos) {
|
||||
if let LexResult::Some(token, next) = scan(*input.get(0).unwrap(), &input, start_pos) {
|
||||
assert_eq!(3, next);
|
||||
assert_eq!(TokenType::VAL, token.token_type);
|
||||
assert_eq!("val", token.value);
|
||||
} else {panic!()}
|
||||
} else {
|
||||
panic!()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,11 +1,13 @@
|
||||
use super::{token::{TokenType, self}, utils, LexResult};
|
||||
use super::{
|
||||
token::{self, TokenType},
|
||||
utils, LexResult,
|
||||
};
|
||||
|
||||
mod identifier;
|
||||
mod new_line;
|
||||
mod number;
|
||||
mod operator;
|
||||
mod identifier;
|
||||
mod string;
|
||||
mod new_line;
|
||||
|
||||
|
||||
// This module contains the individual scanners, and exports them
|
||||
|
||||
@ -14,13 +16,11 @@ pub fn number(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult>
|
||||
utils::is_digit(c).then(|| number::scan(chars, start_pos))
|
||||
}
|
||||
|
||||
|
||||
/// Attempts to scan an operator. If not found, returns None to be able to chain other scanners
|
||||
pub fn operator(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||
utils::is_operator(c).then(|| operator::scan(chars, start_pos))
|
||||
}
|
||||
|
||||
|
||||
/// Attempts to scan a grouping sign. If not found, returns None to be able to chain other scanners
|
||||
pub fn grouping_sign(c: char, _: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||
let token_type = match c {
|
||||
@ -33,27 +33,20 @@ pub fn grouping_sign(c: char, _: &Vec<char>, start_pos: usize) -> Option<LexResu
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
let token = token::new(
|
||||
c.to_string(),
|
||||
start_pos as i32,
|
||||
token_type,
|
||||
);
|
||||
let token = token::new(c.to_string(), start_pos as i32, token_type);
|
||||
Some(LexResult::Some(token, start_pos + 1))
|
||||
}
|
||||
|
||||
|
||||
/// Attempts to scan an identifier. If not found, returns None to be able to chain other scanners
|
||||
pub fn identifier(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||
(utils::is_lowercase(c) || c == '_')
|
||||
.then(|| identifier::scan(c, chars, start_pos))
|
||||
(utils::is_lowercase(c) || c == '_').then(|| identifier::scan(c, chars, start_pos))
|
||||
}
|
||||
|
||||
/// Attempts to scan a datatype. If not found, returns None to be able to chain other scanners
|
||||
pub fn datatype(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||
// Since the only difference with an identifier is that the first character is an
|
||||
// uppercase letter, reuse the identifier scanner
|
||||
utils::is_uppercase(c)
|
||||
.then(|| identifier::scan(c, chars, start_pos))
|
||||
utils::is_uppercase(c).then(|| identifier::scan(c, chars, start_pos))
|
||||
}
|
||||
|
||||
/// Attempts to scan a string. If not found, returns None to be able to chain other scanners
|
||||
@ -62,7 +55,6 @@ pub fn string(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult>
|
||||
}
|
||||
|
||||
/// Attempts to scan a new line. If not found, returns None to be able to chain other scanners
|
||||
pub fn new_line(c:char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||
pub fn new_line(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||
(c == '\n').then(|| new_line::scan(chars, start_pos))
|
||||
}
|
||||
|
||||
|
@ -1,8 +1,6 @@
|
||||
use crate::{
|
||||
lexic::{
|
||||
token, LexResult,
|
||||
},
|
||||
token::TokenType
|
||||
lexic::{token, LexResult},
|
||||
token::TokenType,
|
||||
};
|
||||
|
||||
/// Function to handle new lines
|
||||
@ -15,28 +13,16 @@ pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
|
||||
let current = chars.get(start_pos);
|
||||
|
||||
match current {
|
||||
Some(c) if *c == '\n' => {
|
||||
scan(chars, start_pos + 1)
|
||||
}
|
||||
Some(c) if *c == ' ' => {
|
||||
match look_ahead_for_new_line(chars, start_pos + 1) {
|
||||
Some(next_pos) => scan(chars, next_pos),
|
||||
None => {
|
||||
let token = token::new(
|
||||
String::from(";"),
|
||||
start_pos as i32,
|
||||
TokenType::Semicolon,
|
||||
);
|
||||
LexResult::Some(token, start_pos)
|
||||
}
|
||||
Some(c) if *c == '\n' => scan(chars, start_pos + 1),
|
||||
Some(c) if *c == ' ' => match look_ahead_for_new_line(chars, start_pos + 1) {
|
||||
Some(next_pos) => scan(chars, next_pos),
|
||||
None => {
|
||||
let token = token::new(String::from(";"), start_pos as i32, TokenType::Semicolon);
|
||||
LexResult::Some(token, start_pos)
|
||||
}
|
||||
}
|
||||
},
|
||||
Some(_) | None => {
|
||||
let token = token::new(
|
||||
String::from(";"),
|
||||
start_pos as i32,
|
||||
TokenType::Semicolon,
|
||||
);
|
||||
let token = token::new(String::from(";"), start_pos as i32, TokenType::Semicolon);
|
||||
LexResult::Some(token, start_pos)
|
||||
}
|
||||
}
|
||||
@ -45,19 +31,12 @@ pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
|
||||
/// Returns the position after the new line
|
||||
fn look_ahead_for_new_line(chars: &Vec<char>, pos: usize) -> Option<usize> {
|
||||
match chars.get(pos) {
|
||||
Some(c) if *c == ' ' => {
|
||||
look_ahead_for_new_line(chars, pos + 1)
|
||||
}
|
||||
Some(c) if *c == '\n' => {
|
||||
Some(pos + 1)
|
||||
}
|
||||
Some(_) | None => {
|
||||
None
|
||||
}
|
||||
Some(c) if *c == ' ' => look_ahead_for_new_line(chars, pos + 1),
|
||||
Some(c) if *c == '\n' => Some(pos + 1),
|
||||
Some(_) | None => None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::lexic::token::TokenType;
|
||||
@ -67,7 +46,7 @@ mod tests {
|
||||
fn str_to_vec(s: &str) -> Vec<char> {
|
||||
s.chars().collect()
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn should_emit_semicolon_instead_of_new_line() {
|
||||
let input = str_to_vec("\n");
|
||||
@ -92,8 +71,7 @@ mod tests {
|
||||
} else {
|
||||
panic!()
|
||||
}
|
||||
|
||||
|
||||
|
||||
let input = str_to_vec("\n\n\naToken");
|
||||
let start_pos = 0;
|
||||
|
||||
@ -104,7 +82,7 @@ mod tests {
|
||||
panic!()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn should_emit_a_single_semicolon_with_multiple_new_lines_and_whitespace() {
|
||||
let input = str_to_vec("\n \n \n");
|
||||
@ -116,8 +94,7 @@ mod tests {
|
||||
} else {
|
||||
panic!()
|
||||
}
|
||||
|
||||
|
||||
|
||||
let input = str_to_vec("\n \n \n aToken");
|
||||
let start_pos = 0;
|
||||
|
||||
@ -127,8 +104,7 @@ mod tests {
|
||||
} else {
|
||||
panic!()
|
||||
}
|
||||
|
||||
|
||||
|
||||
let input = str_to_vec("\n \n \n ");
|
||||
let start_pos = 0;
|
||||
|
||||
|
@ -1,8 +1,8 @@
|
||||
use crate::error_handling::LexError;
|
||||
use crate::lexic::{
|
||||
token::{self, Token},
|
||||
utils, LexResult,
|
||||
};
|
||||
use crate::error_handling::LexError;
|
||||
|
||||
/// Function to scan a number
|
||||
///
|
||||
@ -66,9 +66,9 @@ fn scan_double(chars: &Vec<char>, start_pos: usize, current: String) -> LexResul
|
||||
Some(c) if utils::is_digit(*c) => scan_double_impl(chars, start_pos, current),
|
||||
Some(_) => LexResult::Err(LexError {
|
||||
position: start_pos,
|
||||
reason : String::from(
|
||||
reason: String::from(
|
||||
"The character after the dot when scanning a double is not a number.",
|
||||
)
|
||||
),
|
||||
}),
|
||||
_ => LexResult::Err(LexError {
|
||||
position: start_pos,
|
||||
@ -112,7 +112,7 @@ fn scan_scientific(chars: &Vec<char>, start_pos: usize, current: String) -> LexR
|
||||
position: start_pos,
|
||||
reason: String::from(
|
||||
"The characters after 'e' are not + or -, or are not followed by a number",
|
||||
)
|
||||
),
|
||||
}),
|
||||
}
|
||||
}
|
||||
@ -230,7 +230,9 @@ mod tests {
|
||||
let start_pos = 0;
|
||||
|
||||
match scan(&input, start_pos) {
|
||||
LexResult::Err(reason) => assert_eq!("Tried to scan an incomplete hex value", reason.reason),
|
||||
LexResult::Err(reason) => {
|
||||
assert_eq!("Tried to scan an incomplete hex value", reason.reason)
|
||||
}
|
||||
_ => panic!(),
|
||||
}
|
||||
|
||||
@ -299,7 +301,9 @@ mod tests {
|
||||
let start_pos = 0;
|
||||
|
||||
match scan(&input, start_pos) {
|
||||
LexResult::Err(reason) => assert_eq!("EOF when scanning a double number.", reason.reason),
|
||||
LexResult::Err(reason) => {
|
||||
assert_eq!("EOF when scanning a double number.", reason.reason)
|
||||
}
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
|
@ -1,8 +1,7 @@
|
||||
use crate::lexic::{token, utils, LexResult};
|
||||
|
||||
|
||||
/// Function to scan an operator
|
||||
///
|
||||
///
|
||||
/// This function assumes the character at `start_pos` is an operator
|
||||
pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
|
||||
scan_impl(chars, start_pos, String::from(""))
|
||||
@ -12,16 +11,11 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
|
||||
match chars.get(start_pos) {
|
||||
Some(c) if utils::is_operator(*c) => {
|
||||
scan_impl(chars, start_pos + 1, utils::str_append(current, *c))
|
||||
},
|
||||
_ => {
|
||||
LexResult::Some(token::new_operator(current, start_pos as i32), start_pos)
|
||||
}
|
||||
_ => LexResult::Some(token::new_operator(current, start_pos as i32), start_pos),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@ -35,26 +29,8 @@ mod tests {
|
||||
#[test]
|
||||
fn test_1() {
|
||||
let operators = vec![
|
||||
"+",
|
||||
"-",
|
||||
"=",
|
||||
"*",
|
||||
"!",
|
||||
"\\",
|
||||
"/",
|
||||
"|",
|
||||
"@",
|
||||
"#",
|
||||
"$",
|
||||
"~",
|
||||
"%",
|
||||
"&",
|
||||
"?",
|
||||
"<",
|
||||
">",
|
||||
"^",
|
||||
".",
|
||||
":",
|
||||
"+", "-", "=", "*", "!", "\\", "/", "|", "@", "#", "$", "~", "%", "&", "?", "<", ">",
|
||||
"^", ".", ":",
|
||||
];
|
||||
|
||||
for op in operators {
|
||||
@ -65,8 +41,8 @@ mod tests {
|
||||
assert_eq!(1, next);
|
||||
assert_eq!(TokenType::Operator, token.token_type);
|
||||
assert_eq!(op, token.value);
|
||||
},
|
||||
_ => panic!()
|
||||
}
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -75,20 +51,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_2() {
|
||||
let operators = vec![
|
||||
"<<",
|
||||
">>",
|
||||
"<|",
|
||||
"|>",
|
||||
"+>",
|
||||
"<+",
|
||||
"+=",
|
||||
"-=",
|
||||
"?.",
|
||||
"??",
|
||||
"?:",
|
||||
"*=",
|
||||
"/=",
|
||||
"==",
|
||||
"<<", ">>", "<|", "|>", "+>", "<+", "+=", "-=", "?.", "??", "?:", "*=", "/=", "==",
|
||||
"!=",
|
||||
];
|
||||
|
||||
@ -100,8 +63,8 @@ mod tests {
|
||||
assert_eq!(2, next);
|
||||
assert_eq!(TokenType::Operator, token.token_type);
|
||||
assert_eq!(op, token.value);
|
||||
},
|
||||
_ => panic!()
|
||||
}
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,8 +1,5 @@
|
||||
use crate::lexic::{
|
||||
token,
|
||||
utils, LexResult,
|
||||
};
|
||||
use crate::error_handling::LexError;
|
||||
use crate::lexic::{token, utils, LexResult};
|
||||
|
||||
/// Function to scan a string
|
||||
///
|
||||
@ -18,46 +15,26 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
|
||||
Some(c) if *c == '"' => {
|
||||
LexResult::Some(token::new_string(current, start_pos as i32), start_pos + 1)
|
||||
}
|
||||
Some(c) if *c == '\n' => {
|
||||
LexResult::Err(LexError {
|
||||
position: start_pos,
|
||||
reason: String::from("Unexpected new line inside a string.")
|
||||
})
|
||||
}
|
||||
Some(c) if *c == '\n' => LexResult::Err(LexError {
|
||||
position: start_pos,
|
||||
reason: String::from("Unexpected new line inside a string."),
|
||||
}),
|
||||
Some(c) if *c == '\\' => {
|
||||
if let Some(escape) = test_escape_char(chars, start_pos + 1) {
|
||||
scan_impl(
|
||||
chars,
|
||||
start_pos + 2,
|
||||
utils::str_append(current, escape),
|
||||
)
|
||||
}
|
||||
else {
|
||||
scan_impl(chars, start_pos + 2, utils::str_append(current, escape))
|
||||
} else {
|
||||
// Ignore the backslash
|
||||
scan_impl(
|
||||
chars,
|
||||
start_pos + 1,
|
||||
current,
|
||||
)
|
||||
scan_impl(chars, start_pos + 1, current)
|
||||
}
|
||||
}
|
||||
Some(c) => {
|
||||
scan_impl(
|
||||
chars,
|
||||
start_pos + 1,
|
||||
utils::str_append(current, *c),
|
||||
)
|
||||
}
|
||||
None => {
|
||||
LexResult::Err(LexError {
|
||||
position: start_pos,
|
||||
reason: String::from("Incomplete string found")
|
||||
})
|
||||
}
|
||||
Some(c) => scan_impl(chars, start_pos + 1, utils::str_append(current, *c)),
|
||||
None => LexResult::Err(LexError {
|
||||
position: start_pos,
|
||||
reason: String::from("Incomplete string found"),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Checks if the char at `start_pos` is an escape character
|
||||
fn test_escape_char(chars: &Vec<char>, start_pos: usize) -> Option<char> {
|
||||
if let Some(c) = chars.get(start_pos) {
|
||||
@ -69,15 +46,11 @@ fn test_escape_char(chars: &Vec<char>, start_pos: usize) -> Option<char> {
|
||||
't' => Some('\t'),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::lexic::token::TokenType;
|
||||
@ -96,8 +69,9 @@ mod tests {
|
||||
assert_eq!(2, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("", token.value);
|
||||
} else {
|
||||
panic!()
|
||||
}
|
||||
else {panic!()}
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -108,8 +82,9 @@ mod tests {
|
||||
assert_eq!(15, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("Hello, world!", token.value);
|
||||
} else {
|
||||
panic!()
|
||||
}
|
||||
else {panic!()}
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -118,8 +93,9 @@ mod tests {
|
||||
let start_pos = 1;
|
||||
if let LexResult::Err(reason) = scan(&input, start_pos) {
|
||||
assert_eq!("Unexpected new line inside a string.", reason.reason)
|
||||
} else {
|
||||
panic!()
|
||||
}
|
||||
else {panic!()}
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -130,8 +106,9 @@ mod tests {
|
||||
assert_eq!(14, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("Sample\ntext", token.value);
|
||||
} else {
|
||||
panic!()
|
||||
}
|
||||
else {panic!()}
|
||||
|
||||
let input = str_to_vec("\"Sample\\\"text\"");
|
||||
let start_pos = 1;
|
||||
@ -139,8 +116,9 @@ mod tests {
|
||||
assert_eq!(14, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("Sample\"text", token.value);
|
||||
} else {
|
||||
panic!()
|
||||
}
|
||||
else {panic!()}
|
||||
|
||||
let input = str_to_vec("\"Sample\\rtext\"");
|
||||
let start_pos = 1;
|
||||
@ -148,8 +126,9 @@ mod tests {
|
||||
assert_eq!(14, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("Sample\rtext", token.value);
|
||||
} else {
|
||||
panic!()
|
||||
}
|
||||
else {panic!()}
|
||||
|
||||
let input = str_to_vec("\"Sample\\\\text\"");
|
||||
let start_pos = 1;
|
||||
@ -157,8 +136,9 @@ mod tests {
|
||||
assert_eq!(14, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("Sample\\text", token.value);
|
||||
} else {
|
||||
panic!()
|
||||
}
|
||||
else {panic!()}
|
||||
|
||||
let input = str_to_vec("\"Sample\\ttext\"");
|
||||
let start_pos = 1;
|
||||
@ -166,8 +146,9 @@ mod tests {
|
||||
assert_eq!(14, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("Sample\ttext", token.value);
|
||||
} else {
|
||||
panic!()
|
||||
}
|
||||
else {panic!()}
|
||||
|
||||
let input = str_to_vec("\"Sample\\ text\"");
|
||||
let start_pos = 1;
|
||||
@ -175,7 +156,8 @@ mod tests {
|
||||
assert_eq!(14, next);
|
||||
assert_eq!(TokenType::String, token.token_type);
|
||||
assert_eq!("Sample text", token.value);
|
||||
} else {
|
||||
panic!()
|
||||
}
|
||||
else {panic!()}
|
||||
}
|
||||
}
|
||||
|
@ -15,11 +15,26 @@ pub fn str_append(current: String, c: char) -> String {
|
||||
|
||||
/// Whether `c` is an operator char.
|
||||
pub fn is_operator(c: char) -> bool {
|
||||
c == '+' || c == '-' || c == '=' || c == '*' || c == '!'
|
||||
|| c == '\\' || c == '/' || c == '|' || c == '@'
|
||||
|| c == '#' || c == '$' || c == '~' || c == '%'
|
||||
|| c == '&' || c == '?' || c == '<' || c == '>'
|
||||
|| c == '^' || c == '.' || c == ':'
|
||||
c == '+'
|
||||
|| c == '-'
|
||||
|| c == '='
|
||||
|| c == '*'
|
||||
|| c == '!'
|
||||
|| c == '\\'
|
||||
|| c == '/'
|
||||
|| c == '|'
|
||||
|| c == '@'
|
||||
|| c == '#'
|
||||
|| c == '$'
|
||||
|| c == '~'
|
||||
|| c == '%'
|
||||
|| c == '&'
|
||||
|| c == '?'
|
||||
|| c == '<'
|
||||
|| c == '>'
|
||||
|| c == '^'
|
||||
|| c == '.'
|
||||
|| c == ':'
|
||||
}
|
||||
|
||||
/// Whether `c` is between `a-z`
|
||||
|
15
src/main.rs
15
src/main.rs
@ -20,7 +20,6 @@ mod codegen;
|
||||
|
||||
mod error_handling;
|
||||
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(author, version, about, long_about = None)]
|
||||
struct Cli {
|
||||
@ -39,20 +38,22 @@ enum Commands {
|
||||
output: String,
|
||||
},
|
||||
/// Starts the REPL
|
||||
R {}
|
||||
R {},
|
||||
}
|
||||
|
||||
|
||||
const VERSION: &str = "0.0.1";
|
||||
|
||||
fn get_copyright() -> String {
|
||||
let year = Utc::now().year();
|
||||
|
||||
format!("Misti {}\nCopyright (c) {} Fernando Enrique Araoz Morales\n", VERSION, year)
|
||||
format!(
|
||||
"Misti {}\nCopyright (c) {} Fernando Enrique Araoz Morales\n",
|
||||
VERSION, year
|
||||
)
|
||||
}
|
||||
|
||||
/// # Misti
|
||||
///
|
||||
///
|
||||
/// Usage:
|
||||
/// - `misti` : Starts the compiler in watch mode
|
||||
/// - `misti w, --watch, -w` : Starts the compiler in watch mode
|
||||
@ -66,13 +67,11 @@ fn main() {
|
||||
Some(Commands::C { file: _, output: _ }) => {
|
||||
println!("Compile a file: Not implemented")
|
||||
}
|
||||
Some(Commands::R { }) => {
|
||||
Some(Commands::R {}) => {
|
||||
let _ = repl::run();
|
||||
}
|
||||
None => {
|
||||
println!("Compile in watch mode: Not implemented")
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -4,10 +4,10 @@ use crate::error_handling::PrintableError;
|
||||
use crate::symbol_table::SymbolTable;
|
||||
use crate::token::Token;
|
||||
|
||||
use super::lexic;
|
||||
use super::syntax;
|
||||
use super::semantic;
|
||||
use super::codegen;
|
||||
use super::lexic;
|
||||
use super::semantic;
|
||||
use super::syntax;
|
||||
|
||||
/// Executes Lexical analysis, handles errors and calls build_ast for the next phase
|
||||
fn compile(input: &String) {
|
||||
@ -15,20 +15,19 @@ fn compile(input: &String) {
|
||||
|
||||
match tokens {
|
||||
Ok(tokens) => {
|
||||
build_ast(tokens);
|
||||
},
|
||||
build_ast(input, tokens);
|
||||
}
|
||||
Err(error) => {
|
||||
let chars: Vec<char> = input.chars().into_iter().collect();
|
||||
eprintln!("{}", error.get_error_str(&chars))
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// Executes Syntax analysis, and for now, Semantic analysis and Code generation.
|
||||
///
|
||||
/// Prints the generated code to stdout
|
||||
fn build_ast(tokens: Vec<Token>) {
|
||||
fn build_ast(input: &String, tokens: Vec<Token>) {
|
||||
let ast = syntax::construct_ast(&tokens);
|
||||
|
||||
match ast {
|
||||
@ -39,7 +38,8 @@ fn build_ast(tokens: Vec<Token>) {
|
||||
println!("{}", js_code)
|
||||
}
|
||||
Err(reason) => {
|
||||
eprintln!("Syntax error.\n{}", reason)
|
||||
let chars: Vec<char> = input.chars().into_iter().collect();
|
||||
eprintln!("Syntax error.\n{}", reason.get_error_str(&chars))
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -59,14 +59,14 @@ pub fn run() -> io::Result<()> {
|
||||
match read {
|
||||
Ok(0) => {
|
||||
println!("\nBye");
|
||||
break Ok(())
|
||||
},
|
||||
break Ok(());
|
||||
}
|
||||
Ok(_) => {
|
||||
compile(&buffer);
|
||||
},
|
||||
}
|
||||
Err(error) => {
|
||||
eprintln!("Error reading stdin.");
|
||||
break Err(error)
|
||||
break Err(error);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
use super::symbol_table::{SymbolTable, _NUMBER, _STRING, _BOOLEAN};
|
||||
use super::ast_types::{ModuleAST, Binding, Expression};
|
||||
use super::ast_types::{Binding, Expression, ModuleAST};
|
||||
use super::symbol_table::{SymbolTable, _BOOLEAN, _NUMBER, _STRING};
|
||||
|
||||
/// Checks the AST. In the future, this should return a list of errors.
|
||||
pub fn check_ast<'a>(ast: &'a mut ModuleAST, symbol_table: &'a mut SymbolTable) {
|
||||
@ -8,7 +8,7 @@ pub fn check_ast<'a>(ast: &'a mut ModuleAST, symbol_table: &'a mut SymbolTable)
|
||||
Binding::Val(binding) => {
|
||||
symbol_table.add(
|
||||
binding.identifier,
|
||||
get_expression_type(&binding.expression, symbol_table).as_str()
|
||||
get_expression_type(&binding.expression, symbol_table).as_str(),
|
||||
);
|
||||
}
|
||||
Binding::Var(binding) => {
|
||||
@ -28,9 +28,7 @@ fn get_expression_type(exp: &Expression, symbol_table: &SymbolTable) -> String {
|
||||
Expression::Boolean(_) => String::from(_BOOLEAN),
|
||||
Expression::Identifier(id) => {
|
||||
match symbol_table.get_type(*id) {
|
||||
Some(datatype) => {
|
||||
datatype
|
||||
}
|
||||
Some(datatype) => datatype,
|
||||
None => {
|
||||
// Should add an error to the list instead of panicking
|
||||
panic!("Semantic analysis: identifier {} not found", id);
|
||||
@ -42,10 +40,10 @@ fn get_expression_type(exp: &Expression, symbol_table: &SymbolTable) -> String {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::lexic;
|
||||
use crate::symbol_table::_BOOLEAN;
|
||||
use crate::symbol_table::_STRING;
|
||||
use crate::syntax;
|
||||
use crate::lexic;
|
||||
|
||||
use super::*;
|
||||
|
||||
@ -58,7 +56,7 @@ mod tests {
|
||||
|
||||
table.check_type("a", datatype)
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn should_update_symbol_table() {
|
||||
let tokens = lexic::get_tokens(&String::from("val identifier = 20")).unwrap();
|
||||
@ -70,19 +68,19 @@ mod tests {
|
||||
let result = table.test("identifier");
|
||||
assert_eq!(true, result);
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn should_get_correct_type() {
|
||||
assert!(test_type(String::from("val a = 322"), _NUMBER));
|
||||
assert!(test_type(String::from("var a = 322"), _NUMBER));
|
||||
|
||||
|
||||
assert!(test_type(String::from("val a = \"str\" "), _STRING));
|
||||
assert!(test_type(String::from("var a = \"str\" "), _STRING));
|
||||
|
||||
|
||||
assert!(test_type(String::from("val a = false"), _BOOLEAN));
|
||||
assert!(test_type(String::from("var a = true"), _BOOLEAN));
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn should_get_type_from_identifier() {
|
||||
let mut table = SymbolTable::new();
|
||||
@ -91,13 +89,13 @@ mod tests {
|
||||
|
||||
// Add an identifier
|
||||
check_ast(&mut ast, &mut table);
|
||||
|
||||
|
||||
let tokens = lexic::get_tokens(&String::from("val newValue = identifier")).unwrap();
|
||||
let mut ast = syntax::construct_ast(&tokens).unwrap();
|
||||
|
||||
|
||||
// Add a new value that references an identifier
|
||||
check_ast(&mut ast, &mut table);
|
||||
|
||||
|
||||
// The type should be Num
|
||||
let current_type = table.get_type("newValue").unwrap();
|
||||
assert_eq!(_NUMBER, current_type);
|
||||
|
@ -6,7 +6,7 @@ pub const _STRING: &str = "Str";
|
||||
pub const _BOOLEAN: &str = "Bool";
|
||||
|
||||
pub struct SymbolTable {
|
||||
table: HashMap<String, String>
|
||||
table: HashMap<String, String>,
|
||||
}
|
||||
|
||||
impl SymbolTable {
|
||||
@ -19,7 +19,8 @@ impl SymbolTable {
|
||||
}
|
||||
|
||||
pub fn add(&mut self, identifier: &str, datatype: &str) {
|
||||
self.table.insert(String::from(identifier), String::from(datatype));
|
||||
self.table
|
||||
.insert(String::from(identifier), String::from(datatype));
|
||||
}
|
||||
|
||||
pub fn test(&self, identifier: &str) -> bool {
|
||||
@ -32,24 +33,20 @@ impl SymbolTable {
|
||||
.and_then(|(_, value)| {
|
||||
if value == &String::from(datatype) {
|
||||
Some(true)
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
Some(false)
|
||||
}
|
||||
})
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
|
||||
pub fn get_type(&self, identifier: &str) -> Option<String> {
|
||||
self.table
|
||||
.get_key_value(&String::from(identifier))
|
||||
.and_then(|(_, value)| {
|
||||
Some(String::from(value))
|
||||
})
|
||||
.and_then(|(_, value)| Some(String::from(value)))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@ -70,7 +67,7 @@ mod tests {
|
||||
fn should_check_type() {
|
||||
let mut table = SymbolTable::new();
|
||||
table.add("firstNumber", _NUMBER);
|
||||
|
||||
|
||||
assert!(table.check_type("firstNumber", _NUMBER));
|
||||
}
|
||||
}
|
||||
|
@ -1,14 +1,14 @@
|
||||
use super::ast_types::{Binding, ValBinding, VarBinding};
|
||||
use super::{expression, SyntaxResult};
|
||||
use crate::token::{Token, TokenType};
|
||||
use super::ast_types::{ValBinding, VarBinding, Binding};
|
||||
use super::expression;
|
||||
|
||||
// TODO: Should return a 3 state value:
|
||||
// - Success: binding parsed successfully
|
||||
// - NotFound: the first token (var | val) was not found, so the parser should try other options
|
||||
// - Error: token (var | val) was found, but then other expected tokens were not found
|
||||
pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<Binding> {
|
||||
pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult> {
|
||||
let mut pos = pos;
|
||||
|
||||
|
||||
// Optional datatype annotation
|
||||
let datatype_annotation = {
|
||||
match tokens.get(pos) {
|
||||
@ -17,10 +17,11 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<Binding> {
|
||||
Some(String::from(&t.value))
|
||||
}
|
||||
Some(_) => None,
|
||||
None => return None
|
||||
// TODO: return Error
|
||||
None => return None,
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// var/val keyword
|
||||
let is_val = {
|
||||
let res1 = try_token_type(tokens, pos, TokenType::VAL);
|
||||
@ -30,58 +31,62 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<Binding> {
|
||||
let res2 = try_token_type(tokens, pos, TokenType::VAR);
|
||||
match res2 {
|
||||
Some(_) => false,
|
||||
None => return None
|
||||
// TODO: return Error
|
||||
None => return None,
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let identifier = try_token_type(tokens, pos + 1, TokenType::Identifier);
|
||||
if identifier.is_none() { return None }
|
||||
if identifier.is_none() {
|
||||
// TODO: return Error
|
||||
return None;
|
||||
}
|
||||
let identifier = identifier.unwrap();
|
||||
|
||||
let equal_operator = try_operator(tokens, pos + 2, String::from("="));
|
||||
if equal_operator.is_none() { return None }
|
||||
if equal_operator.is_none() {
|
||||
// TODO: return Error
|
||||
return None;
|
||||
}
|
||||
|
||||
let expression = expression::try_parse(tokens, pos + 3);
|
||||
if expression.is_none() { return None }
|
||||
if expression.is_none() {
|
||||
// TODO: return Error
|
||||
return None;
|
||||
}
|
||||
let expression = expression.unwrap();
|
||||
|
||||
if is_val {
|
||||
Some(Binding::Val(ValBinding {
|
||||
let binding = if is_val {
|
||||
Binding::Val(ValBinding {
|
||||
datatype: datatype_annotation,
|
||||
identifier: &identifier.value,
|
||||
expression,
|
||||
}))
|
||||
}
|
||||
else {
|
||||
Some(Binding::Var(VarBinding {
|
||||
})
|
||||
} else {
|
||||
Binding::Var(VarBinding {
|
||||
datatype: datatype_annotation,
|
||||
identifier: &identifier.value,
|
||||
expression,
|
||||
}))
|
||||
}
|
||||
})
|
||||
};
|
||||
|
||||
Some(SyntaxResult::Ok(binding))
|
||||
}
|
||||
|
||||
fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Option<&Token> {
|
||||
tokens
|
||||
.get(pos)
|
||||
.and_then(|token| {
|
||||
(token.token_type == token_type).then(|| token)
|
||||
})
|
||||
.and_then(|token| (token.token_type == token_type).then(|| token))
|
||||
}
|
||||
|
||||
fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Option<&Token> {
|
||||
tokens
|
||||
.get(pos)
|
||||
.and_then(|token| {
|
||||
(token.token_type == TokenType::Operator && token.value == operator)
|
||||
.then(|| token)
|
||||
})
|
||||
tokens.get(pos).and_then(|token| {
|
||||
(token.token_type == TokenType::Operator && token.value == operator).then(|| token)
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@ -93,10 +98,10 @@ mod tests {
|
||||
let binding = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
match binding {
|
||||
Binding::Val(binding) => {
|
||||
SyntaxResult::Ok(Binding::Val(binding)) => {
|
||||
assert_eq!("identifier", binding.identifier);
|
||||
}
|
||||
_ => panic!()
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
|
||||
@ -124,30 +129,29 @@ mod tests {
|
||||
|
||||
assert_eq!("=", token.value);
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn should_parse_binding_with_datatype() {
|
||||
let tokens = get_tokens(&String::from("Num val identifier = 20")).unwrap();
|
||||
let binding = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
match binding {
|
||||
Binding::Val(binding) => {
|
||||
SyntaxResult::Ok(Binding::Val(binding)) => {
|
||||
assert_eq!(Some(String::from("Num")), binding.datatype);
|
||||
assert_eq!("identifier", binding.identifier);
|
||||
}
|
||||
_ => panic!()
|
||||
_ => panic!(),
|
||||
}
|
||||
|
||||
|
||||
|
||||
let tokens = get_tokens(&String::from("Bool var identifier = true")).unwrap();
|
||||
let binding = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
match binding {
|
||||
Binding::Var(binding) => {
|
||||
SyntaxResult::Ok(Binding::Var(binding)) => {
|
||||
assert_eq!(Some(String::from("Bool")), binding.datatype);
|
||||
assert_eq!("identifier", binding.identifier);
|
||||
}
|
||||
_ => panic!()
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
use crate::token::{Token, TokenType};
|
||||
use super::ast_types::Expression;
|
||||
use crate::token::{Token, TokenType};
|
||||
|
||||
/// An expression can be:
|
||||
///
|
||||
@ -8,28 +8,17 @@ use super::ast_types::Expression;
|
||||
/// - A boolean
|
||||
/// - An identifier
|
||||
pub fn try_parse(tokens: &Vec<Token>, pos: usize) -> Option<Expression> {
|
||||
tokens
|
||||
.get(pos)
|
||||
.and_then(|token| {
|
||||
match token.token_type {
|
||||
TokenType::Number => {
|
||||
Some(Expression::Number(&token.value))
|
||||
}
|
||||
TokenType::String => {
|
||||
Some(Expression::String(&token.value))
|
||||
}
|
||||
TokenType::Identifier if token.value == "true" || token.value == "false" => {
|
||||
Some(Expression::Boolean(token.value == "true"))
|
||||
}
|
||||
TokenType::Identifier => {
|
||||
Some(Expression::Identifier(&token.value))
|
||||
}
|
||||
_ => None
|
||||
}
|
||||
})
|
||||
tokens.get(pos).and_then(|token| match token.token_type {
|
||||
TokenType::Number => Some(Expression::Number(&token.value)),
|
||||
TokenType::String => Some(Expression::String(&token.value)),
|
||||
TokenType::Identifier if token.value == "true" || token.value == "false" => {
|
||||
Some(Expression::Boolean(token.value == "true"))
|
||||
}
|
||||
TokenType::Identifier => Some(Expression::Identifier(&token.value)),
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@ -42,10 +31,10 @@ mod tests {
|
||||
|
||||
match expression {
|
||||
Expression::Number(value) => assert_eq!("40", value),
|
||||
_ => panic!()
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn should_parse_a_string() {
|
||||
let tokens = get_tokens(&String::from("\"Hello\"")).unwrap();
|
||||
@ -53,10 +42,10 @@ mod tests {
|
||||
|
||||
match expression {
|
||||
Expression::String(value) => assert_eq!("Hello", value),
|
||||
_ => panic!()
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn should_parse_a_boolean() {
|
||||
let tokens = get_tokens(&String::from("true")).unwrap();
|
||||
@ -64,10 +53,10 @@ mod tests {
|
||||
|
||||
match expression {
|
||||
Expression::Boolean(value) => assert!(value),
|
||||
_ => panic!()
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn should_parse_an_identifier() {
|
||||
let tokens = get_tokens(&String::from("someIdentifier")).unwrap();
|
||||
@ -75,7 +64,7 @@ mod tests {
|
||||
|
||||
match expression {
|
||||
Expression::Identifier(value) => assert_eq!("someIdentifier", value),
|
||||
_ => panic!()
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,25 +1,47 @@
|
||||
use crate::ast_types::Binding;
|
||||
use crate::error_handling::SyntaxError;
|
||||
|
||||
use super::token::Token;
|
||||
|
||||
mod expression;
|
||||
mod binding;
|
||||
mod expression;
|
||||
use super::ast_types;
|
||||
|
||||
use ast_types::ModuleAST;
|
||||
|
||||
/// Constructs the Misti AST from a vector of tokens
|
||||
pub fn construct_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST<'a>, String> {
|
||||
let maybe_binding = binding::try_parse(tokens, 0);
|
||||
pub enum SyntaxResult<'a> {
|
||||
///
|
||||
/// A construct has been found
|
||||
Ok(Binding<'a>),
|
||||
///
|
||||
/// No construct was found
|
||||
None,
|
||||
///
|
||||
/// A construct was found, but there was an error parsing it
|
||||
Err(SyntaxError),
|
||||
}
|
||||
|
||||
match maybe_binding {
|
||||
Some(binding) => {
|
||||
Ok(ModuleAST {
|
||||
bindings: vec![binding]
|
||||
})
|
||||
}
|
||||
None => {
|
||||
Err(String::from("Syntax error."))
|
||||
}
|
||||
/// Constructs the Misti AST from a vector of tokens
|
||||
pub fn construct_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST<'a>, SyntaxError> {
|
||||
let _token_amount = tokens.len();
|
||||
let mut current_pos = 0;
|
||||
|
||||
match next_construct(tokens, current_pos) {
|
||||
SyntaxResult::Ok(module) => Ok(ModuleAST {
|
||||
bindings: vec![module],
|
||||
}),
|
||||
SyntaxResult::None => Err(SyntaxError {
|
||||
reason: String::from("D:"),
|
||||
}),
|
||||
SyntaxResult::Err(err) => Err(err),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fn next_construct<'a>(tokens: &'a Vec<Token>, current_pos: usize) -> SyntaxResult {
|
||||
None.or_else(|| binding::try_parse(tokens, 0))
|
||||
.unwrap_or_else(|| {
|
||||
SyntaxResult::Err(SyntaxError {
|
||||
reason: String::from("Unrecognized token"),
|
||||
})
|
||||
})
|
||||
}
|
||||
|
10
src/token.rs
10
src/token.rs
@ -38,7 +38,7 @@ pub fn new_number(value: String, position: i32) -> Token {
|
||||
Token {
|
||||
token_type: TokenType::Number,
|
||||
value,
|
||||
_position: position
|
||||
_position: position,
|
||||
}
|
||||
}
|
||||
|
||||
@ -46,12 +46,16 @@ pub fn new_operator(value: String, position: i32) -> Token {
|
||||
Token {
|
||||
token_type: TokenType::Operator,
|
||||
value,
|
||||
_position: position
|
||||
_position: position,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new(value: String, position: i32, token_type: TokenType) -> Token {
|
||||
Token {token_type, value, _position: position}
|
||||
Token {
|
||||
token_type,
|
||||
value,
|
||||
_position: position,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_identifier(value: String, position: i32) -> Token {
|
||||
|
Loading…
Reference in New Issue
Block a user