Add minimal error reporting for syntax analysis

master
Araozu 2023-03-14 16:10:43 -05:00
parent a88c8e9216
commit cdafc40ff7
24 changed files with 335 additions and 423 deletions

View File

@ -1,11 +1,10 @@
/// AST of a module: the collection of bindings it contains.
pub struct ModuleAST<'a> {
/// Bindings that make up the module.
pub bindings: Vec<Binding<'a>>,
}
pub enum Binding<'a> {
Val(ValBinding<'a>),
Var(VarBinding<'a>)
Var(VarBinding<'a>),
}
pub struct ValBinding<'a> {

View File

@ -1,5 +1,5 @@
use crate::ast_types::Binding;
use super::Transpilable;
use crate::ast_types::Binding;
impl Transpilable for Binding<'_> {
/// Transpiles val and var bindings into JS.
@ -19,12 +19,10 @@ impl Transpilable for Binding<'_> {
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::ast_types::{Expression, Binding, ValBinding};
use crate::ast_types::{Binding, Expression, ValBinding};
#[test]
fn binding_should_transpile() {

View File

@ -1,5 +1,5 @@
use crate::ast_types::Expression;
use super::Transpilable;
use crate::ast_types::Expression;
impl Transpilable for Expression<'_> {
/// Transpiles an Expression to JS
@ -11,22 +11,15 @@ impl Transpilable for Expression<'_> {
/// - Identifier
fn transpile(&self) -> String {
match self {
Expression::Number(value) => {
String::from(*value)
}
Expression::Number(value) => String::from(*value),
Expression::String(value) => {
format!("\"{}\"", *value)
}
Expression::Boolean(value) => {
String::from(if *value {"true"} else {"false"})
}
Expression::Identifier(value) => {
String::from(*value)
Expression::Boolean(value) => String::from(if *value { "true" } else { "false" }),
Expression::Identifier(value) => String::from(*value),
}
}
}
}
#[cfg(test)]
mod tests {

View File

@ -1,7 +1,7 @@
use super::ast_types::ModuleAST;
mod expression;
mod binding;
mod expression;
mod module_ast;
/// Trait that the AST and its nodes implement to support transformation to JavaScript
@ -15,13 +15,9 @@ pub fn codegen<'a>(ast: &'a ModuleAST) -> String {
ast.transpile()
}
#[cfg(test)]
mod tests {
use crate::{lexic, syntax, semantic, symbol_table::SymbolTable};
use crate::{lexic, semantic, symbol_table::SymbolTable, syntax};
use super::*;
@ -38,4 +34,3 @@ mod tests {
assert_eq!("const id = 322;", out_str);
}
}

View File

@ -1,21 +1,24 @@
use crate::ast_types::ModuleAST;
use super::Transpilable;
use crate::ast_types::ModuleAST;
impl Transpilable for ModuleAST<'_> {
/// Transpiles the whole AST into JS, using this same trait on the
/// nodes and leaves of the AST
fn transpile(&self) -> String {
let bindings_str: Vec::<String> = self.bindings.iter().map(|binding| binding.transpile()).collect();
let bindings_str: Vec<String> = self
.bindings
.iter()
.map(|binding| binding.transpile())
.collect();
bindings_str.join("\n")
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::ast_types::{Expression, ValBinding, Binding};
use crate::ast_types::{Binding, Expression, ValBinding};
#[test]
fn module_ast_should_transpile() {

View File

@ -1,10 +1,9 @@
use std::{collections::VecDeque};
use super::{PrintableError, LexError};
use super::{LexError, PrintableError};
use std::collections::VecDeque;
impl PrintableError for LexError {
// TODO: Count and show line number
fn get_error_str(&self, chars: &Vec<char>) -> String {
let (erroneous_code, back_count) = get_line(chars, self.position);
let mut whitespace = Vec::<char>::new();
@ -72,12 +71,10 @@ fn get_line(chars: &Vec<char>, pos: usize) -> (String, usize) {
(result_chars.iter().collect::<String>(), pos - before_pos)
}
#[cfg(test)]
mod tests {
use crate::lexic;
use super::*;
use crate::lexic;
#[test]
fn test_error_msg() {
@ -92,15 +89,11 @@ mod tests {
// TODO: check for line number
let expected_str = format!(
"\n{}\n{}^\n\nInvalid character at pos 9",
"val name' = 20",
" "
"\n{}\n{}^\n\nInvalid character at pos 9\n{}",
"val name' = 20", " ", "Unrecognized character `'` (escaped: `\\'`)"
);
assert_eq!(
expected_str,
err_str,
);
assert_eq!(expected_str, err_str,);
}
}
}
@ -115,7 +108,6 @@ mod tests {
assert_eq!("second line", result);
assert_eq!(4, back_count);
let input = String::from("val binding = 322");
let chars: Vec<char> = input.chars().into_iter().collect();

View File

@ -1,4 +1,5 @@
mod lex_error;
mod syntax_error;
pub trait PrintableError {
fn get_error_str(&self, chars: &Vec<char>) -> String;
@ -6,7 +7,8 @@ pub trait PrintableError {
#[derive(Debug)]
pub enum MistiError {
Lex(LexError)
Lex(LexError),
Syntax(SyntaxError),
}
#[derive(Debug)]
@ -15,13 +17,16 @@ pub struct LexError {
pub reason: String,
}
/// Error produced by syntax analysis.
#[derive(Debug)]
pub struct SyntaxError {
/// Text describing why the error was raised.
pub reason: String,
}
impl PrintableError for MistiError {
fn get_error_str(&self, chars: &Vec<char>) -> String {
match self {
Self::Lex(err) => err.get_error_str(chars)
Self::Lex(err) => err.get_error_str(chars),
Self::Syntax(err) => err.get_error_str(chars),
}
}
}

View File

@ -0,0 +1,7 @@
use super::{PrintableError, SyntaxError};
impl PrintableError for SyntaxError {
    /// Builds the printable message for a syntax error.
    ///
    /// Rendering is not implemented yet: the source characters are ignored and a
    /// fixed placeholder string is returned.
    // The parameter is prefixed with `_` because it is intentionally unused for now.
    fn get_error_str(&self, _chars: &Vec<char>) -> String {
        String::from("Syntax error: NOT IMPLEMENTED")
    }
}

View File

@ -1,8 +1,8 @@
mod utils;
mod scanner;
mod utils;
use super::token::{self, Token};
use crate::error_handling::{MistiError, LexError};
use crate::error_handling::{LexError, MistiError};
type Chars = Vec<char>;
@ -32,7 +32,6 @@ pub enum LexResult {
Err(LexError),
}
/// Scans and returns all the tokens in the input String
pub fn get_tokens(input: &String) -> Result<Vec<Token>, MistiError> {
let chars: Vec<char> = input.chars().into_iter().collect();
@ -44,10 +43,10 @@ pub fn get_tokens(input: &String) -> Result<Vec<Token>, MistiError> {
LexResult::Some(token, next_pos) => {
results.push(token);
current_pos = next_pos;
},
}
LexResult::None(next_pos) => {
current_pos = next_pos;
},
}
LexResult::Err(error_info) => {
return Err(MistiError::Lex(error_info));
}
@ -65,17 +64,16 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
// If EOF is reached return nothing but the current position
if next_char == '\0' {
return LexResult::None(current_pos)
return LexResult::None(current_pos);
}
// Handle whitespace recursively.
if next_char == ' ' {
return next_token(chars, current_pos + 1)
return next_token(chars, current_pos + 1);
}
// Scanners
None
.or_else(|| scanner::number(next_char, chars, current_pos))
None.or_else(|| scanner::number(next_char, chars, current_pos))
.or_else(|| scanner::identifier(next_char, chars, current_pos))
.or_else(|| scanner::datatype(next_char, chars, current_pos))
.or_else(|| scanner::string(next_char, chars, current_pos))
@ -106,8 +104,6 @@ fn has_input(input: &Chars, current_pos: usize) -> bool {
current_pos < input.len()
}
#[cfg(test)]
mod tests {
use super::*;
@ -149,7 +145,7 @@ mod tests {
match next_token(&chars, 0) {
LexResult::Some(t, _) => {
assert_eq!("126", t.value)
},
}
_ => {
panic!()
}

View File

@ -1,4 +1,7 @@
use crate::{lexic::{token, utils, LexResult}, token::TokenType};
use crate::{
lexic::{token, utils, LexResult},
token::TokenType,
};
/// Checks if a String is a keyword, and returns its TokenType
fn str_is_keyword(s: &String) -> Option<TokenType> {
@ -24,31 +27,24 @@ pub fn scan(start_char: char, chars: &Vec<char>, start_pos: usize) -> LexResult
/// Recursive function that scans the identifier
fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String, is_datatype: bool) -> LexResult {
match chars.get(start_pos) {
Some(c) if utils::is_identifier_char(*c) => {
scan_impl(
Some(c) if utils::is_identifier_char(*c) => scan_impl(
chars,
start_pos + 1,
utils::str_append(current, *c),
is_datatype,
)
},
),
_ => {
if let Some(token_type) = str_is_keyword(&current) {
LexResult::Some(token::new(current, start_pos as i32, token_type), start_pos)
}
else if is_datatype {
} else if is_datatype {
LexResult::Some(token::new_datatype(current, start_pos as i32), start_pos)
}
else {
} else {
LexResult::Some(token::new_identifier(current, start_pos as i32), start_pos)
}
}
}
}
#[cfg(test)]
mod tests {
use crate::lexic::token::TokenType;
@ -69,8 +65,8 @@ mod tests {
assert_eq!(1, next);
assert_eq!(TokenType::Identifier, token.token_type);
assert_eq!("_", token.value);
},
_ => panic!()
}
_ => panic!(),
}
let input = str_to_vec("i");
@ -80,8 +76,8 @@ mod tests {
assert_eq!(1, next);
assert_eq!(TokenType::Identifier, token.token_type);
assert_eq!("i", token.value);
},
_ => panic!()
}
_ => panic!(),
}
}
@ -89,27 +85,8 @@ mod tests {
#[test]
fn test_2() {
let operators = vec![
"_a",
"_z",
"_A",
"_Z",
"__",
"_0",
"_9",
"aa",
"az",
"aA",
"aZ",
"a_",
"a0",
"a9",
"za",
"zz",
"zA",
"zZ",
"z_",
"z0",
"z9",
"_a", "_z", "_A", "_Z", "__", "_0", "_9", "aa", "az", "aA", "aZ", "a_", "a0", "a9",
"za", "zz", "zA", "zZ", "z_", "z0", "z9",
];
for op in operators {
@ -120,13 +97,12 @@ mod tests {
assert_eq!(2, next);
assert_eq!(TokenType::Identifier, token.token_type);
assert_eq!(op, token.value);
},
_ => panic!()
}
_ => panic!(),
}
}
}
// Should scan long identifiers
#[test]
fn test_3() {
@ -145,8 +121,8 @@ mod tests {
assert_eq!(input.len(), next);
assert_eq!(TokenType::Identifier, token.token_type);
assert_eq!(op, token.value);
},
_ => panic!()
}
_ => panic!(),
}
}
}
@ -160,8 +136,9 @@ mod tests {
assert_eq!(3, next);
assert_eq!(TokenType::VAR, token.token_type);
assert_eq!("var", token.value);
} else {panic!()}
} else {
panic!()
}
let input = str_to_vec("val");
let start_pos = 0;
@ -169,6 +146,8 @@ mod tests {
assert_eq!(3, next);
assert_eq!(TokenType::VAL, token.token_type);
assert_eq!("val", token.value);
} else {panic!()}
} else {
panic!()
}
}
}

View File

@ -1,11 +1,13 @@
use super::{token::{TokenType, self}, utils, LexResult};
use super::{
token::{self, TokenType},
utils, LexResult,
};
mod identifier;
mod new_line;
mod number;
mod operator;
mod identifier;
mod string;
mod new_line;
// This module contains the individual scanners, and exports them
@ -14,13 +16,11 @@ pub fn number(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult>
utils::is_digit(c).then(|| number::scan(chars, start_pos))
}
/// Tries to scan an operator starting at `start_pos`.
///
/// Yields `None` when `c` is not an operator character, so the caller can
/// chain the next scanner.
pub fn operator(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
    if utils::is_operator(c) {
        Some(operator::scan(chars, start_pos))
    } else {
        None
    }
}
/// Attempts to scan a grouping sign. If not found returns None to be able to chain other scanner
pub fn grouping_sign(c: char, _: &Vec<char>, start_pos: usize) -> Option<LexResult> {
let token_type = match c {
@ -33,27 +33,20 @@ pub fn grouping_sign(c: char, _: &Vec<char>, start_pos: usize) -> Option<LexResu
_ => return None,
};
let token = token::new(
c.to_string(),
start_pos as i32,
token_type,
);
let token = token::new(c.to_string(), start_pos as i32, token_type);
Some(LexResult::Some(token, start_pos + 1))
}
/// Attempts to scan an identifier. If not found returns None to be able to chain other scanner
pub fn identifier(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
(utils::is_lowercase(c) || c == '_')
.then(|| identifier::scan(c, chars, start_pos))
(utils::is_lowercase(c) || c == '_').then(|| identifier::scan(c, chars, start_pos))
}
/// Attempts to scan a datatype. If not found returns None to be able to chain other scanner
pub fn datatype(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
// Since the only difference with an identifier is that the first character is an
// uppercase letter, reuse the identifier scanner
utils::is_uppercase(c)
.then(|| identifier::scan(c, chars, start_pos))
utils::is_uppercase(c).then(|| identifier::scan(c, chars, start_pos))
}
/// Attempts to scan a string. If not found returns None to be able to chain other scanner
@ -65,4 +58,3 @@ pub fn string(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult>
pub fn new_line(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
    // Only a line feed triggers the new line scanner; anything else yields
    // `None` so the caller can chain the next scanner.
    match c {
        '\n' => Some(new_line::scan(chars, start_pos)),
        _ => None,
    }
}

View File

@ -1,8 +1,6 @@
use crate::{
lexic::{
token, LexResult,
},
token::TokenType
lexic::{token, LexResult},
token::TokenType,
};
/// Function to handle new lines
@ -15,28 +13,16 @@ pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
let current = chars.get(start_pos);
match current {
Some(c) if *c == '\n' => {
scan(chars, start_pos + 1)
}
Some(c) if *c == ' ' => {
match look_ahead_for_new_line(chars, start_pos + 1) {
Some(c) if *c == '\n' => scan(chars, start_pos + 1),
Some(c) if *c == ' ' => match look_ahead_for_new_line(chars, start_pos + 1) {
Some(next_pos) => scan(chars, next_pos),
None => {
let token = token::new(
String::from(";"),
start_pos as i32,
TokenType::Semicolon,
);
let token = token::new(String::from(";"), start_pos as i32, TokenType::Semicolon);
LexResult::Some(token, start_pos)
}
}
}
},
Some(_) | None => {
let token = token::new(
String::from(";"),
start_pos as i32,
TokenType::Semicolon,
);
let token = token::new(String::from(";"), start_pos as i32, TokenType::Semicolon);
LexResult::Some(token, start_pos)
}
}
@ -45,18 +31,11 @@ pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
/// Returns the position after the new line
fn look_ahead_for_new_line(chars: &Vec<char>, pos: usize) -> Option<usize> {
match chars.get(pos) {
Some(c) if *c == ' ' => {
look_ahead_for_new_line(chars, pos + 1)
}
Some(c) if *c == '\n' => {
Some(pos + 1)
}
Some(_) | None => {
None
Some(c) if *c == ' ' => look_ahead_for_new_line(chars, pos + 1),
Some(c) if *c == '\n' => Some(pos + 1),
Some(_) | None => None,
}
}
}
#[cfg(test)]
mod tests {
@ -93,7 +72,6 @@ mod tests {
panic!()
}
let input = str_to_vec("\n\n\naToken");
let start_pos = 0;
@ -117,7 +95,6 @@ mod tests {
panic!()
}
let input = str_to_vec("\n \n \n aToken");
let start_pos = 0;
@ -128,7 +105,6 @@ mod tests {
panic!()
}
let input = str_to_vec("\n \n \n ");
let start_pos = 0;

View File

@ -1,8 +1,8 @@
use crate::error_handling::LexError;
use crate::lexic::{
token::{self, Token},
utils, LexResult,
};
use crate::error_handling::LexError;
/// Function to scan a number
///
@ -68,7 +68,7 @@ fn scan_double(chars: &Vec<char>, start_pos: usize, current: String) -> LexResul
position: start_pos,
reason: String::from(
"The character after the dot when scanning a double is not a number.",
)
),
}),
_ => LexResult::Err(LexError {
position: start_pos,
@ -112,7 +112,7 @@ fn scan_scientific(chars: &Vec<char>, start_pos: usize, current: String) -> LexR
position: start_pos,
reason: String::from(
"The characters after 'e' are not + or -, or are not followed by a number",
)
),
}),
}
}
@ -230,7 +230,9 @@ mod tests {
let start_pos = 0;
match scan(&input, start_pos) {
LexResult::Err(reason) => assert_eq!("Tried to scan an incomplete hex value", reason.reason),
LexResult::Err(reason) => {
assert_eq!("Tried to scan an incomplete hex value", reason.reason)
}
_ => panic!(),
}
@ -299,7 +301,9 @@ mod tests {
let start_pos = 0;
match scan(&input, start_pos) {
LexResult::Err(reason) => assert_eq!("EOF when scanning a double number.", reason.reason),
LexResult::Err(reason) => {
assert_eq!("EOF when scanning a double number.", reason.reason)
}
_ => panic!(),
}
}

View File

@ -1,6 +1,5 @@
use crate::lexic::{token, utils, LexResult};
/// Function to scan an operator
///
/// This function assumes the character at `start_pos` is an operator
@ -12,15 +11,10 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
match chars.get(start_pos) {
Some(c) if utils::is_operator(*c) => {
scan_impl(chars, start_pos + 1, utils::str_append(current, *c))
},
_ => {
LexResult::Some(token::new_operator(current, start_pos as i32), start_pos)
}
_ => LexResult::Some(token::new_operator(current, start_pos as i32), start_pos),
}
}
}
#[cfg(test)]
mod tests {
@ -35,26 +29,8 @@ mod tests {
#[test]
fn test_1() {
let operators = vec![
"+",
"-",
"=",
"*",
"!",
"\\",
"/",
"|",
"@",
"#",
"$",
"~",
"%",
"&",
"?",
"<",
">",
"^",
".",
":",
"+", "-", "=", "*", "!", "\\", "/", "|", "@", "#", "$", "~", "%", "&", "?", "<", ">",
"^", ".", ":",
];
for op in operators {
@ -65,8 +41,8 @@ mod tests {
assert_eq!(1, next);
assert_eq!(TokenType::Operator, token.token_type);
assert_eq!(op, token.value);
},
_ => panic!()
}
_ => panic!(),
}
}
}
@ -75,20 +51,7 @@ mod tests {
#[test]
fn test_2() {
let operators = vec![
"<<",
">>",
"<|",
"|>",
"+>",
"<+",
"+=",
"-=",
"?.",
"??",
"?:",
"*=",
"/=",
"==",
"<<", ">>", "<|", "|>", "+>", "<+", "+=", "-=", "?.", "??", "?:", "*=", "/=", "==",
"!=",
];
@ -100,8 +63,8 @@ mod tests {
assert_eq!(2, next);
assert_eq!(TokenType::Operator, token.token_type);
assert_eq!(op, token.value);
},
_ => panic!()
}
_ => panic!(),
}
}
}

View File

@ -1,8 +1,5 @@
use crate::lexic::{
token,
utils, LexResult,
};
use crate::error_handling::LexError;
use crate::lexic::{token, utils, LexResult};
/// Function to scan a string
///
@ -18,45 +15,25 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
Some(c) if *c == '"' => {
LexResult::Some(token::new_string(current, start_pos as i32), start_pos + 1)
}
Some(c) if *c == '\n' => {
LexResult::Err(LexError {
Some(c) if *c == '\n' => LexResult::Err(LexError {
position: start_pos,
reason: String::from("Unexpected new line inside a string.")
})
}
reason: String::from("Unexpected new line inside a string."),
}),
Some(c) if *c == '\\' => {
if let Some(escape) = test_escape_char(chars, start_pos + 1) {
scan_impl(
chars,
start_pos + 2,
utils::str_append(current, escape),
)
}
else {
scan_impl(chars, start_pos + 2, utils::str_append(current, escape))
} else {
// Ignore the backslash
scan_impl(
chars,
start_pos + 1,
current,
)
scan_impl(chars, start_pos + 1, current)
}
}
Some(c) => {
scan_impl(
chars,
start_pos + 1,
utils::str_append(current, *c),
)
}
None => {
LexResult::Err(LexError {
Some(c) => scan_impl(chars, start_pos + 1, utils::str_append(current, *c)),
None => LexResult::Err(LexError {
position: start_pos,
reason: String::from("Incomplete string found")
})
reason: String::from("Incomplete string found"),
}),
}
}
}
/// Checks if the char at `start_pos` is an escape character
fn test_escape_char(chars: &Vec<char>, start_pos: usize) -> Option<char> {
@ -69,15 +46,11 @@ fn test_escape_char(chars: &Vec<char>, start_pos: usize) -> Option<char> {
't' => Some('\t'),
_ => None,
}
}
else {
} else {
None
}
}
#[cfg(test)]
mod tests {
use crate::lexic::token::TokenType;
@ -96,8 +69,9 @@ mod tests {
assert_eq!(2, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("", token.value);
} else {
panic!()
}
else {panic!()}
}
#[test]
@ -108,8 +82,9 @@ mod tests {
assert_eq!(15, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("Hello, world!", token.value);
} else {
panic!()
}
else {panic!()}
}
#[test]
@ -118,8 +93,9 @@ mod tests {
let start_pos = 1;
if let LexResult::Err(reason) = scan(&input, start_pos) {
assert_eq!("Unexpected new line inside a string.", reason.reason)
} else {
panic!()
}
else {panic!()}
}
#[test]
@ -130,8 +106,9 @@ mod tests {
assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("Sample\ntext", token.value);
} else {
panic!()
}
else {panic!()}
let input = str_to_vec("\"Sample\\\"text\"");
let start_pos = 1;
@ -139,8 +116,9 @@ mod tests {
assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("Sample\"text", token.value);
} else {
panic!()
}
else {panic!()}
let input = str_to_vec("\"Sample\\rtext\"");
let start_pos = 1;
@ -148,8 +126,9 @@ mod tests {
assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("Sample\rtext", token.value);
} else {
panic!()
}
else {panic!()}
let input = str_to_vec("\"Sample\\\\text\"");
let start_pos = 1;
@ -157,8 +136,9 @@ mod tests {
assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("Sample\\text", token.value);
} else {
panic!()
}
else {panic!()}
let input = str_to_vec("\"Sample\\ttext\"");
let start_pos = 1;
@ -166,8 +146,9 @@ mod tests {
assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("Sample\ttext", token.value);
} else {
panic!()
}
else {panic!()}
let input = str_to_vec("\"Sample\\ text\"");
let start_pos = 1;
@ -175,7 +156,8 @@ mod tests {
assert_eq!(14, next);
assert_eq!(TokenType::String, token.token_type);
assert_eq!("Sample text", token.value);
}
else {panic!()}
} else {
panic!()
}
}
}

View File

@ -15,11 +15,26 @@ pub fn str_append(current: String, c: char) -> String {
/// Whether `c` is an operator char.
pub fn is_operator(c: char) -> bool {
c == '+' || c == '-' || c == '=' || c == '*' || c == '!'
|| c == '\\' || c == '/' || c == '|' || c == '@'
|| c == '#' || c == '$' || c == '~' || c == '%'
|| c == '&' || c == '?' || c == '<' || c == '>'
|| c == '^' || c == '.' || c == ':'
c == '+'
|| c == '-'
|| c == '='
|| c == '*'
|| c == '!'
|| c == '\\'
|| c == '/'
|| c == '|'
|| c == '@'
|| c == '#'
|| c == '$'
|| c == '~'
|| c == '%'
|| c == '&'
|| c == '?'
|| c == '<'
|| c == '>'
|| c == '^'
|| c == '.'
|| c == ':'
}
/// Whether `c` is between `a-z`

View File

@ -20,7 +20,6 @@ mod codegen;
mod error_handling;
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Cli {
@ -39,16 +38,18 @@ enum Commands {
output: String,
},
/// Starts the REPL
R {}
R {},
}
const VERSION: &str = "0.0.1";
fn get_copyright() -> String {
let year = Utc::now().year();
format!("Misti {}\nCopyright (c) {} Fernando Enrique Araoz Morales\n", VERSION, year)
format!(
"Misti {}\nCopyright (c) {} Fernando Enrique Araoz Morales\n",
VERSION, year
)
}
/// # Misti
@ -73,6 +74,4 @@ fn main() {
println!("Compile in watch mode: Not implemented")
}
}
}

View File

@ -4,10 +4,10 @@ use crate::error_handling::PrintableError;
use crate::symbol_table::SymbolTable;
use crate::token::Token;
use super::lexic;
use super::syntax;
use super::semantic;
use super::codegen;
use super::lexic;
use super::semantic;
use super::syntax;
/// Executes Lexical analysis, handles errors and calls build_ast for the next phase
fn compile(input: &String) {
@ -15,20 +15,19 @@ fn compile(input: &String) {
match tokens {
Ok(tokens) => {
build_ast(tokens);
},
build_ast(input, tokens);
}
Err(error) => {
let chars: Vec<char> = input.chars().into_iter().collect();
eprintln!("{}", error.get_error_str(&chars))
}
}
}
/// Executes Syntax analysis, and for now, Semantic analysis and Code generation.
///
/// Prints the generated code to stdout
fn build_ast(tokens: Vec<Token>) {
fn build_ast(input: &String, tokens: Vec<Token>) {
let ast = syntax::construct_ast(&tokens);
match ast {
@ -39,7 +38,8 @@ fn build_ast(tokens: Vec<Token>) {
println!("{}", js_code)
}
Err(reason) => {
eprintln!("Syntax error.\n{}", reason)
let chars: Vec<char> = input.chars().into_iter().collect();
eprintln!("Syntax error.\n{}", reason.get_error_str(&chars))
}
}
}
@ -59,14 +59,14 @@ pub fn run() -> io::Result<()> {
match read {
Ok(0) => {
println!("\nBye");
break Ok(())
},
break Ok(());
}
Ok(_) => {
compile(&buffer);
},
}
Err(error) => {
eprintln!("Error reading stdin.");
break Err(error)
break Err(error);
}
};
}

View File

@ -1,5 +1,5 @@
use super::symbol_table::{SymbolTable, _NUMBER, _STRING, _BOOLEAN};
use super::ast_types::{ModuleAST, Binding, Expression};
use super::ast_types::{Binding, Expression, ModuleAST};
use super::symbol_table::{SymbolTable, _BOOLEAN, _NUMBER, _STRING};
/// Checks the AST. In the future should return a list of errors.
pub fn check_ast<'a>(ast: &'a mut ModuleAST, symbol_table: &'a mut SymbolTable) {
@ -8,7 +8,7 @@ pub fn check_ast<'a>(ast: &'a mut ModuleAST, symbol_table: &'a mut SymbolTable)
Binding::Val(binding) => {
symbol_table.add(
binding.identifier,
get_expression_type(&binding.expression, symbol_table).as_str()
get_expression_type(&binding.expression, symbol_table).as_str(),
);
}
Binding::Var(binding) => {
@ -28,9 +28,7 @@ fn get_expression_type(exp: &Expression, symbol_table: &SymbolTable) -> String {
Expression::Boolean(_) => String::from(_BOOLEAN),
Expression::Identifier(id) => {
match symbol_table.get_type(*id) {
Some(datatype) => {
datatype
}
Some(datatype) => datatype,
None => {
// Should add an error to the list instead of panicking
panic!("Semantic analysis: identifier {} not found", id);
@ -42,10 +40,10 @@ fn get_expression_type(exp: &Expression, symbol_table: &SymbolTable) -> String {
#[cfg(test)]
mod tests {
use crate::lexic;
use crate::symbol_table::_BOOLEAN;
use crate::symbol_table::_STRING;
use crate::syntax;
use crate::lexic;
use super::*;

View File

@ -6,7 +6,7 @@ pub const _STRING: &str = "Str";
pub const _BOOLEAN: &str = "Bool";
pub struct SymbolTable {
table: HashMap<String, String>
table: HashMap<String, String>,
}
impl SymbolTable {
@ -19,7 +19,8 @@ impl SymbolTable {
}
pub fn add(&mut self, identifier: &str, datatype: &str) {
self.table.insert(String::from(identifier), String::from(datatype));
self.table
.insert(String::from(identifier), String::from(datatype));
}
pub fn test(&self, identifier: &str) -> bool {
@ -32,8 +33,7 @@ impl SymbolTable {
.and_then(|(_, value)| {
if value == &String::from(datatype) {
Some(true)
}
else {
} else {
Some(false)
}
})
@ -43,13 +43,10 @@ impl SymbolTable {
pub fn get_type(&self, identifier: &str) -> Option<String> {
self.table
.get_key_value(&String::from(identifier))
.and_then(|(_, value)| {
Some(String::from(value))
})
.and_then(|(_, value)| Some(String::from(value)))
}
}
#[cfg(test)]
mod tests {
use super::*;

View File

@ -1,12 +1,12 @@
use super::ast_types::{Binding, ValBinding, VarBinding};
use super::{expression, SyntaxResult};
use crate::token::{Token, TokenType};
use super::ast_types::{ValBinding, VarBinding, Binding};
use super::expression;
// TODO: Should return a 3 state value:
// - Success: binding parsed successfully
// - NotFound: the first token (var | val) was not found, so the parser should try other options
// - Error: token (var | val) was found, but then other expected tokens were not found
pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<Binding> {
pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult> {
let mut pos = pos;
// Optional datatype annotation
@ -17,7 +17,8 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<Binding> {
Some(String::from(&t.value))
}
Some(_) => None,
None => return None
// TODO: return Error
None => return None,
}
};
@ -30,58 +31,62 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<Binding> {
let res2 = try_token_type(tokens, pos, TokenType::VAR);
match res2 {
Some(_) => false,
None => return None
// TODO: return Error
None => return None,
}
}
}
};
let identifier = try_token_type(tokens, pos + 1, TokenType::Identifier);
if identifier.is_none() { return None }
if identifier.is_none() {
// TODO: return Error
return None;
}
let identifier = identifier.unwrap();
let equal_operator = try_operator(tokens, pos + 2, String::from("="));
if equal_operator.is_none() { return None }
if equal_operator.is_none() {
// TODO: return Error
return None;
}
let expression = expression::try_parse(tokens, pos + 3);
if expression.is_none() { return None }
if expression.is_none() {
// TODO: return Error
return None;
}
let expression = expression.unwrap();
if is_val {
Some(Binding::Val(ValBinding {
let binding = if is_val {
Binding::Val(ValBinding {
datatype: datatype_annotation,
identifier: &identifier.value,
expression,
}))
}
else {
Some(Binding::Var(VarBinding {
})
} else {
Binding::Var(VarBinding {
datatype: datatype_annotation,
identifier: &identifier.value,
expression,
}))
}
})
};
Some(SyntaxResult::Ok(binding))
}
fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Option<&Token> {
tokens
.get(pos)
.and_then(|token| {
(token.token_type == token_type).then(|| token)
})
.and_then(|token| (token.token_type == token_type).then(|| token))
}
fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Option<&Token> {
tokens
.get(pos)
.and_then(|token| {
(token.token_type == TokenType::Operator && token.value == operator)
.then(|| token)
tokens.get(pos).and_then(|token| {
(token.token_type == TokenType::Operator && token.value == operator).then(|| token)
})
}
#[cfg(test)]
mod tests {
use super::*;
@ -93,10 +98,10 @@ mod tests {
let binding = try_parse(&tokens, 0).unwrap();
match binding {
Binding::Val(binding) => {
SyntaxResult::Ok(Binding::Val(binding)) => {
assert_eq!("identifier", binding.identifier);
}
_ => panic!()
_ => panic!(),
}
}
@ -131,23 +136,22 @@ mod tests {
let binding = try_parse(&tokens, 0).unwrap();
match binding {
Binding::Val(binding) => {
SyntaxResult::Ok(Binding::Val(binding)) => {
assert_eq!(Some(String::from("Num")), binding.datatype);
assert_eq!("identifier", binding.identifier);
}
_ => panic!()
_ => panic!(),
}
let tokens = get_tokens(&String::from("Bool var identifier = true")).unwrap();
let binding = try_parse(&tokens, 0).unwrap();
match binding {
Binding::Var(binding) => {
SyntaxResult::Ok(Binding::Var(binding)) => {
assert_eq!(Some(String::from("Bool")), binding.datatype);
assert_eq!("identifier", binding.identifier);
}
_ => panic!()
_ => panic!(),
}
}
}

View File

@ -1,5 +1,5 @@
use crate::token::{Token, TokenType};
use super::ast_types::Expression;
use crate::token::{Token, TokenType};
/// An expression can be:
///
@ -8,28 +8,17 @@ use super::ast_types::Expression;
/// - A boolean
/// - An identifier
pub fn try_parse(tokens: &Vec<Token>, pos: usize) -> Option<Expression> {
tokens
.get(pos)
.and_then(|token| {
match token.token_type {
TokenType::Number => {
Some(Expression::Number(&token.value))
}
TokenType::String => {
Some(Expression::String(&token.value))
}
tokens.get(pos).and_then(|token| match token.token_type {
TokenType::Number => Some(Expression::Number(&token.value)),
TokenType::String => Some(Expression::String(&token.value)),
TokenType::Identifier if token.value == "true" || token.value == "false" => {
Some(Expression::Boolean(token.value == "true"))
}
TokenType::Identifier => {
Some(Expression::Identifier(&token.value))
}
_ => None
}
TokenType::Identifier => Some(Expression::Identifier(&token.value)),
_ => None,
})
}
#[cfg(test)]
mod tests {
use super::*;
@ -42,7 +31,7 @@ mod tests {
match expression {
Expression::Number(value) => assert_eq!("40", value),
_ => panic!()
_ => panic!(),
}
}
@ -53,7 +42,7 @@ mod tests {
match expression {
Expression::String(value) => assert_eq!("Hello", value),
_ => panic!()
_ => panic!(),
}
}
@ -64,7 +53,7 @@ mod tests {
match expression {
Expression::Boolean(value) => assert!(value),
_ => panic!()
_ => panic!(),
}
}
@ -75,7 +64,7 @@ mod tests {
match expression {
Expression::Identifier(value) => assert_eq!("someIdentifier", value),
_ => panic!()
_ => panic!(),
}
}
}

View File

@ -1,25 +1,47 @@
use crate::ast_types::Binding;
use crate::error_handling::SyntaxError;
use super::token::Token;
mod expression;
mod binding;
mod expression;
use super::ast_types;
use ast_types::ModuleAST;
/// Constructs the Misti AST from a vector of tokens
pub fn construct_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST<'a>, String> {
let maybe_binding = binding::try_parse(tokens, 0);
/// Outcome of trying to parse a construct from the tokens.
pub enum SyntaxResult<'a> {
/// A construct has been found
Ok(Binding<'a>),
/// No construct was found
None,
/// A construct was found, but there was an error parsing it
Err(SyntaxError),
}
match maybe_binding {
Some(binding) => {
Ok(ModuleAST {
bindings: vec![binding]
/// Constructs the Misti AST from a vector of tokens.
///
/// Only a single construct, starting at position 0, is parsed; it becomes the
/// sole binding of the returned module.
///
/// # Errors
///
/// Returns a `SyntaxError` when no construct is found or when parsing one fails.
pub fn construct_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST<'a>, SyntaxError> {
    let current_pos = 0;

    match next_construct(tokens, current_pos) {
        SyntaxResult::Ok(binding) => Ok(ModuleAST {
            bindings: vec![binding],
        }),
        // Placeholder reason, kept exactly as it was.
        SyntaxResult::None => Err(SyntaxError {
            reason: String::from("D:"),
        }),
        SyntaxResult::Err(err) => Err(err),
    }
}
/// Tries the known parsers at `current_pos` and returns the first result.
///
/// When no parser recognizes the tokens, a `SyntaxResult::Err` with the reason
/// "Unrecognized token" is returned.
fn next_construct<'a>(tokens: &'a Vec<Token>, current_pos: usize) -> SyntaxResult {
    // Starting from `None` lets further parsers be appended as extra `or_else` calls.
    // The parser is given `current_pos` rather than a hard-coded 0 so that the
    // position requested by the caller is honoured.
    None.or_else(|| binding::try_parse(tokens, current_pos))
        .unwrap_or_else(|| {
            SyntaxResult::Err(SyntaxError {
                reason: String::from("Unrecognized token"),
            })
        })
}
None => {
Err(String::from("Syntax error."))
}
}
}

View File

@ -38,7 +38,7 @@ pub fn new_number(value: String, position: i32) -> Token {
Token {
token_type: TokenType::Number,
value,
_position: position
_position: position,
}
}
@ -46,12 +46,16 @@ pub fn new_operator(value: String, position: i32) -> Token {
Token {
token_type: TokenType::Operator,
value,
_position: position
_position: position,
}
}
pub fn new(value: String, position: i32, token_type: TokenType) -> Token {
Token {token_type, value, _position: position}
Token {
token_type,
value,
_position: position,
}
}
pub fn new_identifier(value: String, position: i32) -> Token {