Add minimal error reporting for syntax analysis
This commit is contained in:
parent
a88c8e9216
commit
cdafc40ff7
@ -1,11 +1,10 @@
|
|||||||
|
|
||||||
pub struct ModuleAST<'a> {
|
pub struct ModuleAST<'a> {
|
||||||
pub bindings: Vec<Binding<'a>>,
|
pub bindings: Vec<Binding<'a>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum Binding<'a> {
|
pub enum Binding<'a> {
|
||||||
Val(ValBinding<'a>),
|
Val(ValBinding<'a>),
|
||||||
Var(VarBinding<'a>)
|
Var(VarBinding<'a>),
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct ValBinding<'a> {
|
pub struct ValBinding<'a> {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use crate::ast_types::Binding;
|
|
||||||
use super::Transpilable;
|
use super::Transpilable;
|
||||||
|
use crate::ast_types::Binding;
|
||||||
|
|
||||||
impl Transpilable for Binding<'_> {
|
impl Transpilable for Binding<'_> {
|
||||||
/// Transpiles val and var bindings into JS.
|
/// Transpiles val and var bindings into JS.
|
||||||
@ -19,12 +19,10 @@ impl Transpilable for Binding<'_> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::ast_types::{Expression, Binding, ValBinding};
|
use crate::ast_types::{Binding, Expression, ValBinding};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn binding_should_transpile() {
|
fn binding_should_transpile() {
|
||||||
@ -37,7 +35,7 @@ mod tests {
|
|||||||
});
|
});
|
||||||
|
|
||||||
let result = binding.transpile();
|
let result = binding.transpile();
|
||||||
|
|
||||||
assert_eq!("const identifier = 322;", result);
|
assert_eq!("const identifier = 322;", result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use crate::ast_types::Expression;
|
|
||||||
use super::Transpilable;
|
use super::Transpilable;
|
||||||
|
use crate::ast_types::Expression;
|
||||||
|
|
||||||
impl Transpilable for Expression<'_> {
|
impl Transpilable for Expression<'_> {
|
||||||
/// Transpiles an Expression to JS
|
/// Transpiles an Expression to JS
|
||||||
@ -11,23 +11,16 @@ impl Transpilable for Expression<'_> {
|
|||||||
/// - Identifier
|
/// - Identifier
|
||||||
fn transpile(&self) -> String {
|
fn transpile(&self) -> String {
|
||||||
match self {
|
match self {
|
||||||
Expression::Number(value) => {
|
Expression::Number(value) => String::from(*value),
|
||||||
String::from(*value)
|
|
||||||
}
|
|
||||||
Expression::String(value) => {
|
Expression::String(value) => {
|
||||||
format!("\"{}\"", *value)
|
format!("\"{}\"", *value)
|
||||||
}
|
}
|
||||||
Expression::Boolean(value) => {
|
Expression::Boolean(value) => String::from(if *value { "true" } else { "false" }),
|
||||||
String::from(if *value {"true"} else {"false"})
|
Expression::Identifier(value) => String::from(*value),
|
||||||
}
|
|
||||||
Expression::Identifier(value) => {
|
|
||||||
String::from(*value)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
@ -41,7 +34,7 @@ mod tests {
|
|||||||
|
|
||||||
assert_eq!("42", result);
|
assert_eq!("42", result);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn should_transpile_string() {
|
fn should_transpile_string() {
|
||||||
let str = String::from("Hello world");
|
let str = String::from("Hello world");
|
||||||
@ -50,21 +43,21 @@ mod tests {
|
|||||||
|
|
||||||
assert_eq!("\"Hello world\"", result);
|
assert_eq!("\"Hello world\"", result);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn should_transpile_boolean() {
|
fn should_transpile_boolean() {
|
||||||
let exp = Expression::Boolean(true);
|
let exp = Expression::Boolean(true);
|
||||||
let result = exp.transpile();
|
let result = exp.transpile();
|
||||||
|
|
||||||
assert_eq!("true", result);
|
assert_eq!("true", result);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn should_transpile_identifier() {
|
fn should_transpile_identifier() {
|
||||||
let s = String::from("newValue");
|
let s = String::from("newValue");
|
||||||
let exp = Expression::Identifier(&s);
|
let exp = Expression::Identifier(&s);
|
||||||
let result = exp.transpile();
|
let result = exp.transpile();
|
||||||
|
|
||||||
assert_eq!("newValue", result);
|
assert_eq!("newValue", result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
use super::ast_types::ModuleAST;
|
use super::ast_types::ModuleAST;
|
||||||
|
|
||||||
mod expression;
|
|
||||||
mod binding;
|
mod binding;
|
||||||
|
mod expression;
|
||||||
mod module_ast;
|
mod module_ast;
|
||||||
|
|
||||||
/// Trait that the AST and its nodes implement to support transformation to JavaScript
|
/// Trait that the AST and its nodes implement to support transformation to JavaScript
|
||||||
@ -15,13 +15,9 @@ pub fn codegen<'a>(ast: &'a ModuleAST) -> String {
|
|||||||
ast.transpile()
|
ast.transpile()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use crate::{lexic, syntax, semantic, symbol_table::SymbolTable};
|
use crate::{lexic, semantic, symbol_table::SymbolTable, syntax};
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
@ -38,4 +34,3 @@ mod tests {
|
|||||||
assert_eq!("const id = 322;", out_str);
|
assert_eq!("const id = 322;", out_str);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,21 +1,24 @@
|
|||||||
use crate::ast_types::ModuleAST;
|
|
||||||
use super::Transpilable;
|
use super::Transpilable;
|
||||||
|
use crate::ast_types::ModuleAST;
|
||||||
|
|
||||||
impl Transpilable for ModuleAST<'_> {
|
impl Transpilable for ModuleAST<'_> {
|
||||||
/// Transpiles the whole AST into JS, using this same trait on the
|
/// Transpiles the whole AST into JS, using this same trait on the
|
||||||
/// nodes and leaves of the AST
|
/// nodes and leaves of the AST
|
||||||
fn transpile(&self) -> String {
|
fn transpile(&self) -> String {
|
||||||
let bindings_str: Vec::<String> = self.bindings.iter().map(|binding| binding.transpile()).collect();
|
let bindings_str: Vec<String> = self
|
||||||
|
.bindings
|
||||||
|
.iter()
|
||||||
|
.map(|binding| binding.transpile())
|
||||||
|
.collect();
|
||||||
|
|
||||||
bindings_str.join("\n")
|
bindings_str.join("\n")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::ast_types::{Expression, ValBinding, Binding};
|
use crate::ast_types::{Binding, Expression, ValBinding};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn module_ast_should_transpile() {
|
fn module_ast_should_transpile() {
|
||||||
@ -34,5 +37,5 @@ mod tests {
|
|||||||
let result = module.transpile();
|
let result = module.transpile();
|
||||||
|
|
||||||
assert_eq!("const identifier = 322;", result);
|
assert_eq!("const identifier = 322;", result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,10 +1,9 @@
|
|||||||
use std::{collections::VecDeque};
|
use super::{LexError, PrintableError};
|
||||||
use super::{PrintableError, LexError};
|
use std::collections::VecDeque;
|
||||||
|
|
||||||
impl PrintableError for LexError {
|
impl PrintableError for LexError {
|
||||||
// TODO: Count and show line number
|
// TODO: Count and show line number
|
||||||
fn get_error_str(&self, chars: &Vec<char>) -> String {
|
fn get_error_str(&self, chars: &Vec<char>) -> String {
|
||||||
|
|
||||||
let (erroneous_code, back_count) = get_line(chars, self.position);
|
let (erroneous_code, back_count) = get_line(chars, self.position);
|
||||||
|
|
||||||
let mut whitespace = Vec::<char>::new();
|
let mut whitespace = Vec::<char>::new();
|
||||||
@ -14,9 +13,9 @@ impl PrintableError for LexError {
|
|||||||
let whitespace = whitespace.iter().collect::<String>();
|
let whitespace = whitespace.iter().collect::<String>();
|
||||||
|
|
||||||
format!(
|
format!(
|
||||||
"\n{}\n{}^\n\n{}{}\n{}",
|
"\n{}\n{}^\n\n{}{}\n{}",
|
||||||
erroneous_code,
|
erroneous_code,
|
||||||
whitespace,
|
whitespace,
|
||||||
"Invalid character at pos ",
|
"Invalid character at pos ",
|
||||||
self.position + 1,
|
self.position + 1,
|
||||||
self.reason,
|
self.reason,
|
||||||
@ -26,11 +25,11 @@ impl PrintableError for LexError {
|
|||||||
|
|
||||||
/// Extracts a line of code from `chars` and the number of characters in the back.
|
/// Extracts a line of code from `chars` and the number of characters in the back.
|
||||||
/// `pos` indicates a position, from where to extract the line.
|
/// `pos` indicates a position, from where to extract the line.
|
||||||
///
|
///
|
||||||
/// Ex. Given:
|
/// Ex. Given:
|
||||||
/// - `input = "first line\nsecond line\nthird line"`
|
/// - `input = "first line\nsecond line\nthird line"`
|
||||||
/// - `pos = 15`
|
/// - `pos = 15`
|
||||||
///
|
///
|
||||||
/// this function should return `("second line", 4)`
|
/// this function should return `("second line", 4)`
|
||||||
fn get_line(chars: &Vec<char>, pos: usize) -> (String, usize) {
|
fn get_line(chars: &Vec<char>, pos: usize) -> (String, usize) {
|
||||||
let mut result_chars = VecDeque::<char>::new();
|
let mut result_chars = VecDeque::<char>::new();
|
||||||
@ -72,12 +71,10 @@ fn get_line(chars: &Vec<char>, pos: usize) -> (String, usize) {
|
|||||||
(result_chars.iter().collect::<String>(), pos - before_pos)
|
(result_chars.iter().collect::<String>(), pos - before_pos)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use crate::lexic;
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use crate::lexic;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_error_msg() {
|
fn test_error_msg() {
|
||||||
@ -92,15 +89,11 @@ mod tests {
|
|||||||
|
|
||||||
// TODO: check for line number
|
// TODO: check for line number
|
||||||
let expected_str = format!(
|
let expected_str = format!(
|
||||||
"\n{}\n{}^\n\nInvalid character at pos 9",
|
"\n{}\n{}^\n\nInvalid character at pos 9\n{}",
|
||||||
"val name' = 20",
|
"val name' = 20", " ", "Unrecognized character `'` (escaped: `\\'`)"
|
||||||
" "
|
|
||||||
);
|
);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(expected_str, err_str,);
|
||||||
expected_str,
|
|
||||||
err_str,
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -115,7 +108,6 @@ mod tests {
|
|||||||
assert_eq!("second line", result);
|
assert_eq!("second line", result);
|
||||||
assert_eq!(4, back_count);
|
assert_eq!(4, back_count);
|
||||||
|
|
||||||
|
|
||||||
let input = String::from("val binding = 322");
|
let input = String::from("val binding = 322");
|
||||||
let chars: Vec<char> = input.chars().into_iter().collect();
|
let chars: Vec<char> = input.chars().into_iter().collect();
|
||||||
|
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
mod lex_error;
|
mod lex_error;
|
||||||
|
mod syntax_error;
|
||||||
|
|
||||||
pub trait PrintableError {
|
pub trait PrintableError {
|
||||||
fn get_error_str(&self, chars: &Vec<char>) -> String;
|
fn get_error_str(&self, chars: &Vec<char>) -> String;
|
||||||
@ -6,7 +7,8 @@ pub trait PrintableError {
|
|||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum MistiError {
|
pub enum MistiError {
|
||||||
Lex(LexError)
|
Lex(LexError),
|
||||||
|
Syntax(SyntaxError),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
@ -15,13 +17,16 @@ pub struct LexError {
|
|||||||
pub reason: String,
|
pub reason: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct SyntaxError {
|
||||||
|
pub reason: String,
|
||||||
|
}
|
||||||
|
|
||||||
impl PrintableError for MistiError {
|
impl PrintableError for MistiError {
|
||||||
fn get_error_str(&self, chars: &Vec<char>) -> String {
|
fn get_error_str(&self, chars: &Vec<char>) -> String {
|
||||||
match self {
|
match self {
|
||||||
Self::Lex(err) => err.get_error_str(chars)
|
Self::Lex(err) => err.get_error_str(chars),
|
||||||
|
Self::Syntax(err) => err.get_error_str(chars),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
7
src/error_handling/syntax_error.rs
Normal file
7
src/error_handling/syntax_error.rs
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
use super::{PrintableError, SyntaxError};
|
||||||
|
|
||||||
|
impl PrintableError for SyntaxError {
|
||||||
|
fn get_error_str(&self, chars: &Vec<char>) -> String {
|
||||||
|
String::from("Syntax error: NOT IMPLEMENTED")
|
||||||
|
}
|
||||||
|
}
|
@ -1,8 +1,8 @@
|
|||||||
mod utils;
|
|
||||||
mod scanner;
|
mod scanner;
|
||||||
|
mod utils;
|
||||||
|
|
||||||
use super::token::{self, Token};
|
use super::token::{self, Token};
|
||||||
use crate::error_handling::{MistiError, LexError};
|
use crate::error_handling::{LexError, MistiError};
|
||||||
|
|
||||||
type Chars = Vec<char>;
|
type Chars = Vec<char>;
|
||||||
|
|
||||||
@ -11,7 +11,7 @@ pub enum LexResult {
|
|||||||
/// A token was found. The first element is the token, and the
|
/// A token was found. The first element is the token, and the
|
||||||
/// second element is the position in the input after the token.
|
/// second element is the position in the input after the token.
|
||||||
///
|
///
|
||||||
/// E.g., given an input
|
/// E.g., given an input
|
||||||
///
|
///
|
||||||
/// "`identifier 55`"
|
/// "`identifier 55`"
|
||||||
///
|
///
|
||||||
@ -32,7 +32,6 @@ pub enum LexResult {
|
|||||||
Err(LexError),
|
Err(LexError),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/// Scans and returns all the tokens in the input String
|
/// Scans and returns all the tokens in the input String
|
||||||
pub fn get_tokens(input: &String) -> Result<Vec<Token>, MistiError> {
|
pub fn get_tokens(input: &String) -> Result<Vec<Token>, MistiError> {
|
||||||
let chars: Vec<char> = input.chars().into_iter().collect();
|
let chars: Vec<char> = input.chars().into_iter().collect();
|
||||||
@ -44,10 +43,10 @@ pub fn get_tokens(input: &String) -> Result<Vec<Token>, MistiError> {
|
|||||||
LexResult::Some(token, next_pos) => {
|
LexResult::Some(token, next_pos) => {
|
||||||
results.push(token);
|
results.push(token);
|
||||||
current_pos = next_pos;
|
current_pos = next_pos;
|
||||||
},
|
}
|
||||||
LexResult::None(next_pos) => {
|
LexResult::None(next_pos) => {
|
||||||
current_pos = next_pos;
|
current_pos = next_pos;
|
||||||
},
|
}
|
||||||
LexResult::Err(error_info) => {
|
LexResult::Err(error_info) => {
|
||||||
return Err(MistiError::Lex(error_info));
|
return Err(MistiError::Lex(error_info));
|
||||||
}
|
}
|
||||||
@ -65,17 +64,16 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
|
|||||||
|
|
||||||
// If EOF is reached return nothing but the current position
|
// If EOF is reached return nothing but the current position
|
||||||
if next_char == '\0' {
|
if next_char == '\0' {
|
||||||
return LexResult::None(current_pos)
|
return LexResult::None(current_pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle whitespace recursively.
|
// Handle whitespace recursively.
|
||||||
if next_char == ' ' {
|
if next_char == ' ' {
|
||||||
return next_token(chars, current_pos + 1)
|
return next_token(chars, current_pos + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scanners
|
// Scanners
|
||||||
None
|
None.or_else(|| scanner::number(next_char, chars, current_pos))
|
||||||
.or_else(|| scanner::number(next_char, chars, current_pos))
|
|
||||||
.or_else(|| scanner::identifier(next_char, chars, current_pos))
|
.or_else(|| scanner::identifier(next_char, chars, current_pos))
|
||||||
.or_else(|| scanner::datatype(next_char, chars, current_pos))
|
.or_else(|| scanner::datatype(next_char, chars, current_pos))
|
||||||
.or_else(|| scanner::string(next_char, chars, current_pos))
|
.or_else(|| scanner::string(next_char, chars, current_pos))
|
||||||
@ -87,7 +85,7 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
|
|||||||
position: current_pos,
|
position: current_pos,
|
||||||
reason: format!(
|
reason: format!(
|
||||||
"Unrecognized character `{}` (escaped: `{}`)",
|
"Unrecognized character `{}` (escaped: `{}`)",
|
||||||
next_char,
|
next_char,
|
||||||
next_char.escape_default().to_string(),
|
next_char.escape_default().to_string(),
|
||||||
),
|
),
|
||||||
};
|
};
|
||||||
@ -106,8 +104,6 @@ fn has_input(input: &Chars, current_pos: usize) -> bool {
|
|||||||
current_pos < input.len()
|
current_pos < input.len()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
@ -149,7 +145,7 @@ mod tests {
|
|||||||
match next_token(&chars, 0) {
|
match next_token(&chars, 0) {
|
||||||
LexResult::Some(t, _) => {
|
LexResult::Some(t, _) => {
|
||||||
assert_eq!("126", t.value)
|
assert_eq!("126", t.value)
|
||||||
},
|
}
|
||||||
_ => {
|
_ => {
|
||||||
panic!()
|
panic!()
|
||||||
}
|
}
|
||||||
@ -173,7 +169,7 @@ mod tests {
|
|||||||
let t3 = tokens.get(2).unwrap();
|
let t3 = tokens.get(2).unwrap();
|
||||||
assert_eq!(TokenType::Number, t3.token_type);
|
assert_eq!(TokenType::Number, t3.token_type);
|
||||||
assert_eq!("0.282398", t3.value);
|
assert_eq!("0.282398", t3.value);
|
||||||
|
|
||||||
assert_eq!("1789e+1", tokens.get(3).unwrap().value);
|
assert_eq!("1789e+1", tokens.get(3).unwrap().value);
|
||||||
assert_eq!("239.3298e-103", tokens.get(4).unwrap().value);
|
assert_eq!("239.3298e-103", tokens.get(4).unwrap().value);
|
||||||
assert_eq!(TokenType::Semicolon, tokens.get(5).unwrap().token_type);
|
assert_eq!(TokenType::Semicolon, tokens.get(5).unwrap().token_type);
|
||||||
@ -209,7 +205,7 @@ mod tests {
|
|||||||
assert_eq!(TokenType::RightBracket, t.token_type);
|
assert_eq!(TokenType::RightBracket, t.token_type);
|
||||||
assert_eq!("]", t.value);
|
assert_eq!("]", t.value);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn should_scan_datatype() {
|
fn should_scan_datatype() {
|
||||||
let input = String::from("Num");
|
let input = String::from("Num");
|
||||||
@ -217,7 +213,7 @@ mod tests {
|
|||||||
|
|
||||||
assert_eq!(TokenType::Datatype, tokens[0].token_type);
|
assert_eq!(TokenType::Datatype, tokens[0].token_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn should_scan_new_line() {
|
fn should_scan_new_line() {
|
||||||
let input = String::from("3\n22");
|
let input = String::from("3\n22");
|
||||||
@ -225,7 +221,7 @@ mod tests {
|
|||||||
|
|
||||||
assert_eq!(TokenType::Semicolon, tokens[1].token_type);
|
assert_eq!(TokenType::Semicolon, tokens[1].token_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn should_scan_multiple_new_lines() {
|
fn should_scan_multiple_new_lines() {
|
||||||
let input = String::from("3\n\n\n22");
|
let input = String::from("3\n\n\n22");
|
||||||
@ -234,7 +230,7 @@ mod tests {
|
|||||||
assert_eq!(TokenType::Semicolon, tokens[1].token_type);
|
assert_eq!(TokenType::Semicolon, tokens[1].token_type);
|
||||||
assert_eq!(TokenType::Number, tokens[2].token_type);
|
assert_eq!(TokenType::Number, tokens[2].token_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn should_scan_multiple_new_lines_with_whitespace_in_between() {
|
fn should_scan_multiple_new_lines_with_whitespace_in_between() {
|
||||||
let input = String::from("3\n \n \n22");
|
let input = String::from("3\n \n \n22");
|
||||||
|
@ -1,4 +1,7 @@
|
|||||||
use crate::{lexic::{token, utils, LexResult}, token::TokenType};
|
use crate::{
|
||||||
|
lexic::{token, utils, LexResult},
|
||||||
|
token::TokenType,
|
||||||
|
};
|
||||||
|
|
||||||
/// Checks if a String is a keyword, and returns its TokenType
|
/// Checks if a String is a keyword, and returns its TokenType
|
||||||
fn str_is_keyword(s: &String) -> Option<TokenType> {
|
fn str_is_keyword(s: &String) -> Option<TokenType> {
|
||||||
@ -24,31 +27,24 @@ pub fn scan(start_char: char, chars: &Vec<char>, start_pos: usize) -> LexResult
|
|||||||
/// Recursive function that scans the identifier
|
/// Recursive function that scans the identifier
|
||||||
fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String, is_datatype: bool) -> LexResult {
|
fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String, is_datatype: bool) -> LexResult {
|
||||||
match chars.get(start_pos) {
|
match chars.get(start_pos) {
|
||||||
Some(c) if utils::is_identifier_char(*c) => {
|
Some(c) if utils::is_identifier_char(*c) => scan_impl(
|
||||||
scan_impl(
|
chars,
|
||||||
chars,
|
start_pos + 1,
|
||||||
start_pos + 1,
|
utils::str_append(current, *c),
|
||||||
utils::str_append(current, *c),
|
is_datatype,
|
||||||
is_datatype,
|
),
|
||||||
)
|
|
||||||
},
|
|
||||||
_ => {
|
_ => {
|
||||||
if let Some(token_type) = str_is_keyword(&current) {
|
if let Some(token_type) = str_is_keyword(&current) {
|
||||||
LexResult::Some(token::new(current, start_pos as i32, token_type), start_pos)
|
LexResult::Some(token::new(current, start_pos as i32, token_type), start_pos)
|
||||||
}
|
} else if is_datatype {
|
||||||
else if is_datatype {
|
|
||||||
LexResult::Some(token::new_datatype(current, start_pos as i32), start_pos)
|
LexResult::Some(token::new_datatype(current, start_pos as i32), start_pos)
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
LexResult::Some(token::new_identifier(current, start_pos as i32), start_pos)
|
LexResult::Some(token::new_identifier(current, start_pos as i32), start_pos)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use crate::lexic::token::TokenType;
|
use crate::lexic::token::TokenType;
|
||||||
@ -69,8 +65,8 @@ mod tests {
|
|||||||
assert_eq!(1, next);
|
assert_eq!(1, next);
|
||||||
assert_eq!(TokenType::Identifier, token.token_type);
|
assert_eq!(TokenType::Identifier, token.token_type);
|
||||||
assert_eq!("_", token.value);
|
assert_eq!("_", token.value);
|
||||||
},
|
}
|
||||||
_ => panic!()
|
_ => panic!(),
|
||||||
}
|
}
|
||||||
|
|
||||||
let input = str_to_vec("i");
|
let input = str_to_vec("i");
|
||||||
@ -80,8 +76,8 @@ mod tests {
|
|||||||
assert_eq!(1, next);
|
assert_eq!(1, next);
|
||||||
assert_eq!(TokenType::Identifier, token.token_type);
|
assert_eq!(TokenType::Identifier, token.token_type);
|
||||||
assert_eq!("i", token.value);
|
assert_eq!("i", token.value);
|
||||||
},
|
}
|
||||||
_ => panic!()
|
_ => panic!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -89,27 +85,8 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn test_2() {
|
fn test_2() {
|
||||||
let operators = vec![
|
let operators = vec![
|
||||||
"_a",
|
"_a", "_z", "_A", "_Z", "__", "_0", "_9", "aa", "az", "aA", "aZ", "a_", "a0", "a9",
|
||||||
"_z",
|
"za", "zz", "zA", "zZ", "z_", "z0", "z9",
|
||||||
"_A",
|
|
||||||
"_Z",
|
|
||||||
"__",
|
|
||||||
"_0",
|
|
||||||
"_9",
|
|
||||||
"aa",
|
|
||||||
"az",
|
|
||||||
"aA",
|
|
||||||
"aZ",
|
|
||||||
"a_",
|
|
||||||
"a0",
|
|
||||||
"a9",
|
|
||||||
"za",
|
|
||||||
"zz",
|
|
||||||
"zA",
|
|
||||||
"zZ",
|
|
||||||
"z_",
|
|
||||||
"z0",
|
|
||||||
"z9",
|
|
||||||
];
|
];
|
||||||
|
|
||||||
for op in operators {
|
for op in operators {
|
||||||
@ -120,13 +97,12 @@ mod tests {
|
|||||||
assert_eq!(2, next);
|
assert_eq!(2, next);
|
||||||
assert_eq!(TokenType::Identifier, token.token_type);
|
assert_eq!(TokenType::Identifier, token.token_type);
|
||||||
assert_eq!(op, token.value);
|
assert_eq!(op, token.value);
|
||||||
},
|
}
|
||||||
_ => panic!()
|
_ => panic!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Should scan long identifiers
|
// Should scan long identifiers
|
||||||
#[test]
|
#[test]
|
||||||
fn test_3() {
|
fn test_3() {
|
||||||
@ -145,8 +121,8 @@ mod tests {
|
|||||||
assert_eq!(input.len(), next);
|
assert_eq!(input.len(), next);
|
||||||
assert_eq!(TokenType::Identifier, token.token_type);
|
assert_eq!(TokenType::Identifier, token.token_type);
|
||||||
assert_eq!(op, token.value);
|
assert_eq!(op, token.value);
|
||||||
},
|
}
|
||||||
_ => panic!()
|
_ => panic!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -156,19 +132,22 @@ mod tests {
|
|||||||
fn test_4() {
|
fn test_4() {
|
||||||
let input = str_to_vec("var");
|
let input = str_to_vec("var");
|
||||||
let start_pos = 0;
|
let start_pos = 0;
|
||||||
if let LexResult::Some(token, next) = scan(*input.get(0).unwrap(), &input, start_pos) {
|
if let LexResult::Some(token, next) = scan(*input.get(0).unwrap(), &input, start_pos) {
|
||||||
assert_eq!(3, next);
|
assert_eq!(3, next);
|
||||||
assert_eq!(TokenType::VAR, token.token_type);
|
assert_eq!(TokenType::VAR, token.token_type);
|
||||||
assert_eq!("var", token.value);
|
assert_eq!("var", token.value);
|
||||||
} else {panic!()}
|
} else {
|
||||||
|
panic!()
|
||||||
|
}
|
||||||
|
|
||||||
let input = str_to_vec("val");
|
let input = str_to_vec("val");
|
||||||
let start_pos = 0;
|
let start_pos = 0;
|
||||||
if let LexResult::Some(token, next) = scan(*input.get(0).unwrap(), &input, start_pos) {
|
if let LexResult::Some(token, next) = scan(*input.get(0).unwrap(), &input, start_pos) {
|
||||||
assert_eq!(3, next);
|
assert_eq!(3, next);
|
||||||
assert_eq!(TokenType::VAL, token.token_type);
|
assert_eq!(TokenType::VAL, token.token_type);
|
||||||
assert_eq!("val", token.value);
|
assert_eq!("val", token.value);
|
||||||
} else {panic!()}
|
} else {
|
||||||
|
panic!()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,11 +1,13 @@
|
|||||||
use super::{token::{TokenType, self}, utils, LexResult};
|
use super::{
|
||||||
|
token::{self, TokenType},
|
||||||
|
utils, LexResult,
|
||||||
|
};
|
||||||
|
|
||||||
|
mod identifier;
|
||||||
|
mod new_line;
|
||||||
mod number;
|
mod number;
|
||||||
mod operator;
|
mod operator;
|
||||||
mod identifier;
|
|
||||||
mod string;
|
mod string;
|
||||||
mod new_line;
|
|
||||||
|
|
||||||
|
|
||||||
// This module contains the individual scanners, and exports them
|
// This module contains the individual scanners, and exports them
|
||||||
|
|
||||||
@ -14,13 +16,11 @@ pub fn number(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult>
|
|||||||
utils::is_digit(c).then(|| number::scan(chars, start_pos))
|
utils::is_digit(c).then(|| number::scan(chars, start_pos))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/// Attempts to scan an operator. If not found returns None to be able to chain other scanner
|
/// Attempts to scan an operator. If not found returns None to be able to chain other scanner
|
||||||
pub fn operator(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
pub fn operator(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||||
utils::is_operator(c).then(|| operator::scan(chars, start_pos))
|
utils::is_operator(c).then(|| operator::scan(chars, start_pos))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/// Attempts to scan a grouping sign. If not found returns None to be able to chain other scanner
|
/// Attempts to scan a grouping sign. If not found returns None to be able to chain other scanner
|
||||||
pub fn grouping_sign(c: char, _: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
pub fn grouping_sign(c: char, _: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||||
let token_type = match c {
|
let token_type = match c {
|
||||||
@ -33,27 +33,20 @@ pub fn grouping_sign(c: char, _: &Vec<char>, start_pos: usize) -> Option<LexResu
|
|||||||
_ => return None,
|
_ => return None,
|
||||||
};
|
};
|
||||||
|
|
||||||
let token = token::new(
|
let token = token::new(c.to_string(), start_pos as i32, token_type);
|
||||||
c.to_string(),
|
|
||||||
start_pos as i32,
|
|
||||||
token_type,
|
|
||||||
);
|
|
||||||
Some(LexResult::Some(token, start_pos + 1))
|
Some(LexResult::Some(token, start_pos + 1))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/// Attempts to scan an identifier. If not found returns None to be able to chain other scanner
|
/// Attempts to scan an identifier. If not found returns None to be able to chain other scanner
|
||||||
pub fn identifier(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
pub fn identifier(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||||
(utils::is_lowercase(c) || c == '_')
|
(utils::is_lowercase(c) || c == '_').then(|| identifier::scan(c, chars, start_pos))
|
||||||
.then(|| identifier::scan(c, chars, start_pos))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Attempts to scan a datatype. If not found returns None to be able to chain other scanner
|
/// Attempts to scan a datatype. If not found returns None to be able to chain other scanner
|
||||||
pub fn datatype(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
pub fn datatype(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||||
// Since the only difference with an identifier is that the first character is an
|
// Since the only difference with an identifier is that the first character is an
|
||||||
// uppercase letter, reuse the identifier scanner
|
// uppercase letter, reuse the identifier scanner
|
||||||
utils::is_uppercase(c)
|
utils::is_uppercase(c).then(|| identifier::scan(c, chars, start_pos))
|
||||||
.then(|| identifier::scan(c, chars, start_pos))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Attempts to scan a string. If not found returns None to be able to chain other scanner
|
/// Attempts to scan a string. If not found returns None to be able to chain other scanner
|
||||||
@ -62,7 +55,6 @@ pub fn string(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult>
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Attempts to scan a new line. If not found returns None to be able to chain other scanner
|
/// Attempts to scan a new line. If not found returns None to be able to chain other scanner
|
||||||
pub fn new_line(c:char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
pub fn new_line(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||||
(c == '\n').then(|| new_line::scan(chars, start_pos))
|
(c == '\n').then(|| new_line::scan(chars, start_pos))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,8 +1,6 @@
|
|||||||
use crate::{
|
use crate::{
|
||||||
lexic::{
|
lexic::{token, LexResult},
|
||||||
token, LexResult,
|
token::TokenType,
|
||||||
},
|
|
||||||
token::TokenType
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Function to handle new lines
|
/// Function to handle new lines
|
||||||
@ -15,28 +13,16 @@ pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
|
|||||||
let current = chars.get(start_pos);
|
let current = chars.get(start_pos);
|
||||||
|
|
||||||
match current {
|
match current {
|
||||||
Some(c) if *c == '\n' => {
|
Some(c) if *c == '\n' => scan(chars, start_pos + 1),
|
||||||
scan(chars, start_pos + 1)
|
Some(c) if *c == ' ' => match look_ahead_for_new_line(chars, start_pos + 1) {
|
||||||
}
|
Some(next_pos) => scan(chars, next_pos),
|
||||||
Some(c) if *c == ' ' => {
|
None => {
|
||||||
match look_ahead_for_new_line(chars, start_pos + 1) {
|
let token = token::new(String::from(";"), start_pos as i32, TokenType::Semicolon);
|
||||||
Some(next_pos) => scan(chars, next_pos),
|
LexResult::Some(token, start_pos)
|
||||||
None => {
|
|
||||||
let token = token::new(
|
|
||||||
String::from(";"),
|
|
||||||
start_pos as i32,
|
|
||||||
TokenType::Semicolon,
|
|
||||||
);
|
|
||||||
LexResult::Some(token, start_pos)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
Some(_) | None => {
|
Some(_) | None => {
|
||||||
let token = token::new(
|
let token = token::new(String::from(";"), start_pos as i32, TokenType::Semicolon);
|
||||||
String::from(";"),
|
|
||||||
start_pos as i32,
|
|
||||||
TokenType::Semicolon,
|
|
||||||
);
|
|
||||||
LexResult::Some(token, start_pos)
|
LexResult::Some(token, start_pos)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -45,19 +31,12 @@ pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
|
|||||||
/// Returns the position after the new line
|
/// Returns the position after the new line
|
||||||
/// Skips the run of spaces that starts at `pos` and checks whether it is
/// immediately followed by a line feed.
///
/// Returns `Some(index)` where `index` is the position right after that `'\n'`,
/// or `None` when the run of spaces is followed by any other character or by the
/// end of `chars` (including when `pos` is already past the end).
///
/// The scan is iterative, so stack usage does not grow with the number of spaces.
fn look_ahead_for_new_line(chars: &Vec<char>, pos: usize) -> Option<usize> {
    // `get(pos..)` is `None` when `pos` is beyond the end of the input.
    let rest = chars.get(pos..)?;

    // Number of consecutive spaces starting at `pos`.
    let spaces = rest.iter().take_while(|&&c| c == ' ').count();

    match rest.get(spaces) {
        // `pos + spaces` is the index of the '\n'; the caller wants the one after it.
        Some('\n') => Some(pos + spaces + 1),
        Some(_) | None => None,
    }
}
|
||||||
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use crate::lexic::token::TokenType;
|
use crate::lexic::token::TokenType;
|
||||||
@ -67,7 +46,7 @@ mod tests {
|
|||||||
/// Test helper: splits `s` into its individual `char`s, in order.
fn str_to_vec(s: &str) -> Vec<char> {
    let mut chars = Vec::new();
    for c in s.chars() {
        chars.push(c);
    }
    chars
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn should_emit_semicolon_instead_of_new_line() {
|
fn should_emit_semicolon_instead_of_new_line() {
|
||||||
let input = str_to_vec("\n");
|
let input = str_to_vec("\n");
|
||||||
@ -92,8 +71,7 @@ mod tests {
|
|||||||
} else {
|
} else {
|
||||||
panic!()
|
panic!()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
let input = str_to_vec("\n\n\naToken");
|
let input = str_to_vec("\n\n\naToken");
|
||||||
let start_pos = 0;
|
let start_pos = 0;
|
||||||
|
|
||||||
@ -104,7 +82,7 @@ mod tests {
|
|||||||
panic!()
|
panic!()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn should_emit_a_single_semicolon_with_multiple_new_lines_and_whitespace() {
|
fn should_emit_a_single_semicolon_with_multiple_new_lines_and_whitespace() {
|
||||||
let input = str_to_vec("\n \n \n");
|
let input = str_to_vec("\n \n \n");
|
||||||
@ -116,8 +94,7 @@ mod tests {
|
|||||||
} else {
|
} else {
|
||||||
panic!()
|
panic!()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
let input = str_to_vec("\n \n \n aToken");
|
let input = str_to_vec("\n \n \n aToken");
|
||||||
let start_pos = 0;
|
let start_pos = 0;
|
||||||
|
|
||||||
@ -127,8 +104,7 @@ mod tests {
|
|||||||
} else {
|
} else {
|
||||||
panic!()
|
panic!()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
let input = str_to_vec("\n \n \n ");
|
let input = str_to_vec("\n \n \n ");
|
||||||
let start_pos = 0;
|
let start_pos = 0;
|
||||||
|
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
|
use crate::error_handling::LexError;
|
||||||
use crate::lexic::{
|
use crate::lexic::{
|
||||||
token::{self, Token},
|
token::{self, Token},
|
||||||
utils, LexResult,
|
utils, LexResult,
|
||||||
};
|
};
|
||||||
use crate::error_handling::LexError;
|
|
||||||
|
|
||||||
/// Function to scan a number
|
/// Function to scan a number
|
||||||
///
|
///
|
||||||
@ -66,9 +66,9 @@ fn scan_double(chars: &Vec<char>, start_pos: usize, current: String) -> LexResul
|
|||||||
Some(c) if utils::is_digit(*c) => scan_double_impl(chars, start_pos, current),
|
Some(c) if utils::is_digit(*c) => scan_double_impl(chars, start_pos, current),
|
||||||
Some(_) => LexResult::Err(LexError {
|
Some(_) => LexResult::Err(LexError {
|
||||||
position: start_pos,
|
position: start_pos,
|
||||||
reason : String::from(
|
reason: String::from(
|
||||||
"The character after the dot when scanning a double is not a number.",
|
"The character after the dot when scanning a double is not a number.",
|
||||||
)
|
),
|
||||||
}),
|
}),
|
||||||
_ => LexResult::Err(LexError {
|
_ => LexResult::Err(LexError {
|
||||||
position: start_pos,
|
position: start_pos,
|
||||||
@ -112,7 +112,7 @@ fn scan_scientific(chars: &Vec<char>, start_pos: usize, current: String) -> LexR
|
|||||||
position: start_pos,
|
position: start_pos,
|
||||||
reason: String::from(
|
reason: String::from(
|
||||||
"The characters after 'e' are not + or -, or are not followed by a number",
|
"The characters after 'e' are not + or -, or are not followed by a number",
|
||||||
)
|
),
|
||||||
}),
|
}),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -230,7 +230,9 @@ mod tests {
|
|||||||
let start_pos = 0;
|
let start_pos = 0;
|
||||||
|
|
||||||
match scan(&input, start_pos) {
|
match scan(&input, start_pos) {
|
||||||
LexResult::Err(reason) => assert_eq!("Tried to scan an incomplete hex value", reason.reason),
|
LexResult::Err(reason) => {
|
||||||
|
assert_eq!("Tried to scan an incomplete hex value", reason.reason)
|
||||||
|
}
|
||||||
_ => panic!(),
|
_ => panic!(),
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -299,7 +301,9 @@ mod tests {
|
|||||||
let start_pos = 0;
|
let start_pos = 0;
|
||||||
|
|
||||||
match scan(&input, start_pos) {
|
match scan(&input, start_pos) {
|
||||||
LexResult::Err(reason) => assert_eq!("EOF when scanning a double number.", reason.reason),
|
LexResult::Err(reason) => {
|
||||||
|
assert_eq!("EOF when scanning a double number.", reason.reason)
|
||||||
|
}
|
||||||
_ => panic!(),
|
_ => panic!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,8 +1,7 @@
|
|||||||
use crate::lexic::{token, utils, LexResult};
|
use crate::lexic::{token, utils, LexResult};
|
||||||
|
|
||||||
|
|
||||||
/// Function to scan an operator
|
/// Function to scan an operator
|
||||||
///
|
///
|
||||||
/// This function assumes the character at `start_pos` is an operator
|
/// This function assumes the character at `start_pos` is an operator
|
||||||
pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
|
pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
|
||||||
scan_impl(chars, start_pos, String::from(""))
|
scan_impl(chars, start_pos, String::from(""))
|
||||||
@ -12,16 +11,11 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
|
|||||||
match chars.get(start_pos) {
|
match chars.get(start_pos) {
|
||||||
Some(c) if utils::is_operator(*c) => {
|
Some(c) if utils::is_operator(*c) => {
|
||||||
scan_impl(chars, start_pos + 1, utils::str_append(current, *c))
|
scan_impl(chars, start_pos + 1, utils::str_append(current, *c))
|
||||||
},
|
|
||||||
_ => {
|
|
||||||
LexResult::Some(token::new_operator(current, start_pos as i32), start_pos)
|
|
||||||
}
|
}
|
||||||
|
_ => LexResult::Some(token::new_operator(current, start_pos as i32), start_pos),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
@ -35,26 +29,8 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn test_1() {
|
fn test_1() {
|
||||||
let operators = vec![
|
let operators = vec![
|
||||||
"+",
|
"+", "-", "=", "*", "!", "\\", "/", "|", "@", "#", "$", "~", "%", "&", "?", "<", ">",
|
||||||
"-",
|
"^", ".", ":",
|
||||||
"=",
|
|
||||||
"*",
|
|
||||||
"!",
|
|
||||||
"\\",
|
|
||||||
"/",
|
|
||||||
"|",
|
|
||||||
"@",
|
|
||||||
"#",
|
|
||||||
"$",
|
|
||||||
"~",
|
|
||||||
"%",
|
|
||||||
"&",
|
|
||||||
"?",
|
|
||||||
"<",
|
|
||||||
">",
|
|
||||||
"^",
|
|
||||||
".",
|
|
||||||
":",
|
|
||||||
];
|
];
|
||||||
|
|
||||||
for op in operators {
|
for op in operators {
|
||||||
@ -65,8 +41,8 @@ mod tests {
|
|||||||
assert_eq!(1, next);
|
assert_eq!(1, next);
|
||||||
assert_eq!(TokenType::Operator, token.token_type);
|
assert_eq!(TokenType::Operator, token.token_type);
|
||||||
assert_eq!(op, token.value);
|
assert_eq!(op, token.value);
|
||||||
},
|
}
|
||||||
_ => panic!()
|
_ => panic!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -75,20 +51,7 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn test_2() {
|
fn test_2() {
|
||||||
let operators = vec![
|
let operators = vec![
|
||||||
"<<",
|
"<<", ">>", "<|", "|>", "+>", "<+", "+=", "-=", "?.", "??", "?:", "*=", "/=", "==",
|
||||||
">>",
|
|
||||||
"<|",
|
|
||||||
"|>",
|
|
||||||
"+>",
|
|
||||||
"<+",
|
|
||||||
"+=",
|
|
||||||
"-=",
|
|
||||||
"?.",
|
|
||||||
"??",
|
|
||||||
"?:",
|
|
||||||
"*=",
|
|
||||||
"/=",
|
|
||||||
"==",
|
|
||||||
"!=",
|
"!=",
|
||||||
];
|
];
|
||||||
|
|
||||||
@ -100,8 +63,8 @@ mod tests {
|
|||||||
assert_eq!(2, next);
|
assert_eq!(2, next);
|
||||||
assert_eq!(TokenType::Operator, token.token_type);
|
assert_eq!(TokenType::Operator, token.token_type);
|
||||||
assert_eq!(op, token.value);
|
assert_eq!(op, token.value);
|
||||||
},
|
}
|
||||||
_ => panic!()
|
_ => panic!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,8 +1,5 @@
|
|||||||
use crate::lexic::{
|
|
||||||
token,
|
|
||||||
utils, LexResult,
|
|
||||||
};
|
|
||||||
use crate::error_handling::LexError;
|
use crate::error_handling::LexError;
|
||||||
|
use crate::lexic::{token, utils, LexResult};
|
||||||
|
|
||||||
/// Function to scan a string
|
/// Function to scan a string
|
||||||
///
|
///
|
||||||
@ -18,46 +15,26 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
|
|||||||
Some(c) if *c == '"' => {
|
Some(c) if *c == '"' => {
|
||||||
LexResult::Some(token::new_string(current, start_pos as i32), start_pos + 1)
|
LexResult::Some(token::new_string(current, start_pos as i32), start_pos + 1)
|
||||||
}
|
}
|
||||||
Some(c) if *c == '\n' => {
|
Some(c) if *c == '\n' => LexResult::Err(LexError {
|
||||||
LexResult::Err(LexError {
|
position: start_pos,
|
||||||
position: start_pos,
|
reason: String::from("Unexpected new line inside a string."),
|
||||||
reason: String::from("Unexpected new line inside a string.")
|
}),
|
||||||
})
|
|
||||||
}
|
|
||||||
Some(c) if *c == '\\' => {
|
Some(c) if *c == '\\' => {
|
||||||
if let Some(escape) = test_escape_char(chars, start_pos + 1) {
|
if let Some(escape) = test_escape_char(chars, start_pos + 1) {
|
||||||
scan_impl(
|
scan_impl(chars, start_pos + 2, utils::str_append(current, escape))
|
||||||
chars,
|
} else {
|
||||||
start_pos + 2,
|
|
||||||
utils::str_append(current, escape),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// Ignore the backslash
|
// Ignore the backslash
|
||||||
scan_impl(
|
scan_impl(chars, start_pos + 1, current)
|
||||||
chars,
|
|
||||||
start_pos + 1,
|
|
||||||
current,
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Some(c) => {
|
Some(c) => scan_impl(chars, start_pos + 1, utils::str_append(current, *c)),
|
||||||
scan_impl(
|
None => LexResult::Err(LexError {
|
||||||
chars,
|
position: start_pos,
|
||||||
start_pos + 1,
|
reason: String::from("Incomplete string found"),
|
||||||
utils::str_append(current, *c),
|
}),
|
||||||
)
|
|
||||||
}
|
|
||||||
None => {
|
|
||||||
LexResult::Err(LexError {
|
|
||||||
position: start_pos,
|
|
||||||
reason: String::from("Incomplete string found")
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/// Checks if the char at `start_pos` is an escape character
|
/// Checks if the char at `start_pos` is an escape character
|
||||||
fn test_escape_char(chars: &Vec<char>, start_pos: usize) -> Option<char> {
|
fn test_escape_char(chars: &Vec<char>, start_pos: usize) -> Option<char> {
|
||||||
if let Some(c) = chars.get(start_pos) {
|
if let Some(c) = chars.get(start_pos) {
|
||||||
@ -69,15 +46,11 @@ fn test_escape_char(chars: &Vec<char>, start_pos: usize) -> Option<char> {
|
|||||||
't' => Some('\t'),
|
't' => Some('\t'),
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use crate::lexic::token::TokenType;
|
use crate::lexic::token::TokenType;
|
||||||
@ -96,8 +69,9 @@ mod tests {
|
|||||||
assert_eq!(2, next);
|
assert_eq!(2, next);
|
||||||
assert_eq!(TokenType::String, token.token_type);
|
assert_eq!(TokenType::String, token.token_type);
|
||||||
assert_eq!("", token.value);
|
assert_eq!("", token.value);
|
||||||
|
} else {
|
||||||
|
panic!()
|
||||||
}
|
}
|
||||||
else {panic!()}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -108,8 +82,9 @@ mod tests {
|
|||||||
assert_eq!(15, next);
|
assert_eq!(15, next);
|
||||||
assert_eq!(TokenType::String, token.token_type);
|
assert_eq!(TokenType::String, token.token_type);
|
||||||
assert_eq!("Hello, world!", token.value);
|
assert_eq!("Hello, world!", token.value);
|
||||||
|
} else {
|
||||||
|
panic!()
|
||||||
}
|
}
|
||||||
else {panic!()}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -118,8 +93,9 @@ mod tests {
|
|||||||
let start_pos = 1;
|
let start_pos = 1;
|
||||||
if let LexResult::Err(reason) = scan(&input, start_pos) {
|
if let LexResult::Err(reason) = scan(&input, start_pos) {
|
||||||
assert_eq!("Unexpected new line inside a string.", reason.reason)
|
assert_eq!("Unexpected new line inside a string.", reason.reason)
|
||||||
|
} else {
|
||||||
|
panic!()
|
||||||
}
|
}
|
||||||
else {panic!()}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -130,8 +106,9 @@ mod tests {
|
|||||||
assert_eq!(14, next);
|
assert_eq!(14, next);
|
||||||
assert_eq!(TokenType::String, token.token_type);
|
assert_eq!(TokenType::String, token.token_type);
|
||||||
assert_eq!("Sample\ntext", token.value);
|
assert_eq!("Sample\ntext", token.value);
|
||||||
|
} else {
|
||||||
|
panic!()
|
||||||
}
|
}
|
||||||
else {panic!()}
|
|
||||||
|
|
||||||
let input = str_to_vec("\"Sample\\\"text\"");
|
let input = str_to_vec("\"Sample\\\"text\"");
|
||||||
let start_pos = 1;
|
let start_pos = 1;
|
||||||
@ -139,8 +116,9 @@ mod tests {
|
|||||||
assert_eq!(14, next);
|
assert_eq!(14, next);
|
||||||
assert_eq!(TokenType::String, token.token_type);
|
assert_eq!(TokenType::String, token.token_type);
|
||||||
assert_eq!("Sample\"text", token.value);
|
assert_eq!("Sample\"text", token.value);
|
||||||
|
} else {
|
||||||
|
panic!()
|
||||||
}
|
}
|
||||||
else {panic!()}
|
|
||||||
|
|
||||||
let input = str_to_vec("\"Sample\\rtext\"");
|
let input = str_to_vec("\"Sample\\rtext\"");
|
||||||
let start_pos = 1;
|
let start_pos = 1;
|
||||||
@ -148,8 +126,9 @@ mod tests {
|
|||||||
assert_eq!(14, next);
|
assert_eq!(14, next);
|
||||||
assert_eq!(TokenType::String, token.token_type);
|
assert_eq!(TokenType::String, token.token_type);
|
||||||
assert_eq!("Sample\rtext", token.value);
|
assert_eq!("Sample\rtext", token.value);
|
||||||
|
} else {
|
||||||
|
panic!()
|
||||||
}
|
}
|
||||||
else {panic!()}
|
|
||||||
|
|
||||||
let input = str_to_vec("\"Sample\\\\text\"");
|
let input = str_to_vec("\"Sample\\\\text\"");
|
||||||
let start_pos = 1;
|
let start_pos = 1;
|
||||||
@ -157,8 +136,9 @@ mod tests {
|
|||||||
assert_eq!(14, next);
|
assert_eq!(14, next);
|
||||||
assert_eq!(TokenType::String, token.token_type);
|
assert_eq!(TokenType::String, token.token_type);
|
||||||
assert_eq!("Sample\\text", token.value);
|
assert_eq!("Sample\\text", token.value);
|
||||||
|
} else {
|
||||||
|
panic!()
|
||||||
}
|
}
|
||||||
else {panic!()}
|
|
||||||
|
|
||||||
let input = str_to_vec("\"Sample\\ttext\"");
|
let input = str_to_vec("\"Sample\\ttext\"");
|
||||||
let start_pos = 1;
|
let start_pos = 1;
|
||||||
@ -166,8 +146,9 @@ mod tests {
|
|||||||
assert_eq!(14, next);
|
assert_eq!(14, next);
|
||||||
assert_eq!(TokenType::String, token.token_type);
|
assert_eq!(TokenType::String, token.token_type);
|
||||||
assert_eq!("Sample\ttext", token.value);
|
assert_eq!("Sample\ttext", token.value);
|
||||||
|
} else {
|
||||||
|
panic!()
|
||||||
}
|
}
|
||||||
else {panic!()}
|
|
||||||
|
|
||||||
let input = str_to_vec("\"Sample\\ text\"");
|
let input = str_to_vec("\"Sample\\ text\"");
|
||||||
let start_pos = 1;
|
let start_pos = 1;
|
||||||
@ -175,7 +156,8 @@ mod tests {
|
|||||||
assert_eq!(14, next);
|
assert_eq!(14, next);
|
||||||
assert_eq!(TokenType::String, token.token_type);
|
assert_eq!(TokenType::String, token.token_type);
|
||||||
assert_eq!("Sample text", token.value);
|
assert_eq!("Sample text", token.value);
|
||||||
|
} else {
|
||||||
|
panic!()
|
||||||
}
|
}
|
||||||
else {panic!()}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -15,11 +15,26 @@ pub fn str_append(current: String, c: char) -> String {
|
|||||||
|
|
||||||
/// Whether `c` is an operator char.
///
/// The operator chars are exactly these twenty:
/// `+ - = * ! \ / | @ # $ ~ % & ? < > ^ . :`
pub fn is_operator(c: char) -> bool {
    matches!(
        c,
        '+' | '-'
            | '='
            | '*'
            | '!'
            | '\\'
            | '/'
            | '|'
            | '@'
            | '#'
            | '$'
            | '~'
            | '%'
            | '&'
            | '?'
            | '<'
            | '>'
            | '^'
            | '.'
            | ':'
    )
}
|
||||||
|
|
||||||
/// Whether `c` is between `a-z`
|
/// Whether `c` is between `a-z`
|
||||||
|
15
src/main.rs
15
src/main.rs
@ -20,7 +20,6 @@ mod codegen;
|
|||||||
|
|
||||||
mod error_handling;
|
mod error_handling;
|
||||||
|
|
||||||
|
|
||||||
#[derive(Parser)]
|
#[derive(Parser)]
|
||||||
#[command(author, version, about, long_about = None)]
|
#[command(author, version, about, long_about = None)]
|
||||||
struct Cli {
|
struct Cli {
|
||||||
@ -39,20 +38,22 @@ enum Commands {
|
|||||||
output: String,
|
output: String,
|
||||||
},
|
},
|
||||||
/// Starts the REPL
|
/// Starts the REPL
|
||||||
R {}
|
R {},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
const VERSION: &str = "0.0.1";
|
const VERSION: &str = "0.0.1";
|
||||||
|
|
||||||
fn get_copyright() -> String {
|
fn get_copyright() -> String {
|
||||||
let year = Utc::now().year();
|
let year = Utc::now().year();
|
||||||
|
|
||||||
format!("Misti {}\nCopyright (c) {} Fernando Enrique Araoz Morales\n", VERSION, year)
|
format!(
|
||||||
|
"Misti {}\nCopyright (c) {} Fernando Enrique Araoz Morales\n",
|
||||||
|
VERSION, year
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// # Misti
|
/// # Misti
|
||||||
///
|
///
|
||||||
/// Usage:
|
/// Usage:
|
||||||
/// - `misti` : Starts the compiler in watch mode
|
/// - `misti` : Starts the compiler in watch mode
|
||||||
/// - `misti w, --watch, -w` : Starts the compiler in watch mode
|
/// - `misti w, --watch, -w` : Starts the compiler in watch mode
|
||||||
@ -66,13 +67,11 @@ fn main() {
|
|||||||
Some(Commands::C { file: _, output: _ }) => {
|
Some(Commands::C { file: _, output: _ }) => {
|
||||||
println!("Compile a file: Not implemented")
|
println!("Compile a file: Not implemented")
|
||||||
}
|
}
|
||||||
Some(Commands::R { }) => {
|
Some(Commands::R {}) => {
|
||||||
let _ = repl::run();
|
let _ = repl::run();
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
println!("Compile in watch mode: Not implemented")
|
println!("Compile in watch mode: Not implemented")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4,10 +4,10 @@ use crate::error_handling::PrintableError;
|
|||||||
use crate::symbol_table::SymbolTable;
|
use crate::symbol_table::SymbolTable;
|
||||||
use crate::token::Token;
|
use crate::token::Token;
|
||||||
|
|
||||||
use super::lexic;
|
|
||||||
use super::syntax;
|
|
||||||
use super::semantic;
|
|
||||||
use super::codegen;
|
use super::codegen;
|
||||||
|
use super::lexic;
|
||||||
|
use super::semantic;
|
||||||
|
use super::syntax;
|
||||||
|
|
||||||
/// Executes Lexical analysis, handles errors and calls build_ast for the next phase
|
/// Executes Lexical analysis, handles errors and calls build_ast for the next phase
|
||||||
fn compile(input: &String) {
|
fn compile(input: &String) {
|
||||||
@ -15,20 +15,19 @@ fn compile(input: &String) {
|
|||||||
|
|
||||||
match tokens {
|
match tokens {
|
||||||
Ok(tokens) => {
|
Ok(tokens) => {
|
||||||
build_ast(tokens);
|
build_ast(input, tokens);
|
||||||
},
|
}
|
||||||
Err(error) => {
|
Err(error) => {
|
||||||
let chars: Vec<char> = input.chars().into_iter().collect();
|
let chars: Vec<char> = input.chars().into_iter().collect();
|
||||||
eprintln!("{}", error.get_error_str(&chars))
|
eprintln!("{}", error.get_error_str(&chars))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Executes Syntax analysis, and for now, Semantic analysis and Code generation.
|
/// Executes Syntax analysis, and for now, Semantic analysis and Code generation.
|
||||||
///
|
///
|
||||||
/// Prints the generated code to stdout
|
/// Prints the generated code to stdout
|
||||||
fn build_ast(tokens: Vec<Token>) {
|
fn build_ast(input: &String, tokens: Vec<Token>) {
|
||||||
let ast = syntax::construct_ast(&tokens);
|
let ast = syntax::construct_ast(&tokens);
|
||||||
|
|
||||||
match ast {
|
match ast {
|
||||||
@ -39,7 +38,8 @@ fn build_ast(tokens: Vec<Token>) {
|
|||||||
println!("{}", js_code)
|
println!("{}", js_code)
|
||||||
}
|
}
|
||||||
Err(reason) => {
|
Err(reason) => {
|
||||||
eprintln!("Syntax error.\n{}", reason)
|
let chars: Vec<char> = input.chars().into_iter().collect();
|
||||||
|
eprintln!("Syntax error.\n{}", reason.get_error_str(&chars))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -59,14 +59,14 @@ pub fn run() -> io::Result<()> {
|
|||||||
match read {
|
match read {
|
||||||
Ok(0) => {
|
Ok(0) => {
|
||||||
println!("\nBye");
|
println!("\nBye");
|
||||||
break Ok(())
|
break Ok(());
|
||||||
},
|
}
|
||||||
Ok(_) => {
|
Ok(_) => {
|
||||||
compile(&buffer);
|
compile(&buffer);
|
||||||
},
|
}
|
||||||
Err(error) => {
|
Err(error) => {
|
||||||
eprintln!("Error reading stdin.");
|
eprintln!("Error reading stdin.");
|
||||||
break Err(error)
|
break Err(error);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use super::symbol_table::{SymbolTable, _NUMBER, _STRING, _BOOLEAN};
|
use super::ast_types::{Binding, Expression, ModuleAST};
|
||||||
use super::ast_types::{ModuleAST, Binding, Expression};
|
use super::symbol_table::{SymbolTable, _BOOLEAN, _NUMBER, _STRING};
|
||||||
|
|
||||||
/// Checks the AST. In the future should return a list of errors.
|
/// Checks the AST. In the future should return a list of errors.
|
||||||
pub fn check_ast<'a>(ast: &'a mut ModuleAST, symbol_table: &'a mut SymbolTable) {
|
pub fn check_ast<'a>(ast: &'a mut ModuleAST, symbol_table: &'a mut SymbolTable) {
|
||||||
@ -8,7 +8,7 @@ pub fn check_ast<'a>(ast: &'a mut ModuleAST, symbol_table: &'a mut SymbolTable)
|
|||||||
Binding::Val(binding) => {
|
Binding::Val(binding) => {
|
||||||
symbol_table.add(
|
symbol_table.add(
|
||||||
binding.identifier,
|
binding.identifier,
|
||||||
get_expression_type(&binding.expression, symbol_table).as_str()
|
get_expression_type(&binding.expression, symbol_table).as_str(),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
Binding::Var(binding) => {
|
Binding::Var(binding) => {
|
||||||
@ -28,9 +28,7 @@ fn get_expression_type(exp: &Expression, symbol_table: &SymbolTable) -> String {
|
|||||||
Expression::Boolean(_) => String::from(_BOOLEAN),
|
Expression::Boolean(_) => String::from(_BOOLEAN),
|
||||||
Expression::Identifier(id) => {
|
Expression::Identifier(id) => {
|
||||||
match symbol_table.get_type(*id) {
|
match symbol_table.get_type(*id) {
|
||||||
Some(datatype) => {
|
Some(datatype) => datatype,
|
||||||
datatype
|
|
||||||
}
|
|
||||||
None => {
|
None => {
|
||||||
// Should add an error to the list instead of panicking
|
// Should add an error to the list instead of panicking
|
||||||
panic!("Semantic analysis: identifier {} not found", id);
|
panic!("Semantic analysis: identifier {} not found", id);
|
||||||
@ -42,10 +40,10 @@ fn get_expression_type(exp: &Expression, symbol_table: &SymbolTable) -> String {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
use crate::lexic;
|
||||||
use crate::symbol_table::_BOOLEAN;
|
use crate::symbol_table::_BOOLEAN;
|
||||||
use crate::symbol_table::_STRING;
|
use crate::symbol_table::_STRING;
|
||||||
use crate::syntax;
|
use crate::syntax;
|
||||||
use crate::lexic;
|
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
@ -58,7 +56,7 @@ mod tests {
|
|||||||
|
|
||||||
table.check_type("a", datatype)
|
table.check_type("a", datatype)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn should_update_symbol_table() {
|
fn should_update_symbol_table() {
|
||||||
let tokens = lexic::get_tokens(&String::from("val identifier = 20")).unwrap();
|
let tokens = lexic::get_tokens(&String::from("val identifier = 20")).unwrap();
|
||||||
@ -70,19 +68,19 @@ mod tests {
|
|||||||
let result = table.test("identifier");
|
let result = table.test("identifier");
|
||||||
assert_eq!(true, result);
|
assert_eq!(true, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
fn should_get_correct_type() {
    // Each source snippet declares `a`; the second element is the datatype
    // that `test_type` is asked to confirm for it.
    let cases = [
        ("val a = 322", _NUMBER),
        ("var a = 322", _NUMBER),
        ("val a = \"str\" ", _STRING),
        ("var a = \"str\" ", _STRING),
        ("val a = false", _BOOLEAN),
        ("var a = true", _BOOLEAN),
    ];

    for &(source, expected) in cases.iter() {
        assert!(test_type(String::from(source), expected));
    }
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn should_get_type_from_identifier() {
|
fn should_get_type_from_identifier() {
|
||||||
let mut table = SymbolTable::new();
|
let mut table = SymbolTable::new();
|
||||||
@ -91,13 +89,13 @@ mod tests {
|
|||||||
|
|
||||||
// Add an identifier
|
// Add an identifier
|
||||||
check_ast(&mut ast, &mut table);
|
check_ast(&mut ast, &mut table);
|
||||||
|
|
||||||
let tokens = lexic::get_tokens(&String::from("val newValue = identifier")).unwrap();
|
let tokens = lexic::get_tokens(&String::from("val newValue = identifier")).unwrap();
|
||||||
let mut ast = syntax::construct_ast(&tokens).unwrap();
|
let mut ast = syntax::construct_ast(&tokens).unwrap();
|
||||||
|
|
||||||
// Add a new value that references an identifier
|
// Add a new value that references an identifier
|
||||||
check_ast(&mut ast, &mut table);
|
check_ast(&mut ast, &mut table);
|
||||||
|
|
||||||
// The type should be Num
|
// The type should be Num
|
||||||
let current_type = table.get_type("newValue").unwrap();
|
let current_type = table.get_type("newValue").unwrap();
|
||||||
assert_eq!(_NUMBER, current_type);
|
assert_eq!(_NUMBER, current_type);
|
||||||
|
@ -6,7 +6,7 @@ pub const _STRING: &str = "Str";
|
|||||||
pub const _BOOLEAN: &str = "Bool";
|
pub const _BOOLEAN: &str = "Bool";
|
||||||
|
|
||||||
pub struct SymbolTable {
|
pub struct SymbolTable {
|
||||||
table: HashMap<String, String>
|
table: HashMap<String, String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SymbolTable {
|
impl SymbolTable {
|
||||||
@ -19,7 +19,8 @@ impl SymbolTable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn add(&mut self, identifier: &str, datatype: &str) {
|
pub fn add(&mut self, identifier: &str, datatype: &str) {
|
||||||
self.table.insert(String::from(identifier), String::from(datatype));
|
self.table
|
||||||
|
.insert(String::from(identifier), String::from(datatype));
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn test(&self, identifier: &str) -> bool {
|
pub fn test(&self, identifier: &str) -> bool {
|
||||||
@ -32,24 +33,20 @@ impl SymbolTable {
|
|||||||
.and_then(|(_, value)| {
|
.and_then(|(_, value)| {
|
||||||
if value == &String::from(datatype) {
|
if value == &String::from(datatype) {
|
||||||
Some(true)
|
Some(true)
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
Some(false)
|
Some(false)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.unwrap_or(false)
|
.unwrap_or(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_type(&self, identifier: &str) -> Option<String> {
|
pub fn get_type(&self, identifier: &str) -> Option<String> {
|
||||||
self.table
|
self.table
|
||||||
.get_key_value(&String::from(identifier))
|
.get_key_value(&String::from(identifier))
|
||||||
.and_then(|(_, value)| {
|
.and_then(|(_, value)| Some(String::from(value)))
|
||||||
Some(String::from(value))
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
@ -70,7 +67,7 @@ mod tests {
|
|||||||
fn should_check_type() {
|
fn should_check_type() {
|
||||||
let mut table = SymbolTable::new();
|
let mut table = SymbolTable::new();
|
||||||
table.add("firstNumber", _NUMBER);
|
table.add("firstNumber", _NUMBER);
|
||||||
|
|
||||||
assert!(table.check_type("firstNumber", _NUMBER));
|
assert!(table.check_type("firstNumber", _NUMBER));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,14 +1,14 @@
|
|||||||
|
use super::ast_types::{Binding, ValBinding, VarBinding};
|
||||||
|
use super::{expression, SyntaxResult};
|
||||||
use crate::token::{Token, TokenType};
|
use crate::token::{Token, TokenType};
|
||||||
use super::ast_types::{ValBinding, VarBinding, Binding};
|
|
||||||
use super::expression;
|
|
||||||
|
|
||||||
// TODO: Should return a 3 state value:
|
// TODO: Should return a 3 state value:
|
||||||
// - Success: binding parsed successfully
|
// - Success: binding parsed successfully
|
||||||
// - NotFound: the first token (var | val) was not found, so the parser should try other options
|
// - NotFound: the first token (var | val) was not found, so the parser should try other options
|
||||||
// - Error: token (var | val) was found, but then other expected tokens were not found
|
// - Error: token (var | val) was found, but then other expected tokens were not found
|
||||||
pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<Binding> {
|
pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<SyntaxResult> {
|
||||||
let mut pos = pos;
|
let mut pos = pos;
|
||||||
|
|
||||||
// Optional datatype annotation
|
// Optional datatype annotation
|
||||||
let datatype_annotation = {
|
let datatype_annotation = {
|
||||||
match tokens.get(pos) {
|
match tokens.get(pos) {
|
||||||
@ -17,10 +17,11 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<Binding> {
|
|||||||
Some(String::from(&t.value))
|
Some(String::from(&t.value))
|
||||||
}
|
}
|
||||||
Some(_) => None,
|
Some(_) => None,
|
||||||
None => return None
|
// TODO: return Error
|
||||||
|
None => return None,
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// var/val keyword
|
// var/val keyword
|
||||||
let is_val = {
|
let is_val = {
|
||||||
let res1 = try_token_type(tokens, pos, TokenType::VAL);
|
let res1 = try_token_type(tokens, pos, TokenType::VAL);
|
||||||
@ -30,58 +31,62 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<Binding> {
|
|||||||
let res2 = try_token_type(tokens, pos, TokenType::VAR);
|
let res2 = try_token_type(tokens, pos, TokenType::VAR);
|
||||||
match res2 {
|
match res2 {
|
||||||
Some(_) => false,
|
Some(_) => false,
|
||||||
None => return None
|
// TODO: return Error
|
||||||
|
None => return None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let identifier = try_token_type(tokens, pos + 1, TokenType::Identifier);
|
let identifier = try_token_type(tokens, pos + 1, TokenType::Identifier);
|
||||||
if identifier.is_none() { return None }
|
if identifier.is_none() {
|
||||||
|
// TODO: return Error
|
||||||
|
return None;
|
||||||
|
}
|
||||||
let identifier = identifier.unwrap();
|
let identifier = identifier.unwrap();
|
||||||
|
|
||||||
let equal_operator = try_operator(tokens, pos + 2, String::from("="));
|
let equal_operator = try_operator(tokens, pos + 2, String::from("="));
|
||||||
if equal_operator.is_none() { return None }
|
if equal_operator.is_none() {
|
||||||
|
// TODO: return Error
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
let expression = expression::try_parse(tokens, pos + 3);
|
let expression = expression::try_parse(tokens, pos + 3);
|
||||||
if expression.is_none() { return None }
|
if expression.is_none() {
|
||||||
|
// TODO: return Error
|
||||||
|
return None;
|
||||||
|
}
|
||||||
let expression = expression.unwrap();
|
let expression = expression.unwrap();
|
||||||
|
|
||||||
if is_val {
|
let binding = if is_val {
|
||||||
Some(Binding::Val(ValBinding {
|
Binding::Val(ValBinding {
|
||||||
datatype: datatype_annotation,
|
datatype: datatype_annotation,
|
||||||
identifier: &identifier.value,
|
identifier: &identifier.value,
|
||||||
expression,
|
expression,
|
||||||
}))
|
})
|
||||||
}
|
} else {
|
||||||
else {
|
Binding::Var(VarBinding {
|
||||||
Some(Binding::Var(VarBinding {
|
|
||||||
datatype: datatype_annotation,
|
datatype: datatype_annotation,
|
||||||
identifier: &identifier.value,
|
identifier: &identifier.value,
|
||||||
expression,
|
expression,
|
||||||
}))
|
})
|
||||||
}
|
};
|
||||||
|
|
||||||
|
Some(SyntaxResult::Ok(binding))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Option<&Token> {
|
fn try_token_type(tokens: &Vec<Token>, pos: usize, token_type: TokenType) -> Option<&Token> {
|
||||||
tokens
|
tokens
|
||||||
.get(pos)
|
.get(pos)
|
||||||
.and_then(|token| {
|
.and_then(|token| (token.token_type == token_type).then(|| token))
|
||||||
(token.token_type == token_type).then(|| token)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Option<&Token> {
|
fn try_operator(tokens: &Vec<Token>, pos: usize, operator: String) -> Option<&Token> {
|
||||||
tokens
|
tokens.get(pos).and_then(|token| {
|
||||||
.get(pos)
|
(token.token_type == TokenType::Operator && token.value == operator).then(|| token)
|
||||||
.and_then(|token| {
|
})
|
||||||
(token.token_type == TokenType::Operator && token.value == operator)
|
|
||||||
.then(|| token)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
@ -93,10 +98,10 @@ mod tests {
|
|||||||
let binding = try_parse(&tokens, 0).unwrap();
|
let binding = try_parse(&tokens, 0).unwrap();
|
||||||
|
|
||||||
match binding {
|
match binding {
|
||||||
Binding::Val(binding) => {
|
SyntaxResult::Ok(Binding::Val(binding)) => {
|
||||||
assert_eq!("identifier", binding.identifier);
|
assert_eq!("identifier", binding.identifier);
|
||||||
}
|
}
|
||||||
_ => panic!()
|
_ => panic!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -124,30 +129,29 @@ mod tests {
|
|||||||
|
|
||||||
assert_eq!("=", token.value);
|
assert_eq!("=", token.value);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn should_parse_binding_with_datatype() {
|
fn should_parse_binding_with_datatype() {
|
||||||
let tokens = get_tokens(&String::from("Num val identifier = 20")).unwrap();
|
let tokens = get_tokens(&String::from("Num val identifier = 20")).unwrap();
|
||||||
let binding = try_parse(&tokens, 0).unwrap();
|
let binding = try_parse(&tokens, 0).unwrap();
|
||||||
|
|
||||||
match binding {
|
match binding {
|
||||||
Binding::Val(binding) => {
|
SyntaxResult::Ok(Binding::Val(binding)) => {
|
||||||
assert_eq!(Some(String::from("Num")), binding.datatype);
|
assert_eq!(Some(String::from("Num")), binding.datatype);
|
||||||
assert_eq!("identifier", binding.identifier);
|
assert_eq!("identifier", binding.identifier);
|
||||||
}
|
}
|
||||||
_ => panic!()
|
_ => panic!(),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
let tokens = get_tokens(&String::from("Bool var identifier = true")).unwrap();
|
let tokens = get_tokens(&String::from("Bool var identifier = true")).unwrap();
|
||||||
let binding = try_parse(&tokens, 0).unwrap();
|
let binding = try_parse(&tokens, 0).unwrap();
|
||||||
|
|
||||||
match binding {
|
match binding {
|
||||||
Binding::Var(binding) => {
|
SyntaxResult::Ok(Binding::Var(binding)) => {
|
||||||
assert_eq!(Some(String::from("Bool")), binding.datatype);
|
assert_eq!(Some(String::from("Bool")), binding.datatype);
|
||||||
assert_eq!("identifier", binding.identifier);
|
assert_eq!("identifier", binding.identifier);
|
||||||
}
|
}
|
||||||
_ => panic!()
|
_ => panic!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use crate::token::{Token, TokenType};
|
|
||||||
use super::ast_types::Expression;
|
use super::ast_types::Expression;
|
||||||
|
use crate::token::{Token, TokenType};
|
||||||
|
|
||||||
/// An expression can be:
|
/// An expression can be:
|
||||||
///
|
///
|
||||||
@ -8,28 +8,17 @@ use super::ast_types::Expression;
|
|||||||
/// - A boolean
|
/// - A boolean
|
||||||
/// - An identifier
|
/// - An identifier
|
||||||
pub fn try_parse(tokens: &Vec<Token>, pos: usize) -> Option<Expression> {
|
pub fn try_parse(tokens: &Vec<Token>, pos: usize) -> Option<Expression> {
|
||||||
tokens
|
tokens.get(pos).and_then(|token| match token.token_type {
|
||||||
.get(pos)
|
TokenType::Number => Some(Expression::Number(&token.value)),
|
||||||
.and_then(|token| {
|
TokenType::String => Some(Expression::String(&token.value)),
|
||||||
match token.token_type {
|
TokenType::Identifier if token.value == "true" || token.value == "false" => {
|
||||||
TokenType::Number => {
|
Some(Expression::Boolean(token.value == "true"))
|
||||||
Some(Expression::Number(&token.value))
|
}
|
||||||
}
|
TokenType::Identifier => Some(Expression::Identifier(&token.value)),
|
||||||
TokenType::String => {
|
_ => None,
|
||||||
Some(Expression::String(&token.value))
|
})
|
||||||
}
|
|
||||||
TokenType::Identifier if token.value == "true" || token.value == "false" => {
|
|
||||||
Some(Expression::Boolean(token.value == "true"))
|
|
||||||
}
|
|
||||||
TokenType::Identifier => {
|
|
||||||
Some(Expression::Identifier(&token.value))
|
|
||||||
}
|
|
||||||
_ => None
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
@ -42,10 +31,10 @@ mod tests {
|
|||||||
|
|
||||||
match expression {
|
match expression {
|
||||||
Expression::Number(value) => assert_eq!("40", value),
|
Expression::Number(value) => assert_eq!("40", value),
|
||||||
_ => panic!()
|
_ => panic!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn should_parse_a_string() {
|
fn should_parse_a_string() {
|
||||||
let tokens = get_tokens(&String::from("\"Hello\"")).unwrap();
|
let tokens = get_tokens(&String::from("\"Hello\"")).unwrap();
|
||||||
@ -53,10 +42,10 @@ mod tests {
|
|||||||
|
|
||||||
match expression {
|
match expression {
|
||||||
Expression::String(value) => assert_eq!("Hello", value),
|
Expression::String(value) => assert_eq!("Hello", value),
|
||||||
_ => panic!()
|
_ => panic!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn should_parse_a_boolean() {
|
fn should_parse_a_boolean() {
|
||||||
let tokens = get_tokens(&String::from("true")).unwrap();
|
let tokens = get_tokens(&String::from("true")).unwrap();
|
||||||
@ -64,10 +53,10 @@ mod tests {
|
|||||||
|
|
||||||
match expression {
|
match expression {
|
||||||
Expression::Boolean(value) => assert!(value),
|
Expression::Boolean(value) => assert!(value),
|
||||||
_ => panic!()
|
_ => panic!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn should_parse_an_identifier() {
|
fn should_parse_an_identifier() {
|
||||||
let tokens = get_tokens(&String::from("someIdentifier")).unwrap();
|
let tokens = get_tokens(&String::from("someIdentifier")).unwrap();
|
||||||
@ -75,7 +64,7 @@ mod tests {
|
|||||||
|
|
||||||
match expression {
|
match expression {
|
||||||
Expression::Identifier(value) => assert_eq!("someIdentifier", value),
|
Expression::Identifier(value) => assert_eq!("someIdentifier", value),
|
||||||
_ => panic!()
|
_ => panic!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,25 +1,47 @@
|
|||||||
|
use crate::ast_types::Binding;
|
||||||
|
use crate::error_handling::SyntaxError;
|
||||||
|
|
||||||
use super::token::Token;
|
use super::token::Token;
|
||||||
|
|
||||||
mod expression;
|
|
||||||
mod binding;
|
mod binding;
|
||||||
|
mod expression;
|
||||||
use super::ast_types;
|
use super::ast_types;
|
||||||
|
|
||||||
use ast_types::ModuleAST;
|
use ast_types::ModuleAST;
|
||||||
|
|
||||||
/// Constructs the Misti AST from a vector of tokens
|
pub enum SyntaxResult<'a> {
|
||||||
pub fn construct_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST<'a>, String> {
|
///
|
||||||
let maybe_binding = binding::try_parse(tokens, 0);
|
/// A construct has been found
|
||||||
|
Ok(Binding<'a>),
|
||||||
|
///
|
||||||
|
/// No construct was found
|
||||||
|
None,
|
||||||
|
///
|
||||||
|
/// A construct was found, but there was an error parsing it
|
||||||
|
Err(SyntaxError),
|
||||||
|
}
|
||||||
|
|
||||||
match maybe_binding {
|
/// Constructs the Misti AST from a vector of tokens
|
||||||
Some(binding) => {
|
pub fn construct_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST<'a>, SyntaxError> {
|
||||||
Ok(ModuleAST {
|
let _token_amount = tokens.len();
|
||||||
bindings: vec![binding]
|
let mut current_pos = 0;
|
||||||
})
|
|
||||||
}
|
match next_construct(tokens, current_pos) {
|
||||||
None => {
|
SyntaxResult::Ok(module) => Ok(ModuleAST {
|
||||||
Err(String::from("Syntax error."))
|
bindings: vec![module],
|
||||||
}
|
}),
|
||||||
|
SyntaxResult::None => Err(SyntaxError {
|
||||||
|
reason: String::from("D:"),
|
||||||
|
}),
|
||||||
|
SyntaxResult::Err(err) => Err(err),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn next_construct<'a>(tokens: &'a Vec<Token>, current_pos: usize) -> SyntaxResult {
|
||||||
|
None.or_else(|| binding::try_parse(tokens, 0))
|
||||||
|
.unwrap_or_else(|| {
|
||||||
|
SyntaxResult::Err(SyntaxError {
|
||||||
|
reason: String::from("Unrecognized token"),
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
10
src/token.rs
10
src/token.rs
@ -38,7 +38,7 @@ pub fn new_number(value: String, position: i32) -> Token {
|
|||||||
Token {
|
Token {
|
||||||
token_type: TokenType::Number,
|
token_type: TokenType::Number,
|
||||||
value,
|
value,
|
||||||
_position: position
|
_position: position,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -46,12 +46,16 @@ pub fn new_operator(value: String, position: i32) -> Token {
|
|||||||
Token {
|
Token {
|
||||||
token_type: TokenType::Operator,
|
token_type: TokenType::Operator,
|
||||||
value,
|
value,
|
||||||
_position: position
|
_position: position,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new(value: String, position: i32, token_type: TokenType) -> Token {
|
pub fn new(value: String, position: i32, token_type: TokenType) -> Token {
|
||||||
Token {token_type, value, _position: position}
|
Token {
|
||||||
|
token_type,
|
||||||
|
value,
|
||||||
|
_position: position,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new_identifier(value: String, position: i32) -> Token {
|
pub fn new_identifier(value: String, position: i32) -> Token {
|
||||||
|
Loading…
Reference in New Issue
Block a user