diff --git a/CHANGELOG.md b/CHANGELOG.md index 70b2a5b..41b06fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,11 @@ - [ ] Stdlib - [ ] Document code +## v0.0.3 + +- Get datatype of an identifier from the symbol table +- Improve documentation of the code + ## v0.0.2 - Compilation of `val` and `var` bindings with a number, string or boolean as value. diff --git a/src/ast_types.rs b/src/ast_types.rs index c8d65ac..f657079 100644 --- a/src/ast_types.rs +++ b/src/ast_types.rs @@ -22,4 +22,5 @@ pub enum Expression<'a> { Number(&'a String), String(&'a String), Boolean(bool), + Identifier(&'a String), } diff --git a/src/codegen/binding.rs b/src/codegen/binding.rs index 5155562..301a643 100644 --- a/src/codegen/binding.rs +++ b/src/codegen/binding.rs @@ -2,6 +2,7 @@ use crate::ast_types::Binding; use super::Transpilable; impl Transpilable for Binding<'_> { + /// Transpiles val and var bindings into JS. fn transpile(&self) -> String { match self { Binding::Val(val_binding) => { diff --git a/src/codegen/expression.rs b/src/codegen/expression.rs index a4ed8ce..183d217 100644 --- a/src/codegen/expression.rs +++ b/src/codegen/expression.rs @@ -2,6 +2,13 @@ use crate::ast_types::Expression; use super::Transpilable; impl Transpilable for Expression<'_> { + /// Transpiles an Expression to JS + /// + /// Right now the expressions in the grammar are: + /// - Number + /// - String + /// - Boolean + /// - Identifier fn transpile(&self) -> String { match self { Expression::Number(value) => { @@ -13,6 +20,9 @@ impl Transpilable for Expression<'_> { Expression::Boolean(value) => { String::from(if *value {"true"} else {"false"}) } + Expression::Identifier(value) => { + String::from(*value) + } } } } @@ -48,4 +58,13 @@ mod tests { assert_eq!("true", result); } + + #[test] + fn should_transpile_identifier() { + let s = String::from("newValue"); + let exp = Expression::Identifier(&s); + let result = exp.transpile(); + + assert_eq!("newValue", result); + } } diff --git 
a/src/codegen/mod.rs b/src/codegen/mod.rs index 8108701..48781d8 100644 --- a/src/codegen/mod.rs +++ b/src/codegen/mod.rs @@ -4,11 +4,13 @@ mod expression; mod binding; mod module_ast; +/// Trait that the AST and its nodes implement to support transformation to JavaScript trait Transpilable { + /// Transforms this struct into JavaScript fn transpile(&self) -> String; } -/// Generates JavaScript from the AST +/// Transforms an AST to its representation in JavaScript pub fn codegen<'a>(ast: &'a ModuleAST) -> String { ast.transpile() } diff --git a/src/codegen/module_ast.rs b/src/codegen/module_ast.rs index 24c34f9..e9f17b4 100644 --- a/src/codegen/module_ast.rs +++ b/src/codegen/module_ast.rs @@ -2,6 +2,8 @@ use crate::ast_types::ModuleAST; use super::Transpilable; impl Transpilable for ModuleAST<'_> { + /// Transpiles the whole AST into JS, using this same trait on the + /// nodes and leaves of the AST fn transpile(&self) -> String { let bindings_str: Vec:: = self.bindings.iter().map(|binding| binding.transpile()).collect(); diff --git a/src/lexic/lex_error.rs b/src/lexic/lex_error.rs index 3e43059..cc4c65a 100755 --- a/src/lexic/lex_error.rs +++ b/src/lexic/lex_error.rs @@ -1,6 +1,9 @@ +/// Represents an error in the scanning process #[derive(Debug)] pub struct LexError { + /// Position where the offending char was found pub position: usize, + /// Reason for the error pub reason: String, } diff --git a/src/lexic/mod.rs b/src/lexic/mod.rs index 102a571..2894eef 100755 --- a/src/lexic/mod.rs +++ b/src/lexic/mod.rs @@ -6,11 +6,29 @@ use lex_error::LexError; type Chars = Vec; +/// Represents the result of scanning a single token from the input pub enum LexResult { - // A token was scanned + /// A token was found. The first element is the token, and the + /// second element is the position in the input after the token. 
+ /// + /// E.g., given an input + /// + /// "`identifier 55`" + /// + /// scanning from a position `0`, the result would be + /// + /// `Some(Token("identifier"), 10)`. + /// + /// where: + /// - `Token("identifier")` is the token + /// - `10` is the position where the token ends, and from where the next token + /// should be scanned Some(Token, usize), - // No token was found, but there was no error (EOF) + /// No token was found. This indicates that EOF has been reached. + /// + /// Contains the last position, which should be the input length - 1 None(usize), + /// An error was found while scanning. Err(LexError), } @@ -38,6 +56,7 @@ pub fn get_tokens(input: &String) -> Result, LexError> { Ok(results) } +/// Scans a single token from `chars`, starting from `current_pos` fn next_token(chars: &Chars, current_pos: usize) -> LexResult { let next_char = peek(chars, current_pos); @@ -72,11 +91,13 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult { }) } +/// Returns the char at `pos` fn peek(input: &Chars, pos: usize) -> char { let result = input.get(pos).unwrap_or(&'\0'); *result } +/// Whether there is still input based on `current_pos` fn has_input(input: &Chars, current_pos: usize) -> bool { current_pos < input.len() } diff --git a/src/lexic/scanner/identifier.rs b/src/lexic/scanner/identifier.rs index b8c58f1..d9ecef0 100755 --- a/src/lexic/scanner/identifier.rs +++ b/src/lexic/scanner/identifier.rs @@ -9,12 +9,15 @@ fn str_is_keyword(s: &String) -> Option { } } - +/// Scans an identifier. 
This function assumes that `start_pos` is the start of +/// a valid identifier pub fn scan(start_char: char, chars: &Vec, start_pos: usize) -> LexResult { + // The scanning is done by this recursive function scan_impl(chars, start_pos + 1, format!("{}", start_char)) } -pub fn scan_impl(chars: &Vec, start_pos: usize, current: String) -> LexResult { +/// Recursive function that scans the identifier +fn scan_impl(chars: &Vec, start_pos: usize, current: String) -> LexResult { match chars.get(start_pos) { Some(c) if utils::is_identifier_char(*c) => { scan_impl(chars, start_pos + 1, utils::str_append(current, *c)) diff --git a/src/lexic/scanner/mod.rs b/src/lexic/scanner/mod.rs index 43ba880..74517d1 100755 --- a/src/lexic/scanner/mod.rs +++ b/src/lexic/scanner/mod.rs @@ -5,19 +5,22 @@ mod operator; mod identifier; mod string; -/// Attempts to scan a number. Returns None to be able to chain other scanner + +// This module contains the individual scanners, and exports them + +/// Attempts to scan a number. If not found returns None to be able to chain other scanner pub fn number(c: char, chars: &Vec, start_pos: usize) -> Option { utils::is_digit(c).then(|| number::scan(chars, start_pos)) } -/// Attempts to scan an operator. Returns None to be able to chain other scanner +/// Attempts to scan an operator. If not found returns None to be able to chain other scanner pub fn operator(c: char, chars: &Vec, start_pos: usize) -> Option { utils::is_operator(c).then(|| operator::scan(chars, start_pos)) } -/// Attempts to scan a grouping sign. Returns None to be able to chain other scanner +/// Attempts to scan a grouping sign. 
If not found returns None to be able to chain other scanner pub fn grouping_sign(c: char, _: &Vec, start_pos: usize) -> Option { let token_type = match c { '(' => TokenType::LeftParen, @@ -38,13 +41,14 @@ pub fn grouping_sign(c: char, _: &Vec, start_pos: usize) -> Option, start_pos: usize) -> Option { (utils::is_lowercase(c) || c == '_') .then(|| identifier::scan(c, chars, start_pos)) } +/// Attempts to scan a string. If not found returns None to be able to chain other scanner pub fn string(c: char, chars: &Vec, start_pos: usize) -> Option { (c == '"').then(|| string::scan(chars, start_pos + 1)) } diff --git a/src/lexic/scanner/string.rs b/src/lexic/scanner/string.rs index ec1b764..db2c77e 100755 --- a/src/lexic/scanner/string.rs +++ b/src/lexic/scanner/string.rs @@ -11,6 +11,7 @@ pub fn scan(chars: &Vec, start_pos: usize) -> LexResult { scan_impl(chars, start_pos, String::from("")) } +/// Recursive function that does the scanning pub fn scan_impl(chars: &Vec, start_pos: usize, current: String) -> LexResult { match chars.get(start_pos) { Some(c) if *c == '"' => { @@ -56,6 +57,7 @@ pub fn scan_impl(chars: &Vec, start_pos: usize, current: String) -> LexRes } +/// Checks if the char at `start_pos` is an escape character fn test_escape_char(chars: &Vec, start_pos: usize) -> Option { if let Some(c) = chars.get(start_pos) { match *c { diff --git a/src/lexic/utils.rs b/src/lexic/utils.rs index eea48f2..dd012f2 100755 --- a/src/lexic/utils.rs +++ b/src/lexic/utils.rs @@ -1,16 +1,19 @@ - +/// Whether `c` is between `0-9` pub fn is_digit(c: char) -> bool { '0' <= c && c <= '9' } +/// Whether `c` is a hexadecimal digit (`0-9a-fA-F`) pub fn is_hex_digit(c: char) -> bool { is_digit(c) || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' } +/// Joins a String and a char pub fn str_append(current: String, c: char) -> String { format!("{}{}", current, c) } +/// Whether `c` is an operator char. pub fn is_operator(c: char) -> bool { c == '+' || c == '-' || c == '=' || c == '*' || c == '!' 
|| c == '\\' || c == '/' || c == '|' || c == '@' @@ -19,14 +22,17 @@ pub fn is_operator(c: char) -> bool { || c == '^' || c == '.' || c == ':' } +/// Whether `c` is between `a-z` pub fn is_lowercase(c: char) -> bool { c >= 'a' && c <= 'z' } +/// Whether `c` is between `A-Z` pub fn is_uppercase(c: char) -> bool { c >= 'A' && c <= 'Z' } +/// Whether `c` is between `a-zA-Z_0-9` pub fn is_identifier_char(c: char) -> bool { is_lowercase(c) || is_uppercase(c) || c == '_' || is_digit(c) } diff --git a/src/repl/mod.rs b/src/repl/mod.rs index fa79cfb..616224f 100755 --- a/src/repl/mod.rs +++ b/src/repl/mod.rs @@ -8,6 +8,7 @@ use super::syntax; use super::semantic; use super::codegen; +/// Executes Lexical analysis, handles errors and calls build_ast for the next phase fn compile(input: &String) { let _tokens = lexic::get_tokens(input); @@ -22,6 +23,9 @@ fn compile(input: &String) { } +/// Executes Syntax analysis, and for now, Semantic analysis and Code generation. +/// +/// Prints the generated code to stdout fn build_ast(tokens: Vec) { let ast = syntax::construct_ast(&tokens); @@ -38,6 +42,7 @@ fn build_ast(tokens: Vec) { } } +/// Executes the REPL, reading from stdin, compiling and emitting JS to stdout pub fn run() -> io::Result<()> { let stdin = io::stdin(); let mut buffer = String::new(); diff --git a/src/semantic/mod.rs b/src/semantic/mod.rs index 7557eca..539736e 100644 --- a/src/semantic/mod.rs +++ b/src/semantic/mod.rs @@ -8,24 +8,35 @@ pub fn check_ast<'a>(ast: &'a mut ModuleAST, symbol_table: &'a mut SymbolTable) Binding::Val(binding) => { symbol_table.add( binding.identifier, - get_expression_type(&binding.expression).as_str() + get_expression_type(&binding.expression, symbol_table).as_str() ); } Binding::Var(binding) => { symbol_table.add( binding.identifier, - get_expression_type(&binding.expression).as_str(), + get_expression_type(&binding.expression, symbol_table).as_str(), ); } } } } -fn 
get_expression_type(exp: &Expression, symbol_table: &SymbolTable) -> String { match exp { Expression::Number(_) => String::from(_NUMBER), Expression::String(_) => String::from(_STRING), Expression::Boolean(_) => String::from(_BOOLEAN), + Expression::Identifier(id) => { + match symbol_table.get_type(*id) { + Some(datatype) => { + datatype + } + None => { + // Should add an error to the list instead of panicking + panic!("Semantic analysis: identifier {} not found", id); + } + } + } } } @@ -71,4 +82,24 @@ mod tests { assert!(test_type(String::from("val a = false"), _BOOLEAN)); assert!(test_type(String::from("var a = true"), _BOOLEAN)); } + + #[test] + fn should_get_type_from_identifier() { + let mut table = SymbolTable::new(); + let tokens = lexic::get_tokens(&String::from("val identifier = 20")).unwrap(); + let mut ast = syntax::construct_ast(&tokens).unwrap(); + + // Add an identifier + check_ast(&mut ast, &mut table); + + let tokens = lexic::get_tokens(&String::from("val newValue = identifier")).unwrap(); + let mut ast = syntax::construct_ast(&tokens).unwrap(); + + // Add a new value that references an identifier + check_ast(&mut ast, &mut table); + + // The type should be Num + let current_type = table.get_type("newValue").unwrap(); + assert_eq!(_NUMBER, current_type); + } } diff --git a/src/symbol_table.rs b/src/symbol_table.rs index 3b7e716..9b9fd39 100644 --- a/src/symbol_table.rs +++ b/src/symbol_table.rs @@ -39,6 +39,14 @@ impl SymbolTable { }) .unwrap_or(false) } + + pub fn get_type(&self, identifier: &str) -> Option { + self.table + .get_key_value(&String::from(identifier)) + .and_then(|(_, value)| { + Some(String::from(value)) + }) + } } diff --git a/src/syntax/expression.rs b/src/syntax/expression.rs index c4cd3b7..ea278cb 100644 --- a/src/syntax/expression.rs +++ b/src/syntax/expression.rs @@ -1,7 +1,12 @@ use crate::token::{Token, TokenType}; use super::ast_types::Expression; - +/// An expression can be: +/// +/// - A number +/// - A string +/// - A 
boolean +/// - An identifier pub fn try_parse(tokens: &Vec, pos: usize) -> Option { tokens .get(pos) @@ -16,6 +21,9 @@ pub fn try_parse(tokens: &Vec, pos: usize) -> Option { TokenType::Identifier if token.value == "true" || token.value == "false" => { Some(Expression::Boolean(token.value == "true")) } + TokenType::Identifier => { + Some(Expression::Identifier(&token.value)) + } _ => None } }) @@ -48,4 +56,26 @@ mod tests { _ => panic!() } } + + #[test] + fn should_parse_a_boolean() { + let tokens = get_tokens(&String::from("true")).unwrap(); + let expression = try_parse(&tokens, 0).unwrap(); + + match expression { + Expression::Boolean(value) => assert!(value), + _ => panic!() + } + } + + #[test] + fn should_parse_an_identifier() { + let tokens = get_tokens(&String::from("someIdentifier")).unwrap(); + let expression = try_parse(&tokens, 0).unwrap(); + + match expression { + Expression::Identifier(value) => assert_eq!("someIdentifier", value), + _ => panic!() + } + } }