Get datatype from an identifier in the symbol table. Improve code documentation

This commit is contained in:
Araozu 2023-02-11 18:13:05 -05:00
parent 3a11000fe0
commit 5d40be6d90
16 changed files with 157 additions and 14 deletions

View File

@ -11,6 +11,11 @@
- [ ] Stdlib - [ ] Stdlib
- [ ] Document code - [ ] Document code
## v0.0.3
- Get datatype of an identifier from the symbol table
- Improve documentation of the code
## v0.0.2 ## v0.0.2
- Compilation of `val` and `var` bindings with a number, string or boolean as value. - Compilation of `val` and `var` bindings with a number, string or boolean as value.

View File

@ -22,4 +22,5 @@ pub enum Expression<'a> {
Number(&'a String), Number(&'a String),
String(&'a String), String(&'a String),
Boolean(bool), Boolean(bool),
Identifier(&'a String),
} }

View File

@ -2,6 +2,7 @@ use crate::ast_types::Binding;
use super::Transpilable; use super::Transpilable;
impl Transpilable for Binding<'_> { impl Transpilable for Binding<'_> {
/// Transpiles val and var bindings into JS.
fn transpile(&self) -> String { fn transpile(&self) -> String {
match self { match self {
Binding::Val(val_binding) => { Binding::Val(val_binding) => {

View File

@ -2,6 +2,13 @@ use crate::ast_types::Expression;
use super::Transpilable; use super::Transpilable;
impl Transpilable for Expression<'_> { impl Transpilable for Expression<'_> {
/// Transpiles an Expression to JS
///
/// Right now the expressions in the grammar are:
/// - Number
/// - String
/// - Boolean
/// - Identifier
fn transpile(&self) -> String { fn transpile(&self) -> String {
match self { match self {
Expression::Number(value) => { Expression::Number(value) => {
@ -13,6 +20,9 @@ impl Transpilable for Expression<'_> {
Expression::Boolean(value) => { Expression::Boolean(value) => {
String::from(if *value {"true"} else {"false"}) String::from(if *value {"true"} else {"false"})
} }
Expression::Identifier(value) => {
String::from(*value)
}
} }
} }
} }
@ -48,4 +58,13 @@ mod tests {
assert_eq!("true", result); assert_eq!("true", result);
} }
/// An identifier expression must be emitted verbatim as its own name.
#[test]
fn should_transpile_identifier() {
    let name = String::from("newValue");

    let transpiled = Expression::Identifier(&name).transpile();

    assert_eq!("newValue", transpiled);
}
} }

View File

@ -4,11 +4,13 @@ mod expression;
mod binding; mod binding;
mod module_ast; mod module_ast;
/// Trait that the AST and its nodes implement to support transformation to JavaScript
trait Transpilable { trait Transpilable {
/// Transforms this struct into JavaScript
fn transpile(&self) -> String; fn transpile(&self) -> String;
} }
/// Generates JavaScript from the AST /// Transforms an AST to its representation in JavaScript
pub fn codegen<'a>(ast: &'a ModuleAST) -> String { pub fn codegen<'a>(ast: &'a ModuleAST) -> String {
ast.transpile() ast.transpile()
} }

View File

@ -2,6 +2,8 @@ use crate::ast_types::ModuleAST;
use super::Transpilable; use super::Transpilable;
impl Transpilable for ModuleAST<'_> { impl Transpilable for ModuleAST<'_> {
/// Transpiles the whole AST into JS, using this same trait on the
/// nodes and leaves of the AST
fn transpile(&self) -> String { fn transpile(&self) -> String {
let bindings_str: Vec::<String> = self.bindings.iter().map(|binding| binding.transpile()).collect(); let bindings_str: Vec::<String> = self.bindings.iter().map(|binding| binding.transpile()).collect();

View File

@ -1,6 +1,9 @@
/// Represents an error in the scanning process
#[derive(Debug)] #[derive(Debug)]
pub struct LexError { pub struct LexError {
/// Position where the offending char was found
pub position: usize, pub position: usize,
/// Reason of the error
pub reason: String, pub reason: String,
} }

View File

@ -6,11 +6,29 @@ use lex_error::LexError;
type Chars = Vec<char>; type Chars = Vec<char>;
/// Represents the result of scanning a single token from the input
pub enum LexResult { pub enum LexResult {
// A token was scanned /// A token was found. The first element is the token, and the
/// second element is the position in the input after the token.
///
/// E.g., given an input
///
/// "`identifier 55`"
///
/// scanning from a position `0`, the result would be
///
/// `Some(Token("identifier"), 10)`.
///
/// where:
/// - `Token("identifier")` is the token
/// - `10` is the position where the token ends, and from where the next token
/// should be scanned
Some(Token, usize), Some(Token, usize),
// No token was found, but there was no error (EOF) /// No token was found. This indicates that EOF has been reached.
///
/// Contains the last position, which should be the input length - 1
None(usize), None(usize),
/// An error was found while scanning.
Err(LexError), Err(LexError),
} }
@ -38,6 +56,7 @@ pub fn get_tokens(input: &String) -> Result<Vec<Token>, LexError> {
Ok(results) Ok(results)
} }
/// Scans a single token from `chars`, starting from `current_pos`
fn next_token(chars: &Chars, current_pos: usize) -> LexResult { fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
let next_char = peek(chars, current_pos); let next_char = peek(chars, current_pos);
@ -72,11 +91,13 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
}) })
} }
/// Returns the char at `pos`
fn peek(input: &Chars, pos: usize) -> char { fn peek(input: &Chars, pos: usize) -> char {
let result = input.get(pos).unwrap_or(&'\0'); let result = input.get(pos).unwrap_or(&'\0');
*result *result
} }
/// Whether there is still input based on `current_pos`
fn has_input(input: &Chars, current_pos: usize) -> bool { fn has_input(input: &Chars, current_pos: usize) -> bool {
current_pos < input.len() current_pos < input.len()
} }

View File

@ -9,12 +9,15 @@ fn str_is_keyword(s: &String) -> Option<TokenType> {
} }
} }
/// Scans an identifier. This function assumes that `start_pos` is the start of
/// a valid identifier
pub fn scan(start_char: char, chars: &Vec<char>, start_pos: usize) -> LexResult { pub fn scan(start_char: char, chars: &Vec<char>, start_pos: usize) -> LexResult {
// The scanning is done by this recursive function
scan_impl(chars, start_pos + 1, format!("{}", start_char)) scan_impl(chars, start_pos + 1, format!("{}", start_char))
} }
pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult { /// Recursive function that scans the identifier
fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
match chars.get(start_pos) { match chars.get(start_pos) {
Some(c) if utils::is_identifier_char(*c) => { Some(c) if utils::is_identifier_char(*c) => {
scan_impl(chars, start_pos + 1, utils::str_append(current, *c)) scan_impl(chars, start_pos + 1, utils::str_append(current, *c))

View File

@ -5,19 +5,22 @@ mod operator;
mod identifier; mod identifier;
mod string; mod string;
/// Attempts to scan a number. Returns None to be able to chain other scanner
// This module contains the individual scanners, and exports them
/// Attempts to scan a number. If not found returns None to be able to chain other scanner
pub fn number(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> { pub fn number(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
utils::is_digit(c).then(|| number::scan(chars, start_pos)) utils::is_digit(c).then(|| number::scan(chars, start_pos))
} }
/// Attempts to scan an operator. Returns None to be able to chain other scanner /// Attempts to scan an operator. If not found returns None to be able to chain other scanner
pub fn operator(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> { pub fn operator(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
utils::is_operator(c).then(|| operator::scan(chars, start_pos)) utils::is_operator(c).then(|| operator::scan(chars, start_pos))
} }
/// Attempts to scan a grouping sign. Returns None to be able to chain other scanner /// Attempts to scan a grouping sign. If not found returns None to be able to chain other scanner
pub fn grouping_sign(c: char, _: &Vec<char>, start_pos: usize) -> Option<LexResult> { pub fn grouping_sign(c: char, _: &Vec<char>, start_pos: usize) -> Option<LexResult> {
let token_type = match c { let token_type = match c {
'(' => TokenType::LeftParen, '(' => TokenType::LeftParen,
@ -38,13 +41,14 @@ pub fn grouping_sign(c: char, _: &Vec<char>, start_pos: usize) -> Option<LexResu
} }
/// Attempts to scan an identifier. Returns None to be able to chain other scanner /// Attempts to scan an identifier. If not found returns None to be able to chain other scanner
pub fn identifier(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> { pub fn identifier(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
(utils::is_lowercase(c) || c == '_') (utils::is_lowercase(c) || c == '_')
.then(|| identifier::scan(c, chars, start_pos)) .then(|| identifier::scan(c, chars, start_pos))
} }
/// Attempts to scan a string. If not found, returns None so that other scanners can be chained
pub fn string(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> { pub fn string(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
(c == '"').then(|| string::scan(chars, start_pos + 1)) (c == '"').then(|| string::scan(chars, start_pos + 1))
} }

View File

@ -11,6 +11,7 @@ pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
scan_impl(chars, start_pos, String::from("")) scan_impl(chars, start_pos, String::from(""))
} }
/// Recursive function that does the scanning
pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult { pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
match chars.get(start_pos) { match chars.get(start_pos) {
Some(c) if *c == '"' => { Some(c) if *c == '"' => {
@ -56,6 +57,7 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
} }
/// Checks if the char at `start_pos` is an escape character
fn test_escape_char(chars: &Vec<char>, start_pos: usize) -> Option<char> { fn test_escape_char(chars: &Vec<char>, start_pos: usize) -> Option<char> {
if let Some(c) = chars.get(start_pos) { if let Some(c) = chars.get(start_pos) {
match *c { match *c {

View File

@ -1,16 +1,19 @@
/// Whether `c` is between `0-9`
pub fn is_digit(c: char) -> bool { pub fn is_digit(c: char) -> bool {
'0' <= c && c <= '9' '0' <= c && c <= '9'
} }
/// Whether `c` is a hexadecimal digit, i.e. between `0-9`, `a-f` or `A-F`
pub fn is_hex_digit(c: char) -> bool { pub fn is_hex_digit(c: char) -> bool {
is_digit(c) || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' is_digit(c) || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
} }
/// Joins a String and a char
pub fn str_append(current: String, c: char) -> String { pub fn str_append(current: String, c: char) -> String {
format!("{}{}", current, c) format!("{}{}", current, c)
} }
/// Whether `c` is an operator char.
pub fn is_operator(c: char) -> bool { pub fn is_operator(c: char) -> bool {
c == '+' || c == '-' || c == '=' || c == '*' || c == '!' c == '+' || c == '-' || c == '=' || c == '*' || c == '!'
|| c == '\\' || c == '/' || c == '|' || c == '@' || c == '\\' || c == '/' || c == '|' || c == '@'
@ -19,14 +22,17 @@ pub fn is_operator(c: char) -> bool {
|| c == '^' || c == '.' || c == ':' || c == '^' || c == '.' || c == ':'
} }
/// Whether `c` is between `a-z`
pub fn is_lowercase(c: char) -> bool { pub fn is_lowercase(c: char) -> bool {
c >= 'a' && c <= 'z' c >= 'a' && c <= 'z'
} }
/// Whether `c` is between `A-Z`
pub fn is_uppercase(c: char) -> bool { pub fn is_uppercase(c: char) -> bool {
c >= 'A' && c <= 'Z' c >= 'A' && c <= 'Z'
} }
/// Whether `c` is between `a-zA-Z_0-9`
pub fn is_identifier_char(c: char) -> bool { pub fn is_identifier_char(c: char) -> bool {
is_lowercase(c) || is_uppercase(c) || c == '_' || is_digit(c) is_lowercase(c) || is_uppercase(c) || c == '_' || is_digit(c)
} }

View File

@ -8,6 +8,7 @@ use super::syntax;
use super::semantic; use super::semantic;
use super::codegen; use super::codegen;
/// Executes Lexical analysis, handles errors and calls build_ast for the next phase
fn compile(input: &String) { fn compile(input: &String) {
let _tokens = lexic::get_tokens(input); let _tokens = lexic::get_tokens(input);
@ -22,6 +23,9 @@ fn compile(input: &String) {
} }
/// Executes Syntax analysis, and for now, Semantic analysis and Code generation.
///
/// Prints the generated code to stdout
fn build_ast(tokens: Vec<Token>) { fn build_ast(tokens: Vec<Token>) {
let ast = syntax::construct_ast(&tokens); let ast = syntax::construct_ast(&tokens);
@ -38,6 +42,7 @@ fn build_ast(tokens: Vec<Token>) {
} }
} }
/// Executes the REPL, reading from stdin, compiling and emitting JS to stdout
pub fn run() -> io::Result<()> { pub fn run() -> io::Result<()> {
let stdin = io::stdin(); let stdin = io::stdin();
let mut buffer = String::new(); let mut buffer = String::new();

View File

@ -8,24 +8,35 @@ pub fn check_ast<'a>(ast: &'a mut ModuleAST, symbol_table: &'a mut SymbolTable)
Binding::Val(binding) => { Binding::Val(binding) => {
symbol_table.add( symbol_table.add(
binding.identifier, binding.identifier,
get_expression_type(&binding.expression).as_str() get_expression_type(&binding.expression, symbol_table).as_str()
); );
} }
Binding::Var(binding) => { Binding::Var(binding) => {
symbol_table.add( symbol_table.add(
binding.identifier, binding.identifier,
get_expression_type(&binding.expression).as_str(), get_expression_type(&binding.expression, symbol_table).as_str(),
); );
} }
} }
} }
} }
fn get_expression_type(exp: &Expression) -> String { fn get_expression_type(exp: &Expression, symbol_table: &SymbolTable) -> String {
match exp { match exp {
Expression::Number(_) => String::from(_NUMBER), Expression::Number(_) => String::from(_NUMBER),
Expression::String(_) => String::from(_STRING), Expression::String(_) => String::from(_STRING),
Expression::Boolean(_) => String::from(_BOOLEAN), Expression::Boolean(_) => String::from(_BOOLEAN),
Expression::Identifier(id) => {
match symbol_table.get_type(*id) {
Some(datatype) => {
datatype
}
None => {
// Should add an error to the list instead of panicking
panic!("Semantic analysis: identifier {} not found", id);
}
}
}
} }
} }
@ -71,4 +82,24 @@ mod tests {
assert!(test_type(String::from("val a = false"), _BOOLEAN)); assert!(test_type(String::from("val a = false"), _BOOLEAN));
assert!(test_type(String::from("var a = true"), _BOOLEAN)); assert!(test_type(String::from("var a = true"), _BOOLEAN));
} }
/// A binding whose value is another identifier takes that identifier's
/// datatype from the symbol table.
#[test]
fn should_get_type_from_identifier() {
    let mut table = SymbolTable::new();

    // First statement: registers `identifier` in the table.
    let declaration_tokens = lexic::get_tokens(&String::from("val identifier = 20")).unwrap();
    let mut declaration_ast = syntax::construct_ast(&declaration_tokens).unwrap();
    check_ast(&mut declaration_ast, &mut table);

    // Second statement: binds a new name to the previously registered identifier.
    let reference_tokens = lexic::get_tokens(&String::from("val newValue = identifier")).unwrap();
    let mut reference_ast = syntax::construct_ast(&reference_tokens).unwrap();
    check_ast(&mut reference_ast, &mut table);

    // `newValue` must have the same datatype as the number bound to `identifier`.
    assert_eq!(_NUMBER, table.get_type("newValue").unwrap());
}
} }

View File

@ -39,6 +39,14 @@ impl SymbolTable {
}) })
.unwrap_or(false) .unwrap_or(false)
} }
/// Looks up the datatype stored in the table for `identifier`.
///
/// Returns an owned copy of the stored datatype, or `None` when the table
/// has no entry for `identifier`.
pub fn get_type(&self, identifier: &str) -> Option<String> {
    // Query with the borrowed `&str` directly instead of allocating a
    // temporary `String` key, and fetch only the value since the key is
    // never used.
    // NOTE(review): assumes `table` is a std map keyed by `String` with
    // `String` values, as the original lookup implies — confirm.
    self.table.get(identifier).cloned()
}
} }

View File

@ -1,7 +1,12 @@
use crate::token::{Token, TokenType}; use crate::token::{Token, TokenType};
use super::ast_types::Expression; use super::ast_types::Expression;
/// An expression can be:
///
/// - A number
/// - A string
/// - A boolean
/// - An identifier
pub fn try_parse(tokens: &Vec<Token>, pos: usize) -> Option<Expression> { pub fn try_parse(tokens: &Vec<Token>, pos: usize) -> Option<Expression> {
tokens tokens
.get(pos) .get(pos)
@ -16,6 +21,9 @@ pub fn try_parse(tokens: &Vec<Token>, pos: usize) -> Option<Expression> {
TokenType::Identifier if token.value == "true" || token.value == "false" => { TokenType::Identifier if token.value == "true" || token.value == "false" => {
Some(Expression::Boolean(token.value == "true")) Some(Expression::Boolean(token.value == "true"))
} }
TokenType::Identifier => {
Some(Expression::Identifier(&token.value))
}
_ => None _ => None
} }
}) })
@ -48,4 +56,26 @@ mod tests {
_ => panic!() _ => panic!()
} }
} }
/// The literal `true` must be parsed as a boolean expression holding `true`.
#[test]
fn should_parse_a_boolean() {
    let source = String::from("true");
    let tokens = get_tokens(&source).unwrap();

    let parsed = try_parse(&tokens, 0).unwrap();

    if let Expression::Boolean(value) = parsed {
        assert!(value)
    } else {
        // Any other expression kind is a parsing failure.
        panic!()
    }
}
/// A bare name must be parsed as an identifier expression carrying that name.
#[test]
fn should_parse_an_identifier() {
    let source = String::from("someIdentifier");
    let tokens = get_tokens(&source).unwrap();

    let parsed = try_parse(&tokens, 0).unwrap();

    if let Expression::Identifier(value) = parsed {
        assert_eq!("someIdentifier", value)
    } else {
        // Any other expression kind is a parsing failure.
        panic!()
    }
}
} }