Get datatype from an identifier in the symbol table. Improve code documentation

This commit is contained in:
Araozu 2023-02-11 18:13:05 -05:00
parent 3a11000fe0
commit 5d40be6d90
16 changed files with 157 additions and 14 deletions

View File

@ -11,6 +11,11 @@
- [ ] Stdlib
- [ ] Document code
## v0.0.3
- Get datatype of an identifier from the symbol table
- Improve documentation of the code
## v0.0.2
- Compilation of `val` and `var` bindings with a number, string or boolean as value.

View File

@ -22,4 +22,5 @@ pub enum Expression<'a> {
Number(&'a String),
String(&'a String),
Boolean(bool),
Identifier(&'a String),
}

View File

@ -2,6 +2,7 @@ use crate::ast_types::Binding;
use super::Transpilable;
impl Transpilable for Binding<'_> {
/// Transpiles val and var bindings into JS.
fn transpile(&self) -> String {
match self {
Binding::Val(val_binding) => {

View File

@ -2,6 +2,13 @@ use crate::ast_types::Expression;
use super::Transpilable;
impl Transpilable for Expression<'_> {
/// Transpiles an Expression to JS
///
/// Right now the expressions in the grammar are:
/// - Number
/// - String
/// - Boolean
/// - Identifier
fn transpile(&self) -> String {
match self {
Expression::Number(value) => {
@ -13,6 +20,9 @@ impl Transpilable for Expression<'_> {
Expression::Boolean(value) => {
String::from(if *value {"true"} else {"false"})
}
Expression::Identifier(value) => {
String::from(*value)
}
}
}
}
@ -48,4 +58,13 @@ mod tests {
assert_eq!("true", result);
}
#[test]
fn should_transpile_identifier() {
    // An identifier expression must be emitted verbatim in the JS output
    let name = String::from("newValue");
    let transpiled = Expression::Identifier(&name).transpile();

    assert_eq!("newValue", transpiled);
}
}

View File

@ -4,11 +4,13 @@ mod expression;
mod binding;
mod module_ast;
/// Trait that the AST and its nodes implement to support transformation to JavaScript.
///
/// It is implemented for `ModuleAST`, `Binding` and `Expression`, so transpiling
/// the root of the tree can delegate to the same method on its children.
trait Transpilable {
/// Transforms this struct into JavaScript.
///
/// Returns the generated JavaScript source as an owned `String`.
fn transpile(&self) -> String;
}
/// Generates JavaScript from the AST.
///
/// Delegates to the `Transpilable` implementation of `ModuleAST` and returns
/// the resulting JavaScript source as a `String`.
pub fn codegen(ast: &ModuleAST) -> String {
    ast.transpile()
}

View File

@ -2,6 +2,8 @@ use crate::ast_types::ModuleAST;
use super::Transpilable;
impl Transpilable for ModuleAST<'_> {
/// Transpiles the whole AST into JS, using this same trait on the
/// nodes and leaves of the AST
fn transpile(&self) -> String {
let bindings_str: Vec::<String> = self.bindings.iter().map(|binding| binding.transpile()).collect();

View File

@ -1,6 +1,9 @@
/// Represents an error in the scanning process
#[derive(Debug)]
pub struct LexError {
/// Position where the offending char was found
pub position: usize,
/// Reason of the error
pub reason: String,
}

View File

@ -6,11 +6,29 @@ use lex_error::LexError;
type Chars = Vec<char>;
/// Represents the result of scanning a single token from the input
pub enum LexResult {
/// A token was found. The first element is the token, and the
/// second element is the position in the input after the token.
///
/// E.g., given an input
///
/// "`identifier 55`"
///
/// scanning from a position `0`, the result would be
///
/// `Some(Token("identifier"), 10)`.
///
/// where:
/// - `Token("identifier")` is the token
/// - `10` is the position where the token ends, and from where the next token
/// should be scanned
Some(Token, usize),
/// No token was found, but there was no error. This indicates that EOF has
/// been reached.
///
/// Contains the last position, which should be the input length - 1
None(usize),
/// An error was found while scanning.
Err(LexError),
}
@ -38,6 +56,7 @@ pub fn get_tokens(input: &String) -> Result<Vec<Token>, LexError> {
Ok(results)
}
/// Scans a single token from `chars`, starting from `current_pos`
fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
let next_char = peek(chars, current_pos);
@ -72,11 +91,13 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
})
}
/// Returns the char stored at index `pos` of `input`.
///
/// When `pos` is out of bounds the NUL char (`'\0'`) is returned instead.
fn peek(input: &Chars, pos: usize) -> char {
    match input.get(pos) {
        Some(found) => *found,
        None => '\0',
    }
}
/// Reports whether `current_pos` is still a valid index into `input`,
/// i.e. whether there are chars left to read.
fn has_input(input: &Chars, current_pos: usize) -> bool {
    input.get(current_pos).is_some()
}

View File

@ -9,12 +9,15 @@ fn str_is_keyword(s: &String) -> Option<TokenType> {
}
}
/// Scans an identifier whose first char is `start_char`, found at `start_pos`.
///
/// This function assumes the caller already checked that `start_pos` is the
/// start of a valid identifier.
pub fn scan(start_char: char, chars: &Vec<char>, start_pos: usize) -> LexResult {
    // The first char is already known, so the recursive scanner starts
    // right after it with that char as the initial accumulated value
    let next_pos = start_pos + 1;
    let initial = String::from(start_char);
    scan_impl(chars, next_pos, initial)
}
pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
/// Recursive function that scans the identifier
fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
match chars.get(start_pos) {
Some(c) if utils::is_identifier_char(*c) => {
scan_impl(chars, start_pos + 1, utils::str_append(current, *c))

View File

@ -5,19 +5,22 @@ mod operator;
mod identifier;
mod string;
/// Attempts to scan a number. Returns None to be able to chain other scanner
// This module contains the individual scanners, and exports them
/// Tries to scan a number starting at `start_pos`.
///
/// Returns `None` when `c` is not a digit, so that other scanners can be chained.
pub fn number(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
    if utils::is_digit(c) {
        Some(number::scan(chars, start_pos))
    } else {
        None
    }
}
/// Attempts to scan an operator. Returns None to be able to chain other scanner
/// Tries to scan an operator starting at `start_pos`.
///
/// Returns `None` when `c` is not an operator char, so that other scanners can be chained.
pub fn operator(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
    if !utils::is_operator(c) {
        return None;
    }

    Some(operator::scan(chars, start_pos))
}
/// Attempts to scan a grouping sign. Returns None to be able to chain other scanner
/// Attempts to scan a grouping sign. If not found returns None to be able to chain other scanner
pub fn grouping_sign(c: char, _: &Vec<char>, start_pos: usize) -> Option<LexResult> {
let token_type = match c {
'(' => TokenType::LeftParen,
@ -38,13 +41,14 @@ pub fn grouping_sign(c: char, _: &Vec<char>, start_pos: usize) -> Option<LexResu
}
/// Attempts to scan an identifier. Returns None to be able to chain other scanner
/// Tries to scan an identifier starting at `start_pos`.
///
/// An identifier must begin with a lowercase letter or an underscore; for any
/// other `c` this returns `None` so that other scanners can be chained.
pub fn identifier(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
    let starts_identifier = c == '_' || utils::is_lowercase(c);

    if starts_identifier {
        Some(identifier::scan(c, chars, start_pos))
    } else {
        None
    }
}
/// Tries to scan a string starting at `start_pos`.
///
/// Returns `None` when `c` is not a double quote, so that other scanners can be chained.
pub fn string(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
    match c {
        // Scanning starts one position after the opening quote
        '"' => Some(string::scan(chars, start_pos + 1)),
        _ => None,
    }
}

View File

@ -11,6 +11,7 @@ pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
scan_impl(chars, start_pos, String::from(""))
}
/// Recursive function that does the scanning
pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
match chars.get(start_pos) {
Some(c) if *c == '"' => {
@ -56,6 +57,7 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
}
/// Checks if the char at `start_pos` is an escape character
fn test_escape_char(chars: &Vec<char>, start_pos: usize) -> Option<char> {
if let Some(c) = chars.get(start_pos) {
match *c {

View File

@ -1,16 +1,19 @@
/// Returns `true` when `c` is one of the ASCII decimal digits `0` through `9`.
pub fn is_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
/// Returns `true` when `c` is a hexadecimal digit: `0-9`, `a-f` or `A-F`.
pub fn is_hex_digit(c: char) -> bool {
    is_digit(c) || matches!(c, 'a'..='f' | 'A'..='F')
}
/// Appends `c` to `current` and returns the resulting string.
///
/// `current` is taken by value, so the char is pushed onto its existing buffer
/// instead of formatting both pieces into a freshly allocated `String`.
pub fn str_append(mut current: String, c: char) -> String {
    current.push(c);
    current
}
/// Whether `c` is an operator char.
pub fn is_operator(c: char) -> bool {
c == '+' || c == '-' || c == '=' || c == '*' || c == '!'
|| c == '\\' || c == '/' || c == '|' || c == '@'
@ -19,14 +22,17 @@ pub fn is_operator(c: char) -> bool {
|| c == '^' || c == '.' || c == ':'
}
/// Returns `true` when `c` is an ASCII lowercase letter (`a` through `z`).
pub fn is_lowercase(c: char) -> bool {
    c.is_ascii_lowercase()
}
/// Returns `true` when `c` is an ASCII uppercase letter (`A` through `Z`).
pub fn is_uppercase(c: char) -> bool {
    ('A'..='Z').contains(&c)
}
/// Returns `true` when `c` may appear inside an identifier: an ASCII letter
/// (`a-z`, `A-Z`), a digit (`0-9`) or an underscore.
pub fn is_identifier_char(c: char) -> bool {
    match c {
        '_' => true,
        _ => is_lowercase(c) || is_uppercase(c) || is_digit(c),
    }
}

View File

@ -8,6 +8,7 @@ use super::syntax;
use super::semantic;
use super::codegen;
/// Executes Lexical analysis, handles errors and calls build_ast for the next phase
fn compile(input: &String) {
let _tokens = lexic::get_tokens(input);
@ -22,6 +23,9 @@ fn compile(input: &String) {
}
/// Executes Syntax analysis, and for now, Semantic analysis and Code generation.
///
/// Prints the generated code to stdout
fn build_ast(tokens: Vec<Token>) {
let ast = syntax::construct_ast(&tokens);
@ -38,6 +42,7 @@ fn build_ast(tokens: Vec<Token>) {
}
}
/// Executes the REPL, reading from stdin, compiling and emitting JS to stdout
pub fn run() -> io::Result<()> {
let stdin = io::stdin();
let mut buffer = String::new();

View File

@ -8,24 +8,35 @@ pub fn check_ast<'a>(ast: &'a mut ModuleAST, symbol_table: &'a mut SymbolTable)
Binding::Val(binding) => {
symbol_table.add(
binding.identifier,
get_expression_type(&binding.expression).as_str()
get_expression_type(&binding.expression, symbol_table).as_str()
);
}
Binding::Var(binding) => {
symbol_table.add(
binding.identifier,
get_expression_type(&binding.expression).as_str(),
get_expression_type(&binding.expression, symbol_table).as_str(),
);
}
}
}
}
fn get_expression_type(exp: &Expression) -> String {
/// Resolves the datatype of `exp` as a `String`.
///
/// Literals map directly to their datatype constant. An identifier takes the
/// datatype registered for it in `symbol_table`.
///
/// # Panics
///
/// Panics when `exp` is an identifier that is not present in `symbol_table`.
fn get_expression_type(exp: &Expression, symbol_table: &SymbolTable) -> String {
    match exp {
        Expression::Number(_) => String::from(_NUMBER),
        Expression::String(_) => String::from(_STRING),
        Expression::Boolean(_) => String::from(_BOOLEAN),
        Expression::Identifier(id) => symbol_table
            .get_type(id.as_str())
            .unwrap_or_else(|| {
                // Should add an error to the list instead of panicking
                panic!("Semantic analysis: identifier {} not found", id)
            }),
    }
}
@ -71,4 +82,24 @@ mod tests {
assert!(test_type(String::from("val a = false"), _BOOLEAN));
assert!(test_type(String::from("var a = true"), _BOOLEAN));
}
#[test]
fn should_get_type_from_identifier() {
    let mut table = SymbolTable::new();

    // The first binding registers `identifier`; the second one references it,
    // so both are checked in order against the same symbol table
    for source in ["val identifier = 20", "val newValue = identifier"] {
        let tokens = lexic::get_tokens(&String::from(source)).unwrap();
        let mut ast = syntax::construct_ast(&tokens).unwrap();
        check_ast(&mut ast, &mut table);
    }

    // `newValue` takes the type of `identifier`, which is a number
    let current_type = table.get_type("newValue").unwrap();
    assert_eq!(_NUMBER, current_type);
}
}

View File

@ -39,6 +39,14 @@ impl SymbolTable {
})
.unwrap_or(false)
}
/// Returns a copy of the datatype stored for `identifier`, or `None` when
/// the table has no entry for it.
pub fn get_type(&self, identifier: &str) -> Option<String> {
    // Look up by `&str` directly so no temporary `String` key is allocated
    self.table.get(identifier).cloned()
}
}

View File

@ -1,7 +1,12 @@
use crate::token::{Token, TokenType};
use super::ast_types::Expression;
/// An expression can be:
///
/// - A number
/// - A string
/// - A boolean
/// - An identifier
pub fn try_parse(tokens: &Vec<Token>, pos: usize) -> Option<Expression> {
tokens
.get(pos)
@ -16,6 +21,9 @@ pub fn try_parse(tokens: &Vec<Token>, pos: usize) -> Option<Expression> {
TokenType::Identifier if token.value == "true" || token.value == "false" => {
Some(Expression::Boolean(token.value == "true"))
}
TokenType::Identifier => {
Some(Expression::Identifier(&token.value))
}
_ => None
}
})
@ -48,4 +56,26 @@ mod tests {
_ => panic!()
}
}
#[test]
fn should_parse_a_boolean() {
    let source = String::from("true");
    let tokens = get_tokens(&source).unwrap();

    // The literal `true` must be parsed as a boolean holding `true`
    if let Expression::Boolean(value) = try_parse(&tokens, 0).unwrap() {
        assert!(value);
    } else {
        panic!();
    }
}
#[test]
fn should_parse_an_identifier() {
    let source = String::from("someIdentifier");
    let tokens = get_tokens(&source).unwrap();

    // Any identifier other than `true`/`false` must keep its name untouched
    if let Expression::Identifier(value) = try_parse(&tokens, 0).unwrap() {
        assert_eq!("someIdentifier", value);
    } else {
        panic!();
    }
}
}