Get datatype from an identifier in the symbol table. Improve code documentation
This commit is contained in:
parent
3a11000fe0
commit
5d40be6d90
@ -11,6 +11,11 @@
|
||||
- [ ] Stdlib
|
||||
- [ ] Document code
|
||||
|
||||
## v0.0.3
|
||||
|
||||
- Get datatype of an identifier from the symbol table
|
||||
- Improve documentation of the code
|
||||
|
||||
## v0.0.2
|
||||
|
||||
- Compilation of `val` and `var` bindings with a number, string or boolean as value.
|
||||
|
@ -22,4 +22,5 @@ pub enum Expression<'a> {
|
||||
Number(&'a String),
|
||||
String(&'a String),
|
||||
Boolean(bool),
|
||||
Identifier(&'a String),
|
||||
}
|
||||
|
@ -2,6 +2,7 @@ use crate::ast_types::Binding;
|
||||
use super::Transpilable;
|
||||
|
||||
impl Transpilable for Binding<'_> {
|
||||
/// Transpiles val and var bindings into JS.
|
||||
fn transpile(&self) -> String {
|
||||
match self {
|
||||
Binding::Val(val_binding) => {
|
||||
|
@ -2,6 +2,13 @@ use crate::ast_types::Expression;
|
||||
use super::Transpilable;
|
||||
|
||||
impl Transpilable for Expression<'_> {
|
||||
/// Transpiles an Expression to JS
|
||||
///
|
||||
/// Right now the expressions in the grammar are:
|
||||
/// - Number
|
||||
/// - String
|
||||
/// - Boolean
|
||||
/// - Identifier
|
||||
fn transpile(&self) -> String {
|
||||
match self {
|
||||
Expression::Number(value) => {
|
||||
@ -13,6 +20,9 @@ impl Transpilable for Expression<'_> {
|
||||
Expression::Boolean(value) => {
|
||||
String::from(if *value {"true"} else {"false"})
|
||||
}
|
||||
Expression::Identifier(value) => {
|
||||
String::from(*value)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -48,4 +58,13 @@ mod tests {
|
||||
|
||||
assert_eq!("true", result);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_transpile_identifier() {
|
||||
let s = String::from("newValue");
|
||||
let exp = Expression::Identifier(&s);
|
||||
let result = exp.transpile();
|
||||
|
||||
assert_eq!("newValue", result);
|
||||
}
|
||||
}
|
||||
|
@ -4,11 +4,13 @@ mod expression;
|
||||
mod binding;
|
||||
mod module_ast;
|
||||
|
||||
/// Trait that the AST and its nodes implement to support transformation to JavaScript
|
||||
trait Transpilable {
|
||||
/// Transforms this struct into JavaScript
|
||||
fn transpile(&self) -> String;
|
||||
}
|
||||
|
||||
/// Generates JavaScript from the AST
|
||||
/// Transforms an AST to its representation in JavaScript
|
||||
pub fn codegen<'a>(ast: &'a ModuleAST) -> String {
|
||||
ast.transpile()
|
||||
}
|
||||
|
@ -2,6 +2,8 @@ use crate::ast_types::ModuleAST;
|
||||
use super::Transpilable;
|
||||
|
||||
impl Transpilable for ModuleAST<'_> {
|
||||
/// Transpiles the whole AST into JS, using this same trait on the
|
||||
/// nodes and leaves of the AST
|
||||
fn transpile(&self) -> String {
|
||||
let bindings_str: Vec::<String> = self.bindings.iter().map(|binding| binding.transpile()).collect();
|
||||
|
||||
|
@ -1,6 +1,9 @@
|
||||
|
||||
/// Represents an error in the scanning process
|
||||
#[derive(Debug)]
|
||||
pub struct LexError {
|
||||
/// Position where the offending char was found
|
||||
pub position: usize,
|
||||
/// Reason for the error
|
||||
pub reason: String,
|
||||
}
|
||||
|
@ -6,11 +6,29 @@ use lex_error::LexError;
|
||||
|
||||
type Chars = Vec<char>;
|
||||
|
||||
/// Represents the result of scanning a single token from the input
|
||||
pub enum LexResult {
|
||||
// A token was scanned
|
||||
/// A token was found. The first element is the token, and the
|
||||
/// second element is the position in the input after the token.
|
||||
///
|
||||
/// E.g., given an input
|
||||
///
|
||||
/// "`identifier 55`"
|
||||
///
|
||||
/// scanning from a position `0`, the result would be
|
||||
///
|
||||
/// `Some(Token("identifier"), 10)`.
|
||||
///
|
||||
/// where:
|
||||
/// - `Token("identifier")` is the token
|
||||
/// - `10` is the position where the token ends, and from where the next token
|
||||
/// should be scanned
|
||||
Some(Token, usize),
|
||||
// No token was found, but there was no error (EOF)
|
||||
/// No token was found. This indicates that EOF has been reached.
|
||||
///
|
||||
/// Contains the last position, which should be the input length - 1
|
||||
None(usize),
|
||||
/// An error was found while scanning.
|
||||
Err(LexError),
|
||||
}
|
||||
|
||||
@ -38,6 +56,7 @@ pub fn get_tokens(input: &String) -> Result<Vec<Token>, LexError> {
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Scans a single token from `chars`, starting from `current_pos`
|
||||
fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
|
||||
let next_char = peek(chars, current_pos);
|
||||
|
||||
@ -72,11 +91,13 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the char at `pos`
|
||||
fn peek(input: &Chars, pos: usize) -> char {
|
||||
let result = input.get(pos).unwrap_or(&'\0');
|
||||
*result
|
||||
}
|
||||
|
||||
/// Whether there is still input based on `current_pos`
|
||||
fn has_input(input: &Chars, current_pos: usize) -> bool {
|
||||
current_pos < input.len()
|
||||
}
|
||||
|
@ -9,12 +9,15 @@ fn str_is_keyword(s: &String) -> Option<TokenType> {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Scans an identifier. This function assumes that `start_pos` is the start of
|
||||
/// a valid identifier
|
||||
pub fn scan(start_char: char, chars: &Vec<char>, start_pos: usize) -> LexResult {
|
||||
// The scanning is done by this recursive function
|
||||
scan_impl(chars, start_pos + 1, format!("{}", start_char))
|
||||
}
|
||||
|
||||
pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
|
||||
/// Recursive function that scans the identifier
|
||||
fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
|
||||
match chars.get(start_pos) {
|
||||
Some(c) if utils::is_identifier_char(*c) => {
|
||||
scan_impl(chars, start_pos + 1, utils::str_append(current, *c))
|
||||
|
@ -5,19 +5,22 @@ mod operator;
|
||||
mod identifier;
|
||||
mod string;
|
||||
|
||||
/// Attempts to scan a number. Returns None to be able to chain other scanner
|
||||
|
||||
// This module contains the individual scanners, and exports them
|
||||
|
||||
/// Attempts to scan a number. If not found, returns None so that other scanners can be chained
|
||||
pub fn number(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||
utils::is_digit(c).then(|| number::scan(chars, start_pos))
|
||||
}
|
||||
|
||||
|
||||
/// Attempts to scan an operator. Returns None to be able to chain other scanner
|
||||
/// Attempts to scan an operator. If not found, returns None so that other scanners can be chained
|
||||
pub fn operator(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||
utils::is_operator(c).then(|| operator::scan(chars, start_pos))
|
||||
}
|
||||
|
||||
|
||||
/// Attempts to scan a grouping sign. Returns None to be able to chain other scanner
|
||||
/// Attempts to scan a grouping sign. If not found, returns None so that other scanners can be chained
|
||||
pub fn grouping_sign(c: char, _: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||
let token_type = match c {
|
||||
'(' => TokenType::LeftParen,
|
||||
@ -38,13 +41,14 @@ pub fn grouping_sign(c: char, _: &Vec<char>, start_pos: usize) -> Option<LexResu
|
||||
}
|
||||
|
||||
|
||||
/// Attempts to scan an identifier. Returns None to be able to chain other scanner
|
||||
/// Attempts to scan an identifier. If not found, returns None so that other scanners can be chained
|
||||
pub fn identifier(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||
(utils::is_lowercase(c) || c == '_')
|
||||
.then(|| identifier::scan(c, chars, start_pos))
|
||||
}
|
||||
|
||||
|
||||
/// Attempts to scan a string. If not found, returns None so that other scanners can be chained
|
||||
pub fn string(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||
(c == '"').then(|| string::scan(chars, start_pos + 1))
|
||||
}
|
||||
|
@ -11,6 +11,7 @@ pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
|
||||
scan_impl(chars, start_pos, String::from(""))
|
||||
}
|
||||
|
||||
/// Recursive function that does the scanning
|
||||
pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
|
||||
match chars.get(start_pos) {
|
||||
Some(c) if *c == '"' => {
|
||||
@ -56,6 +57,7 @@ pub fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexRes
|
||||
}
|
||||
|
||||
|
||||
/// Checks if the char at `start_pos` is an escape character
|
||||
fn test_escape_char(chars: &Vec<char>, start_pos: usize) -> Option<char> {
|
||||
if let Some(c) = chars.get(start_pos) {
|
||||
match *c {
|
||||
|
@ -1,16 +1,19 @@
|
||||
|
||||
/// Whether `c` is between `0-9`
|
||||
pub fn is_digit(c: char) -> bool {
|
||||
'0' <= c && c <= '9'
|
||||
}
|
||||
|
||||
/// Whether `c` is a hexadecimal digit (`0-9a-fA-F`)
|
||||
pub fn is_hex_digit(c: char) -> bool {
|
||||
is_digit(c) || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
|
||||
}
|
||||
|
||||
/// Joins a String and a char
|
||||
pub fn str_append(current: String, c: char) -> String {
|
||||
format!("{}{}", current, c)
|
||||
}
|
||||
|
||||
/// Whether `c` is an operator char.
|
||||
pub fn is_operator(c: char) -> bool {
|
||||
c == '+' || c == '-' || c == '=' || c == '*' || c == '!'
|
||||
|| c == '\\' || c == '/' || c == '|' || c == '@'
|
||||
@ -19,14 +22,17 @@ pub fn is_operator(c: char) -> bool {
|
||||
|| c == '^' || c == '.' || c == ':'
|
||||
}
|
||||
|
||||
/// Whether `c` is between `a-z`
|
||||
pub fn is_lowercase(c: char) -> bool {
|
||||
c >= 'a' && c <= 'z'
|
||||
}
|
||||
|
||||
/// Whether `c` is between `A-Z`
|
||||
pub fn is_uppercase(c: char) -> bool {
|
||||
c >= 'A' && c <= 'Z'
|
||||
}
|
||||
|
||||
/// Whether `c` is between `a-zA-Z_0-9`
|
||||
pub fn is_identifier_char(c: char) -> bool {
|
||||
is_lowercase(c) || is_uppercase(c) || c == '_' || is_digit(c)
|
||||
}
|
||||
|
@ -8,6 +8,7 @@ use super::syntax;
|
||||
use super::semantic;
|
||||
use super::codegen;
|
||||
|
||||
/// Executes Lexical analysis, handles errors and calls build_ast for the next phase
|
||||
fn compile(input: &String) {
|
||||
let _tokens = lexic::get_tokens(input);
|
||||
|
||||
@ -22,6 +23,9 @@ fn compile(input: &String) {
|
||||
|
||||
}
|
||||
|
||||
/// Executes Syntax analysis, and for now, Semantic analysis and Code generation.
|
||||
///
|
||||
/// Prints the generated code to stdout
|
||||
fn build_ast(tokens: Vec<Token>) {
|
||||
let ast = syntax::construct_ast(&tokens);
|
||||
|
||||
@ -38,6 +42,7 @@ fn build_ast(tokens: Vec<Token>) {
|
||||
}
|
||||
}
|
||||
|
||||
/// Executes the REPL, reading from stdin, compiling and emitting JS to stdout
|
||||
pub fn run() -> io::Result<()> {
|
||||
let stdin = io::stdin();
|
||||
let mut buffer = String::new();
|
||||
|
@ -8,24 +8,35 @@ pub fn check_ast<'a>(ast: &'a mut ModuleAST, symbol_table: &'a mut SymbolTable)
|
||||
Binding::Val(binding) => {
|
||||
symbol_table.add(
|
||||
binding.identifier,
|
||||
get_expression_type(&binding.expression).as_str()
|
||||
get_expression_type(&binding.expression, symbol_table).as_str()
|
||||
);
|
||||
}
|
||||
Binding::Var(binding) => {
|
||||
symbol_table.add(
|
||||
binding.identifier,
|
||||
get_expression_type(&binding.expression).as_str(),
|
||||
get_expression_type(&binding.expression, symbol_table).as_str(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_expression_type(exp: &Expression) -> String {
|
||||
fn get_expression_type(exp: &Expression, symbol_table: &SymbolTable) -> String {
|
||||
match exp {
|
||||
Expression::Number(_) => String::from(_NUMBER),
|
||||
Expression::String(_) => String::from(_STRING),
|
||||
Expression::Boolean(_) => String::from(_BOOLEAN),
|
||||
Expression::Identifier(id) => {
|
||||
match symbol_table.get_type(*id) {
|
||||
Some(datatype) => {
|
||||
datatype
|
||||
}
|
||||
None => {
|
||||
// Should add an error to the list instead of panicking
|
||||
panic!("Semantic analysis: identifier {} not found", id);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -71,4 +82,24 @@ mod tests {
|
||||
assert!(test_type(String::from("val a = false"), _BOOLEAN));
|
||||
assert!(test_type(String::from("var a = true"), _BOOLEAN));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_get_type_from_identifier() {
|
||||
let mut table = SymbolTable::new();
|
||||
let tokens = lexic::get_tokens(&String::from("val identifier = 20")).unwrap();
|
||||
let mut ast = syntax::construct_ast(&tokens).unwrap();
|
||||
|
||||
// Add an identifier
|
||||
check_ast(&mut ast, &mut table);
|
||||
|
||||
let tokens = lexic::get_tokens(&String::from("val newValue = identifier")).unwrap();
|
||||
let mut ast = syntax::construct_ast(&tokens).unwrap();
|
||||
|
||||
// Add a new value that references an identifier
|
||||
check_ast(&mut ast, &mut table);
|
||||
|
||||
// The type should be Num
|
||||
let current_type = table.get_type("newValue").unwrap();
|
||||
assert_eq!(_NUMBER, current_type);
|
||||
}
|
||||
}
|
||||
|
@ -39,6 +39,14 @@ impl SymbolTable {
|
||||
})
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
pub fn get_type(&self, identifier: &str) -> Option<String> {
|
||||
self.table
|
||||
.get_key_value(&String::from(identifier))
|
||||
.and_then(|(_, value)| {
|
||||
Some(String::from(value))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,7 +1,12 @@
|
||||
use crate::token::{Token, TokenType};
|
||||
use super::ast_types::Expression;
|
||||
|
||||
|
||||
/// An expression can be:
|
||||
///
|
||||
/// - A number
|
||||
/// - A string
|
||||
/// - A boolean
|
||||
/// - An identifier
|
||||
pub fn try_parse(tokens: &Vec<Token>, pos: usize) -> Option<Expression> {
|
||||
tokens
|
||||
.get(pos)
|
||||
@ -16,6 +21,9 @@ pub fn try_parse(tokens: &Vec<Token>, pos: usize) -> Option<Expression> {
|
||||
TokenType::Identifier if token.value == "true" || token.value == "false" => {
|
||||
Some(Expression::Boolean(token.value == "true"))
|
||||
}
|
||||
TokenType::Identifier => {
|
||||
Some(Expression::Identifier(&token.value))
|
||||
}
|
||||
_ => None
|
||||
}
|
||||
})
|
||||
@ -48,4 +56,26 @@ mod tests {
|
||||
_ => panic!()
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_parse_a_boolean() {
|
||||
let tokens = get_tokens(&String::from("true")).unwrap();
|
||||
let expression = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
match expression {
|
||||
Expression::Boolean(value) => assert!(value),
|
||||
_ => panic!()
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_parse_an_identifier() {
|
||||
let tokens = get_tokens(&String::from("someIdentifier")).unwrap();
|
||||
let expression = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
match expression {
|
||||
Expression::Identifier(value) => assert_eq!("someIdentifier", value),
|
||||
_ => panic!()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user