Scan datatypes. Parse datatype annotations

This commit is contained in:
Araozu 2023-02-15 16:17:50 -05:00
parent 1849e11ebb
commit f28e7394e9
11 changed files with 114 additions and 15 deletions

View File

@ -11,6 +11,11 @@
- [ ] Stdlib
- [ ] Document code
## v0.0.4
- Explicit datatype of variables
## v0.0.3
- Get datatype of an identifier from the symbol table

View File

@ -9,11 +9,13 @@ pub enum Binding<'a> {
}
/// AST node for a binding declared with the `val` keyword.
pub struct ValBinding<'a> {
/// Datatype annotation written before the keyword, or `None` when there is none
pub datatype: Option<String>,
/// Name of the identifier being bound, borrowed for `'a`
pub identifier: &'a String,
/// Expression assigned to the identifier
pub expression: Expression<'a>,
}
/// AST node for a binding declared with the `var` keyword.
pub struct VarBinding<'a> {
/// Datatype annotation written before the keyword, or `None` when there is none
pub datatype: Option<String>,
/// Name of the identifier being bound, borrowed for `'a`
pub identifier: &'a String,
/// Expression assigned to the identifier
pub expression: Expression<'a>,
}

View File

@ -31,6 +31,7 @@ mod tests {
let id = String::from("identifier");
let value = String::from("322");
let binding = Binding::Val(ValBinding {
datatype: None,
identifier: &id,
expression: Expression::Number(&value),
});

View File

@ -22,6 +22,7 @@ mod tests {
let id = String::from("identifier");
let value = String::from("322");
let binding = Binding::Val(ValBinding {
datatype: None,
identifier: &id,
expression: Expression::Number(&value),
});

View File

@ -75,6 +75,7 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
None
.or_else(|| scanner::number(next_char, chars, current_pos))
.or_else(|| scanner::identifier(next_char, chars, current_pos))
.or_else(|| scanner::datatype(next_char, chars, current_pos))
.or_else(|| scanner::string(next_char, chars, current_pos))
.or_else(|| scanner::operator(next_char, chars, current_pos))
.or_else(|| scanner::grouping_sign(next_char, chars, current_pos))
@ -203,6 +204,14 @@ mod tests {
assert_eq!("]", t.value);
}
#[test]
fn should_scan_datatype() {
    // A word whose first character is an uppercase letter must be scanned as a datatype
    let source = String::from("Num");
    let scanned = get_tokens(&source).unwrap();
    let first = &scanned[0];

    assert_eq!(TokenType::Datatype, first.token_type);
}
#[test]
fn should_scan_new_line() {
let input = String::from("3\n22");

View File

@ -13,19 +13,32 @@ fn str_is_keyword(s: &String) -> Option<TokenType> {
/// a valid identifier
pub fn scan(start_char: char, chars: &Vec<char>, start_pos: usize) -> LexResult {
// The scanning is done by this recursive function
scan_impl(chars, start_pos + 1, format!("{}", start_char))
scan_impl(
chars,
start_pos + 1,
format!("{}", start_char),
utils::is_uppercase(start_char),
)
}
/// Recursive function that scans the identifier
fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String, is_datatype: bool) -> LexResult {
match chars.get(start_pos) {
Some(c) if utils::is_identifier_char(*c) => {
scan_impl(chars, start_pos + 1, utils::str_append(current, *c))
scan_impl(
chars,
start_pos + 1,
utils::str_append(current, *c),
is_datatype,
)
},
_ => {
if let Some(token_type) = str_is_keyword(&current) {
LexResult::Some(token::new(current, start_pos as i32, token_type), start_pos)
}
else if is_datatype {
LexResult::Some(token::new_datatype(current, start_pos as i32), start_pos)
}
else {
LexResult::Some(token::new_identifier(current, start_pos as i32), start_pos)
}

View File

@ -48,6 +48,13 @@ pub fn identifier(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexRes
.then(|| identifier::scan(c, chars, start_pos))
}
/// Attempts to scan a datatype. Returns `None` when `c` cannot start a datatype,
/// so that the caller is able to chain other scanners
pub fn datatype(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
    // The only difference with an identifier is that the first character is an
    // uppercase letter, so the identifier scanner is reused
    if utils::is_uppercase(c) {
        Some(identifier::scan(c, chars, start_pos))
    }
    else {
        None
    }
}
/// Attempts to scan a string. If not found, returns None to be able to chain other scanners
pub fn string(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {

View File

@ -2,11 +2,26 @@ use crate::token::{Token, TokenType};
use super::ast_types::{ValBinding, VarBinding, Binding};
use super::expression;
// Should return a 3 state value:
// TODO: Should return a 3 state value:
// - Success: binding parsed successfully
// - NotFound: the first token (var | val) was not found, so the parser should try other options
// - Error: token (var | val) was found, but then other expected tokens were not found
pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<Binding> {
let mut pos = pos;
// Optional datatype annotation
let datatype_annotation = {
match tokens.get(pos) {
Some(t) if t.token_type == TokenType::Datatype => {
pos += 1;
Some(String::from(&t.value))
}
Some(_) => None,
None => return None
}
};
// var/val keyword
let is_val = {
let res1 = try_token_type(tokens, pos, TokenType::VAL);
match res1 {
@ -27,7 +42,6 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<Binding> {
let equal_operator = try_operator(tokens, pos + 2, String::from("="));
if equal_operator.is_none() { return None }
let _ = equal_operator.unwrap();
let expression = expression::try_parse(tokens, pos + 3);
if expression.is_none() { return None }
@ -35,12 +49,14 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<Binding> {
if is_val {
Some(Binding::Val(ValBinding {
datatype: datatype_annotation,
identifier: &identifier.value,
expression,
}))
}
else {
Some(Binding::Var(VarBinding {
datatype: datatype_annotation,
identifier: &identifier.value,
expression,
}))
@ -80,9 +96,7 @@ mod tests {
Binding::Val(binding) => {
assert_eq!("identifier", binding.identifier);
}
Binding::Var(binding) => {
assert_eq!("identifier", binding.identifier);
}
_ => panic!()
}
}
@ -110,4 +124,30 @@ mod tests {
assert_eq!("=", token.value);
}
#[test]
fn should_parse_binding_with_datatype() {
    // `val` binding preceded by a datatype annotation
    let val_source = String::from("Num val identifier = 20");
    let val_tokens = get_tokens(&val_source).unwrap();
    if let Binding::Val(parsed) = try_parse(&val_tokens, 0).unwrap() {
        assert_eq!(Some(String::from("Num")), parsed.datatype);
        assert_eq!("identifier", parsed.identifier);
    }
    else {
        panic!()
    }

    // `var` binding preceded by a datatype annotation
    let var_source = String::from("Bool var identifier = true");
    let var_tokens = get_tokens(&var_source).unwrap();
    if let Binding::Var(parsed) = try_parse(&var_tokens, 0).unwrap() {
        assert_eq!(Some(String::from("Bool")), parsed.datatype);
        assert_eq!("identifier", parsed.identifier);
    }
    else {
        panic!()
    }
}
}

View File

@ -11,13 +11,25 @@ A module is (commonly) a single source file.
A declaration with `var` or `val`.
- `var = "var"`
- `val = "val"`
- `variable_binding = (var | val), identifier, "=", expression`
```ebnf
var = "var"
val = "val"
variable_binding = (var | val), identifier, "=", expression
```
### `expression`
For now just a number
For now just a number, string or boolean
- `expression = number`
```ebnf
expression = number | string | boolean
```
## Type annotations
```ebnf
variable_binding = Datatype, (var | val), identifier, "=", expression
```

View File

@ -1,14 +1,14 @@
use super::token::Token;
mod expression;
mod val_binding;
mod binding;
use super::ast_types;
use ast_types::ModuleAST;
/// Constructs the Misti AST from a vector of tokens
pub fn construct_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST<'a>, String> {
let maybe_binding = val_binding::try_parse(tokens, 0);
let maybe_binding = binding::try_parse(tokens, 0);
match maybe_binding {
Some(binding) => {

View File

@ -1,6 +1,7 @@
#[derive(PartialEq, Debug, Clone)]
pub enum TokenType {
Identifier,
Datatype,
Number,
String,
Operator,
@ -76,3 +77,11 @@ pub fn new_semicolon(position: i32) -> Token {
_position: position,
}
}
/// Builds a token of type `TokenType::Datatype` that owns `value` and stores
/// `position` in its `_position` field.
pub fn new_datatype(value: String, position: i32) -> Token {
    let token_type = TokenType::Datatype;

    Token {
        _position: position,
        value,
        token_type,
    }
}