Scan datatypes. Parse datatype annotations
This commit is contained in:
parent
1849e11ebb
commit
f28e7394e9
@ -11,6 +11,11 @@
|
||||
- [ ] Stdlib
|
||||
- [ ] Document code
|
||||
|
||||
|
||||
## v0.0.4
|
||||
|
||||
- Explicit datatype of variables
|
||||
|
||||
## v0.0.3
|
||||
|
||||
- Get datatype of an identifier from the symbol table
|
||||
|
@ -9,11 +9,13 @@ pub enum Binding<'a> {
|
||||
}
|
||||
|
||||
pub struct ValBinding<'a> {
|
||||
pub datatype: Option<String>,
|
||||
pub identifier: &'a String,
|
||||
pub expression: Expression<'a>,
|
||||
}
|
||||
|
||||
pub struct VarBinding<'a> {
|
||||
pub datatype: Option<String>,
|
||||
pub identifier: &'a String,
|
||||
pub expression: Expression<'a>,
|
||||
}
|
||||
|
@ -31,6 +31,7 @@ mod tests {
|
||||
let id = String::from("identifier");
|
||||
let value = String::from("322");
|
||||
let binding = Binding::Val(ValBinding {
|
||||
datatype: None,
|
||||
identifier: &id,
|
||||
expression: Expression::Number(&value),
|
||||
});
|
||||
|
@ -22,6 +22,7 @@ mod tests {
|
||||
let id = String::from("identifier");
|
||||
let value = String::from("322");
|
||||
let binding = Binding::Val(ValBinding {
|
||||
datatype: None,
|
||||
identifier: &id,
|
||||
expression: Expression::Number(&value),
|
||||
});
|
||||
|
@ -75,6 +75,7 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
|
||||
None
|
||||
.or_else(|| scanner::number(next_char, chars, current_pos))
|
||||
.or_else(|| scanner::identifier(next_char, chars, current_pos))
|
||||
.or_else(|| scanner::datatype(next_char, chars, current_pos))
|
||||
.or_else(|| scanner::string(next_char, chars, current_pos))
|
||||
.or_else(|| scanner::operator(next_char, chars, current_pos))
|
||||
.or_else(|| scanner::grouping_sign(next_char, chars, current_pos))
|
||||
@ -203,6 +204,14 @@ mod tests {
|
||||
assert_eq!("]", t.value);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_scan_datatype() {
|
||||
let input = String::from("Num");
|
||||
let tokens = get_tokens(&input).unwrap();
|
||||
|
||||
assert_eq!(TokenType::Datatype, tokens[0].token_type);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_scan_new_line() {
|
||||
let input = String::from("3\n22");
|
||||
|
@ -13,19 +13,32 @@ fn str_is_keyword(s: &String) -> Option<TokenType> {
|
||||
/// a valid identifier
|
||||
pub fn scan(start_char: char, chars: &Vec<char>, start_pos: usize) -> LexResult {
|
||||
// The scanning is done by this recursive function
|
||||
scan_impl(chars, start_pos + 1, format!("{}", start_char))
|
||||
scan_impl(
|
||||
chars,
|
||||
start_pos + 1,
|
||||
format!("{}", start_char),
|
||||
utils::is_uppercase(start_char),
|
||||
)
|
||||
}
|
||||
|
||||
/// Recursive function that scans the identifier
|
||||
fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
|
||||
fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String, is_datatype: bool) -> LexResult {
|
||||
match chars.get(start_pos) {
|
||||
Some(c) if utils::is_identifier_char(*c) => {
|
||||
scan_impl(chars, start_pos + 1, utils::str_append(current, *c))
|
||||
scan_impl(
|
||||
chars,
|
||||
start_pos + 1,
|
||||
utils::str_append(current, *c),
|
||||
is_datatype,
|
||||
)
|
||||
},
|
||||
_ => {
|
||||
if let Some(token_type) = str_is_keyword(&current) {
|
||||
LexResult::Some(token::new(current, start_pos as i32, token_type), start_pos)
|
||||
}
|
||||
else if is_datatype {
|
||||
LexResult::Some(token::new_datatype(current, start_pos as i32), start_pos)
|
||||
}
|
||||
else {
|
||||
LexResult::Some(token::new_identifier(current, start_pos as i32), start_pos)
|
||||
}
|
||||
|
@ -48,6 +48,13 @@ pub fn identifier(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexRes
|
||||
.then(|| identifier::scan(c, chars, start_pos))
|
||||
}
|
||||
|
||||
/// Attempts to scan a datatype. If not found, returns None so that other scanners can be chained
|
||||
pub fn datatype(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||
// Since the only difference with an identifier is that the first character is an
|
||||
// uppercase letter, reuse the identifier scanner
|
||||
utils::is_uppercase(c)
|
||||
.then(|| identifier::scan(c, chars, start_pos))
|
||||
}
|
||||
|
||||
/// Attempts to scan a string. If not found, returns None so that other scanners can be chained
|
||||
pub fn string(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||
|
@ -2,11 +2,26 @@ use crate::token::{Token, TokenType};
|
||||
use super::ast_types::{ValBinding, VarBinding, Binding};
|
||||
use super::expression;
|
||||
|
||||
// Should return a 3 state value:
|
||||
// TODO: Should return a 3 state value:
|
||||
// - Success: binding parsed successfully
|
||||
// - NotFound: the first token (var | val) was not found, so the parser should try other options
|
||||
// - Error: token (var | val) was found, but then other expected tokens were not found
|
||||
pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<Binding> {
|
||||
let mut pos = pos;
|
||||
|
||||
// Optional datatype annotation
|
||||
let datatype_annotation = {
|
||||
match tokens.get(pos) {
|
||||
Some(t) if t.token_type == TokenType::Datatype => {
|
||||
pos += 1;
|
||||
Some(String::from(&t.value))
|
||||
}
|
||||
Some(_) => None,
|
||||
None => return None
|
||||
}
|
||||
};
|
||||
|
||||
// var/val keyword
|
||||
let is_val = {
|
||||
let res1 = try_token_type(tokens, pos, TokenType::VAL);
|
||||
match res1 {
|
||||
@ -27,7 +42,6 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<Binding> {
|
||||
|
||||
let equal_operator = try_operator(tokens, pos + 2, String::from("="));
|
||||
if equal_operator.is_none() { return None }
|
||||
let _ = equal_operator.unwrap();
|
||||
|
||||
let expression = expression::try_parse(tokens, pos + 3);
|
||||
if expression.is_none() { return None }
|
||||
@ -35,12 +49,14 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<Binding> {
|
||||
|
||||
if is_val {
|
||||
Some(Binding::Val(ValBinding {
|
||||
datatype: datatype_annotation,
|
||||
identifier: &identifier.value,
|
||||
expression,
|
||||
}))
|
||||
}
|
||||
else {
|
||||
Some(Binding::Var(VarBinding {
|
||||
datatype: datatype_annotation,
|
||||
identifier: &identifier.value,
|
||||
expression,
|
||||
}))
|
||||
@ -80,9 +96,7 @@ mod tests {
|
||||
Binding::Val(binding) => {
|
||||
assert_eq!("identifier", binding.identifier);
|
||||
}
|
||||
Binding::Var(binding) => {
|
||||
assert_eq!("identifier", binding.identifier);
|
||||
}
|
||||
_ => panic!()
|
||||
}
|
||||
}
|
||||
|
||||
@ -110,4 +124,30 @@ mod tests {
|
||||
|
||||
assert_eq!("=", token.value);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_parse_binding_with_datatype() {
|
||||
let tokens = get_tokens(&String::from("Num val identifier = 20")).unwrap();
|
||||
let binding = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
match binding {
|
||||
Binding::Val(binding) => {
|
||||
assert_eq!(Some(String::from("Num")), binding.datatype);
|
||||
assert_eq!("identifier", binding.identifier);
|
||||
}
|
||||
_ => panic!()
|
||||
}
|
||||
|
||||
|
||||
let tokens = get_tokens(&String::from("Bool var identifier = true")).unwrap();
|
||||
let binding = try_parse(&tokens, 0).unwrap();
|
||||
|
||||
match binding {
|
||||
Binding::Var(binding) => {
|
||||
assert_eq!(Some(String::from("Bool")), binding.datatype);
|
||||
assert_eq!("identifier", binding.identifier);
|
||||
}
|
||||
_ => panic!()
|
||||
}
|
||||
}
|
||||
}
|
@ -11,13 +11,25 @@ A module is (commonly) a single source file.
|
||||
|
||||
A declaration with `var` or `val`.
|
||||
|
||||
- `var = "var"`
|
||||
- `val = "val"`
|
||||
- `variable_binding = (var | val), identifier, "=", expression`
|
||||
```ebnf
|
||||
var = "var"
|
||||
val = "val"
|
||||
variable_binding = (var | val), identifier, "=", expression
|
||||
```
|
||||
|
||||
|
||||
### `expression`
|
||||
|
||||
For now just a number
|
||||
For now just a number, string or boolean
|
||||
|
||||
- `expression = number`
|
||||
```ebnf
|
||||
expression = number | string | boolean
|
||||
```
|
||||
|
||||
|
||||
## Type annotations
|
||||
|
||||
```ebnf
|
||||
variable_binding = Datatype, (var | val), identifier, "=", expression
|
||||
```
|
||||
|
||||
|
@ -1,14 +1,14 @@
|
||||
use super::token::Token;
|
||||
|
||||
mod expression;
|
||||
mod val_binding;
|
||||
mod binding;
|
||||
use super::ast_types;
|
||||
|
||||
use ast_types::ModuleAST;
|
||||
|
||||
/// Constructs the Misti AST from a vector of tokens
|
||||
pub fn construct_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST<'a>, String> {
|
||||
let maybe_binding = val_binding::try_parse(tokens, 0);
|
||||
let maybe_binding = binding::try_parse(tokens, 0);
|
||||
|
||||
match maybe_binding {
|
||||
Some(binding) => {
|
||||
|
@ -1,6 +1,7 @@
|
||||
#[derive(PartialEq, Debug, Clone)]
|
||||
pub enum TokenType {
|
||||
Identifier,
|
||||
Datatype,
|
||||
Number,
|
||||
String,
|
||||
Operator,
|
||||
@ -76,3 +77,11 @@ pub fn new_semicolon(position: i32) -> Token {
|
||||
_position: position,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_datatype(value: String, position: i32) -> Token {
|
||||
Token {
|
||||
token_type: TokenType::Datatype,
|
||||
value,
|
||||
_position: position,
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user