Scan datatypes. Parse datatype annotations
This commit is contained in:
parent
1849e11ebb
commit
f28e7394e9
@ -11,6 +11,11 @@
|
|||||||
- [ ] Stdlib
|
- [ ] Stdlib
|
||||||
- [ ] Document code
|
- [ ] Document code
|
||||||
|
|
||||||
|
|
||||||
|
## v0.0.4
|
||||||
|
|
||||||
|
- Explicit datatype of variables
|
||||||
|
|
||||||
## v0.0.3
|
## v0.0.3
|
||||||
|
|
||||||
- Get datatype of an identifier from the symbol table
|
- Get datatype of an identifier from the symbol table
|
||||||
|
@ -9,11 +9,13 @@ pub enum Binding<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct ValBinding<'a> {
|
pub struct ValBinding<'a> {
|
||||||
|
pub datatype: Option<String>,
|
||||||
pub identifier: &'a String,
|
pub identifier: &'a String,
|
||||||
pub expression: Expression<'a>,
|
pub expression: Expression<'a>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct VarBinding<'a> {
|
pub struct VarBinding<'a> {
|
||||||
|
pub datatype: Option<String>,
|
||||||
pub identifier: &'a String,
|
pub identifier: &'a String,
|
||||||
pub expression: Expression<'a>,
|
pub expression: Expression<'a>,
|
||||||
}
|
}
|
||||||
|
@ -31,6 +31,7 @@ mod tests {
|
|||||||
let id = String::from("identifier");
|
let id = String::from("identifier");
|
||||||
let value = String::from("322");
|
let value = String::from("322");
|
||||||
let binding = Binding::Val(ValBinding {
|
let binding = Binding::Val(ValBinding {
|
||||||
|
datatype: None,
|
||||||
identifier: &id,
|
identifier: &id,
|
||||||
expression: Expression::Number(&value),
|
expression: Expression::Number(&value),
|
||||||
});
|
});
|
||||||
|
@ -22,6 +22,7 @@ mod tests {
|
|||||||
let id = String::from("identifier");
|
let id = String::from("identifier");
|
||||||
let value = String::from("322");
|
let value = String::from("322");
|
||||||
let binding = Binding::Val(ValBinding {
|
let binding = Binding::Val(ValBinding {
|
||||||
|
datatype: None,
|
||||||
identifier: &id,
|
identifier: &id,
|
||||||
expression: Expression::Number(&value),
|
expression: Expression::Number(&value),
|
||||||
});
|
});
|
||||||
|
@ -75,6 +75,7 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
|
|||||||
None
|
None
|
||||||
.or_else(|| scanner::number(next_char, chars, current_pos))
|
.or_else(|| scanner::number(next_char, chars, current_pos))
|
||||||
.or_else(|| scanner::identifier(next_char, chars, current_pos))
|
.or_else(|| scanner::identifier(next_char, chars, current_pos))
|
||||||
|
.or_else(|| scanner::datatype(next_char, chars, current_pos))
|
||||||
.or_else(|| scanner::string(next_char, chars, current_pos))
|
.or_else(|| scanner::string(next_char, chars, current_pos))
|
||||||
.or_else(|| scanner::operator(next_char, chars, current_pos))
|
.or_else(|| scanner::operator(next_char, chars, current_pos))
|
||||||
.or_else(|| scanner::grouping_sign(next_char, chars, current_pos))
|
.or_else(|| scanner::grouping_sign(next_char, chars, current_pos))
|
||||||
@ -203,6 +204,14 @@ mod tests {
|
|||||||
assert_eq!("]", t.value);
|
assert_eq!("]", t.value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn should_scan_datatype() {
|
||||||
|
let input = String::from("Num");
|
||||||
|
let tokens = get_tokens(&input).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(TokenType::Datatype, tokens[0].token_type);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn should_scan_new_line() {
|
fn should_scan_new_line() {
|
||||||
let input = String::from("3\n22");
|
let input = String::from("3\n22");
|
||||||
|
@ -13,19 +13,32 @@ fn str_is_keyword(s: &String) -> Option<TokenType> {
|
|||||||
/// a valid identifier
|
/// a valid identifier
|
||||||
pub fn scan(start_char: char, chars: &Vec<char>, start_pos: usize) -> LexResult {
|
pub fn scan(start_char: char, chars: &Vec<char>, start_pos: usize) -> LexResult {
|
||||||
// The scanning is done by this recursive function
|
// The scanning is done by this recursive function
|
||||||
scan_impl(chars, start_pos + 1, format!("{}", start_char))
|
scan_impl(
|
||||||
|
chars,
|
||||||
|
start_pos + 1,
|
||||||
|
format!("{}", start_char),
|
||||||
|
utils::is_uppercase(start_char),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Recursive function that scans the identifier
|
/// Recursive function that scans the identifier
|
||||||
fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
|
fn scan_impl(chars: &Vec<char>, start_pos: usize, current: String, is_datatype: bool) -> LexResult {
|
||||||
match chars.get(start_pos) {
|
match chars.get(start_pos) {
|
||||||
Some(c) if utils::is_identifier_char(*c) => {
|
Some(c) if utils::is_identifier_char(*c) => {
|
||||||
scan_impl(chars, start_pos + 1, utils::str_append(current, *c))
|
scan_impl(
|
||||||
|
chars,
|
||||||
|
start_pos + 1,
|
||||||
|
utils::str_append(current, *c),
|
||||||
|
is_datatype,
|
||||||
|
)
|
||||||
},
|
},
|
||||||
_ => {
|
_ => {
|
||||||
if let Some(token_type) = str_is_keyword(&current) {
|
if let Some(token_type) = str_is_keyword(&current) {
|
||||||
LexResult::Some(token::new(current, start_pos as i32, token_type), start_pos)
|
LexResult::Some(token::new(current, start_pos as i32, token_type), start_pos)
|
||||||
}
|
}
|
||||||
|
else if is_datatype {
|
||||||
|
LexResult::Some(token::new_datatype(current, start_pos as i32), start_pos)
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
LexResult::Some(token::new_identifier(current, start_pos as i32), start_pos)
|
LexResult::Some(token::new_identifier(current, start_pos as i32), start_pos)
|
||||||
}
|
}
|
||||||
|
@ -48,6 +48,13 @@ pub fn identifier(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexRes
|
|||||||
.then(|| identifier::scan(c, chars, start_pos))
|
.then(|| identifier::scan(c, chars, start_pos))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Attempts to scan a datatype. If not found, returns None so that other scanners can be chained
|
||||||
|
pub fn datatype(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||||
|
// Since the only difference with an identifier is that the first character is an
|
||||||
|
// uppercase letter, reuse the identifier scanner
|
||||||
|
utils::is_uppercase(c)
|
||||||
|
.then(|| identifier::scan(c, chars, start_pos))
|
||||||
|
}
|
||||||
|
|
||||||
/// Attempts to scan a string. If not found, returns None so that other scanners can be chained
|
/// Attempts to scan a string. If not found, returns None so that other scanners can be chained
|
||||||
pub fn string(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
pub fn string(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||||
|
@ -2,11 +2,26 @@ use crate::token::{Token, TokenType};
|
|||||||
use super::ast_types::{ValBinding, VarBinding, Binding};
|
use super::ast_types::{ValBinding, VarBinding, Binding};
|
||||||
use super::expression;
|
use super::expression;
|
||||||
|
|
||||||
// Should return a 3 state value:
|
// TODO: Should return a 3 state value:
|
||||||
// - Success: binding parsed successfully
|
// - Success: binding parsed successfully
|
||||||
// - NotFound: the first token (var | val) was not found, so the parser should try other options
|
// - NotFound: the first token (var | val) was not found, so the parser should try other options
|
||||||
// - Error: token (var | val) was found, but then other expected tokens were not found
|
// - Error: token (var | val) was found, but then other expected tokens were not found
|
||||||
pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<Binding> {
|
pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<Binding> {
|
||||||
|
let mut pos = pos;
|
||||||
|
|
||||||
|
// Optional datatype annotation
|
||||||
|
let datatype_annotation = {
|
||||||
|
match tokens.get(pos) {
|
||||||
|
Some(t) if t.token_type == TokenType::Datatype => {
|
||||||
|
pos += 1;
|
||||||
|
Some(String::from(&t.value))
|
||||||
|
}
|
||||||
|
Some(_) => None,
|
||||||
|
None => return None
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// var/val keyword
|
||||||
let is_val = {
|
let is_val = {
|
||||||
let res1 = try_token_type(tokens, pos, TokenType::VAL);
|
let res1 = try_token_type(tokens, pos, TokenType::VAL);
|
||||||
match res1 {
|
match res1 {
|
||||||
@ -27,7 +42,6 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<Binding> {
|
|||||||
|
|
||||||
let equal_operator = try_operator(tokens, pos + 2, String::from("="));
|
let equal_operator = try_operator(tokens, pos + 2, String::from("="));
|
||||||
if equal_operator.is_none() { return None }
|
if equal_operator.is_none() { return None }
|
||||||
let _ = equal_operator.unwrap();
|
|
||||||
|
|
||||||
let expression = expression::try_parse(tokens, pos + 3);
|
let expression = expression::try_parse(tokens, pos + 3);
|
||||||
if expression.is_none() { return None }
|
if expression.is_none() { return None }
|
||||||
@ -35,12 +49,14 @@ pub fn try_parse<'a>(tokens: &'a Vec<Token>, pos: usize) -> Option<Binding> {
|
|||||||
|
|
||||||
if is_val {
|
if is_val {
|
||||||
Some(Binding::Val(ValBinding {
|
Some(Binding::Val(ValBinding {
|
||||||
|
datatype: datatype_annotation,
|
||||||
identifier: &identifier.value,
|
identifier: &identifier.value,
|
||||||
expression,
|
expression,
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
Some(Binding::Var(VarBinding {
|
Some(Binding::Var(VarBinding {
|
||||||
|
datatype: datatype_annotation,
|
||||||
identifier: &identifier.value,
|
identifier: &identifier.value,
|
||||||
expression,
|
expression,
|
||||||
}))
|
}))
|
||||||
@ -80,9 +96,7 @@ mod tests {
|
|||||||
Binding::Val(binding) => {
|
Binding::Val(binding) => {
|
||||||
assert_eq!("identifier", binding.identifier);
|
assert_eq!("identifier", binding.identifier);
|
||||||
}
|
}
|
||||||
Binding::Var(binding) => {
|
_ => panic!()
|
||||||
assert_eq!("identifier", binding.identifier);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -110,4 +124,30 @@ mod tests {
|
|||||||
|
|
||||||
assert_eq!("=", token.value);
|
assert_eq!("=", token.value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn should_parse_binding_with_datatype() {
|
||||||
|
let tokens = get_tokens(&String::from("Num val identifier = 20")).unwrap();
|
||||||
|
let binding = try_parse(&tokens, 0).unwrap();
|
||||||
|
|
||||||
|
match binding {
|
||||||
|
Binding::Val(binding) => {
|
||||||
|
assert_eq!(Some(String::from("Num")), binding.datatype);
|
||||||
|
assert_eq!("identifier", binding.identifier);
|
||||||
|
}
|
||||||
|
_ => panic!()
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
let tokens = get_tokens(&String::from("Bool var identifier = true")).unwrap();
|
||||||
|
let binding = try_parse(&tokens, 0).unwrap();
|
||||||
|
|
||||||
|
match binding {
|
||||||
|
Binding::Var(binding) => {
|
||||||
|
assert_eq!(Some(String::from("Bool")), binding.datatype);
|
||||||
|
assert_eq!("identifier", binding.identifier);
|
||||||
|
}
|
||||||
|
_ => panic!()
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
@ -11,13 +11,25 @@ A module is (commonly) a single source file.
|
|||||||
|
|
||||||
A declaration with `var` or `val`.
|
A declaration with `var` or `val`.
|
||||||
|
|
||||||
- `var = "var"`
|
```ebnf
|
||||||
- `val = "val"`
|
var = "var"
|
||||||
- `variable_binding = (var | val), identifier, "=", expression`
|
val = "val"
|
||||||
|
variable_binding = (var | val), identifier, "=", expression
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
### `expression`
|
### `expression`
|
||||||
|
|
||||||
For now just a number
|
For now just a number, string or boolean
|
||||||
|
|
||||||
- `expression = number`
|
```ebnf
|
||||||
|
expression = number | string | boolean
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Type annotations
|
||||||
|
|
||||||
|
```ebnf
|
||||||
|
variable_binding = Datatype, (var | val), identifier, "=", expression
|
||||||
|
```
|
||||||
|
|
||||||
|
@ -1,14 +1,14 @@
|
|||||||
use super::token::Token;
|
use super::token::Token;
|
||||||
|
|
||||||
mod expression;
|
mod expression;
|
||||||
mod val_binding;
|
mod binding;
|
||||||
use super::ast_types;
|
use super::ast_types;
|
||||||
|
|
||||||
use ast_types::ModuleAST;
|
use ast_types::ModuleAST;
|
||||||
|
|
||||||
/// Constructs the Misti AST from a vector of tokens
|
/// Constructs the Misti AST from a vector of tokens
|
||||||
pub fn construct_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST<'a>, String> {
|
pub fn construct_ast<'a>(tokens: &'a Vec<Token>) -> Result<ModuleAST<'a>, String> {
|
||||||
let maybe_binding = val_binding::try_parse(tokens, 0);
|
let maybe_binding = binding::try_parse(tokens, 0);
|
||||||
|
|
||||||
match maybe_binding {
|
match maybe_binding {
|
||||||
Some(binding) => {
|
Some(binding) => {
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#[derive(PartialEq, Debug, Clone)]
|
#[derive(PartialEq, Debug, Clone)]
|
||||||
pub enum TokenType {
|
pub enum TokenType {
|
||||||
Identifier,
|
Identifier,
|
||||||
|
Datatype,
|
||||||
Number,
|
Number,
|
||||||
String,
|
String,
|
||||||
Operator,
|
Operator,
|
||||||
@ -76,3 +77,11 @@ pub fn new_semicolon(position: i32) -> Token {
|
|||||||
_position: position,
|
_position: position,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn new_datatype(value: String, position: i32) -> Token {
|
||||||
|
Token {
|
||||||
|
token_type: TokenType::Datatype,
|
||||||
|
value,
|
||||||
|
_position: position,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user