Compare commits

...

3 Commits

Author SHA1 Message Date
4ac01099ce chore: update changelog 2024-08-13 15:19:18 -05:00
2b23e36955 feat: lex octal and binary 2024-08-13 15:05:28 -05:00
e52176f90c feat: store tokens in ast for basic nodes 2024-08-13 08:04:01 -05:00
13 changed files with 205 additions and 72 deletions

1
.gitignore vendored
View File

@ -1,3 +1,4 @@
target
examples
tarpaulin-report.html
run.sh

View File

@ -5,30 +5,30 @@
- Test correct operator precedence
- Implement functions as first class citizens
- Parse __more__ binary operators
- Store tokens for the semantic analysis phase, to have actual error reporting
- Parse more complex bindings
- Rework error messages
- Parse other language constructions
- Type checking
- Check for conflicting identifiers
- Namespace identifiers in the symbol table
- Stdlib
- Document code
- Watch mode
- Formatter
- Simple language server
- Decide how to handle comments in the syntax (should comments carry meaning, like in Rust?)
- Stop ignoring comments & whitespace, so they can be used for code formatting
- Fix comment handling in the AST
- Abstract the parsing of datatypes, such that in the future generics can be implemented in a single place
- Include the original tokens in the AST
- Include comments in the AST
- Begin work on the code formatter
- Remove all panic! and todo!
## v0.1.1
- [x] Top level expressions as statements
- [ ] Naively transpile variable bindings
- [x] Naively transpile variable bindings
- [x] Store tokens in the AST, to have actual error reporting
- [x] Scan octal and binary numbers
- [x] Simple type checking
- [x] Check for conflicting identifiers at the current scope
## v0.1.0

View File

@ -11,6 +11,7 @@ use self::token::TokenType;
type Chars = Vec<char>;
/// Represents the result of scanning a single token from the input
#[derive(Debug)]
pub enum LexResult {
/// A token was found. The first element is the token, and the
/// second element is the position in the input after the token.

View File

@ -11,8 +11,14 @@ pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
match (next_char_1, next_char_2) {
// Test if the input contains a hex number
(Some(c1), Some(c2)) if *c1 == '0' && (*c2 == 'x' || *c2 == 'X') => {
scan_hex(chars, start_pos + 2, String::from("0x"))
(Some('0'), Some('x' | 'X')) => scan_hex(chars, start_pos + 2, String::from("0x")),
(Some('0'), Some('o' | 'O')) => {
// octal
scan_octal(chars, start_pos + 2)
}
(Some('0'), Some('b')) => {
// binary
scan_binary(chars, start_pos + 2)
}
// Scan decimal/double/scientific otherwise
_ => scan_decimal(chars, start_pos, String::from("")),
@ -45,7 +51,7 @@ fn scan_decimal(chars: &Vec<char>, start_pos: usize, current: String) -> LexResu
/// This function expects the following on the first call:
/// - The char at `start_pos` is a value between [0-9a-fA-F]. If not, will return an error.
/// - `current == "0x"`. If not will return an incorrect value, or panic.
fn scan_hex(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
fn scan_hex(chars: &[char], start_pos: usize, current: String) -> LexResult {
match chars.get(start_pos) {
Some(c) if utils::is_hex_digit(*c) => {
let (t, next) = scan_hex_digits(chars, start_pos + 1, utils::str_append(current, *c));
@ -59,6 +65,67 @@ fn scan_hex(chars: &Vec<char>, start_pos: usize, current: String) -> LexResult {
}
}
/// Scans the digits of an octal literal whose `0o` prefix was already consumed.
///
/// `start_pos` must point at the first char after the prefix, so it has to be
/// at least 2: the reported positions are computed as `start_pos - 2`.
///
/// Returns:
/// - `LexResult::Some` with an int token whose value keeps the `0o` prefix,
///   plus the position right after the last octal digit.
/// - `LexResult::Err` if no char in `[0-7]` follows the prefix.
fn scan_octal(chars: &[char], start_pos: usize) -> LexResult {
    // Longest run of octal digits beginning at `start_pos`
    let digits: String = chars
        .iter()
        .skip(start_pos)
        .take_while(|c| matches!(**c, '0'..='7'))
        .collect();

    // Only ASCII digits were collected, so the byte length is the char count
    let end_pos = start_pos + digits.len();
    // Both the token and the error begin at the opening '0o'
    let prefix_pos = start_pos - 2;

    if digits.is_empty() {
        return LexResult::Err(LexError {
            position: prefix_pos,
            end_position: end_pos,
            reason: String::from("Found an incomplete octal number"),
        });
    }

    LexResult::Some(Token::new_int(format!("0o{}", digits), prefix_pos), end_pos)
}
// TODO: Unify this, octal and hex in a single macro
/// Scans the digits of a binary literal whose `0b` prefix was already consumed.
///
/// `start_pos` must point at the first char after the prefix, so it has to be
/// at least 2: the reported positions are computed as `start_pos - 2`.
///
/// Returns:
/// - `LexResult::Some` with an int token whose value keeps the `0b` prefix,
///   plus the position right after the last binary digit.
/// - `LexResult::Err` if neither `0` nor `1` follows the prefix.
fn scan_binary(chars: &[char], start_pos: usize) -> LexResult {
    // The token value is built in place, starting from the prefix
    let mut literal = String::from("0b");
    let mut end_pos = start_pos;

    for c in chars.iter().skip(start_pos) {
        if !matches!(*c, '0' | '1') {
            break;
        }
        literal.push(*c);
        end_pos += 1;
    }

    // Both the token and the error begin at the opening '0b'
    let prefix_pos = start_pos - 2;

    // No progress means that not a single binary digit was found
    if end_pos == start_pos {
        LexResult::Err(LexError {
            position: prefix_pos,
            end_position: end_pos,
            reason: String::from("Found an incomplete binary number"),
        })
    } else {
        LexResult::Some(Token::new_int(literal, prefix_pos), end_pos)
    }
}
/// Scans a floating point number, with or without an exponent
///
/// This function expects the following:
@ -153,7 +220,7 @@ fn scan_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Token,
}
/// Scans chars between [0-9a-fA-F], returns when none is found
fn scan_hex_digits(chars: &Vec<char>, start_pos: usize, current: String) -> (Token, usize) {
fn scan_hex_digits(chars: &[char], start_pos: usize, current: String) -> (Token, usize) {
match chars.get(start_pos) {
Some(c) if utils::is_hex_digit(*c) => {
scan_hex_digits(chars, start_pos + 1, utils::str_append(current, *c))
@ -295,6 +362,64 @@ mod tests {
}
}
// Should scan an octal number, stopping at the whitespace that follows it
#[test]
fn test_octal_1() {
    let chars = str_to_vec("0o20 ");

    if let LexResult::Some(token, next_pos) = scan(&chars, 0) {
        assert_eq!(token.token_type, TokenType::Int);
        assert_eq!(token.value, "0o20");
        assert_eq!(token.position, 0);
        assert_eq!(token.get_end_position(), 4);
        assert_eq!(next_pos, 4);
    } else {
        panic!("Expected a token")
    }
}
// Should fail when the `0o` prefix is not followed by any octal digit
#[test]
fn test_octal_2() {
    let chars = str_to_vec("0o ");
    let outcome = scan(&chars, 0);

    match &outcome {
        LexResult::Err(err) => {
            // The error spans exactly the dangling prefix
            assert_eq!(err.position, 0);
            assert_eq!(err.end_position, 2);
            assert_eq!(err.reason, "Found an incomplete octal number");
        }
        other => panic!("Expected an error, got {:?}", other),
    }
}
// Should scan a binary number, stopping at the whitespace that follows it
#[test]
fn test_binary_1() {
    let chars = str_to_vec("0b1011 ");

    if let LexResult::Some(token, next_pos) = scan(&chars, 0) {
        assert_eq!(token.token_type, TokenType::Int);
        assert_eq!(token.value, "0b1011");
        assert_eq!(token.position, 0);
        assert_eq!(token.get_end_position(), 6);
        assert_eq!(next_pos, 6);
    } else {
        panic!("Expected a token")
    }
}
// Should fail when the `0b` prefix is not followed by any binary digit
#[test]
fn test_binary_2() {
    let chars = str_to_vec("0b ");
    let outcome = scan(&chars, 0);

    match &outcome {
        LexResult::Err(err) => {
            // The error spans exactly the dangling prefix
            assert_eq!(err.position, 0);
            assert_eq!(err.end_position, 2);
            assert_eq!(err.reason, "Found an incomplete binary number");
        }
        other => panic!("Expected an error, got {:?}", other),
    }
}
// Should scan a double
#[test]
fn test_double_1() {

View File

@ -1,11 +1,7 @@
use crate::{
error_handling::{semantic_error::SemanticError, MistiError},
semantic::{
impls::SemanticCheck,
symbol_table::SymbolTable,
types::{Type, Typed},
},
syntax::ast::{Expression, ModuleMembers, Statement},
error_handling::MistiError,
semantic::{impls::SemanticCheck, symbol_table::SymbolTable},
syntax::ast::{ModuleMembers, Statement},
};
impl SemanticCheck for ModuleMembers<'_> {

View File

@ -74,7 +74,7 @@ impl Typed for Expression<'_> {
};
// Only supported unary operator: - & !
if *op == "-" {
if op.value == "-" {
if !expr_type.is_value("Int") && !expr_type.is_value("Float") {
return Err(MistiError::Semantic(SemanticError {
error_start: 0,
@ -87,7 +87,7 @@ impl Typed for Expression<'_> {
} else {
return Ok(Type::Value("Int".into()));
}
} else if *op == "!" {
} else if op.value == "!" {
if !expr_type.is_value("Bool") {
return Err(MistiError::Semantic(SemanticError {
error_start: 0,
@ -99,16 +99,16 @@ impl Typed for Expression<'_> {
}
}
unreachable!("Illegal state: Found an unexpected unary operator during semantic analysis: {}", *op);
unreachable!("Illegal state: Found an unexpected unary operator during semantic analysis: {}", op.value);
}
Expression::BinaryOperator(exp1, exp2, operator) => {
let t1 = exp1.get_type(scope)?;
let t2 = exp2.get_type(scope)?;
// TODO: There's definitely a better way to do this
if *operator == "+" && t1.is_value("Int") && t2.is_value("Int") {
if operator.value == "+" && t1.is_value("Int") && t2.is_value("Int") {
return Ok(Type::Value("Int".into()));
} else if *operator == "-" && t1.is_value("Int") && t2.is_value("Int") {
} else if operator.value == "-" && t1.is_value("Int") && t2.is_value("Int") {
return Ok(Type::Value("Int".into()));
}

View File

@ -6,6 +6,14 @@ pub struct FunctionCall<'a> {
pub arguments: Box<ArgumentsList<'a>>,
}
impl Positionable for FunctionCall<'_> {
    /// Returns the `(start, end)` span of the whole call: it starts where the
    /// `function` expression starts and ends where the `arguments` list ends.
    fn get_position(&self) -> (usize, usize) {
        let start = self.function.get_position().0;
        let end = self.arguments.get_position().1;

        (start, end)
    }
}
#[derive(Debug)]
pub struct ArgumentsList<'a> {
pub arguments: Vec<Expression<'a>>,

View File

@ -72,8 +72,10 @@ pub enum Expression<'a> {
Boolean(&'a Token),
Identifier(&'a Token),
FunctionCall(FunctionCall<'a>),
UnaryOperator(&'a String, Box<Expression<'a>>),
BinaryOperator(Box<Expression<'a>>, Box<Expression<'a>>, &'a String),
/// operator, right expression
UnaryOperator(&'a Token, Box<Expression<'a>>),
/// left expression, right expression, operator
BinaryOperator(Box<Expression<'a>>, Box<Expression<'a>>, &'a Token),
}
impl Positionable for Expression<'_> {
@ -86,9 +88,17 @@ impl Positionable for Expression<'_> {
Expression::Float(id) => (id.position, id.get_end_position()),
Expression::String(id) => (id.position, id.get_end_position()),
Expression::Boolean(id) => (id.position, id.get_end_position()),
Expression::FunctionCall(_) => (0, 1),
Expression::UnaryOperator(_, _) => (0, 1),
Expression::BinaryOperator(_, _, _) => (0, 1),
Expression::FunctionCall(f) => f.get_position(),
Expression::UnaryOperator(operator, exp) => {
let start = operator.position;
let (_, end) = exp.get_position();
(start, end)
}
Expression::BinaryOperator(left_expr, right_expr, _) => {
let (start, _) = left_expr.get_position();
let (_, end) = right_expr.get_position();
(start, end)
}
}
}
}

View File

@ -36,8 +36,7 @@ fn parse_many<'a>(
tokens, next_pos,
) {
Ok((expr, next_pos)) => {
let expr =
Expression::BinaryOperator(Box::new(prev_expr), Box::new(expr), &token.value);
let expr = Expression::BinaryOperator(Box::new(prev_expr), Box::new(expr), &token);
parse_many(tokens, next_pos, expr, indentation_level + indent_count)
}
@ -67,7 +66,7 @@ mod tests {
}
_ => panic!("Expected 2 identifiers"),
}
assert_eq!(">=", op)
assert_eq!(">=", op.value)
}
_ => panic!("Expected a binary expression with 2 identifiers"),
},
@ -98,7 +97,7 @@ mod tests {
match result {
Expression::BinaryOperator(_, _, op) => {
assert_eq!(op, ">=")
assert_eq!(op.value, ">=")
}
_ => panic!("Expected a binary operator"),
}
@ -112,7 +111,7 @@ mod tests {
match result {
Expression::BinaryOperator(_, _, op) => {
assert_eq!(op, "<=")
assert_eq!(op.value, "<=")
}
_ => panic!("Expected a binary operator"),
}
@ -128,7 +127,7 @@ mod tests {
match result {
Expression::BinaryOperator(_, _, op) => {
assert_eq!(op, "<=")
assert_eq!(op.value, "<=")
}
_ => panic!("Expected a binary operator"),
}
@ -143,7 +142,7 @@ mod tests {
match result {
Expression::BinaryOperator(_, _, op) => {
assert_eq!(op, "<=")
assert_eq!(op.value, "<=")
}
_ => panic!("Expected a binary operator"),
}
@ -158,7 +157,7 @@ mod tests {
match result {
Expression::BinaryOperator(_, _, op) => {
assert_eq!(op, ">=")
assert_eq!(op.value, ">=")
}
_ => panic!("Expected a binary operator"),
}

View File

@ -36,8 +36,7 @@ fn parse_many<'a>(
tokens, next_pos,
) {
Ok((expr, next_pos)) => {
let expr =
Expression::BinaryOperator(Box::new(prev_expr), Box::new(expr), &token.value);
let expr = Expression::BinaryOperator(Box::new(prev_expr), Box::new(expr), &token);
parse_many(tokens, next_pos, expr, indentation_level + indent_count)
}
@ -66,7 +65,7 @@ mod tests {
}
_ => panic!("Expected 2 identifiers"),
}
assert_eq!("==", op)
assert_eq!("==", op.value)
}
_ => panic!("Expected a binary expression with 2 identifiers"),
},
@ -97,7 +96,7 @@ mod tests {
match result {
Expression::BinaryOperator(_, _, op) => {
assert_eq!(op, "==")
assert_eq!(op.value, "==")
}
_ => panic!("Expected a binary operator"),
}
@ -114,7 +113,7 @@ mod tests {
match result {
Expression::BinaryOperator(_, _, op) => {
assert_eq!(op, "==")
assert_eq!(op.value, "==")
}
_ => panic!("Expected a binary operator"),
}
@ -130,7 +129,7 @@ mod tests {
match result {
Expression::BinaryOperator(_, _, op) => {
assert_eq!(op, "==")
assert_eq!(op.value, "==")
}
_ => panic!("Expected a binary operator"),
}
@ -145,7 +144,7 @@ mod tests {
match result {
Expression::BinaryOperator(_, _, op) => {
assert_eq!(op, "==")
assert_eq!(op.value, "==")
}
_ => panic!("Expected a binary operator"),
}
@ -160,7 +159,7 @@ mod tests {
match result {
Expression::BinaryOperator(_, _, op) => {
assert_eq!(op, "==")
assert_eq!(op.value, "==")
}
_ => panic!("Expected a binary operator"),
}

View File

@ -36,11 +36,8 @@ fn parse_many<'a>(
// match next
match super::unary::try_parse(tokens, next_pos) {
Ok((expr, next_pos)) => {
let expr = Expression::BinaryOperator(
Box::new(prev_expr),
Box::new(expr),
&token.value,
);
let expr =
Expression::BinaryOperator(Box::new(prev_expr), Box::new(expr), &token);
parse_many(tokens, next_pos, expr, indentation_level + indent_count)
}
@ -70,7 +67,7 @@ mod tests {
}
_ => panic!("Expected 2 identifiers"),
}
assert_eq!("*", op)
assert_eq!("*", op.value)
}
_ => panic!("Expected a binary expression with 2 identifiers"),
},
@ -101,7 +98,7 @@ mod tests {
match result {
Expression::BinaryOperator(_, _, op) => {
assert_eq!(op, "*")
assert_eq!(op.value, "*")
}
_ => panic!("Expected a binary operator"),
}
@ -118,7 +115,7 @@ mod tests {
match result {
Expression::BinaryOperator(_, _, op) => {
assert_eq!(op, "*")
assert_eq!(op.value, "*")
}
_ => panic!("Expected a binary operator"),
}
@ -134,7 +131,7 @@ mod tests {
match result {
Expression::BinaryOperator(_, _, op) => {
assert_eq!(op, "*")
assert_eq!(op.value, "*")
}
_ => panic!("Expected a binary operator"),
}
@ -149,7 +146,7 @@ mod tests {
match result {
Expression::BinaryOperator(_, _, op) => {
assert_eq!(op, "*")
assert_eq!(op.value, "*")
}
_ => panic!("Expected a binary operator"),
}
@ -164,7 +161,7 @@ mod tests {
match result {
Expression::BinaryOperator(_, _, op) => {
assert_eq!(op, "/")
assert_eq!(op.value, "/")
}
_ => panic!("Expected a binary operator"),
}
@ -179,7 +176,7 @@ mod tests {
match result {
Expression::BinaryOperator(_, _, op) => {
assert_eq!(op, "/")
assert_eq!(op.value, "/")
}
_ => panic!("Expected a binary operator"),
}

View File

@ -36,11 +36,8 @@ fn parse_many<'a>(
// Parse the next factor
match super::factor::try_parse(tokens, pos) {
Ok((expr, next_pos)) => {
let expr = Expression::BinaryOperator(
Box::new(prev_expr),
Box::new(expr),
&token.value,
);
let expr =
Expression::BinaryOperator(Box::new(prev_expr), Box::new(expr), &token);
parse_many(tokens, next_pos, expr, indentation_level + indent_count)
}
@ -71,7 +68,7 @@ mod tests {
}
_ => panic!("Expected 2 identifiers"),
}
assert_eq!("+", op)
assert_eq!("+", op.value)
}
_ => panic!("Expected a binary expression with 2 identifiers"),
},
@ -102,7 +99,7 @@ mod tests {
match result {
Expression::BinaryOperator(_, _, op) => {
assert_eq!(op, "+")
assert_eq!(op.value, "+")
}
_ => panic!("Expected a binary operator"),
}
@ -116,7 +113,7 @@ mod tests {
match result {
Expression::BinaryOperator(_, _, op) => {
assert_eq!(op, "+")
assert_eq!(op.value, "+")
}
_ => panic!("Expected a binary operator"),
}
@ -132,7 +129,7 @@ mod tests {
match result {
Expression::BinaryOperator(_, _, op) => {
assert_eq!(op, "+")
assert_eq!(op.value, "+")
}
_ => panic!("Expected a binary operator"),
}
@ -147,7 +144,7 @@ mod tests {
match result {
Expression::BinaryOperator(_, _, op) => {
assert_eq!(op, "+")
assert_eq!(op.value, "+")
}
_ => panic!("Expected a binary operator"),
}
@ -162,7 +159,7 @@ mod tests {
match result {
Expression::BinaryOperator(_, _, op) => {
assert_eq!(op, "+")
assert_eq!(op.value, "+")
}
_ => panic!("Expected a binary operator"),
}
@ -177,7 +174,7 @@ mod tests {
match result {
Expression::BinaryOperator(_, _, op) => {
assert_eq!(op, "+")
assert_eq!(op.value, "+")
}
_ => panic!("Expected a binary operator"),
}

View File

@ -16,7 +16,7 @@ pub fn try_parse(tokens: &Vec<Token>, pos: usize) -> ParsingResult<Expression> {
Some(token) if token.value == "!" || token.value == "-" => {
match Expression::try_parse(tokens, pos + 1) {
Ok((expression, next_pos)) => Ok((
Expression::UnaryOperator(&token.value, Box::new(expression)),
Expression::UnaryOperator(&token, Box::new(expression)),
next_pos,
)),
_ => Err(ParsingError::Unmatched),
@ -53,7 +53,7 @@ mod tests {
Ok((Expression::UnaryOperator(operator, expression), _)) => {
match (operator, *expression) {
(op, Expression::Int(value)) => {
assert_eq!(*op, "-");
assert_eq!(op.value, "-");
assert_eq!(value.value, "10");
}
_ => panic!("unexpected values"),
@ -70,7 +70,7 @@ mod tests {
match expression {
Ok((Expression::UnaryOperator(operator, expression), _)) => {
assert_eq!(*operator, "-");
assert_eq!(operator.value, "-");
match *expression {
Expression::BinaryOperator(_, _, _) => {
// :D