Simple ASI

master
Araozu 2023-02-14 15:22:29 -05:00
parent 5d40be6d90
commit cc6e3fc78a
5 changed files with 185 additions and 13 deletions


@ -15,6 +15,7 @@
- Get datatype of an identifier from the symbol table
- Improve documentation of the code
- Simple ASI: insert a semicolon after a single new line or a series of new lines (see the short sketch after this file's diff)
## v0.0.2

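The net effect, sketched as a test in the style of the ones added to the lexer below (this assumes get_tokens and TokenType are in scope, as they are in that test module):

#[test]
fn new_lines_become_a_single_semicolon() {
    // "3", blank lines, "22": the run of line breaks collapses into one ";" token
    let tokens = get_tokens(&String::from("3\n\n22")).unwrap();
    assert_eq!(TokenType::Semicolon, tokens[1].token_type);
    assert_eq!(TokenType::Number, tokens[2].token_type);
}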

@ -65,12 +65,7 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
return LexResult::None(current_pos)
}
// Ignore new lines for now...
if next_char == '\n' {
return next_token(chars, current_pos + 1)
}
// Handle whitespace recursively
if next_char == ' ' {
return next_token(chars, current_pos + 1)
}
@ -82,6 +77,7 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
.or_else(|| scanner::string(next_char, chars, current_pos))
.or_else(|| scanner::operator(next_char, chars, current_pos))
.or_else(|| scanner::grouping_sign(next_char, chars, current_pos))
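// new_line performs the simple ASI: a line break yields a Semicolon token instead of being skipped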
.or_else(|| scanner::new_line(next_char, chars, current_pos))
.unwrap_or_else(|| {
let error = LexError {
position: current_pos,
@ -201,4 +197,30 @@ mod tests {
assert_eq!(TokenType::RightBracket, t.token_type);
assert_eq!("]", t.value);
}
#[test]
fn should_scan_new_line() {
let input = String::from("3\n22");
let tokens = get_tokens(&input).unwrap();
assert_eq!(TokenType::Semicolon, tokens[1].token_type);
}
#[test]
fn should_scan_multiple_new_lines() {
let input = String::from("3\n\n\n22");
let tokens = get_tokens(&input).unwrap();
assert_eq!(TokenType::Semicolon, tokens[1].token_type);
assert_eq!(TokenType::Number, tokens[2].token_type);
}
#[test]
fn should_scan_multiple_new_lines_with_whitespace_in_between() {
let input = String::from("3\n \n \n22");
let tokens = get_tokens(&input).unwrap();
assert_eq!(TokenType::Semicolon, tokens[1].token_type);
assert_eq!(TokenType::Number, tokens[2].token_type);
}
}


@ -4,6 +4,7 @@ mod number;
mod operator;
mod identifier;
mod string;
mod new_line;
// This module contains the individual scanners, and exports them
@ -53,3 +54,8 @@ pub fn string(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult>
(c == '"').then(|| string::scan(chars, start_pos + 1)) (c == '"').then(|| string::scan(chars, start_pos + 1))
} }
/// Attempts to scan a new line. If none is found, returns None so that other scanners can be chained.
pub fn new_line(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
(c == '\n').then(|| new_line::scan(chars, start_pos))
}
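This is the same Option-chaining dispatch that next_token uses above: a scanner returns Some only when its trigger character matches, so a failed match falls through to the next .or_else. A minimal, self-contained sketch of the pattern (toy scanners, not the crate's own API):

fn scan_digit(c: char) -> Option<&'static str> {
    // bool::then maps true to Some(closure result) and false to None
    c.is_ascii_digit().then(|| "number")
}

fn scan_new_line(c: char) -> Option<&'static str> {
    (c == '\n').then(|| "semicolon (ASI)")
}

fn main() {
    let classify = |c: char| {
        scan_digit(c)
            .or_else(|| scan_new_line(c))
            .unwrap_or("unknown")
    };
    assert_eq!("number", classify('7'));
    assert_eq!("semicolon (ASI)", classify('\n'));
    assert_eq!("unknown", classify('?'));
}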


@ -0,0 +1,142 @@
use crate::{
lexic::{
token, LexResult,
},
token::TokenType
};
/// Function to handle new lines
///
/// It performs Automatic Semicolon Insertion, inserting a semicolon after
/// every new line or group of new lines
///
/// Assumes the char at start_pos is a new line
pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
let current = chars.get(start_pos);
match current {
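// A run of consecutive new lines: keep consuming; the whole run yields a single semicolon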
Some(c) if *c == '\n' => {
scan(chars, start_pos + 1)
}
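// Whitespace after a new line: if another new line follows it, keep scanning;
// otherwise the run is over and the semicolon is emitted here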
Some(c) if *c == ' ' => {
match look_ahead_for_new_line(chars, start_pos + 1) {
Some(next_pos) => scan(chars, next_pos),
None => {
let token = token::new(
String::from(";"),
start_pos as i32,
TokenType::Semicolon,
);
LexResult::Some(token, start_pos)
}
}
}
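// Any other character, or the end of input: emit the semicolon for the run of new lines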
Some(_) | None => {
let token = token::new(
String::from(";"),
start_pos as i32,
TokenType::Semicolon,
);
LexResult::Some(token, start_pos)
}
}
}
/// Skips spaces looking for another new line. Returns the position just after that new line,
/// or None if any other character (or the end of input) is found first.
fn look_ahead_for_new_line(chars: &Vec<char>, pos: usize) -> Option<usize> {
match chars.get(pos) {
Some(c) if *c == ' ' => {
look_ahead_for_new_line(chars, pos + 1)
}
Some(c) if *c == '\n' => {
Some(pos + 1)
}
Some(_) | None => {
None
}
}
}
#[cfg(test)]
mod tests {
use crate::lexic::token::TokenType;
use super::*;
fn str_to_vec(s: &str) -> Vec<char> {
s.chars().collect()
}
#[test]
fn should_emit_semicolon_instead_of_new_line() {
let input = str_to_vec("\n");
let start_pos = 0;
if let LexResult::Some(token, next_pos) = scan(&input, start_pos) {
assert_eq!(TokenType::Semicolon, token.token_type);
assert_eq!(1, next_pos);
} else {
panic!()
}
}
#[test]
fn should_emit_a_single_semicolon_with_multiple_new_lines() {
let input = str_to_vec("\n\n\n");
let start_pos = 0;
if let LexResult::Some(token, next_pos) = scan(&input, start_pos) {
assert_eq!(TokenType::Semicolon, token.token_type);
assert_eq!(3, next_pos);
} else {
panic!()
}
let input = str_to_vec("\n\n\naToken");
let start_pos = 0;
if let LexResult::Some(token, next_pos) = scan(&input, start_pos) {
assert_eq!(TokenType::Semicolon, token.token_type);
assert_eq!(3, next_pos);
} else {
panic!()
}
}
#[test]
fn should_emit_a_single_semicolon_with_multiple_new_lines_and_whitespace() {
let input = str_to_vec("\n \n \n");
let start_pos = 0;
if let LexResult::Some(token, next_pos) = scan(&input, start_pos) {
assert_eq!(TokenType::Semicolon, token.token_type);
assert_eq!(6, next_pos);
} else {
panic!()
}
let input = str_to_vec("\n \n \n aToken");
let start_pos = 0;
if let LexResult::Some(token, next_pos) = scan(&input, start_pos) {
assert_eq!(TokenType::Semicolon, token.token_type);
assert_eq!(6, next_pos);
} else {
panic!()
}
let input = str_to_vec("\n \n \n ");
let start_pos = 0;
if let LexResult::Some(token, next_pos) = scan(&input, start_pos) {
assert_eq!(TokenType::Semicolon, token.token_type);
assert_eq!(6, next_pos);
} else {
panic!()
}
}
}


@ -10,6 +10,7 @@ pub enum TokenType {
RightBracket,
LeftBrace,
RightBrace,
Semicolon,
VAR,
VAL,
EOF,
@ -21,14 +22,14 @@ pub struct Token {
pub value: String,
/// The absolute position of this token, from the
/// start of the file
_position: i32,
}
pub fn new_eof(position: i32) -> Token {
Token {
token_type: TokenType::EOF,
value: String::from(""),
_position: position,
}
}
@ -36,7 +37,7 @@ pub fn new_number(value: String, position: i32) -> Token {
Token {
token_type: TokenType::Number,
value,
_position: position
}
}
@ -44,19 +45,19 @@ pub fn new_operator(value: String, position: i32) -> Token {
Token {
token_type: TokenType::Operator,
value,
_position: position
}
}
pub fn new(value: String, position: i32, token_type: TokenType) -> Token {
Token {token_type, value, _position: position}
}
pub fn new_identifier(value: String, position: i32) -> Token {
Token {
token_type: TokenType::Identifier,
value,
_position: position,
}
}
@ -64,6 +65,6 @@ pub fn new_string(value: String, position: i32) -> Token {
Token {
token_type: TokenType::String,
value,
_position: position,
}
}