diff --git a/CHANGELOG.md b/CHANGELOG.md
index 41b06fc..1a1873b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,7 @@
 
 - Get datatype of an identifier from the symbol table
 - Improve documentation of the code
+- Simple ASI: insert a semicolon after a single new line or a series of new lines
 
 ## v0.0.2
 
diff --git a/src/lexic/mod.rs b/src/lexic/mod.rs
index 2894eef..7d643be 100755
--- a/src/lexic/mod.rs
+++ b/src/lexic/mod.rs
@@ -65,12 +65,7 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
         return LexResult::None(current_pos)
     }
 
-    // Ignore new lines for now...
-    if next_char == '\n' {
-        return next_token(chars, current_pos + 1)
-    }
-
-    // Handle whitespace recursively
+    // Handle whitespace recursively.
     if next_char == ' ' {
         return next_token(chars, current_pos + 1)
     }
@@ -82,6 +77,7 @@ fn next_token(chars: &Chars, current_pos: usize) -> LexResult {
         .or_else(|| scanner::string(next_char, chars, current_pos))
         .or_else(|| scanner::operator(next_char, chars, current_pos))
         .or_else(|| scanner::grouping_sign(next_char, chars, current_pos))
+        .or_else(|| scanner::new_line(next_char, chars, current_pos))
         .unwrap_or_else(|| {
             let error = LexError {
                 position: current_pos,
@@ -201,4 +197,30 @@ mod tests {
         assert_eq!(TokenType::RightBracket, t.token_type);
         assert_eq!("]", t.value);
     }
+
+    #[test]
+    fn should_scan_new_line() {
+        let input = String::from("3\n22");
+        let tokens = get_tokens(&input).unwrap();
+
+        assert_eq!(TokenType::Semicolon, tokens[1].token_type);
+    }
+
+    #[test]
+    fn should_scan_multiple_new_lines() {
+        let input = String::from("3\n\n\n22");
+        let tokens = get_tokens(&input).unwrap();
+
+        assert_eq!(TokenType::Semicolon, tokens[1].token_type);
+        assert_eq!(TokenType::Number, tokens[2].token_type);
+    }
+
+    #[test]
+    fn should_scan_multiple_new_lines_with_whitespace_in_between() {
+        let input = String::from("3\n \n \n22");
+        let tokens = get_tokens(&input).unwrap();
+
+        assert_eq!(TokenType::Semicolon, tokens[1].token_type);
+        assert_eq!(TokenType::Number, tokens[2].token_type);
+    }
 }
diff --git a/src/lexic/scanner/mod.rs b/src/lexic/scanner/mod.rs
index 74517d1..79b8544 100755
--- a/src/lexic/scanner/mod.rs
+++ b/src/lexic/scanner/mod.rs
@@ -4,6 +4,7 @@ mod number;
 mod operator;
 mod identifier;
 mod string;
+mod new_line;
 
 // This module contains the individual scanners, and exports them
 
@@ -53,3 +54,8 @@ pub fn string(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
     (c == '"').then(|| string::scan(chars, start_pos + 1))
 }
 
+/// Attempts to scan a new line. If not found, returns None so other scanners can be chained.
+pub fn new_line(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
+    (c == '\n').then(|| new_line::scan(chars, start_pos))
+}
+
diff --git a/src/lexic/scanner/new_line.rs b/src/lexic/scanner/new_line.rs
new file mode 100644
index 0000000..06f56d7
--- /dev/null
+++ b/src/lexic/scanner/new_line.rs
@@ -0,0 +1,142 @@
+use crate::{
+    lexic::{
+        token, LexResult,
+    },
+    token::TokenType
+};
+
+/// Function to handle new lines
+///
+/// It performs Automatic Semicolon Insertion, inserting a semicolon after
+/// every new line or group of new lines
+///
+/// Assumes the char at start_pos is a new line
+pub fn scan(chars: &Vec<char>, start_pos: usize) -> LexResult {
+    let current = chars.get(start_pos);
+
+    match current {
+        Some(c) if *c == '\n' => {
+            scan(chars, start_pos + 1)
+        }
+        Some(c) if *c == ' ' => {
+            match look_ahead_for_new_line(chars, start_pos + 1) {
+                Some(next_pos) => scan(chars, next_pos),
+                None => {
+                    let token = token::new(
+                        String::from(";"),
+                        start_pos as i32,
+                        TokenType::Semicolon,
+                    );
+                    LexResult::Some(token, start_pos)
+                }
+            }
+        }
+        Some(_) | None => {
+            let token = token::new(
+                String::from(";"),
+                start_pos as i32,
+                TokenType::Semicolon,
+            );
+            LexResult::Some(token, start_pos)
+        }
+    }
+}
+
+/// Returns the position after the new line
+fn look_ahead_for_new_line(chars: &Vec<char>, pos: usize) -> Option<usize> {
+    match chars.get(pos) {
+        Some(c) if *c == ' ' => {
+            look_ahead_for_new_line(chars, pos + 1)
+        }
+        Some(c) if *c == '\n' => {
+            Some(pos + 1)
+        }
+        Some(_) | None => {
+            None
+        }
+    }
+}
+
+
+#[cfg(test)]
+mod tests {
+    use crate::lexic::token::TokenType;
+
+    use super::*;
+
+    fn str_to_vec(s: &str) -> Vec<char> {
+        s.chars().collect()
+    }
+
+    #[test]
+    fn should_emit_semicolon_instead_of_new_line() {
+        let input = str_to_vec("\n");
+        let start_pos = 0;
+
+        if let LexResult::Some(token, next_pos) = scan(&input, start_pos) {
+            assert_eq!(TokenType::Semicolon, token.token_type);
+            assert_eq!(1, next_pos);
+        } else {
+            panic!()
+        }
+    }
+
+    #[test]
+    fn should_emit_a_single_semicolon_with_multiple_new_lines() {
+        let input = str_to_vec("\n\n\n");
+        let start_pos = 0;
+
+        if let LexResult::Some(token, next_pos) = scan(&input, start_pos) {
+            assert_eq!(TokenType::Semicolon, token.token_type);
+            assert_eq!(3, next_pos);
+        } else {
+            panic!()
+        }
+
+
+        let input = str_to_vec("\n\n\naToken");
+        let start_pos = 0;
+
+        if let LexResult::Some(token, next_pos) = scan(&input, start_pos) {
+            assert_eq!(TokenType::Semicolon, token.token_type);
+            assert_eq!(3, next_pos);
+        } else {
+            panic!()
+        }
+    }
+
+    #[test]
+    fn should_emit_a_single_semicolon_with_multiple_new_lines_and_whitespace() {
+        let input = str_to_vec("\n  \n \n");
+        let start_pos = 0;
+
+        if let LexResult::Some(token, next_pos) = scan(&input, start_pos) {
+            assert_eq!(TokenType::Semicolon, token.token_type);
+            assert_eq!(6, next_pos);
+        } else {
+            panic!()
+        }
+
+
+        let input = str_to_vec("\n  \n \n aToken");
+        let start_pos = 0;
+
+        if let LexResult::Some(token, next_pos) = scan(&input, start_pos) {
+            assert_eq!(TokenType::Semicolon, token.token_type);
+            assert_eq!(6, next_pos);
+        } else {
+            panic!()
+        }
+
+
+        let input = str_to_vec("\n  \n \n ");
+        let start_pos = 0;
+
+        if let LexResult::Some(token, next_pos) = scan(&input, start_pos) {
+            assert_eq!(TokenType::Semicolon, token.token_type);
+            assert_eq!(6, next_pos);
+        } else {
+            panic!()
+        }
+    }
+}
diff --git a/src/token.rs b/src/token.rs
index b7772ba..d25f236 100755
--- a/src/token.rs
+++ b/src/token.rs
@@ -10,6 +10,7 @@ pub enum TokenType {
     RightBracket,
     LeftBrace,
     RightBrace,
+    Semicolon,
     VAR,
     VAL,
     EOF,
@@ -21,14 +22,14 @@ pub struct Token {
     pub value: String,
     /// The absolute position of this token, from the
     /// start of the file
-    position: i32,
+    _position: i32,
 }
 
 pub fn new_eof(position: i32) -> Token {
     Token {
         token_type: TokenType::EOF,
         value: String::from(""),
-        position,
+        _position: position,
     }
 }
 
@@ -36,7 +37,7 @@ pub fn new_number(value: String, position: i32) -> Token {
     Token {
         token_type: TokenType::Number,
         value,
-        position
+        _position: position
     }
 }
 
@@ -44,19 +45,19 @@ pub fn new_operator(value: String, position: i32) -> Token {
     Token {
         token_type: TokenType::Operator,
         value,
-        position
+        _position: position
     }
 }
 
 pub fn new(value: String, position: i32, token_type: TokenType) -> Token {
-    Token {token_type, value, position}
+    Token {token_type, value, _position: position}
 }
 
 pub fn new_identifier(value: String, position: i32) -> Token {
     Token {
         token_type: TokenType::Identifier,
         value,
-        position,
+        _position: position,
     }
 }
 
@@ -64,6 +65,6 @@ pub fn new_string(value: String, position: i32) -> Token {
     Token {
         token_type: TokenType::String,
         value,
-        position,
+        _position: position,
     }
}
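
Note on the change (not part of the patch): the new `new_line` scanner is chained after the existing scanners with `.or_else`, and it implements a simple ASI rule: a run of new lines, possibly with spaces between them, collapses into a single Semicolon token. The sketch below is a standalone illustration of that rule with hypothetical names (`Tok`, `tokenize`); it is not code from the repository, just a minimal model of the behavior the diff adds.

// Standalone sketch of the ASI rule, assuming only the behavior described above.
#[derive(Debug, PartialEq)]
enum Tok {
    Number(String),
    Semicolon,
}

fn tokenize(input: &str) -> Vec<Tok> {
    let chars: Vec<char> = input.chars().collect();
    let mut tokens = Vec::new();
    let mut pos = 0;

    while pos < chars.len() {
        match chars[pos] {
            // ASI: consume the whole run of new lines (and any spaces that
            // sit between two new lines) and emit a single semicolon token.
            '\n' => {
                pos += 1;
                loop {
                    // Skip spaces, but only if another '\n' follows them.
                    let mut look = pos;
                    while look < chars.len() && chars[look] == ' ' {
                        look += 1;
                    }
                    if look < chars.len() && chars[look] == '\n' {
                        pos = look + 1;
                    } else {
                        break;
                    }
                }
                tokens.push(Tok::Semicolon);
            }
            // Plain whitespace outside a new-line run is ignored.
            ' ' => pos += 1,
            c if c.is_ascii_digit() => {
                let start = pos;
                while pos < chars.len() && chars[pos].is_ascii_digit() {
                    pos += 1;
                }
                tokens.push(Tok::Number(chars[start..pos].iter().collect()));
            }
            // Other characters are out of scope for this sketch.
            _ => pos += 1,
        }
    }
    tokens
}

fn main() {
    // "3\n \n \n22" lexes to: Number("3"), Semicolon, Number("22"),
    // mirroring the should_scan_multiple_new_lines_with_whitespace_in_between test.
    let tokens = tokenize("3\n \n \n22");
    assert_eq!(
        tokens,
        vec![
            Tok::Number("3".into()),
            Tok::Semicolon,
            Tok::Number("22".into()),
        ]
    );
    println!("{:?}", tokens);
}

Running `main` prints `[Number("3"), Semicolon, Number("22")]`: the three new lines and the spaces between them produce exactly one semicolon, which is the behavior the new `scan` and `look_ahead_for_new_line` functions implement in the diff.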