feat: Scanning of multiline comments

This commit is contained in:
Araozu 2024-07-29 16:18:33 -05:00
parent 69339a955e
commit 14c1b6f8d8
6 changed files with 180 additions and 1 deletions

View File

@ -25,6 +25,8 @@
## v0.0.15
- [x] Multiline comments
- [ ] Nested multiline comments
- [ ] Include comments in the AST
- [ ] Replace all panics with actual errors
- [ ] Remove all old codegen

View File

@ -21,6 +21,7 @@ pub enum MistiError {
/// Error reported by the lexer when the input cannot be scanned into a token.
#[derive(Serialize, Debug)]
pub struct LexError {
/// Position in the input that the error is reported at.
pub position: usize,
// TODO: Add an end position
/// Human-readable description of what went wrong.
pub reason: String,
}

View File

@ -135,6 +135,7 @@ fn next_token(
.or_else(|| scanner::datatype(next_char, chars, current_pos))
.or_else(|| scanner::string(next_char, chars, current_pos))
.or_else(|| scanner::new_comment(next_char, chars, current_pos))
.or_else(|| scanner::new_multiline_comment(next_char, chars, current_pos))
.or_else(|| scanner::operator(next_char, chars, current_pos))
.or_else(|| scanner::grouping_sign(next_char, chars, current_pos))
.or_else(|| scanner::new_line(next_char, chars, current_pos))

View File

@ -68,3 +68,11 @@ pub fn new_comment(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexRe
_ => None,
}
}
/// Attempts to scan a multiline comment starting at `start_pos`.
///
/// `c` is the character at `start_pos`. A comment is recognised only when `c` is `/`
/// and the character right after it is `*`; in that case the scan is delegated to
/// `new_comment::scan_multiline` and its result is returned wrapped in `Some`.
/// For any other pair of characters `None` is returned so another scanner can try.
pub fn new_multiline_comment(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
    let following = chars.get(start_pos + 1);
    let opens_comment = c == '/' && matches!(following, Some('*'));

    // `then` only runs the scanner when the `/*` opener is actually present.
    opens_comment.then(|| new_comment::scan_multiline(chars, start_pos))
}

View File

@ -1,5 +1,8 @@
use super::token::Token;
use crate::lexic::{utils, LexResult};
use crate::{
error_handling::LexError,
lexic::{utils, LexResult},
};
/// Scans a new line.
///
@ -26,6 +29,74 @@ fn scan_any_except_new_line(
}
}
/// Scans a multiline comment.
///
/// This function assumes that the character at `start_pos` is '/'
/// and the character at `start_pos + 1` is '*'.
///
/// On success returns `LexResult::Some` with a multiline-comment token (its value is
/// the text between the delimiters, its position is `start_pos`) together with the
/// position right after the closing `*/`.
///
/// If the input ends before the comment is closed, returns `LexResult::Err` with the
/// error positioned at `start_pos`.
pub fn scan_multiline(chars: &Vec<char>, start_pos: usize) -> LexResult {
    // Skip the opening `/*` so that only the comment's contents are collected.
    match multiline_impl(chars, start_pos + 2) {
        Some((value, next_position)) => LexResult::Some(
            Token::new_multiline_comment(value, start_pos),
            next_position,
        ),
        // Incomplete multiline comment: the input ended before a `*/` was found.
        None => LexResult::Err(LexError {
            position: start_pos,
            // TODO: add an end_position
            reason: "Unfinished multiline comment".into(),
        }),
    }
}
/// Collects the contents of a multiline comment.
///
/// `start_pos` must point at the first character *after* the opening `/*`.
///
/// Returns the text found before the closing `*/` together with the position right
/// after that `*/`, or `None` if the input ends before the comment is closed.
fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(String, usize)> {
    let mut current_position = start_pos;
    let mut result = String::new();

    loop {
        // Running out of input before finding `*/` means the comment is unfinished.
        // TODO: Also return the position where this token ends,
        // to display better error messages.
        // Requires LexError to implement an end_position field
        let current = chars.get(current_position)?;

        match (current, chars.get(current_position + 1)) {
            // End of the comment: return the contents, ignoring the `*/`
            ('*', Some('/')) => return Some((result, current_position + 2)),
            // Anything else is part of the comment. Only ONE character is consumed
            // per step, so that in a run of asterisks such as `**/` the last `*`
            // is still examined as the potential start of the closing `*/`.
            // TODO: Check for a nested comment (`/*`) instead of appending
            (c, _) => {
                result.push(*c);
                current_position += 1;
            }
        }
    }
}
#[cfg(test)]
mod tests {
use crate::lexic::scanner::TokenType;
@ -73,4 +144,91 @@ mod tests {
}
}
}
#[test]
fn should_scan_multiline() {
    // Smallest possible comment: the opener immediately followed by the closer.
    let chars = str_to_vec("/**/");

    let LexResult::Some(token, next_position) = scan_multiline(&chars, 0) else {
        panic!("Expected a multine comment")
    };

    assert_eq!(4, next_position);
    assert_eq!("", token.value);
    assert_eq!(0, token.position);
    assert_eq!(TokenType::MultilineComment, token.token_type);
}
#[test]
fn should_scan_multiline_2() {
    // A single-line comment body; the surrounding spaces belong to the value.
    let chars = str_to_vec("/* my comment */");

    let LexResult::Some(token, next_position) = scan_multiline(&chars, 0) else {
        panic!("Expected a multine comment")
    };

    assert_eq!(16, next_position);
    assert_eq!(" my comment ", token.value);
    assert_eq!(0, token.position);
    assert_eq!(TokenType::MultilineComment, token.token_type);
}
#[test]
fn should_scan_multiline_with_multiple_lines() {
    // A new line inside the comment must be kept as part of its value.
    let chars = str_to_vec("/* my\ncomment */");

    let LexResult::Some(token, next_position) = scan_multiline(&chars, 0) else {
        panic!("Expected a multine comment")
    };

    assert_eq!(16, next_position);
    assert_eq!(" my\ncomment ", token.value);
    assert_eq!(0, token.position);
    assert_eq!(TokenType::MultilineComment, token.token_type);
}
#[test]
fn should_not_scan_multiline_comment_if_invalid() {
    // The comment is never closed, so an error at the opener's position is expected.
    let chars = str_to_vec("/* my\ncomment");

    let LexResult::Err(error) = scan_multiline(&chars, 0) else {
        panic!("Expected an error scannning an incomplete multiline comment")
    };

    assert_eq!(0, error.position);
}
#[test]
fn should_scan_multiline_comments_with_asterisk() {
    // A lone `*` that is not followed by `/` is ordinary comment content.
    let chars = str_to_vec("/* my * comment */");

    let LexResult::Some(token, next_position) = scan_multiline(&chars, 0) else {
        panic!("Expected a multine comment")
    };

    assert_eq!(18, next_position);
    assert_eq!(" my * comment ", token.value);
    assert_eq!(0, token.position);
    assert_eq!(TokenType::MultilineComment, token.token_type);
}
}

View File

@ -16,6 +16,7 @@ pub enum TokenType {
RightBrace,
NewLine,
Comment,
MultilineComment,
Comma,
INDENT,
DEDENT,
@ -114,6 +115,14 @@ impl Token {
}
}
/// Builds a token of type `TokenType::MultilineComment` holding the given
/// `value` and `position`.
pub fn new_multiline_comment(value: String, position: usize) -> Token {
    let token_type = TokenType::MultilineComment;

    Token {
        token_type,
        value,
        position,
    }
}
pub fn new_indent(position: usize) -> Token {
Token {
token_type: TokenType::INDENT,