feat: Scanning of multiline comments
This commit is contained in:
parent
69339a955e
commit
14c1b6f8d8
@ -25,6 +25,8 @@
|
||||
|
||||
## v0.0.15
|
||||
|
||||
- [x] Multiline comments
|
||||
- [ ] Nested multiline comments
|
||||
- [ ] Include comments in the AST
|
||||
- [ ] Replace all panics with actual errors
|
||||
- [ ] Remove all old codegen
|
||||
|
@ -21,6 +21,7 @@ pub enum MistiError {
|
||||
/// Error reported by the lexer when a token cannot be scanned.
#[derive(Serialize, Debug)]
pub struct LexError {
    /// Position the error is attached to; the multiline comment scanner
    /// sets it to the position where the offending token starts.
    pub position: usize,
    // TODO: Add an end position
    /// Human-readable explanation of what went wrong.
    pub reason: String,
}
|
||||
|
||||
|
@ -135,6 +135,7 @@ fn next_token(
|
||||
.or_else(|| scanner::datatype(next_char, chars, current_pos))
|
||||
.or_else(|| scanner::string(next_char, chars, current_pos))
|
||||
.or_else(|| scanner::new_comment(next_char, chars, current_pos))
|
||||
.or_else(|| scanner::new_multiline_comment(next_char, chars, current_pos))
|
||||
.or_else(|| scanner::operator(next_char, chars, current_pos))
|
||||
.or_else(|| scanner::grouping_sign(next_char, chars, current_pos))
|
||||
.or_else(|| scanner::new_line(next_char, chars, current_pos))
|
||||
|
@ -68,3 +68,11 @@ pub fn new_comment(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexRe
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_multiline_comment(c: char, chars: &Vec<char>, start_pos: usize) -> Option<LexResult> {
|
||||
let next_char = chars.get(start_pos + 1);
|
||||
match (c, next_char) {
|
||||
('/', Some('*')) => Some(new_comment::scan_multiline(chars, start_pos)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,8 @@
|
||||
use super::token::Token;
|
||||
use crate::lexic::{utils, LexResult};
|
||||
use crate::{
|
||||
error_handling::LexError,
|
||||
lexic::{utils, LexResult},
|
||||
};
|
||||
|
||||
/// Scans a new line.
|
||||
///
|
||||
@ -26,6 +29,74 @@ fn scan_any_except_new_line(
|
||||
}
|
||||
}
|
||||
|
||||
/// Scans a multiline commend
|
||||
/// This function assumes that the character at `start_pos` is '/'
|
||||
/// and the character at `start_pos + 1` is '*'
|
||||
pub fn scan_multiline(chars: &Vec<char>, start_pos: usize) -> LexResult {
|
||||
match multiline_impl(chars, start_pos + 2) {
|
||||
Some((value, next_position)) => LexResult::Some(
|
||||
Token::new_multiline_comment(value, start_pos),
|
||||
next_position,
|
||||
),
|
||||
None => {
|
||||
// Throw an error: Incomplete multiline comment
|
||||
LexResult::Err(LexError {
|
||||
position: start_pos,
|
||||
// TODO: add an end_position
|
||||
reason: "Unfinished multiline commend".into(),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Collects the body of a multiline comment.
///
/// `start_pos` must point at the first character after the opening `/*`.
///
/// Returns the comment text (delimiters excluded) together with the position
/// right after the closing `*/`, or `None` if `chars` ends before a closing
/// `*/` is found.
fn multiline_impl(chars: &Vec<char>, start_pos: usize) -> Option<(String, usize)> {
    let mut current_position = start_pos;
    let mut result = String::new();

    loop {
        match chars.get(current_position) {
            // `*/` closes the comment: return the text, ignoring the `*/`
            Some('*') if chars.get(current_position + 1) == Some(&'/') => {
                return Some((result, current_position + 2));
            }
            // Any other character belongs to the comment. A lone `*` is
            // consumed on its own (never together with the character after
            // it), so the following character is still examined and a
            // terminator such as `**/` is recognized.
            // TODO: Check for a nested comment when a `/` is found instead
            // of appending it
            Some(c) => {
                result.push(*c);
                current_position += 1;
            }
            // Input ended before the comment was closed.
            // TODO: Also return the position where this token ends,
            // to display better error messages.
            // Requires LexError to implement an end_position field
            None => return None,
        }
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::lexic::scanner::TokenType;
|
||||
@ -73,4 +144,91 @@ mod tests {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn should_scan_multiline() {
    // Smallest possible comment: the opener immediately followed by the closer.
    let source = str_to_vec("/**/");

    if let LexResult::Some(token, next_position) = scan_multiline(&source, 0) {
        assert_eq!(4, next_position);
        assert_eq!("", token.value);
        assert_eq!(0, token.position);
        assert_eq!(TokenType::MultilineComment, token.token_type);
    } else {
        panic!("Expected a multine comment")
    }
}
|
||||
|
||||
#[test]
fn should_scan_multiline_2() {
    // A comment with text on a single line.
    let source = str_to_vec("/* my comment */");

    if let LexResult::Some(token, next_position) = scan_multiline(&source, 0) {
        assert_eq!(16, next_position);
        assert_eq!(" my comment ", token.value);
        assert_eq!(0, token.position);
        assert_eq!(TokenType::MultilineComment, token.token_type);
    } else {
        panic!("Expected a multine comment")
    }
}
|
||||
|
||||
#[test]
fn should_scan_multiline_with_multiple_lines() {
    // The new line inside the comment must be kept as part of its value.
    let source = str_to_vec("/* my\ncomment */");

    if let LexResult::Some(token, next_position) = scan_multiline(&source, 0) {
        assert_eq!(16, next_position);
        assert_eq!(" my\ncomment ", token.value);
        assert_eq!(0, token.position);
        assert_eq!(TokenType::MultilineComment, token.token_type);
    } else {
        panic!("Expected a multine comment")
    }
}
|
||||
|
||||
#[test]
fn should_not_scan_multiline_comment_if_invalid() {
    // The closing `*/` is missing, so an error at the comment start is expected.
    let source = str_to_vec("/* my\ncomment");

    if let LexResult::Err(lex_error) = scan_multiline(&source, 0) {
        assert_eq!(0, lex_error.position)
    } else {
        panic!("Expected an error scannning an incomplete multiline comment")
    }
}
|
||||
|
||||
#[test]
fn should_scan_multiline_comments_with_asterisk() {
    // An asterisk that is not followed by `/` is part of the comment text.
    let source = str_to_vec("/* my * comment */");

    if let LexResult::Some(token, next_position) = scan_multiline(&source, 0) {
        assert_eq!(18, next_position);
        assert_eq!(" my * comment ", token.value);
        assert_eq!(0, token.position);
        assert_eq!(TokenType::MultilineComment, token.token_type);
    } else {
        panic!("Expected a multine comment")
    }
}
|
||||
}
|
||||
|
@ -16,6 +16,7 @@ pub enum TokenType {
|
||||
RightBrace,
|
||||
NewLine,
|
||||
Comment,
|
||||
MultilineComment,
|
||||
Comma,
|
||||
INDENT,
|
||||
DEDENT,
|
||||
@ -114,6 +115,14 @@ impl Token {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_multiline_comment(value: String, position: usize) -> Token {
|
||||
Token {
|
||||
token_type: TokenType::MultilineComment,
|
||||
value,
|
||||
position,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_indent(position: usize) -> Token {
|
||||
Token {
|
||||
token_type: TokenType::INDENT,
|
||||
|
Loading…
Reference in New Issue
Block a user