Hashbang lexer support (#1631)

3 years ago · 69d9f62088
3 changed files with 88 additions and 18 deletions
--- a/boa/src/syntax/lexer/comment.rs
+++ b/boa/src/syntax/lexer/comment.rs
@ -8,6 +8,7 @@ use crate::{
        lexer::{Token, TokenKind},
    },
 };
 use core::convert::TryFrom;
 use std::io::Read;
 /// Lexes a single line comment.
@ -65,27 +66,59 @@ impl<R> Tokenizer<R> for MultiLineComment {
        let _timer = BoaProfiler::global().start_event("MultiLineComment", "Lexing");
        let mut new_line = false;
-        loop {
+        while let Some(ch) = cursor.next_char()? {
-            if let Some(ch) = cursor.next_byte()? {
+            let tried_ch = char::try_from(ch);
-                if ch == b'*' && cursor.next_is(b'/')? {
+            match tried_ch {
-                    break;
+                Ok(c) if c == '*' && cursor.next_is(b'/')? => {
-                } else if ch == b'\n' {
+                    return Ok(Token::new(
-                    new_line = true;
+                        if new_line {
-                }
+                            TokenKind::LineTerminator
                        } else {
-                return Err(Error::syntax(
+                            TokenKind::Comment
                        },
                        Span::new(start_pos, cursor.pos()),
                    ))
                }
                Ok(c) if c == '\r' || c == '\n' || c == '\u{2028}' || c == '\u{2029}' => {
                    new_line = true
                }
                _ => {}
            };
        }
        Err(Error::syntax(
            "unterminated multiline comment",
            cursor.pos(),
-                ));
+        ))
    }
 }
 /// Lexes a hashbang (`#!`) comment on the first line of the source.
 ///
 /// More information:
 ///  - [ECMAScript reference][spec]
 ///
 /// [spec]: https://tc39.es/ecma262/#sec-ecmascript-language-lexical-grammar
 pub(super) struct HashbangComment;
 impl<R> Tokenizer<R> for HashbangComment {
    fn lex(&mut self, cursor: &mut Cursor<R>, start_pos: Position) -> Result<Token, Error>
    where
        R: Read,
    {
        let _timer = BoaProfiler::global().start_event("Hashbang", "Lexing");
        while let Some(ch) = cursor.next_char()? {
            let tried_ch = char::try_from(ch);
            match tried_ch {
                Ok(c) if c == '\r' || c == '\n' || c == '\u{2028}' || c == '\u{2029}' => break,
                _ => {}
            };
        }
        Ok(Token::new(
-            if new_line {
+            TokenKind::Comment,
                TokenKind::LineTerminator
            } else {
                TokenKind::Comment
            },
            Span::new(start_pos, cursor.pos()),
        ))
    }
--- a/boa/src/syntax/lexer/mod.rs
+++ b/boa/src/syntax/lexer/mod.rs
@ -30,7 +30,7 @@ pub mod token;
 mod tests;
 use self::{
-    comment::{MultiLineComment, SingleLineComment},
+    comment::{HashbangComment, MultiLineComment, SingleLineComment},
    cursor::Cursor,
    identifier::Identifier,
    number::NumberLiteral,
@ -191,6 +191,17 @@ impl<R> Lexer<R> {
            }
        };
        // Handle the hashbang here so that the match block below still reports an
        // error for `#` when the position isn't (1, 1).
        if start.column_number() == 1 && start.line_number() == 1 && next_ch == 0x23 {
            if let Some(hashbang_peek) = self.cursor.peek()? {
                if hashbang_peek == 0x21 {
                    let _token = HashbangComment.lex(&mut self.cursor, start);
                    return self.next();
                }
            }
        };
        if let Ok(c) = char::try_from(next_ch) {
            let token = match c {
                '\r' | '\n' | '\u{2028}' | '\u{2029}' => Ok(Token::new(
--- a/boa/src/syntax/parser/tests.rs
+++ b/boa/src/syntax/parser/tests.rs
@ -372,3 +372,29 @@ fn empty_statement() {
        ],
    );
 }
 /// Feeds `check_parser` a source whose first line is a hashbang containing a
 /// quoted `use strict`, with an empty expected node list (presumably asserting
 /// that the whole line is consumed as a comment and not as a directive).
 #[test]
 fn hashbang_use_strict_no_with() {
    // The raw string spans two lines; its trailing whitespace is part of the input.
    let source = r#"#!\"use strict"
        "#;
    check_parser(source, vec![]);
 }
 /// Feeds `check_parser` a hashbang line containing a quoted `use strict`
 /// followed by a `with` statement, with an empty expected node list.
 ///
 /// The test is marked `#[ignore]`; the reason is not recorded here.
 #[test]
 #[ignore]
 fn hashbang_use_strict_with_with_statement() {
    // The raw string spans three lines; its inner whitespace is part of the input.
    let source = r#"#!\"use strict"
        with({}) {}
        "#;
    check_parser(source, vec![]);
 }
 /// Feeds `check_parser` a source consisting only of a hashbang line with no
 /// trailing line terminator, with an empty expected node list.
 #[test]
 fn hashbang_comment() {
    let source = r"#!Comment Here";
    check_parser(source, vec![]);
 }