From 69d9f620886b55d770bb8f3aea07c38eae38d74e Mon Sep 17 00:00:00 2001 From: Kevin <46825870+nekevss@users.noreply.github.com> Date: Thu, 7 Oct 2021 13:57:13 -0400 Subject: [PATCH] Hashbang lexer support (#1631) --- boa/src/syntax/lexer/comment.rs | 67 ++++++++++++++++++++++++--------- boa/src/syntax/lexer/mod.rs | 13 ++++++- boa/src/syntax/parser/tests.rs | 26 +++++++++++++ 3 files changed, 88 insertions(+), 18 deletions(-) diff --git a/boa/src/syntax/lexer/comment.rs b/boa/src/syntax/lexer/comment.rs index 88e5238839..24c94c1903 100644 --- a/boa/src/syntax/lexer/comment.rs +++ b/boa/src/syntax/lexer/comment.rs @@ -8,6 +8,7 @@ use crate::{ lexer::{Token, TokenKind}, }, }; +use core::convert::TryFrom; use std::io::Read; /// Lexes a single line comment. @@ -65,27 +66,59 @@ impl Tokenizer for MultiLineComment { let _timer = BoaProfiler::global().start_event("MultiLineComment", "Lexing"); let mut new_line = false; - loop { - if let Some(ch) = cursor.next_byte()? { - if ch == b'*' && cursor.next_is(b'/')? { - break; - } else if ch == b'\n' { - new_line = true; + while let Some(ch) = cursor.next_char()? { + let tried_ch = char::try_from(ch); + match tried_ch { + Ok(c) if c == '*' && cursor.next_is(b'/')? => { + return Ok(Token::new( + if new_line { + TokenKind::LineTerminator + } else { + TokenKind::Comment + }, + Span::new(start_pos, cursor.pos()), + )) } - } else { - return Err(Error::syntax( - "unterminated multiline comment", - cursor.pos(), - )); - } + Ok(c) if c == '\r' || c == '\n' || c == '\u{2028}' || c == '\u{2029}' => { + new_line = true + } + _ => {} + }; + } + + Err(Error::syntax( + "unterminated multiline comment", + cursor.pos(), + )) + } +} + +///Lexes a first line Hashbang comment +/// +/// More information: +/// - [ECMAScript reference][spec] +/// +/// [spec]: https://tc39.es/ecma262/#sec-ecmascript-language-lexical-grammar + +pub(super) struct HashbangComment; + +impl Tokenizer for HashbangComment { + fn lex(&mut self, cursor: &mut Cursor, start_pos: Position) -> Result + where + R: Read, + { + let _timer = BoaProfiler::global().start_event("Hashbang", "Lexing"); + + while let Some(ch) = cursor.next_char()? { + let tried_ch = char::try_from(ch); + match tried_ch { + Ok(c) if c == '\r' || c == '\n' || c == '\u{2028}' || c == '\u{2029}' => break, + _ => {} + }; } Ok(Token::new( - if new_line { - TokenKind::LineTerminator - } else { - TokenKind::Comment - }, + TokenKind::Comment, Span::new(start_pos, cursor.pos()), )) } diff --git a/boa/src/syntax/lexer/mod.rs b/boa/src/syntax/lexer/mod.rs index 35c700e850..e2ed8227e6 100644 --- a/boa/src/syntax/lexer/mod.rs +++ b/boa/src/syntax/lexer/mod.rs @@ -30,7 +30,7 @@ pub mod token; mod tests; use self::{ - comment::{MultiLineComment, SingleLineComment}, + comment::{HashbangComment, MultiLineComment, SingleLineComment}, cursor::Cursor, identifier::Identifier, number::NumberLiteral, @@ -191,6 +191,17 @@ impl Lexer { } }; + //handle hashbang here so the below match block still throws error on + //# if position isn't (1, 1) + if start.column_number() == 1 && start.line_number() == 1 && next_ch == 0x23 { + if let Some(hashbang_peek) = self.cursor.peek()? { + if hashbang_peek == 0x21 { + let _token = HashbangComment.lex(&mut self.cursor, start); + return self.next(); + } + } + }; + if let Ok(c) = char::try_from(next_ch) { let token = match c { '\r' | '\n' | '\u{2028}' | '\u{2029}' => Ok(Token::new( diff --git a/boa/src/syntax/parser/tests.rs b/boa/src/syntax/parser/tests.rs index 12fae2072e..f8999a0244 100644 --- a/boa/src/syntax/parser/tests.rs +++ b/boa/src/syntax/parser/tests.rs @@ -372,3 +372,29 @@ fn empty_statement() { ], ); } + +#[test] +fn hashbang_use_strict_no_with() { + check_parser( + r#"#!\"use strict" + "#, + vec![], + ); +} + +#[test] +#[ignore] +fn hashbang_use_strict_with_with_statement() { + check_parser( + r#"#!\"use strict" + + with({}) {} + "#, + vec![], + ); +} + +#[test] +fn hashbang_comment() { + check_parser(r"#!Comment Here", vec![]); +}