Browse Source

Fix hashbang comments by using proper goal symbols (#3876)

pull/3878/head
raskad 7 months ago committed by GitHub
parent
commit
1eaf9230ae
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 49
      core/parser/src/lexer/mod.rs
  2. 5
      core/parser/src/parser/mod.rs

49
core/parser/src/lexer/mod.rs

@ -161,7 +161,7 @@ impl<R> Lexer<R> {
)) ))
} }
} }
InputElement::RegExp => { InputElement::RegExp | InputElement::HashbangOrRegExp => {
// Can be a regular expression. // Can be a regular expression.
RegexLiteral.lex(&mut self.cursor, start, interner) RegexLiteral.lex(&mut self.cursor, start, interner)
} }
@ -214,28 +214,34 @@ impl<R> Lexer<R> {
{ {
let _timer = Profiler::global().start_event("next()", "Lexing"); let _timer = Profiler::global().start_event("next()", "Lexing");
let (start, next_ch) = loop { let mut start = self.cursor.pos();
let start = self.cursor.pos(); let Some(mut next_ch) = self.cursor.next_char()? else {
if let Some(next_ch) = self.cursor.next_char()? { return Ok(None);
// Ignore whitespace
if !is_whitespace(next_ch) {
break (start, next_ch);
}
} else {
return Ok(None);
}
}; };
//handle hashbang here so the below match block still throws error on // If the goal symbol is HashbangOrRegExp, then we need to check if the next token is a hashbang comment.
//# if position isn't (1, 1) // Since the goal symbol is only valid for the first token, we need to change it to RegExp after the first token.
if start.column_number() == 1 if self.get_goal() == InputElement::HashbangOrRegExp {
&& start.line_number() == 1 self.set_goal(InputElement::RegExp);
&& next_ch == 0x23 if next_ch == 0x23 && self.cursor.peek_char()? == Some(0x21) {
&& self.cursor.peek_char()? == Some(0x21) let _token = HashbangComment.lex(&mut self.cursor, start, interner);
{ return self.next(interner);
let _token = HashbangComment.lex(&mut self.cursor, start, interner); };
return self.next(interner); }
};
// Ignore whitespace
if is_whitespace(next_ch) {
loop {
start = self.cursor.pos();
let Some(next) = self.cursor.next_char()? else {
return Ok(None);
};
if !is_whitespace(next) {
next_ch = next;
break;
}
}
}
if let Ok(c) = char::try_from(next_ch) { if let Ok(c) = char::try_from(next_ch) {
let token = match c { let token = match c {
@ -392,6 +398,7 @@ pub(crate) enum InputElement {
Div, Div,
RegExp, RegExp,
TemplateTail, TemplateTail,
HashbangOrRegExp,
} }
impl Default for InputElement { impl Default for InputElement {

5
core/parser/src/parser/mod.rs

@ -11,7 +11,7 @@ mod tests;
use crate::{ use crate::{
error::ParseResult, error::ParseResult,
lexer::Error as LexError, lexer::{Error as LexError, InputElement},
parser::{ parser::{
cursor::Cursor, cursor::Cursor,
function::{FormalParameters, FunctionStatementList}, function::{FormalParameters, FunctionStatementList},
@ -140,6 +140,7 @@ impl<'a, R: ReadChar> Parser<'a, R> {
/// ///
/// [spec]: https://tc39.es/ecma262/#prod-Script /// [spec]: https://tc39.es/ecma262/#prod-Script
pub fn parse_script(&mut self, interner: &mut Interner) -> ParseResult<boa_ast::Script> { pub fn parse_script(&mut self, interner: &mut Interner) -> ParseResult<boa_ast::Script> {
self.cursor.set_goal(InputElement::HashbangOrRegExp);
ScriptParser::new(false).parse(&mut self.cursor, interner) ScriptParser::new(false).parse(&mut self.cursor, interner)
} }
@ -155,6 +156,7 @@ impl<'a, R: ReadChar> Parser<'a, R> {
where where
R: ReadChar, R: ReadChar,
{ {
self.cursor.set_goal(InputElement::HashbangOrRegExp);
ModuleParser.parse(&mut self.cursor, interner) ModuleParser.parse(&mut self.cursor, interner)
} }
@ -172,6 +174,7 @@ impl<'a, R: ReadChar> Parser<'a, R> {
direct: bool, direct: bool,
interner: &mut Interner, interner: &mut Interner,
) -> ParseResult<boa_ast::Script> { ) -> ParseResult<boa_ast::Script> {
self.cursor.set_goal(InputElement::HashbangOrRegExp);
ScriptParser::new(direct).parse(&mut self.cursor, interner) ScriptParser::new(direct).parse(&mut self.cursor, interner)
} }

Loading…
Cancel
Save