Browse Source

Hashbang lexer support (#1631)

pull/1650/head
Kevin 3 years ago committed by GitHub
parent
commit
69d9f62088
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 61
      boa/src/syntax/lexer/comment.rs
  2. 13
      boa/src/syntax/lexer/mod.rs
  3. 26
      boa/src/syntax/parser/tests.rs

61
boa/src/syntax/lexer/comment.rs

@ -8,6 +8,7 @@ use crate::{
lexer::{Token, TokenKind}, lexer::{Token, TokenKind},
}, },
}; };
use core::convert::TryFrom;
use std::io::Read; use std::io::Read;
/// Lexes a single line comment. /// Lexes a single line comment.
@ -65,27 +66,59 @@ impl<R> Tokenizer<R> for MultiLineComment {
let _timer = BoaProfiler::global().start_event("MultiLineComment", "Lexing"); let _timer = BoaProfiler::global().start_event("MultiLineComment", "Lexing");
let mut new_line = false; let mut new_line = false;
loop { while let Some(ch) = cursor.next_char()? {
if let Some(ch) = cursor.next_byte()? { let tried_ch = char::try_from(ch);
if ch == b'*' && cursor.next_is(b'/')? { match tried_ch {
break; Ok(c) if c == '*' && cursor.next_is(b'/')? => {
} else if ch == b'\n' { return Ok(Token::new(
new_line = true; if new_line {
} TokenKind::LineTerminator
} else { } else {
return Err(Error::syntax( TokenKind::Comment
},
Span::new(start_pos, cursor.pos()),
))
}
Ok(c) if c == '\r' || c == '\n' || c == '\u{2028}' || c == '\u{2029}' => {
new_line = true
}
_ => {}
};
}
Err(Error::syntax(
"unterminated multiline comment", "unterminated multiline comment",
cursor.pos(), cursor.pos(),
)); ))
} }
}
/// Lexes a first-line hashbang (`#!`) comment.
///
/// The tokenizer consumes characters up to and including the first line
/// terminator (`\r`, `\n`, U+2028 or U+2029), or up to the end of input,
/// and yields a single `TokenKind::Comment` token spanning that range.
///
/// More information:
///  - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma262/#sec-ecmascript-language-lexical-grammar
pub(super) struct HashbangComment;
impl<R> Tokenizer<R> for HashbangComment {
fn lex(&mut self, cursor: &mut Cursor<R>, start_pos: Position) -> Result<Token, Error>
where
R: Read,
{
let _timer = BoaProfiler::global().start_event("Hashbang", "Lexing");
while let Some(ch) = cursor.next_char()? {
let tried_ch = char::try_from(ch);
match tried_ch {
Ok(c) if c == '\r' || c == '\n' || c == '\u{2028}' || c == '\u{2029}' => break,
_ => {}
};
} }
Ok(Token::new( Ok(Token::new(
if new_line { TokenKind::Comment,
TokenKind::LineTerminator
} else {
TokenKind::Comment
},
Span::new(start_pos, cursor.pos()), Span::new(start_pos, cursor.pos()),
)) ))
} }

13
boa/src/syntax/lexer/mod.rs

@ -30,7 +30,7 @@ pub mod token;
mod tests; mod tests;
use self::{ use self::{
comment::{MultiLineComment, SingleLineComment}, comment::{HashbangComment, MultiLineComment, SingleLineComment},
cursor::Cursor, cursor::Cursor,
identifier::Identifier, identifier::Identifier,
number::NumberLiteral, number::NumberLiteral,
@ -191,6 +191,17 @@ impl<R> Lexer<R> {
} }
}; };
// A hashbang (`#!`) comment is only recognised at the very start of the
// input (line 1, column 1). It is handled here, ahead of the main `match`
// below, so that a `#` at any other position still reaches that match and
// is reported as an error there.
if start.column_number() == 1
    && start.line_number() == 1
    && next_ch == 0x23 // '#'
    && matches!(self.cursor.peek()?, Some(0x21)) // '!'
{
    // Consume the rest of the hashbang line. The resulting comment token is
    // not surfaced to the caller, but a failure while reading the line must
    // be propagated instead of being silently dropped.
    HashbangComment.lex(&mut self.cursor, start)?;
    // Continue with the first real token after the hashbang line.
    return self.next();
}
if let Ok(c) = char::try_from(next_ch) { if let Ok(c) = char::try_from(next_ch) {
let token = match c { let token = match c {
'\r' | '\n' | '\u{2028}' | '\u{2029}' => Ok(Token::new( '\r' | '\n' | '\u{2028}' | '\u{2029}' => Ok(Token::new(

26
boa/src/syntax/parser/tests.rs

@ -372,3 +372,29 @@ fn empty_statement() {
], ],
); );
} }
/// A hashbang line whose text looks like a `"use strict"` directive, followed
/// only by a newline, is expected to produce an empty statement list.
#[test]
fn hashbang_use_strict_no_with() {
    let source = r#"#!\"use strict"
"#;
    check_parser(source, vec![]);
}
/// Same hashbang line as `hashbang_use_strict_no_with`, but followed by a
/// `with` statement on the next line.
///
/// NOTE(review): this test is marked `#[ignore]`; the reason is not visible
/// from this file section — confirm before enabling.
#[test]
#[ignore]
fn hashbang_use_strict_with_with_statement() {
    let source = r#"#!\"use strict"
with({}) {}
"#;
    check_parser(source, vec![]);
}
/// A source consisting solely of a hashbang comment (with no trailing line
/// terminator) is expected to produce an empty statement list.
#[test]
fn hashbang_comment() {
    let source = r"#!Comment Here";
    check_parser(source, vec![]);
}

Loading…
Cancel
Save