
Add UTF-16 input parsing (#3538)

* Remove unused lexer code

* Remove utf8 byte handling in lexer

* Implement generic encoding input

* Allow non UTF-8 regex parsing

* Apply review

* Apply review
raskad authored 11 months ago, committed by GitHub
commit 84a5e45447
Changed files (79; changed-line counts in parentheses):

  1. core/engine/src/builtins/eval/mod.rs (4)
  2. core/engine/src/context/mod.rs (5)
  3. core/engine/src/module/mod.rs (4)
  4. core/engine/src/script.rs (6)
  5. core/engine/src/tests/operators.rs (6)
  6. core/parser/src/lexer/comment.rs (10)
  7. core/parser/src/lexer/cursor.rs (407)
  8. core/parser/src/lexer/identifier.rs (14)
  9. core/parser/src/lexer/mod.rs (78)
  10. core/parser/src/lexer/number.rs (95)
  11. core/parser/src/lexer/operator.rs (55)
  12. core/parser/src/lexer/private_identifier.rs (6)
  13. core/parser/src/lexer/regex.rs (81)
  14. core/parser/src/lexer/spread.rs (8)
  15. core/parser/src/lexer/string.rs (135)
  16. core/parser/src/lexer/template.rs (35)
  17. core/parser/src/lexer/tests.rs (173)
  18. core/parser/src/lib.rs (2)
  19. core/parser/src/parser/cursor/buffered_lexer/mod.rs (14)
  20. core/parser/src/parser/cursor/mod.rs (4)
  21. core/parser/src/parser/expression/assignment/arrow_function.rs (8)
  22. core/parser/src/parser/expression/assignment/async_arrow_function.rs (6)
  23. core/parser/src/parser/expression/assignment/conditional.rs (4)
  24. core/parser/src/parser/expression/assignment/exponentiation.rs (4)
  25. core/parser/src/parser/expression/assignment/mod.rs (4)
  26. core/parser/src/parser/expression/assignment/yield.rs (4)
  27. core/parser/src/parser/expression/await_expr.rs (4)
  28. core/parser/src/parser/expression/identifiers.rs (8)
  29. core/parser/src/parser/expression/left_hand_side/arguments.rs (4)
  30. core/parser/src/parser/expression/left_hand_side/call.rs (6)
  31. core/parser/src/parser/expression/left_hand_side/member.rs (4)
  32. core/parser/src/parser/expression/left_hand_side/mod.rs (6)
  33. core/parser/src/parser/expression/left_hand_side/optional/mod.rs (4)
  34. core/parser/src/parser/expression/left_hand_side/template.rs (4)
  35. core/parser/src/parser/expression/mod.rs (10)
  36. core/parser/src/parser/expression/primary/array_initializer/mod.rs (4)
  37. core/parser/src/parser/expression/primary/async_function_expression/mod.rs (4)
  38. core/parser/src/parser/expression/primary/async_generator_expression/mod.rs (4)
  39. core/parser/src/parser/expression/primary/class_expression/mod.rs (4)
  40. core/parser/src/parser/expression/primary/function_expression/mod.rs (4)
  41. core/parser/src/parser/expression/primary/generator_expression/mod.rs (4)
  42. core/parser/src/parser/expression/primary/mod.rs (6)
  43. core/parser/src/parser/expression/primary/object_initializer/mod.rs (20)
  44. core/parser/src/parser/expression/primary/template/mod.rs (4)
  45. core/parser/src/parser/expression/unary.rs (4)
  46. core/parser/src/parser/expression/update.rs (4)
  47. core/parser/src/parser/function/mod.rs (12)
  48. core/parser/src/parser/mod.rs (21)
  49. core/parser/src/parser/statement/block/mod.rs (4)
  50. core/parser/src/parser/statement/break_stm/mod.rs (4)
  51. core/parser/src/parser/statement/continue_stm/mod.rs (4)
  52. core/parser/src/parser/statement/declaration/export.rs (10)
  53. core/parser/src/parser/statement/declaration/hoistable/async_function_decl/mod.rs (8)
  54. core/parser/src/parser/statement/declaration/hoistable/async_generator_decl/mod.rs (8)
  55. core/parser/src/parser/statement/declaration/hoistable/class_decl/mod.rs (12)
  56. core/parser/src/parser/statement/declaration/hoistable/function_decl/mod.rs (8)
  57. core/parser/src/parser/statement/declaration/hoistable/generator_decl/mod.rs (8)
  58. core/parser/src/parser/statement/declaration/hoistable/mod.rs (6)
  59. core/parser/src/parser/statement/declaration/import.rs (14)
  60. core/parser/src/parser/statement/declaration/lexical.rs (8)
  61. core/parser/src/parser/statement/declaration/mod.rs (6)
  62. core/parser/src/parser/statement/expression/mod.rs (4)
  63. core/parser/src/parser/statement/if_stm/mod.rs (4)
  64. core/parser/src/parser/statement/iteration/do_while_statement.rs (4)
  65. core/parser/src/parser/statement/iteration/for_statement.rs (4)
  66. core/parser/src/parser/statement/iteration/while_statement.rs (4)
  67. core/parser/src/parser/statement/labelled_stm/mod.rs (4)
  68. core/parser/src/parser/statement/mod.rs (16)
  69. core/parser/src/parser/statement/return_stm/mod.rs (4)
  70. core/parser/src/parser/statement/switch/mod.rs (6)
  71. core/parser/src/parser/statement/throw/mod.rs (4)
  72. core/parser/src/parser/statement/try_stm/catch.rs (6)
  73. core/parser/src/parser/statement/try_stm/finally.rs (11)
  74. core/parser/src/parser/statement/try_stm/mod.rs (4)
  75. core/parser/src/parser/statement/variable/mod.rs (9)
  76. core/parser/src/parser/statement/with/mod.rs (4)
  77. core/parser/src/source/mod.rs (72)
  78. core/parser/src/source/utf16.rs (65)
  79. core/parser/src/source/utf8.rs (72)

core/engine/src/builtins/eval/mod.rs (4)

@ -99,7 +99,7 @@ impl Eval {
// 2. If Type(x) is not String, return x.
// TODO: rework parser to take an iterator of `u32` unicode codepoints
let Some(x) = x.as_string().map(JsString::to_std_string_escaped) else {
let Some(x) = x.as_string() else {
return Ok(x.clone());
};
@ -118,7 +118,7 @@ impl Eval {
// b. If script is a List of errors, throw a SyntaxError exception.
// c. If script Contains ScriptBody is false, return undefined.
// d. Let body be the ScriptBody of script.
let mut parser = Parser::new(Source::from_bytes(&x));
let mut parser = Parser::new(Source::from_utf16(x));
parser.set_identifier(context.next_parser_identifier());
if strict {
parser.set_strict();
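
A minimal sketch of the new call path, assuming only what the hunk above shows: `Source::from_utf16` accepts UTF-16 code units (the diff passes a `JsString`, which is UTF-16 backed), so `eval` no longer round-trips through an escaped Rust `String`.

use boa_parser::{Parser, Source};

fn main() {
    // Hypothetical caller: encode a snippet as UTF-16 code units.
    let code: Vec<u16> = "let x = 5;".encode_utf16().collect();

    // Parse straight from UTF-16, with no lossy escape/UTF-8 round-trip.
    let mut parser = Parser::new(Source::from_utf16(&code));
    let _ = parser; // parsing proper needs a context/interner, omitted here
}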

core/engine/src/context/mod.rs (5)

@ -5,6 +5,7 @@ mod hooks;
pub(crate) mod icu;
pub mod intrinsics;
use boa_parser::source::ReadChar;
pub use hooks::{DefaultHooks, HostHooks};
#[cfg(feature = "intl")]
@ -14,7 +15,7 @@ use intrinsics::Intrinsics;
#[cfg(not(feature = "intl"))]
pub use std::marker::PhantomData;
use std::{cell::Cell, io::Read, path::Path, rc::Rc};
use std::{cell::Cell, path::Path, rc::Rc};
use crate::{
builtins,
@ -185,7 +186,7 @@ impl Context {
/// Note that this won't run any scheduled promise jobs; you need to call [`Context::run_jobs`]
/// on the context or [`JobQueue::run_jobs`] on the provided queue to run them.
#[allow(clippy::unit_arg, dropping_copy_types)]
pub fn eval<R: Read>(&mut self, src: Source<'_, R>) -> JsResult<JsValue> {
pub fn eval<R: ReadChar>(&mut self, src: Source<'_, R>) -> JsResult<JsValue> {
let main_timer = Profiler::global().start_event("Script evaluation", "Main");
let result = Script::parse(src, None, self)?.evaluate(self);
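
`Context::eval` here, and `Module::parse` and `Script::parse` below, all swap the `std::io::Read` bound for the new `boa_parser::source::ReadChar` trait. Its definition lives in core/parser/src/source/mod.rs, which this excerpt does not show; the following is a rough sketch reconstructed only from the cursor's call sites (`next_char` yielding `io::Result<Option<u32>>`), so the real trait may differ in naming or provided methods.

use std::io;

// Assumed shape, inferred from `self.iter.next_char()?` in cursor.rs.
pub trait ReadChar {
    /// Returns the next Unicode code point of the input, or `None` at EOF.
    fn next_char(&mut self) -> io::Result<Option<u32>>;
}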

core/engine/src/module/mod.rs (4)

@ -25,6 +25,7 @@ mod loader;
mod namespace;
mod source;
mod synthetic;
use boa_parser::source::ReadChar;
pub use loader::*;
pub use namespace::ModuleNamespace;
use source::SourceTextModule;
@ -33,7 +34,6 @@ pub use synthetic::{SyntheticModule, SyntheticModuleInitializer};
use std::cell::{Cell, RefCell};
use std::collections::HashSet;
use std::hash::Hash;
use std::io::Read;
use std::rc::Rc;
use rustc_hash::FxHashSet;
@ -141,7 +141,7 @@ impl Module {
/// Parses the provided `src` as an ECMAScript module, returning an error if parsing fails.
///
/// [spec]: https://tc39.es/ecma262/#sec-parsemodule
pub fn parse<R: Read>(
pub fn parse<R: ReadChar>(
src: Source<'_, R>,
realm: Option<Realm>,
context: &mut Context,

core/engine/src/script.rs (6)

@ -8,10 +8,8 @@
//! [spec]: https://tc39.es/ecma262/#sec-scripts
//! [script]: https://tc39.es/ecma262/#sec-script-records
use std::io::Read;
use boa_gc::{Finalize, Gc, GcRefCell, Trace};
use boa_parser::{Parser, Source};
use boa_parser::{source::ReadChar, Parser, Source};
use boa_profiler::Profiler;
use rustc_hash::FxHashMap;
@ -76,7 +74,7 @@ impl Script {
/// Parses the provided `src` as an ECMAScript script, returning an error if parsing fails.
///
/// [spec]: https://tc39.es/ecma262/#sec-parse-script
pub fn parse<R: Read>(
pub fn parse<R: ReadChar>(
src: Source<'_, R>,
realm: Option<Realm>,
context: &mut Context,

core/engine/src/tests/operators.rs (6)

@ -334,7 +334,7 @@ fn assignment_to_non_assignable_ctd() {
TestAction::assert_native_error(
src,
JsNativeErrorKind::Syntax,
"Invalid left-hand side in assignment at line 1, col 13",
"Invalid left-hand side in assignment at line 1, col 12",
)
}),
);
@ -362,7 +362,7 @@ fn multicharacter_assignment_to_non_assignable_ctd() {
TestAction::assert_native_error(
src,
JsNativeErrorKind::Syntax,
"Invalid left-hand side in assignment at line 1, col 13",
"Invalid left-hand side in assignment at line 1, col 12",
)
}),
);
@ -397,7 +397,7 @@ fn multicharacter_bitwise_assignment_to_non_assignable_ctd() {
TestAction::assert_native_error(
src,
JsNativeErrorKind::Syntax,
"Invalid left-hand side in assignment at line 1, col 13",
"Invalid left-hand side in assignment at line 1, col 12",
)
}),
);

core/parser/src/lexer/comment.rs (10)

@ -1,10 +1,10 @@
//! Boa's lexing for ECMAScript comments.
use crate::lexer::{Cursor, Error, Token, TokenKind, Tokenizer};
use crate::source::ReadChar;
use boa_ast::{Position, Span};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Lexes a single line comment.
///
@ -26,7 +26,7 @@ impl<R> Tokenizer<R> for SingleLineComment {
_interner: &mut Interner,
) -> Result<Token, Error>
where
R: Read,
R: ReadChar,
{
let _timer = Profiler::global().start_event("SingleLineComment", "Lexing");
@ -66,7 +66,7 @@ impl<R> Tokenizer<R> for MultiLineComment {
_interner: &mut Interner,
) -> Result<Token, Error>
where
R: Read,
R: ReadChar,
{
let _timer = Profiler::global().start_event("MultiLineComment", "Lexing");
@ -74,7 +74,7 @@ impl<R> Tokenizer<R> for MultiLineComment {
while let Some(ch) = cursor.next_char()? {
let tried_ch = char::try_from(ch);
match tried_ch {
Ok(c) if c == '*' && cursor.next_is(b'/')? => {
Ok(c) if c == '*' && cursor.next_if(0x2F /* / */)? => {
return Ok(Token::new(
if new_line {
TokenKind::LineTerminator
@ -115,7 +115,7 @@ impl<R> Tokenizer<R> for HashbangComment {
_interner: &mut Interner,
) -> Result<Token, Error>
where
R: Read,
R: ReadChar,
{
let _timer = Profiler::global().start_event("Hashbang", "Lexing");

core/parser/src/lexer/cursor.rs (407)

@ -1,15 +1,18 @@
//! Boa's lexer cursor that manages the input byte stream.
use crate::source::{ReadChar, UTF8Input};
use boa_ast::Position;
use boa_profiler::Profiler;
use std::io::{self, Bytes, Error, ErrorKind, Read};
use std::io::{self, Error, ErrorKind};
/// Cursor over the source code.
#[derive(Debug)]
pub(super) struct Cursor<R> {
iter: InnerIter<R>,
iter: R,
pos: Position,
module: bool,
strict: bool,
peeked: [Option<u32>; 4],
}
impl<R> Cursor<R> {
@ -19,7 +22,7 @@ impl<R> Cursor<R> {
}
/// Advances the position to the next column.
pub(super) fn next_column(&mut self) {
fn next_column(&mut self) {
let current_line = self.pos.line_number();
let next_column = self.pos.column_number() + 1;
self.pos = Position::new(current_line, next_column);
@ -53,62 +56,66 @@ impl<R> Cursor<R> {
}
}
impl<R> Cursor<R>
where
R: Read,
{
impl<R: ReadChar> Cursor<R> {
/// Creates a new Lexer cursor.
pub(super) fn new(inner: R) -> Self {
Self {
iter: InnerIter::new(inner.bytes()),
iter: inner,
pos: Position::new(1, 1),
strict: false,
module: false,
peeked: [None; 4],
}
}
/// Creates a new Lexer cursor with an initial position.
pub(super) fn with_position(inner: R, pos: Position) -> Self {
Self {
iter: InnerIter::new(inner.bytes()),
iter: inner,
pos,
strict: false,
module: false,
peeked: [None; 4],
}
}
/// Peeks the next byte.
pub(super) fn peek(&mut self) -> Result<Option<u8>, Error> {
let _timer = Profiler::global().start_event("cursor::peek()", "Lexing");
self.iter.peek_byte()
}
/// Peeks the next n bytes, the maximum number of peeked bytes is 4 (n <= 4).
pub(super) fn peek_n(&mut self, n: u8) -> Result<&[u8], Error> {
pub(super) fn peek_n(&mut self, n: u8) -> Result<&[Option<u32>; 4], Error> {
let _timer = Profiler::global().start_event("cursor::peek_n()", "Lexing");
self.iter.peek_n_bytes(n)
let peeked = self.peeked.iter().filter(|c| c.is_some()).count();
let needs_peek = n as usize - peeked;
for i in 0..needs_peek {
let next = self.iter.next_char()?;
self.peeked[i + peeked] = next;
}
Ok(&self.peeked)
}
/// Peeks the next UTF-8 character in u32 code point.
pub(super) fn peek_char(&mut self) -> Result<Option<u32>, Error> {
let _timer = Profiler::global().start_event("cursor::peek_char()", "Lexing");
self.iter.peek_char()
if let Some(c) = self.peeked[0] {
return Ok(Some(c));
}
let next = self.iter.next_char()?;
self.peeked[0] = next;
Ok(next)
}
/// Compares the byte passed in to the next byte, if they match true is returned and the buffer is incremented.
pub(super) fn next_is(&mut self, byte: u8) -> io::Result<bool> {
let _timer = Profiler::global().start_event("cursor::next_is()", "Lexing");
pub(super) fn next_if(&mut self, c: u32) -> io::Result<bool> {
let _timer = Profiler::global().start_event("cursor::next_if()", "Lexing");
Ok(match self.peek()? {
Some(next) if next == byte => {
self.next_byte()?;
true
if self.peek_char()? == Some(c) {
self.next_char()?;
Ok(true)
} else {
Ok(false)
}
_ => false,
})
}
/// Applies the predicate to the next character and returns the result.
@ -120,41 +127,30 @@ where
where
F: Fn(char) -> bool,
{
let _timer = Profiler::global().start_event("cursor::next_is_ascii_pred()", "Lexing");
let _timer = Profiler::global().start_event("cursor::next_is_pred()", "Lexing");
Ok(match self.peek()? {
Some(byte) if (0..=0x7F).contains(&byte) => pred(char::from(byte)),
Ok(match self.peek_char()? {
Some(byte) if (0..=0x7F).contains(&byte) =>
{
#[allow(clippy::cast_possible_truncation)]
pred(char::from(byte as u8))
}
Some(_) | None => false,
})
}
/// Applies the predicate to the next UTF-8 character and returns the result.
/// Returns false if there is no next character, otherwise returns the result from the
/// predicate on the ascii char
///
/// The buffer is not incremented.
#[cfg(test)]
pub(super) fn next_is_char_pred<F>(&mut self, pred: &F) -> io::Result<bool>
where
F: Fn(u32) -> bool,
{
let _timer = Profiler::global().start_event("cursor::next_is_char_pred()", "Lexing");
Ok(self.peek_char()?.map_or(false, pred))
}
/// Fills the buffer with all bytes until the stop byte is found.
/// Returns error when reaching the end of the buffer.
///
/// Note that all bytes up until the stop byte are added to the buffer, including the byte right before.
pub(super) fn take_until(&mut self, stop: u8, buf: &mut Vec<u8>) -> io::Result<()> {
pub(super) fn take_until(&mut self, stop: u32, buf: &mut Vec<u32>) -> io::Result<()> {
let _timer = Profiler::global().start_event("cursor::take_until()", "Lexing");
loop {
if self.next_is(stop)? {
if self.next_if(stop)? {
return Ok(());
} else if let Some(byte) = self.next_byte()? {
buf.push(byte);
} else if let Some(c) = self.next_char()? {
buf.push(c);
} else {
return Err(io::Error::new(
ErrorKind::UnexpectedEof,
@ -177,36 +173,9 @@ where
loop {
if !self.next_is_ascii_pred(pred)? {
return Ok(());
} else if let Some(byte) = self.next_byte()? {
buf.push(byte);
} else {
// next_is_pred will return false if the next value is None so the None case should already be handled.
unreachable!();
}
}
}
/// Fills the buffer with characters until the first character for which the predicate (pred) is false.
/// It also stops when there is no next character.
///
/// Note that all characters up until the stop character are added to the buffer, including the character right before.
#[cfg(test)]
pub(super) fn take_while_char_pred<F>(&mut self, buf: &mut Vec<u8>, pred: &F) -> io::Result<()>
where
F: Fn(u32) -> bool,
{
let _timer = Profiler::global().start_event("cursor::take_while_char_pred()", "Lexing");
loop {
if !self.next_is_char_pred(pred)? {
return Ok(());
} else if let Some(ch) = self.peek_char()? {
for _ in 0..utf8_len(ch) {
buf.push(
self.next_byte()?
.expect("already checked that the next character exists"),
);
}
} else if let Some(byte) = self.next_char()? {
#[allow(clippy::cast_possible_truncation)]
buf.push(byte as u8);
} else {
// next_is_pred will return false if the next value is None so the None case should already be handled.
unreachable!();
@ -214,61 +183,25 @@ where
}
}
/// It will fill the buffer with bytes.
///
/// This expects for the buffer to be fully filled. If it's not, it will fail with an
/// `UnexpectedEof` I/O error.
pub(super) fn fill_bytes(&mut self, buf: &mut [u8]) -> io::Result<()> {
let _timer = Profiler::global().start_event("cursor::fill_bytes()", "Lexing");
self.iter.fill_bytes(buf)
}
/// Retrieves the next byte.
pub(crate) fn next_byte(&mut self) -> Result<Option<u8>, Error> {
let _timer = Profiler::global().start_event("cursor::next_byte()", "Lexing");
let byte = self.iter.next_byte()?;
match byte {
Some(b'\r') => {
// Try to take a newline if it's next, for windows "\r\n" newlines
// Otherwise, treat as a Mac OS9 bare '\r' newline
if self.peek()? == Some(b'\n') {
let _next = self.iter.next_byte();
}
self.next_line();
}
Some(b'\n') => self.next_line(),
Some(0xE2) => {
// Try to match '\u{2028}' (e2 80 a8) and '\u{2029}' (e2 80 a9)
let next_bytes = self.peek_n(2)?;
if next_bytes == [0x80, 0xA8] || next_bytes == [0x80, 0xA9] {
self.next_line();
} else {
// 0xE2 is a utf8 first byte
self.next_column();
}
}
Some(b) if utf8_is_first_byte(b) => self.next_column(),
_ => {}
}
Ok(byte)
}
/// Retrieves the next UTF-8 character.
pub(crate) fn next_char(&mut self) -> Result<Option<u32>, Error> {
let _timer = Profiler::global().start_event("cursor::next_char()", "Lexing");
let ch = self.iter.next_char()?;
let ch = if let Some(c) = self.peeked[0] {
self.peeked[0] = None;
self.peeked.rotate_left(1);
Some(c)
} else {
self.iter.next_char()?
};
match ch {
Some(0xD) => {
// Try to take a newline if it's next, for windows "\r\n" newlines
// Otherwise, treat as a Mac OS9 bare '\r' newline
if self.peek()? == Some(0xA) {
let _next = self.iter.next_byte();
if self.peek_char()? == Some(0xA) {
self.peeked[0] = None;
self.peeked.rotate_left(1);
}
self.next_line();
}
@ -282,224 +215,8 @@ where
}
}
/// Inner iterator for a cursor.
#[derive(Debug)]
struct InnerIter<R> {
iter: Bytes<R>,
num_peeked_bytes: u8,
peeked_bytes: [u8; 4],
#[allow(clippy::option_option)]
peeked_char: Option<Option<u32>>,
}
impl<R> InnerIter<R> {
/// Creates a new inner iterator.
const fn new(iter: Bytes<R>) -> Self {
Self {
iter,
num_peeked_bytes: 0,
peeked_bytes: [0; 4],
peeked_char: None,
}
}
}
impl<R> InnerIter<R>
where
R: Read,
{
/// It will fill the buffer with checked ascii bytes.
///
/// This expects for the buffer to be fully filled. If it's not, it will fail with an
/// `UnexpectedEof` I/O error.
fn fill_bytes(&mut self, buf: &mut [u8]) -> io::Result<()> {
for byte in &mut *buf {
*byte = self.next_byte()?.ok_or_else(|| {
io::Error::new(
io::ErrorKind::UnexpectedEof,
"unexpected EOF when filling buffer",
)
})?;
}
Ok(())
}
/// Increments the iter by n bytes.
fn increment(&mut self, n: u32) -> Result<(), Error> {
for _ in 0..n {
if (self.next_byte()?).is_none() {
break;
}
}
Ok(())
}
/// Peeks the next byte.
pub(super) fn peek_byte(&mut self) -> Result<Option<u8>, Error> {
if self.num_peeked_bytes > 0 {
let byte = self.peeked_bytes[0];
Ok(Some(byte))
} else {
match self.iter.next().transpose()? {
Some(byte) => {
self.num_peeked_bytes = 1;
self.peeked_bytes[0] = byte;
Ok(Some(byte))
}
None => Ok(None),
}
}
}
/// Peeks the next n bytes, the maximum number of peeked bytes is 4 (n <= 4).
pub(super) fn peek_n_bytes(&mut self, n: u8) -> Result<&[u8], Error> {
while self.num_peeked_bytes < n && self.num_peeked_bytes < 4 {
match self.iter.next().transpose()? {
Some(byte) => {
self.peeked_bytes[usize::from(self.num_peeked_bytes)] = byte;
self.num_peeked_bytes += 1;
}
None => break,
};
}
Ok(&self.peeked_bytes[..usize::from(u8::min(n, self.num_peeked_bytes))])
}
/// Peeks the next unchecked character in u32 code point.
pub(super) fn peek_char(&mut self) -> Result<Option<u32>, Error> {
if let Some(ch) = self.peeked_char {
Ok(ch)
} else {
// Decode UTF-8
let (x, y, z, w) = match self.peek_n_bytes(4)? {
[b, ..] if *b < 128 => {
let char = u32::from(*b);
self.peeked_char = Some(Some(char));
return Ok(Some(char));
}
[] => {
self.peeked_char = None;
return Ok(None);
}
bytes => (
bytes[0],
bytes.get(1).copied(),
bytes.get(2).copied(),
bytes.get(3).copied(),
),
};
// Multibyte case follows
// Decode from a byte combination out of: [[[x y] z] w]
// NOTE: Performance is sensitive to the exact formulation here
let init = utf8_first_byte(x, 2);
let y = y.unwrap_or_default();
let mut ch = utf8_acc_cont_byte(init, y);
if x >= 0xE0 {
// [[x y z] w] case
// 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
let z = z.unwrap_or_default();
let y_z = utf8_acc_cont_byte(u32::from(y & CONT_MASK), z);
ch = init << 12 | y_z;
if x >= 0xF0 {
// [x y z w] case
// use only the lower 3 bits of `init`
let w = w.unwrap_or_default();
ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
}
};
self.peeked_char = Some(Some(ch));
Ok(Some(ch))
}
}
/// Retrieves the next byte
fn next_byte(&mut self) -> io::Result<Option<u8>> {
self.peeked_char = None;
if self.num_peeked_bytes > 0 {
let byte = self.peeked_bytes[0];
self.num_peeked_bytes -= 1;
self.peeked_bytes.rotate_left(1);
Ok(Some(byte))
} else {
self.iter.next().transpose()
}
}
/// Retrieves the next unchecked char in u32 code point.
fn next_char(&mut self) -> io::Result<Option<u32>> {
if let Some(ch) = self.peeked_char.take() {
if let Some(c) = ch {
self.increment(utf8_len(c))?;
}
return Ok(ch);
}
// Decode UTF-8
let x = match self.next_byte()? {
Some(b) if b < 128 => return Ok(Some(u32::from(b))),
Some(b) => b,
None => return Ok(None),
};
// Multibyte case follows
// Decode from a byte combination out of: [[[x y] z] w]
// NOTE: Performance is sensitive to the exact formulation here
let init = utf8_first_byte(x, 2);
let y = unwrap_or_0(self.next_byte()?);
let mut ch = utf8_acc_cont_byte(init, y);
if x >= 0xE0 {
// [[x y z] w] case
// 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
let z = unwrap_or_0(self.next_byte()?);
let y_z = utf8_acc_cont_byte(u32::from(y & CONT_MASK), z);
ch = init << 12 | y_z;
if x >= 0xF0 {
// [x y z w] case
// use only the lower 3 bits of `init`
let w = unwrap_or_0(self.next_byte()?);
ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
}
};
Ok(Some(ch))
}
}
/// Mask of the value bits of a continuation byte.
const CONT_MASK: u8 = 0b0011_1111;
/// Returns the initial codepoint accumulator for the first byte.
/// The first byte is special, only want bottom 5 bits for width 2, 4 bits
/// for width 3, and 3 bits for width 4.
fn utf8_first_byte(byte: u8, width: u32) -> u32 {
u32::from(byte & (0x7F >> width))
}
/// Returns the value of `ch` updated with continuation byte `byte`.
fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
(ch << 6) | u32::from(byte & CONT_MASK)
}
/// Checks whether the byte is a UTF-8 first byte (i.e., ascii byte or starts with the
/// bits `11`).
const fn utf8_is_first_byte(byte: u8) -> bool {
byte <= 0x7F || (byte >> 6) == 0x11
}
fn unwrap_or_0(opt: Option<u8>) -> u8 {
opt.unwrap_or(0)
}
const fn utf8_len(ch: u32) -> u32 {
if ch <= 0x7F {
1
} else if ch <= 0x7FF {
2
} else if ch <= 0xFFFF {
3
} else {
4
impl<'a> From<&'a [u8]> for Cursor<UTF8Input<&'a [u8]>> {
fn from(input: &'a [u8]) -> Self {
Self::new(UTF8Input::new(input))
}
}
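
The rewritten cursor drops the byte-oriented `InnerIter` and its hand-rolled UTF-8 decoding in favor of a plain `R: ReadChar` plus a four-slot `peeked` buffer of code points. A small self-contained illustration of the consume discipline used by `next_char` above (clear slot 0, then rotate left so the remaining lookahead stays in order); the array here is a stand-in, not the real cursor.

fn main() {
    // Stand-in for `Cursor::peeked`, with 'a' and 'b' already peeked.
    let mut peeked: [Option<u32>; 4] = [Some(0x61), Some(0x62), None, None];

    let consumed = peeked[0].take(); // consume 'a' from slot 0
    peeked.rotate_left(1); // 'b' moves to the front, order preserved

    assert_eq!(consumed, Some(0x61));
    assert_eq!(peeked, [Some(0x62), None, None, None]);
}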

core/parser/src/lexer/identifier.rs (14)

@ -3,10 +3,10 @@
use crate::lexer::{
token::ContainsEscapeSequence, Cursor, Error, StringLiteral, Token, TokenKind, Tokenizer,
};
use crate::source::ReadChar;
use boa_ast::{Keyword, Position, Span};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Identifier lexing.
///
@ -60,7 +60,7 @@ impl<R> Tokenizer<R> for Identifier {
interner: &mut Interner,
) -> Result<Token, Error>
where
R: Read,
R: ReadChar,
{
let _timer = Profiler::global().start_event("Identifier", "Lexing");
@ -95,12 +95,12 @@ impl Identifier {
init: char,
) -> Result<(String, bool), Error>
where
R: Read,
R: ReadChar,
{
let _timer = Profiler::global().start_event("Identifier::take_identifier_name", "Lexing");
let mut contains_escaped_chars = false;
let mut identifier_name = if init == '\\' && cursor.next_is(b'u')? {
let mut identifier_name = if init == '\\' && cursor.next_if(0x75 /* u */)? {
let ch = StringLiteral::take_unicode_escape_sequence(cursor, start_pos)?;
if Self::is_identifier_start(ch) {
@ -119,10 +119,10 @@ impl Identifier {
loop {
let ch = match cursor.peek_char()? {
Some(0x005C /* \ */) if cursor.peek_n(2)?.get(1) == Some(&0x75) /* u */ => {
Some(0x005C /* \ */) if cursor.peek_n(2)?[1] == Some(0x75) /* u */ => {
let pos = cursor.pos();
let _next = cursor.next_byte();
let _next = cursor.next_byte();
let _next = cursor.next_char();
let _next = cursor.next_char();
let ch = StringLiteral::take_unicode_escape_sequence(cursor, pos)?;
if Self::is_identifier_part(ch) {

core/parser/src/lexer/mod.rs (78)

@ -41,10 +41,10 @@ use self::{
string::StringLiteral,
template::TemplateLiteral,
};
use crate::source::{ReadChar, UTF8Input};
use boa_ast::{Position, Punctuator, Span};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
pub use self::{
error::Error,
@ -60,7 +60,7 @@ trait Tokenizer<R> {
interner: &mut Interner,
) -> Result<Token, Error>
where
R: Read;
R: ReadChar;
}
/// Lexer or tokenizer for the Boa JavaScript Engine.
@ -104,7 +104,7 @@ impl<R> Lexer<R> {
/// Creates a new lexer.
pub fn new(reader: R) -> Self
where
R: Read,
R: ReadChar,
{
Self {
cursor: Cursor::new(reader),
@ -125,18 +125,20 @@ impl<R> Lexer<R> {
interner: &mut Interner,
) -> Result<Token, Error>
where
R: Read,
R: ReadChar,
{
let _timer = Profiler::global().start_event("lex_slash_token", "Lexing");
if let Some(c) = self.cursor.peek()? {
if let Some(c) = self.cursor.peek_char()? {
match c {
b'/' => {
self.cursor.next_byte()?.expect("/ token vanished"); // Consume the '/'
// /
0x002F => {
self.cursor.next_char()?.expect("/ token vanished"); // Consume the '/'
SingleLineComment.lex(&mut self.cursor, start, interner)
}
b'*' => {
self.cursor.next_byte()?.expect("* token vanished"); // Consume the '*'
// *
0x002A => {
self.cursor.next_char()?.expect("* token vanished"); // Consume the '*'
MultiLineComment.lex(&mut self.cursor, start, interner)
}
ch => {
@ -144,9 +146,10 @@ impl<R> Lexer<R> {
InputElement::Div | InputElement::TemplateTail => {
// Only div punctuator allowed, regex not.
if ch == b'=' {
// =
if ch == 0x003D {
// Indicates this is an AssignDiv.
self.cursor.next_byte()?.expect("= token vanished"); // Consume the '='
self.cursor.next_char()?.expect("= token vanished"); // Consume the '='
Ok(Token::new(
Punctuator::AssignDiv.into(),
Span::new(start, self.cursor.pos()),
@ -176,7 +179,7 @@ impl<R> Lexer<R> {
/// Skips an HTML close comment (`-->`) if the `annex-b` feature is enabled.
pub(crate) fn skip_html_close(&mut self, interner: &mut Interner) -> Result<(), Error>
where
R: Read,
R: ReadChar,
{
if cfg!(not(feature = "annex-b")) || self.module() {
return Ok(());
@ -186,10 +189,11 @@ impl<R> Lexer<R> {
let _next = self.cursor.next_char();
}
if self.cursor.peek_n(3)? == [b'-', b'-', b'>'] {
let _next = self.cursor.next_byte();
let _next = self.cursor.next_byte();
let _next = self.cursor.next_byte();
// -->
if self.cursor.peek_n(3)?[..3] == [Some(0x2D), Some(0x2D), Some(0x3E)] {
let _next = self.cursor.next_char();
let _next = self.cursor.next_char();
let _next = self.cursor.next_char();
let start = self.cursor.pos();
SingleLineComment.lex(&mut self.cursor, start, interner)?;
@ -206,7 +210,7 @@ impl<R> Lexer<R> {
// We intentionally don't implement Iterator trait as Result<Option> is cleaner to handle.
pub(crate) fn next_no_skip(&mut self, interner: &mut Interner) -> Result<Option<Token>, Error>
where
R: Read,
R: ReadChar,
{
let _timer = Profiler::global().start_event("next()", "Lexing");
@ -224,13 +228,13 @@ impl<R> Lexer<R> {
//handle hashbang here so the below match block still throws error on
//# if position isn't (1, 1)
if start.column_number() == 1 && start.line_number() == 1 && next_ch == 0x23 {
if let Some(hashbang_peek) = self.cursor.peek()? {
if hashbang_peek == 0x21 {
if start.column_number() == 1
&& start.line_number() == 1
&& next_ch == 0x23
&& self.cursor.peek_char()? == Some(0x21)
{
let _token = HashbangComment.lex(&mut self.cursor, start, interner);
return self.next(interner);
}
}
};
if let Ok(c) = char::try_from(next_ch) {
@ -250,7 +254,12 @@ impl<R> Lexer<R> {
Span::new(start, self.cursor.pos()),
)),
'.' => {
if self.cursor.peek()?.as_ref().map(u8::is_ascii_digit) == Some(true) {
if self
.cursor
.peek_char()?
.filter(|c| (0x30..=0x39/* 0..=9 */).contains(c))
.is_some()
{
NumberLiteral::new(b'.').lex(&mut self.cursor, start, interner)
} else {
SpreadLiteral::new().lex(&mut self.cursor, start, interner)
@ -287,10 +296,13 @@ impl<R> Lexer<R> {
'#' => PrivateIdentifier::new().lex(&mut self.cursor, start, interner),
'/' => self.lex_slash_token(start, interner),
#[cfg(feature = "annex-b")]
'<' if !self.module() && self.cursor.peek_n(3)? == [b'!', b'-', b'-'] => {
let _next = self.cursor.next_byte();
let _next = self.cursor.next_byte();
let _next = self.cursor.next_byte();
// <!--
'<' if !self.module()
&& self.cursor.peek_n(3)?[..3] == [Some(0x21), Some(0x2D), Some(0x2D)] =>
{
let _next = self.cursor.next_char();
let _next = self.cursor.next_char();
let _next = self.cursor.next_char();
let start = self.cursor.pos();
SingleLineComment.lex(&mut self.cursor, start, interner)
}
@ -298,7 +310,7 @@ impl<R> Lexer<R> {
'=' | '*' | '+' | '-' | '%' | '|' | '&' | '^' | '<' | '>' | '!' | '~' | '?' => {
Operator::new(next_ch as u8).lex(&mut self.cursor, start, interner)
}
'\\' if self.cursor.peek()? == Some(b'u') => {
'\\' if self.cursor.peek_char()? == Some(0x0075 /* u */) => {
Identifier::new(c).lex(&mut self.cursor, start, interner)
}
_ if Identifier::is_identifier_start(c as u32) => {
@ -340,7 +352,7 @@ impl<R> Lexer<R> {
#[allow(clippy::should_implement_trait)]
pub fn next(&mut self, interner: &mut Interner) -> Result<Option<Token>, Error>
where
R: Read,
R: ReadChar,
{
loop {
let Some(next) = self.next_no_skip(interner)? else {
@ -360,12 +372,18 @@ impl<R> Lexer<R> {
interner: &mut Interner,
) -> Result<Token, Error>
where
R: Read,
R: ReadChar,
{
TemplateLiteral.lex(&mut self.cursor, start, interner)
}
}
impl<'a> From<&'a [u8]> for Lexer<UTF8Input<&'a [u8]>> {
fn from(input: &'a [u8]) -> Self {
Self::new(UTF8Input::new(input))
}
}
/// ECMAScript goal symbols.
///
/// <https://tc39.es/ecma262/#sec-ecmascript-language-lexical-grammar>

core/parser/src/lexer/number.rs (95)

@ -1,12 +1,13 @@
//! This module implements lexing for number literals (123, 787) used in ECMAScript.
use crate::lexer::{token::Numeric, Cursor, Error, Token, TokenKind, Tokenizer};
use crate::source::ReadChar;
use boa_ast::{Position, Span};
use boa_interner::Interner;
use boa_profiler::Profiler;
use num_bigint::BigInt;
use num_traits::{ToPrimitive, Zero};
use std::{io::Read, str};
use std::str;
/// Number literal lexing.
///
@ -64,29 +65,36 @@ fn take_signed_integer<R>(
kind: NumericKind,
) -> Result<(), Error>
where
R: Read,
R: ReadChar,
{
// The next part must be SignedInteger.
// This is optionally a '+' or '-' followed by 1 or more DecimalDigits.
match cursor.next_byte()? {
Some(b'+') => {
match cursor.next_char()? {
Some(0x2B /* + */) => {
buf.push(b'+');
if !cursor.next_is_ascii_pred(&|ch| ch.is_digit(kind.base()))? {
// A digit must follow the + or - symbol.
return Err(Error::syntax("No digit found after + symbol", cursor.pos()));
}
}
Some(b'-') => {
Some(0x2D /* - */) => {
buf.push(b'-');
if !cursor.next_is_ascii_pred(&|ch| ch.is_digit(kind.base()))? {
// A digit must follow the + or - symbol.
return Err(Error::syntax("No digit found after - symbol", cursor.pos()));
}
}
Some(byte) => {
let ch = char::from(byte);
Some(c) => {
if let Some(ch) = char::from_u32(c) {
if ch.is_ascii() && ch.is_digit(kind.base()) {
buf.push(byte);
#[allow(clippy::cast_possible_truncation)]
buf.push(c as u8);
} else {
return Err(Error::syntax(
"When lexing exponential value found unexpected char",
cursor.pos(),
));
}
} else {
return Err(Error::syntax(
"When lexing exponential value found unexpected char",
@ -115,18 +123,14 @@ fn take_integer<R>(
separator_allowed: bool,
) -> Result<(), Error>
where
R: Read,
R: ReadChar,
{
let mut prev_is_underscore = false;
let mut pos = cursor.pos();
while cursor.next_is_ascii_pred(&|c| c.is_digit(kind.base()) || c == '_')? {
pos = cursor.pos();
match cursor.next_byte()? {
Some(c) if char::from(c).is_digit(kind.base()) => {
prev_is_underscore = false;
buf.push(c);
}
Some(b'_') if separator_allowed => {
match cursor.next_char()? {
Some(0x5F /* _ */) if separator_allowed => {
if prev_is_underscore {
return Err(Error::syntax(
"only one underscore is allowed as numeric separator",
@ -135,9 +139,16 @@ where
}
prev_is_underscore = true;
}
Some(b'_') if !separator_allowed => {
Some(0x5F /* _ */) if !separator_allowed => {
return Err(Error::syntax("separator is not allowed", pos));
}
Some(c) => {
if char::from_u32(c).map(|ch| ch.is_digit(kind.base())) == Some(true) {
prev_is_underscore = false;
#[allow(clippy::cast_possible_truncation)]
buf.push(c as u8);
}
}
_ => (),
}
}
@ -158,7 +169,7 @@ where
/// [spec]: https://tc39.es/ecma262/#sec-literals-numeric-literals
fn check_after_numeric_literal<R>(cursor: &mut Cursor<R>) -> Result<(), Error>
where
R: Read,
R: ReadChar,
{
if cursor.next_is_ascii_pred(&|ch| ch.is_ascii_alphanumeric() || ch == '$' || ch == '_')? {
Err(Error::syntax(
@ -178,7 +189,7 @@ impl<R> Tokenizer<R> for NumberLiteral {
_interner: &mut Interner,
) -> Result<Token, Error>
where
R: Read,
R: ReadChar,
{
let _timer = Profiler::global().start_event("NumberLiteral", "Lexing");
@ -187,13 +198,14 @@ impl<R> Tokenizer<R> for NumberLiteral {
// Default assume the number is a base 10 integer.
let mut kind = NumericKind::Integer(10);
let c = cursor.peek();
let c = cursor.peek_char();
let mut legacy_octal = false;
if self.init == b'0' {
if let Some(ch) = c? {
match ch {
b'x' | b'X' => {
// x | X
0x0078 | 0x0058 => {
// Remove the initial '0' from buffer.
cursor.next_char()?.expect("x or X character vanished");
buf.pop();
@ -209,7 +221,8 @@ impl<R> Tokenizer<R> for NumberLiteral {
));
}
}
b'o' | b'O' => {
// o | O
0x006F | 0x004F => {
// Remove the initial '0' from buffer.
cursor.next_char()?.expect("o or O character vanished");
buf.pop();
@ -225,7 +238,8 @@ impl<R> Tokenizer<R> for NumberLiteral {
));
}
}
b'b' | b'B' => {
// b | B
0x0062 | 0x0042 => {
// Remove the initial '0' from buffer.
cursor.next_char()?.expect("b or B character vanished");
buf.pop();
@ -241,7 +255,8 @@ impl<R> Tokenizer<R> for NumberLiteral {
));
}
}
b'n' => {
// n
0x006E => {
cursor.next_char()?.expect("n character vanished");
// DecimalBigIntegerLiteral '0n'
@ -252,7 +267,7 @@ impl<R> Tokenizer<R> for NumberLiteral {
}
byte => {
legacy_octal = true;
let ch = char::from(byte);
if let Some(ch) = char::from_u32(byte) {
if ch.is_digit(8) {
// LegacyOctalIntegerLiteral, or a number with leading 0s.
if cursor.strict() {
@ -266,11 +281,14 @@ impl<R> Tokenizer<R> for NumberLiteral {
// Remove the initial '0' from buffer.
buf.pop();
buf.push(cursor.next_byte()?.expect("'0' character vanished"));
#[allow(clippy::cast_possible_truncation)]
buf.push(cursor.next_char()?.expect("'0' character vanished") as u8);
take_integer(&mut buf, cursor, NumericKind::Integer(8), false)?;
if !cursor.next_is_ascii_pred(&|c| c.is_ascii_digit() || c == '_')? {
if !cursor
.next_is_ascii_pred(&|c| c.is_ascii_digit() || c == '_')?
{
// LegacyOctalIntegerLiteral
kind = NumericKind::Integer(8);
}
@ -284,6 +302,7 @@ impl<R> Tokenizer<R> for NumberLiteral {
start_pos,
));
}
}
} // Else indicates that the symbol is a non-number.
}
}
@ -298,12 +317,12 @@ impl<R> Tokenizer<R> for NumberLiteral {
}
let next = if self.init == b'.' {
Some(b'.')
Some(0x002E /* . */)
} else {
// Consume digits and separators until a non-digit non-separator
// character is encountered or all the characters are consumed.
take_integer(&mut buf, cursor, kind, !legacy_octal)?;
cursor.peek()?
cursor.peek_char()?
};
// The non-digit character could be:
@ -311,7 +330,7 @@ impl<R> Tokenizer<R> for NumberLiteral {
// '.' To indicate a decimal separator.
// 'e' | 'E' To indicate an ExponentPart.
match next {
Some(b'n') => {
Some(0x006E /* n */) => {
// DecimalBigIntegerLiteral
// Lexing finished.
// Consume the n
@ -321,21 +340,21 @@ impl<R> Tokenizer<R> for NumberLiteral {
cursor.pos(),
));
}
cursor.next_byte()?.expect("n character vanished");
cursor.next_char()?.expect("n character vanished");
kind = kind.to_bigint();
}
Some(b'.') => {
Some(0x002E /* . */) => {
if kind.base() == 10 {
// Only base 10 numbers can have a decimal separator.
// Number literal lexing finished if a . is found for a number in a different base.
if self.init != b'.' {
cursor.next_byte()?.expect("'.' token vanished");
cursor.next_char()?.expect("'.' token vanished");
buf.push(b'.'); // Consume the .
}
kind = NumericKind::Rational;
if cursor.peek()? == Some(b'_') {
if cursor.peek_char()? == Some(0x005F /* _ */) {
return Err(Error::syntax(
"numeric separator not allowed after '.'",
cursor.pos(),
@ -348,10 +367,10 @@ impl<R> Tokenizer<R> for NumberLiteral {
// The non-digit character at this point must be an 'e' or 'E' to indicate an Exponent Part.
// Another '.' or 'n' is not allowed.
match cursor.peek()? {
Some(b'e' | b'E') => {
match cursor.peek_char()? {
Some(0x0065 /*e */ | 0x0045 /* E */) => {
// Consume the ExponentIndicator.
cursor.next_byte()?.expect("e or E token vanished");
cursor.next_char()?.expect("e or E token vanished");
buf.push(b'E');
@ -363,9 +382,9 @@ impl<R> Tokenizer<R> for NumberLiteral {
}
}
}
Some(b'e' | b'E') => {
Some(0x0065 /*e */ | 0x0045 /* E */) => {
kind = NumericKind::Rational;
cursor.next_byte()?.expect("e or E character vanished"); // Consume the ExponentIndicator.
cursor.next_char()?.expect("e or E character vanished"); // Consume the ExponentIndicator.
buf.push(b'E');
take_signed_integer(&mut buf, cursor, kind)?;
}
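
Digit classification in this file now operates on raw code points (`u32`) rather than bytes, going through `char::from_u32` or explicit ranges such as `0x30..=0x39`. A tiny standalone check of that pattern; the helper name is ours, not the PR's.

// Hypothetical helper matching the `char::from_u32(...).is_digit(base)`
// pattern used in the hunks above.
fn is_digit_cp(c: u32, base: u32) -> bool {
    char::from_u32(c).map_or(false, |ch| ch.is_digit(base))
}

fn main() {
    assert!(is_digit_cp(0x37, 8)); // '7' is a valid octal digit
    assert!(!is_digit_cp(0x38, 8)); // '8' is not
    assert!(is_digit_cp(0x46, 16)); // 'F' is a valid hex digit
    assert!(!is_digit_cp(0x1F600, 10)); // non-BMP code points are never digits
}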

core/parser/src/lexer/operator.rs (55)

@ -1,37 +1,34 @@
//! Boa's lexing for ECMAScript operators (+, - etc.).
use crate::lexer::{Cursor, Error, Token, TokenKind, Tokenizer};
use crate::source::ReadChar;
use boa_ast::{Position, Punctuator, Span};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// `vop` tests the next token to see if we're on an assign operation of just a plain binary operation.
///
/// If the next value is not an assignment operation it will pattern match the provided values and return the corresponding token.
macro_rules! vop {
($cursor:ident, $assign_op:expr, $op:expr) => ({
match $cursor.peek()? {
match $cursor.peek_char()? {
None => Err(Error::syntax("abrupt end - could not preview next value as part of the operator", $cursor.pos())),
Some(b'=') => {
$cursor.next_byte()?.expect("= token vanished");
$cursor.next_column();
Some(0x3D /* = */) => {
$cursor.next_char()?.expect("= token vanished");
$assign_op
}
Some(_) => $op,
}
});
($cursor:ident, $assign_op:expr, $op:expr, {$($case:pat => $block:expr), +}) => ({
match $cursor.peek()? {
match $cursor.peek_char()? {
None => Err(Error::syntax("abrupt end - could not preview next value as part of the operator", $cursor.pos())),
Some(b'=') => {
$cursor.next_byte()?.expect("= token vanished");
$cursor.next_column();
Some(0x3D /* = */) => {
$cursor.next_char()?.expect("= token vanished");
$assign_op
},
$($case => {
$cursor.next_byte()?.expect("Token vanished");
$cursor.next_column();
$cursor.next_char()?.expect("Token vanished");
$block
})+,
_ => $op,
@ -86,19 +83,19 @@ impl<R> Tokenizer<R> for Operator {
_interner: &mut Interner,
) -> Result<Token, Error>
where
R: Read,
R: ReadChar,
{
let _timer = Profiler::global().start_event("Operator", "Lexing");
match self.init {
b'*' => op!(cursor, start_pos, Ok(Punctuator::AssignMul), Ok(Punctuator::Mul), {
Some(b'*') => vop!(cursor, Ok(Punctuator::AssignPow), Ok(Punctuator::Exp))
Some(0x2A /* * */) => vop!(cursor, Ok(Punctuator::AssignPow), Ok(Punctuator::Exp))
}),
b'+' => op!(cursor, start_pos, Ok(Punctuator::AssignAdd), Ok(Punctuator::Add), {
Some(b'+') => Ok(Punctuator::Inc)
Some(0x2B /* + */) => Ok(Punctuator::Inc)
}),
b'-' => op!(cursor, start_pos, Ok(Punctuator::AssignSub), Ok(Punctuator::Sub), {
Some(b'-') => {
Some(0x2D /* - */) => {
Ok(Punctuator::Dec)
}
}),
@ -109,19 +106,16 @@ impl<R> Tokenizer<R> for Operator {
Ok(Punctuator::Mod)
),
b'|' => op!(cursor, start_pos, Ok(Punctuator::AssignOr), Ok(Punctuator::Or), {
Some(b'|') => vop!(cursor, Ok(Punctuator::AssignBoolOr), Ok(Punctuator::BoolOr))
Some(0x7C /* | */) => vop!(cursor, Ok(Punctuator::AssignBoolOr), Ok(Punctuator::BoolOr))
}),
b'&' => op!(cursor, start_pos, Ok(Punctuator::AssignAnd), Ok(Punctuator::And), {
Some(b'&') => vop!(cursor, Ok(Punctuator::AssignBoolAnd), Ok(Punctuator::BoolAnd))
Some(0x26 /* & */) => vop!(cursor, Ok(Punctuator::AssignBoolAnd), Ok(Punctuator::BoolAnd))
}),
b'?' => {
let (first, second) = (
cursor.peek_n(2)?.first().copied(),
cursor.peek_n(2)?.get(1).copied(),
);
let (first, second) = (cursor.peek_char()?, cursor.peek_n(2)?[1]);
match first {
Some(b'?') => {
cursor.next_byte()?.expect("? vanished");
Some(0x3F /* ? */) => {
cursor.next_char()?.expect("? vanished");
op!(
cursor,
start_pos,
@ -129,8 +123,9 @@ impl<R> Tokenizer<R> for Operator {
Ok(Punctuator::Coalesce)
)
}
Some(b'.') if !matches!(second, Some(second) if second.is_ascii_digit()) => {
cursor.next_byte()?.expect(". vanished");
Some(0x2E /* . */) if !matches!(second, Some(second) if (0x30..=0x39 /* 0..=9 */).contains(&second)) =>
{
cursor.next_char()?.expect(". vanished");
Ok(Token::new(
TokenKind::Punctuator(Punctuator::Optional),
Span::new(start_pos, cursor.pos()),
@ -148,24 +143,24 @@ impl<R> Tokenizer<R> for Operator {
Ok(Punctuator::AssignXor),
Ok(Punctuator::Xor)
),
b'=' => op!(cursor, start_pos, if cursor.next_is(b'=')? {
b'=' => op!(cursor, start_pos, if cursor.next_if(0x3D /* = */)? {
Ok(Punctuator::StrictEq)
} else {
Ok(Punctuator::Eq)
}, Ok(Punctuator::Assign), {
Some(b'>') => {
Some(0x3E /* > */) => {
Ok(Punctuator::Arrow)
}
}),
b'<' => {
op!(cursor, start_pos, Ok(Punctuator::LessThanOrEq), Ok(Punctuator::LessThan), {
Some(b'<') => vop!(cursor, Ok(Punctuator::AssignLeftSh), Ok(Punctuator::LeftSh))
Some(0x3C /* < */) => vop!(cursor, Ok(Punctuator::AssignLeftSh), Ok(Punctuator::LeftSh))
})
}
b'>' => {
op!(cursor, start_pos, Ok(Punctuator::GreaterThanOrEq), Ok(Punctuator::GreaterThan), {
Some(b'>') => vop!(cursor, Ok(Punctuator::AssignRightSh), Ok(Punctuator::RightSh), {
Some(b'>') => vop!(cursor, Ok(Punctuator::AssignURightSh), Ok(Punctuator::URightSh))
Some(0x3E /* > */) => vop!(cursor, Ok(Punctuator::AssignRightSh), Ok(Punctuator::RightSh), {
Some(0x3E /* > */) => vop!(cursor, Ok(Punctuator::AssignURightSh), Ok(Punctuator::URightSh))
})
})
}

core/parser/src/lexer/private_identifier.rs (6)

@ -1,10 +1,10 @@
//! Boa's lexing for ECMAScript private identifiers (#foo, #myvar, etc.).
use crate::lexer::{identifier::Identifier, Cursor, Error, Token, TokenKind, Tokenizer};
use crate::source::ReadChar;
use boa_ast::{Position, Span};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Private Identifier lexing.
///
@ -30,14 +30,14 @@ impl<R> Tokenizer<R> for PrivateIdentifier {
interner: &mut Interner,
) -> Result<Token, Error>
where
R: Read,
R: ReadChar,
{
let _timer = Profiler::global().start_event("PrivateIdentifier", "Lexing");
if let Some(next_ch) = cursor.next_char()? {
if let Ok(c) = char::try_from(next_ch) {
match c {
'\\' if cursor.peek()? == Some(b'u') => {
'\\' if cursor.peek_char()? == Some(0x0075 /* u */) => {
let (name, _) = Identifier::take_identifier_name(cursor, start_pos, c)?;
Ok(Token::new(
TokenKind::PrivateIdentifier(interner.get_or_intern(name.as_str())),

core/parser/src/lexer/regex.rs (81)

@ -1,15 +1,13 @@
//! Boa's lexing for ECMAScript regex literals.
use crate::lexer::{Cursor, Error, Span, Token, TokenKind, Tokenizer};
use crate::source::ReadChar;
use bitflags::bitflags;
use boa_ast::Position;
use boa_interner::{Interner, Sym};
use boa_profiler::Profiler;
use regress::{Flags, Regex};
use std::{
io::{self, ErrorKind, Read},
str::{self, FromStr},
};
use std::str::{self, FromStr};
/// Regex literal lexing.
///
@ -34,7 +32,7 @@ impl<R> Tokenizer<R> for RegexLiteral {
interner: &mut Interner,
) -> Result<Token, Error>
where
R: Read,
R: ReadChar,
{
let _timer = Profiler::global().start_event("RegexLiteral", "Lexing");
@ -43,7 +41,7 @@ impl<R> Tokenizer<R> for RegexLiteral {
// Lex RegularExpressionBody.
loop {
match cursor.next_byte()? {
match cursor.next_char()? {
None => {
// Abrupt end.
return Err(Error::syntax(
@ -53,52 +51,40 @@ impl<R> Tokenizer<R> for RegexLiteral {
}
Some(b) => {
match b {
b'/' if !is_class_char => break, // RegularExpressionBody finished.
b'[' => {
// /
0x2F if !is_class_char => break, // RegularExpressionBody finished.
// [
0x5B => {
is_class_char = true;
body.push(b);
}
b']' if is_class_char => {
// ]
0x5D if is_class_char => {
is_class_char = false;
body.push(b);
}
b'\n' | b'\r' => {
// \n | \r | \u{2028} | \u{2029}
0xA | 0xD | 0x2028 | 0x2029 => {
// Not allowed in Regex literal.
return Err(Error::syntax(
"new lines are not allowed in regular expressions",
cursor.pos(),
));
}
0xE2 if (cursor.peek_n(2)? == [0x80, 0xA8]
|| cursor.peek_n(2)? == [0x80, 0xA9]) =>
{
// '\u{2028}' (e2 80 a8) and '\u{2029}' (e2 80 a9) are not allowed
return Err(Error::syntax(
"new lines are not allowed in regular expressions",
cursor.pos(),
));
}
b'\\' => {
// \
0x5C => {
// Escape sequence
body.push(b'\\');
if let Some(sc) = cursor.next_byte()? {
body.push(b);
if let Some(sc) = cursor.next_char()? {
match sc {
b'\n' | b'\r' => {
// \n | \r | \u{2028} | \u{2029}
0xA | 0xD | 0x2028 | 0x2029 => {
// Not allowed in Regex literal.
return Err(Error::syntax(
"new lines are not allowed in regular expressions",
cursor.pos(),
));
}
0xE2 if (cursor.peek_n(2)? == [0x80, 0xA8]
|| cursor.peek_n(2)? == [0x80, 0xA9]) =>
{
// '\u{2028}' (e2 80 a8) and '\u{2029}' (e2 80 a9) are not allowed
return Err(Error::syntax(
"new lines are not allowed in regular expressions",
cursor.pos(),
));
}
b => body.push(b),
}
} else {
@ -119,9 +105,28 @@ impl<R> Tokenizer<R> for RegexLiteral {
let flags_start = cursor.pos();
cursor.take_while_ascii_pred(&mut flags, &char::is_alphabetic)?;
// SAFETY: We have already checked that the bytes are valid UTF-8.
let flags_str = unsafe { str::from_utf8_unchecked(flags.as_slice()) };
if let Ok(body_str) = str::from_utf8(body.as_slice()) {
if let Err(error) = Regex::with_flags(body_str, flags_str) {
let mut body_utf16 = Vec::new();
// We convert the body to UTF-16 since it may contain code points that are not valid UTF-8.
// We already know that the body is valid UTF-16. Casting is fine.
#[allow(clippy::cast_possible_truncation)]
for cp in &body {
let cp = *cp;
if cp <= 0xFFFF {
body_utf16.push(cp as u16);
} else {
let cp = cp - 0x1_0000;
let high = 0xD800 | ((cp >> 10) as u16);
let low = 0xDC00 | ((cp as u16) & 0x3FF);
body_utf16.push(high);
body_utf16.push(low);
}
}
if let Err(error) = Regex::from_unicode(body.into_iter(), flags_str) {
return Err(Error::Syntax(
format!("Invalid regular expression literal: {error}").into(),
start_pos,
@ -130,17 +135,11 @@ impl<R> Tokenizer<R> for RegexLiteral {
Ok(Token::new(
TokenKind::regular_expression_literal(
interner.get_or_intern(body_str),
interner.get_or_intern(body_utf16.as_slice()),
parse_regex_flags(flags_str, flags_start, interner)?,
),
Span::new(start_pos, cursor.pos()),
))
} else {
Err(Error::from(io::Error::new(
ErrorKind::InvalidData,
"Invalid UTF-8 character in regular expressions",
)))
}
}
}
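
Before interning, the regex body's code points are re-encoded as UTF-16, splitting supplementary-plane code points into surrogate pairs. A worked example of the split performed in the hunk above, using U+1F600.

fn main() {
    // U+1F600 is above U+FFFF, so it splits into a surrogate pair.
    let cp: u32 = 0x1F600 - 0x1_0000; // 0xF600
    let high = 0xD800 | ((cp >> 10) as u16); // 0xD83D
    let low = 0xDC00 | ((cp as u16) & 0x3FF); // 0xDE00
    assert_eq!((high, low), (0xD83D, 0xDE00));
}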

core/parser/src/lexer/spread.rs (8)

@ -1,10 +1,10 @@
//! Boa's lexing for ECMAScript spread (...) literals.
use crate::lexer::{Cursor, Error, Token, Tokenizer};
use crate::source::ReadChar;
use boa_ast::{Position, Punctuator, Span};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Spread literal lexing.
///
@ -34,13 +34,13 @@ impl<R> Tokenizer<R> for SpreadLiteral {
_interner: &mut Interner,
) -> Result<Token, Error>
where
R: Read,
R: ReadChar,
{
let _timer = Profiler::global().start_event("SpreadLiteral", "Lexing");
// . or ...
if cursor.next_is(b'.')? {
if cursor.next_is(b'.')? {
if cursor.next_if(0x2E /* . */)? {
if cursor.next_if(0x2E /* . */)? {
Ok(Token::new(
Punctuator::Spread.into(),
Span::new(start_pos, cursor.pos()),

core/parser/src/lexer/string.rs (135)

@ -1,13 +1,11 @@
//! Boa's lexing for ECMAScript string literals.
use crate::lexer::{token::EscapeSequence, Cursor, Error, Token, TokenKind, Tokenizer};
use crate::source::ReadChar;
use boa_ast::{Position, Span};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::{
io::{self, ErrorKind, Read},
str,
};
use std::io::{self, ErrorKind};
/// String literal lexing.
///
@ -84,7 +82,7 @@ impl<R> Tokenizer<R> for StringLiteral {
interner: &mut Interner,
) -> Result<Token, Error>
where
R: Read,
R: ReadChar,
{
let _timer = Profiler::global().start_event("StringLiteral", "Lexing");
@ -119,7 +117,7 @@ impl StringLiteral {
strict: bool,
) -> Result<(Vec<u16>, Span, EscapeSequence), Error>
where
R: Read,
R: ReadChar,
{
let mut buf = Vec::new();
let mut escape_sequence = EscapeSequence::empty();
@ -172,7 +170,7 @@ impl StringLiteral {
is_template_literal: bool,
) -> Result<(Option<u32>, EscapeSequence), Error>
where
R: Read,
R: ReadChar,
{
let escape_ch = cursor.next_char()?.ok_or_else(|| {
Error::from(io::Error::new(
@ -192,8 +190,8 @@ impl StringLiteral {
0x0027 /* ' */ => (Some(0x0027 /* ' */), EscapeSequence::OTHER),
0x005C /* \ */ => (Some(0x005C /* \ */), EscapeSequence::OTHER),
0x0030 /* 0 */ if cursor
.peek()?
.filter(u8::is_ascii_digit)
.peek_char()?
.filter(|c| (0x30..=0x39 /* 0..=9 */).contains(c))
.is_none() =>
(Some(0x0000 /* NULL */), EscapeSequence::OTHER),
0x0078 /* x */ => {
@ -256,23 +254,32 @@ impl StringLiteral {
start_pos: Position,
) -> Result<u32, Error>
where
R: Read,
R: ReadChar,
{
// Support \u{X..X} (Unicode CodePoint)
if cursor.next_is(b'{')? {
if cursor.next_if(0x7B /* { */)? {
// TODO: use bytes for a bit better performance (using stack)
let mut code_point_buf = Vec::with_capacity(6);
cursor.take_until(b'}', &mut code_point_buf)?;
let code_point = str::from_utf8(code_point_buf.as_slice())
.ok()
.and_then(|code_point_str| {
// The `code_point_str` should represent a single unicode codepoint, convert to u32
u32::from_str_radix(code_point_str, 16).ok()
})
.ok_or_else(|| {
Error::syntax("malformed Unicode character escape sequence", start_pos)
})?;
cursor.take_until(0x7D /* } */, &mut code_point_buf)?;
let mut s = String::with_capacity(code_point_buf.len());
for c in code_point_buf {
if let Some(c) = char::from_u32(c) {
s.push(c);
} else {
return Err(Error::syntax(
"malformed Unicode character escape sequence",
start_pos,
));
}
}
let Ok(code_point) = u32::from_str_radix(&s, 16) else {
return Err(Error::syntax(
"malformed Unicode character escape sequence",
start_pos,
));
};
// UTF16Encoding of a numeric code point value
if code_point > 0x10_FFFF {
@ -286,14 +293,32 @@ impl StringLiteral {
} else {
// Grammar: Hex4Digits
// Collect each character after \u e.g \uD83D will give "D83D"
let mut code_point_utf8_bytes = [0u8; 4];
cursor.fill_bytes(&mut code_point_utf8_bytes)?;
// Convert to u16
let code_point = str::from_utf8(&code_point_utf8_bytes)
.ok()
.and_then(|code_point_str| u16::from_str_radix(code_point_str, 16).ok())
let mut buffer = [0u32; 4];
buffer[0] = cursor
.next_char()?
.ok_or_else(|| Error::syntax("invalid Unicode escape sequence", start_pos))?;
buffer[1] = cursor
.next_char()?
.ok_or_else(|| Error::syntax("invalid Unicode escape sequence", start_pos))?;
buffer[2] = cursor
.next_char()?
.ok_or_else(|| Error::syntax("invalid Unicode escape sequence", start_pos))?;
buffer[3] = cursor
.next_char()?
.ok_or_else(|| Error::syntax("invalid Unicode escape sequence", start_pos))?;
let mut s = String::with_capacity(buffer.len());
for c in buffer {
if let Some(c) = char::from_u32(c) {
s.push(c);
} else {
return Err(Error::syntax("invalid Unicode escape sequence", start_pos));
}
}
let Ok(code_point) = u16::from_str_radix(&s, 16) else {
return Err(Error::syntax("invalid Unicode escape sequence", start_pos));
};
Ok(u32::from(code_point))
}
@ -304,14 +329,34 @@ impl StringLiteral {
start_pos: Position,
) -> Result<u32, Error>
where
R: Read,
R: ReadChar,
{
let mut code_point_utf8_bytes = [0u8; 2];
cursor.fill_bytes(&mut code_point_utf8_bytes)?;
let code_point = str::from_utf8(&code_point_utf8_bytes)
.ok()
.and_then(|code_point_str| u16::from_str_radix(code_point_str, 16).ok())
let mut buffer = [0u32; 2];
buffer[0] = cursor
.next_char()?
.ok_or_else(|| Error::syntax("invalid Hexadecimal escape sequence", start_pos))?;
buffer[1] = cursor
.next_char()?
.ok_or_else(|| Error::syntax("invalid Hexadecimal escape sequence", start_pos))?;
let mut s = String::with_capacity(buffer.len());
for c in buffer {
if let Some(c) = char::from_u32(c) {
s.push(c);
} else {
return Err(Error::syntax(
"invalid Hexadecimal escape sequence",
start_pos,
));
}
}
let Ok(code_point) = u16::from_str_radix(&s, 16) else {
return Err(Error::syntax(
"invalid Hexadecimal escape sequence",
start_pos,
));
};
Ok(u32::from(code_point))
}
@ -321,24 +366,24 @@ impl StringLiteral {
init_byte: u8,
) -> Result<u32, Error>
where
R: Read,
R: ReadChar,
{
// Grammar: OctalDigit
let mut code_point = u32::from(init_byte - b'0');
// Grammar: ZeroToThree OctalDigit
// Grammar: FourToSeven OctalDigit
if let Some(byte) = cursor.peek()? {
if (b'0'..=b'7').contains(&byte) {
cursor.next_byte()?;
code_point = (code_point * 8) + u32::from(byte - b'0');
if let Some(c) = cursor.peek_char()? {
if (0x30..=0x37/* 0..=7 */).contains(&c) {
cursor.next_char()?;
code_point = (code_point * 8) + c - 0x30 /* 0 */;
if (b'0'..=b'3').contains(&init_byte) {
if (0x30..=0x33/* 0..=3 */).contains(&init_byte) {
// Grammar: ZeroToThree OctalDigit OctalDigit
if let Some(byte) = cursor.peek()? {
if (b'0'..=b'7').contains(&byte) {
cursor.next_byte()?;
code_point = (code_point * 8) + u32::from(byte - b'0');
if let Some(c) = cursor.peek_char()? {
if (0x30..=0x37/* 0..=7 */).contains(&c) {
cursor.next_char()?;
code_point = (code_point * 8) + c - 0x30 /* 0 */;
}
}
}
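
The octal-escape hunk above accumulates the value one digit at a time via `code_point * 8 + c - 0x30`. A worked example of that arithmetic for the legacy escape `\101`, which decodes to 'A'.

fn main() {
    // `\101`: init digit '1', then '0', then '1'.
    let mut code_point = u32::from(b'1' - b'0'); // 1
    code_point = code_point * 8 + (0x30_u32 - 0x30); // '0' -> 8
    code_point = code_point * 8 + (0x31_u32 - 0x30); // '1' -> 65
    assert_eq!(char::from_u32(code_point), Some('A'));
}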

core/parser/src/lexer/template.rs (35)

@ -1,13 +1,17 @@
//! Boa's lexing for ECMAScript template literals.
use crate::lexer::{
use crate::source::ReadChar;
use crate::{
lexer::{
string::{StringLiteral, UTF16CodeUnitsBuffer},
Cursor, Error, Token, TokenKind, Tokenizer,
},
source::UTF8Input,
};
use boa_ast::{Position, Span};
use boa_interner::{Interner, Sym};
use boa_profiler::Profiler;
use std::io::{self, ErrorKind, Read};
use std::io::{self, ErrorKind};
#[cfg_attr(feature = "deser", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
@ -44,7 +48,7 @@ impl TemplateString {
/// [spec]: https://tc39.es/ecma262/#sec-static-semantics-templatestrings
pub fn to_owned_cooked(self, interner: &mut Interner) -> Result<Sym, Error> {
let string = interner.resolve_expect(self.raw).to_string();
let mut cursor = Cursor::with_position(string.as_bytes(), self.start_pos);
let mut cursor = Cursor::with_position(UTF8Input::new(string.as_bytes()), self.start_pos);
let mut buf: Vec<u16> = Vec::new();
loop {
@ -99,7 +103,7 @@ impl<R> Tokenizer<R> for TemplateLiteral {
interner: &mut Interner,
) -> Result<Token, Error>
where
R: Read,
R: ReadChar,
{
let _timer = Profiler::global().start_event("TemplateLiteral", "Lexing");
@ -124,7 +128,7 @@ impl<R> Tokenizer<R> for TemplateLiteral {
));
}
// $
0x0024 if cursor.next_is(b'{')? => {
0x0024 if cursor.next_if(0x7B /* { */)? => {
let raw_sym = interner.get_or_intern(&buf[..]);
let template_string = TemplateString::new(raw_sym, start_pos);
@ -135,7 +139,7 @@ impl<R> Tokenizer<R> for TemplateLiteral {
}
// \
0x005C => {
let escape_ch = cursor.peek()?.ok_or_else(|| {
let escape_ch = cursor.peek_char()?.ok_or_else(|| {
Error::from(io::Error::new(
ErrorKind::UnexpectedEof,
"unterminated escape sequence in literal",
@ -143,13 +147,18 @@ impl<R> Tokenizer<R> for TemplateLiteral {
})?;
buf.push(u16::from(b'\\'));
match escape_ch {
b'`' | b'$' | b'\\' => {
let next_byte =
cursor.next_byte()?.expect("already checked next character");
buf.push(u16::from(next_byte));
}
_ => continue,
let escape_ch = match escape_ch {
// `
0x0060 => Some(0x0060),
// $
0x0024 => Some(0x0024),
// \
0x005C => Some(0x005C),
_ => None,
};
if let Some(ch) = escape_ch {
let _ = cursor.next_char()?.expect("already checked next character");
buf.push(ch);
}
}
ch => {
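The rewrite makes the escape table explicit: while scanning the raw text of a template literal, only a backtick, `$`, or `\` has to be consumed together with the preceding backslash, so that an escaped `$` can never open a `${` substitution and an escaped backtick can never terminate the literal. Every other escape stays in the raw buffer and is resolved later by `to_owned_cooked`. The dispatch boils down to this (hypothetical helper name):

```rust
// Which code points must be consumed eagerly after `\` in the raw scan.
fn eagerly_escaped(c: u32) -> Option<u16> {
    match c {
        0x0060 /* ` */ | 0x0024 /* $ */ | 0x005C /* \ */ => Some(c as u16),
        _ => None, // left in place; the cooking pass handles it
    }
}
```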

173
core/parser/src/lexer/tests.rs

@ -3,8 +3,9 @@
use crate::lexer::{
template::TemplateString,
token::{ContainsEscapeSequence, EscapeSequence, Numeric},
Cursor, Error, Interner, Lexer, Position, Punctuator, Read, Span, TokenKind,
Cursor, Error, Interner, Lexer, Position, Punctuator, Span, TokenKind,
};
use crate::source::ReadChar;
use boa_ast::Keyword;
use boa_interner::Sym;
use boa_macros::utf16;
@ -16,7 +17,7 @@ fn span(start: (u32, u32), end: (u32, u32)) -> Span {
fn expect_tokens<R>(lexer: &mut Lexer<R>, expected: &[TokenKind], interner: &mut Interner)
where
R: Read,
R: ReadChar,
{
for expect in expected {
assert_eq!(&lexer.next(interner).unwrap().unwrap().kind(), &expect);
@ -31,7 +32,7 @@ where
#[test]
fn check_single_line_comment() {
let s1 = "var \n//This is a comment\ntrue";
let mut lexer = Lexer::new(s1.as_bytes());
let mut lexer = Lexer::from(s1.as_bytes());
let interner = &mut Interner::default();
let expected = [
@ -47,7 +48,7 @@ fn check_single_line_comment() {
#[test]
fn check_single_line_comment_with_crlf_ending() {
let s1 = "var \r\n//This is a comment\r\ntrue";
let mut lexer = Lexer::new(s1.as_bytes());
let mut lexer = Lexer::from(s1.as_bytes());
let interner = &mut Interner::default();
let expected = [
@ -63,7 +64,7 @@ fn check_single_line_comment_with_crlf_ending() {
#[test]
fn check_multi_line_comment() {
let s = "var /* await \n break \n*/ x";
let mut lexer = Lexer::new(s.as_bytes());
let mut lexer = Lexer::from(s.as_bytes());
let interner = &mut Interner::default();
let sym = interner.get_or_intern_static("x", utf16!("x"));
@ -79,7 +80,7 @@ fn check_multi_line_comment() {
#[test]
fn check_identifier() {
let s = "x x1 _x $x __ $$ Ѐ ЀЀ x\u{200C}\u{200D} \\u0078 \\u0078\\u0078 \\u{0078}x\\u{0078}";
let mut lexer = Lexer::new(s.as_bytes());
let mut lexer = Lexer::from(s.as_bytes());
let interner = &mut Interner::default();
let expected = [
@ -116,7 +117,7 @@ fn check_invalid_identifier_start() {
let invalid_identifier_starts = ["\u{200C}", "\u{200D}", "😀"];
for s in &invalid_identifier_starts {
let mut lexer = Lexer::new(s.as_bytes());
let mut lexer = Lexer::from(s.as_bytes());
let interner = &mut Interner::default();
lexer
.next(interner)
@ -132,7 +133,7 @@ fn check_invalid_identifier_part() {
let sym = interner.get_or_intern_static("x", utf16!("x"));
for part in &invalid_identifier_parts {
let s = String::from("x") + part;
let mut lexer = Lexer::new(s.as_bytes());
let mut lexer = Lexer::from(s.as_bytes());
let interner = &mut Interner::default();
assert_eq!(
lexer.next(interner).unwrap().unwrap().kind(),
@ -144,7 +145,7 @@ fn check_invalid_identifier_part() {
#[test]
fn check_string() {
let s = "'aaa' \"bbb\"";
let mut lexer = Lexer::new(s.as_bytes());
let mut lexer = Lexer::from(s.as_bytes());
let interner = &mut Interner::default();
let a_sym = interner.get_or_intern_static("aaa", utf16!("aaa"));
@ -160,7 +161,7 @@ fn check_string() {
#[test]
fn check_template_literal_simple() {
let s = "`I'm a template literal`";
let mut lexer = Lexer::new(s.as_bytes());
let mut lexer = Lexer::from(s.as_bytes());
let interner = &mut Interner::default();
let sym =
@ -175,7 +176,7 @@ fn check_template_literal_simple() {
#[test]
fn check_template_literal_unterminated() {
let s = "`I'm a template";
let mut lexer = Lexer::new(s.as_bytes());
let mut lexer = Lexer::from(s.as_bytes());
let interner = &mut Interner::default();
lexer
@ -189,7 +190,7 @@ fn check_punctuators() {
let s = "{ ( ) [ ] . ... ; , < > <= >= == != === !== \
+ - * % -- << >> >>> & | ^ ! ~ && || ? : \
= += -= *= &= **= ++ ** <<= >>= >>>= &= |= ^= => ?? ??= &&= ||= ?.";
let mut lexer = Lexer::new(s.as_bytes());
let mut lexer = Lexer::from(s.as_bytes());
let interner = &mut Interner::default();
let expected = [
@ -259,7 +260,7 @@ fn check_keywords() {
do else export extends finally for function if import in instanceof \
new return super switch this throw try typeof var void while with yield";
let mut lexer = Lexer::new(s.as_bytes());
let mut lexer = Lexer::from(s.as_bytes());
let interner = &mut Interner::default();
let expected = [
@ -305,7 +306,7 @@ fn check_keywords() {
#[test]
fn check_variable_definition_tokens() {
let s = "let a = 'hello';";
let mut lexer = Lexer::new(s.as_bytes());
let mut lexer = Lexer::from(s.as_bytes());
let interner = &mut Interner::default();
let a_sym = interner.get_or_intern_static("a", utf16!("a"));
@ -325,7 +326,7 @@ fn check_variable_definition_tokens() {
fn check_positions() {
let s = r#"console.log("hello world"); // Test"#;
// --------123456789
let mut lexer = Lexer::new(s.as_bytes());
let mut lexer = Lexer::from(s.as_bytes());
let interner = &mut Interner::default();
// The first column is 1 (not zero indexed)
@ -375,7 +376,7 @@ fn check_positions() {
fn check_positions_codepoint() {
let s = r#"console.log("hello world\u{2764}"); // Test"#;
// --------123456789
let mut lexer = Lexer::new(s.as_bytes());
let mut lexer = Lexer::from(s.as_bytes());
let interner = &mut Interner::default();
// The first column is 1 (not zero indexed)
@ -425,7 +426,7 @@ fn check_positions_codepoint() {
fn check_line_numbers() {
let s = "x\ny\n";
let mut lexer = Lexer::new(s.as_bytes());
let mut lexer = Lexer::from(s.as_bytes());
let interner = &mut Interner::default();
assert_eq!(
@ -450,7 +451,7 @@ fn check_line_numbers() {
#[test]
fn check_decrement_advances_lexer_2_places() {
// Here we want an example of decrementing an integer
let mut lexer = Lexer::new(&b"let a = b--;"[..]);
let mut lexer = Lexer::from(&b"let a = b--;"[..]);
let interner = &mut Interner::default();
for _ in 0..4 {
@ -472,7 +473,7 @@ fn check_decrement_advances_lexer_2_places() {
#[test]
fn single_int() {
let mut lexer = Lexer::new(&b"52"[..]);
let mut lexer = Lexer::from(&b"52"[..]);
let interner = &mut Interner::default();
let expected = [TokenKind::numeric_literal(52)];
@ -482,7 +483,7 @@ fn single_int() {
#[test]
fn numbers() {
let mut lexer = Lexer::new(
let mut lexer = Lexer::from(
"1 2 0x34 056 7.89 42. 5e3 5e+3 5e-3 0b10 0O123 0999 1.0e1 1.0e-1 1.0E1 1E1 0.0 0.12 -32"
.as_bytes(),
);
@ -516,7 +517,7 @@ fn numbers() {
#[test]
fn numbers_with_separators() {
let mut lexer = Lexer::new(
let mut lexer = Lexer::from(
"1_0 2_0 0x3_4 056 7.8_9 4_2. 5_0e2 5_0e+2 5_0e-4 0b1_0 1_0.0_0e2 1.0E-0_1 -3_2".as_bytes(),
);
let interner = &mut Interner::default();
@ -548,7 +549,7 @@ fn numbers_with_bad_separators() {
];
for n in &numbers {
let mut lexer = Lexer::new(n.as_bytes());
let mut lexer = Lexer::from(n.as_bytes());
let interner = &mut Interner::default();
assert!(lexer.next(interner).is_err());
}
@ -556,7 +557,7 @@ fn numbers_with_bad_separators() {
#[test]
fn big_exp_numbers() {
let mut lexer = Lexer::new(&b"1.0e25 1.0e36 9.0e50"[..]);
let mut lexer = Lexer::from(&b"1.0e25 1.0e36 9.0e50"[..]);
let interner = &mut Interner::default();
let expected = [
@ -572,7 +573,7 @@ fn big_exp_numbers() {
#[test]
fn big_literal_numbers() {
let mut lexer = Lexer::new(&b"10000000000000000000000000"[..]);
let mut lexer = Lexer::from(&b"10000000000000000000000000"[..]);
let interner = &mut Interner::default();
let expected = [TokenKind::numeric_literal(
@ -584,7 +585,7 @@ fn big_literal_numbers() {
#[test]
fn implicit_octal_edge_case() {
let mut lexer = Lexer::new(&b"044.5 094.5"[..]);
let mut lexer = Lexer::from(&b"044.5 094.5"[..]);
let interner = &mut Interner::default();
let expected = [
@ -598,7 +599,7 @@ fn implicit_octal_edge_case() {
#[test]
fn hexadecimal_edge_case() {
let mut lexer = Lexer::new(&b"0xffff.ff 0xffffff"[..]);
let mut lexer = Lexer::from(&b"0xffff.ff 0xffffff"[..]);
let interner = &mut Interner::default();
let sym = interner.get_or_intern_static("ff", utf16!("ff"));
@ -614,7 +615,7 @@ fn hexadecimal_edge_case() {
#[test]
fn single_number_without_semicolon() {
let mut lexer = Lexer::new(&b"1"[..]);
let mut lexer = Lexer::from(&b"1"[..]);
let interner = &mut Interner::default();
let expected = [TokenKind::numeric_literal(Numeric::Integer(1))];
@ -624,7 +625,7 @@ fn single_number_without_semicolon() {
#[test]
fn number_followed_by_dot() {
let mut lexer = Lexer::new(&b"1.."[..]);
let mut lexer = Lexer::from(&b"1.."[..]);
let interner = &mut Interner::default();
let expected = [
@ -637,7 +638,7 @@ fn number_followed_by_dot() {
#[test]
fn regex_literal() {
let mut lexer = Lexer::new(&b"/(?:)/"[..]);
let mut lexer = Lexer::from(&b"/(?:)/"[..]);
let interner = &mut Interner::default();
let expected = [TokenKind::regular_expression_literal(
@ -650,7 +651,7 @@ fn regex_literal() {
#[test]
fn regex_equals_following_assignment() {
let mut lexer = Lexer::new(&b"const myRegex = /=/;"[..]);
let mut lexer = Lexer::from(&b"const myRegex = /=/;"[..]);
let interner = &mut Interner::default();
let expected = [
@ -669,7 +670,7 @@ fn regex_equals_following_assignment() {
#[test]
fn regex_literal_flags() {
let mut lexer = Lexer::new(&br"/\/[^\/]*\/*/gmi"[..]);
let mut lexer = Lexer::from(&br"/\/[^\/]*\/*/gmi"[..]);
let interner = &mut Interner::default();
let expected = [TokenKind::regular_expression_literal(
@ -682,14 +683,14 @@ fn regex_literal_flags() {
#[test]
fn regex_literal_flags_err() {
let mut lexer = Lexer::new(&br"/\/[^\/]*\/*/gmip"[..]);
let mut lexer = Lexer::from(&br"/\/[^\/]*\/*/gmip"[..]);
let interner = &mut Interner::default();
lexer
.next(interner)
.expect_err("Lexer did not handle regex literal with error");
let mut lexer = Lexer::new(&br"/\/[^\/]*\/*/gmii"[..]);
let mut lexer = Lexer::from(&br"/\/[^\/]*\/*/gmii"[..]);
let interner = &mut Interner::default();
lexer
@ -699,7 +700,7 @@ fn regex_literal_flags_err() {
#[test]
fn addition_no_spaces() {
let mut lexer = Lexer::new(&b"1+1"[..]);
let mut lexer = Lexer::from(&b"1+1"[..]);
let interner = &mut Interner::default();
let expected = [
@ -713,7 +714,7 @@ fn addition_no_spaces() {
#[test]
fn addition_no_spaces_left_side() {
let mut lexer = Lexer::new(&b"1+ 1"[..]);
let mut lexer = Lexer::from(&b"1+ 1"[..]);
let interner = &mut Interner::default();
let expected = [
@ -727,7 +728,7 @@ fn addition_no_spaces_left_side() {
#[test]
fn addition_no_spaces_right_side() {
let mut lexer = Lexer::new(&b"1 +1"[..]);
let mut lexer = Lexer::from(&b"1 +1"[..]);
let interner = &mut Interner::default();
let expected = [
@ -741,7 +742,7 @@ fn addition_no_spaces_right_side() {
#[test]
fn addition_no_spaces_e_number_left_side() {
let mut lexer = Lexer::new(&b"1e2+ 1"[..]);
let mut lexer = Lexer::from(&b"1e2+ 1"[..]);
let interner = &mut Interner::default();
let expected = [
@ -755,7 +756,7 @@ fn addition_no_spaces_e_number_left_side() {
#[test]
fn addition_no_spaces_e_number_right_side() {
let mut lexer = Lexer::new(&b"1 +1e3"[..]);
let mut lexer = Lexer::from(&b"1 +1e3"[..]);
let interner = &mut Interner::default();
let expected = [
@ -769,7 +770,7 @@ fn addition_no_spaces_e_number_right_side() {
#[test]
fn addition_no_spaces_e_number() {
let mut lexer = Lexer::new(&b"1e3+1e11"[..]);
let mut lexer = Lexer::from(&b"1e3+1e11"[..]);
let interner = &mut Interner::default();
let expected = [
@ -783,7 +784,7 @@ fn addition_no_spaces_e_number() {
#[test]
fn take_while_ascii_pred_simple() {
let mut cur = Cursor::new(&b"abcdefghijk"[..]);
let mut cur = Cursor::from(&b"abcdefghijk"[..]);
let mut buf: Vec<u8> = Vec::new();
@ -795,7 +796,7 @@ fn take_while_ascii_pred_simple() {
#[test]
fn take_while_ascii_pred_immediate_stop() {
let mut cur = Cursor::new(&b"abcdefghijk"[..]);
let mut cur = Cursor::from(&b"abcdefghijk"[..]);
let mut buf: Vec<u8> = Vec::new();
@ -806,7 +807,7 @@ fn take_while_ascii_pred_immediate_stop() {
#[test]
fn take_while_ascii_pred_entire_str() {
let mut cur = Cursor::new(&b"abcdefghijk"[..]);
let mut cur = Cursor::from(&b"abcdefghijk"[..]);
let mut buf: Vec<u8> = Vec::new();
@ -817,7 +818,7 @@ fn take_while_ascii_pred_entire_str() {
#[test]
fn take_while_ascii_pred_non_ascii_stop() {
let mut cur = Cursor::new("abcde😀fghijk".as_bytes());
let mut cur = Cursor::from("abcde😀fghijk".as_bytes());
let mut buf: Vec<u8> = Vec::new();
@ -826,63 +827,13 @@ fn take_while_ascii_pred_non_ascii_stop() {
assert_eq!(str::from_utf8(buf.as_slice()).unwrap(), "abcde");
}
#[test]
fn take_while_char_pred_simple() {
let mut cur = Cursor::new(&b"abcdefghijk"[..]);
let mut buf: Vec<u8> = Vec::new();
cur.take_while_char_pred(&mut buf, &|c| {
c == 'a' as u32 || c == 'b' as u32 || c == 'c' as u32
})
.unwrap();
assert_eq!(str::from_utf8(buf.as_slice()).unwrap(), "abc");
}
#[test]
fn take_while_char_pred_immediate_stop() {
let mut cur = Cursor::new(&b"abcdefghijk"[..]);
let mut buf: Vec<u8> = Vec::new();
cur.take_while_char_pred(&mut buf, &|_| false).unwrap();
assert_eq!(str::from_utf8(buf.as_slice()).unwrap(), "");
}
#[test]
fn take_while_char_pred_entire_str() {
let mut cur = Cursor::new(&b"abcdefghijk"[..]);
let mut buf: Vec<u8> = Vec::new();
cur.take_while_char_pred(&mut buf, &|_| true).unwrap();
assert_eq!(str::from_utf8(buf.as_slice()).unwrap(), "abcdefghijk");
}
#[test]
fn take_while_char_pred_utf8_char() {
let mut cur = Cursor::new("abc😀defghijk".as_bytes());
let mut buf: Vec<u8> = Vec::new();
cur.take_while_char_pred(&mut buf, &|c| {
char::try_from(c).map_or(false, |c| c == 'a' || c == 'b' || c == 'c' || c == '😀')
})
.unwrap();
assert_eq!(str::from_utf8(buf.as_slice()).unwrap(), "abc😀");
}
#[test]
fn illegal_following_numeric_literal() {
// Checks as per https://tc39.es/ecma262/#sec-literals-numeric-literals that a NumericLiteral cannot
// be immediately followed by an IdentifierStart or DecimalDigit.
// Decimal Digit
let mut lexer = Lexer::new(&b"11.6n3"[..]);
let mut lexer = Lexer::from(&b"11.6n3"[..]);
let interner = &mut Interner::default();
let err = lexer
@ -895,7 +846,7 @@ fn illegal_following_numeric_literal() {
}
// Identifier Start
let mut lexer = Lexer::new(&b"17.4$"[..]);
let mut lexer = Lexer::from(&b"17.4$"[..]);
let interner = &mut Interner::default();
if let Error::Syntax(_, pos) = lexer
@ -907,7 +858,7 @@ fn illegal_following_numeric_literal() {
panic!("invalid error type");
}
let mut lexer = Lexer::new(&b"17.4_"[..]);
let mut lexer = Lexer::from(&b"17.4_"[..]);
let interner = &mut Interner::default();
if let Error::Syntax(_, pos) = lexer
@ -922,7 +873,7 @@ fn illegal_following_numeric_literal() {
#[test]
fn string_codepoint_with_no_braces() {
let mut lexer = Lexer::new(&br#""test\uD38Dtest""#[..]);
let mut lexer = Lexer::from(&br#""test\uD38Dtest""#[..]);
let interner = &mut Interner::default();
assert!(lexer.next(interner).is_ok());
@ -933,7 +884,7 @@ fn string_codepoint_with_no_braces() {
fn illegal_code_point_following_numeric_literal() {
// Checks as per https://tc39.es/ecma262/#sec-literals-numeric-literals that a NumericLiteral cannot
// be immediately followed by an IdentifierStart, even when that IdentifierStart is written as a Unicode escape sequence.
let mut lexer = Lexer::new(&br"17.4\u{2764}"[..]);
let mut lexer = Lexer::from(&br"17.4\u{2764}"[..]);
let interner = &mut Interner::default();
assert!(
@ -947,7 +898,7 @@ fn illegal_code_point_following_numeric_literal() {
fn string_unicode() {
let s = r#"'中文';"#;
let mut lexer = Lexer::new(s.as_bytes());
let mut lexer = Lexer::from(s.as_bytes());
let interner = &mut Interner::default();
let sym = interner.get_or_intern_static("中文", utf16!("中文"));
@ -961,7 +912,7 @@ fn string_unicode() {
#[test]
fn string_unicode_escape_with_braces() {
let mut lexer = Lexer::new(&br"'{\u{20ac}\u{a0}\u{a0}}'"[..]);
let mut lexer = Lexer::from(&br"'{\u{20ac}\u{a0}\u{a0}}'"[..]);
let interner = &mut Interner::default();
let sym =
@ -970,7 +921,7 @@ fn string_unicode_escape_with_braces() {
expect_tokens(&mut lexer, &expected, interner);
lexer = Lexer::new(&br"\u{{a0}"[..]);
lexer = Lexer::from(&br"\u{{a0}"[..]);
if let Error::Syntax(_, pos) = lexer
.next(interner)
@ -981,7 +932,7 @@ fn string_unicode_escape_with_braces() {
panic!("invalid error type");
}
lexer = Lexer::new(&br"\u{{a0}}"[..]);
lexer = Lexer::from(&br"\u{{a0}}"[..]);
if let Error::Syntax(_, pos) = lexer
.next(interner)
@ -997,7 +948,7 @@ fn string_unicode_escape_with_braces() {
fn string_unicode_escape_with_braces_2() {
let s = r"'\u{20ac}\u{a0}\u{a0}'";
let mut lexer = Lexer::new(s.as_bytes());
let mut lexer = Lexer::from(s.as_bytes());
let interner = &mut Interner::default();
let sym = interner.get_or_intern_static("\u{20ac}\u{a0}\u{a0}", utf16!("\u{20ac}\u{a0}\u{a0}"));
@ -1010,7 +961,7 @@ fn string_unicode_escape_with_braces_2() {
fn string_with_single_escape() {
let s = r"'\Б'";
let mut lexer = Lexer::new(s.as_bytes());
let mut lexer = Lexer::from(s.as_bytes());
let interner = &mut Interner::default();
let sym = interner.get_or_intern_static("Б", utf16!("Б"));
@ -1032,7 +983,7 @@ fn string_legacy_octal_escape() {
];
for (s, expected) in &test_cases {
let mut lexer = Lexer::new(s.as_bytes());
let mut lexer = Lexer::from(s.as_bytes());
let interner = &mut Interner::default();
let sym = interner.get_or_intern(expected.encode_utf16().collect::<Vec<_>>().as_slice());
@ -1045,7 +996,7 @@ fn string_legacy_octal_escape() {
}
for (s, _) in &test_cases {
let mut lexer = Lexer::new(s.as_bytes());
let mut lexer = Lexer::from(s.as_bytes());
let interner = &mut Interner::default();
lexer.set_strict(true);
@ -1065,7 +1016,7 @@ fn string_zero_escape() {
let test_cases = [(r"'\0'", "\u{0}"), (r"'\0A'", "\u{0}A")];
for (s, expected) in &test_cases {
let mut lexer = Lexer::new(s.as_bytes());
let mut lexer = Lexer::from(s.as_bytes());
let interner = &mut Interner::default();
let sym = interner.get_or_intern(expected.encode_utf16().collect::<Vec<_>>().as_slice());
@ -1080,7 +1031,7 @@ fn string_non_octal_decimal_escape() {
let test_cases = [(r"'\8'", "8"), (r"'\9'", "9")];
for (s, expected) in &test_cases {
let mut lexer = Lexer::new(s.as_bytes());
let mut lexer = Lexer::from(s.as_bytes());
let interner = &mut Interner::default();
let sym = interner.get_or_intern(expected.encode_utf16().collect::<Vec<_>>().as_slice());
@ -1093,7 +1044,7 @@ fn string_non_octal_decimal_escape() {
}
for (s, _) in &test_cases {
let mut lexer = Lexer::new(s.as_bytes());
let mut lexer = Lexer::from(s.as_bytes());
let interner = &mut Interner::default();
lexer.set_strict(true);
@ -1112,7 +1063,7 @@ fn string_non_octal_decimal_escape() {
fn string_line_continuation() {
let s = "'hello \\\nworld'";
let mut lexer = Lexer::new(s.as_bytes());
let mut lexer = Lexer::from(s.as_bytes());
let interner = &mut Interner::default();
let sym = interner.get_or_intern_static("hello world", utf16!("hello world"));
@ -1125,7 +1076,7 @@ mod carriage_return {
use super::*;
fn expect_tokens_with_lines(lines: usize, src: &str) {
let mut lexer = Lexer::new(src.as_bytes());
let mut lexer = Lexer::from(src.as_bytes());
let interner = &mut Interner::default();
let mut expected = Vec::with_capacity(lines + 2);

2
core/parser/src/lib.rs

@ -28,7 +28,7 @@
pub mod error;
pub mod lexer;
pub mod parser;
mod source;
pub mod source;
pub use error::Error;
pub use lexer::Lexer;
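Making `source` public is what lets all the `R: ReadChar` bounds below be named by downstream code. The trait definition itself is outside this diff; judging from the call sites (`next_char()?` yielding `Option<u32>`, `io::Error` propagation), its shape is presumably close to this sketch:

```rust
use std::io;

// Assumed shape, inferred from usage in this diff, not the verbatim trait:
// a fallible stream of Unicode code points. Returning `u32` rather than
// `char` lets unpaired UTF-16 surrogates travel through the lexer intact.
pub trait ReadChar {
    /// Yields the next code point, or `None` at end of input.
    fn next_char(&mut self) -> io::Result<Option<u32>>;
}
```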

14
core/parser/src/parser/cursor/buffered_lexer/mod.rs

@ -1,12 +1,12 @@
use crate::{
lexer::{InputElement, Lexer, Token, TokenKind},
parser::ParseResult,
source::{ReadChar, UTF8Input},
Error,
};
use boa_ast::Position;
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
#[cfg(test)]
mod tests;
@ -34,7 +34,7 @@ pub(super) struct BufferedLexer<R> {
impl<R> From<Lexer<R>> for BufferedLexer<R>
where
R: Read,
R: ReadChar,
{
fn from(lexer: Lexer<R>) -> Self {
Self {
@ -58,16 +58,22 @@ where
impl<R> From<R> for BufferedLexer<R>
where
R: Read,
R: ReadChar,
{
fn from(reader: R) -> Self {
Lexer::new(reader).into()
}
}
impl<'a> From<&'a [u8]> for BufferedLexer<UTF8Input<&'a [u8]>> {
fn from(reader: &'a [u8]) -> Self {
Lexer::from(reader).into()
}
}
impl<R> BufferedLexer<R>
where
R: Read,
R: ReadChar,
{
/// Sets the goal symbol for the lexer.
pub(super) fn set_goal(&mut self, elm: InputElement) {
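This pair of `From` impls explains the sweeping `Lexer::new(...)` to `Lexer::from(...)` change in the tests above: `new` now accepts any `ReadChar`, and a raw byte slice is not one, so a dedicated impl wraps the slice in `UTF8Input` first. Assuming `UTF8Input::new` is exported (which the now-public `source` module suggests), the two spellings should be equivalent:

```rust
use boa_parser::{source::UTF8Input, Lexer};

fn main() {
    // Convenience impl: the byte slice is wrapped in `UTF8Input` internally.
    let _concise = Lexer::from("let a = 1;".as_bytes());
    // Explicit equivalent of the same construction.
    let _explicit = Lexer::new(UTF8Input::new("let a = 1;".as_bytes()));
}
```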

4
core/parser/src/parser/cursor/mod.rs

@ -4,12 +4,12 @@ mod buffered_lexer;
use crate::{
lexer::{InputElement, Lexer, Token, TokenKind},
parser::{OrAbrupt, ParseResult},
source::ReadChar,
Error,
};
use boa_ast::{Position, Punctuator};
use boa_interner::Interner;
use buffered_lexer::BufferedLexer;
use std::io::Read;
/// The result of a peek for a semicolon.
#[derive(Debug)]
@ -41,7 +41,7 @@ pub(super) struct Cursor<R> {
impl<R> Cursor<R>
where
R: Read,
R: ReadChar,
{
/// Creates a new cursor with the given reader.
pub(super) fn new(reader: R) -> Self {

8
core/parser/src/parser/expression/assignment/arrow_function.rs

@ -17,6 +17,7 @@ use crate::{
name_in_lexically_declared_names, AllowAwait, AllowIn, AllowYield, Cursor, OrAbrupt,
TokenParser,
},
source::ReadChar,
};
use ast::operations::{bound_names, lexically_declared_names};
use boa_ast::{
@ -30,7 +31,6 @@ use boa_ast::{
};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Arrow function parsing.
///
@ -73,7 +73,7 @@ impl ArrowFunction {
impl<R> TokenParser<R> for ArrowFunction
where
R: Read,
R: ReadChar,
{
type Output = ast::function::ArrowFunction;
@ -186,7 +186,7 @@ impl ConciseBody {
impl<R> TokenParser<R> for ConciseBody
where
R: Read,
R: ReadChar,
{
type Output = ast::function::FunctionBody;
@ -236,7 +236,7 @@ impl ExpressionBody {
impl<R> TokenParser<R> for ExpressionBody
where
R: Read,
R: ReadChar,
{
type Output = Expression;

6
core/parser/src/parser/expression/assignment/async_arrow_function.rs

@ -16,6 +16,7 @@ use crate::{
function::{FormalParameters, FunctionBody},
name_in_lexically_declared_names, AllowIn, AllowYield, Cursor, OrAbrupt, TokenParser,
},
source::ReadChar,
};
use ast::{
operations::{bound_names, contains, lexically_declared_names, ContainsSymbol},
@ -31,7 +32,6 @@ use boa_ast::{
};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Async arrow function parsing.
///
@ -66,7 +66,7 @@ impl AsyncArrowFunction {
impl<R> TokenParser<R> for AsyncArrowFunction
where
R: Read,
R: ReadChar,
{
type Output = ast::function::AsyncArrowFunction;
@ -174,7 +174,7 @@ impl AsyncConciseBody {
impl<R> TokenParser<R> for AsyncConciseBody
where
R: Read,
R: ReadChar,
{
type Output = ast::function::FunctionBody;

4
core/parser/src/parser/expression/assignment/conditional.rs

@ -13,6 +13,7 @@ use crate::{
expression::{AssignmentExpression, ShortCircuitExpression},
AllowAwait, AllowIn, AllowYield, Cursor, ParseResult, TokenParser,
},
source::ReadChar,
};
use boa_ast::{
expression::{operator::Conditional, Identifier},
@ -20,7 +21,6 @@ use boa_ast::{
};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Conditional expression parsing.
///
@ -63,7 +63,7 @@ impl ConditionalExpression {
impl<R> TokenParser<R> for ConditionalExpression
where
R: Read,
R: ReadChar,
{
type Output = Expression;

4
core/parser/src/parser/expression/assignment/exponentiation.rs

@ -13,6 +13,7 @@ use crate::{
expression::{unary::UnaryExpression, update::UpdateExpression},
AllowAwait, AllowYield, Cursor, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
};
use boa_ast::{
expression::{
@ -23,7 +24,6 @@ use boa_ast::{
};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Parses an exponentiation expression.
///
@ -62,7 +62,7 @@ impl ExponentiationExpression {
impl<R> TokenParser<R> for ExponentiationExpression
where
R: Read,
R: ReadChar,
{
type Output = Expression;

4
core/parser/src/parser/expression/assignment/mod.rs

@ -25,6 +25,7 @@ use crate::{
name_in_lexically_declared_names, AllowAwait, AllowIn, AllowYield, Cursor, OrAbrupt,
ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use boa_ast::{
@ -37,7 +38,6 @@ use boa_ast::{
};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
pub(super) use exponentiation::ExponentiationExpression;
@ -92,7 +92,7 @@ impl AssignmentExpression {
impl<R> TokenParser<R> for AssignmentExpression
where
R: Read,
R: ReadChar,
{
type Output = Expression;

4
core/parser/src/parser/expression/assignment/yield.rs

@ -11,11 +11,11 @@ use super::AssignmentExpression;
use crate::{
lexer::TokenKind,
parser::{cursor::Cursor, AllowAwait, AllowIn, OrAbrupt, ParseResult, TokenParser},
source::ReadChar,
};
use boa_ast::{expression::Yield, Expression, Keyword, Punctuator};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// `YieldExpression` parsing.
///
@ -47,7 +47,7 @@ impl YieldExpression {
impl<R> TokenParser<R> for YieldExpression
where
R: Read,
R: ReadChar,
{
type Output = Expression;

4
core/parser/src/parser/expression/await_expr.rs

@ -11,10 +11,10 @@ use super::unary::UnaryExpression;
use crate::{
lexer::TokenKind,
parser::{AllowYield, Cursor, ParseResult, TokenParser},
source::ReadChar,
};
use boa_ast::{expression::Await, Keyword};
use boa_interner::Interner;
use std::io::Read;
/// Parses an await expression.
///
@ -43,7 +43,7 @@ impl AwaitExpression {
impl<R> TokenParser<R> for AwaitExpression
where
R: Read,
R: ReadChar,
{
type Output = Await;

8
core/parser/src/parser/expression/identifiers.rs

@ -8,12 +8,12 @@
use crate::{
lexer::TokenKind,
parser::{cursor::Cursor, AllowAwait, AllowYield, OrAbrupt, ParseResult, TokenParser},
source::ReadChar,
Error,
};
use boa_ast::expression::Identifier as AstIdentifier;
use boa_interner::{Interner, Sym};
use boa_profiler::Profiler;
use std::io::Read;
/// Identifier reference parsing.
///
@ -44,7 +44,7 @@ impl IdentifierReference {
impl<R> TokenParser<R> for IdentifierReference
where
R: Read,
R: ReadChar,
{
type Output = AstIdentifier;
@ -98,7 +98,7 @@ impl BindingIdentifier {
impl<R> TokenParser<R> for BindingIdentifier
where
R: Read,
R: ReadChar,
{
type Output = AstIdentifier;
@ -153,7 +153,7 @@ pub(in crate::parser) struct Identifier;
impl<R> TokenParser<R> for Identifier
where
R: Read,
R: ReadChar,
{
type Output = AstIdentifier;

4
core/parser/src/parser/expression/left_hand_side/arguments.rs

@ -13,12 +13,12 @@ use crate::{
expression::AssignmentExpression, AllowAwait, AllowYield, Cursor, OrAbrupt, ParseResult,
TokenParser,
},
source::ReadChar,
Error,
};
use boa_ast::{expression::Spread, Expression, Punctuator};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Parses a list of arguments.
///
@ -50,7 +50,7 @@ impl Arguments {
impl<R> TokenParser<R> for Arguments
where
R: Read,
R: ReadChar,
{
type Output = Box<[Expression]>;

6
core/parser/src/parser/expression/left_hand_side/call.rs

@ -14,6 +14,7 @@ use crate::{
expression::{left_hand_side::template::TaggedTemplateLiteral, Expression},
AllowAwait, AllowYield, Cursor, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use ast::function::PrivateName;
@ -27,7 +28,6 @@ use boa_ast::{
};
use boa_interner::{Interner, Sym};
use boa_profiler::Profiler;
use std::io::Read;
/// Parses a call expression.
///
@ -63,7 +63,7 @@ impl CallExpression {
impl<R> TokenParser<R> for CallExpression
where
R: Read,
R: ReadChar,
{
type Output = ast::Expression;
@ -115,7 +115,7 @@ impl CallExpressionTail {
impl<R> TokenParser<R> for CallExpressionTail
where
R: Read,
R: ReadChar,
{
type Output = ast::Expression;

4
core/parser/src/parser/expression/left_hand_side/member.rs

@ -14,6 +14,7 @@ use crate::{
},
AllowAwait, AllowYield, Cursor, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use ast::function::PrivateName;
@ -29,7 +30,6 @@ use boa_ast::{
};
use boa_interner::{Interner, Sym};
use boa_profiler::Profiler;
use std::io::Read;
/// Parses a member expression.
///
@ -62,7 +62,7 @@ impl MemberExpression {
impl<R> TokenParser<R> for MemberExpression
where
R: Read,
R: ReadChar,
{
type Output = ast::Expression;

6
core/parser/src/parser/expression/left_hand_side/mod.rs

@ -30,6 +30,7 @@ use crate::{
},
AllowAwait, AllowYield, Cursor, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use boa_ast::{
@ -38,7 +39,6 @@ use boa_ast::{
};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Parses a left hand side expression.
///
@ -73,7 +73,7 @@ impl LeftHandSideExpression {
impl<R> TokenParser<R> for LeftHandSideExpression
where
R: Read,
R: ReadChar,
{
type Output = Expression;
@ -87,7 +87,7 @@ where
/// initialization of `lhs` would make it very hard to return an expression over all
/// possible branches of the `if let`s. Instead, we extract the check into its own function,
/// then use it inside the condition of a simple `if ... else` expression.
fn is_keyword_call<R: Read>(
fn is_keyword_call<R: ReadChar>(
keyword: Keyword,
cursor: &mut Cursor<R>,
interner: &mut Interner,

4
core/parser/src/parser/expression/left_hand_side/optional/mod.rs

@ -7,6 +7,7 @@ use crate::{
cursor::Cursor, expression::left_hand_side::arguments::Arguments, expression::Expression,
AllowAwait, AllowYield, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use ast::function::PrivateName;
@ -17,7 +18,6 @@ use boa_ast::{
};
use boa_interner::{Interner, Sym};
use boa_profiler::Profiler;
use std::io::Read;
/// Parses an optional expression.
///
@ -55,7 +55,7 @@ impl OptionalExpression {
impl<R> TokenParser<R> for OptionalExpression
where
R: Read,
R: ReadChar,
{
type Output = Optional;

4
core/parser/src/parser/expression/left_hand_side/template.rs

@ -4,12 +4,12 @@ use crate::{
cursor::Cursor, expression::Expression, AllowAwait, AllowYield, OrAbrupt, ParseResult,
TokenParser,
},
source::ReadChar,
Error,
};
use boa_ast::{self as ast, expression::TaggedTemplate, Position, Punctuator};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Parses a tagged template.
///
@ -48,7 +48,7 @@ impl TaggedTemplateLiteral {
impl<R> TokenParser<R> for TaggedTemplateLiteral
where
R: Read,
R: ReadChar,
{
type Output = TaggedTemplate;

10
core/parser/src/parser/expression/mod.rs

@ -25,6 +25,7 @@ use crate::{
expression::assignment::ExponentiationExpression, AllowAwait, AllowIn, AllowYield, Cursor,
OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use boa_ast::{
@ -41,7 +42,6 @@ use boa_ast::{
};
use boa_interner::{Interner, Sym};
use boa_profiler::Profiler;
use std::io::Read;
pub(super) use self::{assignment::AssignmentExpression, primary::Initializer};
pub(in crate::parser) use {
@ -73,7 +73,7 @@ macro_rules! expression {
($name:ident, $lower:ident, [$( $op:path ),*], [$( $low_param:ident ),*], $goal:expr ) => {
impl<R> TokenParser<R> for $name
where
R: Read
R: ReadChar
{
type Output = ast::Expression;
@ -142,7 +142,7 @@ impl Expression {
impl<R> TokenParser<R> for Expression
where
R: Read,
R: ReadChar,
{
type Output = ast::Expression;
@ -264,7 +264,7 @@ impl ShortCircuitExpression {
impl<R> TokenParser<R> for ShortCircuitExpression
where
R: Read,
R: ReadChar,
{
type Output = ast::Expression;
@ -558,7 +558,7 @@ impl RelationalExpression {
impl<R> TokenParser<R> for RelationalExpression
where
R: Read,
R: ReadChar,
{
type Output = ast::Expression;

4
core/parser/src/parser/expression/primary/array_initializer/mod.rs

@ -16,6 +16,7 @@ use crate::{
expression::AssignmentExpression, AllowAwait, AllowYield, Cursor, OrAbrupt, ParseResult,
TokenParser,
},
source::ReadChar,
Error,
};
use boa_ast::{
@ -24,7 +25,6 @@ use boa_ast::{
};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Parses an array literal.
///
@ -56,7 +56,7 @@ impl ArrayLiteral {
impl<R> TokenParser<R> for ArrayLiteral
where
R: Read,
R: ReadChar,
{
type Output = literal::ArrayLiteral;

4
core/parser/src/parser/expression/primary/async_function_expression/mod.rs

@ -8,6 +8,7 @@ use crate::{
function::{FormalParameters, FunctionBody},
name_in_lexically_declared_names, Cursor, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use boa_ast::{
@ -18,7 +19,6 @@ use boa_ast::{
};
use boa_interner::{Interner, Sym};
use boa_profiler::Profiler;
use std::io::Read;
/// Async Function expression parsing.
///
@ -45,7 +45,7 @@ impl AsyncFunctionExpression {
impl<R> TokenParser<R> for AsyncFunctionExpression
where
R: Read,
R: ReadChar,
{
type Output = AsyncFunction;

4
core/parser/src/parser/expression/primary/async_generator_expression/mod.rs

@ -17,6 +17,7 @@ use crate::{
function::{FormalParameters, FunctionBody},
name_in_lexically_declared_names, Cursor, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use boa_ast::{
@ -27,7 +28,6 @@ use boa_ast::{
};
use boa_interner::{Interner, Sym};
use boa_profiler::Profiler;
use std::io::Read;
/// Async Generator Expression Parsing
///
@ -52,7 +52,7 @@ impl AsyncGeneratorExpression {
impl<R> TokenParser<R> for AsyncGeneratorExpression
where
R: Read,
R: ReadChar,
{
// The below needs to be implemented in ast::node
type Output = AsyncGenerator;

4
core/parser/src/parser/expression/primary/class_expression/mod.rs

@ -4,11 +4,11 @@ use crate::{
expression::BindingIdentifier, statement::ClassTail, AllowAwait, AllowYield, Cursor,
OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
};
use boa_ast::{expression::Identifier, function::Class, Keyword};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Class expression parsing.
///
@ -41,7 +41,7 @@ impl ClassExpression {
impl<R> TokenParser<R> for ClassExpression
where
R: Read,
R: ReadChar,
{
type Output = Class;

4
core/parser/src/parser/expression/primary/function_expression/mod.rs

@ -17,6 +17,7 @@ use crate::{
function::{FormalParameters, FunctionBody},
name_in_lexically_declared_names, Cursor, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use boa_ast::{
@ -27,7 +28,6 @@ use boa_ast::{
};
use boa_interner::{Interner, Sym};
use boa_profiler::Profiler;
use std::io::Read;
/// Function expression parsing.
///
@ -54,7 +54,7 @@ impl FunctionExpression {
impl<R> TokenParser<R> for FunctionExpression
where
R: Read,
R: ReadChar,
{
type Output = Function;

4
core/parser/src/parser/expression/primary/generator_expression/mod.rs

@ -17,6 +17,7 @@ use crate::{
function::{FormalParameters, FunctionBody},
name_in_lexically_declared_names, Cursor, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use boa_ast::{
@ -27,7 +28,6 @@ use boa_ast::{
};
use boa_interner::{Interner, Sym};
use boa_profiler::Profiler;
use std::io::Read;
/// Generator expression parsing.
///
@ -54,7 +54,7 @@ impl GeneratorExpression {
impl<R> TokenParser<R> for GeneratorExpression
where
R: Read,
R: ReadChar,
{
type Output = Generator;

6
core/parser/src/parser/expression/primary/mod.rs

@ -39,6 +39,7 @@ use crate::{
statement::{ArrayBindingPattern, ObjectBindingPattern},
AllowAwait, AllowYield, Cursor, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use ast::expression::RegExpLiteral as AstRegExp;
@ -57,7 +58,6 @@ use boa_ast::{
};
use boa_interner::{Interner, Sym};
use boa_profiler::Profiler;
use std::io::Read;
pub(in crate::parser) use object_initializer::Initializer;
@ -94,7 +94,7 @@ impl PrimaryExpression {
impl<R> TokenParser<R> for PrimaryExpression
where
R: Read,
R: ReadChar,
{
type Output = ast::Expression;
@ -312,7 +312,7 @@ impl CoverParenthesizedExpressionAndArrowParameterList {
impl<R> TokenParser<R> for CoverParenthesizedExpressionAndArrowParameterList
where
R: Read,
R: ReadChar,
{
type Output = ast::Expression;

20
core/parser/src/parser/expression/primary/object_initializer/mod.rs

@ -21,6 +21,7 @@ use crate::{
name_in_lexically_declared_names, AllowAwait, AllowIn, AllowYield, Cursor, OrAbrupt,
ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use boa_ast::{
@ -40,7 +41,6 @@ use boa_ast::{
use boa_interner::{Interner, Sym};
use boa_macros::utf16;
use boa_profiler::Profiler;
use std::io::Read;
/// Parses an object literal.
///
@ -72,7 +72,7 @@ impl ObjectLiteral {
impl<R> TokenParser<R> for ObjectLiteral
where
R: Read,
R: ReadChar,
{
type Output = literal::ObjectLiteral;
@ -169,7 +169,7 @@ impl PropertyDefinition {
impl<R> TokenParser<R> for PropertyDefinition
where
R: Read,
R: ReadChar,
{
type Output = property::PropertyDefinition;
@ -574,7 +574,7 @@ impl PropertyName {
impl<R> TokenParser<R> for PropertyName
where
R: Read,
R: ReadChar,
{
type Output = property::PropertyName;
@ -650,7 +650,7 @@ impl ClassElementName {
impl<R> TokenParser<R> for ClassElementName
where
R: Read,
R: ReadChar,
{
type Output = property::ClassElementName;
@ -712,7 +712,7 @@ impl Initializer {
impl<R> TokenParser<R> for Initializer
where
R: Read,
R: ReadChar,
{
type Output = Expression;
@ -753,7 +753,7 @@ impl GeneratorMethod {
impl<R> TokenParser<R> for GeneratorMethod
where
R: Read,
R: ReadChar,
{
type Output = (property::ClassElementName, MethodDefinition);
@ -855,7 +855,7 @@ impl AsyncGeneratorMethod {
impl<R> TokenParser<R> for AsyncGeneratorMethod
where
R: Read,
R: ReadChar,
{
type Output = (property::ClassElementName, MethodDefinition);
@ -971,7 +971,7 @@ impl AsyncMethod {
impl<R> TokenParser<R> for AsyncMethod
where
R: Read,
R: ReadChar,
{
type Output = (property::ClassElementName, MethodDefinition);
@ -1064,7 +1064,7 @@ impl CoverInitializedName {
impl<R> TokenParser<R> for CoverInitializedName
where
R: Read,
R: ReadChar,
{
type Output = property::PropertyDefinition;

4
core/parser/src/parser/expression/primary/template/mod.rs

@ -10,6 +10,7 @@
use crate::{
lexer::TokenKind,
parser::{expression::Expression, AllowAwait, AllowYield, Cursor, ParseResult, TokenParser},
source::ReadChar,
Error,
};
use boa_ast::{
@ -18,7 +19,6 @@ use boa_ast::{
};
use boa_interner::{Interner, Sym};
use boa_profiler::Profiler;
use std::io::Read;
/// Parses a template literal.
///
@ -54,7 +54,7 @@ impl TemplateLiteral {
impl<R> TokenParser<R> for TemplateLiteral
where
R: Read,
R: ReadChar,
{
type Output = literal::TemplateLiteral;

4
core/parser/src/parser/expression/unary.rs

@ -13,6 +13,7 @@ use crate::{
expression::{await_expr::AwaitExpression, update::UpdateExpression},
AllowAwait, AllowYield, Cursor, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use boa_ast::{
@ -25,7 +26,6 @@ use boa_ast::{
};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Parses a unary expression.
///
@ -60,7 +60,7 @@ impl UnaryExpression {
impl<R> TokenParser<R> for UnaryExpression
where
R: Read,
R: ReadChar,
{
type Output = Expression;

4
core/parser/src/parser/expression/update.rs

@ -14,6 +14,7 @@ use crate::{
},
AllowAwait, AllowYield, Cursor, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use boa_ast::{
@ -28,7 +29,6 @@ use boa_ast::{
};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Parses an update expression.
///
@ -87,7 +87,7 @@ fn as_simple(
impl<R> TokenParser<R> for UpdateExpression
where
R: Read,
R: ReadChar,
{
type Output = Expression;

12
core/parser/src/parser/function/mod.rs

@ -17,6 +17,7 @@ use crate::{
statement::{ArrayBindingPattern, ObjectBindingPattern, StatementList},
AllowAwait, AllowYield, Cursor, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use ast::{
@ -31,7 +32,6 @@ use boa_ast::{
};
use boa_interner::{Interner, Sym};
use boa_profiler::Profiler;
use std::io::Read;
/// Formal parameters parsing.
///
@ -63,7 +63,7 @@ impl FormalParameters {
impl<R> TokenParser<R> for FormalParameters
where
R: Read,
R: ReadChar,
{
type Output = FormalParameterList;
@ -173,7 +173,7 @@ impl UniqueFormalParameters {
impl<R> TokenParser<R> for UniqueFormalParameters
where
R: Read,
R: ReadChar,
{
type Output = FormalParameterList;
@ -245,7 +245,7 @@ impl BindingRestElement {
impl<R> TokenParser<R> for BindingRestElement
where
R: Read,
R: ReadChar,
{
type Output = ast::function::FormalParameter;
@ -340,7 +340,7 @@ impl FormalParameter {
impl<R> TokenParser<R> for FormalParameter
where
R: Read,
R: ReadChar,
{
type Output = ast::function::FormalParameter;
@ -448,7 +448,7 @@ impl FunctionStatementList {
impl<R> TokenParser<R> for FunctionStatementList
where
R: Read,
R: ReadChar,
{
type Output = ast::function::FunctionBody;

21
core/parser/src/parser/mod.rs

@ -16,6 +16,7 @@ use crate::{
cursor::Cursor,
function::{FormalParameters, FunctionStatementList},
},
source::ReadChar,
Error, Source,
};
use boa_ast::{
@ -29,7 +30,7 @@ use boa_ast::{
};
use boa_interner::Interner;
use rustc_hash::FxHashSet;
use std::{io::Read, path::Path};
use std::path::Path;
use self::statement::ModuleItemList;
@ -38,7 +39,7 @@ use self::statement::ModuleItemList;
/// This makes it possible to abstract over the underlying implementation of a parser.
trait TokenParser<R>: Sized
where
R: Read,
R: ReadChar,
{
/// Output type for the parser.
type Output; // = Node; waiting for https://github.com/rust-lang/rust/issues/29661
@ -121,7 +122,7 @@ pub struct Parser<'a, R> {
cursor: Cursor<R>,
}
impl<'a, R: Read> Parser<'a, R> {
impl<'a, R: ReadChar> Parser<'a, R> {
/// Create a new `Parser` with a `Source` as the input to parse.
pub fn new(source: Source<'a, R>) -> Self {
Self {
@ -152,7 +153,7 @@ impl<'a, R: Read> Parser<'a, R> {
/// [spec]: https://tc39.es/ecma262/#prod-Module
pub fn parse_module(&mut self, interner: &mut Interner) -> ParseResult<boa_ast::Module>
where
R: Read,
R: ReadChar,
{
ModuleParser.parse(&mut self.cursor, interner)
}
@ -211,7 +212,7 @@ impl<R> Parser<'_, R> {
/// Set the parser strict mode to true.
pub fn set_strict(&mut self)
where
R: Read,
R: ReadChar,
{
self.cursor.set_strict(true);
}
@ -219,7 +220,7 @@ impl<R> Parser<'_, R> {
/// Set the parser JSON mode to true.
pub fn set_json_parse(&mut self)
where
R: Read,
R: ReadChar,
{
self.cursor.set_json_parse(true);
}
@ -227,7 +228,7 @@ impl<R> Parser<'_, R> {
/// Set the unique identifier for the parser.
pub fn set_identifier(&mut self, identifier: u32)
where
R: Read,
R: ReadChar,
{
self.cursor.set_identifier(identifier);
}
@ -254,7 +255,7 @@ impl ScriptParser {
impl<R> TokenParser<R> for ScriptParser
where
R: Read,
R: ReadChar,
{
type Output = boa_ast::Script;
@ -315,7 +316,7 @@ impl ScriptBody {
impl<R> TokenParser<R> for ScriptBody
where
R: Read,
R: ReadChar,
{
type Output = StatementList;
@ -387,7 +388,7 @@ struct ModuleParser;
impl<R> TokenParser<R> for ModuleParser
where
R: Read,
R: ReadChar,
{
type Output = boa_ast::Module;
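With every parser bound now `R: ReadChar`, any code-point producer can drive a `Parser`, which is the hook UTF-16 input comes through. As an illustration only, here is a hand-rolled UTF-16 adapter in the shape of that trait; the real adapter lives in `boa_parser::source`, and this `Utf16Input` is hypothetical:

```rust
use boa_parser::source::ReadChar;
use std::io;

// Hypothetical UTF-16 reader: walks code units, combines valid surrogate
// pairs, and passes lone surrogates through unchanged, which is exactly
// what the `u32`-based (rather than `char`-based) interface permits.
struct Utf16Input<'a> {
    units: &'a [u16],
    pos: usize,
}

impl ReadChar for Utf16Input<'_> {
    fn next_char(&mut self) -> io::Result<Option<u32>> {
        let Some(&unit) = self.units.get(self.pos) else {
            return Ok(None);
        };
        self.pos += 1;
        if (0xD800..=0xDBFF).contains(&unit) {
            if let Some(&low) = self.units.get(self.pos) {
                if (0xDC00..=0xDFFF).contains(&low) {
                    self.pos += 1;
                    let c = 0x10000
                        + ((u32::from(unit) - 0xD800) << 10)
                        + (u32::from(low) - 0xDC00);
                    return Ok(Some(c));
                }
            }
        }
        Ok(Some(u32::from(unit)))
    }
}
```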

4
core/parser/src/parser/statement/block/mod.rs

@ -16,6 +16,7 @@ use crate::{
statement::StatementList, AllowAwait, AllowReturn, AllowYield, Cursor, OrAbrupt,
ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use boa_ast::{
@ -25,7 +26,6 @@ use boa_ast::{
use boa_interner::Interner;
use boa_profiler::Profiler;
use rustc_hash::FxHashMap;
use std::io::Read;
/// The possible `TokenKind` which indicate the end of a block statement.
const BLOCK_BREAK_TOKENS: [TokenKind; 1] = [TokenKind::Punctuator(Punctuator::CloseBlock)];
@ -71,7 +71,7 @@ impl Block {
impl<R> TokenParser<R> for Block
where
R: Read,
R: ReadChar,
{
type Output = statement::Block;

4
core/parser/src/parser/statement/break_stm/mod.rs

@ -17,11 +17,11 @@ use crate::{
expression::LabelIdentifier,
AllowAwait, AllowYield, ParseResult, TokenParser,
},
source::ReadChar,
};
use boa_ast::{statement::Break, Keyword, Punctuator};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Break statement parsing
///
@ -53,7 +53,7 @@ impl BreakStatement {
impl<R> TokenParser<R> for BreakStatement
where
R: Read,
R: ReadChar,
{
type Output = Break;

4
core/parser/src/parser/statement/continue_stm/mod.rs

@ -17,11 +17,11 @@ use crate::{
expression::LabelIdentifier,
AllowAwait, AllowYield, ParseResult, TokenParser,
},
source::ReadChar,
};
use boa_ast::{statement::Continue, Keyword, Punctuator};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Continue statement parsing
///
@ -53,7 +53,7 @@ impl ContinueStatement {
impl<R> TokenParser<R> for ContinueStatement
where
R: Read,
R: ReadChar,
{
type Output = Continue;

10
core/parser/src/parser/statement/declaration/export.rs

@ -17,6 +17,7 @@ use crate::{
statement::{declaration::ClassDeclaration, variable::VariableStatement},
Error, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
};
use boa_ast::{
declaration::{ExportDeclaration as AstExportDeclaration, ReExportKind},
@ -24,7 +25,6 @@ use boa_ast::{
};
use boa_interner::{Interner, Sym};
use boa_profiler::Profiler;
use std::io::Read;
use super::{
hoistable::{AsyncFunctionDeclaration, AsyncGeneratorDeclaration, GeneratorDeclaration},
@ -42,7 +42,7 @@ pub(in crate::parser) struct ExportDeclaration;
impl<R> TokenParser<R> for ExportDeclaration
where
R: Read,
R: ReadChar,
{
type Output = AstExportDeclaration;
@ -233,7 +233,7 @@ struct NamedExports;
impl<R> TokenParser<R> for NamedExports
where
R: Read,
R: ReadChar,
{
type Output = Box<[boa_ast::declaration::ExportSpecifier]>;
@ -298,7 +298,7 @@ pub(super) struct ModuleExportName;
impl<R> TokenParser<R> for ModuleExportName
where
R: Read,
R: ReadChar,
{
type Output = (Sym, bool);
@ -338,7 +338,7 @@ struct ExportSpecifier;
impl<R> TokenParser<R> for ExportSpecifier
where
R: Read,
R: ReadChar,
{
type Output = boa_ast::declaration::ExportSpecifier;

8
core/parser/src/parser/statement/declaration/hoistable/async_function_decl/mod.rs

@ -1,13 +1,15 @@
#[cfg(test)]
mod tests;
use crate::parser::{
use crate::{
parser::{
statement::declaration::hoistable::{parse_callable_declaration, CallableDeclaration},
AllowAwait, AllowDefault, AllowYield, Cursor, ParseResult, TokenParser,
},
source::ReadChar,
};
use boa_ast::{function::AsyncFunction, Keyword};
use boa_interner::Interner;
use std::io::Read;
/// Async Function declaration parsing.
///
@ -72,7 +74,7 @@ impl CallableDeclaration for AsyncFunctionDeclaration {
impl<R> TokenParser<R> for AsyncFunctionDeclaration
where
R: Read,
R: ReadChar,
{
type Output = AsyncFunction;

8
core/parser/src/parser/statement/declaration/hoistable/async_generator_decl/mod.rs

@ -6,13 +6,15 @@
#[cfg(test)]
mod tests;
use crate::parser::{
use crate::{
parser::{
statement::declaration::hoistable::{parse_callable_declaration, CallableDeclaration},
AllowAwait, AllowDefault, AllowYield, Cursor, ParseResult, TokenParser,
},
source::ReadChar,
};
use boa_ast::{function::AsyncGenerator, Keyword, Punctuator};
use boa_interner::Interner;
use std::io::Read;
/// Async Generator Declaration Parser
///
@ -85,7 +87,7 @@ impl CallableDeclaration for AsyncGeneratorDeclaration {
impl<R> TokenParser<R> for AsyncGeneratorDeclaration
where
R: Read,
R: ReadChar,
{
type Output = AsyncGenerator;

12
core/parser/src/parser/statement/declaration/hoistable/class_decl/mod.rs

@ -12,6 +12,7 @@ use crate::{
statement::StatementList,
AllowAwait, AllowDefault, AllowYield, Cursor, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use ast::{
@ -31,7 +32,6 @@ use boa_ast::{
use boa_interner::{Interner, Sym};
use boa_macros::utf16;
use rustc_hash::{FxHashMap, FxHashSet};
use std::io::Read;
/// Class declaration parsing.
///
@ -66,7 +66,7 @@ impl ClassDeclaration {
impl<R> TokenParser<R> for ClassDeclaration
where
R: Read,
R: ReadChar,
{
type Output = Class;
@ -143,7 +143,7 @@ impl ClassTail {
impl<R> TokenParser<R> for ClassTail
where
R: Read,
R: ReadChar,
{
type Output = Class;
@ -237,7 +237,7 @@ impl ClassHeritage {
impl<R> TokenParser<R> for ClassHeritage
where
R: Read,
R: ReadChar,
{
type Output = Expression;
@ -289,7 +289,7 @@ impl ClassBody {
impl<R> TokenParser<R> for ClassBody
where
R: Read,
R: ReadChar,
{
type Output = (Option<Function>, Vec<function::ClassElement>);
@ -570,7 +570,7 @@ impl ClassElement {
impl<R> TokenParser<R> for ClassElement
where
R: Read,
R: ReadChar,
{
type Output = (Option<Function>, Option<function::ClassElement>);

8
core/parser/src/parser/statement/declaration/hoistable/function_decl/mod.rs

@ -1,13 +1,15 @@
#[cfg(test)]
mod tests;
use crate::parser::{
use crate::{
parser::{
statement::declaration::hoistable::{parse_callable_declaration, CallableDeclaration},
AllowAwait, AllowDefault, AllowYield, Cursor, ParseResult, TokenParser,
},
source::ReadChar,
};
use boa_ast::{function::Function, Keyword};
use boa_interner::Interner;
use std::io::Read;
/// Function declaration parsing.
///
@ -70,7 +72,7 @@ impl CallableDeclaration for FunctionDeclaration {
impl<R> TokenParser<R> for FunctionDeclaration
where
R: Read,
R: ReadChar,
{
type Output = Function;

8
core/parser/src/parser/statement/declaration/hoistable/generator_decl/mod.rs

@ -1,13 +1,15 @@
#[cfg(test)]
mod tests;
use crate::parser::{
use crate::{
parser::{
statement::declaration::hoistable::{parse_callable_declaration, CallableDeclaration},
AllowAwait, AllowDefault, AllowYield, Cursor, ParseResult, TokenParser,
},
source::ReadChar,
};
use boa_ast::{function::Generator, Keyword, Punctuator};
use boa_interner::Interner;
use std::io::Read;
/// Generator declaration parsing.
///
@ -72,7 +74,7 @@ impl CallableDeclaration for GeneratorDeclaration {
impl<R> TokenParser<R> for GeneratorDeclaration
where
R: Read,
R: ReadChar,
{
type Output = Generator;

6
core/parser/src/parser/statement/declaration/hoistable/mod.rs

@ -24,6 +24,7 @@ use crate::{
statement::LexError,
AllowAwait, AllowDefault, AllowYield, Cursor, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use boa_ast::{
@ -35,7 +36,6 @@ use boa_ast::{
};
use boa_interner::{Interner, Sym};
use boa_profiler::Profiler;
use std::io::Read;
pub(in crate::parser) use self::{
async_function_decl::AsyncFunctionDeclaration, async_generator_decl::AsyncGeneratorDeclaration,
@ -74,7 +74,7 @@ impl HoistableDeclaration {
impl<R> TokenParser<R> for HoistableDeclaration
where
R: Read,
R: ReadChar,
{
type Output = Declaration;
@ -145,7 +145,7 @@ trait CallableDeclaration {
}
// This is a helper function to not duplicate code in the individual callable declaration parsers.
fn parse_callable_declaration<R: Read, C: CallableDeclaration>(
fn parse_callable_declaration<R: ReadChar, C: CallableDeclaration>(
c: &C,
cursor: &mut Cursor<R>,
interner: &mut Interner,

14
core/parser/src/parser/statement/declaration/import.rs

@ -16,6 +16,7 @@ use crate::{
statement::{declaration::FromClause, BindingIdentifier},
Error, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
};
use boa_ast::{
declaration::{
@ -27,7 +28,6 @@ use boa_ast::{
};
use boa_interner::{Interner, Sym};
use boa_profiler::Profiler;
use std::io::Read;
/// Parses an import declaration.
///
@ -40,7 +40,7 @@ pub(in crate::parser) struct ImportDeclaration;
impl ImportDeclaration {
/// Tests if the next node is an `ImportDeclaration`.
pub(in crate::parser) fn test<R: Read>(
pub(in crate::parser) fn test<R: ReadChar>(
cursor: &mut Cursor<R>,
interner: &mut Interner,
) -> ParseResult<bool> {
@ -71,7 +71,7 @@ impl ImportDeclaration {
impl<R> TokenParser<R> for ImportDeclaration
where
R: Read,
R: ReadChar,
{
type Output = AstImportDeclaration;
@ -171,7 +171,7 @@ struct ImportedBinding;
impl<R> TokenParser<R> for ImportedBinding
where
R: Read,
R: ReadChar,
{
type Output = Identifier;
@ -192,7 +192,7 @@ struct NamedImports;
impl<R> TokenParser<R> for NamedImports
where
R: Read,
R: ReadChar,
{
type Output = Box<[AstImportSpecifier]>;
@ -288,7 +288,7 @@ struct ImportSpecifier;
impl<R> TokenParser<R> for ImportSpecifier
where
R: Read,
R: ReadChar,
{
type Output = AstImportSpecifier;
@ -373,7 +373,7 @@ struct NameSpaceImport;
impl<R> TokenParser<R> for NameSpaceImport
where
R: Read,
R: ReadChar,
{
type Output = Identifier;

8
core/parser/src/parser/statement/declaration/lexical.rs

@ -15,6 +15,7 @@ use crate::{
statement::{ArrayBindingPattern, BindingIdentifier, ObjectBindingPattern},
AllowAwait, AllowIn, AllowYield, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use ast::operations::bound_names;
@ -22,7 +23,6 @@ use boa_ast::{self as ast, declaration::Variable, pattern::Pattern, Keyword, Pun
use boa_interner::{Interner, Sym};
use boa_profiler::Profiler;
use rustc_hash::FxHashSet;
use std::io::Read;
/// Parses a lexical declaration.
///
@ -62,7 +62,7 @@ impl LexicalDeclaration {
impl<R> TokenParser<R> for LexicalDeclaration
where
R: Read,
R: ReadChar,
{
type Output = ast::declaration::LexicalDeclaration;
@ -167,7 +167,7 @@ impl BindingList {
impl<R> TokenParser<R> for BindingList
where
R: Read,
R: ReadChar,
{
type Output = ast::declaration::LexicalDeclaration;
@ -276,7 +276,7 @@ impl LexicalBinding {
impl<R> TokenParser<R> for LexicalBinding
where
R: Read,
R: ReadChar,
{
type Output = Variable;

6
core/parser/src/parser/statement/declaration/mod.rs

@ -25,12 +25,12 @@ pub(in crate::parser) use self::{
use crate::{
lexer::TokenKind,
parser::{AllowAwait, AllowYield, Cursor, OrAbrupt, ParseResult, TokenParser},
source::ReadChar,
Error,
};
use boa_ast::{self as ast, Keyword};
use boa_interner::{Interner, Sym};
use boa_profiler::Profiler;
use std::io::Read;
/// Parses a declaration.
///
@ -61,7 +61,7 @@ impl Declaration {
impl<R> TokenParser<R> for Declaration
where
R: Read,
R: ReadChar,
{
type Output = ast::Declaration;
@ -116,7 +116,7 @@ impl FromClause {
impl<R> TokenParser<R> for FromClause
where
R: Read,
R: ReadChar,
{
type Output = ast::declaration::ModuleSpecifier;

4
core/parser/src/parser/statement/expression/mod.rs

@ -3,12 +3,12 @@ use crate::{
parser::{
expression::Expression, AllowAwait, AllowYield, Cursor, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use boa_ast::{Keyword, Punctuator, Statement};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Expression statement parsing.
///
@ -38,7 +38,7 @@ impl ExpressionStatement {
impl<R> TokenParser<R> for ExpressionStatement
where
R: Read,
R: ReadChar,
{
type Output = Statement;

4
core/parser/src/parser/statement/if_stm/mod.rs

@ -8,6 +8,7 @@ use crate::{
statement::{declaration::FunctionDeclaration, Statement},
AllowAwait, AllowReturn, AllowYield, Cursor, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use boa_ast::{
@ -16,7 +17,6 @@ use boa_ast::{
};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// If statement parsing.
///
@ -53,7 +53,7 @@ impl IfStatement {
impl<R> TokenParser<R> for IfStatement
where
R: Read,
R: ReadChar,
{
type Output = If;

4
core/parser/src/parser/statement/iteration/do_while_statement.rs

@ -13,12 +13,12 @@ use crate::{
expression::Expression, statement::Statement, AllowAwait, AllowReturn, AllowYield, Cursor,
OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use boa_ast::{statement::DoWhileLoop, Keyword, Punctuator};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Do...while statement parsing
///
@ -57,7 +57,7 @@ impl DoWhileStatement {
impl<R> TokenParser<R> for DoWhileStatement
where
R: Read,
R: ReadChar,
{
type Output = DoWhileLoop;

4
core/parser/src/parser/statement/iteration/for_statement.rs

@ -15,6 +15,7 @@ use crate::{
statement::{variable::VariableDeclarationList, Statement},
AllowAwait, AllowReturn, AllowYield, Cursor, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use ast::{
@ -32,7 +33,6 @@ use boa_ast::{
use boa_interner::{Interner, Sym};
use boa_profiler::Profiler;
use rustc_hash::FxHashSet;
use std::io::Read;
/// For statement parsing
///
@ -71,7 +71,7 @@ impl ForStatement {
impl<R> TokenParser<R> for ForStatement
where
R: Read,
R: ReadChar,
{
type Output = ast::Statement;

4
core/parser/src/parser/statement/iteration/while_statement.rs

@ -3,12 +3,12 @@ use crate::{
expression::Expression, statement::Statement, AllowAwait, AllowReturn, AllowYield, Cursor,
OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use boa_ast::{statement::WhileLoop, Keyword, Punctuator};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// While statement parsing
///
@ -47,7 +47,7 @@ impl WhileStatement {
impl<R> TokenParser<R> for WhileStatement
where
R: Read,
R: ReadChar,
{
type Output = WhileLoop;

4
core/parser/src/parser/statement/labelled_stm/mod.rs

@ -6,12 +6,12 @@ use crate::{
statement::{declaration::FunctionDeclaration, AllowAwait, AllowReturn, Statement},
AllowYield, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use boa_ast::{self as ast, Keyword, Punctuator};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Labelled Statement Parsing
///
@ -45,7 +45,7 @@ impl LabelledStatement {
impl<R> TokenParser<R> for LabelledStatement
where
R: Read,
R: ReadChar,
{
type Output = ast::statement::Labelled;

16
core/parser/src/parser/statement/mod.rs

@ -44,6 +44,7 @@ use crate::{
expression::{BindingIdentifier, Initializer, PropertyName},
AllowAwait, AllowReturn, AllowYield, Cursor, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use ast::{
@ -58,7 +59,6 @@ use boa_ast::{
use boa_interner::Interner;
use boa_macros::utf16;
use boa_profiler::Profiler;
use std::io::Read;
pub(in crate::parser) use declaration::ClassTail;
@ -113,7 +113,7 @@ impl Statement {
impl<R> TokenParser<R> for Statement
where
R: Read,
R: ReadChar,
{
type Output = ast::Statement;
@ -273,7 +273,7 @@ impl StatementList {
impl<R> TokenParser<R> for StatementList
where
R: Read,
R: ReadChar,
{
type Output = ast::StatementList;
@ -404,7 +404,7 @@ impl StatementListItem {
impl<R> TokenParser<R> for StatementListItem
where
R: Read,
R: ReadChar,
{
type Output = ast::StatementListItem;
@ -474,7 +474,7 @@ impl ObjectBindingPattern {
impl<R> TokenParser<R> for ObjectBindingPattern
where
R: Read,
R: ReadChar,
{
type Output = Vec<ObjectPatternElement>;
@ -719,7 +719,7 @@ impl ArrayBindingPattern {
impl<R> TokenParser<R> for ArrayBindingPattern
where
R: Read,
R: ReadChar,
{
type Output = Vec<ArrayPatternElement>;
@ -905,7 +905,7 @@ pub(super) struct ModuleItemList;
impl<R> TokenParser<R> for ModuleItemList
where
R: Read,
R: ReadChar,
{
type Output = boa_ast::ModuleItemList;
@ -955,7 +955,7 @@ struct ModuleItem;
impl<R> TokenParser<R> for ModuleItem
where
R: Read,
R: ReadChar,
{
type Output = boa_ast::ModuleItem;

4
core/parser/src/parser/statement/return_stm/mod.rs

@ -5,11 +5,11 @@ use crate::{
expression::Expression,
AllowAwait, AllowYield, ParseResult, TokenParser,
},
source::ReadChar,
};
use boa_ast::{statement::Return, Keyword, Punctuator};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Return statement parsing
///
@ -41,7 +41,7 @@ impl ReturnStatement {
impl<R> TokenParser<R> for ReturnStatement
where
R: Read,
R: ReadChar,
{
type Output = Return;

6
core/parser/src/parser/statement/switch/mod.rs

@ -7,6 +7,7 @@ use crate::{
expression::Expression, statement::StatementList, AllowAwait, AllowReturn, AllowYield,
Cursor, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use ast::operations::{lexically_declared_names_legacy, var_declared_names};
@ -14,7 +15,6 @@ use boa_ast::{self as ast, statement, statement::Switch, Keyword, Punctuator};
use boa_interner::Interner;
use boa_profiler::Profiler;
use rustc_hash::FxHashMap;
use std::io::Read;
/// The possible `TokenKind` which indicate the end of a case statement.
const CASE_BREAK_TOKENS: [TokenKind; 3] = [
@ -56,7 +56,7 @@ impl SwitchStatement {
impl<R> TokenParser<R> for SwitchStatement
where
R: Read,
R: ReadChar,
{
type Output = Switch;
@ -141,7 +141,7 @@ impl CaseBlock {
impl<R> TokenParser<R> for CaseBlock
where
R: Read,
R: ReadChar,
{
type Output = Box<[statement::Case]>;

4
core/parser/src/parser/statement/throw/mod.rs

@ -4,11 +4,11 @@ mod tests;
use crate::{
lexer::TokenKind,
parser::{expression::Expression, AllowAwait, AllowYield, Cursor, ParseResult, TokenParser},
source::ReadChar,
};
use boa_ast::{statement::Throw, Keyword, Punctuator};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Throw statement parsing
///
@ -40,7 +40,7 @@ impl ThrowStatement {
impl<R> TokenParser<R> for ThrowStatement
where
R: Read,
R: ReadChar,
{
type Output = Throw;

6
core/parser/src/parser/statement/try_stm/catch.rs

@ -4,6 +4,7 @@ use crate::{
statement::{block::Block, ArrayBindingPattern, BindingIdentifier, ObjectBindingPattern},
AllowAwait, AllowReturn, AllowYield, Cursor, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use boa_ast::{
@ -14,7 +15,6 @@ use boa_ast::{
use boa_interner::Interner;
use boa_profiler::Profiler;
use rustc_hash::FxHashSet;
use std::io::Read;
/// Catch parsing
///
@ -49,7 +49,7 @@ impl Catch {
impl<R> TokenParser<R> for Catch
where
R: Read,
R: ReadChar,
{
type Output = statement::Catch;
@ -149,7 +149,7 @@ impl CatchParameter {
impl<R> TokenParser<R> for CatchParameter
where
R: Read,
R: ReadChar,
{
type Output = Binding;

11
core/parser/src/parser/statement/try_stm/finally.rs

@ -1,10 +1,13 @@
use crate::parser::{
statement::block::Block, AllowAwait, AllowReturn, AllowYield, Cursor, ParseResult, TokenParser,
use crate::{
parser::{
statement::block::Block, AllowAwait, AllowReturn, AllowYield, Cursor, ParseResult,
TokenParser,
},
source::ReadChar,
};
use boa_ast::{statement, Keyword};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Finally parsing
///
@ -39,7 +42,7 @@ impl Finally {
impl<R> TokenParser<R> for Finally
where
R: Read,
R: ReadChar,
{
type Output = statement::Finally;

4
core/parser/src/parser/statement/try_stm/mod.rs

@ -9,6 +9,7 @@ use super::block::Block;
use crate::{
lexer::TokenKind,
parser::{AllowAwait, AllowReturn, AllowYield, Cursor, OrAbrupt, ParseResult, TokenParser},
source::ReadChar,
Error,
};
use boa_ast::{
@ -17,7 +18,6 @@ use boa_ast::{
};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Try...catch statement parsing
///
@ -52,7 +52,7 @@ impl TryStatement {
impl<R> TokenParser<R> for TryStatement
where
R: Read,
R: ReadChar,
{
type Output = Try;

9
core/parser/src/parser/statement/variable/mod.rs

@ -8,6 +8,7 @@ use crate::{
statement::{ArrayBindingPattern, BindingIdentifier, ObjectBindingPattern},
AllowAwait, AllowIn, AllowYield, OrAbrupt, ParseResult, TokenParser,
},
source::ReadChar,
};
use boa_ast::{
declaration::{VarDeclaration, Variable},
@ -15,7 +16,7 @@ use boa_ast::{
};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::{convert::TryInto, io::Read};
use std::convert::TryInto;
/// Variable statement parsing.
///
@ -49,7 +50,7 @@ impl VariableStatement {
impl<R> TokenParser<R> for VariableStatement
where
R: Read,
R: ReadChar,
{
type Output = VarDeclaration;
@ -103,7 +104,7 @@ impl VariableDeclarationList {
impl<R> TokenParser<R> for VariableDeclarationList
where
R: Read,
R: ReadChar,
{
type Output = VarDeclaration;
@ -158,7 +159,7 @@ impl VariableDeclaration {
impl<R> TokenParser<R> for VariableDeclaration
where
R: Read,
R: ReadChar,
{
type Output = Variable;

4
core/parser/src/parser/statement/with/mod.rs

@ -5,12 +5,12 @@ use crate::{
cursor::Cursor, expression::Expression, statement::Statement, AllowAwait, AllowReturn,
AllowYield, ParseResult, TokenParser,
},
source::ReadChar,
Error,
};
use boa_ast::{statement::With, Keyword, Punctuator};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// With statement parsing.
///
@ -49,7 +49,7 @@ impl WithStatement {
impl<R> TokenParser<R> for WithStatement
where
R: Read,
R: ReadChar,
{
type Output = With;

72
core/parser/src/source.rs → core/parser/src/source/mod.rs

@ -1,9 +1,17 @@
//! Boa parser input source types.
mod utf16;
mod utf8;
use std::{
fs::File,
io::{self, BufReader, Read},
path::Path,
};
pub use utf16::UTF16Input;
pub use utf8::UTF8Input;
/// A source of ECMAScript code.
///
/// [`Source`]s can be created from plain [`str`]s, file [`Path`]s or more generally, any [`Read`]
@ -14,7 +22,7 @@ pub struct Source<'path, R> {
pub(crate) path: Option<&'path Path>,
}
impl<'bytes> Source<'static, &'bytes [u8]> {
impl<'bytes> Source<'static, UTF8Input<&'bytes [u8]>> {
/// Creates a new `Source` from any type equivalent to a slice of bytes e.g. [`&str`][str],
/// <code>[Vec]<[u8]></code>, <code>[Box]<[\[u8\]][slice]></code> or a plain slice
/// <code>[&\[u8\]][slice]</code>.
@ -30,13 +38,34 @@ impl<'bytes> Source<'static, &'bytes [u8]> {
/// [slice]: std::slice
pub fn from_bytes<T: AsRef<[u8]> + ?Sized>(source: &'bytes T) -> Self {
Self {
reader: source.as_ref(),
reader: UTF8Input::new(source.as_ref()),
path: None,
}
}
}
impl<'input> Source<'static, UTF16Input<'input>> {
/// Creates a new `Source` from a UTF-16 encoded slice e.g. <code>[&\[u16\]][slice]</code>.
///
/// # Examples
///
/// ```
/// # use boa_parser::Source;
/// let utf16: Vec<u16> = "var array = [5, 4, 3, 2, 1];".encode_utf16().collect();
/// let source = Source::from_utf16(&utf16);
/// ```
///
/// [slice]: std::slice
#[must_use]
pub fn from_utf16(input: &'input [u16]) -> Self {
Self {
reader: UTF16Input::new(input),
path: None,
}
}
}
impl<'path> Source<'path, BufReader<File>> {
impl<'path> Source<'path, UTF8Input<BufReader<File>>> {
/// Creates a new `Source` from a `Path` to a file.
///
/// # Errors
@ -57,13 +86,13 @@ impl<'path> Source<'path, BufReader<File>> {
pub fn from_filepath(source: &'path Path) -> io::Result<Self> {
let reader = File::open(source)?;
Ok(Self {
reader: BufReader::new(reader),
reader: UTF8Input::new(BufReader::new(reader)),
path: Some(source),
})
}
}
impl<'path, R: Read> Source<'path, R> {
impl<'path, R: Read> Source<'path, UTF8Input<R>> {
/// Creates a new `Source` from a [`Read`] instance and an optional [`Path`].
///
/// # Examples
@ -82,9 +111,22 @@ impl<'path, R: Read> Source<'path, R> {
/// # Ok(())
/// # }
/// ```
pub const fn from_reader(reader: R, path: Option<&'path Path>) -> Self {
Self { reader, path }
pub fn from_reader(reader: R, path: Option<&'path Path>) -> Self {
Self {
reader: UTF8Input::new(reader),
path,
}
}
}
/// This trait is used to abstract over the different types of input readers.
pub trait ReadChar {
/// Retrieves the next Unicode code point. Returns `None` if the end of the input is reached.
///
/// # Errors
///
/// Returns an error if the next item in the input is not a valid Unicode code point.
fn next_char(&mut self) -> io::Result<Option<u32>>;
}
#[cfg(test)]
@ -99,7 +141,9 @@ mod tests {
assert!(source.path.is_none());
let mut content = String::new();
source.reader.read_to_string(&mut content).unwrap();
while let Some(c) = source.reader.next_char().unwrap() {
content.push(char::from_u32(c).unwrap());
}
assert_eq!(content, "'Hello' + 'World';");
}
@ -113,7 +157,9 @@ mod tests {
assert_eq!(source.path, Some(&*filepath));
let mut content = String::new();
source.reader.read_to_string(&mut content).unwrap();
while let Some(c) = source.reader.next_char().unwrap() {
content.push(char::from_u32(c).unwrap());
}
assert_eq!(content, "\"Hello\" + \"World\";\n");
}
@ -126,7 +172,9 @@ mod tests {
assert!(source.path.is_none());
let mut content = String::new();
source.reader.read_to_string(&mut content).unwrap();
while let Some(c) = source.reader.next_char().unwrap() {
content.push(char::from_u32(c).unwrap());
}
assert_eq!(content, "'Hello' + 'World';");
@ -137,7 +185,9 @@ mod tests {
assert_eq!(source.path, Some("test.js".as_ref()));
let mut content = String::new();
source.reader.read_to_string(&mut content).unwrap();
while let Some(c) = source.reader.next_char().unwrap() {
content.push(char::from_u32(c).unwrap());
}
assert_eq!(content, "'Hello' + 'World';");
}

65
core/parser/src/source/utf16.rs

@ -0,0 +1,65 @@
use super::ReadChar;
use std::io;
/// Input for UTF-16 encoded sources.
#[derive(Debug)]
pub struct UTF16Input<'a> {
input: &'a [u16],
index: usize,
}
impl<'a> UTF16Input<'a> {
/// Creates a new `UTF16Input` from a UTF-16 encoded slice e.g. <code>[&\[u16\]][slice]</code>.
///
/// [slice]: std::slice
#[must_use]
pub const fn new(input: &'a [u16]) -> Self {
Self { input, index: 0 }
}
}
impl ReadChar for UTF16Input<'_> {
/// Retrieves the next code point as a `u32`. Unpaired surrogates are returned as-is rather than rejected.
fn next_char(&mut self) -> io::Result<Option<u32>> {
let Some(u1) = self.input.get(self.index).copied() else {
return Ok(None);
};
self.index += 1;
// If the code unit is not a high surrogate, it is not the start of a surrogate pair.
if !is_high_surrogate(u1) {
return Ok(Some(u1.into()));
}
let Some(u2) = self.input.get(self.index).copied() else {
return Ok(Some(u1.into()));
};
// If the second code unit is not a low surrogate, the first is an unpaired high surrogate.
if !is_low_surrogate(u2) {
return Ok(Some(u1.into()));
}
self.index += 1;
Ok(Some(code_point_from_surrogates(u1, u2)))
}
}
const SURROGATE_HIGH_START: u16 = 0xD800;
const SURROGATE_HIGH_END: u16 = 0xDBFF;
const SURROGATE_LOW_START: u16 = 0xDC00;
const SURROGATE_LOW_END: u16 = 0xDFFF;
fn is_high_surrogate(b: u16) -> bool {
(SURROGATE_HIGH_START..=SURROGATE_HIGH_END).contains(&b)
}
fn is_low_surrogate(b: u16) -> bool {
(SURROGATE_LOW_START..=SURROGATE_LOW_END).contains(&b)
}
fn code_point_from_surrogates(high: u16, low: u16) -> u32 {
((u32::from(high & 0x3ff)) << 10 | u32::from(low & 0x3ff)) + 0x1_0000
}

72
core/parser/src/source/utf8.rs

@ -0,0 +1,72 @@
use super::ReadChar;
use std::io::{self, Bytes, Read};
/// Input for UTF-8 encoded sources.
#[derive(Debug)]
pub struct UTF8Input<R> {
input: Bytes<R>,
}
impl<R: Read> UTF8Input<R> {
/// Creates a new `UTF8Input` from a UTF-8 encoded source.
pub(crate) fn new(iter: R) -> Self {
Self {
input: iter.bytes(),
}
}
}
impl<R: Read> UTF8Input<R> {
/// Retrieves the next byte of the input.
fn next_byte(&mut self) -> io::Result<Option<u8>> {
self.input.next().transpose()
}
}
impl<R: Read> ReadChar for UTF8Input<R> {
/// Retrieves the next code point as a `u32`, without checking that it is a valid Unicode scalar value.
fn next_char(&mut self) -> io::Result<Option<u32>> {
// Decode UTF-8
let x = match self.next_byte()? {
Some(b) if b >= 128 => b, // leading byte of a multibyte sequence
b => return Ok(b.map(u32::from)), // ASCII or None
};
// Multibyte case follows
// Decode from a byte combination out of: [[[x y] z] w]
// NOTE: Performance is sensitive to the exact formulation here
let init = utf8_first_byte(x, 2);
let y = self.next_byte()?.unwrap_or(0);
let mut ch = utf8_acc_cont_byte(init, y);
if x >= 0xE0 {
// [[x y z] w] case
// 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
let z = self.next_byte()?.unwrap_or(0);
let y_z = utf8_acc_cont_byte(u32::from(y & CONT_MASK), z);
ch = init << 12 | y_z;
if x >= 0xF0 {
// [x y z w] case
// use only the lower 3 bits of `init`
let w = self.next_byte()?.unwrap_or(0);
ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
}
};
Ok(Some(ch))
}
}
/// Mask of the value bits of a continuation byte.
const CONT_MASK: u8 = 0b0011_1111;
/// Returns the initial codepoint accumulator for the first byte.
/// The first byte is special: we only want the bottom 5 bits for width 2, 4 bits
/// for width 3, and 3 bits for width 4.
fn utf8_first_byte(byte: u8, width: u32) -> u32 {
u32::from(byte & (0x7F >> width))
}
/// Returns the value of `ch` updated with continuation byte `byte`.
fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
(ch << 6) | u32::from(byte & CONT_MASK)
}