Browse Source

Improve identifier parsing (#2581)

Another change extracted from #2411.

This PR changes the following:

- Improves our identifier parsing with a new `Identifier` parser that unifies parsing for `IdentifierReference`, `BindingIdentifier` and `LabelIdentifier`.
- Slightly improves some error messages.
- Replaces our manual initialization of static `Sym`s with a new `static_syms` proc macro.
- Adds `set_module_mode` and `module_mode` to the cursor to prepare for modules.
pull/2582/head
José Julián Espina 2 years ago
parent
commit
ddf00c8c03
  1. 51
      boa_ast/src/keyword.rs
  2. 5
      boa_engine/src/tests.rs
  3. 2
      boa_interner/src/lib.rs
  4. 225
      boa_interner/src/sym.rs
  5. 91
      boa_macros/src/lib.rs
  6. 9
      boa_parser/src/error.rs
  7. 15
      boa_parser/src/parser/cursor/mod.rs
  8. 233
      boa_parser/src/parser/expression/identifiers.rs
  9. 5
      boa_parser/src/parser/expression/left_hand_side/arguments.rs
  10. 2
      boa_parser/src/parser/expression/left_hand_side/call.rs
  11. 4
      boa_parser/src/parser/expression/left_hand_side/member.rs
  12. 2
      boa_parser/src/parser/expression/left_hand_side/optional/mod.rs
  13. 2
      boa_parser/src/parser/statement/switch/mod.rs
  14. 5
      boa_parser/src/parser/statement/try_stm/catch.rs

51
boa_ast/src/keyword.rs

@ -10,7 +10,7 @@
//! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Lexical_grammar#Keywords //! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Lexical_grammar#Keywords
use crate::expression::operator::binary::{BinaryOp, RelationalOp}; use crate::expression::operator::binary::{BinaryOp, RelationalOp};
use boa_interner::{Interner, Sym}; use boa_interner::Sym;
use boa_macros::utf16; use boa_macros::utf16;
use std::{convert::TryFrom, error, fmt, str::FromStr}; use std::{convert::TryFrom, error, fmt, str::FromStr};
@ -534,11 +534,52 @@ impl Keyword {
} }
} }
// TODO: promote all keywords to statics inside Interner
/// Converts the keyword to a symbol in the given interner. /// Converts the keyword to a symbol in the given interner.
pub fn to_sym(self, interner: &mut Interner) -> Sym { #[must_use]
let (utf8, utf16) = self.as_str(); pub const fn to_sym(self) -> Sym {
interner.get_or_intern_static(utf8, utf16) match self {
Self::Await => Sym::AWAIT,
Self::Async => Sym::ASYNC,
Self::Break => Sym::BREAK,
Self::Case => Sym::CASE,
Self::Catch => Sym::CATCH,
Self::Class => Sym::CLASS,
Self::Continue => Sym::CONTINUE,
Self::Const => Sym::CONST,
Self::Debugger => Sym::DEBUGGER,
Self::Default => Sym::DEFAULT,
Self::Delete => Sym::DELETE,
Self::Do => Sym::DO,
Self::Else => Sym::ELSE,
Self::Enum => Sym::ENUM,
Self::Export => Sym::EXPORT,
Self::Extends => Sym::EXTENDS,
Self::False => Sym::FALSE,
Self::Finally => Sym::FINALLY,
Self::For => Sym::FOR,
Self::Function => Sym::FUNCTION,
Self::If => Sym::IF,
Self::In => Sym::IN,
Self::InstanceOf => Sym::INSTANCEOF,
Self::Import => Sym::IMPORT,
Self::Let => Sym::LET,
Self::New => Sym::NEW,
Self::Null => Sym::NULL,
Self::Of => Sym::OF,
Self::Return => Sym::RETURN,
Self::Super => Sym::SUPER,
Self::Switch => Sym::SWITCH,
Self::This => Sym::THIS,
Self::Throw => Sym::THROW,
Self::True => Sym::TRUE,
Self::Try => Sym::TRY,
Self::TypeOf => Sym::TYPEOF,
Self::Var => Sym::VAR,
Self::Void => Sym::VOID,
Self::While => Sym::WHILE,
Self::With => Sym::WITH,
Self::Yield => Sym::YIELD,
}
} }
} }

5
boa_engine/src/tests.rs

@ -1518,7 +1518,10 @@ fn test_conditional_op() {
#[test] #[test]
fn test_identifier_op() { fn test_identifier_op() {
let scenario = "break = 1"; let scenario = "break = 1";
assert_eq!(&exec(scenario), "SyntaxError: expected token \'identifier\', got \'=\' in binding identifier at line 1, col 7"); assert_eq!(
&exec(scenario),
"SyntaxError: expected token \'identifier\', got \'=\' in identifier parsing at line 1, col 7"
);
} }
#[test] #[test]

2
boa_interner/src/lib.rs

@ -93,8 +93,6 @@
extern crate alloc; extern crate alloc;
extern crate static_assertions as sa;
mod fixed_string; mod fixed_string;
mod interned_str; mod interned_str;
mod raw; mod raw;

225
boa_interner/src/sym.rs

@ -1,10 +1,5 @@
use boa_macros::utf16; use boa_macros::static_syms;
use core::{hash::BuildHasherDefault, num::NonZeroUsize}; use core::num::NonZeroUsize;
use indexmap::IndexSet;
use once_cell::sync::Lazy;
use rustc_hash::FxHasher;
type Set<T> = IndexSet<T, BuildHasherDefault<FxHasher>>;
/// The string symbol type for Boa. /// The string symbol type for Boa.
/// ///
@ -23,96 +18,6 @@ pub struct Sym {
} }
impl Sym { impl Sym {
/// Symbol for the empty string (`""`).
pub const EMPTY_STRING: Self = unsafe { Self::new_unchecked(1) };
/// Symbol for the `"arguments"` string.
pub const ARGUMENTS: Self = unsafe { Self::new_unchecked(2) };
/// Symbol for the `"await"` string.
pub const AWAIT: Self = unsafe { Self::new_unchecked(3) };
/// Symbol for the `"yield"` string.
pub const YIELD: Self = unsafe { Self::new_unchecked(4) };
/// Symbol for the `"eval"` string.
pub const EVAL: Self = unsafe { Self::new_unchecked(5) };
/// Symbol for the `"default"` string.
pub const DEFAULT: Self = unsafe { Self::new_unchecked(6) };
/// Symbol for the `"null"` string.
pub const NULL: Self = unsafe { Self::new_unchecked(7) };
/// Symbol for the `"RegExp"` string.
pub const REGEXP: Self = unsafe { Self::new_unchecked(8) };
/// Symbol for the `"get"` string.
pub const GET: Self = unsafe { Self::new_unchecked(9) };
/// Symbol for the `"set"` string.
pub const SET: Self = unsafe { Self::new_unchecked(10) };
/// Symbol for the `"<main>"` string.
pub const MAIN: Self = unsafe { Self::new_unchecked(11) };
/// Symbol for the `"raw"` string.
pub const RAW: Self = unsafe { Self::new_unchecked(12) };
/// Symbol for the `"static"` string.
pub const STATIC: Self = unsafe { Self::new_unchecked(13) };
/// Symbol for the `"prototype"` string.
pub const PROTOTYPE: Self = unsafe { Self::new_unchecked(14) };
/// Symbol for the `"constructor"` string.
pub const CONSTRUCTOR: Self = unsafe { Self::new_unchecked(15) };
/// Symbol for the `"implements"` string.
pub const IMPLEMENTS: Self = unsafe { Self::new_unchecked(16) };
/// Symbol for the `"interface"` string.
pub const INTERFACE: Self = unsafe { Self::new_unchecked(17) };
/// Symbol for the `"let"` string.
pub const LET: Self = unsafe { Self::new_unchecked(18) };
/// Symbol for the `"package"` string.
pub const PACKAGE: Self = unsafe { Self::new_unchecked(19) };
/// Symbol for the `"private"` string.
pub const PRIVATE: Self = unsafe { Self::new_unchecked(20) };
/// Symbol for the `"protected"` string.
pub const PROTECTED: Self = unsafe { Self::new_unchecked(21) };
/// Symbol for the `"public"` string.
pub const PUBLIC: Self = unsafe { Self::new_unchecked(22) };
/// Symbol for the `"anonymous"` string.
pub const ANONYMOUS: Self = unsafe { Self::new_unchecked(23) };
/// Symbol for the `"true"` string.
pub const TRUE: Self = unsafe { Self::new_unchecked(24) };
/// Symbol for the `"false"` string.
pub const FALSE: Self = unsafe { Self::new_unchecked(25) };
/// Symbol for the `"async"` string.
pub const ASYNC: Self = unsafe { Self::new_unchecked(26) };
/// Symbol for the `"of"` string.
pub const OF: Self = unsafe { Self::new_unchecked(27) };
/// Symbol for the `"target"` string.
pub const TARGET: Self = unsafe { Self::new_unchecked(28) };
/// Symbol for the `"__proto__"` string.
pub const __PROTO__: Self = unsafe { Self::new_unchecked(29) };
/// Symbol for the `"name"` string.
pub const NAME: Self = unsafe { Self::new_unchecked(30) };
/// Creates a new [`Sym`] from the provided `value`, or returns `None` if `index` is zero. /// Creates a new [`Sym`] from the provided `value`, or returns `None` if `index` is zero.
pub(super) fn new(value: usize) -> Option<Self> { pub(super) fn new(value: usize) -> Option<Self> {
NonZeroUsize::new(value).map(|value| Self { value }) NonZeroUsize::new(value).map(|value| Self { value })
@ -133,62 +38,79 @@ impl Sym {
} }
} }
/// Returns the internal value of the [`Sym`] /// Checks if this symbol is one of the [reserved identifiers][spec] of the ECMAScript
/// specification, excluding `await` and `yield`
///
/// [spec]: https://tc39.es/ecma262/#prod-ReservedWord
#[inline] #[inline]
#[must_use] #[must_use]
pub const fn get(self) -> usize { pub fn is_reserved_identifier(self) -> bool {
self.value.get() (Self::BREAK..=Self::WITH).contains(&self)
} }
}
macro_rules! create_static_strings { /// Checks if this symbol is one of the [strict reserved identifiers][spec] of the ECMAScript
( $( $s:literal ),+$(,)? ) => { /// specification.
/// Ordered set of commonly used static `UTF-8` strings.
///
/// # Note
/// ///
/// `COMMON_STRINGS_UTF8`, `COMMON_STRINGS_UTF16` and the constants /// [spec]: https://tc39.es/ecma262/#prod-ReservedWord
/// defined in [`Sym`] must always be in sync. #[inline]
pub(super) static COMMON_STRINGS_UTF8: phf::OrderedSet<&'static str> = { #[must_use]
const COMMON_STRINGS: phf::OrderedSet<&'static str> = phf::phf_ordered_set! { pub fn is_strict_reserved_identifier(self) -> bool {
$( $s ),+ (Self::IMPLEMENTS..=Self::YIELD).contains(&self)
}; }
// A `COMMON_STRINGS` of size `usize::MAX` would cause an overflow on our `Interner`
sa::const_assert!(COMMON_STRINGS.len() < usize::MAX);
COMMON_STRINGS
};
/// Ordered set of commonly used static `UTF-16` strings. /// Returns the internal value of the [`Sym`]
/// #[inline]
/// # Note #[must_use]
/// pub const fn get(self) -> usize {
/// `COMMON_STRINGS_UTF8`, `COMMON_STRINGS_UTF16` and the constants self.value.get()
/// defined in [`Sym`] must always be in sync. }
// FIXME: use phf when const expressions are allowed. https://github.com/rust-phf/rust-phf/issues/188
pub(super) static COMMON_STRINGS_UTF16: Lazy<Set<&'static [u16]>> = Lazy::new(|| {
let mut set = Set::with_capacity_and_hasher(COMMON_STRINGS_UTF8.len(), BuildHasherDefault::default());
$( set.insert(utf16!($s)); )+
set
});
};
} }
create_static_strings! { static_syms! {
"", // Reserved identifiers
"arguments", // See: <https://tc39.es/ecma262/#prod-ReservedWord>
"await", // Note, they must all be together.
"yield", "break",
"eval", "case",
"catch",
"class",
"const",
"continue",
"debugger",
"default", "default",
"delete",
"do",
"else",
"enum",
"export",
"extends",
"false",
"finally",
"for",
"function",
"if",
"import",
"in",
"instanceof",
"new",
"null", "null",
"RegExp", "return",
"get", "super",
"set", "switch",
"<main>", "this",
"raw", "throw",
"static", "true",
"prototype", "try",
"constructor", "typeof",
"var",
"void",
"while",
"with",
// End reserved identifier
// strict reserved identifiers.
// See: <https://tc39.es/ecma262/#prod-Identifier>
// Note, they must all be together.
"implements", "implements",
"interface", "interface",
"let", "let",
@ -196,12 +118,25 @@ create_static_strings! {
"private", "private",
"protected", "protected",
"public", "public",
"static",
"yield",
// End strict reserved identifiers
"",
"prototype",
"constructor",
"arguments",
"eval",
"RegExp",
"get",
"set",
"<main>",
"raw",
"anonymous", "anonymous",
"true",
"false",
"async", "async",
"of", "of",
"target", "target",
"__proto__", "__proto__",
"name", "name",
"await",
} }

91
boa_macros/src/lib.rs

@ -58,10 +58,99 @@
)] )]
use proc_macro::TokenStream; use proc_macro::TokenStream;
use proc_macro2::Ident;
use quote::quote; use quote::quote;
use syn::{parse_macro_input, LitStr}; use syn::{
parse::{Parse, ParseStream},
parse_macro_input,
punctuated::Punctuated,
LitStr, Token,
};
use synstructure::{decl_derive, AddBounds, Structure}; use synstructure::{decl_derive, AddBounds, Structure};
/// The input of the `static_syms` macro: a comma-separated list of string literals.
struct Syms(Vec<LitStr>);

impl Parse for Syms {
    /// Parses string literals separated by commas and collects them, in order, into a [`Vec`].
    fn parse(input: ParseStream<'_>) -> syn::Result<Self> {
        Punctuated::<LitStr, Token![,]>::parse_terminated(input)
            .map(|list| Self(list.into_iter().collect()))
    }
}
#[doc(hidden)]
#[proc_macro]
pub fn static_syms(input: TokenStream) -> TokenStream {
let literals = parse_macro_input!(input as Syms).0;
let consts = literals.iter().enumerate().map(|(mut idx, lit)| {
let ident = lit.value();
let (doc, ident) = match &*ident {
"" => (
String::from("Symbol for the empty string."),
String::from("EMPTY_STRING"),
),
"<main>" => (
String::from("Symbol for the `<main>` string."),
String::from("MAIN"),
),
ident => (
format!("Symbol for the `{ident}` string.",),
ident.to_uppercase(),
),
};
let ident = Ident::new(&ident, lit.span());
idx += 1;
quote! {
#[doc = #doc]
pub const #ident: Self = unsafe { Self::new_unchecked(#idx) };
}
});
let caches = quote! {
type Set<T> = ::indexmap::IndexSet<T, ::core::hash::BuildHasherDefault<::rustc_hash::FxHasher>>;
/// Ordered set of commonly used static `UTF-8` strings.
///
/// # Note
///
/// `COMMON_STRINGS_UTF8`, `COMMON_STRINGS_UTF16` and the constants
/// defined in [`Sym`] must always be in sync.
pub(super) static COMMON_STRINGS_UTF8: ::phf::OrderedSet<&'static str> = {
const COMMON_STRINGS: ::phf::OrderedSet<&'static str> = ::phf::phf_ordered_set! {
#(#literals),*
};
// A `COMMON_STRINGS` of size `usize::MAX` would cause an overflow on our `Interner`
::static_assertions::const_assert!(COMMON_STRINGS.len() < usize::MAX);
COMMON_STRINGS
};
/// Ordered set of commonly used static `UTF-16` strings.
///
/// # Note
///
/// `COMMON_STRINGS_UTF8`, `COMMON_STRINGS_UTF16` and the constants
/// defined in [`Sym`] must always be in sync.
// FIXME: use phf when const expressions are allowed. https://github.com/rust-phf/rust-phf/issues/188
pub(super) static COMMON_STRINGS_UTF16: ::once_cell::sync::Lazy<Set<&'static [u16]>> =
::once_cell::sync::Lazy::new(|| {
let mut set = Set::with_capacity_and_hasher(COMMON_STRINGS_UTF8.len(), ::core::hash::BuildHasherDefault::default());
#(
set.insert(::boa_macros::utf16!(#literals));
)*
set
});
};
quote! {
impl Sym {
#(#consts)*
}
#caches
}
.into()
}
/// Construct a utf-16 array literal from a utf-8 [`str`] literal. /// Construct a utf-16 array literal from a utf-8 [`str`] literal.
#[proc_macro] #[proc_macro]
pub fn utf16(input: TokenStream) -> TokenStream { pub fn utf16(input: TokenStream) -> TokenStream {

9
boa_parser/src/error.rs

@ -45,7 +45,7 @@ pub enum Error {
/// When a token is unexpected /// When a token is unexpected
Unexpected { Unexpected {
/// The error message. /// The error message.
message: Option<&'static str>, message: Box<str>,
/// The token that was not expected. /// The token that was not expected.
found: Box<str>, found: Box<str>,
@ -101,11 +101,11 @@ impl Error {
} }
} }
/// Creates an `Expected` parsing error. /// Creates an `Unexpected` parsing error.
pub(crate) fn unexpected<F, C>(found: F, span: Span, message: C) -> Self pub(crate) fn unexpected<F, C>(found: F, span: Span, message: C) -> Self
where where
F: Into<Box<str>>, F: Into<Box<str>>,
C: Into<Option<&'static str>>, C: Into<Box<str>>,
{ {
Self::Unexpected { Self::Unexpected {
found: found.into(), found: found.into(),
@ -187,8 +187,7 @@ impl fmt::Display for Error {
message, message,
} => write!( } => write!(
f, f,
"unexpected token '{found}'{} at line {}, col {}", "unexpected token '{found}', {message} at line {}, col {}",
message.map_or_else(String::new, |m| format!(", {m}")),
span.start().line_number(), span.start().line_number(),
span.start().column_number() span.start().column_number()
), ),

15
boa_parser/src/parser/cursor/mod.rs

@ -36,6 +36,9 @@ pub(super) struct Cursor<R> {
/// Indicate if the cursor is used in `JSON.parse`. /// Indicate if the cursor is used in `JSON.parse`.
json_parse: bool, json_parse: bool,
/// Indicate if the cursor's **goal symbol** is a Module.
module: bool,
} }
impl<R> Cursor<R> impl<R> Cursor<R>
@ -50,7 +53,19 @@ where
private_environment_root_index: 0, private_environment_root_index: 0,
arrow: false, arrow: false,
json_parse: false, json_parse: false,
module: false,
}
} }
/// Sets the goal symbol of the cursor to `Module`.
///
/// This sets the cursor's `module` flag to `true`; the flag is what `module_mode` reports.
// NOTE(review): presumably nothing calls this yet, hence the lint allowance below.
// Remove the `allow` once a caller exists.
#[allow(unused)]
pub(super) fn set_module_mode(&mut self) {
self.module = true;
}
/// Returns `true` if the cursor is currently parsing a `Module`.
pub(super) const fn module_mode(&self) -> bool {
self.module
} }
pub(super) fn set_goal(&mut self, elm: InputElement) { pub(super) fn set_goal(&mut self, elm: InputElement) {

233
boa_parser/src/parser/expression/identifiers.rs

@ -6,27 +6,15 @@
//! [spec]: https://tc39.es/ecma262/#sec-identifiers //! [spec]: https://tc39.es/ecma262/#sec-identifiers
use crate::{ use crate::{
lexer::{Error as LexError, TokenKind}, lexer::TokenKind,
parser::{cursor::Cursor, AllowAwait, AllowYield, OrAbrupt, ParseResult, TokenParser}, parser::{cursor::Cursor, AllowAwait, AllowYield, OrAbrupt, ParseResult, TokenParser},
Error, Error,
}; };
use boa_ast::{expression::Identifier, Keyword}; use boa_ast::expression::Identifier as AstIdentifier;
use boa_interner::{Interner, Sym}; use boa_interner::{Interner, Sym};
use boa_profiler::Profiler; use boa_profiler::Profiler;
use std::io::Read; use std::io::Read;
pub(crate) const RESERVED_IDENTIFIERS_STRICT: [Sym; 9] = [
Sym::IMPLEMENTS,
Sym::INTERFACE,
Sym::LET,
Sym::PACKAGE,
Sym::PRIVATE,
Sym::PROTECTED,
Sym::PUBLIC,
Sym::STATIC,
Sym::YIELD,
];
/// Identifier reference parsing. /// Identifier reference parsing.
/// ///
/// More information: /// More information:
@ -57,61 +45,25 @@ impl<R> TokenParser<R> for IdentifierReference
where where
R: Read, R: Read,
{ {
type Output = Identifier; type Output = AstIdentifier;
fn parse(self, cursor: &mut Cursor<R>, interner: &mut Interner) -> ParseResult<Self::Output> { fn parse(self, cursor: &mut Cursor<R>, interner: &mut Interner) -> ParseResult<Self::Output> {
let _timer = Profiler::global().start_event("IdentifierReference", "Parsing"); let _timer = Profiler::global().start_event("IdentifierReference", "Parsing");
let token = cursor.next(interner).or_abrupt()?; let span = cursor.peek(0, interner).or_abrupt()?.span();
let ident = Identifier.parse(cursor, interner)?;
match token.kind() { match ident.sym() {
TokenKind::IdentifierName((ident, _)) Sym::YIELD if self.allow_yield.0 => Err(Error::unexpected(
if cursor.strict_mode() && RESERVED_IDENTIFIERS_STRICT.contains(ident) => "yield",
{ span,
Err(Error::general( "keyword `yield` not allowed in this context",
"using future reserved keyword not allowed in strict mode IdentifierReference",
token.span().start(),
))
}
TokenKind::IdentifierName((ident, _)) => Ok(Identifier::new(*ident)),
TokenKind::Keyword((Keyword::Let, _)) if cursor.strict_mode() => Err(Error::general(
"using future reserved keyword not allowed in strict mode IdentifierReference",
token.span().start(),
)), )),
TokenKind::Keyword((Keyword::Let, _)) => Ok(Identifier::new(Sym::LET)), Sym::AWAIT if self.allow_await.0 => Err(Error::unexpected(
TokenKind::Keyword((Keyword::Yield, _)) if self.allow_yield.0 => { "await",
// Early Error: It is a Syntax Error if this production has a [Yield] parameter and StringValue of Identifier is "yield". span,
Err(Error::general( "keyword `await` not allowed in this context",
"Unexpected identifier",
token.span().start(),
))
}
TokenKind::Keyword((Keyword::Yield, _)) if !self.allow_yield.0 => {
if cursor.strict_mode() {
return Err(Error::general(
"Unexpected strict mode reserved word",
token.span().start(),
));
}
Ok(Identifier::new(Sym::YIELD))
}
TokenKind::Keyword((Keyword::Await, _)) if self.allow_await.0 => {
// Early Error: It is a Syntax Error if this production has an [Await] parameter and StringValue of Identifier is "await".
Err(Error::general(
"Unexpected identifier",
token.span().start(),
))
}
TokenKind::Keyword((Keyword::Await, _)) if !self.allow_await.0 => {
Ok(Identifier::new(Sym::AWAIT))
}
TokenKind::Keyword((Keyword::Async, _)) => Ok(Identifier::new(Sym::ASYNC)),
TokenKind::Keyword((Keyword::Of, _)) => Ok(Identifier::new(Sym::OF)),
_ => Err(Error::unexpected(
token.to_string(interner),
token.span(),
"IdentifierReference",
)), )),
_ => Ok(ident),
} }
} }
} }
@ -146,85 +98,114 @@ impl<R> TokenParser<R> for BindingIdentifier
where where
R: Read, R: Read,
{ {
type Output = Identifier; type Output = AstIdentifier;
/// Strict mode parsing as per <https://tc39.es/ecma262/#sec-identifiers-static-semantics-early-errors>. /// Strict mode parsing as per <https://tc39.es/ecma262/#sec-identifiers-static-semantics-early-errors>.
fn parse(self, cursor: &mut Cursor<R>, interner: &mut Interner) -> ParseResult<Self::Output> { fn parse(self, cursor: &mut Cursor<R>, interner: &mut Interner) -> ParseResult<Self::Output> {
let _timer = Profiler::global().start_event("BindingIdentifier", "Parsing"); let _timer = Profiler::global().start_event("BindingIdentifier", "Parsing");
let next_token = cursor.next(interner).or_abrupt()?; let span = cursor.peek(0, interner).or_abrupt()?.span();
let ident = Identifier.parse(cursor, interner)?;
match next_token.kind() { match ident.sym() {
TokenKind::IdentifierName((Sym::ARGUMENTS | Sym::EVAL, _)) if cursor.strict_mode() => { Sym::ARGUMENTS | Sym::EVAL if cursor.strict_mode() => {
Err(Error::lex(LexError::Syntax( let name = interner
format!( .resolve_expect(ident.sym())
"unexpected identifier '{}' in strict mode", .utf8()
next_token.to_string(interner) .expect("keyword must be utf-8");
) Err(Error::unexpected(
.into(), name,
next_token.span().start(), span,
))) format!("binding identifier `{name}` not allowed in strict mode"),
}
TokenKind::IdentifierName((ident, _)) => {
if cursor.strict_mode() && RESERVED_IDENTIFIERS_STRICT.contains(ident) {
return Err(Error::general(
"using future reserved keyword not allowed in strict mode",
next_token.span().start(),
));
}
Ok((*ident).into())
}
TokenKind::Keyword((Keyword::Let, _)) if cursor.strict_mode() => {
Err(Error::lex(LexError::Syntax(
"unexpected identifier 'let' in strict mode".into(),
next_token.span().start(),
)))
}
TokenKind::Keyword((Keyword::Let, _)) => Ok(Sym::LET.into()),
TokenKind::Keyword((Keyword::Yield, _)) if self.allow_yield.0 => {
// Early Error: It is a Syntax Error if this production has a [Yield] parameter and StringValue of Identifier is "yield".
Err(Error::general(
"Unexpected identifier",
next_token.span().start(),
)) ))
} }
TokenKind::Keyword((Keyword::Yield, _)) if !self.allow_yield.0 => { Sym::YIELD if self.allow_yield.0 => Err(Error::unexpected(
if cursor.strict_mode() { "yield",
Err(Error::general( span,
"yield keyword in binding identifier not allowed in strict mode", "keyword `yield` not allowed in this context",
next_token.span().start(),
))
} else {
Ok(Sym::YIELD.into())
}
}
TokenKind::Keyword((Keyword::Await, _)) if cursor.arrow() => Ok(Sym::AWAIT.into()),
TokenKind::Keyword((Keyword::Await, _)) if self.allow_await.0 => {
// Early Error: It is a Syntax Error if this production has an [Await] parameter and StringValue of Identifier is "await".
Err(Error::general(
"Unexpected identifier",
next_token.span().start(),
))
}
TokenKind::Keyword((Keyword::Await, _)) if !self.allow_await.0 => Ok(Sym::AWAIT.into()),
TokenKind::Keyword((Keyword::Async, _)) => Ok(Sym::ASYNC.into()),
TokenKind::Keyword((Keyword::Of, _)) => Ok(Sym::OF.into()),
_ => Err(Error::expected(
["identifier".to_owned()],
next_token.to_string(interner),
next_token.span(),
"binding identifier",
)), )),
Sym::AWAIT if self.allow_await.0 => Err(Error::unexpected(
"await",
span,
"keyword `await` not allowed in this context",
)),
_ => Ok(ident),
} }
} }
} }
/// Label identifier parsing. /// Label identifier parsing.
/// ///
/// This seems to be the same as a `BindingIdentifier`. /// This seems to be the same as an `IdentifierReference`.
/// ///
/// More information: /// More information:
/// - [ECMAScript specification][spec] /// - [ECMAScript specification][spec]
/// ///
/// [spec]: https://tc39.es/ecma262/#prod-LabelIdentifier /// [spec]: https://tc39.es/ecma262/#prod-LabelIdentifier
pub(in crate::parser) type LabelIdentifier = BindingIdentifier; pub(in crate::parser) type LabelIdentifier = IdentifierReference;
/// Identifier parsing.
///
/// More information:
/// - [ECMAScript specification][spec]
///
/// [spec]: https://tc39.es/ecma262/#prod-Identifier
#[derive(Debug, Clone, Copy)]
pub(in crate::parser) struct Identifier;
impl<R> TokenParser<R> for Identifier
where
R: Read,
{
type Output = AstIdentifier;
fn parse(self, cursor: &mut Cursor<R>, interner: &mut Interner) -> ParseResult<Self::Output> {
let _timer = Profiler::global().start_event("Identifier", "Parsing");
let tok = cursor.next(interner).or_abrupt()?;
let ident = match tok.kind() {
TokenKind::IdentifierName((ident, _)) => *ident,
TokenKind::Keyword((kw, _)) => kw.to_sym(),
_ => {
return Err(Error::expected(
["identifier".to_owned()],
tok.to_string(interner),
tok.span(),
"identifier parsing",
))
}
};
if cursor.strict_mode() && ident.is_strict_reserved_identifier() {
return Err(Error::unexpected(
interner
.resolve_expect(ident)
.utf8()
.expect("keyword must always be utf-8"),
tok.span(),
"strict reserved word cannot be an identifier",
));
}
if cursor.module_mode() && ident == Sym::AWAIT {
return Err(Error::unexpected(
"await",
tok.span(),
"`await` cannot be used as an identifier in a module",
));
}
if ident.is_reserved_identifier() {
return Err(Error::unexpected(
interner
.resolve_expect(ident)
.utf8()
.expect("keyword must always be utf-8"),
tok.span(),
"reserved word cannot be an identifier",
));
}
Ok(AstIdentifier::new(ident))
}
}

5
boa_parser/src/parser/expression/left_hand_side/arguments.rs

@ -72,10 +72,11 @@ where
let next_token = cursor.next(interner)?.expect(", token vanished"); // Consume the token. let next_token = cursor.next(interner)?.expect(", token vanished"); // Consume the token.
if args.is_empty() { if args.is_empty() {
return Err(Error::unexpected( return Err(Error::expected(
[String::from("expression")],
next_token.to_string(interner), next_token.to_string(interner),
next_token.span(), next_token.span(),
None, "call",
)); ));
} }

2
boa_parser/src/parser/expression/left_hand_side/call.rs

@ -102,7 +102,7 @@ where
SimplePropertyAccess::new(lhs, *name).into() SimplePropertyAccess::new(lhs, *name).into()
} }
TokenKind::Keyword((kw, _)) => { TokenKind::Keyword((kw, _)) => {
SimplePropertyAccess::new(lhs, kw.to_sym(interner)).into() SimplePropertyAccess::new(lhs, kw.to_sym()).into()
} }
TokenKind::BooleanLiteral(true) => { TokenKind::BooleanLiteral(true) => {
SimplePropertyAccess::new(lhs, Sym::TRUE).into() SimplePropertyAccess::new(lhs, Sym::TRUE).into()

4
boa_parser/src/parser/expression/left_hand_side/member.rs

@ -126,7 +126,7 @@ where
SuperPropertyAccess::new(PropertyAccessField::from(*name)) SuperPropertyAccess::new(PropertyAccessField::from(*name))
} }
TokenKind::Keyword((kw, _)) => { TokenKind::Keyword((kw, _)) => {
SuperPropertyAccess::new(kw.to_sym(interner).into()) SuperPropertyAccess::new(kw.to_sym().into())
} }
TokenKind::BooleanLiteral(true) => { TokenKind::BooleanLiteral(true) => {
SuperPropertyAccess::new(Sym::TRUE.into()) SuperPropertyAccess::new(Sym::TRUE.into())
@ -188,7 +188,7 @@ where
SimplePropertyAccess::new(lhs, *name).into() SimplePropertyAccess::new(lhs, *name).into()
} }
TokenKind::Keyword((kw, _)) => { TokenKind::Keyword((kw, _)) => {
SimplePropertyAccess::new(lhs, kw.to_sym(interner)).into() SimplePropertyAccess::new(lhs, kw.to_sym()).into()
} }
TokenKind::BooleanLiteral(true) => { TokenKind::BooleanLiteral(true) => {
SimplePropertyAccess::new(lhs, Sym::TRUE).into() SimplePropertyAccess::new(lhs, Sym::TRUE).into()

2
boa_parser/src/parser/expression/left_hand_side/optional/mod.rs

@ -72,7 +72,7 @@ where
} }
} }
TokenKind::Keyword((kw, _)) => OptionalOperationKind::SimplePropertyAccess { TokenKind::Keyword((kw, _)) => OptionalOperationKind::SimplePropertyAccess {
field: PropertyAccessField::Const(kw.to_sym(interner)), field: PropertyAccessField::Const(kw.to_sym()),
}, },
TokenKind::BooleanLiteral(true) => OptionalOperationKind::SimplePropertyAccess { TokenKind::BooleanLiteral(true) => OptionalOperationKind::SimplePropertyAccess {
field: PropertyAccessField::Const(Sym::TRUE), field: PropertyAccessField::Const(Sym::TRUE),

2
boa_parser/src/parser/statement/switch/mod.rs

@ -186,7 +186,7 @@ where
return Err(Error::unexpected( return Err(Error::unexpected(
token.to_string(interner), token.to_string(interner),
token.span(), token.span(),
Some("more than one switch default"), "more than one switch default",
)); ));
} }

5
boa_parser/src/parser/statement/try_stm/catch.rs

@ -173,10 +173,11 @@ where
.parse(cursor, interner)?; .parse(cursor, interner)?;
Ok(Binding::Identifier(ident)) Ok(Binding::Identifier(ident))
} }
_ => Err(Error::unexpected( _ => Err(Error::expected(
[String::from("pattern"), String::from("binding identifier")],
token.to_string(interner), token.to_string(interner),
token.span(), token.span(),
None, "catch parameter",
)), )),
} }
} }

Loading…
Cancel
Save