From bd0652fe280148d3e9dec84809a719d6ee6075ac Mon Sep 17 00:00:00 2001 From: raskad <32105367+raskad@users.noreply.github.com> Date: Sat, 28 Jan 2023 16:25:47 +0000 Subject: [PATCH] Add early errors for escaped identifiers (#2546) This Pull Request changes the following: - Add early errors for escaped characters in object and class setters and getters. - Add early errors for escaped characters in class `static`. - Add early errors for escaped characters in `new.target`. - Add early errors for legacy octal/decimal escapes that are used in string literals before a `"use strict"` directive. --- boa_parser/src/lexer/identifier.rs | 9 ++- boa_parser/src/lexer/string.rs | 61 +++++++++-------- boa_parser/src/lexer/template.rs | 2 +- boa_parser/src/lexer/tests.rs | 48 ++++++++----- boa_parser/src/lexer/token.rs | 40 +++++++++-- .../src/parser/expression/identifiers.rs | 10 +-- .../parser/expression/left_hand_side/call.rs | 4 +- .../expression/left_hand_side/member.rs | 16 +++-- .../expression/left_hand_side/optional/mod.rs | 2 +- .../src/parser/expression/primary/mod.rs | 2 +- .../primary/object_initializer/mod.rs | 35 +++++++++- .../declaration/hoistable/class_decl/mod.rs | 68 +++++++++++++------ boa_parser/src/parser/statement/mod.rs | 24 ++++++- 13 files changed, 231 insertions(+), 90 deletions(-) diff --git a/boa_parser/src/lexer/identifier.rs b/boa_parser/src/lexer/identifier.rs index aca96e5aef..732f105f8f 100644 --- a/boa_parser/src/lexer/identifier.rs +++ b/boa_parser/src/lexer/identifier.rs @@ -1,6 +1,8 @@ //! This module implements lexing for identifiers (foo, myvar, etc.) used in ECMAScript. 
-use crate::lexer::{Cursor, Error, StringLiteral, Token, TokenKind, Tokenizer}; +use crate::lexer::{ + token::ContainsEscapeSequence, Cursor, Error, StringLiteral, Token, TokenKind, Tokenizer, +}; use boa_ast::{Keyword, Position, Span}; use boa_interner::Interner; use boa_profiler::Profiler; @@ -71,7 +73,10 @@ impl Tokenizer for Identifier { Ok(Keyword::False) => TokenKind::BooleanLiteral(false), Ok(Keyword::Null) => TokenKind::NullLiteral, Ok(keyword) => TokenKind::Keyword((keyword, contains_escaped_chars)), - _ => TokenKind::identifier(interner.get_or_intern(identifier_name.as_str())), + _ => TokenKind::Identifier(( + interner.get_or_intern(identifier_name.as_str()), + ContainsEscapeSequence(contains_escaped_chars), + )), }; Ok(Token::new(token_kind, Span::new(start_pos, cursor.pos()))) diff --git a/boa_parser/src/lexer/string.rs b/boa_parser/src/lexer/string.rs index b3e6643f9c..33c6666538 100644 --- a/boa_parser/src/lexer/string.rs +++ b/boa_parser/src/lexer/string.rs @@ -1,6 +1,6 @@ //! Boa's lexing for ECMAScript string literals. 
-use crate::lexer::{Cursor, Error, Token, TokenKind, Tokenizer}; +use crate::lexer::{token::EscapeSequence, Cursor, Error, Token, TokenKind, Tokenizer}; use boa_ast::{Position, Span}; use boa_interner::Interner; use boa_profiler::Profiler; @@ -88,11 +88,11 @@ impl Tokenizer for StringLiteral { { let _timer = Profiler::global().start_event("StringLiteral", "Lexing"); - let (lit, span) = + let (lit, span, escape_sequence) = Self::take_string_characters(cursor, start_pos, self.terminator, cursor.strict_mode())?; Ok(Token::new( - TokenKind::string_literal(interner.get_or_intern(&lit[..])), + TokenKind::string_literal(interner.get_or_intern(&lit[..]), escape_sequence), span, )) } @@ -117,11 +117,13 @@ impl StringLiteral { start_pos: Position, terminator: StringTerminator, is_strict_mode: bool, - ) -> Result<(Vec, Span), Error> + ) -> Result<(Vec, Span, Option), Error> where R: Read, { let mut buf = Vec::new(); + let mut escape_sequence = None; + loop { let ch_start_pos = cursor.pos(); let ch = cursor.next_char()?; @@ -133,12 +135,15 @@ impl StringLiteral { let _timer = Profiler::global().start_event("StringLiteral - escape sequence", "Lexing"); - if let Some(escape_value) = Self::take_escape_sequence_or_line_continuation( - cursor, - ch_start_pos, - is_strict_mode, - false, - )? { + if let Some((escape_value, escape)) = + Self::take_escape_sequence_or_line_continuation( + cursor, + ch_start_pos, + is_strict_mode, + false, + )? 
+ { + escape_sequence = escape_sequence.or(escape); buf.push_code_point(escape_value); } } @@ -156,7 +161,7 @@ impl StringLiteral { } } - Ok((buf, Span::new(start_pos, cursor.pos()))) + Ok((buf, Span::new(start_pos, cursor.pos()), escape_sequence)) } pub(super) fn take_escape_sequence_or_line_continuation( @@ -164,7 +169,7 @@ impl StringLiteral { start_pos: Position, is_strict_mode: bool, is_template_literal: bool, - ) -> Result, Error> + ) -> Result)>, Error> where R: Read, { @@ -176,25 +181,25 @@ impl StringLiteral { })?; let escape_value = match escape_ch { - 0x0062 /* b */ => Some(0x0008 /* */), - 0x0074 /* t */ => Some(0x0009 /* */), - 0x006E /* n */ => Some(0x000A /* */), - 0x0076 /* v */ => Some(0x000B /* */), - 0x0066 /* f */ => Some(0x000C /* */), - 0x0072 /* r */ => Some(0x000D /* */), - 0x0022 /* " */ => Some(0x0022 /* " */), - 0x0027 /* ' */ => Some(0x0027 /* ' */), - 0x005C /* \ */ => Some(0x005C /* \ */), + 0x0062 /* b */ => Some((0x0008 /* */, None)), + 0x0074 /* t */ => Some((0x0009 /* */, None)), + 0x006E /* n */ => Some((0x000A /* */, None)), + 0x0076 /* v */ => Some((0x000B /* */, None)), + 0x0066 /* f */ => Some((0x000C /* */, None)), + 0x0072 /* r */ => Some((0x000D /* */, None)), + 0x0022 /* " */ => Some((0x0022 /* " */, None)), + 0x0027 /* ' */ => Some((0x0027 /* ' */, None)), + 0x005C /* \ */ => Some((0x005C /* \ */, None)), 0x0030 /* 0 */ if cursor .peek()? .filter(|next_byte| (b'0'..=b'9').contains(next_byte)) .is_none() => - Some(0x0000 /* NULL */), + Some((0x0000 /* NULL */, None)), 0x0078 /* x */ => { - Some(Self::take_hex_escape_sequence(cursor, start_pos)?) + Some((Self::take_hex_escape_sequence(cursor, start_pos)?, None)) } 0x0075 /* u */ => { - Some(Self::take_unicode_escape_sequence(cursor, start_pos)?) 
+ Some((Self::take_unicode_escape_sequence(cursor, start_pos)?, None)) } 0x0038 /* 8 */ | 0x0039 /* 9 */ => { // Grammar: NonOctalDecimalEscapeSequence @@ -209,7 +214,7 @@ impl StringLiteral { start_pos, )); } - Some(escape_ch) + Some((escape_ch, Some(EscapeSequence::NonOctalDecimal))) } _ if (0x0030..=0x0037 /* '0'..='7' */).contains(&escape_ch) => { if is_template_literal { @@ -226,10 +231,10 @@ impl StringLiteral { )); } - Some(Self::take_legacy_octal_escape_sequence( + Some((Self::take_legacy_octal_escape_sequence( cursor, escape_ch.try_into().expect("an ascii char must not fail to convert"), - )?) + )?, Some(EscapeSequence::LegacyOctal))) } _ if Self::is_line_terminator(escape_ch) => { // Grammar: LineContinuation @@ -238,7 +243,7 @@ impl StringLiteral { None } _ => { - Some(escape_ch) + Some((escape_ch, None)) } }; diff --git a/boa_parser/src/lexer/template.rs b/boa_parser/src/lexer/template.rs index 7a666d745e..23e92854be 100644 --- a/boa_parser/src/lexer/template.rs +++ b/boa_parser/src/lexer/template.rs @@ -60,7 +60,7 @@ impl TemplateString { true, )?; - if let Some(escape_value) = escape_value { + if let Some((escape_value, _)) = escape_value { buf.push_code_point(escape_value); } } diff --git a/boa_parser/src/lexer/tests.rs b/boa_parser/src/lexer/tests.rs index 01616e1f4d..40f984df44 100644 --- a/boa_parser/src/lexer/tests.rs +++ b/boa_parser/src/lexer/tests.rs @@ -2,8 +2,9 @@ #![allow(clippy::indexing_slicing)] use crate::lexer::{ - template::TemplateString, token::Numeric, Cursor, Error, Interner, Lexer, Position, Punctuator, - Read, Span, TokenKind, + template::TemplateString, + token::{ContainsEscapeSequence, EscapeSequence, Numeric}, + Cursor, Error, Interner, Lexer, Position, Punctuator, Read, Span, TokenKind, }; use boa_ast::Keyword; use boa_interner::Sym; @@ -94,9 +95,18 @@ fn check_identifier() { TokenKind::identifier( interner.get_or_intern_static("x\u{200C}\u{200D}", utf16!("x\u{200C}\u{200D}")), ), - 
TokenKind::identifier(interner.get_or_intern_static("x", utf16!("x"))), - TokenKind::identifier(interner.get_or_intern_static("xx", utf16!("xx"))), - TokenKind::identifier(interner.get_or_intern_static("xxx", utf16!("xxx"))), + TokenKind::Identifier(( + interner.get_or_intern_static("x", utf16!("x")), + ContainsEscapeSequence(true), + )), + TokenKind::Identifier(( + interner.get_or_intern_static("xx", utf16!("xx")), + ContainsEscapeSequence(true), + )), + TokenKind::Identifier(( + interner.get_or_intern_static("xxx", utf16!("xxx")), + ContainsEscapeSequence(true), + )), ]; expect_tokens(&mut lexer, &expected, interner); @@ -141,8 +151,8 @@ fn check_string() { let a_sym = interner.get_or_intern_static("aaa", utf16!("aaa")); let b_sym = interner.get_or_intern_static("bbb", utf16!("bbb")); let expected = [ - TokenKind::string_literal(a_sym), - TokenKind::string_literal(b_sym), + TokenKind::string_literal(a_sym, None), + TokenKind::string_literal(b_sym, None), ]; expect_tokens(&mut lexer, &expected, interner); @@ -305,7 +315,7 @@ fn check_variable_definition_tokens() { TokenKind::Keyword((Keyword::Let, false)), TokenKind::identifier(a_sym), TokenKind::Punctuator(Punctuator::Assign), - TokenKind::string_literal(hello_sym), + TokenKind::string_literal(hello_sym, None), TokenKind::Punctuator(Punctuator::Semicolon), ]; @@ -943,7 +953,7 @@ fn string_unicode() { let sym = interner.get_or_intern_static("中文", utf16!("中文")); let expected = [ - TokenKind::StringLiteral(sym), + TokenKind::StringLiteral((sym, None)), TokenKind::Punctuator(Punctuator::Semicolon), ]; @@ -957,7 +967,7 @@ fn string_unicode_escape_with_braces() { let sym = interner.get_or_intern_static("{\u{20ac}\u{a0}\u{a0}}", utf16!("{\u{20ac}\u{a0}\u{a0}}")); - let expected = [TokenKind::StringLiteral(sym)]; + let expected = [TokenKind::StringLiteral((sym, None))]; expect_tokens(&mut lexer, &expected, interner); @@ -992,7 +1002,7 @@ fn string_unicode_escape_with_braces_2() { let interner = &mut Interner::default(); 
let sym = interner.get_or_intern_static("\u{20ac}\u{a0}\u{a0}", utf16!("\u{20ac}\u{a0}\u{a0}")); - let expected = [TokenKind::StringLiteral(sym)]; + let expected = [TokenKind::StringLiteral((sym, None))]; expect_tokens(&mut lexer, &expected, interner); } @@ -1005,7 +1015,7 @@ fn string_with_single_escape() { let interner = &mut Interner::default(); let sym = interner.get_or_intern_static("Б", utf16!("Б")); - let expected = [TokenKind::StringLiteral(sym)]; + let expected = [TokenKind::StringLiteral((sym, None))]; expect_tokens(&mut lexer, &expected, interner); } @@ -1027,7 +1037,10 @@ fn string_legacy_octal_escape() { let interner = &mut Interner::default(); let sym = interner.get_or_intern(expected.encode_utf16().collect::>().as_slice()); - let expected_tokens = [TokenKind::StringLiteral(sym)]; + let expected_tokens = [TokenKind::StringLiteral(( + sym, + Some(EscapeSequence::LegacyOctal), + ))]; expect_tokens(&mut lexer, &expected_tokens, interner); } @@ -1057,7 +1070,7 @@ fn string_zero_escape() { let interner = &mut Interner::default(); let sym = interner.get_or_intern(expected.encode_utf16().collect::>().as_slice()); - let expected_tokens = [TokenKind::StringLiteral(sym)]; + let expected_tokens = [TokenKind::StringLiteral((sym, None))]; expect_tokens(&mut lexer, &expected_tokens, interner); } @@ -1072,7 +1085,10 @@ fn string_non_octal_decimal_escape() { let interner = &mut Interner::default(); let sym = interner.get_or_intern(expected.encode_utf16().collect::>().as_slice()); - let expected_tokens = [TokenKind::StringLiteral(sym)]; + let expected_tokens = [TokenKind::StringLiteral(( + sym, + Some(EscapeSequence::NonOctalDecimal), + ))]; expect_tokens(&mut lexer, &expected_tokens, interner); } @@ -1101,7 +1117,7 @@ fn string_line_continuation() { let interner = &mut Interner::default(); let sym = interner.get_or_intern_static("hello world", utf16!("hello world")); - let expected_tokens = [TokenKind::StringLiteral(sym)]; + let expected_tokens = 
[TokenKind::StringLiteral((sym, None))]; expect_tokens(&mut lexer, &expected_tokens, interner); } diff --git a/boa_parser/src/lexer/token.rs b/boa_parser/src/lexer/token.rs index 5fcd06cb1d..f7bf2ba06a 100644 --- a/boa_parser/src/lexer/token.rs +++ b/boa_parser/src/lexer/token.rs @@ -99,7 +99,7 @@ pub enum TokenKind { EOF, /// An identifier. - Identifier(Sym), + Identifier((Sym, ContainsEscapeSequence)), /// A private identifier. PrivateIdentifier(Sym), @@ -117,7 +117,7 @@ pub enum TokenKind { Punctuator(Punctuator), /// A string literal. - StringLiteral(Sym), + StringLiteral((Sym, Option)), /// A part of a template literal without substitution. TemplateNoSubstitution(TemplateString), @@ -175,7 +175,7 @@ impl TokenKind { /// Creates an `Identifier` token type. #[must_use] pub const fn identifier(ident: Sym) -> Self { - Self::Identifier(ident) + Self::Identifier((ident, ContainsEscapeSequence(false))) } /// Creates a `NumericLiteral` token kind. @@ -194,8 +194,8 @@ impl TokenKind { /// Creates a `StringLiteral` token type. #[must_use] - pub const fn string_literal(lit: Sym) -> Self { - Self::StringLiteral(lit) + pub const fn string_literal(lit: Sym, escape_sequence: Option) -> Self { + Self::StringLiteral((lit, escape_sequence)) } /// Creates a `TemplateMiddle` token type. 
@@ -234,7 +234,7 @@ impl TokenKind { match *self { Self::BooleanLiteral(val) => val.to_string(), Self::EOF => "end of file".to_owned(), - Self::Identifier(ident) => interner.resolve_expect(ident).to_string(), + Self::Identifier((ident, _)) => interner.resolve_expect(ident).to_string(), Self::PrivateIdentifier(ident) => format!("#{}", interner.resolve_expect(ident)), Self::Keyword((word, _)) => word.to_string(), Self::NullLiteral => "null".to_owned(), @@ -242,7 +242,7 @@ impl TokenKind { Self::NumericLiteral(Numeric::Integer(num)) => num.to_string(), Self::NumericLiteral(Numeric::BigInt(ref num)) => format!("{num}n"), Self::Punctuator(punc) => punc.to_string(), - Self::StringLiteral(lit) => interner.resolve_expect(lit).to_string(), + Self::StringLiteral((lit, _)) => interner.resolve_expect(lit).to_string(), Self::TemplateNoSubstitution(ts) | Self::TemplateMiddle(ts) => { interner.resolve_expect(ts.as_raw()).to_string() } @@ -258,3 +258,29 @@ impl TokenKind { } } } + +/// Indicates the type of an escape sequence. +#[cfg_attr(feature = "deser", derive(serde::Serialize, serde::Deserialize))] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum EscapeSequence { + /// A legacy escape sequence starting with `0` - `7`. + /// + /// More information: + /// - [ECMAScript reference][spec] + /// + /// [spec]: https://tc39.es/ecma262/#prod-LegacyOctalEscapeSequence + LegacyOctal, + + /// A non-octal decimal escape sequence starting with `8` or `9`. + /// + /// More information: + /// - [ECMAScript reference][spec] + /// + /// [spec]: https://tc39.es/ecma262/#prod-NonOctalDecimalEscapeSequence + NonOctalDecimal, +} + +/// Indicates if an identifier contains an escape sequence. 
+#[cfg_attr(feature = "deser", derive(serde::Serialize, serde::Deserialize))] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct ContainsEscapeSequence(pub bool); diff --git a/boa_parser/src/parser/expression/identifiers.rs b/boa_parser/src/parser/expression/identifiers.rs index e82da53d18..ac325d38fd 100644 --- a/boa_parser/src/parser/expression/identifiers.rs +++ b/boa_parser/src/parser/expression/identifiers.rs @@ -65,7 +65,7 @@ where let token = cursor.next(interner).or_abrupt()?; match token.kind() { - TokenKind::Identifier(ident) + TokenKind::Identifier((ident, _)) if cursor.strict_mode() && RESERVED_IDENTIFIERS_STRICT.contains(ident) => { Err(Error::general( @@ -73,7 +73,7 @@ where token.span().start(), )) } - TokenKind::Identifier(ident) => Ok(Identifier::new(*ident)), + TokenKind::Identifier((ident, _)) => Ok(Identifier::new(*ident)), TokenKind::Keyword((Keyword::Let, _)) if cursor.strict_mode() => Err(Error::general( "using future reserved keyword not allowed in strict mode IdentifierReference", token.span().start(), @@ -155,19 +155,19 @@ where let next_token = cursor.next(interner).or_abrupt()?; match next_token.kind() { - TokenKind::Identifier(Sym::ARGUMENTS) if cursor.strict_mode() => { + TokenKind::Identifier((Sym::ARGUMENTS, _)) if cursor.strict_mode() => { Err(Error::lex(LexError::Syntax( "unexpected identifier 'arguments' in strict mode".into(), next_token.span().start(), ))) } - TokenKind::Identifier(Sym::EVAL) if cursor.strict_mode() => { + TokenKind::Identifier((Sym::EVAL, _)) if cursor.strict_mode() => { Err(Error::lex(LexError::Syntax( "unexpected identifier 'eval' in strict mode".into(), next_token.span().start(), ))) } - TokenKind::Identifier(ident) => { + TokenKind::Identifier((ident, _)) => { if cursor.strict_mode() && RESERVED_IDENTIFIERS_STRICT.contains(ident) { return Err(Error::general( "using future reserved keyword not allowed in strict mode", diff --git a/boa_parser/src/parser/expression/left_hand_side/call.rs 
b/boa_parser/src/parser/expression/left_hand_side/call.rs index 14ec399af1..ecd35bc338 100644 --- a/boa_parser/src/parser/expression/left_hand_side/call.rs +++ b/boa_parser/src/parser/expression/left_hand_side/call.rs @@ -98,7 +98,9 @@ where cursor.advance(interner); let access = match cursor.next(interner).or_abrupt()?.kind() { - TokenKind::Identifier(name) => SimplePropertyAccess::new(lhs, *name).into(), + TokenKind::Identifier((name, _)) => { + SimplePropertyAccess::new(lhs, *name).into() + } TokenKind::Keyword((kw, _)) => { SimplePropertyAccess::new(lhs, kw.to_sym(interner)).into() } diff --git a/boa_parser/src/parser/expression/left_hand_side/member.rs b/boa_parser/src/parser/expression/left_hand_side/member.rs index 4df236e656..71b3b209ed 100644 --- a/boa_parser/src/parser/expression/left_hand_side/member.rs +++ b/boa_parser/src/parser/expression/left_hand_side/member.rs @@ -7,7 +7,7 @@ use super::arguments::Arguments; use crate::{ - lexer::{InputElement, TokenKind}, + lexer::{token::ContainsEscapeSequence, InputElement, TokenKind}, parser::{ expression::{ left_hand_side::template::TaggedTemplateLiteral, primary::PrimaryExpression, Expression, @@ -85,7 +85,13 @@ where if cursor.next_if(Punctuator::Dot, interner)?.is_some() { let token = cursor.next(interner).or_abrupt()?; match token.kind() { - TokenKind::Identifier(Sym::TARGET) => { + TokenKind::Identifier((Sym::TARGET, ContainsEscapeSequence(true))) => { + return Err(Error::general( + "'new.target' must not contain escaped characters", + token.span().start(), + )); + } + TokenKind::Identifier((Sym::TARGET, ContainsEscapeSequence(false))) => { return Ok(ast::Expression::NewTarget) } _ => { @@ -116,7 +122,7 @@ where TokenKind::Punctuator(Punctuator::Dot) => { let token = cursor.next(interner).or_abrupt()?; let field = match token.kind() { - TokenKind::Identifier(name) => { + TokenKind::Identifier((name, _)) => { SuperPropertyAccess::new(PropertyAccessField::from(*name)) } TokenKind::Keyword((kw, _)) => { @@ 
-178,7 +184,9 @@ where let token = cursor.next(interner).or_abrupt()?; let access = match token.kind() { - TokenKind::Identifier(name) => SimplePropertyAccess::new(lhs, *name).into(), + TokenKind::Identifier((name, _)) => { + SimplePropertyAccess::new(lhs, *name).into() + } TokenKind::Keyword((kw, _)) => { SimplePropertyAccess::new(lhs, kw.to_sym(interner)).into() } diff --git a/boa_parser/src/parser/expression/left_hand_side/optional/mod.rs b/boa_parser/src/parser/expression/left_hand_side/optional/mod.rs index f8dbd51cbb..f44bffb2e3 100644 --- a/boa_parser/src/parser/expression/left_hand_side/optional/mod.rs +++ b/boa_parser/src/parser/expression/left_hand_side/optional/mod.rs @@ -66,7 +66,7 @@ where interner: &mut Interner, ) -> ParseResult { let item = match token.kind() { - TokenKind::Identifier(name) => OptionalOperationKind::SimplePropertyAccess { + TokenKind::Identifier((name, _)) => OptionalOperationKind::SimplePropertyAccess { field: PropertyAccessField::Const(*name), }, TokenKind::Keyword((kw, _)) => OptionalOperationKind::SimplePropertyAccess { diff --git a/boa_parser/src/parser/expression/primary/mod.rs b/boa_parser/src/parser/expression/primary/mod.rs index 37e5e7da0b..f34bacabc2 100644 --- a/boa_parser/src/parser/expression/primary/mod.rs +++ b/boa_parser/src/parser/expression/primary/mod.rs @@ -209,7 +209,7 @@ where )) => IdentifierReference::new(self.allow_yield, self.allow_await) .parse(cursor, interner) .map(Into::into), - TokenKind::StringLiteral(lit) => { + TokenKind::StringLiteral((lit, _)) => { let node = Literal::from(*lit).into(); cursor.advance(interner); Ok(node) diff --git a/boa_parser/src/parser/expression/primary/object_initializer/mod.rs b/boa_parser/src/parser/expression/primary/object_initializer/mod.rs index 31b8219b15..2d497785a7 100644 --- a/boa_parser/src/parser/expression/primary/object_initializer/mod.rs +++ b/boa_parser/src/parser/expression/primary/object_initializer/mod.rs @@ -11,7 +11,10 @@ mod tests; use crate::{ - 
lexer::{token::Numeric, Error as LexError, TokenKind}, + lexer::{ + token::{ContainsEscapeSequence, Numeric}, + Error as LexError, TokenKind, + }, parser::{ expression::{identifiers::IdentifierReference, AssignmentExpression}, function::{FormalParameter, FormalParameters, FunctionBody, UniqueFormalParameters}, @@ -198,6 +201,7 @@ where cursor.peek(1, interner).or_abrupt()?.kind(), TokenKind::Punctuator(Punctuator::OpenParen | Punctuator::Colon) ); + let token = cursor.peek(0, interner).or_abrupt()?; match token.kind() { TokenKind::Keyword((Keyword::Async, true)) if is_keyword => { @@ -258,7 +262,9 @@ where _ => {} } - if cursor.peek(0, interner).or_abrupt()?.kind() == &TokenKind::Punctuator(Punctuator::Mul) { + let token = cursor.peek(0, interner).or_abrupt()?; + + if token.kind() == &TokenKind::Punctuator(Punctuator::Mul) { let position = cursor.peek(0, interner).or_abrupt()?.span().start(); let (class_element_name, method) = GeneratorMethod::new(self.allow_yield, self.allow_await).parse(cursor, interner)?; @@ -284,6 +290,13 @@ where } } + let set_or_get_escaped_position = match token.kind() { + TokenKind::Identifier((Sym::GET | Sym::SET, ContainsEscapeSequence(true))) => { + Some(token.span().start()) + } + _ => None, + }; + let mut property_name = PropertyName::new(self.allow_yield, self.allow_await).parse(cursor, interner)?; @@ -306,6 +319,13 @@ where match property_name { // MethodDefinition[?Yield, ?Await] -> get ClassElementName[?Yield, ?Await] ( ) { FunctionBody[~Yield, ~Await] } property::PropertyName::Literal(str) if str == Sym::GET && !ordinary_method => { + if let Some(position) = set_or_get_escaped_position { + return Err(Error::general( + "Keyword must not contain escaped characters", + position, + )); + } + let position = cursor.peek(0, interner).or_abrupt()?.span().start(); property_name = PropertyName::new(self.allow_yield, self.allow_await) @@ -359,6 +379,13 @@ where } // MethodDefinition[?Yield, ?Await] -> set ClassElementName[?Yield, ?Await] ( 
PropertySetParameterList ) { FunctionBody[~Yield, ~Await] } property::PropertyName::Literal(str) if str == Sym::SET && !ordinary_method => { + if let Some(position) = set_or_get_escaped_position { + return Err(Error::general( + "Keyword must not contain escaped characters", + position, + )); + } + property_name = PropertyName::new(self.allow_yield, self.allow_await) .parse(cursor, interner)?; @@ -551,7 +578,9 @@ where cursor.expect(Punctuator::CloseBracket, "expected token ']'", interner)?; return Ok(node.into()); } - TokenKind::Identifier(name) | TokenKind::StringLiteral(name) => (*name).into(), + TokenKind::Identifier((name, _)) | TokenKind::StringLiteral((name, _)) => { + (*name).into() + } TokenKind::NumericLiteral(num) => match num { Numeric::Rational(num) => Expression::Literal(Literal::from(*num)).into(), Numeric::Integer(num) => Expression::Literal(Literal::from(*num)).into(), diff --git a/boa_parser/src/parser/statement/declaration/hoistable/class_decl/mod.rs b/boa_parser/src/parser/statement/declaration/hoistable/class_decl/mod.rs index cc03fc50ea..591c4c534e 100644 --- a/boa_parser/src/parser/statement/declaration/hoistable/class_decl/mod.rs +++ b/boa_parser/src/parser/statement/declaration/hoistable/class_decl/mod.rs @@ -2,7 +2,7 @@ mod tests; use crate::{ - lexer::{Error as LexError, TokenKind}, + lexer::{token::ContainsEscapeSequence, Error as LexError, TokenKind}, parser::{ expression::{ AssignmentExpression, AsyncGeneratorMethod, AsyncMethod, BindingIdentifier, @@ -595,7 +595,8 @@ where cursor.advance(interner); return Ok((None, None)); } - TokenKind::Identifier(Sym::STATIC) => { + TokenKind::Identifier((Sym::STATIC, ContainsEscapeSequence(contains_escape))) => { + let contains_escape = *contains_escape; let token = cursor.peek(1, interner).or_abrupt()?; match token.kind() { TokenKind::Identifier(_) @@ -607,6 +608,12 @@ where | TokenKind::Punctuator( Punctuator::OpenBracket | Punctuator::Mul | Punctuator::OpenBlock, ) => { + if contains_escape { + 
return Err(Error::general( + "keyword must not contain escaped characters", + token.span().start(), + )); + } // this "static" is a keyword. cursor.advance(interner); true @@ -630,7 +637,7 @@ where let token = cursor.peek(0, interner).or_abrupt()?; let position = token.span().start(); let element = match token.kind() { - TokenKind::Identifier(Sym::CONSTRUCTOR) if !r#static => { + TokenKind::Identifier((Sym::CONSTRUCTOR, _)) if !r#static => { cursor.advance(interner); let strict = cursor.strict_mode(); cursor.set_strict_mode(true); @@ -708,11 +715,13 @@ where TokenKind::Punctuator(Punctuator::Mul) => { let token = cursor.peek(1, interner).or_abrupt()?; let name_position = token.span().start(); - if token.kind() == &TokenKind::Identifier(Sym::CONSTRUCTOR) && !r#static { - return Err(Error::general( - "class constructor may not be a generator method", - token.span().start(), - )); + if !r#static { + if let TokenKind::Identifier((Sym::CONSTRUCTOR, _)) = token.kind() { + return Err(Error::general( + "class constructor may not be a generator method", + token.span().start(), + )); + } } let strict = cursor.strict_mode(); cursor.set_strict_mode(true); @@ -764,13 +773,20 @@ where TokenKind::Punctuator(Punctuator::Mul) => { let token = cursor.peek(1, interner).or_abrupt()?; let name_position = token.span().start(); - if token.kind() == &TokenKind::PrivateIdentifier(Sym::CONSTRUCTOR) - || token.kind() == &TokenKind::Identifier(Sym::CONSTRUCTOR) && !r#static - { - return Err(Error::general( - "class constructor may not be a generator method", - token.span().start(), - )); + match token.kind() { + TokenKind::PrivateIdentifier(Sym::CONSTRUCTOR) => { + return Err(Error::general( + "class constructor may not be a private method", + token.span().start(), + )); + } + TokenKind::Identifier((Sym::CONSTRUCTOR, _)) if !r#static => { + return Err(Error::general( + "class constructor may not be a generator method", + token.span().start(), + )); + } + _ => {} } let strict = 
cursor.strict_mode(); cursor.set_strict_mode(true); @@ -808,7 +824,7 @@ where } } } - TokenKind::Identifier(Sym::CONSTRUCTOR) if !r#static => { + TokenKind::Identifier((Sym::CONSTRUCTOR, _)) if !r#static => { return Err(Error::general( "class constructor may not be an async method", token.span().start(), @@ -859,7 +875,13 @@ where } } } - TokenKind::Identifier(Sym::GET) if is_keyword => { + TokenKind::Identifier((Sym::GET, ContainsEscapeSequence(true))) if is_keyword => { + return Err(Error::general( + "keyword must not contain escaped characters", + token.span().start(), + )) + } + TokenKind::Identifier((Sym::GET, ContainsEscapeSequence(false))) if is_keyword => { cursor.advance(interner); let token = cursor.peek(0, interner).or_abrupt()?; match token.kind() { @@ -911,7 +933,7 @@ where ) } } - TokenKind::Identifier(Sym::CONSTRUCTOR) if !r#static => { + TokenKind::Identifier((Sym::CONSTRUCTOR, _)) if !r#static => { return Err(Error::general( "class constructor may not be a getter method", token.span().start(), @@ -984,7 +1006,13 @@ where } } } - TokenKind::Identifier(Sym::SET) if is_keyword => { + TokenKind::Identifier((Sym::SET, ContainsEscapeSequence(true))) if is_keyword => { + return Err(Error::general( + "keyword must not contain escaped characters", + token.span().start(), + )) + } + TokenKind::Identifier((Sym::SET, ContainsEscapeSequence(false))) if is_keyword => { cursor.advance(interner); let token = cursor.peek(0, interner).or_abrupt()?; match token.kind() { @@ -1036,7 +1064,7 @@ where ) } } - TokenKind::Identifier(Sym::CONSTRUCTOR) if !r#static => { + TokenKind::Identifier((Sym::CONSTRUCTOR, _)) if !r#static => { return Err(Error::general( "class constructor may not be a setter method", token.span().start(), diff --git a/boa_parser/src/parser/statement/mod.rs b/boa_parser/src/parser/statement/mod.rs index 3d47d4cda2..79fc5ac8a2 100644 --- a/boa_parser/src/parser/statement/mod.rs +++ b/boa_parser/src/parser/statement/mod.rs @@ -37,7 +37,7 @@ use self::{ 
variable::VariableStatement, }; use crate::{ - lexer::{Error as LexError, InputElement, Token, TokenKind}, + lexer::{token::EscapeSequence, Error as LexError, InputElement, Token, TokenKind}, parser::{ expression::{BindingIdentifier, Initializer, PropertyName}, AllowAwait, AllowReturn, AllowYield, Cursor, OrAbrupt, ParseResult, TokenParser, @@ -282,10 +282,17 @@ where let global_strict = cursor.strict_mode(); let mut directive_prologues = self.directive_prologues; let mut strict = self.strict; + let mut string_literal_escape_sequence = None; loop { match cursor.peek(0, interner)? { Some(token) if self.break_nodes.contains(token.kind()) => break, + Some(token) if directive_prologues && string_literal_escape_sequence.is_none() => { + if let TokenKind::StringLiteral((_, Some(escape_sequence))) = token.kind() { + string_literal_escape_sequence = + Some((token.span().start(), *escape_sequence)); + } + } None => break, _ => {} } @@ -306,6 +313,21 @@ where ) { cursor.set_strict_mode(true); strict = true; + + if let Some((position, escape_sequence)) = string_literal_escape_sequence { + match escape_sequence { + EscapeSequence::LegacyOctal => return Err(Error::general( + "legacy octal escape sequences are not allowed in strict mode", + position, + )), + EscapeSequence::NonOctalDecimal => { + return Err(Error::general( + "decimal escape sequences are not allowed in strict mode", + position, + )) + } + } + } } } else { directive_prologues = false;