From 4a68fb51207546b0a131ee81142265b6b849d5cb Mon Sep 17 00:00:00 2001 From: Veera <32646674+veera-sivarajan@users.noreply.github.com> Date: Tue, 30 May 2023 21:36:20 -0400 Subject: [PATCH] Deny Unicode Escapes in boolean and null expressions (#2931) * Deny Unicode Escapes in boolean and null expressions * Add tests --- boa_parser/src/lexer/identifier.rs | 12 +++++++++--- boa_parser/src/lexer/tests.rs | 4 ++-- boa_parser/src/lexer/token.rs | 12 ++++++------ .../src/parser/expression/assignment/yield.rs | 2 +- .../parser/expression/left_hand_side/call.rs | 8 +++++--- .../parser/expression/left_hand_side/member.rs | 14 ++++++++------ .../expression/left_hand_side/optional/mod.rs | 18 +++++++++++------- .../src/parser/expression/primary/mod.rs | 13 +++++++++---- .../primary/object_initializer/mod.rs | 4 ++-- .../declaration/hoistable/class_decl/mod.rs | 8 ++++---- boa_parser/src/parser/tests/mod.rs | 15 +++++++++++++++ 11 files changed, 72 insertions(+), 38 deletions(-) diff --git a/boa_parser/src/lexer/identifier.rs b/boa_parser/src/lexer/identifier.rs index 845011e1f7..1431aa594b 100644 --- a/boa_parser/src/lexer/identifier.rs +++ b/boa_parser/src/lexer/identifier.rs @@ -94,9 +94,15 @@ impl Tokenizer for Identifier { Self::take_identifier_name(cursor, start_pos, self.init)?; let token_kind = match identifier_name.parse() { - Ok(Keyword::True) => TokenKind::BooleanLiteral(true), - Ok(Keyword::False) => TokenKind::BooleanLiteral(false), - Ok(Keyword::Null) => TokenKind::NullLiteral, + Ok(Keyword::True) => { + TokenKind::BooleanLiteral((true, ContainsEscapeSequence(contains_escaped_chars))) + } + Ok(Keyword::False) => { + TokenKind::BooleanLiteral((false, ContainsEscapeSequence(contains_escaped_chars))) + } + Ok(Keyword::Null) => { + TokenKind::NullLiteral(ContainsEscapeSequence(contains_escaped_chars)) + } Ok(keyword) => TokenKind::Keyword((keyword, contains_escaped_chars)), _ => TokenKind::IdentifierName(( interner.get_or_intern(identifier_name.as_str()), diff --git 
a/boa_parser/src/lexer/tests.rs b/boa_parser/src/lexer/tests.rs index 1b489c0a53..1d0aba274c 100644 --- a/boa_parser/src/lexer/tests.rs +++ b/boa_parser/src/lexer/tests.rs @@ -38,7 +38,7 @@ fn check_single_line_comment() { TokenKind::Keyword((Keyword::Var, false)), TokenKind::LineTerminator, TokenKind::LineTerminator, - TokenKind::BooleanLiteral(true), + TokenKind::BooleanLiteral((true, ContainsEscapeSequence(false))), ]; expect_tokens(&mut lexer, &expected, interner); @@ -54,7 +54,7 @@ fn check_single_line_comment_with_crlf_ending() { TokenKind::Keyword((Keyword::Var, false)), TokenKind::LineTerminator, TokenKind::LineTerminator, - TokenKind::BooleanLiteral(true), + TokenKind::BooleanLiteral((true, ContainsEscapeSequence(false))), ]; expect_tokens(&mut lexer, &expected, interner); diff --git a/boa_parser/src/lexer/token.rs b/boa_parser/src/lexer/token.rs index 1b6fe79642..7f5002fc9c 100644 --- a/boa_parser/src/lexer/token.rs +++ b/boa_parser/src/lexer/token.rs @@ -95,7 +95,7 @@ impl From for Numeric { #[derive(Clone, PartialEq, Debug)] pub enum TokenKind { /// A boolean literal, which is either `true` or `false`. - BooleanLiteral(bool), + BooleanLiteral((bool, ContainsEscapeSequence)), /// The end of the file. EOF, @@ -118,7 +118,7 @@ pub enum TokenKind { /// The [`null` literal][spec]. /// /// [spec]: https://tc39.es/ecma262/#prod-NullLiteral - NullLiteral, + NullLiteral(ContainsEscapeSequence), /// A numeric literal. NumericLiteral(Numeric), @@ -152,7 +152,7 @@ pub enum TokenKind { impl From<bool> for TokenKind { #[inline] fn from(oth: bool) -> Self { - Self::BooleanLiteral(oth) + Self::BooleanLiteral((oth, ContainsEscapeSequence(false))) } } @@ -182,7 +182,7 @@ impl TokenKind { #[inline] #[must_use] pub const fn boolean_literal(lit: bool) -> Self { - Self::BooleanLiteral(lit) + Self::BooleanLiteral((lit, ContainsEscapeSequence(false))) } /// Creates an `EOF` token kind. 
@@ -261,12 +261,12 @@ impl TokenKind { #[must_use] pub fn to_string(&self, interner: &Interner) -> String { match *self { - Self::BooleanLiteral(val) => val.to_string(), + Self::BooleanLiteral((val, _)) => val.to_string(), Self::EOF => "end of file".to_owned(), Self::IdentifierName((ident, _)) => interner.resolve_expect(ident).to_string(), Self::PrivateIdentifier(ident) => format!("#{}", interner.resolve_expect(ident)), Self::Keyword((word, _)) => word.to_string(), - Self::NullLiteral => "null".to_owned(), + Self::NullLiteral(_) => "null".to_owned(), Self::NumericLiteral(Numeric::Rational(num)) => num.to_string(), Self::NumericLiteral(Numeric::Integer(num)) => num.to_string(), Self::NumericLiteral(Numeric::BigInt(ref num)) => format!("{num}n"), diff --git a/boa_parser/src/parser/expression/assignment/yield.rs b/boa_parser/src/parser/expression/assignment/yield.rs index fdac81e1e0..dc25e3817b 100644 --- a/boa_parser/src/parser/expression/assignment/yield.rs +++ b/boa_parser/src/parser/expression/assignment/yield.rs @@ -103,7 +103,7 @@ where _, )) | TokenKind::BooleanLiteral(_) - | TokenKind::NullLiteral + | TokenKind::NullLiteral(_) | TokenKind::StringLiteral(_) | TokenKind::TemplateNoSubstitution(_) | TokenKind::NumericLiteral(_) diff --git a/boa_parser/src/parser/expression/left_hand_side/call.rs b/boa_parser/src/parser/expression/left_hand_side/call.rs index cac1d4a9d0..b185363b80 100644 --- a/boa_parser/src/parser/expression/left_hand_side/call.rs +++ b/boa_parser/src/parser/expression/left_hand_side/call.rs @@ -140,13 +140,15 @@ where TokenKind::Keyword((kw, _)) => { SimplePropertyAccess::new(lhs, kw.to_sym()).into() } - TokenKind::BooleanLiteral(true) => { + TokenKind::BooleanLiteral((true, _)) => { SimplePropertyAccess::new(lhs, Sym::TRUE).into() } - TokenKind::BooleanLiteral(false) => { + TokenKind::BooleanLiteral((false, _)) => { SimplePropertyAccess::new(lhs, Sym::FALSE).into() } - TokenKind::NullLiteral => SimplePropertyAccess::new(lhs, Sym::NULL).into(), 
+ TokenKind::NullLiteral(_) => { + SimplePropertyAccess::new(lhs, Sym::NULL).into() + } TokenKind::PrivateIdentifier(name) => { PrivatePropertyAccess::new(lhs, PrivateName::new(*name)).into() } diff --git a/boa_parser/src/parser/expression/left_hand_side/member.rs b/boa_parser/src/parser/expression/left_hand_side/member.rs index e58db2dc89..602958e7cb 100644 --- a/boa_parser/src/parser/expression/left_hand_side/member.rs +++ b/boa_parser/src/parser/expression/left_hand_side/member.rs @@ -171,13 +171,13 @@ where TokenKind::Keyword((kw, _)) => { SuperPropertyAccess::new(kw.to_sym().into()) } - TokenKind::BooleanLiteral(true) => { + TokenKind::BooleanLiteral((true, _)) => { SuperPropertyAccess::new(Sym::TRUE.into()) } - TokenKind::BooleanLiteral(false) => { + TokenKind::BooleanLiteral((false, _)) => { SuperPropertyAccess::new(Sym::FALSE.into()) } - TokenKind::NullLiteral => SuperPropertyAccess::new(Sym::NULL.into()), + TokenKind::NullLiteral(_) => SuperPropertyAccess::new(Sym::NULL.into()), TokenKind::PrivateIdentifier(_) => { return Err(Error::general( "unexpected private identifier", @@ -233,13 +233,15 @@ where TokenKind::Keyword((kw, _)) => { SimplePropertyAccess::new(lhs, kw.to_sym()).into() } - TokenKind::BooleanLiteral(true) => { + TokenKind::BooleanLiteral((true, _)) => { SimplePropertyAccess::new(lhs, Sym::TRUE).into() } - TokenKind::BooleanLiteral(false) => { + TokenKind::BooleanLiteral((false, _)) => { SimplePropertyAccess::new(lhs, Sym::FALSE).into() } - TokenKind::NullLiteral => SimplePropertyAccess::new(lhs, Sym::NULL).into(), + TokenKind::NullLiteral(_) => { + SimplePropertyAccess::new(lhs, Sym::NULL).into() + } TokenKind::PrivateIdentifier(name) => { PrivatePropertyAccess::new(lhs, PrivateName::new(*name)).into() } diff --git a/boa_parser/src/parser/expression/left_hand_side/optional/mod.rs b/boa_parser/src/parser/expression/left_hand_side/optional/mod.rs index 8e73947778..725b40494f 100644 --- 
a/boa_parser/src/parser/expression/left_hand_side/optional/mod.rs +++ b/boa_parser/src/parser/expression/left_hand_side/optional/mod.rs @@ -73,13 +73,17 @@ where TokenKind::Keyword((kw, _)) => OptionalOperationKind::SimplePropertyAccess { field: PropertyAccessField::Const(kw.to_sym()), }, - TokenKind::BooleanLiteral(true) => OptionalOperationKind::SimplePropertyAccess { - field: PropertyAccessField::Const(Sym::TRUE), - }, - TokenKind::BooleanLiteral(false) => OptionalOperationKind::SimplePropertyAccess { - field: PropertyAccessField::Const(Sym::FALSE), - }, - TokenKind::NullLiteral => OptionalOperationKind::SimplePropertyAccess { + TokenKind::BooleanLiteral((true, _)) => { + OptionalOperationKind::SimplePropertyAccess { + field: PropertyAccessField::Const(Sym::TRUE), + } + } + TokenKind::BooleanLiteral((false, _)) => { + OptionalOperationKind::SimplePropertyAccess { + field: PropertyAccessField::Const(Sym::FALSE), + } + } + TokenKind::NullLiteral(_) => OptionalOperationKind::SimplePropertyAccess { field: PropertyAccessField::Const(Sym::NULL), }, TokenKind::PrivateIdentifier(name) => { diff --git a/boa_parser/src/parser/expression/primary/mod.rs b/boa_parser/src/parser/expression/primary/mod.rs index 16a8c258c7..28a52d5069 100644 --- a/boa_parser/src/parser/expression/primary/mod.rs +++ b/boa_parser/src/parser/expression/primary/mod.rs @@ -27,7 +27,10 @@ use self::{ object_initializer::ObjectLiteral, }; use crate::{ - lexer::{token::Numeric, InputElement, Token, TokenKind}, + lexer::{ + token::{ContainsEscapeSequence, Numeric}, + InputElement, Token, TokenKind, + }, parser::{ expression::{ identifiers::IdentifierReference, primary::template::TemplateLiteral, @@ -103,7 +106,9 @@ where let tok_position = tok.span().start(); match tok.kind() { - TokenKind::Keyword((Keyword::This, true)) => Err(Error::general( + TokenKind::Keyword((Keyword::This, true)) + | TokenKind::BooleanLiteral((_, ContainsEscapeSequence(true))) + | 
TokenKind::NullLiteral(ContainsEscapeSequence(true)) => Err(Error::general( "Keyword must not contain escaped characters", tok_position, )), @@ -187,12 +192,12 @@ where .parse(cursor, interner) .map(Into::into) } - TokenKind::BooleanLiteral(boolean) => { + TokenKind::BooleanLiteral((boolean, _)) => { let node = Literal::from(*boolean).into(); cursor.advance(interner); Ok(node) } - TokenKind::NullLiteral => { + TokenKind::NullLiteral(_) => { cursor.advance(interner); Ok(Literal::Null.into()) } diff --git a/boa_parser/src/parser/expression/primary/object_initializer/mod.rs b/boa_parser/src/parser/expression/primary/object_initializer/mod.rs index 58d77373fe..b791b99de9 100644 --- a/boa_parser/src/parser/expression/primary/object_initializer/mod.rs +++ b/boa_parser/src/parser/expression/primary/object_initializer/mod.rs @@ -600,8 +600,8 @@ where let (utf8, utf16) = word.as_str(); interner.get_or_intern_static(utf8, utf16).into() } - TokenKind::NullLiteral => (Sym::NULL).into(), - TokenKind::BooleanLiteral(bool) => match bool { + TokenKind::NullLiteral(_) => (Sym::NULL).into(), + TokenKind::BooleanLiteral((bool, _)) => match bool { true => Sym::TRUE.into(), false => Sym::FALSE.into(), }, diff --git a/boa_parser/src/parser/statement/declaration/hoistable/class_decl/mod.rs b/boa_parser/src/parser/statement/declaration/hoistable/class_decl/mod.rs index d9fc304939..a8cf001cff 100644 --- a/boa_parser/src/parser/statement/declaration/hoistable/class_decl/mod.rs +++ b/boa_parser/src/parser/statement/declaration/hoistable/class_decl/mod.rs @@ -589,7 +589,7 @@ where | TokenKind::StringLiteral(_) | TokenKind::NumericLiteral(_) | TokenKind::Keyword(_) - | TokenKind::NullLiteral + | TokenKind::NullLiteral(_) | TokenKind::PrivateIdentifier(_) | TokenKind::Punctuator( Punctuator::OpenBracket | Punctuator::Mul | Punctuator::OpenBlock, @@ -931,7 +931,7 @@ where | TokenKind::StringLiteral(_) | TokenKind::NumericLiteral(_) | TokenKind::Keyword(_) - | TokenKind::NullLiteral + | 
TokenKind::NullLiteral(_) | TokenKind::Punctuator(Punctuator::OpenBracket) => { let name_position = token.span().start(); let name = PropertyName::new(self.allow_yield, self.allow_await) @@ -1062,7 +1062,7 @@ where | TokenKind::StringLiteral(_) | TokenKind::NumericLiteral(_) | TokenKind::Keyword(_) - | TokenKind::NullLiteral + | TokenKind::NullLiteral(_) | TokenKind::Punctuator(Punctuator::OpenBracket) => { let name_position = token.span().start(); let name = PropertyName::new(self.allow_yield, self.allow_await) @@ -1223,7 +1223,7 @@ where | TokenKind::StringLiteral(_) | TokenKind::NumericLiteral(_) | TokenKind::Keyword(_) - | TokenKind::NullLiteral + | TokenKind::NullLiteral(_) | TokenKind::Punctuator(Punctuator::OpenBracket) => { let name_position = token.span().start(); let name = PropertyName::new(self.allow_yield, self.allow_await) diff --git a/boa_parser/src/parser/tests/mod.rs b/boa_parser/src/parser/tests/mod.rs index 3d8cb3cf1b..89744438cb 100644 --- a/boa_parser/src/parser/tests/mod.rs +++ b/boa_parser/src/parser/tests/mod.rs @@ -604,3 +604,18 @@ fn hashbang_use_strict_with_with_statement() { fn hashbang_comment() { check_script_parser(r"#!Comment Here", vec![], &mut Interner::default()); } + +#[test] +fn deny_unicode_escape_in_false_expression() { + check_invalid_script(r"let x = f\u{61}lse;"); +} + +#[test] +fn deny_unicode_escape_in_true_expression() { + check_invalid_script(r"let x = tru\u{65};"); +} + +#[test] +fn deny_unicode_escape_in_null_expression() { + check_invalid_script(r"let x = n\u{75}ll;"); +}