diff --git a/boa_parser/src/lexer/string.rs b/boa_parser/src/lexer/string.rs index c9f1844b54..0e0748f8f2 100644 --- a/boa_parser/src/lexer/string.rs +++ b/boa_parser/src/lexer/string.rs @@ -117,12 +117,12 @@ impl StringLiteral { start_pos: Position, terminator: StringTerminator, strict: bool, - ) -> Result<(Vec, Span, Option), Error> + ) -> Result<(Vec, Span, EscapeSequence), Error> where R: Read, { let mut buf = Vec::new(); - let mut escape_sequence = None; + let mut escape_sequence = EscapeSequence::empty(); loop { let ch_start_pos = cursor.pos(); @@ -135,15 +135,16 @@ impl StringLiteral { let _timer = Profiler::global().start_event("StringLiteral - escape sequence", "Lexing"); - if let Some((escape_value, escape)) = - Self::take_escape_sequence_or_line_continuation( - cursor, - ch_start_pos, - strict, - false, - )? - { - escape_sequence = escape_sequence.or(escape); + let (escape_value, escape) = Self::take_escape_sequence_or_line_continuation( + cursor, + ch_start_pos, + strict, + false, + )?; + + escape_sequence |= escape; + + if let Some(escape_value) = escape_value { buf.push_code_point(escape_value); } } @@ -169,7 +170,7 @@ impl StringLiteral { start_pos: Position, strict: bool, is_template_literal: bool, - ) -> Result)>, Error> + ) -> Result<(Option, EscapeSequence), Error> where R: Read, { @@ -181,25 +182,25 @@ impl StringLiteral { })?; let escape_value = match escape_ch { - 0x0062 /* b */ => Some((0x0008 /* */, None)), - 0x0074 /* t */ => Some((0x0009 /* */, None)), - 0x006E /* n */ => Some((0x000A /* */, None)), - 0x0076 /* v */ => Some((0x000B /* */, None)), - 0x0066 /* f */ => Some((0x000C /* */, None)), - 0x0072 /* r */ => Some((0x000D /* */, None)), - 0x0022 /* " */ => Some((0x0022 /* " */, None)), - 0x0027 /* ' */ => Some((0x0027 /* ' */, None)), - 0x005C /* \ */ => Some((0x005C /* \ */, None)), + 0x0062 /* b */ => (Some(0x0008 /* */), EscapeSequence::OTHER), + 0x0074 /* t */ => (Some(0x0009 /* */), EscapeSequence::OTHER), + 0x006E /* n */ => 
(Some(0x000A /* */), EscapeSequence::OTHER), + 0x0076 /* v */ => (Some(0x000B /* */), EscapeSequence::OTHER), + 0x0066 /* f */ => (Some(0x000C /* */), EscapeSequence::OTHER), + 0x0072 /* r */ => (Some(0x000D /* */), EscapeSequence::OTHER), + 0x0022 /* " */ => (Some(0x0022 /* " */), EscapeSequence::OTHER), + 0x0027 /* ' */ => (Some(0x0027 /* ' */), EscapeSequence::OTHER), + 0x005C /* \ */ => (Some(0x005C /* \ */), EscapeSequence::OTHER), 0x0030 /* 0 */ if cursor .peek()? .filter(u8::is_ascii_digit) .is_none() => - Some((0x0000 /* NULL */, None)), + (Some(0x0000 /* NULL */), EscapeSequence::OTHER), 0x0078 /* x */ => { - Some((Self::take_hex_escape_sequence(cursor, start_pos)?, None)) + (Some(Self::take_hex_escape_sequence(cursor, start_pos)?), EscapeSequence::OTHER) } 0x0075 /* u */ => { - Some((Self::take_unicode_escape_sequence(cursor, start_pos)?, None)) + (Some(Self::take_unicode_escape_sequence(cursor, start_pos)?), EscapeSequence::OTHER) } 0x0038 /* 8 */ | 0x0039 /* 9 */ => { // Grammar: NonOctalDecimalEscapeSequence @@ -214,7 +215,7 @@ impl StringLiteral { start_pos, )); } - Some((escape_ch, Some(EscapeSequence::NonOctalDecimal))) + (Some(escape_ch), EscapeSequence::NON_OCTAL_DECIMAL) } _ if (0x0030..=0x0037 /* '0'..='7' */).contains(&escape_ch) => { if is_template_literal { @@ -231,19 +232,19 @@ impl StringLiteral { )); } - Some((Self::take_legacy_octal_escape_sequence( + (Some(Self::take_legacy_octal_escape_sequence( cursor, escape_ch.try_into().expect("an ascii char must not fail to convert"), - )?, Some(EscapeSequence::LegacyOctal))) + )?), EscapeSequence::LEGACY_OCTAL) } _ if Self::is_line_terminator(escape_ch) => { // Grammar: LineContinuation // Grammar: \ LineTerminatorSequence // LineContinuation is the empty String. 
- None + (None, EscapeSequence::OTHER) } _ => { - Some((escape_ch, None)) + (Some(escape_ch), EscapeSequence::OTHER) } }; diff --git a/boa_parser/src/lexer/template.rs b/boa_parser/src/lexer/template.rs index 23e92854be..eb69261cf1 100644 --- a/boa_parser/src/lexer/template.rs +++ b/boa_parser/src/lexer/template.rs @@ -60,7 +60,7 @@ impl TemplateString { true, )?; - if let Some((escape_value, _)) = escape_value { + if let (Some(escape_value), _) = escape_value { buf.push_code_point(escape_value); } } diff --git a/boa_parser/src/lexer/tests.rs b/boa_parser/src/lexer/tests.rs index 67a16e8655..8f8551f3cd 100644 --- a/boa_parser/src/lexer/tests.rs +++ b/boa_parser/src/lexer/tests.rs @@ -151,8 +151,8 @@ fn check_string() { let a_sym = interner.get_or_intern_static("aaa", utf16!("aaa")); let b_sym = interner.get_or_intern_static("bbb", utf16!("bbb")); let expected = [ - TokenKind::string_literal(a_sym, None), - TokenKind::string_literal(b_sym, None), + TokenKind::string_literal(a_sym, EscapeSequence::empty()), + TokenKind::string_literal(b_sym, EscapeSequence::empty()), ]; expect_tokens(&mut lexer, &expected, interner); @@ -315,7 +315,7 @@ fn check_variable_definition_tokens() { TokenKind::Keyword((Keyword::Let, false)), TokenKind::identifier(a_sym), TokenKind::Punctuator(Punctuator::Assign), - TokenKind::string_literal(hello_sym, None), + TokenKind::string_literal(hello_sym, EscapeSequence::empty()), TokenKind::Punctuator(Punctuator::Semicolon), ]; @@ -953,7 +953,7 @@ fn string_unicode() { let sym = interner.get_or_intern_static("中文", utf16!("中文")); let expected = [ - TokenKind::StringLiteral((sym, None)), + TokenKind::StringLiteral((sym, EscapeSequence::empty())), TokenKind::Punctuator(Punctuator::Semicolon), ]; @@ -967,7 +967,7 @@ fn string_unicode_escape_with_braces() { let sym = interner.get_or_intern_static("{\u{20ac}\u{a0}\u{a0}}", utf16!("{\u{20ac}\u{a0}\u{a0}}")); - let expected = [TokenKind::StringLiteral((sym, None))]; + let expected = 
[TokenKind::StringLiteral((sym, EscapeSequence::OTHER))]; expect_tokens(&mut lexer, &expected, interner); @@ -1002,7 +1002,7 @@ fn string_unicode_escape_with_braces_2() { let interner = &mut Interner::default(); let sym = interner.get_or_intern_static("\u{20ac}\u{a0}\u{a0}", utf16!("\u{20ac}\u{a0}\u{a0}")); - let expected = [TokenKind::StringLiteral((sym, None))]; + let expected = [TokenKind::StringLiteral((sym, EscapeSequence::OTHER))]; expect_tokens(&mut lexer, &expected, interner); } @@ -1015,7 +1015,7 @@ fn string_with_single_escape() { let interner = &mut Interner::default(); let sym = interner.get_or_intern_static("Б", utf16!("Б")); - let expected = [TokenKind::StringLiteral((sym, None))]; + let expected = [TokenKind::StringLiteral((sym, EscapeSequence::OTHER))]; expect_tokens(&mut lexer, &expected, interner); } @@ -1039,7 +1039,7 @@ fn string_legacy_octal_escape() { let sym = interner.get_or_intern(expected.encode_utf16().collect::>().as_slice()); let expected_tokens = [TokenKind::StringLiteral(( sym, - Some(EscapeSequence::LegacyOctal), + EscapeSequence::LEGACY_OCTAL, ))]; expect_tokens(&mut lexer, &expected_tokens, interner); @@ -1070,7 +1070,7 @@ fn string_zero_escape() { let interner = &mut Interner::default(); let sym = interner.get_or_intern(expected.encode_utf16().collect::>().as_slice()); - let expected_tokens = [TokenKind::StringLiteral((sym, None))]; + let expected_tokens = [TokenKind::StringLiteral((sym, EscapeSequence::OTHER))]; expect_tokens(&mut lexer, &expected_tokens, interner); } @@ -1087,7 +1087,7 @@ fn string_non_octal_decimal_escape() { let sym = interner.get_or_intern(expected.encode_utf16().collect::>().as_slice()); let expected_tokens = [TokenKind::StringLiteral(( sym, - Some(EscapeSequence::NonOctalDecimal), + EscapeSequence::NON_OCTAL_DECIMAL, ))]; expect_tokens(&mut lexer, &expected_tokens, interner); @@ -1117,7 +1117,7 @@ fn string_line_continuation() { let interner = &mut Interner::default(); let sym = 
interner.get_or_intern_static("hello world", utf16!("hello world")); - let expected_tokens = [TokenKind::StringLiteral((sym, None))]; + let expected_tokens = [TokenKind::StringLiteral((sym, EscapeSequence::OTHER))]; expect_tokens(&mut lexer, &expected_tokens, interner); } diff --git a/boa_parser/src/lexer/token.rs b/boa_parser/src/lexer/token.rs index 5db6161b31..1b6fe79642 100644 --- a/boa_parser/src/lexer/token.rs +++ b/boa_parser/src/lexer/token.rs @@ -6,6 +6,7 @@ //! [spec]: https://tc39.es/ecma262/#sec-tokens use crate::lexer::template::TemplateString; +use bitflags::bitflags; use boa_ast::{Keyword, Punctuator, Span}; use boa_interner::{Interner, Sym}; use num_bigint::BigInt; @@ -128,7 +129,7 @@ pub enum TokenKind { /// A [**string literal**][spec]. /// /// [spec]: https://tc39.es/ecma262/#prod-StringLiteral - StringLiteral((Sym, Option)), + StringLiteral((Sym, EscapeSequence)), /// A part of a template literal without substitution. TemplateNoSubstitution(TemplateString), @@ -217,7 +218,7 @@ impl TokenKind { /// Creates a `StringLiteral` token type. #[inline] #[must_use] - pub const fn string_literal(lit: Sym, escape_sequence: Option) -> Self { + pub const fn string_literal(lit: Sym, escape_sequence: EscapeSequence) -> Self { Self::StringLiteral((lit, escape_sequence)) } @@ -287,25 +288,37 @@ impl TokenKind { } } -/// Indicates the type of an escape sequence. -#[cfg_attr(feature = "deser", derive(serde::Serialize, serde::Deserialize))] -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum EscapeSequence { - /// A legacy escape sequence starting with `0` - `7`. - /// - /// More information: - /// - [ECMAScript reference][spec] - /// - /// [spec]: https://tc39.es/ecma262/#prod-LegacyOctalEscapeSequence - LegacyOctal, +bitflags! { + /// Indicates the set of escape sequences a string contains. 
+ #[cfg_attr(feature = "deser", derive(serde::Serialize, serde::Deserialize))] + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + pub struct EscapeSequence: u8 { + /// A legacy escape sequence starting with `0` - `7`. + /// + /// More information: + /// - [ECMAScript reference][spec] + /// + /// [spec]: https://tc39.es/ecma262/#prod-LegacyOctalEscapeSequence + const LEGACY_OCTAL = 0b0000_0001; + + /// A non-octal decimal escape sequence starting with `8` or `9`. + /// + /// More information: + /// - [ECMAScript reference][spec] + /// + /// [spec]: https://tc39.es/ecma262/#prod-NonOctalDecimalEscapeSequence + const NON_OCTAL_DECIMAL = 0b0000_0010; + + /// A generic escape sequence, either single (`\t`), unicode (`\u1238`) + /// or a line continuation (`\`) + /// + /// More information: + /// - [ECMAScript reference][spec] + /// + /// [spec]: https://tc39.es/ecma262/#prod-LineContinuation + const OTHER = 0b0000_0100; + } - /// A octal escape sequence starting with `8` - `9`. - /// - /// More information: - /// - [ECMAScript reference][spec] - /// - /// [spec]: https://tc39.es/ecma262/#prod-NonOctalDecimalEscapeSequence - NonOctalDecimal, } /// Indicates if an identifier contains an escape sequence. diff --git a/boa_parser/src/parser/statement/mod.rs b/boa_parser/src/parser/statement/mod.rs index 9baaba2fb0..7116877d0e 100644 --- a/boa_parser/src/parser/statement/mod.rs +++ b/boa_parser/src/parser/statement/mod.rs @@ -297,15 +297,14 @@ where let global_strict = cursor.strict(); let mut directive_prologues = self.directive_prologues; let mut strict = self.strict; - let mut string_literal_escape_sequence = None; + let mut directives_stack = Vec::new(); loop { match cursor.peek(0, interner)? 
{ Some(token) if self.break_nodes.contains(token.kind()) => break, - Some(token) if directive_prologues && string_literal_escape_sequence.is_none() => { - if let TokenKind::StringLiteral((_, Some(escape_sequence))) = token.kind() { - string_literal_escape_sequence = - Some((token.span().start(), *escape_sequence)); + Some(token) if directive_prologues => { + if let TokenKind::StringLiteral((_, escape)) = token.kind() { + directives_stack.push((token.span().start(), *escape)); } } None => break, @@ -317,35 +316,50 @@ where .parse(cursor, interner)?; if directive_prologues { - if let ast::StatementListItem::Statement(ast::Statement::Expression( - ast::Expression::Literal(ast::expression::literal::Literal::String(string)), - )) = &item - { - if interner.resolve_expect(*string).join( - |s| s == "use strict", - |g| g == utf16!("use strict"), - true, - ) { - cursor.set_strict(true); - strict = true; - - if let Some((position, escape_sequence)) = string_literal_escape_sequence { - match escape_sequence { - EscapeSequence::LegacyOctal => return Err(Error::general( - "legacy octal escape sequences are not allowed in strict mode", - position, - )), - EscapeSequence::NonOctalDecimal => { + match &item { + ast::StatementListItem::Statement(ast::Statement::Expression( + ast::Expression::Literal(ast::expression::literal::Literal::String(string)), + )) if !strict => { + if interner.resolve_expect(*string).join( + |s| s == "use strict", + |g| g == utf16!("use strict"), + true, + ) && directives_stack.last().expect("token should exist").1 + == EscapeSequence::empty() + { + cursor.set_strict(true); + strict = true; + + directives_stack.pop(); + + for (position, escape) in std::mem::take(&mut directives_stack) { + if escape.contains(EscapeSequence::LEGACY_OCTAL) { + return Err(Error::general( + "legacy octal escape sequences are not allowed in strict mode", + position, + )); + } + + if escape.contains(EscapeSequence::NON_OCTAL_DECIMAL) { return Err(Error::general( "decimal escape 
sequences are not allowed in strict mode", position, - )) + )); } } } } - } else { - directive_prologues = false; + ast::StatementListItem::Statement(ast::Statement::Expression( + ast::Expression::Literal(ast::expression::literal::Literal::String( + _string, + )), + )) => { + // TODO: should store directives in some place + } + _ => { + directive_prologues = false; + directives_stack.clear(); + } } }