diff --git a/boa_cli/Cargo.toml b/boa_cli/Cargo.toml
index bfa1fe1bdc..8093205e12 100644
--- a/boa_cli/Cargo.toml
+++ b/boa_cli/Cargo.toml
@@ -12,8 +12,8 @@ repository.workspace = true
 rust-version.workspace = true
 
 [dependencies]
-boa_engine = { workspace = true, features = ["deser", "console", "flowgraph", "trace"] }
-boa_ast = { workspace = true, features = ["serde"]}
+boa_engine = { workspace = true, features = ["deser", "console", "flowgraph", "trace", "annex-b"] }
+boa_ast = { workspace = true, features = ["serde"] }
 boa_parser.workspace = true
 rustyline = { version = "11.0.0", features = ["derive"]}
 clap = { version = "4.2.1", features = ["derive"] }
diff --git a/boa_engine/Cargo.toml b/boa_engine/Cargo.toml
index 751960c146..31a177a29c 100644
--- a/boa_engine/Cargo.toml
+++ b/boa_engine/Cargo.toml
@@ -39,6 +39,9 @@ trace = []
 # Enable Boa's WHATWG console object implementation.
 console = []
 
+# Enable Boa's additional ECMAScript features for web browsers.
+annex-b = []
+
 [dependencies]
 boa_interner.workspace = true
 boa_gc = { workspace = true, features = [ "thinvec" ] }
diff --git a/boa_engine/src/builtins/escape/mod.rs b/boa_engine/src/builtins/escape/mod.rs
new file mode 100644
index 0000000000..df5d1fdb6c
--- /dev/null
+++ b/boa_engine/src/builtins/escape/mod.rs
@@ -0,0 +1,256 @@
+//! Boa's implementation of ECMAScript's string escaping functions.
+//!
+//! The `escape()` function replaces all characters with escape sequences, with the exception of ASCII
+//! word characters (A–Z, a–z, 0–9, _) and @*_+-./.
+//!
+//! The `unescape()` function replaces any escape sequence with the character that it represents.
+//!
+//! More information:
+//! - [ECMAScript reference][spec]
+//!
+//! [spec]: https://tc39.es/ecma262/#sec-additional-properties-of-the-global-object
+
+use crate::{
+    context::intrinsics::Intrinsics, js_string, Context, JsArgs, JsObject, JsResult, JsValue,
+};
+
+use super::{BuiltInBuilder, BuiltInObject, IntrinsicObject};
+
+/// The `escape` function
+#[derive(Debug, Clone, Copy)]
+pub(crate) struct Escape;
+
+impl IntrinsicObject for Escape {
+    fn init(intrinsics: &Intrinsics) {
+        BuiltInBuilder::with_intrinsic::<Self>(intrinsics)
+            .callable(escape)
+            .name(Self::NAME)
+            .length(1)
+            .build();
+    }
+    fn get(intrinsics: &Intrinsics) -> JsObject {
+        intrinsics.objects().escape().into()
+    }
+}
+
+impl BuiltInObject for Escape {
+    const NAME: &'static str = "escape";
+}
+
+/// Builtin JavaScript `escape ( string )` function.
+fn escape(_: &JsValue, args: &[JsValue], context: &mut Context<'_>) -> JsResult<JsValue> {
+    /// Returns `true` if the codepoint `cp` is part of the `unescapedSet`.
+    fn is_unescaped(cp: u16) -> bool {
+        let Ok(cp) = TryInto::<u8>::try_into(cp) else {
+            return false;
+        };
+
+        // 4. Let unescapedSet be the string-concatenation of the ASCII word characters and "@*+-./".
+        cp.is_ascii_alphanumeric() || [b'_', b'@', b'*', b'+', b'-', b'.', b'/'].contains(&cp)
+    }
+
+    // 1. Set string to ? ToString(string).
+    let string = args.get_or_undefined(0).to_string(context)?;
+
+    // 3. Let R be the empty String.
+    let mut vec = Vec::with_capacity(string.len());
+
+    // 2. Let len be the length of string.
+    // 5. Let k be 0.
+    // 6. Repeat, while k < len,
+    //     a. Let C be the code unit at index k within string.
+    for &cp in &*string {
+        // b. If unescapedSet contains C, then
+        if is_unescaped(cp) {
+            // i. Let S be C.
+            vec.push(cp);
+            continue;
+        }
+        // c. Else,
+        //     i. Let n be the numeric value of C.
+        //     ii. If n < 256, then
+        let c = if cp < 256 {
+            // 1. Let hex be the String representation of n, formatted as an uppercase hexadecimal number.
+            // 2. Let S be the string-concatenation of "%" and ! StringPad(hex, 2𝔽, "0", start).
+            format!("%{cp:02X}")
+        }
+        // iii. Else,
+        else {
+            // 1. Let hex be the String representation of n, formatted as an uppercase hexadecimal number.
+            // 2. Let S be the string-concatenation of "%u" and ! StringPad(hex, 4𝔽, "0", start).
+            format!("%u{cp:04X}")
+        };
+        // d. Set R to the string-concatenation of R and S.
+        // e. Set k to k + 1.
+        vec.extend(c.encode_utf16());
+    }
+
+    // 7. Return R.
+    Ok(js_string!(vec).into())
+}
+
+/// The `unescape` function
+#[derive(Debug, Clone, Copy)]
+pub(crate) struct Unescape;
+
+impl IntrinsicObject for Unescape {
+    fn init(intrinsics: &Intrinsics) {
+        BuiltInBuilder::with_intrinsic::<Self>(intrinsics)
+            .callable(unescape)
+            .name(Self::NAME)
+            .length(1)
+            .build();
+    }
+    fn get(intrinsics: &Intrinsics) -> JsObject {
+        intrinsics.objects().unescape().into()
+    }
+}
+
+impl BuiltInObject for Unescape {
+    const NAME: &'static str = "unescape";
+}
+
+/// Builtin JavaScript `unescape ( string )` function.
+fn unescape(_: &JsValue, args: &[JsValue], context: &mut Context<'_>) -> JsResult<JsValue> {
+    /// Converts a char `cp` to its corresponding hex digit value.
+    fn to_hex_digit(cp: u16) -> Option<u16> {
+        char::from_u32(u32::from(cp))
+            .and_then(|c| c.to_digit(16))
+            .and_then(|d| d.try_into().ok())
+    }
+
+    // 1. Set string to ? ToString(string).
+    let string = args.get_or_undefined(0).to_string(context)?;
+
+    // 3. Let R be the empty String.
+    let mut vec = Vec::with_capacity(string.len());
+
+    let mut codepoints = <PeekableN<_, 6>>::new(string.iter().copied());
+
+    // 2. Let len be the length of string.
+    // 4. Let k be 0.
+    // 5. Repeat, while k < len,
+    loop {
+        // a. Let C be the code unit at index k within string.
+        let Some(cp) = codepoints.next() else {
+            break;
+        };
+
+        // b. If C is the code unit 0x0025 (PERCENT SIGN), then
+        if cp != u16::from(b'%') {
+            vec.push(cp);
+            continue;
+        }
+
+        // i. Let hexDigits be the empty String.
+        // ii. Let optionalAdvance be 0.
+        // TODO: Try blocks :(
+        let Some(unescaped_cp) = (|| match *codepoints.peek_n(5) {
+            // iii. If k + 5 < len and the code unit at index k + 1 within string is the code unit
+            // 0x0075 (LATIN SMALL LETTER U), then
+            [u, n1, n2, n3, n4] if u == u16::from(b'u') => {
+                // 1. Set hexDigits to the substring of string from k + 2 to k + 6.
+                // 2. Set optionalAdvance to 5.
+                let n1 = to_hex_digit(n1)?;
+                let n2 = to_hex_digit(n2)?;
+                let n3 = to_hex_digit(n3)?;
+                let n4 = to_hex_digit(n4)?;
+
+                // TODO: https://github.com/rust-lang/rust/issues/77404
+                for _ in 0..5 {
+                    codepoints.next();
+                }
+
+                Some((n1 << 12) + (n2 << 8) + (n3 << 4) + n4)
+            }
+            // iv. Else if k + 3 ≤ len, then
+            [n1, n2, ..] => {
+                // 1. Set hexDigits to the substring of string from k + 1 to k + 3.
+                // 2. Set optionalAdvance to 2.
+                let n1 = to_hex_digit(n1)?;
+                let n2 = to_hex_digit(n2)?;
+
+                // TODO: https://github.com/rust-lang/rust/issues/77404
+                for _ in 0..2 {
+                    codepoints.next();
+                }
+
+                Some((n1 << 4) + n2)
+            }
+            _ => None
+        })() else {
+            vec.push(u16::from(b'%'));
+            continue;
+        };
+
+        // v. Let parseResult be ParseText(StringToCodePoints(hexDigits), HexDigits[~Sep]).
+        // vi. If parseResult is a Parse Node, then
+        //     1. Let n be the MV of parseResult.
+        //     2. Set C to the code unit whose numeric value is n.
+        //     3. Set k to k + optionalAdvance.
+        // c. Set R to the string-concatenation of R and C.
+        // d. Set k to k + 1.
+        vec.push(unescaped_cp);
+    }
+    // 6. Return R.
+    Ok(js_string!(vec).into())
+}
+
+/// An iterator that can peek `N` items.
+struct PeekableN<I, const N: usize>
+where
+    I: Iterator,
+{
+    iterator: I,
+    buffer: [I::Item; N],
+    buffered_end: usize,
+}
+
+impl<I, const N: usize> PeekableN<I, N>
+where
+    I: Iterator,
+    I::Item: Default + Copy,
+{
+    /// Creates a new `PeekableN`.
+    fn new(iterator: I) -> Self {
+        Self {
+            iterator,
+            buffer: [I::Item::default(); N],
+            buffered_end: 0,
+        }
+    }
+
+    /// Peeks `n` items from the iterator.
+    fn peek_n(&mut self, count: usize) -> &[I::Item] {
+        if count <= self.buffered_end {
+            return &self.buffer[..count];
+        }
+        for _ in 0..(count - self.buffered_end) {
+            let Some(next) = self.iterator.next() else {
+                return &self.buffer[..self.buffered_end];
+            };
+            self.buffer[self.buffered_end] = next;
+            self.buffered_end += 1;
+        }
+
+        &self.buffer[..count]
+    }
+}
+
+impl<I, const N: usize> Iterator for PeekableN<I, N>
+where
+    I: Iterator,
+    I::Item: Copy,
+{
+    type Item = I::Item;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.buffered_end > 0 {
+            let item = self.buffer[0];
+            self.buffer.rotate_left(1);
+            self.buffered_end -= 1;
+            return Some(item);
+        }
+        self.iterator.next()
+    }
+}
diff --git a/boa_engine/src/builtins/mod.rs b/boa_engine/src/builtins/mod.rs
index bbbc7ec303..0d4191d6ed 100644
--- a/boa_engine/src/builtins/mod.rs
+++ b/boa_engine/src/builtins/mod.rs
@@ -35,6 +35,9 @@ pub mod weak;
 pub mod weak_map;
 pub mod weak_set;
 
+#[cfg(feature = "annex-b")]
+pub mod escape;
+
 #[cfg(feature = "intl")]
 pub mod intl;
 
@@ -253,6 +256,13 @@ impl Intrinsics {
         WeakRef::init(&intrinsics);
         WeakMap::init(&intrinsics);
         WeakSet::init(&intrinsics);
+
+        #[cfg(feature = "annex-b")]
+        {
+            escape::Escape::init(&intrinsics);
+            escape::Unescape::init(&intrinsics);
+        }
+
         #[cfg(feature = "intl")]
         {
             intl::Intl::init(&intrinsics);
@@ -354,6 +364,12 @@ pub(crate) fn set_default_global_bindings(context: &mut Context<'_>) -> JsResult
     global_binding::<WeakMap>(context)?;
     global_binding::<WeakSet>(context)?;
 
+    #[cfg(feature = "annex-b")]
+    {
+        global_binding::<escape::Escape>(context)?;
+        global_binding::<escape::Unescape>(context)?;
+    }
+
     #[cfg(feature = "intl")]
     global_binding::<intl::Intl>(context)?;
 
diff --git a/boa_engine/src/context/intrinsics.rs b/boa_engine/src/context/intrinsics.rs
index a301d66e9f..c5a00d53fb 100644
--- a/boa_engine/src/context/intrinsics.rs
+++ b/boa_engine/src/context/intrinsics.rs
@@ -764,6 +764,14 @@ pub struct IntrinsicObjects {
     /// [`%parseInt%`](https://tc39.es/ecma262/#sec-parseint-string-radix)
     parse_int: JsFunction,
 
+    /// [`%escape%`](https://tc39.es/ecma262/#sec-escape-string)
+    #[cfg(feature = "annex-b")]
+    escape: JsFunction,
+
+    /// [`%unescape%`](https://tc39.es/ecma262/#sec-unescape-string)
+    #[cfg(feature = "annex-b")]
+    unescape: JsFunction,
+
     /// [`%Intl%`](https://tc39.es/ecma402/#intl-object)
     #[cfg(feature = "intl")]
     intl: JsObject,
@@ -786,6 +794,10 @@ impl Default for IntrinsicObjects {
             is_nan: JsFunction::from_object_unchecked(JsObject::default()),
             parse_float: JsFunction::from_object_unchecked(JsObject::default()),
             parse_int: JsFunction::from_object_unchecked(JsObject::default()),
+            #[cfg(feature = "annex-b")]
+            escape: JsFunction::from_object_unchecked(JsObject::default()),
+            #[cfg(feature = "annex-b")]
+            unescape: JsFunction::from_object_unchecked(JsObject::default()),
             #[cfg(feature = "intl")]
             intl: JsObject::default(),
         }
@@ -892,6 +904,22 @@ impl IntrinsicObjects {
         self.parse_int.clone()
     }
 
+    /// Gets the [`%escape%`][spec] intrinsic function.
+    ///
+    /// [spec]: https://tc39.es/ecma262/#sec-escape-string
+    #[cfg(feature = "annex-b")]
+    pub fn escape(&self) -> JsFunction {
+        self.escape.clone()
+    }
+
+    /// Gets the [`%unescape%`][spec] intrinsic function.
+    ///
+    /// [spec]: https://tc39.es/ecma262/#sec-unescape-string
+    #[cfg(feature = "annex-b")]
+    pub fn unescape(&self) -> JsFunction {
+        self.unescape.clone()
+    }
+
     /// Gets the [`%Intl%`][spec] intrinsic object.
     ///
     /// [spec]: https://tc39.es/ecma402/#intl-object
diff --git a/boa_tester/Cargo.toml b/boa_tester/Cargo.toml
index b8d26d3868..76b8aa8be7 100644
--- a/boa_tester/Cargo.toml
+++ b/boa_tester/Cargo.toml
@@ -12,7 +12,7 @@ repository.workspace = true
 rust-version.workspace = true
 
 [dependencies]
-boa_engine.workspace = true
+boa_engine = { workspace = true, features = ["annex-b"] }
 boa_gc.workspace = true
 clap = { version = "4.2.1", features = ["derive"] }
 serde = { version = "1.0.159", features = ["derive"] }
diff --git a/boa_tester/src/edition.rs b/boa_tester/src/edition.rs
index 4ed8558a0b..8ad0ba873f 100644
--- a/boa_tester/src/edition.rs
+++ b/boa_tester/src/edition.rs
@@ -341,7 +341,7 @@ impl SpecEdition {
     pub(crate) fn from_test_metadata(metadata: &MetaData) -> Result<Self, Vec<&'static str>> {
         let mut min_edition = if metadata.flags.contains(&TestFlag::Async) {
             Self::ES8
-        } else if metadata.es6id.is_some() || metadata.flags.contains(&TestFlag::Module) {
+        } else if metadata.flags.contains(&TestFlag::Module) {
             Self::ES6
         } else {
             Self::ES5
diff --git a/boa_wasm/Cargo.toml b/boa_wasm/Cargo.toml
index cb9f0b98a2..31528450e2 100644
--- a/boa_wasm/Cargo.toml
+++ b/boa_wasm/Cargo.toml
@@ -12,7 +12,7 @@ repository.workspace = true
 rust-version.workspace = true
 
 [dependencies]
-boa_engine = { workspace = true, features = ["console"] }
+boa_engine = { workspace = true, features = ["console", "annex-b"] }
 wasm-bindgen = "0.2.84"
 getrandom = { version = "0.2.8", features = ["js"] }
 