Browse Source

Implement `escape` and `unescape` (#2768)

<!---
Thank you for contributing to Boa! Please fill out the template below, and remove or add any
information as you feel necessary.
--->

This Pull Request implements the [`escape`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/escape) and [`unescape`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/unescape) functions.

Both are technically deprecated, but they're also part of the [Additional ECMAScript Features for Web Browsers](https://tc39.es/ecma262/#sec-additional-ecmascript-features-for-web-browsers) section, so it is preferable to have them in place.
pull/2780/head
José Julián Espina 2 years ago
parent
commit
0babe195fb
  1. 2
      boa_cli/Cargo.toml
  2. 3
      boa_engine/Cargo.toml
  3. 256
      boa_engine/src/builtins/escape/mod.rs
  4. 16
      boa_engine/src/builtins/mod.rs
  5. 28
      boa_engine/src/context/intrinsics.rs
  6. 2
      boa_tester/Cargo.toml
  7. 2
      boa_tester/src/edition.rs
  8. 2
      boa_wasm/Cargo.toml

2
boa_cli/Cargo.toml

@ -12,7 +12,7 @@ repository.workspace = true
rust-version.workspace = true
[dependencies]
boa_engine = { workspace = true, features = ["deser", "console", "flowgraph", "trace"] }
boa_engine = { workspace = true, features = ["deser", "console", "flowgraph", "trace", "annex-b"] }
boa_ast = { workspace = true, features = ["serde"] }
boa_parser.workspace = true
rustyline = { version = "11.0.0", features = ["derive"]}

3
boa_engine/Cargo.toml

@ -39,6 +39,9 @@ trace = []
# Enable Boa's WHATWG console object implementation.
console = []
# Enable Boa's additional ECMAScript features for web browsers.
annex-b = []
[dependencies]
boa_interner.workspace = true
boa_gc = { workspace = true, features = [ "thinvec" ] }

256
boa_engine/src/builtins/escape/mod.rs

@ -0,0 +1,256 @@
//! Boa's implementation of ECMAScript's string escaping functions.
//!
//! The `escape()` function replaces all characters with escape sequences, with the exception of ASCII
//! word characters (A–Z, a–z, 0–9, _) and `@*+-./`.
//!
//! The `unescape()` function replaces any escape sequence with the character that it represents.
//!
//! More information:
//! - [ECMAScript reference][spec]
//!
//! [spec]: https://tc39.es/ecma262/#sec-additional-properties-of-the-global-object
use crate::{
context::intrinsics::Intrinsics, js_string, Context, JsArgs, JsObject, JsResult, JsValue,
};
use super::{BuiltInBuilder, BuiltInObject, IntrinsicObject};
/// The `escape` function
///
/// Unit marker type under which the global `escape` function is registered as an intrinsic.
/// The behaviour itself lives in the free function `escape` of this module.
#[derive(Debug, Clone, Copy)]
pub(crate) struct Escape;
impl IntrinsicObject for Escape {
/// Builds the intrinsic for this type: a callable backed by the `escape` function, with its
/// name set to `Self::NAME` and its length set to 1.
fn init(intrinsics: &Intrinsics) {
BuiltInBuilder::with_intrinsic::<Self>(intrinsics)
.callable(escape)
.name(Self::NAME)
.length(1)
.build();
}
/// Returns the `escape` function held by `intrinsics`, converted into a `JsObject`.
fn get(intrinsics: &Intrinsics) -> JsObject {
intrinsics.objects().escape().into()
}
}
impl BuiltInObject for Escape {
// Name passed to the builder in `init`.
// NOTE(review): presumably also the key used for the global binding — confirm against `global_binding`.
const NAME: &'static str = "escape";
}
/// Builtin JavaScript `escape ( string )` function.
fn escape(_: &JsValue, args: &[JsValue], context: &mut Context<'_>) -> JsResult<JsValue> {
/// Returns `true` if the codepoint `cp` is part of the `unescapedSet`.
fn is_unescaped(cp: u16) -> bool {
let Ok(cp) = TryInto::<u8>::try_into(cp) else {
return false;
};
// 4. Let unescapedSet be the string-concatenation of the ASCII word characters and "@*+-./".
cp.is_ascii_alphanumeric() || [b'_', b'@', b'*', b'+', b'-', b'.', b'/'].contains(&cp)
}
// 1. Set string to ? ToString(string).
let string = args.get_or_undefined(0).to_string(context)?;
// 3. Let R be the empty String.
let mut vec = Vec::with_capacity(string.len());
// 2. Let len be the length of string.
// 5. Let k be 0.
// 6. Repeat, while k < len,
// a. Let C be the code unit at index k within string.
for &cp in &*string {
// b. If unescapedSet contains C, then
if is_unescaped(cp) {
// i. Let S be C.
vec.push(cp);
continue;
}
// c. Else,
// i. Let n be the numeric value of C.
// ii. If n < 256, then
let c = if cp < 256 {
// 1. Let hex be the String representation of n, formatted as an uppercase hexadecimal number.
// 2. Let S be the string-concatenation of "%" and ! StringPad(hex, 2𝔽, "0", start).
format!("%{cp:02X}")
}
// iii. Else,
else {
// 1. Let hex be the String representation of n, formatted as an uppercase hexadecimal number.
// 2. Let S be the string-concatenation of "%u" and ! StringPad(hex, 4𝔽, "0", start).
format!("%u{cp:04X}")
};
// d. Set R to the string-concatenation of R and S.
// e. Set k to k + 1.
vec.extend(c.encode_utf16());
}
// 7. Return R.
Ok(js_string!(vec).into())
}
/// The `unescape` function
///
/// Unit marker type under which the global `unescape` function is registered as an intrinsic.
/// The behaviour itself lives in the free function `unescape` of this module.
#[derive(Debug, Clone, Copy)]
pub(crate) struct Unescape;
impl IntrinsicObject for Unescape {
/// Builds the intrinsic for this type: a callable backed by the `unescape` function, with its
/// name set to `Self::NAME` and its length set to 1.
fn init(intrinsics: &Intrinsics) {
BuiltInBuilder::with_intrinsic::<Self>(intrinsics)
.callable(unescape)
.name(Self::NAME)
.length(1)
.build();
}
/// Returns the `unescape` function held by `intrinsics`, converted into a `JsObject`.
fn get(intrinsics: &Intrinsics) -> JsObject {
intrinsics.objects().unescape().into()
}
}
impl BuiltInObject for Unescape {
// Name passed to the builder in `init`.
// NOTE(review): presumably also the key used for the global binding — confirm against `global_binding`.
const NAME: &'static str = "unescape";
}
/// Builtin JavaScript `unescape ( string )` function.
///
/// Converts the first argument to a string and returns a copy in which every well-formed
/// `%XX` or `%uXXXX` sequence (hexadecimal digits in either case) is replaced by the code unit
/// with that numeric value. A `%` that does not start a well-formed sequence is copied through
/// unchanged.
fn unescape(_: &JsValue, args: &[JsValue], context: &mut Context<'_>) -> JsResult<JsValue> {
    /// Converts the code unit `cp` to the value of the hexadecimal digit it encodes, if any.
    fn to_hex_digit(cp: u16) -> Option<u16> {
        let digit = char::from_u32(u32::from(cp))?.to_digit(16)?;
        digit.try_into().ok()
    }

    /// Folds a run of hexadecimal digit code units (most significant first) into the number
    /// they spell, or returns `None` as soon as one of them is not a hexadecimal digit.
    ///
    /// Callers pass at most four digits, so the accumulated value always fits in a `u16`.
    fn parse_hex(digits: &[u16]) -> Option<u16> {
        digits
            .iter()
            .try_fold(0, |acc: u16, &digit| Some((acc << 4) + to_hex_digit(digit)?))
    }

    // 1. Set string to ? ToString(string).
    let string = args.get_or_undefined(0).to_string(context)?;

    // 2. Let len be the length of string.
    // 3. Let R be the empty String.
    let mut unescaped = Vec::with_capacity(string.len());
    let mut code_units = <PeekableN<_, 6>>::new(string.iter().copied());

    // 4. Let k be 0.
    // 5. Repeat, while k < len,
    //     a. Let C be the code unit at index k within string.
    while let Some(unit) = code_units.next() {
        // b. If C is the code unit 0x0025 (PERCENT SIGN), then
        if unit != u16::from(b'%') {
            unescaped.push(unit);
            continue;
        }

        // i. Let hexDigits be the empty String.
        // ii. Let optionalAdvance be 0.
        // `sequence` pairs the decoded code unit with `optionalAdvance`, i.e. how many code
        // units after the `%` belong to the escape sequence.
        let sequence = match *code_units.peek_n(5) {
            // iii. If k + 5 < len and the code unit at index k + 1 within string is the code unit
            //      0x0075 (LATIN SMALL LETTER U), then
            //     1. Set hexDigits to the substring of string from k + 2 to k + 6.
            //     2. Set optionalAdvance to 5.
            [marker, d1, d2, d3, d4] if marker == u16::from(b'u') => {
                parse_hex(&[d1, d2, d3, d4]).map(|value| (value, 5))
            }
            // iv. Else if k + 3 ≤ len, then
            //     1. Set hexDigits to the substring of string from k + 1 to k + 3.
            //     2. Set optionalAdvance to 2.
            [d1, d2, ..] => parse_hex(&[d1, d2]).map(|value| (value, 2)),
            _ => None,
        };

        // v. Let parseResult be ParseText(StringToCodePoints(hexDigits), HexDigits[~Sep]).
        // vi. If parseResult is a Parse Node, then
        //     1. Let n be the MV of parseResult.
        //     2. Set C to the code unit whose numeric value is n.
        //     3. Set k to k + optionalAdvance.
        // c. Set R to the string-concatenation of R and C.
        // d. Set k to k + 1.
        match sequence {
            Some((value, advance)) => {
                // TODO: https://github.com/rust-lang/rust/issues/77404
                for _ in 0..advance {
                    code_units.next();
                }
                unescaped.push(value);
            }
            // Not a valid escape sequence: keep the `%` and rescan what follows it normally.
            None => unescaped.push(u16::from(b'%')),
        }
    }

    // 6. Return R.
    Ok(js_string!(unescaped).into())
}
/// An iterator adaptor that can look ahead at up to `N` upcoming items without consuming them.
///
/// Items pulled from the wrapped iterator while peeking are parked in a fixed-size buffer and
/// handed out again, in order, by [`Iterator::next`].
struct PeekableN<I, const N: usize>
where
    I: Iterator,
{
    /// The wrapped iterator; only items not yet buffered are still inside it.
    iterator: I,
    /// Storage for peeked items. Only the first `buffered_end` slots hold live items; the
    /// remaining slots contain defaults or stale copies and must not be read.
    buffer: [I::Item; N],
    /// Number of live items at the front of `buffer` (always `<= N`).
    buffered_end: usize,
}

impl<I, const N: usize> PeekableN<I, N>
where
    I: Iterator,
    I::Item: Default + Copy,
{
    /// Creates a new `PeekableN` with an empty look-ahead buffer.
    fn new(iterator: I) -> Self {
        Self {
            iterator,
            buffer: [I::Item::default(); N],
            buffered_end: 0,
        }
    }

    /// Peeks up to `count` items from the iterator without consuming them.
    ///
    /// The returned slice starts at the item the next call to `next` would yield. It is shorter
    /// than `count` when the underlying iterator runs out of items, or when `count` exceeds the
    /// buffer capacity `N` (the request is clamped to `N` instead of indexing out of bounds).
    fn peek_n(&mut self, count: usize) -> &[I::Item] {
        // The buffer can never hold more than `N` items, so a larger request is clamped.
        let wanted = count.min(N);

        // Top the buffer up until it holds `wanted` items or the source is exhausted.
        while self.buffered_end < wanted {
            let Some(next) = self.iterator.next() else {
                break;
            };
            self.buffer[self.buffered_end] = next;
            self.buffered_end += 1;
        }

        &self.buffer[..wanted.min(self.buffered_end)]
    }
}

impl<I, const N: usize> Iterator for PeekableN<I, N>
where
    I: Iterator,
    I::Item: Copy,
{
    type Item = I::Item;

    /// Yields previously peeked items first (oldest first), then falls through to the wrapped
    /// iterator once the buffer is empty.
    fn next(&mut self) -> Option<Self::Item> {
        if self.buffered_end == 0 {
            return self.iterator.next();
        }

        let item = self.buffer[0];
        // Shift the remaining live items one slot towards the front; slots past the live
        // prefix are never read, so they do not need to be moved.
        self.buffer.copy_within(1..self.buffered_end, 0);
        self.buffered_end -= 1;
        Some(item)
    }
}

16
boa_engine/src/builtins/mod.rs

@ -35,6 +35,9 @@ pub mod weak;
pub mod weak_map;
pub mod weak_set;
#[cfg(feature = "annex-b")]
pub mod escape;
#[cfg(feature = "intl")]
pub mod intl;
@ -253,6 +256,13 @@ impl Intrinsics {
WeakRef::init(&intrinsics);
WeakMap::init(&intrinsics);
WeakSet::init(&intrinsics);
#[cfg(feature = "annex-b")]
{
escape::Escape::init(&intrinsics);
escape::Unescape::init(&intrinsics);
}
#[cfg(feature = "intl")]
{
intl::Intl::init(&intrinsics);
@ -354,6 +364,12 @@ pub(crate) fn set_default_global_bindings(context: &mut Context<'_>) -> JsResult
global_binding::<WeakMap>(context)?;
global_binding::<WeakSet>(context)?;
#[cfg(feature = "annex-b")]
{
global_binding::<escape::Escape>(context)?;
global_binding::<escape::Unescape>(context)?;
}
#[cfg(feature = "intl")]
global_binding::<intl::Intl>(context)?;

28
boa_engine/src/context/intrinsics.rs

@ -764,6 +764,14 @@ pub struct IntrinsicObjects {
/// [`%parseInt%`](https://tc39.es/ecma262/#sec-parseint-string-radix)
parse_int: JsFunction,
/// [`%escape%`](https://tc39.es/ecma262/#sec-escape-string)
#[cfg(feature = "annex-b")]
escape: JsFunction,
/// [`%unescape%`](https://tc39.es/ecma262/#sec-unescape-string)
#[cfg(feature = "annex-b")]
unescape: JsFunction,
/// [`%Intl%`](https://tc39.es/ecma402/#intl-object)
#[cfg(feature = "intl")]
intl: JsObject,
@ -786,6 +794,10 @@ impl Default for IntrinsicObjects {
is_nan: JsFunction::from_object_unchecked(JsObject::default()),
parse_float: JsFunction::from_object_unchecked(JsObject::default()),
parse_int: JsFunction::from_object_unchecked(JsObject::default()),
#[cfg(feature = "annex-b")]
escape: JsFunction::from_object_unchecked(JsObject::default()),
#[cfg(feature = "annex-b")]
unescape: JsFunction::from_object_unchecked(JsObject::default()),
#[cfg(feature = "intl")]
intl: JsObject::default(),
}
@ -892,6 +904,22 @@ impl IntrinsicObjects {
self.parse_int.clone()
}
/// Gets the [`%escape%`][spec] intrinsic function.
///
/// Returns a clone of the stored `JsFunction`. This accessor is only compiled when the
/// `annex-b` feature is enabled.
///
/// [spec]: https://tc39.es/ecma262/#sec-escape-string
#[cfg(feature = "annex-b")]
pub fn escape(&self) -> JsFunction {
self.escape.clone()
}
/// Gets the [`%unescape%`][spec] intrinsic function.
///
/// Returns a clone of the stored `JsFunction`. This accessor is only compiled when the
/// `annex-b` feature is enabled.
///
/// [spec]: https://tc39.es/ecma262/#sec-unescape-string
#[cfg(feature = "annex-b")]
pub fn unescape(&self) -> JsFunction {
self.unescape.clone()
}
/// Gets the [`%Intl%`][spec] intrinsic object.
///
/// [spec]: https://tc39.es/ecma402/#intl-object

2
boa_tester/Cargo.toml

@ -12,7 +12,7 @@ repository.workspace = true
rust-version.workspace = true
[dependencies]
boa_engine.workspace = true
boa_engine = { workspace = true, features = ["annex-b"] }
boa_gc.workspace = true
clap = { version = "4.2.1", features = ["derive"] }
serde = { version = "1.0.159", features = ["derive"] }

2
boa_tester/src/edition.rs

@ -341,7 +341,7 @@ impl SpecEdition {
pub(crate) fn from_test_metadata(metadata: &MetaData) -> Result<Self, Vec<&str>> {
let mut min_edition = if metadata.flags.contains(&TestFlag::Async) {
Self::ES8
} else if metadata.es6id.is_some() || metadata.flags.contains(&TestFlag::Module) {
} else if metadata.flags.contains(&TestFlag::Module) {
Self::ES6
} else {
Self::ES5

2
boa_wasm/Cargo.toml

@ -12,7 +12,7 @@ repository.workspace = true
rust-version.workspace = true
[dependencies]
boa_engine = { workspace = true, features = ["console"] }
boa_engine = { workspace = true, features = ["console", "annex-b"] }
wasm-bindgen = "0.2.84"
getrandom = { version = "0.2.8", features = ["js"] }

Loading…
Cancel
Save