mirror of https://github.com/boa-dev/boa.git
Browse Source
<!--- Thank you for contributing to Boa! Please fill out the template below, and remove or add any information as you feel necessary. ---> This Pull Request implements the [`escape`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/escape) and [`unescape`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/unescape) functions. Both are technically deprecated, but they're also part of the [Additional ECMAScript Features for Web Browsers](https://tc39.es/ecma262/#sec-additional-ecmascript-features-for-web-browsers) section, so it is preferable to have them in place.pull/2780/head
José Julián Espina
2 years ago
8 changed files with 308 additions and 5 deletions
@ -0,0 +1,256 @@
|
||||
//! Boa's implementation of ECMAScript's string escaping functions.
|
||||
//!
|
||||
//! The `escape()` function replaces all characters with escape sequences, with the exception of ASCII
|
||||
//! word characters (A–Z, a–z, 0–9, _) and @*_+-./.
|
||||
//!
|
||||
//! The `unescape()` function replaces any escape sequence with the character that it represents.
|
||||
//!
|
||||
//! More information:
|
||||
//! - [ECMAScript reference][spec]
|
||||
//!
|
||||
//! [spec]: https://tc39.es/ecma262/#sec-additional-properties-of-the-global-object
|
||||
|
||||
use crate::{ |
||||
context::intrinsics::Intrinsics, js_string, Context, JsArgs, JsObject, JsResult, JsValue, |
||||
}; |
||||
|
||||
use super::{BuiltInBuilder, BuiltInObject, IntrinsicObject}; |
||||
|
||||
/// The `escape` function
|
||||
#[derive(Debug, Clone, Copy)] |
||||
pub(crate) struct Escape; |
||||
|
||||
impl IntrinsicObject for Escape { |
||||
fn init(intrinsics: &Intrinsics) { |
||||
BuiltInBuilder::with_intrinsic::<Self>(intrinsics) |
||||
.callable(escape) |
||||
.name(Self::NAME) |
||||
.length(1) |
||||
.build(); |
||||
} |
||||
fn get(intrinsics: &Intrinsics) -> JsObject { |
||||
intrinsics.objects().escape().into() |
||||
} |
||||
} |
||||
|
||||
impl BuiltInObject for Escape { |
||||
const NAME: &'static str = "escape"; |
||||
} |
||||
|
||||
/// Builtin JavaScript `escape ( string )` function.
|
||||
fn escape(_: &JsValue, args: &[JsValue], context: &mut Context<'_>) -> JsResult<JsValue> { |
||||
/// Returns `true` if the codepoint `cp` is part of the `unescapedSet`.
|
||||
fn is_unescaped(cp: u16) -> bool { |
||||
let Ok(cp) = TryInto::<u8>::try_into(cp) else { |
||||
return false; |
||||
}; |
||||
|
||||
// 4. Let unescapedSet be the string-concatenation of the ASCII word characters and "@*+-./".
|
||||
cp.is_ascii_alphanumeric() || [b'_', b'@', b'*', b'+', b'-', b'.', b'/'].contains(&cp) |
||||
} |
||||
|
||||
// 1. Set string to ? ToString(string).
|
||||
let string = args.get_or_undefined(0).to_string(context)?; |
||||
|
||||
// 3. Let R be the empty String.
|
||||
let mut vec = Vec::with_capacity(string.len()); |
||||
|
||||
// 2. Let len be the length of string.
|
||||
// 5. Let k be 0.
|
||||
// 6. Repeat, while k < len,
|
||||
// a. Let C be the code unit at index k within string.
|
||||
for &cp in &*string { |
||||
// b. If unescapedSet contains C, then
|
||||
if is_unescaped(cp) { |
||||
// i. Let S be C.
|
||||
vec.push(cp); |
||||
continue; |
||||
} |
||||
// c. Else,
|
||||
// i. Let n be the numeric value of C.
|
||||
// ii. If n < 256, then
|
||||
let c = if cp < 256 { |
||||
// 1. Let hex be the String representation of n, formatted as an uppercase hexadecimal number.
|
||||
// 2. Let S be the string-concatenation of "%" and ! StringPad(hex, 2𝔽, "0", start).
|
||||
format!("%{cp:02X}") |
||||
} |
||||
// iii. Else,
|
||||
else { |
||||
// 1. Let hex be the String representation of n, formatted as an uppercase hexadecimal number.
|
||||
// 2. Let S be the string-concatenation of "%u" and ! StringPad(hex, 4𝔽, "0", start).
|
||||
format!("%u{cp:04X}") |
||||
}; |
||||
// d. Set R to the string-concatenation of R and S.
|
||||
// e. Set k to k + 1.
|
||||
vec.extend(c.encode_utf16()); |
||||
} |
||||
|
||||
// 7. Return R.
|
||||
Ok(js_string!(vec).into()) |
||||
} |
||||
|
||||
/// The `unescape` function
|
||||
#[derive(Debug, Clone, Copy)] |
||||
pub(crate) struct Unescape; |
||||
|
||||
impl IntrinsicObject for Unescape { |
||||
fn init(intrinsics: &Intrinsics) { |
||||
BuiltInBuilder::with_intrinsic::<Self>(intrinsics) |
||||
.callable(unescape) |
||||
.name(Self::NAME) |
||||
.length(1) |
||||
.build(); |
||||
} |
||||
fn get(intrinsics: &Intrinsics) -> JsObject { |
||||
intrinsics.objects().unescape().into() |
||||
} |
||||
} |
||||
|
||||
impl BuiltInObject for Unescape { |
||||
const NAME: &'static str = "unescape"; |
||||
} |
||||
|
||||
/// Builtin JavaScript `unescape ( string )` function.
|
||||
fn unescape(_: &JsValue, args: &[JsValue], context: &mut Context<'_>) -> JsResult<JsValue> { |
||||
/// Converts a char `cp` to its corresponding hex digit value.
|
||||
fn to_hex_digit(cp: u16) -> Option<u16> { |
||||
char::from_u32(u32::from(cp)) |
||||
.and_then(|c| c.to_digit(16)) |
||||
.and_then(|d| d.try_into().ok()) |
||||
} |
||||
|
||||
// 1. Set string to ? ToString(string).
|
||||
let string = args.get_or_undefined(0).to_string(context)?; |
||||
|
||||
// 3. Let R be the empty String.
|
||||
let mut vec = Vec::with_capacity(string.len()); |
||||
|
||||
let mut codepoints = <PeekableN<_, 6>>::new(string.iter().copied()); |
||||
|
||||
// 2. Let len be the length of string.
|
||||
// 4. Let k be 0.
|
||||
// 5. Repeat, while k < len,
|
||||
loop { |
||||
// a. Let C be the code unit at index k within string.
|
||||
let Some(cp) = codepoints.next() else { |
||||
break; |
||||
}; |
||||
|
||||
// b. If C is the code unit 0x0025 (PERCENT SIGN), then
|
||||
if cp != u16::from(b'%') { |
||||
vec.push(cp); |
||||
continue; |
||||
} |
||||
|
||||
// i. Let hexDigits be the empty String.
|
||||
// ii. Let optionalAdvance be 0.
|
||||
// TODO: Try blocks :(
|
||||
let Some(unescaped_cp) = (|| match *codepoints.peek_n(5) { |
||||
// iii. If k + 5 < len and the code unit at index k + 1 within string is the code unit
|
||||
// 0x0075 (LATIN SMALL LETTER U), then
|
||||
[u, n1, n2, n3, n4] if u == u16::from(b'u') => { |
||||
// 1. Set hexDigits to the substring of string from k + 2 to k + 6.
|
||||
// 2. Set optionalAdvance to 5.
|
||||
let n1 = to_hex_digit(n1)?; |
||||
let n2 = to_hex_digit(n2)?; |
||||
let n3 = to_hex_digit(n3)?; |
||||
let n4 = to_hex_digit(n4)?; |
||||
|
||||
// TODO: https://github.com/rust-lang/rust/issues/77404
|
||||
for _ in 0..5 { |
||||
codepoints.next(); |
||||
} |
||||
|
||||
Some((n1 << 12) + (n2 << 8) + (n3 << 4) + n4) |
||||
} |
||||
// iv. Else if k + 3 ≤ len, then
|
||||
[n1, n2, ..] => { |
||||
// 1. Set hexDigits to the substring of string from k + 1 to k + 3.
|
||||
// 2. Set optionalAdvance to 2.
|
||||
let n1 = to_hex_digit(n1)?; |
||||
let n2 = to_hex_digit(n2)?; |
||||
|
||||
// TODO: https://github.com/rust-lang/rust/issues/77404
|
||||
for _ in 0..2 { |
||||
codepoints.next(); |
||||
} |
||||
|
||||
Some((n1 << 4) + n2) |
||||
} |
||||
_ => None |
||||
})() else { |
||||
vec.push(u16::from(b'%')); |
||||
continue; |
||||
}; |
||||
|
||||
// v. Let parseResult be ParseText(StringToCodePoints(hexDigits), HexDigits[~Sep]).
|
||||
// vi. If parseResult is a Parse Node, then
|
||||
// 1. Let n be the MV of parseResult.
|
||||
// 2. Set C to the code unit whose numeric value is n.
|
||||
// 3. Set k to k + optionalAdvance.
|
||||
// c. Set R to the string-concatenation of R and C.
|
||||
// d. Set k to k + 1.
|
||||
vec.push(unescaped_cp); |
||||
} |
||||
// 6. Return R.
|
||||
Ok(js_string!(vec).into()) |
||||
} |
||||
|
||||
/// An iterator adaptor that can look ahead at up to `N` upcoming items without consuming them.
///
/// Peeked items are stored in a fixed-size buffer. The first `buffered_end` slots of `buffer`
/// hold items that were already pulled from `iterator` but not yet yielded by `next`; the
/// remaining slots contain stale or default values and must never be exposed.
struct PeekableN<I, const N: usize>
where
    I: Iterator,
{
    iterator: I,
    buffer: [I::Item; N],
    buffered_end: usize,
}

impl<I, const N: usize> PeekableN<I, N>
where
    I: Iterator,
    I::Item: Default + Copy,
{
    /// Creates a new `PeekableN` wrapping `iterator`, with an empty look-ahead buffer.
    fn new(iterator: I) -> Self {
        Self {
            iterator,
            buffer: [I::Item::default(); N],
            buffered_end: 0,
        }
    }

    /// Peeks up to `count` upcoming items without consuming them.
    ///
    /// The returned slice is shorter than `count` when the underlying iterator runs out of
    /// items first. Requests larger than the buffer capacity `N` are clamped to `N` instead of
    /// indexing past the end of the buffer (which would panic).
    fn peek_n(&mut self, count: usize) -> &[I::Item] {
        // Never try to buffer more than the fixed-size buffer can hold.
        let count = count.min(N);

        // Top up the buffer until it holds `count` items or the source is exhausted.
        while self.buffered_end < count {
            let Some(next) = self.iterator.next() else {
                break;
            };
            self.buffer[self.buffered_end] = next;
            self.buffered_end += 1;
        }

        &self.buffer[..count.min(self.buffered_end)]
    }
}

impl<I, const N: usize> Iterator for PeekableN<I, N>
where
    I: Iterator,
    I::Item: Copy,
{
    type Item = I::Item;

    /// Yields previously peeked items first, in order, then falls back to the wrapped iterator.
    fn next(&mut self) -> Option<Self::Item> {
        if self.buffered_end == 0 {
            return self.iterator.next();
        }

        let item = self.buffer[0];
        // Shift the remaining buffered items to the front; the consumed item ends up in the
        // unused tail of the buffer, beyond `buffered_end`.
        self.buffer.rotate_left(1);
        self.buffered_end -= 1;
        Some(item)
    }
}
Loading…
Reference in new issue