Browse Source

Add a display type for JsString to allow formatting without allocations (#3951)

Co-authored-by: Haled Odat <8566042+HalidOdat@users.noreply.github.com>
pull/3992/head
Hans Larsen 3 months ago committed by GitHub
parent
commit
5534ec2773
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 44
      core/string/src/display.rs
  2. 40
      core/string/src/lib.rs
  3. 12
      core/string/src/str.rs
  4. 22
      core/string/src/tests.rs

44
core/string/src/display.rs

@ -0,0 +1,44 @@
//! Display implementations for [`crate::JsString`].
use crate::{CodePoint, JsStr, JsStrVariant};
use std::fmt;
use std::fmt::Write;
/// Display implementation for [`crate::JsString`] that escapes unicode characters.
#[derive(Debug)]
pub struct JsStrDisplayEscaped<'a> {
inner: JsStr<'a>,
}
impl fmt::Display for JsStrDisplayEscaped<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self.inner.variant() {
// SAFETY: `JsStrVariant::Latin1` does not contain any unpaired surrogates, so need to check.
JsStrVariant::Latin1(v) => v
.iter()
.copied()
.map(char::from)
.try_for_each(|c| f.write_char(c)),
JsStrVariant::Utf16(_) => self.inner.code_points().try_for_each(|r| match r {
CodePoint::Unicode(c) => f.write_char(c),
CodePoint::UnpairedSurrogate(c) => {
write!(f, "\\u{c:04X}")
}
}),
}
}
}
impl<'a> From<JsStr<'a>> for JsStrDisplayEscaped<'a> {
fn from(inner: JsStr<'a>) -> Self {
Self { inner }
}
}
#[test]
fn latin1() {
// 0xE9 is `é` in ISO-8859-1 (see https://www.ascii-code.com/ISO-8859-1).
let s = JsStr::latin1(b"Hello \xE9 world!");
let rust_str = format!("{}", JsStrDisplayEscaped { inner: s });
assert_eq!(rust_str, "Hello é world!");
}

40
core/string/src/lib.rs

@ -17,6 +17,7 @@
#![allow(clippy::module_name_repetitions)]
mod common;
mod display;
mod iter;
mod str;
mod tagged;
@ -25,6 +26,7 @@ mod tagged;
mod tests;
use self::{iter::Windows, str::JsSliceIndex};
use crate::display::JsStrDisplayEscaped;
use crate::tagged::{Tagged, UnwrappedTagged};
#[doc(inline)]
pub use crate::{
@ -32,6 +34,7 @@ pub use crate::{
iter::Iter,
str::{JsStr, JsStrVariant},
};
use std::fmt::Write;
use std::{
alloc::{alloc, dealloc, Layout},
cell::Cell,
@ -150,6 +153,17 @@ impl CodePoint {
}
}
impl std::fmt::Display for CodePoint {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
CodePoint::Unicode(c) => f.write_char(*c),
CodePoint::UnpairedSurrogate(c) => {
write!(f, "\\u{c:04X}")
}
}
}
}
/// A `usize` contains a flag and the length of Latin1/UTF-16 .
/// ```text
/// ┌────────────────────────────────────┐
@ -528,10 +542,7 @@ impl JsString {
/// Gets an iterator of all the Unicode codepoints of a [`JsString`].
#[inline]
pub fn code_points(&self) -> impl Iterator<Item = CodePoint> + Clone + '_ {
char::decode_utf16(self.iter()).map(|res| match res {
Ok(c) => CodePoint::Unicode(c),
Err(e) => CodePoint::UnpairedSurrogate(e.unpaired_surrogate()),
})
self.as_str().code_points()
}
/// Abstract operation `StringIndexOf ( string, searchValue, fromIndex )`
@ -935,6 +946,15 @@ impl JsString {
UnwrappedTagged::Tag(_inner) => None,
}
}
/// Gets a displayable escaped string. This may be faster and has less
/// allocations than `format!("{}", str.to_string_escaped())` when
/// displaying.
#[inline]
#[must_use]
pub fn display_escaped(&self) -> JsStrDisplayEscaped<'_> {
JsStrDisplayEscaped::from(self.as_str())
}
}
impl Clone for JsString {
@ -1036,10 +1056,7 @@ impl Drop for JsString {
impl ToStringEscaped for JsString {
#[inline]
fn to_string_escaped(&self) -> String {
match self.as_str().variant() {
JsStrVariant::Latin1(v) => v.iter().copied().map(char::from).collect(),
JsStrVariant::Utf16(v) => v.to_string_escaped(),
}
format!("{}", self.display_escaped())
}
}
@ -1232,11 +1249,6 @@ pub(crate) trait ToStringEscaped {
impl ToStringEscaped for [u16] {
#[inline]
fn to_string_escaped(&self) -> String {
char::decode_utf16(self.iter().copied())
.map(|r| match r {
Ok(c) => String::from(c),
Err(e) => format!("\\u{:04X}", e.unpaired_surrogate()),
})
.collect()
JsString::from(self).to_string_escaped()
}
}

12
core/string/src/str.rs

@ -1,4 +1,4 @@
use crate::{is_trimmable_whitespace, is_trimmable_whitespace_latin1, Iter};
use crate::{is_trimmable_whitespace, is_trimmable_whitespace_latin1, CodePoint, Iter};
use std::{
hash::{Hash, Hasher},
slice::SliceIndex,
@ -234,6 +234,16 @@ impl<'a> JsStr<'a> {
let (m, n) = (self.len(), needle.len());
m >= n && needle == self.get(m - n..).expect("already checked size")
}
/// Gets an iterator of all the Unicode codepoints of a [`JsStr`].
/// This is not optimized for Latin1 strings.
#[inline]
pub(crate) fn code_points(self) -> impl Iterator<Item = CodePoint> + Clone + 'a {
char::decode_utf16(self.iter()).map(|res| match res {
Ok(c) => CodePoint::Unicode(c),
Err(e) => CodePoint::UnpairedSurrogate(e.unpaired_surrogate()),
})
}
}
impl Hash for JsStr<'_> {

22
core/string/src/tests.rs

@ -2,7 +2,7 @@
use std::hash::{BuildHasher, BuildHasherDefault, Hash};
use crate::{JsStr, JsString, StaticJsString, StaticJsStrings};
use crate::{JsStr, JsString, StaticJsString, StaticJsStrings, ToStringEscaped};
use rustc_hash::FxHasher;
@ -174,6 +174,26 @@ fn conversion_to_known_static_js_string() {
assert!(string.unwrap().as_str().is_latin1());
}
#[test]
fn to_string_escaped() {
assert_eq!(
JsString::from("Hello, \u{1D49E} world!").to_string_escaped(),
"Hello, \u{1D49E} world!"
);
assert_eq!(
JsString::from("Hello, world!").to_string_escaped(),
"Hello, world!"
);
// 15 should not be escaped.
let unpaired_surrogates: [u16; 3] = [0xDC58, 0xD83C, 0x0015];
assert_eq!(
JsString::from(&unpaired_surrogates).to_string_escaped(),
"\\uDC58\\uD83C\u{15}"
);
}
#[test]
fn from_static_js_string() {
static STATIC_HELLO_WORLD: StaticJsString =

Loading…
Cancel
Save