Browse Source

Add a display_lossy() to write a JsString lossily (#4023)

* Add a display_lossy() to write a JsString lossily

* cargo fmt
expect-lints
Hans Larsen 1 month ago committed by GitHub
parent
commit
acd1a8d9ab
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 48
      core/string/src/display.rs
  2. 12
      core/string/src/lib.rs
  3. 8
      core/string/src/str.rs

48
core/string/src/display.rs

@ -34,6 +34,27 @@ impl<'a> From<JsStr<'a>> for JsStrDisplayEscaped<'a> {
}
}
/// Display implementation for [`crate::JsString`] that replaces unpaired
/// surrogates with the Unicode replacement character (`U+FFFD`) instead of
/// escaping them.
#[derive(Debug)]
pub struct JsStrDisplayLossy<'a> {
// The borrowed string whose code points are written out when formatting.
inner: JsStr<'a>,
}
impl fmt::Display for JsStrDisplayLossy<'_> {
    /// Writes the wrapped string to `f` one code point at a time, using the
    /// lossy code point iterator of the inner [`JsStr`].
    ///
    /// Formatting stops at, and returns, the first error reported by `f`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Latin1 strings go through the same generic path; a dedicated fast
        // path is not needed here.
        for ch in self.inner.code_points_lossy() {
            f.write_char(ch)?;
        }
        Ok(())
    }
}
impl<'a> From<JsStr<'a>> for JsStrDisplayLossy<'a> {
fn from(inner: JsStr<'a>) -> Self {
Self { inner }
}
}
#[test]
fn latin1() {
// 0xE9 is `é` in ISO-8859-1 (see https://www.ascii-code.com/ISO-8859-1).
@ -41,4 +62,31 @@ fn latin1() {
let rust_str = format!("{}", JsStrDisplayEscaped { inner: s });
assert_eq!(rust_str, "Hello é world!");
let rust_str = format!("{}", JsStrDisplayLossy { inner: s });
assert_eq!(rust_str, "Hello é world!");
}
#[test]
fn emoji() {
    // The surrogate pair 0xD83D 0xDE00 encodes U+1F600 `😀`
    // (see https://www.fileformat.info/info/unicode/char/1f600/index.htm).
    let grinning = JsStr::utf16(&[0xD83D, 0xDE00]);

    // A well-formed pair must come out unchanged from both display adapters.
    let escaped = format!("{}", JsStrDisplayEscaped { inner: grinning });
    let lossy = format!("{}", JsStrDisplayLossy { inner: grinning });

    assert_eq!(escaped, "😀");
    assert_eq!(lossy, "😀");
}
#[test]
fn unpaired_surrogates() {
    // 0xD800 is an unpaired (lone high) surrogate
    // (see https://www.fileformat.info/info/unicode/char/d800/index.htm).
    let s = JsStr::utf16(&[0xD800]);

    // The escaped display spells the surrogate out as a `\u` escape sequence.
    let rust_str = format!("{}", JsStrDisplayEscaped { inner: s });
    assert_eq!(rust_str, "\\uD800");

    // The lossy display substitutes U+FFFD REPLACEMENT CHARACTER. It is
    // written as an escape so the expectation survives tools that mangle the
    // raw UTF-8 bytes (EF BF BD) of the literal character.
    let rust_str = format!("{}", JsStrDisplayLossy { inner: s });
    assert_eq!(rust_str, "\u{FFFD}");
}

12
core/string/src/lib.rs

@ -26,7 +26,7 @@ mod tagged;
mod tests;
use self::{iter::Windows, str::JsSliceIndex};
use crate::display::JsStrDisplayEscaped;
use crate::display::{JsStrDisplayEscaped, JsStrDisplayLossy};
use crate::tagged::{Tagged, UnwrappedTagged};
#[doc(inline)]
pub use crate::{
@ -960,7 +960,7 @@ impl JsString {
}
}
/// Gets a displayable escaped string. This may be faster and has less
/// Gets a displayable escaped string. This may be faster and has fewer
/// allocations than `format!("{}", str.to_string_escaped())` when
/// displaying.
#[inline]
@ -968,6 +968,14 @@ impl JsString {
pub fn display_escaped(&self) -> JsStrDisplayEscaped<'_> {
JsStrDisplayEscaped::from(self.as_str())
}
/// Gets a displayable lossy string. This may be faster and has fewer
/// allocations than `format!("{}", str.to_string_lossy())` when displaying.
#[inline]
#[must_use]
pub fn display_lossy(&self) -> JsStrDisplayLossy<'_> {
JsStrDisplayLossy::from(self.as_str())
}
}
impl Clone for JsString {

8
core/string/src/str.rs

@ -235,6 +235,14 @@ impl<'a> JsStr<'a> {
m >= n && needle == self.get(m - n..).expect("already checked size")
}
/// Iterates over the Unicode code points of a [`JsStr`], yielding `U+FFFD`
/// (the replacement character) in place of every unpaired surrogate. This is
/// faster than using [`Self::code_points`].
#[inline]
pub(crate) fn code_points_lossy(self) -> impl Iterator<Item = char> + 'a {
    let decoded = char::decode_utf16(self.iter());
    // Decoding errors (unpaired surrogates) collapse to the replacement character.
    decoded.map(|unit| unit.unwrap_or(char::REPLACEMENT_CHARACTER))
}
/// Gets an iterator of all the Unicode codepoints of a [`JsStr`].
/// This is not optimized for Latin1 strings.
#[inline]

Loading…
Cancel
Save