diff --git a/boa_engine/src/builtins/object/mod.rs b/boa_engine/src/builtins/object/mod.rs index 6139ae1ab9..19968c67ac 100644 --- a/boa_engine/src/builtins/object/mod.rs +++ b/boa_engine/src/builtins/object/mod.rs @@ -26,7 +26,7 @@ use crate::{ }, property::{Attribute, PropertyDescriptor, PropertyKey, PropertyNameKind}, realm::Realm, - string::{common::StaticJsStrings, CowJsString}, + string::{common::StaticJsStrings, JsStringSlice}, symbol::JsSymbol, value::JsValue, Context, JsArgs, JsResult, JsString, @@ -836,7 +836,7 @@ impl Object { let tag_str = tag .as_string() .map(JsString::as_str) - .map_or(CowJsString::from(builtin_tag), Into::into); + .map_or(JsStringSlice::from(builtin_tag), Into::into); // 17. Return the string-concatenation of "[object ", tag, and "]". Ok(js_string!("[object ", tag_str, "]").into()) diff --git a/boa_engine/src/builtins/string/mod.rs b/boa_engine/src/builtins/string/mod.rs index 071eeee616..83128798e9 100644 --- a/boa_engine/src/builtins/string/mod.rs +++ b/boa_engine/src/builtins/string/mod.rs @@ -2350,13 +2350,13 @@ impl String { // the code unit 0x0022 (QUOTATION MARK) // escapedV // the code unit 0x0022 (QUOTATION MARK) - p1 = js_string!(p1, " ", attribute, "=\"", &escaped_v[..], "\""); + p1 = js_string!(&p1, " ", attribute, "=\"", &escaped_v[..], "\""); } // 5. Let p2 be the string-concatenation of p1 and ">". // 6. Let p3 be the string-concatenation of p2 and S. // 7. Let p4 be the string-concatenation of p3, "". - let p4 = js_string!(p1, ">", s, ""); + let p4 = js_string!(&p1, ">", &s, ""); // 8. Return p4. Ok(p4.into()) diff --git a/boa_engine/src/string/mod.rs b/boa_engine/src/string/mod.rs index df386d0301..0362478606 100644 --- a/boa_engine/src/string/mod.rs +++ b/boa_engine/src/string/mod.rs @@ -22,6 +22,7 @@ #![allow(unstable_name_collisions)] pub(crate) mod common; +mod slice; mod str; use crate::{ @@ -33,7 +34,10 @@ use boa_gc::{empty_trace, Finalize, Trace}; pub use boa_macros::utf16; #[doc(inline)] -pub use crate::string::str::{JsStr, JsStrVariant}; +pub use crate::string::{ + slice::JsStringSlice, + str::{JsStr, JsStrVariant}, +}; use std::{ alloc::{alloc, dealloc, Layout}, @@ -46,7 +50,7 @@ use std::{ str::FromStr, }; -use self::{common::StaticJsStrings, str::JsSliceIndex}; +use self::{common::StaticJsStrings, slice::JsStringSliceVariant, str::JsSliceIndex}; fn alloc_overflow() -> ! { panic!("detected overflow during string allocation") @@ -102,16 +106,16 @@ macro_rules! js_string { $crate::JsString::default() }; ($s:literal) => { - $crate::JsString::from($crate::string::CowJsString::from($s)) + $crate::JsString::from($s) }; ($s:expr) => { $crate::JsString::from($s) }; ( $x:expr, $y:expr ) => { - $crate::JsString::concat($crate::string::CowJsString::from($x), $crate::string::CowJsString::from($y)) + $crate::JsString::concat($crate::string::JsStringSlice::from($x), $crate::string::JsStringSlice::from($y)) }; ( $( $s:expr ),+ ) => { - $crate::JsString::concat_array(&[ $( $crate::string::CowJsString::from($s) ),+ ]) + $crate::JsString::concat_array(&[ $( $crate::string::JsStringSlice::from($s) ),+ ]) }; } @@ -220,102 +224,21 @@ unsafe impl Trace for JsString { empty_trace!(); } -#[derive(Debug)] -pub enum CowJsString<'a> { - Borrowed(JsStr<'a>), - Owned(JsString), -} - -impl CowJsString<'_> { - fn as_slice(&self) -> JsStr<'_> { - match self { - CowJsString::Borrowed(s) => *s, - CowJsString::Owned(s) => s.as_str(), - } - } -} - -impl From for CowJsString<'_> { - fn from(value: JsString) -> Self { - Self::Owned(value) - } -} - -impl<'a> From<&'a JsString> for CowJsString<'a> { - fn from(value: &'a JsString) -> Self { - Self::Borrowed(value.as_str()) - } -} - -impl<'a> From> for CowJsString<'a> { - fn from(value: JsStr<'a>) -> Self { - Self::Borrowed(value) - } -} - -impl<'a> From<&'a str> for CowJsString<'a> { - fn from(value: &'a str) -> Self { - if value.is_ascii() { - return Self::Borrowed( - // SAFETY: Already checked that it's ASCII, so this is safe. - unsafe { JsStr::ascii_unchecked(value) }, - ); - } - - Self::Owned(JsString::from( - &value.encode_utf16().collect::>()[..], - )) - } -} - -impl<'a> From<&'a [u16]> for CowJsString<'a> { - fn from(s: &'a [u16]) -> Self { - if is_ascii(s) { - let s = s.iter().copied().map(|c| c as u8).collect::>(); - // SAFETY: Already checked that it's ASCII, so this is safe. - let s = unsafe { std::str::from_utf8_unchecked(&s) }; - return Self::Owned(StaticJsStrings::get_string(s).unwrap_or_else(|| { - JsString::from_slice_skip_interning( - // SAFETY: Already checked that it's ASCII, so this is safe. - unsafe { JsStr::ascii_unchecked(s) }, - ) - })); - } - // SAFETY: Already checked that isn't ASCII, so this is safe. - Self::Borrowed(unsafe { JsStr::u16_unchecked(s) }) - } -} - -impl From> for JsString { - fn from(value: JsStr<'_>) -> Self { - match value.variant() { - // TODO: Maybe remove the check that comes from `JsString::from` <&str>. - JsStrVariant::Ascii(s) => JsString::from(s), - JsStrVariant::U16(s) => JsString::from(s), - } - } -} - -impl From> for JsString { - fn from(value: CowJsString<'_>) -> Self { - match value { - CowJsString::Borrowed(s) => JsString::from(s), - CowJsString::Owned(s) => s, - } - } -} - #[derive(Debug, Clone)] pub enum Iter<'a> { Ascii(std::str::Bytes<'a>), + U8(std::str::EncodeUtf16<'a>, usize), U16(std::iter::Copied>), } impl<'a> Iter<'a> { - fn new(s: JsStr<'a>) -> Self { + fn new(s: JsStringSlice<'a>) -> Self { match s.variant() { - JsStrVariant::Ascii(s) => Self::Ascii(s.bytes()), - JsStrVariant::U16(s) => Self::U16(s.iter().copied()), + JsStringSliceVariant::U8Ascii(s) => Self::Ascii(s.bytes()), + JsStringSliceVariant::U8NonAscii(s, len) => Self::U8(s.encode_utf16(), len), + JsStringSliceVariant::U16Ascii(s) | JsStringSliceVariant::U16NonAscii(s) => { + Self::U16(s.iter().copied()) + } } } } @@ -326,6 +249,7 @@ impl Iterator for Iter<'_> { fn next(&mut self) -> Option { match self { Self::Ascii(iter) => iter.map(u16::from).next(), + Self::U8(iter, _) => iter.next(), Self::U16(iter) => iter.next(), } } @@ -337,6 +261,7 @@ impl ExactSizeIterator for Iter<'_> { fn len(&self) -> usize { match self { Self::Ascii(v) => v.len(), + Self::U8(_, len) => *len, Self::U16(v) => v.len(), } } @@ -353,7 +278,7 @@ impl JsString { #[inline] #[must_use] pub fn iter(&self) -> Iter<'_> { - Iter::new(self.as_str()) + Iter::new(self.as_str().into()) } /// Obtains the underlying [`&[u16]`][slice] slice of a [`JsString`] @@ -397,21 +322,21 @@ impl JsString { /// Creates a new [`JsString`] from the concatenation of `x` and `y`. #[must_use] - pub fn concat(x: CowJsString<'_>, y: CowJsString<'_>) -> Self { + pub fn concat(x: JsStringSlice<'_>, y: JsStringSlice<'_>) -> Self { Self::concat_array(&[x, y]) } /// Creates a new [`JsString`] from the concatenation of every element of /// `strings`. #[must_use] - pub fn concat_array(strings: &[CowJsString<'_>]) -> Self { + pub fn concat_array(strings: &[JsStringSlice<'_>]) -> Self { let mut ascii = true; let mut full_count = 0usize; for string in strings { - let Some(sum) = full_count.checked_add(string.as_slice().len()) else { + let Some(sum) = full_count.checked_add(string.len()) else { alloc_overflow() }; - if !string.as_slice().is_ascii() { + if !string.is_ascii() { ascii = false; } full_count = sum; @@ -424,8 +349,7 @@ impl JsString { let string = { // SAFETY: `allocate_inner` guarantees that `ptr` is a valid pointer. let mut data = unsafe { addr_of_mut!((*ptr.as_ptr()).data).cast::() }; - for string in strings { - let string = string.as_slice(); + for &string in strings { let count = string.len(); // SAFETY: // The sum of all `count` for each `string` equals `full_count`, and since we're @@ -439,21 +363,43 @@ impl JsString { // `ptr` and all `string`s should never overlap. unsafe { match (ascii, string.variant()) { - (true, JsStrVariant::Ascii(s)) => { + (true, JsStringSliceVariant::U8Ascii(s)) => { ptr::copy_nonoverlapping(s.as_ptr(), data.cast::(), count); data = data.cast::().add(count).cast::(); } - (false, JsStrVariant::Ascii(s)) => { + (true, JsStringSliceVariant::U16Ascii(s)) => { + for (i, byte) in s.iter().copied().enumerate() { + *data.cast::().add(i) = (byte & 0xFF) as u8; + } + data = data.cast::().add(count).cast::(); + } + (false, JsStringSliceVariant::U8Ascii(s)) => { for (i, byte) in s.bytes().enumerate() { *data.cast::().add(i) = u16::from(byte); } data = data.cast::().add(count).cast::(); } - (false, JsStrVariant::U16(s)) => { + (false, JsStringSliceVariant::U8NonAscii(s, _)) => { + for (i, byte) in s.encode_utf16().enumerate() { + *data.cast::().add(i) = byte; + } + data = data.cast::().add(count).cast::(); + } + ( + false, + JsStringSliceVariant::U16Ascii(s) + | JsStringSliceVariant::U16NonAscii(s), + ) => { ptr::copy_nonoverlapping(s.as_ptr(), data.cast::(), count); data = data.cast::().add(count).cast::(); } - (true, JsStrVariant::U16(_)) => unreachable!(), + ( + true, + JsStringSliceVariant::U8NonAscii(..) + | JsStringSliceVariant::U16NonAscii(_), + ) => { + unreachable!() + } } } } @@ -819,12 +765,12 @@ impl JsString { } /// Creates a new [`JsString`] from `data`, without checking if the string is in the interner. - fn from_slice_skip_interning(string: JsStr<'_>) -> Self { + fn from_slice_skip_interning(string: JsStringSlice<'_>) -> Self { let count = string.len(); let ptr = Self::allocate_inner(count, string.is_ascii()); // SAFETY: `allocate_inner` guarantees that `ptr` is a valid pointer. - let data = unsafe { addr_of_mut!((*ptr.as_ptr()).data) }; + let data = unsafe { addr_of_mut!((*ptr.as_ptr()).data).cast::() }; // SAFETY: // - We read `count = data.len()` elements from `data`, which is within the bounds of the slice. // - `allocate_inner` must allocate at least `count` elements, which allows us to safely @@ -835,12 +781,21 @@ impl JsString { // and `data` should never overlap. unsafe { match string.variant() { - JsStrVariant::Ascii(string) => { - ptr::copy_nonoverlapping(string.as_ptr(), data.cast::(), count); + JsStringSliceVariant::U8Ascii(s) => { + ptr::copy_nonoverlapping(s.as_ptr(), data.cast::(), count); } - JsStrVariant::U16(string) => { - assert!(!is_ascii(string), "should be u16 not ascii"); - ptr::copy_nonoverlapping(string.as_ptr(), data.cast::(), count); + JsStringSliceVariant::U16Ascii(s) => { + for (i, byte) in s.iter().copied().enumerate() { + *data.cast::().add(i) = (byte & 0xFF) as u8; + } + } + JsStringSliceVariant::U8NonAscii(s, _) => { + for (i, byte) in s.encode_utf16().enumerate() { + *data.cast::().add(i) = byte; + } + } + JsStringSliceVariant::U16NonAscii(s) => { + ptr::copy_nonoverlapping(s.as_ptr(), data.cast::(), count); } } } @@ -850,6 +805,19 @@ impl JsString { } } + /// Creates a new [`JsString`] from `data`. + fn from_slice(string: JsStringSlice<'_>) -> Self { + let this = Self::from_slice_skip_interning(string); + + if let Some(s) = this.as_str().as_ascii() { + if let Some(s) = StaticJsStrings::get_string(s) { + return s; + } + } + + this + } + #[inline] #[must_use] pub fn len(&self) -> usize { @@ -898,15 +866,15 @@ impl JsString { } } - pub(crate) fn trim(&self) -> JsStr<'_> { + pub(crate) fn trim(&self) -> JsStringSlice<'_> { self.as_str().trim() } - pub(crate) fn trim_start(&self) -> JsStr<'_> { + pub(crate) fn trim_start(&self) -> JsStringSlice<'_> { self.as_str().trim_start() } - pub(crate) fn trim_end(&self) -> JsStr<'_> { + pub(crate) fn trim_end(&self) -> JsStringSlice<'_> { self.as_str().trim_end() } @@ -993,7 +961,7 @@ impl Drop for JsString { } } -fn is_ascii(slice: &[u16]) -> bool { +pub(crate) fn is_ascii(slice: &[u16]) -> bool { for &element in slice { if (element & 0b0111_1111) != element { return false; @@ -1024,41 +992,49 @@ impl Eq for JsString {} impl From<&[u16]> for JsString { #[inline] fn from(s: &[u16]) -> Self { - if is_ascii(s) { - let s = s.iter().copied().map(|c| c as u8).collect::>(); - // SAFETY: Already checked that it's ASCII, so this is safe. - let s = unsafe { std::str::from_utf8_unchecked(&s) }; - return StaticJsStrings::get_string(s).unwrap_or_else(|| { - Self::from_slice_skip_interning( - // SAFETY: Already checked that it's ASCII, so this is safe. - unsafe { JsStr::ascii_unchecked(s) }, - ) - }); - } - Self::from_slice_skip_interning( - // SAFETY: Already checked that it's not ASCII, so this is safe. - unsafe { JsStr::u16_unchecked(s) }, - ) + JsString::from_slice(JsStringSlice::from(s)) } } impl From<&str> for JsString { #[inline] fn from(s: &str) -> Self { - StaticJsStrings::get_string(s).unwrap_or_else(|| { - if s.is_ascii() { - Self::from_slice_skip_interning( - // SAFETY: Already checked that it's ASCII, so this is safe. - unsafe { JsStr::ascii_unchecked(s) }, - ) - } else { - let s = s.encode_utf16().collect::>(); - Self::from_slice_skip_interning( - // SAFETY: Already checked that it's not ASCII, so this is safe. - unsafe { JsStr::u16_unchecked(&s[..]) }, - ) + StaticJsStrings::get_string(s) + .unwrap_or_else(|| JsString::from_slice_skip_interning(JsStringSlice::from(s))) + } +} + +impl From> for JsString { + fn from(value: JsStr<'_>) -> Self { + match value.variant() { + JsStrVariant::Ascii(s) => { + StaticJsStrings::get_string(s).unwrap_or_else(|| { + // SAFETY: `JsStrVariant::Ascii` Always contains ASCII, so this is safe. + let slice = unsafe { JsStringSlice::u8_ascii_unchecked(s) }; + JsString::from_slice_skip_interning(slice) + }) } - }) + JsStrVariant::U16(s) => { + // SAFETY: `JsStrVariant::U16` Always contains non-ASCII, so this is safe. + let slice = unsafe { JsStringSlice::u16_non_ascii_unchecked(s) }; + JsString::from_slice(slice) + } + } + } +} + +impl From> for JsString { + fn from(value: JsStringSlice<'_>) -> Self { + match value.variant() { + JsStringSliceVariant::U8Ascii(s) => { + StaticJsStrings::get_string(s).unwrap_or_else(|| { + // SAFETY: `JsStrVariant::Ascii` Always contains ASCII, so this is safe. + let slice = unsafe { JsStringSlice::u8_ascii_unchecked(s) }; + JsString::from_slice_skip_interning(slice) + }) + } + _ => JsString::from_slice(value), + } } } @@ -1088,7 +1064,10 @@ impl From<&[u16; N]> for JsString { impl Hash for JsString { fn hash(&self, state: &mut H) { - self.as_str().hash(state); + match self.as_str().variant() { + JsStrVariant::Ascii(s) => s.hash(state), + JsStrVariant::U16(s) => s.hash(state), + } } } @@ -1295,7 +1274,7 @@ mod tests { use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; - const HELLOWORLD: &[u16] = utf16!("Hello World!"); + const HELLOWORLD: &str = "Hello World!"; let x = js_string!(HELLOWORLD); assert_eq!(&x, HELLOWORLD); @@ -1331,4 +1310,14 @@ mod tests { assert_eq!(&xyzw, utf16!("hello, world!")); assert_eq!(xyzw.refcount(), Some(1)); } + + #[test] + fn trim_start_non_ascii_to_ascii() { + let s = "\u{2029}abc"; + let x = js_string!(s); + + let y = js_string!(x.trim_start()); + + assert_eq!(&y, s.trim_start()); + } } diff --git a/boa_engine/src/string/slice.rs b/boa_engine/src/string/slice.rs new file mode 100644 index 0000000000..e386be9ff3 --- /dev/null +++ b/boa_engine/src/string/slice.rs @@ -0,0 +1,210 @@ +use crate::{builtins::string::is_trimmable_whitespace, JsString}; + +use super::{is_ascii, JsStr, JsStrVariant}; + +#[derive(Debug, Clone, Copy)] +pub enum JsStringSliceVariant<'a> { + U8Ascii(&'a str), + U8NonAscii(&'a str, usize), + U16Ascii(&'a [u16]), + U16NonAscii(&'a [u16]), +} + +#[derive(Debug, Clone, Copy)] +pub struct JsStringSlice<'a> { + inner: JsStringSliceVariant<'a>, +} + +impl<'a> JsStringSlice<'a> { + pub(crate) unsafe fn u8_ascii_unchecked(value: &'a str) -> Self { + debug_assert!(value.is_ascii(), "string must be ascii"); + + Self { + inner: JsStringSliceVariant::U8Ascii(value), + } + } + + pub(crate) unsafe fn u16_ascii_unchecked(value: &'a [u16]) -> Self { + debug_assert!(is_ascii(value), "string must be ascii"); + + Self { + inner: JsStringSliceVariant::U16Ascii(value), + } + } + + pub(crate) unsafe fn u8_non_ascii_unchecked(value: &'a str) -> Self { + debug_assert!(!value.is_ascii(), "string must not be ascii"); + let len = value.encode_utf16().count(); + + Self { + inner: JsStringSliceVariant::U8NonAscii(value, len), + } + } + + pub(crate) unsafe fn u16_non_ascii_unchecked(value: &'a [u16]) -> Self { + debug_assert!(!is_ascii(value), "string must not be ascii"); + + Self { + inner: JsStringSliceVariant::U16NonAscii(value), + } + } + + pub(crate) fn variant(self) -> JsStringSliceVariant<'a> { + self.inner + } + + pub fn len(&self) -> usize { + match self.variant() { + JsStringSliceVariant::U8Ascii(s) => s.len(), + JsStringSliceVariant::U8NonAscii(_, len) => len, + JsStringSliceVariant::U16NonAscii(s) | JsStringSliceVariant::U16Ascii(s) => s.len(), + } + } + + pub fn is_ascii(&self) -> bool { + matches!( + self.variant(), + JsStringSliceVariant::U8Ascii(_) | JsStringSliceVariant::U16Ascii(_) + ) + } + + /// Trims both leading and trailing space. + #[inline] + #[must_use] + pub fn trim(&self) -> Self { + self.trim_start().trim_end() + } + + /// Trims all leading space. + #[inline] + #[must_use] + pub fn trim_start(&self) -> JsStringSlice<'a> { + match self.variant() { + JsStringSliceVariant::U8Ascii(s) => { + // SAFETY: Calling `trim_start()` on ASCII string always returns ASCII string, so this is safe. + unsafe { JsStringSlice::u8_ascii_unchecked(s.trim_start()) } + } + JsStringSliceVariant::U8NonAscii(s, _) => JsStringSlice::from(s.trim_start()), + JsStringSliceVariant::U16Ascii(s) => { + let value = if let Some(left) = s.iter().copied().position(|r| { + !char::from_u32(u32::from(r)) + .map(is_trimmable_whitespace) + .unwrap_or_default() + }) { + &s[left..] + } else { + // SAFETY: An empty string is valid ASCII, so this is safe. + return unsafe { JsStringSlice::u8_ascii_unchecked("") }; + }; + + // SAFETY: Calling `trim_start()` on ASCII string always returns ASCII string, so this is safe. + unsafe { JsStringSlice::u16_ascii_unchecked(value) } + } + JsStringSliceVariant::U16NonAscii(s) => { + let value = if let Some(left) = s.iter().copied().position(|r| { + !char::from_u32(u32::from(r)) + .map(is_trimmable_whitespace) + .unwrap_or_default() + }) { + &s[left..] + } else { + // SAFETY: An empty string is valid ASCII, so this is safe. + return unsafe { JsStringSlice::u8_ascii_unchecked("") }; + }; + + JsStringSlice::from(value) + } + } + } + + /// Trims all trailing space. + #[inline] + #[must_use] + pub fn trim_end(&self) -> JsStringSlice<'a> { + match self.variant() { + JsStringSliceVariant::U8Ascii(s) => { + // SAFETY: Calling `trim_start()` on ASCII string always returns ASCII string, so this is safe. + unsafe { JsStringSlice::u8_ascii_unchecked(s.trim_end()) } + } + JsStringSliceVariant::U8NonAscii(s, _) => JsStringSlice::from(s.trim_end()), + JsStringSliceVariant::U16Ascii(s) => { + let value = if let Some(right) = s.iter().copied().rposition(|r| { + !char::from_u32(u32::from(r)) + .map(is_trimmable_whitespace) + .unwrap_or_default() + }) { + &s[..=right] + } else { + // SAFETY: An empty string is valid ASCII, so this is safe. + return unsafe { JsStringSlice::u8_ascii_unchecked("") }; + }; + + // SAFETY: Calling `trim_start()` on ASCII string always returns ASCII string, so this is safe. + unsafe { JsStringSlice::u16_ascii_unchecked(value) } + } + JsStringSliceVariant::U16NonAscii(s) => { + let value = if let Some(right) = s.iter().copied().rposition(|r| { + !char::from_u32(u32::from(r)) + .map(is_trimmable_whitespace) + .unwrap_or_default() + }) { + &s[..=right] + } else { + // SAFETY: An empty string is valid ASCII, so this is safe. + return unsafe { JsStringSlice::u8_ascii_unchecked("") }; + }; + + JsStringSlice::from(value) + } + } + } + + pub fn iter(self) -> crate::string::Iter<'a> { + crate::string::Iter::new(self) + } +} + +impl<'a> From<&'a JsString> for JsStringSlice<'a> { + fn from(value: &'a JsString) -> Self { + Self::from(value.as_str()) + } +} + +impl<'a> From> for JsStringSlice<'a> { + fn from(value: JsStr<'a>) -> Self { + match value.variant() { + JsStrVariant::Ascii(s) => { + // SAFETY: `JsStrVariant::Ascii` always contains ASCII string, so this safe. + unsafe { Self::u8_ascii_unchecked(s) } + } + JsStrVariant::U16(s) => { + // SAFETY: `JsStrVariant::Ascii` always contains non-ASCII string, so this safe. + unsafe { Self::u16_non_ascii_unchecked(s) } + } + } + } +} + +impl<'a> From<&'a str> for JsStringSlice<'a> { + fn from(value: &'a str) -> Self { + if value.is_ascii() { + // SAFETY: Already checked that it's ASCII, so this is safe. + return unsafe { Self::u8_ascii_unchecked(value) }; + } + + // SAFETY: Already checked that it's non-ASCII, so this is safe. + unsafe { Self::u8_non_ascii_unchecked(value) } + } +} + +impl<'a> From<&'a [u16]> for JsStringSlice<'a> { + fn from(s: &'a [u16]) -> Self { + if is_ascii(s) { + // SAFETY: Already checked that it's ASCII, so this is safe. + return unsafe { Self::u16_ascii_unchecked(s) }; + } + + // SAFETY: Already checked that it's non-ASCII, so this is safe. + unsafe { Self::u16_non_ascii_unchecked(s) } + } +} diff --git a/boa_engine/src/string/str.rs b/boa_engine/src/string/str.rs index c0c561b0b6..3f039d95f0 100644 --- a/boa_engine/src/string/str.rs +++ b/boa_engine/src/string/str.rs @@ -3,6 +3,8 @@ use std::slice::SliceIndex; use crate::{builtins::string::is_trimmable_whitespace, string::Iter}; use boa_interner::JStrRef; +use super::JsStringSlice; + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum JsStrVariant<'a> { Ascii(&'a str), @@ -69,8 +71,8 @@ impl<'a> JsStr<'a> { /// TODO: doc #[inline] #[must_use] - pub fn iter(&self) -> Iter<'_> { - Iter::new(*self) + pub fn iter(self) -> Iter<'a> { + Iter::new(self.into()) } pub(crate) fn as_str_ref(&self) -> JStrRef<'_> { @@ -89,68 +91,22 @@ impl<'a> JsStr<'a> { /// Trims both leading and trailing space. #[inline] #[must_use] - pub fn trim(&self) -> Self { + pub fn trim(self) -> JsStringSlice<'a> { self.trim_start().trim_end() } /// Trims all leading space. #[inline] #[must_use] - pub fn trim_start(&self) -> Self { - match self.variant() { - JsStrVariant::Ascii(s) => { - // SAFETY: Calling `trim_start()` on ASCII string always returns ASCII string, so this is safe. - unsafe { JsStr::ascii_unchecked(s.trim_start()) } - } - JsStrVariant::U16(s) => { - let value = if let Some(left) = s.iter().copied().position(|r| { - !char::from_u32(u32::from(r)) - .map(is_trimmable_whitespace) - .unwrap_or_default() - }) { - &s[left..] - } else { - // SAFETY: An empty string is valid ASCII, so this is safe. - return unsafe { JsStr::ascii_unchecked("") }; - }; - - // TODO: If we have a string that has ascii non-white space characters, - // and a leading non-ascii white space, that is trimmed making this ascii. - // - // SAFETY: - unsafe { JsStr::u16_unchecked(value) } - } - } + pub fn trim_start(self) -> JsStringSlice<'a> { + JsStringSlice::from(self).trim_start() } /// Trims all trailing space. #[inline] #[must_use] - pub fn trim_end(&self) -> Self { - match self.variant() { - JsStrVariant::Ascii(s) => { - // SAFETY: Calling `trim_end()` on ASCII string always returns ASCII string, so this is safe. - unsafe { JsStr::ascii_unchecked(s.trim_end()) } - } - JsStrVariant::U16(s) => { - let value = if let Some(right) = s.iter().copied().rposition(|r| { - !char::from_u32(u32::from(r)) - .map(is_trimmable_whitespace) - .unwrap_or_default() - }) { - &s[..=right] - } else { - // SAFETY: An empty string is valid ASCII, so this is safe. - return unsafe { JsStr::ascii_unchecked("") }; - }; - - // TODO: If we have a string that has ascii non-white space characters, - // and a trailing non-ascii white space, that is trimmed making this ascii. - // - // SAFETY: - unsafe { JsStr::u16_unchecked(value) } - } - } + pub fn trim_end(self) -> JsStringSlice<'a> { + JsStringSlice::from(self).trim_end() } pub fn get(&'a self, index: I) -> Option diff --git a/boa_engine/src/symbol.rs b/boa_engine/src/symbol.rs index dc469d55b9..ce625158c8 100644 --- a/boa_engine/src/symbol.rs +++ b/boa_engine/src/symbol.rs @@ -213,7 +213,7 @@ impl JsSymbol { return wk.fn_name(); } self.description() - .map(|s| js_string!("[", s, "]")) + .map(|s| js_string!("[", &s, "]")) .unwrap_or_default() }