Browse Source

Prevent allocating when concatenating strings

refactor/interner
Haled Odat 1 year ago
parent
commit
a19afd11a4
  1. 4
      boa_engine/src/builtins/object/mod.rs
  2. 4
      boa_engine/src/builtins/string/mod.rs
  3. 275
      boa_engine/src/string/mod.rs
  4. 210
      boa_engine/src/string/slice.rs
  5. 62
      boa_engine/src/string/str.rs
  6. 2
      boa_engine/src/symbol.rs

4
boa_engine/src/builtins/object/mod.rs

@ -26,7 +26,7 @@ use crate::{
}, },
property::{Attribute, PropertyDescriptor, PropertyKey, PropertyNameKind}, property::{Attribute, PropertyDescriptor, PropertyKey, PropertyNameKind},
realm::Realm, realm::Realm,
string::{common::StaticJsStrings, CowJsString}, string::{common::StaticJsStrings, JsStringSlice},
symbol::JsSymbol, symbol::JsSymbol,
value::JsValue, value::JsValue,
Context, JsArgs, JsResult, JsString, Context, JsArgs, JsResult, JsString,
@ -836,7 +836,7 @@ impl Object {
let tag_str = tag let tag_str = tag
.as_string() .as_string()
.map(JsString::as_str) .map(JsString::as_str)
.map_or(CowJsString::from(builtin_tag), Into::into); .map_or(JsStringSlice::from(builtin_tag), Into::into);
// 17. Return the string-concatenation of "[object ", tag, and "]". // 17. Return the string-concatenation of "[object ", tag, and "]".
Ok(js_string!("[object ", tag_str, "]").into()) Ok(js_string!("[object ", tag_str, "]").into())

4
boa_engine/src/builtins/string/mod.rs

@ -2350,13 +2350,13 @@ impl String {
// the code unit 0x0022 (QUOTATION MARK) // the code unit 0x0022 (QUOTATION MARK)
// escapedV // escapedV
// the code unit 0x0022 (QUOTATION MARK) // the code unit 0x0022 (QUOTATION MARK)
p1 = js_string!(p1, " ", attribute, "=\"", &escaped_v[..], "\""); p1 = js_string!(&p1, " ", attribute, "=\"", &escaped_v[..], "\"");
} }
// 5. Let p2 be the string-concatenation of p1 and ">". // 5. Let p2 be the string-concatenation of p1 and ">".
// 6. Let p3 be the string-concatenation of p2 and S. // 6. Let p3 be the string-concatenation of p2 and S.
// 7. Let p4 be the string-concatenation of p3, "</", tag, and ">". // 7. Let p4 be the string-concatenation of p3, "</", tag, and ">".
let p4 = js_string!(p1, ">", s, "</", tag, ">"); let p4 = js_string!(&p1, ">", &s, "</", tag, ">");
// 8. Return p4. // 8. Return p4.
Ok(p4.into()) Ok(p4.into())

275
boa_engine/src/string/mod.rs

@ -22,6 +22,7 @@
#![allow(unstable_name_collisions)] #![allow(unstable_name_collisions)]
pub(crate) mod common; pub(crate) mod common;
mod slice;
mod str; mod str;
use crate::{ use crate::{
@ -33,7 +34,10 @@ use boa_gc::{empty_trace, Finalize, Trace};
pub use boa_macros::utf16; pub use boa_macros::utf16;
#[doc(inline)] #[doc(inline)]
pub use crate::string::str::{JsStr, JsStrVariant}; pub use crate::string::{
slice::JsStringSlice,
str::{JsStr, JsStrVariant},
};
use std::{ use std::{
alloc::{alloc, dealloc, Layout}, alloc::{alloc, dealloc, Layout},
@ -46,7 +50,7 @@ use std::{
str::FromStr, str::FromStr,
}; };
use self::{common::StaticJsStrings, str::JsSliceIndex}; use self::{common::StaticJsStrings, slice::JsStringSliceVariant, str::JsSliceIndex};
fn alloc_overflow() -> ! { fn alloc_overflow() -> ! {
panic!("detected overflow during string allocation") panic!("detected overflow during string allocation")
@ -102,16 +106,16 @@ macro_rules! js_string {
$crate::JsString::default() $crate::JsString::default()
}; };
($s:literal) => { ($s:literal) => {
$crate::JsString::from($crate::string::CowJsString::from($s)) $crate::JsString::from($s)
}; };
($s:expr) => { ($s:expr) => {
$crate::JsString::from($s) $crate::JsString::from($s)
}; };
( $x:expr, $y:expr ) => { ( $x:expr, $y:expr ) => {
$crate::JsString::concat($crate::string::CowJsString::from($x), $crate::string::CowJsString::from($y)) $crate::JsString::concat($crate::string::JsStringSlice::from($x), $crate::string::JsStringSlice::from($y))
}; };
( $( $s:expr ),+ ) => { ( $( $s:expr ),+ ) => {
$crate::JsString::concat_array(&[ $( $crate::string::CowJsString::from($s) ),+ ]) $crate::JsString::concat_array(&[ $( $crate::string::JsStringSlice::from($s) ),+ ])
}; };
} }
@ -220,102 +224,21 @@ unsafe impl Trace for JsString {
empty_trace!(); empty_trace!();
} }
#[derive(Debug)]
pub enum CowJsString<'a> {
Borrowed(JsStr<'a>),
Owned(JsString),
}
impl CowJsString<'_> {
fn as_slice(&self) -> JsStr<'_> {
match self {
CowJsString::Borrowed(s) => *s,
CowJsString::Owned(s) => s.as_str(),
}
}
}
impl From<JsString> for CowJsString<'_> {
fn from(value: JsString) -> Self {
Self::Owned(value)
}
}
impl<'a> From<&'a JsString> for CowJsString<'a> {
fn from(value: &'a JsString) -> Self {
Self::Borrowed(value.as_str())
}
}
impl<'a> From<JsStr<'a>> for CowJsString<'a> {
fn from(value: JsStr<'a>) -> Self {
Self::Borrowed(value)
}
}
impl<'a> From<&'a str> for CowJsString<'a> {
fn from(value: &'a str) -> Self {
if value.is_ascii() {
return Self::Borrowed(
// SAFETY: Already checked that it's ASCII, so this is safe.
unsafe { JsStr::ascii_unchecked(value) },
);
}
Self::Owned(JsString::from(
&value.encode_utf16().collect::<Vec<_>>()[..],
))
}
}
impl<'a> From<&'a [u16]> for CowJsString<'a> {
fn from(s: &'a [u16]) -> Self {
if is_ascii(s) {
let s = s.iter().copied().map(|c| c as u8).collect::<Vec<_>>();
// SAFETY: Already checked that it's ASCII, so this is safe.
let s = unsafe { std::str::from_utf8_unchecked(&s) };
return Self::Owned(StaticJsStrings::get_string(s).unwrap_or_else(|| {
JsString::from_slice_skip_interning(
// SAFETY: Already checked that it's ASCII, so this is safe.
unsafe { JsStr::ascii_unchecked(s) },
)
}));
}
// SAFETY: Already checked that isn't ASCII, so this is safe.
Self::Borrowed(unsafe { JsStr::u16_unchecked(s) })
}
}
impl From<JsStr<'_>> for JsString {
fn from(value: JsStr<'_>) -> Self {
match value.variant() {
// TODO: Maybe remove the check that comes from `JsString::from` <&str>.
JsStrVariant::Ascii(s) => JsString::from(s),
JsStrVariant::U16(s) => JsString::from(s),
}
}
}
impl From<CowJsString<'_>> for JsString {
fn from(value: CowJsString<'_>) -> Self {
match value {
CowJsString::Borrowed(s) => JsString::from(s),
CowJsString::Owned(s) => s,
}
}
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum Iter<'a> { pub enum Iter<'a> {
Ascii(std::str::Bytes<'a>), Ascii(std::str::Bytes<'a>),
U8(std::str::EncodeUtf16<'a>, usize),
U16(std::iter::Copied<std::slice::Iter<'a, u16>>), U16(std::iter::Copied<std::slice::Iter<'a, u16>>),
} }
impl<'a> Iter<'a> { impl<'a> Iter<'a> {
fn new(s: JsStr<'a>) -> Self { fn new(s: JsStringSlice<'a>) -> Self {
match s.variant() { match s.variant() {
JsStrVariant::Ascii(s) => Self::Ascii(s.bytes()), JsStringSliceVariant::U8Ascii(s) => Self::Ascii(s.bytes()),
JsStrVariant::U16(s) => Self::U16(s.iter().copied()), JsStringSliceVariant::U8NonAscii(s, len) => Self::U8(s.encode_utf16(), len),
JsStringSliceVariant::U16Ascii(s) | JsStringSliceVariant::U16NonAscii(s) => {
Self::U16(s.iter().copied())
}
} }
} }
} }
@ -326,6 +249,7 @@ impl Iterator for Iter<'_> {
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
match self { match self {
Self::Ascii(iter) => iter.map(u16::from).next(), Self::Ascii(iter) => iter.map(u16::from).next(),
Self::U8(iter, _) => iter.next(),
Self::U16(iter) => iter.next(), Self::U16(iter) => iter.next(),
} }
} }
@ -337,6 +261,7 @@ impl ExactSizeIterator for Iter<'_> {
fn len(&self) -> usize { fn len(&self) -> usize {
match self { match self {
Self::Ascii(v) => v.len(), Self::Ascii(v) => v.len(),
Self::U8(_, len) => *len,
Self::U16(v) => v.len(), Self::U16(v) => v.len(),
} }
} }
@ -353,7 +278,7 @@ impl JsString {
#[inline] #[inline]
#[must_use] #[must_use]
pub fn iter(&self) -> Iter<'_> { pub fn iter(&self) -> Iter<'_> {
Iter::new(self.as_str()) Iter::new(self.as_str().into())
} }
/// Obtains the underlying [`&[u16]`][slice] slice of a [`JsString`] /// Obtains the underlying [`&[u16]`][slice] slice of a [`JsString`]
@ -397,21 +322,21 @@ impl JsString {
/// Creates a new [`JsString`] from the concatenation of `x` and `y`. /// Creates a new [`JsString`] from the concatenation of `x` and `y`.
#[must_use] #[must_use]
pub fn concat(x: CowJsString<'_>, y: CowJsString<'_>) -> Self { pub fn concat(x: JsStringSlice<'_>, y: JsStringSlice<'_>) -> Self {
Self::concat_array(&[x, y]) Self::concat_array(&[x, y])
} }
/// Creates a new [`JsString`] from the concatenation of every element of /// Creates a new [`JsString`] from the concatenation of every element of
/// `strings`. /// `strings`.
#[must_use] #[must_use]
pub fn concat_array(strings: &[CowJsString<'_>]) -> Self { pub fn concat_array(strings: &[JsStringSlice<'_>]) -> Self {
let mut ascii = true; let mut ascii = true;
let mut full_count = 0usize; let mut full_count = 0usize;
for string in strings { for string in strings {
let Some(sum) = full_count.checked_add(string.as_slice().len()) else { let Some(sum) = full_count.checked_add(string.len()) else {
alloc_overflow() alloc_overflow()
}; };
if !string.as_slice().is_ascii() { if !string.is_ascii() {
ascii = false; ascii = false;
} }
full_count = sum; full_count = sum;
@ -424,8 +349,7 @@ impl JsString {
let string = { let string = {
// SAFETY: `allocate_inner` guarantees that `ptr` is a valid pointer. // SAFETY: `allocate_inner` guarantees that `ptr` is a valid pointer.
let mut data = unsafe { addr_of_mut!((*ptr.as_ptr()).data).cast::<u8>() }; let mut data = unsafe { addr_of_mut!((*ptr.as_ptr()).data).cast::<u8>() };
for string in strings { for &string in strings {
let string = string.as_slice();
let count = string.len(); let count = string.len();
// SAFETY: // SAFETY:
// The sum of all `count` for each `string` equals `full_count`, and since we're // The sum of all `count` for each `string` equals `full_count`, and since we're
@ -439,21 +363,43 @@ impl JsString {
// `ptr` and all `string`s should never overlap. // `ptr` and all `string`s should never overlap.
unsafe { unsafe {
match (ascii, string.variant()) { match (ascii, string.variant()) {
(true, JsStrVariant::Ascii(s)) => { (true, JsStringSliceVariant::U8Ascii(s)) => {
ptr::copy_nonoverlapping(s.as_ptr(), data.cast::<u8>(), count); ptr::copy_nonoverlapping(s.as_ptr(), data.cast::<u8>(), count);
data = data.cast::<u8>().add(count).cast::<u8>(); data = data.cast::<u8>().add(count).cast::<u8>();
} }
(false, JsStrVariant::Ascii(s)) => { (true, JsStringSliceVariant::U16Ascii(s)) => {
for (i, byte) in s.iter().copied().enumerate() {
*data.cast::<u8>().add(i) = (byte & 0xFF) as u8;
}
data = data.cast::<u8>().add(count).cast::<u8>();
}
(false, JsStringSliceVariant::U8Ascii(s)) => {
for (i, byte) in s.bytes().enumerate() { for (i, byte) in s.bytes().enumerate() {
*data.cast::<u16>().add(i) = u16::from(byte); *data.cast::<u16>().add(i) = u16::from(byte);
} }
data = data.cast::<u16>().add(count).cast::<u8>(); data = data.cast::<u16>().add(count).cast::<u8>();
} }
(false, JsStrVariant::U16(s)) => { (false, JsStringSliceVariant::U8NonAscii(s, _)) => {
for (i, byte) in s.encode_utf16().enumerate() {
*data.cast::<u16>().add(i) = byte;
}
data = data.cast::<u16>().add(count).cast::<u8>();
}
(
false,
JsStringSliceVariant::U16Ascii(s)
| JsStringSliceVariant::U16NonAscii(s),
) => {
ptr::copy_nonoverlapping(s.as_ptr(), data.cast::<u16>(), count); ptr::copy_nonoverlapping(s.as_ptr(), data.cast::<u16>(), count);
data = data.cast::<u16>().add(count).cast::<u8>(); data = data.cast::<u16>().add(count).cast::<u8>();
} }
(true, JsStrVariant::U16(_)) => unreachable!(), (
true,
JsStringSliceVariant::U8NonAscii(..)
| JsStringSliceVariant::U16NonAscii(_),
) => {
unreachable!()
}
} }
} }
} }
@ -819,12 +765,12 @@ impl JsString {
} }
/// Creates a new [`JsString`] from `data`, without checking if the string is in the interner. /// Creates a new [`JsString`] from `data`, without checking if the string is in the interner.
fn from_slice_skip_interning(string: JsStr<'_>) -> Self { fn from_slice_skip_interning(string: JsStringSlice<'_>) -> Self {
let count = string.len(); let count = string.len();
let ptr = Self::allocate_inner(count, string.is_ascii()); let ptr = Self::allocate_inner(count, string.is_ascii());
// SAFETY: `allocate_inner` guarantees that `ptr` is a valid pointer. // SAFETY: `allocate_inner` guarantees that `ptr` is a valid pointer.
let data = unsafe { addr_of_mut!((*ptr.as_ptr()).data) }; let data = unsafe { addr_of_mut!((*ptr.as_ptr()).data).cast::<u8>() };
// SAFETY: // SAFETY:
// - We read `count = data.len()` elements from `data`, which is within the bounds of the slice. // - We read `count = data.len()` elements from `data`, which is within the bounds of the slice.
// - `allocate_inner` must allocate at least `count` elements, which allows us to safely // - `allocate_inner` must allocate at least `count` elements, which allows us to safely
@ -835,12 +781,21 @@ impl JsString {
// and `data` should never overlap. // and `data` should never overlap.
unsafe { unsafe {
match string.variant() { match string.variant() {
JsStrVariant::Ascii(string) => { JsStringSliceVariant::U8Ascii(s) => {
ptr::copy_nonoverlapping(string.as_ptr(), data.cast::<u8>(), count); ptr::copy_nonoverlapping(s.as_ptr(), data.cast::<u8>(), count);
}
JsStringSliceVariant::U16Ascii(s) => {
for (i, byte) in s.iter().copied().enumerate() {
*data.cast::<u8>().add(i) = (byte & 0xFF) as u8;
} }
JsStrVariant::U16(string) => { }
assert!(!is_ascii(string), "should be u16 not ascii"); JsStringSliceVariant::U8NonAscii(s, _) => {
ptr::copy_nonoverlapping(string.as_ptr(), data.cast::<u16>(), count); for (i, byte) in s.encode_utf16().enumerate() {
*data.cast::<u16>().add(i) = byte;
}
}
JsStringSliceVariant::U16NonAscii(s) => {
ptr::copy_nonoverlapping(s.as_ptr(), data.cast::<u16>(), count);
} }
} }
} }
@ -850,6 +805,19 @@ impl JsString {
} }
} }
/// Creates a new [`JsString`] from `string`.
///
/// The contents are first copied into a fresh allocation. If the result is
/// ASCII and `StaticJsStrings::get_string` yields a string for it, that
/// string is returned in place of the fresh allocation.
fn from_slice(string: JsStringSlice<'_>) -> Self {
    let allocated = Self::from_slice_skip_interning(string);
    let interned = allocated
        .as_str()
        .as_ascii()
        .and_then(|ascii| StaticJsStrings::get_string(ascii));
    match interned {
        Some(existing) => existing,
        None => allocated,
    }
}
#[inline] #[inline]
#[must_use] #[must_use]
pub fn len(&self) -> usize { pub fn len(&self) -> usize {
@ -898,15 +866,15 @@ impl JsString {
} }
} }
pub(crate) fn trim(&self) -> JsStr<'_> { pub(crate) fn trim(&self) -> JsStringSlice<'_> {
self.as_str().trim() self.as_str().trim()
} }
pub(crate) fn trim_start(&self) -> JsStr<'_> { pub(crate) fn trim_start(&self) -> JsStringSlice<'_> {
self.as_str().trim_start() self.as_str().trim_start()
} }
pub(crate) fn trim_end(&self) -> JsStr<'_> { pub(crate) fn trim_end(&self) -> JsStringSlice<'_> {
self.as_str().trim_end() self.as_str().trim_end()
} }
@ -993,7 +961,7 @@ impl Drop for JsString {
} }
} }
fn is_ascii(slice: &[u16]) -> bool { pub(crate) fn is_ascii(slice: &[u16]) -> bool {
for &element in slice { for &element in slice {
if (element & 0b0111_1111) != element { if (element & 0b0111_1111) != element {
return false; return false;
@ -1024,42 +992,50 @@ impl Eq for JsString {}
impl From<&[u16]> for JsString { impl From<&[u16]> for JsString {
#[inline] #[inline]
fn from(s: &[u16]) -> Self { fn from(s: &[u16]) -> Self {
if is_ascii(s) { JsString::from_slice(JsStringSlice::from(s))
let s = s.iter().copied().map(|c| c as u8).collect::<Vec<_>>();
// SAFETY: Already checked that it's ASCII, so this is safe.
let s = unsafe { std::str::from_utf8_unchecked(&s) };
return StaticJsStrings::get_string(s).unwrap_or_else(|| {
Self::from_slice_skip_interning(
// SAFETY: Already checked that it's ASCII, so this is safe.
unsafe { JsStr::ascii_unchecked(s) },
)
});
}
Self::from_slice_skip_interning(
// SAFETY: Already checked that it's not ASCII, so this is safe.
unsafe { JsStr::u16_unchecked(s) },
)
} }
} }
impl From<&str> for JsString { impl From<&str> for JsString {
#[inline] #[inline]
fn from(s: &str) -> Self { fn from(s: &str) -> Self {
StaticJsStrings::get_string(s)
.unwrap_or_else(|| JsString::from_slice_skip_interning(JsStringSlice::from(s)))
}
}
impl From<JsStr<'_>> for JsString {
fn from(value: JsStr<'_>) -> Self {
match value.variant() {
JsStrVariant::Ascii(s) => {
StaticJsStrings::get_string(s).unwrap_or_else(|| { StaticJsStrings::get_string(s).unwrap_or_else(|| {
if s.is_ascii() { // SAFETY: `JsStrVariant::Ascii` Always contains ASCII, so this is safe.
Self::from_slice_skip_interning( let slice = unsafe { JsStringSlice::u8_ascii_unchecked(s) };
// SAFETY: Already checked that it's ASCII, so this is safe. JsString::from_slice_skip_interning(slice)
unsafe { JsStr::ascii_unchecked(s) }, })
) }
} else { JsStrVariant::U16(s) => {
let s = s.encode_utf16().collect::<Vec<_>>(); // SAFETY: `JsStrVariant::U16` Always contains non-ASCII, so this is safe.
Self::from_slice_skip_interning( let slice = unsafe { JsStringSlice::u16_non_ascii_unchecked(s) };
// SAFETY: Already checked that it's not ASCII, so this is safe. JsString::from_slice(slice)
unsafe { JsStr::u16_unchecked(&s[..]) }, }
)
} }
}
}
impl From<JsStringSlice<'_>> for JsString {
fn from(value: JsStringSlice<'_>) -> Self {
match value.variant() {
JsStringSliceVariant::U8Ascii(s) => {
StaticJsStrings::get_string(s).unwrap_or_else(|| {
// SAFETY: `JsStrVariant::Ascii` Always contains ASCII, so this is safe.
let slice = unsafe { JsStringSlice::u8_ascii_unchecked(s) };
JsString::from_slice_skip_interning(slice)
}) })
} }
_ => JsString::from_slice(value),
}
}
} }
impl From<&[JsString]> for JsString { impl From<&[JsString]> for JsString {
@ -1088,7 +1064,10 @@ impl<const N: usize> From<&[u16; N]> for JsString {
impl Hash for JsString { impl Hash for JsString {
fn hash<H: Hasher>(&self, state: &mut H) { fn hash<H: Hasher>(&self, state: &mut H) {
self.as_str().hash(state); match self.as_str().variant() {
JsStrVariant::Ascii(s) => s.hash(state),
JsStrVariant::U16(s) => s.hash(state),
}
} }
} }
@ -1295,7 +1274,7 @@ mod tests {
use std::collections::hash_map::DefaultHasher; use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher}; use std::hash::{Hash, Hasher};
const HELLOWORLD: &[u16] = utf16!("Hello World!"); const HELLOWORLD: &str = "Hello World!";
let x = js_string!(HELLOWORLD); let x = js_string!(HELLOWORLD);
assert_eq!(&x, HELLOWORLD); assert_eq!(&x, HELLOWORLD);
@ -1331,4 +1310,14 @@ mod tests {
assert_eq!(&xyzw, utf16!("hello, world!")); assert_eq!(&xyzw, utf16!("hello, world!"));
assert_eq!(xyzw.refcount(), Some(1)); assert_eq!(xyzw.refcount(), Some(1));
} }
// Trimming a leading non-ASCII character off a string can leave purely ASCII
// content behind; this checks that a `JsString` built from such a trimmed
// slice still compares equal to the expected text.
#[test]
fn trim_start_non_ascii_to_ascii() {
// U+2029 is outside the ASCII range; the remainder ("abc") is ASCII.
let s = "\u{2029}abc";
let x = js_string!(s);
// Round-trip the trimmed slice back into an owned `JsString`.
let y = js_string!(x.trim_start());
// The expected value is computed with `str::trim_start` on the original literal.
assert_eq!(&y, s.trim_start());
}
} }

210
boa_engine/src/string/slice.rs

@ -0,0 +1,210 @@
use crate::{builtins::string::is_trimmable_whitespace, JsString};
use super::{is_ascii, JsStr, JsStrVariant};
/// The storage shapes a [`JsStringSlice`] can borrow from.
///
/// Each variant records both the encoding of the borrowed data (UTF-8 `str`
/// or UTF-16 code units) and whether the content is ASCII-only. The ASCII /
/// non-ASCII classification is established by the `JsStringSlice`
/// constructors, which only verify it with `debug_assert!`.
#[derive(Debug, Clone, Copy)]
pub enum JsStringSliceVariant<'a> {
/// UTF-8 text that is entirely ASCII.
U8Ascii(&'a str),
/// UTF-8 text that is not entirely ASCII, paired with its length in UTF-16
/// code units (computed once at construction via `encode_utf16().count()`).
U8NonAscii(&'a str, usize),
/// UTF-16 code units that are all ASCII.
U16Ascii(&'a [u16]),
/// UTF-16 code units of which at least one is not ASCII.
U16NonAscii(&'a [u16]),
}
/// A borrowed, `Copy` view over string data in one of the encodings described
/// by [`JsStringSliceVariant`].
///
/// The variant is kept private so that values can only be built through the
/// constructors and `From` impls in this module.
#[derive(Debug, Clone, Copy)]
pub struct JsStringSlice<'a> {
// The borrowed data together with its encoding / ASCII classification.
inner: JsStringSliceVariant<'a>,
}
impl<'a> JsStringSlice<'a> {
    /// Wraps a UTF-8 string that is known to be ASCII-only.
    ///
    /// # Safety
    ///
    /// `value` must contain only ASCII characters. This is checked with a
    /// `debug_assert!` only.
    pub(crate) unsafe fn u8_ascii_unchecked(value: &'a str) -> Self {
        debug_assert!(value.is_ascii(), "string must be ascii");

        Self {
            inner: JsStringSliceVariant::U8Ascii(value),
        }
    }

    /// Wraps UTF-16 code units that are known to be ASCII-only.
    ///
    /// # Safety
    ///
    /// Every code unit in `value` must be ASCII. This is checked with a
    /// `debug_assert!` only.
    pub(crate) unsafe fn u16_ascii_unchecked(value: &'a [u16]) -> Self {
        debug_assert!(is_ascii(value), "string must be ascii");

        Self {
            inner: JsStringSliceVariant::U16Ascii(value),
        }
    }

    /// Wraps a UTF-8 string that is known to contain non-ASCII characters.
    ///
    /// The UTF-16 length is computed once here and cached in the variant.
    ///
    /// # Safety
    ///
    /// `value` must contain at least one non-ASCII character. This is checked
    /// with a `debug_assert!` only.
    pub(crate) unsafe fn u8_non_ascii_unchecked(value: &'a str) -> Self {
        debug_assert!(!value.is_ascii(), "string must not be ascii");

        let len = value.encode_utf16().count();

        Self {
            inner: JsStringSliceVariant::U8NonAscii(value, len),
        }
    }

    /// Wraps UTF-16 code units that are known to contain a non-ASCII unit.
    ///
    /// # Safety
    ///
    /// `value` must contain at least one non-ASCII code unit. This is checked
    /// with a `debug_assert!` only.
    pub(crate) unsafe fn u16_non_ascii_unchecked(value: &'a [u16]) -> Self {
        debug_assert!(!is_ascii(value), "string must not be ascii");

        Self {
            inner: JsStringSliceVariant::U16NonAscii(value),
        }
    }

    /// Returns the underlying encoding-tagged data.
    pub(crate) fn variant(self) -> JsStringSliceVariant<'a> {
        self.inner
    }

    /// Returns the length of the slice in UTF-16 code units.
    #[must_use]
    pub fn len(&self) -> usize {
        match self.variant() {
            // ASCII: one byte per code unit.
            JsStringSliceVariant::U8Ascii(s) => s.len(),
            // Cached at construction time.
            JsStringSliceVariant::U8NonAscii(_, len) => len,
            JsStringSliceVariant::U16NonAscii(s) | JsStringSliceVariant::U16Ascii(s) => s.len(),
        }
    }

    /// Returns `true` if the slice contains no code units.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns `true` if the slice is tagged as ASCII-only.
    #[must_use]
    pub fn is_ascii(&self) -> bool {
        matches!(
            self.variant(),
            JsStringSliceVariant::U8Ascii(_) | JsStringSliceVariant::U16Ascii(_)
        )
    }

    /// Trims both leading and trailing space.
    #[inline]
    #[must_use]
    pub fn trim(&self) -> Self {
        self.trim_start().trim_end()
    }

    /// Trims all leading space.
    ///
    /// Every variant uses `is_trimmable_whitespace` as the whitespace
    /// predicate, so the result does not depend on how the text is stored.
    /// (`str::trim_start` uses Unicode `White_Space`, which is a different
    /// set: it strips U+0085 and keeps U+FEFF.)
    #[inline]
    #[must_use]
    pub fn trim_start(&self) -> JsStringSlice<'a> {
        match self.variant() {
            JsStringSliceVariant::U8Ascii(s) => {
                // SAFETY: A sub-slice of an ASCII string is still ASCII, so this is safe.
                unsafe {
                    JsStringSlice::u8_ascii_unchecked(
                        s.trim_start_matches(is_trimmable_whitespace),
                    )
                }
            }
            // Trimming may remove every non-ASCII character, so re-classify.
            JsStringSliceVariant::U8NonAscii(s, _) => {
                JsStringSlice::from(s.trim_start_matches(is_trimmable_whitespace))
            }
            JsStringSliceVariant::U16Ascii(s) => {
                Self::from_trimmed_units(Self::trim_start_units(s), true)
            }
            JsStringSliceVariant::U16NonAscii(s) => {
                Self::from_trimmed_units(Self::trim_start_units(s), false)
            }
        }
    }

    /// Trims all trailing space.
    ///
    /// Uses the same whitespace predicate as [`JsStringSlice::trim_start`].
    #[inline]
    #[must_use]
    pub fn trim_end(&self) -> JsStringSlice<'a> {
        match self.variant() {
            JsStringSliceVariant::U8Ascii(s) => {
                // SAFETY: A sub-slice of an ASCII string is still ASCII, so this is safe.
                unsafe {
                    JsStringSlice::u8_ascii_unchecked(s.trim_end_matches(is_trimmable_whitespace))
                }
            }
            // Trimming may remove every non-ASCII character, so re-classify.
            JsStringSliceVariant::U8NonAscii(s, _) => {
                JsStringSlice::from(s.trim_end_matches(is_trimmable_whitespace))
            }
            JsStringSliceVariant::U16Ascii(s) => {
                Self::from_trimmed_units(Self::trim_end_units(s), true)
            }
            JsStringSliceVariant::U16NonAscii(s) => {
                Self::from_trimmed_units(Self::trim_end_units(s), false)
            }
        }
    }

    /// Returns an iterator over the UTF-16 code units of the slice.
    pub fn iter(self) -> crate::string::Iter<'a> {
        crate::string::Iter::new(self)
    }

    /// Returns `true` if `unit` decodes to a trimmable whitespace character.
    ///
    /// Code units that are not valid scalar values on their own (surrogates)
    /// are never treated as whitespace.
    fn is_trimmable_unit(unit: u16) -> bool {
        char::from_u32(u32::from(unit)).map_or(false, is_trimmable_whitespace)
    }

    /// Returns `units` without its leading trimmable code units.
    fn trim_start_units(units: &'a [u16]) -> &'a [u16] {
        let start = units
            .iter()
            .position(|&unit| !Self::is_trimmable_unit(unit))
            .unwrap_or(units.len());
        &units[start..]
    }

    /// Returns `units` without its trailing trimmable code units.
    fn trim_end_units(units: &'a [u16]) -> &'a [u16] {
        let end = units
            .iter()
            .rposition(|&unit| !Self::is_trimmable_unit(unit))
            .map_or(0, |last| last + 1);
        &units[..end]
    }

    /// Wraps the result of trimming a UTF-16 slice.
    ///
    /// An empty result is always reported as an empty `U8Ascii` slice. When
    /// the source was ASCII (`known_ascii`), the remainder keeps that tag;
    /// otherwise it is re-classified, because trimming may have removed every
    /// non-ASCII code unit.
    fn from_trimmed_units(units: &'a [u16], known_ascii: bool) -> Self {
        if units.is_empty() {
            // SAFETY: An empty string is valid ASCII, so this is safe.
            return unsafe { Self::u8_ascii_unchecked("") };
        }

        if known_ascii {
            // SAFETY: A sub-slice of an ASCII-only slice is still ASCII, so this is safe.
            unsafe { Self::u16_ascii_unchecked(units) }
        } else {
            Self::from(units)
        }
    }
}
impl<'a> From<&'a JsString> for JsStringSlice<'a> {
    /// Borrows the contents of a [`JsString`] by going through its `JsStr` view.
    fn from(value: &'a JsString) -> Self {
        value.as_str().into()
    }
}
impl<'a> From<JsStr<'a>> for JsStringSlice<'a> {
    /// Re-tags a [`JsStr`] as a [`JsStringSlice`] without copying.
    fn from(value: JsStr<'a>) -> Self {
        // SAFETY:
        // - `JsStrVariant::Ascii` always contains an ASCII string.
        // - `JsStrVariant::U16` always contains a non-ASCII string.
        // So each arm satisfies the contract of the constructor it calls.
        unsafe {
            match value.variant() {
                JsStrVariant::Ascii(text) => Self::u8_ascii_unchecked(text),
                JsStrVariant::U16(units) => Self::u16_non_ascii_unchecked(units),
            }
        }
    }
}
impl<'a> From<&'a str> for JsStringSlice<'a> {
    /// Classifies a UTF-8 string as ASCII or non-ASCII and borrows it.
    fn from(value: &'a str) -> Self {
        if value.is_ascii() {
            // SAFETY: Just checked that `value` is ASCII, so this is safe.
            unsafe { Self::u8_ascii_unchecked(value) }
        } else {
            // SAFETY: Just checked that `value` is not ASCII, so this is safe.
            unsafe { Self::u8_non_ascii_unchecked(value) }
        }
    }
}
impl<'a> From<&'a [u16]> for JsStringSlice<'a> {
    /// Classifies UTF-16 code units as ASCII or non-ASCII and borrows them.
    fn from(s: &'a [u16]) -> Self {
        if !is_ascii(s) {
            // SAFETY: Just checked that `s` is not ASCII, so this is safe.
            return unsafe { Self::u16_non_ascii_unchecked(s) };
        }

        // SAFETY: Just checked that `s` is ASCII, so this is safe.
        unsafe { Self::u16_ascii_unchecked(s) }
    }
}

62
boa_engine/src/string/str.rs

@ -3,6 +3,8 @@ use std::slice::SliceIndex;
use crate::{builtins::string::is_trimmable_whitespace, string::Iter}; use crate::{builtins::string::is_trimmable_whitespace, string::Iter};
use boa_interner::JStrRef; use boa_interner::JStrRef;
use super::JsStringSlice;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum JsStrVariant<'a> { pub enum JsStrVariant<'a> {
Ascii(&'a str), Ascii(&'a str),
@ -69,8 +71,8 @@ impl<'a> JsStr<'a> {
/// TODO: doc /// TODO: doc
#[inline] #[inline]
#[must_use] #[must_use]
pub fn iter(&self) -> Iter<'_> { pub fn iter(self) -> Iter<'a> {
Iter::new(*self) Iter::new(self.into())
} }
pub(crate) fn as_str_ref(&self) -> JStrRef<'_> { pub(crate) fn as_str_ref(&self) -> JStrRef<'_> {
@ -89,68 +91,22 @@ impl<'a> JsStr<'a> {
/// Trims both leading and trailing space. /// Trims both leading and trailing space.
#[inline] #[inline]
#[must_use] #[must_use]
pub fn trim(&self) -> Self { pub fn trim(self) -> JsStringSlice<'a> {
self.trim_start().trim_end() self.trim_start().trim_end()
} }
/// Trims all leading space. /// Trims all leading space.
#[inline] #[inline]
#[must_use] #[must_use]
pub fn trim_start(&self) -> Self { pub fn trim_start(self) -> JsStringSlice<'a> {
match self.variant() { JsStringSlice::from(self).trim_start()
JsStrVariant::Ascii(s) => {
// SAFETY: Calling `trim_start()` on ASCII string always returns ASCII string, so this is safe.
unsafe { JsStr::ascii_unchecked(s.trim_start()) }
}
JsStrVariant::U16(s) => {
let value = if let Some(left) = s.iter().copied().position(|r| {
!char::from_u32(u32::from(r))
.map(is_trimmable_whitespace)
.unwrap_or_default()
}) {
&s[left..]
} else {
// SAFETY: An empty string is valid ASCII, so this is safe.
return unsafe { JsStr::ascii_unchecked("") };
};
// TODO: If we have a string that has ascii non-white space characters,
// and a leading non-ascii white space, that is trimmed making this ascii.
//
// SAFETY:
unsafe { JsStr::u16_unchecked(value) }
}
}
} }
/// Trims all trailing space. /// Trims all trailing space.
#[inline] #[inline]
#[must_use] #[must_use]
pub fn trim_end(&self) -> Self { pub fn trim_end(self) -> JsStringSlice<'a> {
match self.variant() { JsStringSlice::from(self).trim_end()
JsStrVariant::Ascii(s) => {
// SAFETY: Calling `trim_end()` on ASCII string always returns ASCII string, so this is safe.
unsafe { JsStr::ascii_unchecked(s.trim_end()) }
}
JsStrVariant::U16(s) => {
let value = if let Some(right) = s.iter().copied().rposition(|r| {
!char::from_u32(u32::from(r))
.map(is_trimmable_whitespace)
.unwrap_or_default()
}) {
&s[..=right]
} else {
// SAFETY: An empty string is valid ASCII, so this is safe.
return unsafe { JsStr::ascii_unchecked("") };
};
// TODO: If we have a string that has ascii non-white space characters,
// and a trailing non-ascii white space, that is trimmed making this ascii.
//
// SAFETY:
unsafe { JsStr::u16_unchecked(value) }
}
}
} }
pub fn get<I>(&'a self, index: I) -> Option<I::Value> pub fn get<I>(&'a self, index: I) -> Option<I::Value>

2
boa_engine/src/symbol.rs

@ -213,7 +213,7 @@ impl JsSymbol {
return wk.fn_name(); return wk.fn_name();
} }
self.description() self.description()
.map(|s| js_string!("[", s, "]")) .map(|s| js_string!("[", &s, "]"))
.unwrap_or_default() .unwrap_or_default()
} }

Loading…
Cancel
Save