Browse Source

Prevent allocating when concatenating strings

refactor/interner
Haled Odat 1 year ago
parent
commit
a19afd11a4
  1. 4
      boa_engine/src/builtins/object/mod.rs
  2. 4
      boa_engine/src/builtins/string/mod.rs
  3. 279
      boa_engine/src/string/mod.rs
  4. 210
      boa_engine/src/string/slice.rs
  5. 62
      boa_engine/src/string/str.rs
  6. 2
      boa_engine/src/symbol.rs

4
boa_engine/src/builtins/object/mod.rs

@ -26,7 +26,7 @@ use crate::{
},
property::{Attribute, PropertyDescriptor, PropertyKey, PropertyNameKind},
realm::Realm,
string::{common::StaticJsStrings, CowJsString},
string::{common::StaticJsStrings, JsStringSlice},
symbol::JsSymbol,
value::JsValue,
Context, JsArgs, JsResult, JsString,
@ -836,7 +836,7 @@ impl Object {
let tag_str = tag
.as_string()
.map(JsString::as_str)
.map_or(CowJsString::from(builtin_tag), Into::into);
.map_or(JsStringSlice::from(builtin_tag), Into::into);
// 17. Return the string-concatenation of "[object ", tag, and "]".
Ok(js_string!("[object ", tag_str, "]").into())

4
boa_engine/src/builtins/string/mod.rs

@ -2350,13 +2350,13 @@ impl String {
// the code unit 0x0022 (QUOTATION MARK)
// escapedV
// the code unit 0x0022 (QUOTATION MARK)
p1 = js_string!(p1, " ", attribute, "=\"", &escaped_v[..], "\"");
p1 = js_string!(&p1, " ", attribute, "=\"", &escaped_v[..], "\"");
}
// 5. Let p2 be the string-concatenation of p1 and ">".
// 6. Let p3 be the string-concatenation of p2 and S.
// 7. Let p4 be the string-concatenation of p3, "</", tag, and ">".
let p4 = js_string!(p1, ">", s, "</", tag, ">");
let p4 = js_string!(&p1, ">", &s, "</", tag, ">");
// 8. Return p4.
Ok(p4.into())

279
boa_engine/src/string/mod.rs

@ -22,6 +22,7 @@
#![allow(unstable_name_collisions)]
pub(crate) mod common;
mod slice;
mod str;
use crate::{
@ -33,7 +34,10 @@ use boa_gc::{empty_trace, Finalize, Trace};
pub use boa_macros::utf16;
#[doc(inline)]
pub use crate::string::str::{JsStr, JsStrVariant};
pub use crate::string::{
slice::JsStringSlice,
str::{JsStr, JsStrVariant},
};
use std::{
alloc::{alloc, dealloc, Layout},
@ -46,7 +50,7 @@ use std::{
str::FromStr,
};
use self::{common::StaticJsStrings, str::JsSliceIndex};
use self::{common::StaticJsStrings, slice::JsStringSliceVariant, str::JsSliceIndex};
fn alloc_overflow() -> ! {
panic!("detected overflow during string allocation")
@ -102,16 +106,16 @@ macro_rules! js_string {
$crate::JsString::default()
};
($s:literal) => {
$crate::JsString::from($crate::string::CowJsString::from($s))
$crate::JsString::from($s)
};
($s:expr) => {
$crate::JsString::from($s)
};
( $x:expr, $y:expr ) => {
$crate::JsString::concat($crate::string::CowJsString::from($x), $crate::string::CowJsString::from($y))
$crate::JsString::concat($crate::string::JsStringSlice::from($x), $crate::string::JsStringSlice::from($y))
};
( $( $s:expr ),+ ) => {
$crate::JsString::concat_array(&[ $( $crate::string::CowJsString::from($s) ),+ ])
$crate::JsString::concat_array(&[ $( $crate::string::JsStringSlice::from($s) ),+ ])
};
}
@ -220,102 +224,21 @@ unsafe impl Trace for JsString {
empty_trace!();
}
#[derive(Debug)]
pub enum CowJsString<'a> {
Borrowed(JsStr<'a>),
Owned(JsString),
}
impl CowJsString<'_> {
fn as_slice(&self) -> JsStr<'_> {
match self {
CowJsString::Borrowed(s) => *s,
CowJsString::Owned(s) => s.as_str(),
}
}
}
impl From<JsString> for CowJsString<'_> {
fn from(value: JsString) -> Self {
Self::Owned(value)
}
}
impl<'a> From<&'a JsString> for CowJsString<'a> {
fn from(value: &'a JsString) -> Self {
Self::Borrowed(value.as_str())
}
}
impl<'a> From<JsStr<'a>> for CowJsString<'a> {
fn from(value: JsStr<'a>) -> Self {
Self::Borrowed(value)
}
}
impl<'a> From<&'a str> for CowJsString<'a> {
fn from(value: &'a str) -> Self {
if value.is_ascii() {
return Self::Borrowed(
// SAFETY: Already checked that it's ASCII, so this is safe.
unsafe { JsStr::ascii_unchecked(value) },
);
}
Self::Owned(JsString::from(
&value.encode_utf16().collect::<Vec<_>>()[..],
))
}
}
impl<'a> From<&'a [u16]> for CowJsString<'a> {
fn from(s: &'a [u16]) -> Self {
if is_ascii(s) {
let s = s.iter().copied().map(|c| c as u8).collect::<Vec<_>>();
// SAFETY: Already checked that it's ASCII, so this is safe.
let s = unsafe { std::str::from_utf8_unchecked(&s) };
return Self::Owned(StaticJsStrings::get_string(s).unwrap_or_else(|| {
JsString::from_slice_skip_interning(
// SAFETY: Already checked that it's ASCII, so this is safe.
unsafe { JsStr::ascii_unchecked(s) },
)
}));
}
// SAFETY: Already checked that isn't ASCII, so this is safe.
Self::Borrowed(unsafe { JsStr::u16_unchecked(s) })
}
}
impl From<JsStr<'_>> for JsString {
fn from(value: JsStr<'_>) -> Self {
match value.variant() {
// TODO: Maybe remove the check that comes from `JsString::from` <&str>.
JsStrVariant::Ascii(s) => JsString::from(s),
JsStrVariant::U16(s) => JsString::from(s),
}
}
}
impl From<CowJsString<'_>> for JsString {
fn from(value: CowJsString<'_>) -> Self {
match value {
CowJsString::Borrowed(s) => JsString::from(s),
CowJsString::Owned(s) => s,
}
}
}
/// Iterator over the UTF-16 code units of a string slice.
///
/// Each variant wraps the standard-library iterator that matches the way the
/// underlying data is stored.
#[derive(Debug, Clone)]
pub enum Iter<'a> {
/// Bytes of a `str`; each byte is widened to a `u16` when it is yielded.
Ascii(std::str::Bytes<'a>),
/// UTF-16 encoding of a `str`, paired with the code-unit count that
/// [`ExactSizeIterator::len`] reports for this variant.
U8(std::str::EncodeUtf16<'a>, usize),
/// Code units copied out of a `u16` slice.
U16(std::iter::Copied<std::slice::Iter<'a, u16>>),
}
impl<'a> Iter<'a> {
fn new(s: JsStr<'a>) -> Self {
fn new(s: JsStringSlice<'a>) -> Self {
match s.variant() {
JsStrVariant::Ascii(s) => Self::Ascii(s.bytes()),
JsStrVariant::U16(s) => Self::U16(s.iter().copied()),
JsStringSliceVariant::U8Ascii(s) => Self::Ascii(s.bytes()),
JsStringSliceVariant::U8NonAscii(s, len) => Self::U8(s.encode_utf16(), len),
JsStringSliceVariant::U16Ascii(s) | JsStringSliceVariant::U16NonAscii(s) => {
Self::U16(s.iter().copied())
}
}
}
}
@ -326,6 +249,7 @@ impl Iterator for Iter<'_> {
/// Yields the next UTF-16 code unit, or `None` once the iterator is exhausted.
fn next(&mut self) -> Option<Self::Item> {
    match self {
        // Bytes widen losslessly into UTF-16 code units.
        Self::Ascii(bytes) => bytes.next().map(u16::from),
        Self::U8(units, remaining) => {
            let unit = units.next();
            // Keep the stored count in step with consumption so that
            // `ExactSizeIterator::len` reports the *remaining* length rather
            // than the length the iterator started with.
            if unit.is_some() {
                *remaining = remaining.saturating_sub(1);
            }
            unit
        }
        Self::U16(units) => units.next(),
    }
}
@ -337,6 +261,7 @@ impl ExactSizeIterator for Iter<'_> {
/// Returns the number of code units this iterator reports as left to yield.
fn len(&self) -> usize {
    match self {
        Self::Ascii(bytes) => bytes.len(),
        Self::U16(units) => units.len(),
        // `EncodeUtf16` is not an exact-size iterator, so the count carried
        // alongside it is used instead.
        Self::U8(_, count) => *count,
    }
}
@ -353,7 +278,7 @@ impl JsString {
#[inline]
#[must_use]
pub fn iter(&self) -> Iter<'_> {
Iter::new(self.as_str())
Iter::new(self.as_str().into())
}
/// Obtains the underlying [`&[u16]`][slice] slice of a [`JsString`]
@ -397,21 +322,21 @@ impl JsString {
/// Creates a new [`JsString`] from the concatenation of `x` and `y`.
#[must_use]
pub fn concat(x: CowJsString<'_>, y: CowJsString<'_>) -> Self {
pub fn concat(x: JsStringSlice<'_>, y: JsStringSlice<'_>) -> Self {
Self::concat_array(&[x, y])
}
/// Creates a new [`JsString`] from the concatenation of every element of
/// `strings`.
#[must_use]
pub fn concat_array(strings: &[CowJsString<'_>]) -> Self {
pub fn concat_array(strings: &[JsStringSlice<'_>]) -> Self {
let mut ascii = true;
let mut full_count = 0usize;
for string in strings {
let Some(sum) = full_count.checked_add(string.as_slice().len()) else {
let Some(sum) = full_count.checked_add(string.len()) else {
alloc_overflow()
};
if !string.as_slice().is_ascii() {
if !string.is_ascii() {
ascii = false;
}
full_count = sum;
@ -424,8 +349,7 @@ impl JsString {
let string = {
// SAFETY: `allocate_inner` guarantees that `ptr` is a valid pointer.
let mut data = unsafe { addr_of_mut!((*ptr.as_ptr()).data).cast::<u8>() };
for string in strings {
let string = string.as_slice();
for &string in strings {
let count = string.len();
// SAFETY:
// The sum of all `count` for each `string` equals `full_count`, and since we're
@ -439,21 +363,43 @@ impl JsString {
// `ptr` and all `string`s should never overlap.
unsafe {
match (ascii, string.variant()) {
(true, JsStrVariant::Ascii(s)) => {
(true, JsStringSliceVariant::U8Ascii(s)) => {
ptr::copy_nonoverlapping(s.as_ptr(), data.cast::<u8>(), count);
data = data.cast::<u8>().add(count).cast::<u8>();
}
(false, JsStrVariant::Ascii(s)) => {
(true, JsStringSliceVariant::U16Ascii(s)) => {
for (i, byte) in s.iter().copied().enumerate() {
*data.cast::<u8>().add(i) = (byte & 0xFF) as u8;
}
data = data.cast::<u8>().add(count).cast::<u8>();
}
(false, JsStringSliceVariant::U8Ascii(s)) => {
for (i, byte) in s.bytes().enumerate() {
*data.cast::<u16>().add(i) = u16::from(byte);
}
data = data.cast::<u16>().add(count).cast::<u8>();
}
(false, JsStrVariant::U16(s)) => {
(false, JsStringSliceVariant::U8NonAscii(s, _)) => {
for (i, byte) in s.encode_utf16().enumerate() {
*data.cast::<u16>().add(i) = byte;
}
data = data.cast::<u16>().add(count).cast::<u8>();
}
(
false,
JsStringSliceVariant::U16Ascii(s)
| JsStringSliceVariant::U16NonAscii(s),
) => {
ptr::copy_nonoverlapping(s.as_ptr(), data.cast::<u16>(), count);
data = data.cast::<u16>().add(count).cast::<u8>();
}
(true, JsStrVariant::U16(_)) => unreachable!(),
(
true,
JsStringSliceVariant::U8NonAscii(..)
| JsStringSliceVariant::U16NonAscii(_),
) => {
unreachable!()
}
}
}
}
@ -819,12 +765,12 @@ impl JsString {
}
/// Creates a new [`JsString`] from `data`, without checking if the string is in the interner.
fn from_slice_skip_interning(string: JsStr<'_>) -> Self {
fn from_slice_skip_interning(string: JsStringSlice<'_>) -> Self {
let count = string.len();
let ptr = Self::allocate_inner(count, string.is_ascii());
// SAFETY: `allocate_inner` guarantees that `ptr` is a valid pointer.
let data = unsafe { addr_of_mut!((*ptr.as_ptr()).data) };
let data = unsafe { addr_of_mut!((*ptr.as_ptr()).data).cast::<u8>() };
// SAFETY:
// - We read `count = data.len()` elements from `data`, which is within the bounds of the slice.
// - `allocate_inner` must allocate at least `count` elements, which allows us to safely
@ -835,12 +781,21 @@ impl JsString {
// and `data` should never overlap.
unsafe {
match string.variant() {
JsStrVariant::Ascii(string) => {
ptr::copy_nonoverlapping(string.as_ptr(), data.cast::<u8>(), count);
JsStringSliceVariant::U8Ascii(s) => {
ptr::copy_nonoverlapping(s.as_ptr(), data.cast::<u8>(), count);
}
JsStrVariant::U16(string) => {
assert!(!is_ascii(string), "should be u16 not ascii");
ptr::copy_nonoverlapping(string.as_ptr(), data.cast::<u16>(), count);
JsStringSliceVariant::U16Ascii(s) => {
for (i, byte) in s.iter().copied().enumerate() {
*data.cast::<u8>().add(i) = (byte & 0xFF) as u8;
}
}
JsStringSliceVariant::U8NonAscii(s, _) => {
for (i, byte) in s.encode_utf16().enumerate() {
*data.cast::<u16>().add(i) = byte;
}
}
JsStringSliceVariant::U16NonAscii(s) => {
ptr::copy_nonoverlapping(s.as_ptr(), data.cast::<u16>(), count);
}
}
}
@ -850,6 +805,19 @@ impl JsString {
}
}
/// Creates a new [`JsString`] from `string`.
///
/// The string is built first; if the result is ASCII and
/// [`StaticJsStrings::get_string`] finds a match for it, that match is
/// returned and the freshly built string is dropped.
fn from_slice(string: JsStringSlice<'_>) -> Self {
    let allocated = Self::from_slice_skip_interning(string);
    let interned = allocated
        .as_str()
        .as_ascii()
        .and_then(|ascii| StaticJsStrings::get_string(ascii));
    interned.unwrap_or(allocated)
}
#[inline]
#[must_use]
pub fn len(&self) -> usize {
@ -898,15 +866,15 @@ impl JsString {
}
}
pub(crate) fn trim(&self) -> JsStr<'_> {
pub(crate) fn trim(&self) -> JsStringSlice<'_> {
self.as_str().trim()
}
pub(crate) fn trim_start(&self) -> JsStr<'_> {
pub(crate) fn trim_start(&self) -> JsStringSlice<'_> {
self.as_str().trim_start()
}
pub(crate) fn trim_end(&self) -> JsStr<'_> {
pub(crate) fn trim_end(&self) -> JsStringSlice<'_> {
self.as_str().trim_end()
}
@ -993,7 +961,7 @@ impl Drop for JsString {
}
}
fn is_ascii(slice: &[u16]) -> bool {
pub(crate) fn is_ascii(slice: &[u16]) -> bool {
for &element in slice {
if (element & 0b0111_1111) != element {
return false;
@ -1024,41 +992,49 @@ impl Eq for JsString {}
impl From<&[u16]> for JsString {
#[inline]
fn from(s: &[u16]) -> Self {
if is_ascii(s) {
let s = s.iter().copied().map(|c| c as u8).collect::<Vec<_>>();
// SAFETY: Already checked that it's ASCII, so this is safe.
let s = unsafe { std::str::from_utf8_unchecked(&s) };
return StaticJsStrings::get_string(s).unwrap_or_else(|| {
Self::from_slice_skip_interning(
// SAFETY: Already checked that it's ASCII, so this is safe.
unsafe { JsStr::ascii_unchecked(s) },
)
});
}
Self::from_slice_skip_interning(
// SAFETY: Already checked that it's not ASCII, so this is safe.
unsafe { JsStr::u16_unchecked(s) },
)
JsString::from_slice(JsStringSlice::from(s))
}
}
impl From<&str> for JsString {
#[inline]
fn from(s: &str) -> Self {
StaticJsStrings::get_string(s).unwrap_or_else(|| {
if s.is_ascii() {
Self::from_slice_skip_interning(
// SAFETY: Already checked that it's ASCII, so this is safe.
unsafe { JsStr::ascii_unchecked(s) },
)
} else {
let s = s.encode_utf16().collect::<Vec<_>>();
Self::from_slice_skip_interning(
// SAFETY: Already checked that it's not ASCII, so this is safe.
unsafe { JsStr::u16_unchecked(&s[..]) },
)
StaticJsStrings::get_string(s)
.unwrap_or_else(|| JsString::from_slice_skip_interning(JsStringSlice::from(s)))
}
}
impl From<JsStr<'_>> for JsString {
fn from(value: JsStr<'_>) -> Self {
match value.variant() {
JsStrVariant::Ascii(s) => {
StaticJsStrings::get_string(s).unwrap_or_else(|| {
// SAFETY: `JsStrVariant::Ascii` Always contains ASCII, so this is safe.
let slice = unsafe { JsStringSlice::u8_ascii_unchecked(s) };
JsString::from_slice_skip_interning(slice)
})
}
})
JsStrVariant::U16(s) => {
// SAFETY: `JsStrVariant::U16` Always contains non-ASCII, so this is safe.
let slice = unsafe { JsStringSlice::u16_non_ascii_unchecked(s) };
JsString::from_slice(slice)
}
}
}
}
impl From<JsStringSlice<'_>> for JsString {
    /// Converts a borrowed slice into an owned [`JsString`].
    ///
    /// ASCII `str` data is looked up with [`StaticJsStrings::get_string`]
    /// before anything is allocated; every other storage form goes through
    /// [`JsString::from_slice`].
    fn from(value: JsStringSlice<'_>) -> Self {
        if let JsStringSliceVariant::U8Ascii(ascii) = value.variant() {
            if let Some(interned) = StaticJsStrings::get_string(ascii) {
                return interned;
            }
            // `value` already is the `U8Ascii` slice over `ascii`, so it can be
            // handed on as-is.
            return JsString::from_slice_skip_interning(value);
        }
        JsString::from_slice(value)
    }
}
@ -1088,7 +1064,10 @@ impl<const N: usize> From<&[u16; N]> for JsString {
impl Hash for JsString {
fn hash<H: Hasher>(&self, state: &mut H) {
self.as_str().hash(state);
match self.as_str().variant() {
JsStrVariant::Ascii(s) => s.hash(state),
JsStrVariant::U16(s) => s.hash(state),
}
}
}
@ -1295,7 +1274,7 @@ mod tests {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
const HELLOWORLD: &[u16] = utf16!("Hello World!");
const HELLOWORLD: &str = "Hello World!";
let x = js_string!(HELLOWORLD);
assert_eq!(&x, HELLOWORLD);
@ -1331,4 +1310,14 @@ mod tests {
assert_eq!(&xyzw, utf16!("hello, world!"));
assert_eq!(xyzw.refcount(), Some(1));
}
#[test]
fn trim_start_non_ascii_to_ascii() {
    // The leading U+2029 is the only non-ASCII code point, so what is left
    // after trimming it is entirely ASCII.
    let source = "\u{2029}abc";
    let original = js_string!(source);
    let trimmed = js_string!(original.trim_start());

    assert_eq!(&trimmed, source.trim_start());
}
}

210
boa_engine/src/string/slice.rs

@ -0,0 +1,210 @@
use crate::{builtins::string::is_trimmable_whitespace, JsString};
use super::{is_ascii, JsStr, JsStrVariant};
/// The storage form behind a [`JsStringSlice`]: the element width of the
/// borrowed data combined with whether that data is ASCII.
#[derive(Debug, Clone, Copy)]
pub enum JsStringSliceVariant<'a> {
/// A `str` that is expected to contain only ASCII.
U8Ascii(&'a str),
/// A `str` that is expected to contain non-ASCII text, together with its
/// length counted in UTF-16 code units.
U8NonAscii(&'a str, usize),
/// A `u16` slice whose elements are all expected to be ASCII.
U16Ascii(&'a [u16]),
/// A `u16` slice that is expected to hold at least one non-ASCII element.
U16NonAscii(&'a [u16]),
}
/// A borrowed, `Copy` view over string data stored either as a `str` or as a
/// `u16` slice, tagged with whether the data is ASCII.
#[derive(Debug, Clone, Copy)]
pub struct JsStringSlice<'a> {
// Private so that the ASCII / non-ASCII tag can only be set by this module.
inner: JsStringSliceVariant<'a>,
}
impl<'a> JsStringSlice<'a> {
    /// Wraps `value` as an ASCII `str` slice without verifying its contents
    /// in release builds.
    ///
    /// # Safety
    ///
    /// `value` must contain only ASCII characters.
    pub(crate) unsafe fn u8_ascii_unchecked(value: &'a str) -> Self {
        debug_assert!(value.is_ascii(), "string must be ascii");
        Self {
            inner: JsStringSliceVariant::U8Ascii(value),
        }
    }

    /// Wraps `value` as an ASCII `u16` slice without verifying its contents
    /// in release builds.
    ///
    /// # Safety
    ///
    /// Every element of `value` must be an ASCII code unit.
    pub(crate) unsafe fn u16_ascii_unchecked(value: &'a [u16]) -> Self {
        debug_assert!(is_ascii(value), "string must be ascii");
        Self {
            inner: JsStringSliceVariant::U16Ascii(value),
        }
    }

    /// Wraps `value` as a non-ASCII `str` slice without verifying its
    /// contents in release builds.
    ///
    /// The length in UTF-16 code units is computed once here and stored next
    /// to the string.
    ///
    /// # Safety
    ///
    /// `value` must contain at least one non-ASCII character.
    pub(crate) unsafe fn u8_non_ascii_unchecked(value: &'a str) -> Self {
        debug_assert!(!value.is_ascii(), "string must not be ascii");
        let len = value.encode_utf16().count();
        Self {
            inner: JsStringSliceVariant::U8NonAscii(value, len),
        }
    }

    /// Wraps `value` as a non-ASCII `u16` slice without verifying its
    /// contents in release builds.
    ///
    /// # Safety
    ///
    /// `value` must contain at least one non-ASCII code unit.
    pub(crate) unsafe fn u16_non_ascii_unchecked(value: &'a [u16]) -> Self {
        debug_assert!(!is_ascii(value), "string must not be ascii");
        Self {
            inner: JsStringSliceVariant::U16NonAscii(value),
        }
    }

    /// Returns the storage form of this slice.
    pub(crate) fn variant(self) -> JsStringSliceVariant<'a> {
        self.inner
    }

    /// Returns the length of the slice in UTF-16 code units.
    pub fn len(&self) -> usize {
        match self.variant() {
            JsStringSliceVariant::U8Ascii(s) => s.len(),
            JsStringSliceVariant::U8NonAscii(_, len) => len,
            JsStringSliceVariant::U16NonAscii(s) | JsStringSliceVariant::U16Ascii(s) => s.len(),
        }
    }

    /// Returns `true` if the slice contains no code units.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns `true` if the slice is tagged as ASCII.
    pub fn is_ascii(&self) -> bool {
        matches!(
            self.variant(),
            JsStringSliceVariant::U8Ascii(_) | JsStringSliceVariant::U16Ascii(_)
        )
    }

    /// Returns `true` if `unit` is a code unit that trimming removes.
    ///
    /// Code units that are not a valid `char` on their own (lone surrogates)
    /// are never treated as whitespace.
    fn is_trimmable_unit(unit: u16) -> bool {
        char::from_u32(u32::from(unit))
            .map(is_trimmable_whitespace)
            .unwrap_or_default()
    }

    /// Trims both leading and trailing space.
    #[inline]
    #[must_use]
    pub fn trim(&self) -> Self {
        self.trim_start().trim_end()
    }

    /// Trims all leading space.
    ///
    /// Every storage form is trimmed with `is_trimmable_whitespace`, so the
    /// result does not depend on whether the text is held as a `str` or as a
    /// `u16` slice. If nothing but whitespace is present, an empty ASCII
    /// slice is returned.
    #[inline]
    #[must_use]
    pub fn trim_start(&self) -> JsStringSlice<'a> {
        match self.variant() {
            JsStringSliceVariant::U8Ascii(s) => {
                // SAFETY: A sub-string of an ASCII string is still ASCII, so this is safe.
                unsafe {
                    JsStringSlice::u8_ascii_unchecked(
                        s.trim_start_matches(is_trimmable_whitespace),
                    )
                }
            }
            JsStringSliceVariant::U8NonAscii(s, _) => {
                // Trimming may remove every non-ASCII character, so the
                // remainder is re-classified through `From<&str>`.
                JsStringSlice::from(s.trim_start_matches(is_trimmable_whitespace))
            }
            JsStringSliceVariant::U16Ascii(s) => {
                let first_kept = s
                    .iter()
                    .copied()
                    .position(|unit| !Self::is_trimmable_unit(unit));
                let Some(left) = first_kept else {
                    // SAFETY: An empty string is valid ASCII, so this is safe.
                    return unsafe { JsStringSlice::u8_ascii_unchecked("") };
                };
                // SAFETY: A sub-slice of an ASCII slice is still ASCII, so this is safe.
                unsafe { JsStringSlice::u16_ascii_unchecked(&s[left..]) }
            }
            JsStringSliceVariant::U16NonAscii(s) => {
                let first_kept = s
                    .iter()
                    .copied()
                    .position(|unit| !Self::is_trimmable_unit(unit));
                let Some(left) = first_kept else {
                    // SAFETY: An empty string is valid ASCII, so this is safe.
                    return unsafe { JsStringSlice::u8_ascii_unchecked("") };
                };
                // Trimming may remove every non-ASCII code unit, so the
                // remainder is re-classified through `From<&[u16]>`.
                JsStringSlice::from(&s[left..])
            }
        }
    }

    /// Trims all trailing space.
    ///
    /// Every storage form is trimmed with `is_trimmable_whitespace`, so the
    /// result does not depend on whether the text is held as a `str` or as a
    /// `u16` slice. If nothing but whitespace is present, an empty ASCII
    /// slice is returned.
    #[inline]
    #[must_use]
    pub fn trim_end(&self) -> JsStringSlice<'a> {
        match self.variant() {
            JsStringSliceVariant::U8Ascii(s) => {
                // SAFETY: A sub-string of an ASCII string is still ASCII, so this is safe.
                unsafe {
                    JsStringSlice::u8_ascii_unchecked(s.trim_end_matches(is_trimmable_whitespace))
                }
            }
            JsStringSliceVariant::U8NonAscii(s, _) => {
                // Trimming may remove every non-ASCII character, so the
                // remainder is re-classified through `From<&str>`.
                JsStringSlice::from(s.trim_end_matches(is_trimmable_whitespace))
            }
            JsStringSliceVariant::U16Ascii(s) => {
                let last_kept = s
                    .iter()
                    .copied()
                    .rposition(|unit| !Self::is_trimmable_unit(unit));
                let Some(right) = last_kept else {
                    // SAFETY: An empty string is valid ASCII, so this is safe.
                    return unsafe { JsStringSlice::u8_ascii_unchecked("") };
                };
                // SAFETY: A sub-slice of an ASCII slice is still ASCII, so this is safe.
                unsafe { JsStringSlice::u16_ascii_unchecked(&s[..=right]) }
            }
            JsStringSliceVariant::U16NonAscii(s) => {
                let last_kept = s
                    .iter()
                    .copied()
                    .rposition(|unit| !Self::is_trimmable_unit(unit));
                let Some(right) = last_kept else {
                    // SAFETY: An empty string is valid ASCII, so this is safe.
                    return unsafe { JsStringSlice::u8_ascii_unchecked("") };
                };
                // Trimming may remove every non-ASCII code unit, so the
                // remainder is re-classified through `From<&[u16]>`.
                JsStringSlice::from(&s[..=right])
            }
        }
    }

    /// Returns an iterator over the UTF-16 code units of the slice.
    pub fn iter(self) -> crate::string::Iter<'a> {
        crate::string::Iter::new(self)
    }
}
impl<'a> From<&'a JsString> for JsStringSlice<'a> {
    /// Borrows the contents of `value` as a slice.
    fn from(value: &'a JsString) -> Self {
        value.as_str().into()
    }
}
impl<'a> From<JsStr<'a>> for JsStringSlice<'a> {
    /// Re-tags a [`JsStr`] as a slice, keeping its storage form.
    fn from(value: JsStr<'a>) -> Self {
        // SAFETY: `JsStrVariant::Ascii` always contains an ASCII string and
        // `JsStrVariant::U16` always contains a non-ASCII string, so each arm
        // upholds the contract of the constructor it calls.
        unsafe {
            match value.variant() {
                JsStrVariant::Ascii(ascii) => Self::u8_ascii_unchecked(ascii),
                JsStrVariant::U16(units) => Self::u16_non_ascii_unchecked(units),
            }
        }
    }
}
impl<'a> From<&'a str> for JsStringSlice<'a> {
    /// Classifies `value` as ASCII or non-ASCII and wraps it accordingly.
    fn from(value: &'a str) -> Self {
        if value.is_ascii() {
            // SAFETY: The branch condition established that `value` is ASCII.
            unsafe { Self::u8_ascii_unchecked(value) }
        } else {
            // SAFETY: The branch condition established that `value` is not ASCII.
            unsafe { Self::u8_non_ascii_unchecked(value) }
        }
    }
}
impl<'a> From<&'a [u16]> for JsStringSlice<'a> {
    /// Classifies `s` as ASCII or non-ASCII and wraps it accordingly.
    fn from(s: &'a [u16]) -> Self {
        if is_ascii(s) {
            // SAFETY: The branch condition established that `s` is ASCII.
            unsafe { Self::u16_ascii_unchecked(s) }
        } else {
            // SAFETY: The branch condition established that `s` is not ASCII.
            unsafe { Self::u16_non_ascii_unchecked(s) }
        }
    }
}

62
boa_engine/src/string/str.rs

@ -3,6 +3,8 @@ use std::slice::SliceIndex;
use crate::{builtins::string::is_trimmable_whitespace, string::Iter};
use boa_interner::JStrRef;
use super::JsStringSlice;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum JsStrVariant<'a> {
Ascii(&'a str),
@ -69,8 +71,8 @@ impl<'a> JsStr<'a> {
/// TODO: doc
#[inline]
#[must_use]
pub fn iter(&self) -> Iter<'_> {
Iter::new(*self)
pub fn iter(self) -> Iter<'a> {
Iter::new(self.into())
}
pub(crate) fn as_str_ref(&self) -> JStrRef<'_> {
@ -89,68 +91,22 @@ impl<'a> JsStr<'a> {
/// Trims both leading and trailing space.
#[inline]
#[must_use]
pub fn trim(&self) -> Self {
pub fn trim(self) -> JsStringSlice<'a> {
self.trim_start().trim_end()
}
/// Trims all leading space.
#[inline]
#[must_use]
pub fn trim_start(&self) -> Self {
match self.variant() {
JsStrVariant::Ascii(s) => {
// SAFETY: Calling `trim_start()` on ASCII string always returns ASCII string, so this is safe.
unsafe { JsStr::ascii_unchecked(s.trim_start()) }
}
JsStrVariant::U16(s) => {
let value = if let Some(left) = s.iter().copied().position(|r| {
!char::from_u32(u32::from(r))
.map(is_trimmable_whitespace)
.unwrap_or_default()
}) {
&s[left..]
} else {
// SAFETY: An empty string is valid ASCII, so this is safe.
return unsafe { JsStr::ascii_unchecked("") };
};
// TODO: If we have a string that has ascii non-white space characters,
// and a leading non-ascii white space, that is trimmed making this ascii.
//
// SAFETY:
unsafe { JsStr::u16_unchecked(value) }
}
}
pub fn trim_start(self) -> JsStringSlice<'a> {
JsStringSlice::from(self).trim_start()
}
/// Trims all trailing space.
#[inline]
#[must_use]
pub fn trim_end(&self) -> Self {
match self.variant() {
JsStrVariant::Ascii(s) => {
// SAFETY: Calling `trim_end()` on ASCII string always returns ASCII string, so this is safe.
unsafe { JsStr::ascii_unchecked(s.trim_end()) }
}
JsStrVariant::U16(s) => {
let value = if let Some(right) = s.iter().copied().rposition(|r| {
!char::from_u32(u32::from(r))
.map(is_trimmable_whitespace)
.unwrap_or_default()
}) {
&s[..=right]
} else {
// SAFETY: An empty string is valid ASCII, so this is safe.
return unsafe { JsStr::ascii_unchecked("") };
};
// TODO: If we have a string that has ascii non-white space characters,
// and a trailing non-ascii white space, that is trimmed making this ascii.
//
// SAFETY:
unsafe { JsStr::u16_unchecked(value) }
}
}
pub fn trim_end(self) -> JsStringSlice<'a> {
JsStringSlice::from(self).trim_end()
}
pub fn get<I>(&'a self, index: I) -> Option<I::Value>

2
boa_engine/src/symbol.rs

@ -213,7 +213,7 @@ impl JsSymbol {
return wk.fn_name();
}
self.description()
.map(|s| js_string!("[", s, "]"))
.map(|s| js_string!("[", &s, "]"))
.unwrap_or_default()
}

Loading…
Cancel
Save