Browse Source

Make `JsString` conform to miri tests (#2412)

This PR rewrites some patterns of the `JsString` implementation in order to pass all its miri tests. This can be verified by running:

```bash
cargo +nightly miri test -p boa_engine string::tests -- --skip builtins --skip parser
```

Basically, we were doing two operations that were Undefined Behaviour per the [Stacked Borrows](https://github.com/rust-lang/unsafe-code-guidelines/blob/master/wip/stacked-borrows.md) model:
- Casting `&JsString` to `&mut RawJsString` to `&[u16]`. The intermediate mutable borrow must not exist, or Miri considers this as Undefined Behaviour.
- Trying to access `RawJsString.data` using the dot operator. Miri complains with `this is a zero-size retag ([0x10..0x10]) so the tag in question does not exist anywhere`. To fix this, we can recompute the position of `data` every time we want to access it.
pull/2414/head
José Julián Espina 2 years ago
parent
commit
8a664f2f24
  1. 96
      boa_engine/src/string/mod.rs

96
boa_engine/src/string/mod.rs

@ -39,6 +39,10 @@ use std::{
use self::common::{COMMON_STRINGS, COMMON_STRINGS_CACHE, MAX_COMMON_STRING_LENGTH}; use self::common::{COMMON_STRINGS, COMMON_STRINGS_CACHE, MAX_COMMON_STRING_LENGTH};
fn alloc_overflow() -> ! {
panic!("detected overflow during string allocation")
}
/// Utility macro to create a [`JsString`]. /// Utility macro to create a [`JsString`].
/// ///
/// # Examples /// # Examples
@ -171,6 +175,8 @@ struct RawJsString {
data: [u16; 0], data: [u16; 0],
} }
const DATA_OFFSET: usize = std::mem::size_of::<RawJsString>();
/// This struct uses a technique called tagged pointer to benefit from the fact that newly allocated /// This struct uses a technique called tagged pointer to benefit from the fact that newly allocated
/// pointers are always word aligned on 64-bits platforms, making it impossible to have a LSB equal /// pointers are always word aligned on 64-bits platforms, making it impossible to have a LSB equal
/// to 1. More details about this technique on the article of Wikipedia about [tagged pointers][tagged_wp]. /// to 1. More details about this technique on the article of Wikipedia about [tagged pointers][tagged_wp].
@ -254,11 +260,11 @@ impl TaggedJsString {
} }
} }
/// Enum representing either a reference to a heap allocated [`RawJsString`] or a static reference to /// Enum representing either a pointer to a heap allocated [`RawJsString`] or a static reference to
/// a [`\[u16\]`][slice] inside [`COMMON_STRINGS`]. /// a [`\[u16\]`][slice] inside [`COMMON_STRINGS`].
enum JsStringPtrKind<'a> { enum JsStringPtrKind {
// A string allocated on the heap. // A string allocated on the heap.
Heap(&'a mut RawJsString), Heap(NonNull<RawJsString>),
// A static string slice. // A static string slice.
Static(&'static [u16]), Static(&'static [u16]),
} }
@ -281,6 +287,9 @@ pub struct JsString {
ptr: TaggedJsString, ptr: TaggedJsString,
} }
// JsString should always be pointer sized.
sa::assert_eq_size!(JsString, *const ());
// Safety: `JsString` does not contain any objects which needs to be traced, so this is safe. // Safety: `JsString` does not contain any objects which needs to be traced, so this is safe.
unsafe impl Trace for JsString { unsafe impl Trace for JsString {
unsafe_empty_trace!(); unsafe_empty_trace!();
@ -300,14 +309,19 @@ impl JsString {
/// Creates a new [`JsString`] from the concatenation of every element of /// Creates a new [`JsString`] from the concatenation of every element of
/// `strings`. /// `strings`.
pub fn concat_array(strings: &[&[u16]]) -> Self { pub fn concat_array(strings: &[&[u16]]) -> Self {
let full_count = strings.iter().fold(0, |len, s| len + s.len()); let mut full_count = 0usize;
for &string in strings {
let Some(sum) = full_count.checked_add(string.len()) else {
alloc_overflow()
};
full_count = sum;
}
let ptr = Self::allocate_inner(full_count); let ptr = Self::allocate_inner(full_count);
let string = { let string = {
// SAFETY: // SAFETY: `allocate_inner` guarantees that `ptr` is a valid pointer.
// `ptr` being a `NonNull` ensures that a dereference of its underlying pointer is always valid. let mut data = unsafe { ptr.as_ptr().cast::<u8>().add(DATA_OFFSET).cast() };
let mut data = unsafe { (*ptr.as_ptr()).data.as_mut_ptr() };
for string in strings { for string in strings {
let count = string.len(); let count = string.len();
// SAFETY: // SAFETY:
@ -523,25 +537,45 @@ impl JsString {
/// Returns the inner pointer data, unwrapping its tagged data if the pointer contains a static /// Returns the inner pointer data, unwrapping its tagged data if the pointer contains a static
/// index for [`COMMON_STRINGS`]. /// index for [`COMMON_STRINGS`].
#[inline] #[inline]
fn ptr(&self) -> JsStringPtrKind<'_> { fn ptr(&self) -> JsStringPtrKind {
// Check the first bit to 1. // Check the first bit to 1.
if self.ptr.is_static() { if self.ptr.is_static() {
// Safety: We already checked. // Safety: We already checked.
JsStringPtrKind::Static(unsafe { self.ptr.get_static_unchecked() }) JsStringPtrKind::Static(unsafe { self.ptr.get_static_unchecked() })
} else { } else {
// Safety: We already checked. // Safety: We already checked.
JsStringPtrKind::Heap(unsafe { self.ptr.get_heap_unchecked().as_mut() }) JsStringPtrKind::Heap(unsafe { self.ptr.get_heap_unchecked() })
}
}
/// Allocates a new [`RawJsString`] with an internal capacity of `str_len` chars.
///
/// # Panics
///
/// Panics if `try_allocate_inner` returns `Err`.
fn allocate_inner(str_len: usize) -> NonNull<RawJsString> {
match Self::try_allocate_inner(str_len) {
Ok(v) => v,
Err(None) => alloc_overflow(),
Err(Some(layout)) => std::alloc::handle_alloc_error(layout),
} }
} }
// This is marked as safe because it is always valid to call this function to request any number // This is marked as safe because it is always valid to call this function to request any number
// of `u16`, since this function ought to fail on an OOM error. // of `u16`, since this function ought to fail on an OOM error.
/// Allocates a new [`RawJsString`] with an internal capacity of `str_len` chars. /// Allocates a new [`RawJsString`] with an internal capacity of `str_len` chars.
fn allocate_inner(str_len: usize) -> NonNull<RawJsString> { ///
/// # Errors
///
/// Returns `Err(None)` on integer overflows `usize::MAX`.
/// Returns `Err(Some(Layout))` on allocation error.
fn try_allocate_inner(str_len: usize) -> Result<NonNull<RawJsString>, Option<Layout>> {
let (layout, offset) = Layout::array::<u16>(str_len) let (layout, offset) = Layout::array::<u16>(str_len)
.and_then(|arr| Layout::new::<RawJsString>().extend(arr)) .and_then(|arr| Layout::new::<RawJsString>().extend(arr))
.map(|(layout, offset)| (layout.pad_to_align(), offset)) .map(|(layout, offset)| (layout.pad_to_align(), offset))
.expect("failed to create memory layout"); .map_err(|_| None)?;
debug_assert_eq!(offset, DATA_OFFSET);
// SAFETY: // SAFETY:
// The layout size of `RawJsString` is never zero, since it has to store // The layout size of `RawJsString` is never zero, since it has to store
@ -551,7 +585,7 @@ impl JsString {
// We need to verify that the pointer returned by `alloc` is not null, otherwise // We need to verify that the pointer returned by `alloc` is not null, otherwise
// we should abort, since an allocation error is pretty unrecoverable for us // we should abort, since an allocation error is pretty unrecoverable for us
// right now. // right now.
let inner = NonNull::new(inner).unwrap_or_else(|| std::alloc::handle_alloc_error(layout)); let inner = NonNull::new(inner).ok_or(Some(layout))?;
// SAFETY: // SAFETY:
// `NonNull` verified for us that the pointer returned by `alloc` is valid, // `NonNull` verified for us that the pointer returned by `alloc` is valid,
@ -580,13 +614,16 @@ impl JsString {
} }
}); });
inner Ok(inner)
} }
/// Creates a new [`JsString`] from `data`, without checking if the string is in the interner. /// Creates a new [`JsString`] from `data`, without checking if the string is in the interner.
fn from_slice_skip_interning(data: &[u16]) -> Self { fn from_slice_skip_interning(string: &[u16]) -> Self {
let count = data.len(); let count = string.len();
let ptr = Self::allocate_inner(count); let ptr = Self::allocate_inner(count);
// SAFETY: `allocate_inner` guarantees that `ptr` is a valid pointer.
let data = unsafe { ptr.as_ptr().cast::<u8>().add(DATA_OFFSET) };
// SAFETY: // SAFETY:
// - We read `count = data.len()` elements from `data`, which is within the bounds of the slice. // - We read `count = data.len()` elements from `data`, which is within the bounds of the slice.
// - `allocate_inner` must allocate at least `count` elements, which allows us to safely // - `allocate_inner` must allocate at least `count` elements, which allows us to safely
@ -596,7 +633,7 @@ impl JsString {
// - `allocate_inner` must return a valid pointer to newly allocated memory, meaning `ptr` // - `allocate_inner` must return a valid pointer to newly allocated memory, meaning `ptr`
// and `data` should never overlap. // and `data` should never overlap.
unsafe { unsafe {
ptr::copy_nonoverlapping(data.as_ptr(), (*ptr.as_ptr()).data.as_mut_ptr(), count); ptr::copy_nonoverlapping(string.as_ptr(), data.cast(), count);
} }
Self { Self {
// Safety: We already know it's a valid heap pointer. // Safety: We already know it's a valid heap pointer.
@ -621,6 +658,8 @@ impl Clone for JsString {
#[inline] #[inline]
fn clone(&self) -> Self { fn clone(&self) -> Self {
if let JsStringPtrKind::Heap(inner) = self.ptr() { if let JsStringPtrKind::Heap(inner) = self.ptr() {
// SAFETY: The reference count of `JsString` guarantees that `raw` is always valid.
let inner = unsafe { &mut *inner.as_ptr() };
inner.refcount.set(inner.refcount.get() + 1); inner.refcount.set(inner.refcount.get() + 1);
} }
Self { ptr: self.ptr } Self { ptr: self.ptr }
@ -645,7 +684,9 @@ impl Default for JsString {
impl Drop for JsString { impl Drop for JsString {
#[inline] #[inline]
fn drop(&mut self) { fn drop(&mut self) {
if let JsStringPtrKind::Heap(inner) = self.ptr() { if let JsStringPtrKind::Heap(raw) = self.ptr() {
// SAFETY: The reference count of `JsString` guarantees that `raw` is always valid.
let inner = unsafe { &mut *raw.as_ptr() };
inner.refcount.set(inner.refcount.get() - 1); inner.refcount.set(inner.refcount.get() - 1);
if inner.refcount.get() == 0 { if inner.refcount.get() == 0 {
// SAFETY: // SAFETY:
@ -663,7 +704,7 @@ impl Drop for JsString {
// If refcount is 0 and we call drop, that means this is the last `JsString` which // If refcount is 0 and we call drop, that means this is the last `JsString` which
// points to this memory allocation, so deallocating it is safe. // points to this memory allocation, so deallocating it is safe.
unsafe { unsafe {
dealloc((inner as *mut RawJsString).cast(), layout); dealloc(raw.as_ptr().cast(), layout);
} }
} }
} }
@ -699,7 +740,12 @@ impl Deref for JsString {
// //
// - The lifetime of `&Self::Target` is shorter than the lifetime of `self`, as seen // - The lifetime of `&Self::Target` is shorter than the lifetime of `self`, as seen
// by its signature, so this doesn't outlive `self`. // by its signature, so this doesn't outlive `self`.
unsafe { std::slice::from_raw_parts(h.data.as_ptr(), h.len) } unsafe {
std::slice::from_raw_parts(
h.as_ptr().cast::<u8>().add(DATA_OFFSET).cast(),
h.as_ref().len,
)
}
} }
JsStringPtrKind::Static(s) => s, JsStringPtrKind::Static(s) => s,
} }
@ -900,7 +946,11 @@ mod tests {
#[inline] #[inline]
fn refcount(&self) -> Option<usize> { fn refcount(&self) -> Option<usize> {
match self.ptr() { match self.ptr() {
JsStringPtrKind::Heap(inner) => Some(inner.refcount.get()), JsStringPtrKind::Heap(inner) => {
// SAFETY: The reference count of `JsString` guarantees that `inner` is always valid.
let inner = unsafe { inner.as_ref() };
Some(inner.refcount.get())
}
JsStringPtrKind::Static(_inner) => None, JsStringPtrKind::Static(_inner) => None,
} }
} }
@ -912,12 +962,6 @@ mod tests {
assert_eq!(*s, "".encode_utf16().collect::<Vec<u16>>()); assert_eq!(*s, "".encode_utf16().collect::<Vec<u16>>());
} }
#[test]
fn pointer_size() {
assert_eq!(size_of::<JsString>(), size_of::<*const ()>());
assert_eq!(size_of::<Option<JsString>>(), size_of::<*const ()>());
}
#[test] #[test]
fn refcount() { fn refcount() {
let x = js_string!("Hello world"); let x = js_string!("Hello world");

Loading…
Cancel
Save