mirror of https://github.com/boa-dev/boa.git
Haled Odat
1 year ago
64 changed files with 741 additions and 1576 deletions
@ -1,80 +0,0 @@
|
||||
use alloc::vec::Vec; |
||||
|
||||
use crate::interned_str::InternedStr; |
||||
|
||||
#[derive(Debug)] |
||||
pub(super) struct FixedString<Char> { |
||||
inner: Vec<Char>, |
||||
} |
||||
|
||||
impl<Char> Default for FixedString<Char> { |
||||
fn default() -> Self { |
||||
Self { |
||||
inner: Vec::default(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl<Char> FixedString<Char> { |
||||
/// Creates a new, pinned [`FixedString`].
|
||||
pub(super) fn new(capacity: usize) -> Self { |
||||
Self { |
||||
inner: Vec::with_capacity(capacity), |
||||
} |
||||
} |
||||
|
||||
/// Gets the maximum capacity of the [`FixedString`].
|
||||
pub(super) fn capacity(&self) -> usize { |
||||
self.inner.capacity() |
||||
} |
||||
|
||||
/// Returns `true` if the [`FixedString`] has length zero,
|
||||
/// and `false` otherwise.
|
||||
pub(super) fn is_empty(&self) -> bool { |
||||
self.inner.is_empty() |
||||
} |
||||
} |
||||
|
||||
impl<Char> FixedString<Char> |
||||
where |
||||
Char: Clone, |
||||
{ |
||||
/// Tries to push `string` to the [`FixedString`], and returns
|
||||
/// an [`InternedStr`] pointer to the stored `string`, or
|
||||
/// `None` if the capacity is not enough to store `string`.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// The caller is responsible for ensuring `self` outlives the returned
|
||||
/// [`InternedStr`].
|
||||
pub(super) unsafe fn push(&mut self, string: &[Char]) -> Option<InternedStr<Char>> { |
||||
let capacity = self.inner.capacity(); |
||||
(capacity >= self.inner.len() + string.len()).then(|| { |
||||
// SAFETY:
|
||||
// The caller is responsible for extending the lifetime
|
||||
// of `self` to outlive the return value.
|
||||
unsafe { self.push_unchecked(string) } |
||||
}) |
||||
} |
||||
|
||||
/// Pushes `string` to the [`FixedString`], and returns
|
||||
/// an [`InternedStr`] pointer to the stored `string`, without
|
||||
/// checking if the total `capacity` is enough to store `string`,
|
||||
/// and without checking if the string is correctly aligned.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// The caller is responsible for ensuring that `self` outlives the returned
|
||||
/// [`InternedStr`] and that it has enough capacity to store `string` without
|
||||
/// reallocating.
|
||||
pub(super) unsafe fn push_unchecked(&mut self, string: &[Char]) -> InternedStr<Char> { |
||||
let old_len = self.inner.len(); |
||||
self.inner.extend_from_slice(string); |
||||
|
||||
// SAFETY: The caller is responsible for extending the lifetime
|
||||
// of `self` to outlive the return value, and for ensuring
|
||||
// the alignment of `string` is correct.
|
||||
let ptr = &self.inner[old_len..self.inner.len()]; |
||||
unsafe { InternedStr::new(ptr.into()) } |
||||
} |
||||
} |
@ -1,80 +0,0 @@
|
||||
use core::{hash::Hash, ptr::NonNull}; |
||||
|
||||
/// Wrapper for an interned str pointer, required to
|
||||
/// quickly check using a hash if a string is inside an [`Interner`][`super::Interner`].
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// This struct could cause Undefined Behaviour on:
|
||||
/// - Use without ensuring the referenced memory is still allocated.
|
||||
/// - Construction of an [`InternedStr`] from an invalid [`NonNull<Char>`] pointer.
|
||||
/// - Construction of an [`InternedStr`] from a [`NonNull<Char>`] pointer
|
||||
/// without checking if the pointed memory of the [`NonNull<Char>`] outlives
|
||||
/// the [`InternedStr`].
|
||||
///
|
||||
/// In general, this should not be used outside of an [`Interner`][`super::Interner`].
|
||||
#[derive(Debug)] |
||||
pub(super) struct InternedStr<Char> { |
||||
ptr: NonNull<[Char]>, |
||||
} |
||||
|
||||
impl<Char> InternedStr<Char> { |
||||
/// Create a new interned string from the given `*const u8` pointer,
|
||||
/// length and encoding kind
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Not maintaining the invariants specified on the struct definition
|
||||
/// could cause Undefined Behaviour.
|
||||
pub(super) const unsafe fn new(ptr: NonNull<[Char]>) -> Self { |
||||
Self { ptr } |
||||
} |
||||
|
||||
/// Returns a shared reference to the underlying string.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Not maintaining the invariants specified on the struct definition
|
||||
/// could cause Undefined Behaviour.
|
||||
pub(super) unsafe fn as_ref(&self) -> &[Char] { |
||||
// SAFETY:
|
||||
// The caller must ensure `ptr` is still valid throughout the
|
||||
// lifetime of `self`.
|
||||
unsafe { self.ptr.as_ref() } |
||||
} |
||||
} |
||||
|
||||
impl<Char> Clone for InternedStr<Char> { |
||||
fn clone(&self) -> Self { |
||||
*self |
||||
} |
||||
} |
||||
|
||||
impl<Char> Copy for InternedStr<Char> {} |
||||
|
||||
impl<Char> Eq for InternedStr<Char> where Char: Eq {} |
||||
|
||||
impl<Char> PartialEq for InternedStr<Char> |
||||
where |
||||
Char: PartialEq, |
||||
{ |
||||
fn eq(&self, other: &Self) -> bool { |
||||
// SAFETY: The caller must verify the invariants
|
||||
// specified in the struct definition.
|
||||
unsafe { self.as_ref() == other.as_ref() } |
||||
} |
||||
} |
||||
|
||||
impl<Char> Hash for InternedStr<Char> |
||||
where |
||||
Char: Hash, |
||||
{ |
||||
fn hash<H: core::hash::Hasher>(&self, state: &mut H) { |
||||
// SAFETY:
|
||||
// The caller must ensure `ptr` is still valid throughout the
|
||||
// lifetime of `self`.
|
||||
unsafe { |
||||
self.as_ref().hash(state); |
||||
} |
||||
} |
||||
} |
@ -1,194 +0,0 @@
|
||||
use crate::{fixed_string::FixedString, interned_str::InternedStr}; |
||||
use alloc::vec::Vec; |
||||
use core::hash::{BuildHasherDefault, Hash}; |
||||
use hashbrown::HashMap; |
||||
use rustc_hash::FxHasher; |
||||
|
||||
type Map<T, U> = HashMap<T, U, BuildHasherDefault<FxHasher>>; |
||||
|
||||
/// Raw string interner, generic by a char type.
|
||||
#[derive(Debug)] |
||||
pub(super) struct RawInterner<Char> { |
||||
// COMMENT FOR DEVS:
|
||||
// This interner works on the assumption that
|
||||
// `head` won't ever be reallocated, since this could invalidate
|
||||
// some of our stored pointers inside `spans`.
|
||||
// This means that any operation on `head` and `full` should be carefully
|
||||
// reviewed to not cause Undefined Behaviour.
|
||||
// `intern` has a more thorough explanation on this.
|
||||
//
|
||||
// Also, if you want to implement `shrink_to_fit` (and friends),
|
||||
// please check out https://github.com/Robbepop/string-interner/pull/47 first.
|
||||
// This doesn't implement that method, since implementing it increases
|
||||
// our memory footprint.
|
||||
symbol_cache: Map<InternedStr<Char>, usize>, |
||||
spans: Vec<InternedStr<Char>>, |
||||
head: FixedString<Char>, |
||||
full: Vec<FixedString<Char>>, |
||||
} |
||||
|
||||
impl<Char> Default for RawInterner<Char> { |
||||
fn default() -> Self { |
||||
Self { |
||||
symbol_cache: Map::default(), |
||||
spans: Vec::default(), |
||||
head: FixedString::default(), |
||||
full: Vec::default(), |
||||
} |
||||
} |
||||
} |
||||
|
||||
impl<Char> RawInterner<Char> { |
||||
/// Creates a new `RawInterner` with the specified capacity.
|
||||
pub(super) fn with_capacity(capacity: usize) -> Self { |
||||
Self { |
||||
symbol_cache: Map::default(), |
||||
spans: Vec::with_capacity(capacity), |
||||
head: FixedString::new(capacity), |
||||
full: Vec::new(), |
||||
} |
||||
} |
||||
|
||||
/// Returns the number of strings interned by the interner.
|
||||
pub(super) fn len(&self) -> usize { |
||||
self.spans.len() |
||||
} |
||||
|
||||
/// Returns `true` if the interner contains no interned strings.
|
||||
pub(super) fn is_empty(&self) -> bool { |
||||
self.spans.is_empty() |
||||
} |
||||
} |
||||
|
||||
impl<Char> RawInterner<Char> |
||||
where |
||||
Char: Hash + Eq, |
||||
{ |
||||
/// Returns the index position for the given string if any.
|
||||
///
|
||||
/// Can be used to query if a string has already been interned without interning.
|
||||
pub(super) fn get(&self, string: &[Char]) -> Option<usize> { |
||||
// SAFETY:
|
||||
// `string` is a valid slice that doesn't outlive the
|
||||
// created `InternedStr`, so this is safe.
|
||||
unsafe { |
||||
self.symbol_cache |
||||
.get(&InternedStr::new(string.into())) |
||||
.copied() |
||||
} |
||||
} |
||||
|
||||
/// Interns the given `'static` string.
|
||||
///
|
||||
/// Returns the index of `string` within the interner.
|
||||
///
|
||||
/// # Note
|
||||
///
|
||||
/// This is more efficient than [`RawInterner::intern`], since it
|
||||
/// avoids storing `string` inside the interner.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// If the interner already interns the maximum number of strings possible
|
||||
/// by the chosen symbol type.
|
||||
pub(super) fn intern_static(&mut self, string: &'static [Char]) -> usize { |
||||
// SAFETY:
|
||||
// A static string reference is always valid, meaning it cannot outlive
|
||||
// the lifetime of the created `InternedStr`. This makes this
|
||||
// operation safe.
|
||||
let string = unsafe { InternedStr::new(string.into()) }; |
||||
|
||||
// SAFETY:
|
||||
// A `InternedStr` created from a static reference
|
||||
// cannot be invalidated by allocations and deallocations,
|
||||
// so this is safe.
|
||||
unsafe { self.next_index(string) } |
||||
} |
||||
|
||||
/// Returns the string for the given index if any.
|
||||
pub(super) fn index(&self, index: usize) -> Option<&[Char]> { |
||||
self.spans.get(index).map(|ptr| |
||||
// SAFETY: We always ensure the stored `InternedStr`s always
|
||||
// reference memory inside `head` and `full`
|
||||
unsafe {ptr.as_ref()}) |
||||
} |
||||
|
||||
/// Inserts a new string pointer into `spans` and returns its index.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// The caller must ensure `string` points to a valid
|
||||
/// memory inside `head` (or only valid in the case of statics)
|
||||
/// and that it won't be invalidated by allocations and deallocations.
|
||||
unsafe fn next_index(&mut self, string: InternedStr<Char>) -> usize { |
||||
let next = self.len(); |
||||
self.spans.push(string); |
||||
self.symbol_cache.insert(string, next); |
||||
next |
||||
} |
||||
} |
||||
|
||||
impl<Char> RawInterner<Char> |
||||
where |
||||
Char: Hash + Eq + Clone, |
||||
{ |
||||
/// Interns the given string.
|
||||
///
|
||||
/// Returns the index of `string` within the interner.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// If the interner already interns the maximum number of strings possible by the chosen symbol type.
|
||||
pub(super) fn intern(&mut self, string: &[Char]) -> usize { |
||||
// SAFETY:
|
||||
//
|
||||
// Firstly, this interner works on the assumption that the allocated
|
||||
// memory by `head` won't ever be moved from its position on the heap,
|
||||
// which is an important point to understand why manipulating it like
|
||||
// this is safe.
|
||||
//
|
||||
// `String` (which is simply a `Vec<u8>` with additional invariants)
|
||||
// is essentially a pointer to heap memory that can be moved without
|
||||
// any problems, since copying a pointer cannot invalidate the memory
|
||||
// that it points to.
|
||||
//
|
||||
// However, `String` CAN be invalidated when pushing, extending or
|
||||
// shrinking it, since all those operations reallocate on the heap.
|
||||
//
|
||||
// To prevent that, we HAVE to ensure the capacity will succeed without
|
||||
// having to reallocate, and the only way to do that without invalidating
|
||||
// any other alive `InternedStr` is to create a brand new `head` with
|
||||
// enough capacity and push the old `head` to `full` to keep it alive
|
||||
// throughout the lifetime of the whole interner.
|
||||
//
|
||||
// `FixedString` encapsulates this by only allowing checked `push`es
|
||||
// to the internal string, but we still have to ensure the memory
|
||||
// of `head` is not deallocated until the whole interner deallocates,
|
||||
// which we can do by moving it inside the interner itself, specifically
|
||||
// on the `full` vector, where every other old `head` also lives.
|
||||
let interned_str = unsafe { |
||||
self.head.push(string).unwrap_or_else(|| { |
||||
let new_cap = |
||||
(usize::max(self.head.capacity(), string.len()) + 1).next_power_of_two(); |
||||
let new_head = FixedString::new(new_cap); |
||||
let old_head = core::mem::replace(&mut self.head, new_head); |
||||
|
||||
// If the user creates an `Interner`
|
||||
// with `Interner::with_capacity(BIG_NUMBER)` and
|
||||
// the first interned string's length is bigger than `BIG_NUMBER`,
|
||||
// `self.full.push(old_head)` would push a big, empty string of
|
||||
// allocated size `BIG_NUMBER` into `full`.
|
||||
// This prevents that case.
|
||||
if !old_head.is_empty() { |
||||
self.full.push(old_head); |
||||
} |
||||
self.head.push_unchecked(string) |
||||
}) |
||||
}; |
||||
|
||||
// SAFETY: We are obtaining a pointer to the internal memory of
|
||||
// `head`, which is alive through the whole life of the interner, so
|
||||
// this is safe.
|
||||
unsafe { self.next_index(interned_str) } |
||||
} |
||||
} |
Loading…
Reference in new issue