mirror of https://github.com/boa-dev/boa.git
Browse Source
So, @raskad and I had a short discussion about the state of #736, and we came to the conclusion that it would be a good time to implement our own string interner; partly because the `string-interner` crate is a bit unmaintained (as shown by https://github.com/Robbepop/string-interner/pull/42 and https://github.com/Robbepop/string-interner/pull/47), and partly because it would be hard to experiment with custom optimizations for UTF-16 strings. I still want to thank @Robbepop for the original implementation though, because some parts of this design have been shamelessly stolen from it 😅. Having said that, this PR is a complete reimplementation of the interner, but with some modifications to (hopefully!) make it a bit easier to experiment with UTF-16 strings, apply optimizations, and whatnot :)
pull/2153/head
jedel1043
2 years ago
13 changed files with 488 additions and 314 deletions
@ -1,6 +1,6 @@ |
|||||||
//! Garbage collector for the Boa JavaScript engine.
|
//! Garbage collector for the Boa JavaScript engine.
|
||||||
|
|
||||||
pub use gc::{ |
pub use gc::{ |
||||||
custom_trace, force_collect, unsafe_empty_trace, Finalize, Gc, GcCell as Cell, |
custom_trace, finalizer_safe, force_collect, unsafe_empty_trace, Finalize, Gc, GcCell as Cell, |
||||||
GcCellRef as Ref, GcCellRefMut as RefMut, Trace, |
GcCellRef as Ref, GcCellRefMut as RefMut, Trace, |
||||||
}; |
}; |
||||||
|
@ -0,0 +1,62 @@ |
|||||||
|
use crate::interned_str::InternedStr; |
||||||
|
|
||||||
|
#[derive(Debug, Default)] |
||||||
|
pub(super) struct FixedString { |
||||||
|
inner: String, |
||||||
|
} |
||||||
|
|
||||||
|
impl FixedString { |
||||||
|
/// Creates a new, pinned [`FixedString`].
|
||||||
|
pub(super) fn new(capacity: usize) -> Self { |
||||||
|
Self { |
||||||
|
inner: String::with_capacity(capacity), |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/// Gets the maximum capacity of the [`FixedString`].
|
||||||
|
pub(super) fn capacity(&self) -> usize { |
||||||
|
self.inner.capacity() |
||||||
|
} |
||||||
|
|
||||||
|
/// Returns `true` if the [`FixedString`] has length zero,
|
||||||
|
/// and `false` otherwise.
|
||||||
|
pub(super) fn is_empty(&self) -> bool { |
||||||
|
self.inner.is_empty() |
||||||
|
} |
||||||
|
|
||||||
|
/// Tries to push `string` to the [`FixedString`], and returns
|
||||||
|
/// an [`InternedStr`] pointer to the stored `string`, or
|
||||||
|
/// `None` if the capacity is not enough to store `string`.
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// The caller is responsible for ensuring `self` outlives the returned
|
||||||
|
/// `InternedStr`.
|
||||||
|
pub(super) unsafe fn push(&mut self, string: &str) -> Option<InternedStr> { |
||||||
|
let capacity = self.inner.capacity(); |
||||||
|
(capacity >= self.inner.len() + string.len()).then(|| { |
||||||
|
let old_len = self.inner.len(); |
||||||
|
self.inner.push_str(string); |
||||||
|
// SAFETY: The caller is responsible for extending the lifetime
|
||||||
|
// of `self` to outlive the return value.
|
||||||
|
unsafe { InternedStr::new(self.inner[old_len..self.inner.len()].into()) } |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
/// Pushes `string` to the [`FixedString`], and returns
|
||||||
|
/// an [`InternedStr`] pointer to the stored `string`, without
|
||||||
|
/// checking if the total `capacity` is enough to store `string`.
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// The caller is responsible for ensuring that `self` outlives the returned
|
||||||
|
/// `InternedStr` and that it has enough capacity to store `string` without
|
||||||
|
/// reallocating.
|
||||||
|
pub(super) unsafe fn push_unchecked(&mut self, string: &str) -> InternedStr { |
||||||
|
let old_len = self.inner.len(); |
||||||
|
self.inner.push_str(string); |
||||||
|
// SAFETY: The caller is responsible for extending the lifetime
|
||||||
|
// of `self` to outlive the return value.
|
||||||
|
unsafe { InternedStr::new(self.inner[old_len..self.inner.len()].into()) } |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,70 @@ |
|||||||
|
use std::{borrow::Borrow, ptr::NonNull}; |
||||||
|
|
||||||
|
/// Wrapper for an interned str pointer, required to
|
||||||
|
/// quickly check using a hash if a string is inside an [`Interner`][`super::Interner`].
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// This struct could cause Undefined Behaviour on:
|
||||||
|
/// - Use without ensuring the referenced memory is still allocated.
|
||||||
|
/// - Construction of an [`InternedStr`] from an invalid [`NonNull<str>`].
|
||||||
|
///
|
||||||
|
/// In general, this should not be used outside of an [`Interner`][`super::Interner`].
|
||||||
|
#[derive(Debug, Clone)] |
||||||
|
pub(super) struct InternedStr { |
||||||
|
ptr: NonNull<str>, |
||||||
|
} |
||||||
|
|
||||||
|
impl InternedStr { |
||||||
|
/// Create a new interned string from the given `str`.
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// Not maintaining the invariants specified on the struct definition
|
||||||
|
/// could cause Undefined Behaviour.
|
||||||
|
#[inline] |
||||||
|
pub(super) unsafe fn new(ptr: NonNull<str>) -> Self { |
||||||
|
Self { ptr } |
||||||
|
} |
||||||
|
|
||||||
|
/// Returns a shared reference to the underlying string.
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// Not maintaining the invariants specified on the struct definition
|
||||||
|
/// could cause Undefined Behaviour.
|
||||||
|
#[inline] |
||||||
|
pub(super) unsafe fn as_str(&self) -> &str { |
||||||
|
// SAFETY: The caller must verify the invariants
|
||||||
|
// specified on the struct definition.
|
||||||
|
unsafe { self.ptr.as_ref() } |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
impl std::hash::Hash for InternedStr { |
||||||
|
fn hash<H: std::hash::Hasher>(&self, state: &mut H) { |
||||||
|
// SAFETY: The caller must verify the invariants
|
||||||
|
// specified in the struct definition.
|
||||||
|
unsafe { |
||||||
|
self.as_str().hash(state); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
impl Eq for InternedStr {} |
||||||
|
|
||||||
|
impl PartialEq for InternedStr { |
||||||
|
fn eq(&self, other: &Self) -> bool { |
||||||
|
// SAFETY: The caller must verify the invariants
|
||||||
|
// specified in the struct definition.
|
||||||
|
unsafe { self.as_str() == other.as_str() } |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
impl Borrow<str> for InternedStr { |
||||||
|
fn borrow(&self) -> &str { |
||||||
|
// SAFETY: The caller must verify the invariants
|
||||||
|
// specified in the struct definition.
|
||||||
|
unsafe { self.as_str() } |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,148 @@ |
|||||||
|
use std::num::NonZeroUsize; |
||||||
|
|
||||||
|
#[cfg(feature = "serde")] |
||||||
|
use serde::{Deserialize, Serialize}; |
||||||
|
|
||||||
|
/// The string symbol type for Boa.
|
||||||
|
///
|
||||||
|
/// This symbol type is internally a `NonZeroUsize`, which makes it pointer-width in size and it's
|
||||||
|
/// optimized so that it can occupy 1 pointer width even in an `Option` type.
|
||||||
|
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] |
||||||
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] |
||||||
|
#[cfg_attr(feature = "serde", serde(transparent))] |
||||||
|
#[allow(clippy::unsafe_derive_deserialize)] |
||||||
|
pub struct Sym { |
||||||
|
value: NonZeroUsize, |
||||||
|
} |
||||||
|
|
||||||
|
impl Sym { |
||||||
|
/// Symbol for the empty string (`""`).
|
||||||
|
pub const EMPTY_STRING: Self = unsafe { Self::new_unchecked(1) }; |
||||||
|
|
||||||
|
/// Symbol for the `"arguments"` string.
|
||||||
|
pub const ARGUMENTS: Self = unsafe { Self::new_unchecked(2) }; |
||||||
|
|
||||||
|
/// Symbol for the `"await"` string.
|
||||||
|
pub const AWAIT: Self = unsafe { Self::new_unchecked(3) }; |
||||||
|
|
||||||
|
/// Symbol for the `"yield"` string.
|
||||||
|
pub const YIELD: Self = unsafe { Self::new_unchecked(4) }; |
||||||
|
|
||||||
|
/// Symbol for the `"eval"` string.
|
||||||
|
pub const EVAL: Self = unsafe { Self::new_unchecked(5) }; |
||||||
|
|
||||||
|
/// Symbol for the `"default"` string.
|
||||||
|
pub const DEFAULT: Self = unsafe { Self::new_unchecked(6) }; |
||||||
|
|
||||||
|
/// Symbol for the `"null"` string.
|
||||||
|
pub const NULL: Self = unsafe { Self::new_unchecked(7) }; |
||||||
|
|
||||||
|
/// Symbol for the `"RegExp"` string.
|
||||||
|
pub const REGEXP: Self = unsafe { Self::new_unchecked(8) }; |
||||||
|
|
||||||
|
/// Symbol for the `"get"` string.
|
||||||
|
pub const GET: Self = unsafe { Self::new_unchecked(9) }; |
||||||
|
|
||||||
|
/// Symbol for the `"set"` string.
|
||||||
|
pub const SET: Self = unsafe { Self::new_unchecked(10) }; |
||||||
|
|
||||||
|
/// Symbol for the `"<main>"` string.
|
||||||
|
pub const MAIN: Self = unsafe { Self::new_unchecked(11) }; |
||||||
|
|
||||||
|
/// Symbol for the `"raw"` string.
|
||||||
|
pub const RAW: Self = unsafe { Self::new_unchecked(12) }; |
||||||
|
|
||||||
|
/// Symbol for the `"static"` string.
|
||||||
|
pub const STATIC: Self = unsafe { Self::new_unchecked(13) }; |
||||||
|
|
||||||
|
/// Symbol for the `"prototype"` string.
|
||||||
|
pub const PROTOTYPE: Self = unsafe { Self::new_unchecked(14) }; |
||||||
|
|
||||||
|
/// Symbol for the `"constructor"` string.
|
||||||
|
pub const CONSTRUCTOR: Self = unsafe { Self::new_unchecked(15) }; |
||||||
|
|
||||||
|
/// Symbol for the `"implements"` string.
|
||||||
|
pub const IMPLEMENTS: Self = unsafe { Self::new_unchecked(16) }; |
||||||
|
|
||||||
|
/// Symbol for the `"interface"` string.
|
||||||
|
pub const INTERFACE: Self = unsafe { Self::new_unchecked(17) }; |
||||||
|
|
||||||
|
/// Symbol for the `"let"` string.
|
||||||
|
pub const LET: Self = unsafe { Self::new_unchecked(18) }; |
||||||
|
|
||||||
|
/// Symbol for the `"package"` string.
|
||||||
|
pub const PACKAGE: Self = unsafe { Self::new_unchecked(19) }; |
||||||
|
|
||||||
|
/// Symbol for the `"private"` string.
|
||||||
|
pub const PRIVATE: Self = unsafe { Self::new_unchecked(20) }; |
||||||
|
|
||||||
|
/// Symbol for the `"protected"` string.
|
||||||
|
pub const PROTECTED: Self = unsafe { Self::new_unchecked(21) }; |
||||||
|
|
||||||
|
/// Symbol for the `"public"` string.
|
||||||
|
pub const PUBLIC: Self = unsafe { Self::new_unchecked(22) }; |
||||||
|
|
||||||
|
/// Creates a new [`Sym`] from the provided `value`, or returns `None` if `index` is zero.
|
||||||
|
#[inline] |
||||||
|
pub(super) fn new(value: usize) -> Option<Self> { |
||||||
|
NonZeroUsize::new(value).map(|value| Self { value }) |
||||||
|
} |
||||||
|
|
||||||
|
/// Creates a new [`Sym`] from the provided `value`, without checking if `value` is not zero
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// `value` must not be zero.
|
||||||
|
#[inline] |
||||||
|
pub(super) const unsafe fn new_unchecked(value: usize) -> Self { |
||||||
|
Self { |
||||||
|
value: |
||||||
|
// SAFETY: The caller must ensure the invariants of the function.
|
||||||
|
unsafe { |
||||||
|
NonZeroUsize::new_unchecked(value) |
||||||
|
}, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/// Returns the internal value of the [`Sym`]
|
||||||
|
#[inline] |
||||||
|
pub(super) const fn get(self) -> usize { |
||||||
|
self.value.get() |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/// Ordered set of commonly used static strings.
|
||||||
|
///
|
||||||
|
/// # Note
|
||||||
|
///
|
||||||
|
/// `COMMON_STRINGS` and the constants defined in [`Sym`] must always
|
||||||
|
/// be in sync.
|
||||||
|
pub(super) static COMMON_STRINGS: phf::OrderedSet<&'static str> = { |
||||||
|
const COMMON_STRINGS: phf::OrderedSet<&'static str> = phf::phf_ordered_set! { |
||||||
|
"", |
||||||
|
"arguments", |
||||||
|
"await", |
||||||
|
"yield", |
||||||
|
"eval", |
||||||
|
"default", |
||||||
|
"null", |
||||||
|
"RegExp", |
||||||
|
"get", |
||||||
|
"set", |
||||||
|
"<main>", |
||||||
|
"raw", |
||||||
|
"static", |
||||||
|
"prototype", |
||||||
|
"constructor", |
||||||
|
"implements", |
||||||
|
"interface", |
||||||
|
"let", |
||||||
|
"package", |
||||||
|
"private", |
||||||
|
"protected", |
||||||
|
"public", |
||||||
|
}; |
||||||
|
// A `COMMON_STRINGS` of size `usize::MAX` would cause an overflow on our `Interner`
|
||||||
|
sa::const_assert!(COMMON_STRINGS.len() < usize::MAX); |
||||||
|
COMMON_STRINGS |
||||||
|
}; |
Loading…
Reference in new issue