//! Boa's **`boa_gc`** crate implements a garbage collector.
//!
//! # Crate Overview
//!
//! **`boa_gc`** is a mark-sweep garbage collector that implements the `Trace` and `Finalize`
//! traits for garbage-collected values.
//!
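//! # Example
//!
//! A minimal usage sketch, for illustration only (`Node` is a made-up type):
//!
//! ```
//! use boa_gc::{Finalize, Gc, GcRefCell, Trace};
//!
//! // Garbage-collected types implement `Trace` and `Finalize`, usually through
//! // the derive macros re-exported by this crate.
//! #[derive(Trace, Finalize)]
//! struct Node {
//!     data: GcRefCell<i32>,
//! }
//!
//! // Allocate the value on the GC heap and mutate it through the cell.
//! let node = Gc::new(Node {
//!     data: GcRefCell::new(42),
//! });
//! *node.data.borrow_mut() += 1;
//! assert_eq!(*node.data.borrow(), 43);
//! ```
//!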
//! # About Boa
//!
//! Boa is an open-source, experimental ECMAScript Engine written in Rust for lexing, parsing and
//! executing ECMAScript/JavaScript. Currently, Boa supports some of the [language][boa-conformance].
//! More information can be viewed at [Boa's website][boa-web].
//!
//! Try out the most recent release with Boa's live demo [playground][boa-playground].
//!
//! # Boa Crates
//!
//! - **`boa_ast`** - Boa's ECMAScript Abstract Syntax Tree.
//! - **`boa_engine`** - Boa's implementation of ECMAScript builtin objects and execution.
//! - **`boa_gc`** - Boa's garbage collector.
//! - **`boa_interner`** - Boa's string interner.
//! - **`boa_parser`** - Boa's lexer and parser.
//! - **`boa_profiler`** - Boa's code profiler.
//! - **`boa_unicode`** - Boa's Unicode identifier.
//! - **`boa_icu_provider`** - Boa's ICU4X data provider.
//!
//! [boa-conformance]: https://boajs.dev/boa/test262/
//! [boa-web]: https://boajs.dev/
//! [boa-playground]: https://boajs.dev/boa/playground/

#![doc(
    html_logo_url = "https://raw.githubusercontent.com/boa-dev/boa/main/assets/logo.svg",
    html_favicon_url = "https://raw.githubusercontent.com/boa-dev/boa/main/assets/logo.svg"
)]
#![cfg_attr(not(test), forbid(clippy::unwrap_used))]
#![warn(missing_docs, clippy::dbg_macro)]
#![deny(
    // rustc lint groups https://doc.rust-lang.org/rustc/lints/groups.html
    warnings,
    future_incompatible,
    let_underscore,
    nonstandard_style,
    rust_2018_compatibility,
    rust_2018_idioms,
    rust_2021_compatibility,
    unused,

    // rustc allowed-by-default lints https://doc.rust-lang.org/rustc/lints/listing/allowed-by-default.html
    macro_use_extern_crate,
    meta_variable_misuse,
    missing_abi,
    missing_copy_implementations,
    missing_debug_implementations,
    non_ascii_idents,
    noop_method_call,
    single_use_lifetimes,
    trivial_casts,
    trivial_numeric_casts,
    unreachable_pub,
    unsafe_op_in_unsafe_fn,
    unused_crate_dependencies,
    unused_import_braces,
    unused_lifetimes,
    unused_qualifications,
    unused_tuple_struct_fields,
    variant_size_differences,

    // rustdoc lints https://doc.rust-lang.org/rustdoc/lints.html
    rustdoc::broken_intra_doc_links,
    rustdoc::private_intra_doc_links,
    rustdoc::missing_crate_level_docs,
    rustdoc::private_doc_tests,
    rustdoc::invalid_codeblock_attributes,
    rustdoc::invalid_rust_codeblocks,
    rustdoc::bare_urls,

    // clippy categories https://doc.rust-lang.org/clippy/
    clippy::all,
    clippy::correctness,
    clippy::suspicious,
    clippy::style,
    clippy::complexity,
    clippy::perf,
    clippy::pedantic,
    clippy::nursery,
    clippy::undocumented_unsafe_blocks
)]
#![allow(
    clippy::module_name_repetitions,
    clippy::redundant_pub_crate,
    clippy::let_unit_value
)]

extern crate self as boa_gc;

mod cell;
mod pointers;
mod trace;

pub(crate) mod internals;

use boa_profiler::Profiler;
use internals::{EphemeronBox, ErasedEphemeronBox, ErasedWeakMapBox, WeakMapBox};
use std::{
    cell::{Cell, RefCell},
    collections::HashMap,
    mem,
    ptr::NonNull,
};

pub use crate::trace::{Finalize, Trace};
pub use boa_macros::{Finalize, Trace};
pub use cell::{GcRef, GcRefCell, GcRefMut};
pub use internals::GcBox;
pub use pointers::{Ephemeron, Gc, WeakGc, WeakMap};

type GcPointer = NonNull<GcBox<dyn Trace>>;
type EphemeronPointer = NonNull<dyn ErasedEphemeronBox>;
type ErasedWeakMapBoxPointer = NonNull<dyn ErasedWeakMapBox>;

// Whether or not the thread is currently in the sweep phase of garbage collection.
// During this phase, attempts to dereference a `Gc<T>` pointer will trigger a panic.
thread_local!(static GC_DROPPING: Cell<bool> = Cell::new(false));
thread_local!(static BOA_GC: RefCell<BoaGc> = RefCell::new(BoaGc {
    config: GcConfig::default(),
    runtime: GcRuntimeData::default(),
    strong_start: Cell::new(None),
    weak_start: Cell::new(None),
    weak_map_start: Cell::new(None),
}));

#[derive(Debug, Clone, Copy)]
struct GcConfig {
    threshold: usize,
    used_space_percentage: usize,
}

// The defaults are arbitrary values for now.
//
// TODO: Make the configuration adjustable by the user.
impl Default for GcConfig {
    fn default() -> Self {
        Self {
            threshold: 1024,
            used_space_percentage: 80,
        }
    }
}

#[derive(Default, Debug, Clone, Copy)]
struct GcRuntimeData {
    collections: usize,
    bytes_allocated: usize,
}

#[derive(Debug)]
struct BoaGc {
    config: GcConfig,
    runtime: GcRuntimeData,
    strong_start: Cell<Option<GcPointer>>,
    weak_start: Cell<Option<EphemeronPointer>>,
    weak_map_start: Cell<Option<ErasedWeakMapBoxPointer>>,
}

impl Drop for BoaGc {
    fn drop(&mut self) {
        Collector::dump(self);
    }
}

/// `DropGuard` flags whether the collector is currently running `Collector::sweep()` or
/// `Collector::dump()`.
///
/// While a `DropGuard` is active, no `GcBox` may be dereferenced or accessed, since doing so
/// could cause Undefined Behavior.
#[derive(Debug, Clone)]
struct DropGuard;

impl DropGuard {
    fn new() -> Self {
        GC_DROPPING.with(|dropping| dropping.set(true));
        Self
    }
}

impl Drop for DropGuard {
    fn drop(&mut self) {
        GC_DROPPING.with(|dropping| dropping.set(false));
    }
}

/// Returns `true` if it is safe for a type to run [`Finalize::finalize`].
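///
/// A hypothetical `Finalize` implementation might use this to guard access to other
/// garbage-collected values during the sweep phase (a sketch for illustration; `Resource`
/// is a made-up type):
///
/// ```ignore
/// use boa_gc::{finalizer_safe, Finalize};
///
/// struct Resource;
///
/// impl Finalize for Resource {
///     fn finalize(&self) {
///         // While the collector is sweeping, other `Gc` pointers may already be dead,
///         // so only touch them when it is safe to do so.
///         if finalizer_safe() {
///             // ... release resources that reference other GC'd values ...
///         }
///     }
/// }
/// ```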
#[must_use]
#[inline]
pub fn finalizer_safe() -> bool {
    GC_DROPPING.with(|dropping| !dropping.get())
}

/// The `Allocator` handles allocation of garbage-collected values.
///
/// The allocator can trigger a garbage collection.
#[derive(Debug, Clone, Copy)]
struct Allocator;

impl Allocator {
    /// Allocates a new garbage-collected value on the garbage collector's heap.
    fn alloc_gc<T: Trace>(value: GcBox<T>) -> NonNull<GcBox<T>> {
        let _timer = Profiler::global().start_event("New GcBox", "BoaAlloc");
        let element_size = mem::size_of_val::<GcBox<T>>(&value);
        BOA_GC.with(|st| {
            let mut gc = st.borrow_mut();

            Self::manage_state(&mut gc);
            value.header.next.set(gc.strong_start.take());
            // Safety: value cannot be a null pointer, since `Box` cannot return null pointers.
            let ptr = unsafe { NonNull::new_unchecked(Box::into_raw(Box::new(value))) };
            let erased: NonNull<GcBox<dyn Trace>> = ptr;

            gc.strong_start.set(Some(erased));
            gc.runtime.bytes_allocated += element_size;

            ptr
        })
    }
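
    // The `alloc_*` functions in this impl all push onto an intrusive singly linked
    // list: the new box's `next` pointer takes the old list head, and the head is
    // then set to the new box. Schematically:
    //
    //   head -> A -> B          before
    //   head -> new -> A -> B   after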

    fn alloc_ephemeron<K: Trace + ?Sized, V: Trace>(
        value: EphemeronBox<K, V>,
    ) -> NonNull<EphemeronBox<K, V>> {
        let _timer = Profiler::global().start_event("New EphemeronBox", "BoaAlloc");
        let element_size = mem::size_of_val::<EphemeronBox<K, V>>(&value);
        BOA_GC.with(|st| {
            let mut gc = st.borrow_mut();

            Self::manage_state(&mut gc);
            value.header.next.set(gc.weak_start.take());
            // Safety: value cannot be a null pointer, since `Box` cannot return null pointers.
            let ptr = unsafe { NonNull::new_unchecked(Box::into_raw(Box::new(value))) };
            let erased: NonNull<dyn ErasedEphemeronBox> = ptr;

            gc.weak_start.set(Some(erased));
            gc.runtime.bytes_allocated += element_size;

            ptr
        })
    }

    fn alloc_weak_map<K: Trace, V: Trace>() -> WeakMap<K, V> {
        let _timer = Profiler::global().start_event("New WeakMap", "BoaAlloc");

        let weak_map = WeakMap {
            inner: Gc::new(GcRefCell::new(HashMap::new())),
        };
        let weak = WeakGc::new(&weak_map.inner);

        BOA_GC.with(|st| {
            let gc = st.borrow_mut();

            let weak_box = WeakMapBox {
                map: weak,
                next: Cell::new(gc.weak_map_start.take()),
            };

            // Safety: value cannot be a null pointer, since `Box` cannot return null pointers.
            let ptr = unsafe { NonNull::new_unchecked(Box::into_raw(Box::new(weak_box))) };
            let erased: ErasedWeakMapBoxPointer = ptr;

            gc.weak_map_start.set(Some(erased));

            weak_map
        })
    }
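
    // Worked example for `manage_state` below, using the current defaults: suppose a
    // collection leaves 900 live bytes with `threshold = 1024` and
    // `used_space_percentage = 80`. Since 900 > 1024 / 100 * 80 = 800, the heap is
    // still mostly occupied, so the threshold grows to 900 / 80 * 100 = 1100 bytes
    // (integer arithmetic) before the next collection triggers.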
    fn manage_state(gc: &mut BoaGc) {
        if gc.runtime.bytes_allocated > gc.config.threshold {
            Collector::collect(gc);

            // If most of the threshold is still occupied after collecting, raise it so
            // that collections don't run on every subsequent allocation.
            if gc.runtime.bytes_allocated
                > gc.config.threshold / 100 * gc.config.used_space_percentage
            {
                gc.config.threshold =
                    gc.runtime.bytes_allocated / gc.config.used_space_percentage * 100;
            }
        }
    }
}

struct Unreachables {
    strong: Vec<NonNull<GcBox<dyn Trace>>>,
    weak: Vec<NonNull<dyn ErasedEphemeronBox>>,
}

/// This collector currently functions in four main phases:
///
/// Mark -> Finalize -> Mark -> Sweep
///
/// 1. Mark nodes as reachable.
/// 2. Finalize the unreachable nodes.
/// 3. Mark again, because `Finalize::finalize` can potentially resurrect dead nodes.
/// 4. Sweep and drop all dead nodes.
///
/// A better approach for a more concurrent design may be to reorder the phases:
///
/// Mark -> Sweep -> Finalize
struct Collector;

impl Collector {
    /// Run a collection on the full heap.
    fn collect(gc: &mut BoaGc) {
        let _timer = Profiler::global().start_event("Gc Full Collection", "gc");
        gc.runtime.collections += 1;
        let unreachables = Self::mark_heap(&gc.strong_start, &gc.weak_start, &gc.weak_map_start);

        // Only finalize if there are any unreachable nodes.
        if !unreachables.strong.is_empty() || !unreachables.weak.is_empty() {
            // Finalize all the unreachable nodes.
            // SAFETY: All passed pointers are valid, since we won't deallocate until `Self::sweep`.
            unsafe { Self::finalize(unreachables) };

            // Mark again, since the finalizers could have resurrected some dead nodes.
            let _final_unreachables =
                Self::mark_heap(&gc.strong_start, &gc.weak_start, &gc.weak_map_start);
        }

        // SAFETY: The head of our linked list is always valid per the invariants of our GC.
        unsafe {
            Self::sweep(
                &gc.strong_start,
                &gc.weak_start,
                &mut gc.runtime.bytes_allocated,
            );
        }

        // Weak maps have to be cleared after the sweep, since the process dereferences GcBoxes.
        let mut weak_map = &gc.weak_map_start;
        while let Some(w) = weak_map.get() {
            // SAFETY: The allocator ensures the validity of every node of the weak map list.
            let node_ref = unsafe { w.as_ref() };

            if node_ref.is_live() {
                node_ref.clear_dead_entries();
                weak_map = node_ref.next();
            } else {
                weak_map.set(node_ref.next().take());

                // SAFETY:
                // The `Allocator` must always ensure its start node is a valid, non-null pointer
                // that was allocated by `Box::into_raw(Box::new(..))`.
                let _unmarked_node = unsafe { Box::from_raw(w.as_ptr()) };
            }
        }
    }

    /// Walk the heap and mark any nodes deemed reachable.
    fn mark_heap(
        mut strong: &Cell<Option<NonNull<GcBox<dyn Trace>>>>,
        mut weak: &Cell<Option<NonNull<dyn ErasedEphemeronBox>>>,
        mut weak_map: &Cell<Option<ErasedWeakMapBoxPointer>>,
    ) -> Unreachables {
        let _timer = Profiler::global().start_event("Gc Marking", "gc");
        // Walk the list, tracing and marking the nodes.
        let mut strong_dead = Vec::new();
        let mut pending_ephemerons = Vec::new();

        // === Preliminary mark phase ===
        //
        // 0. Get the naive list of possibly dead nodes.
        while let Some(node) = strong.get() {
            // SAFETY: node must be valid as this phase cannot drop any node.
            let node_ref = unsafe { node.as_ref() };
            if node_ref.header.roots() > 0 {
                // SAFETY: the reference to node must be valid as it is rooted. Passing
                // invalid references can result in Undefined Behavior.
                unsafe {
                    node_ref.mark_and_trace();
                }
            } else if !node_ref.is_marked() {
                strong_dead.push(node);
            }
            strong = &node_ref.header.next;
        }

        // 0.1. Early return if there are no ephemerons in the GC.
        if weak.get().is_none() {
            strong_dead.retain_mut(|node| {
                // SAFETY: node must be valid as this phase cannot drop any node.
                unsafe { !node.as_ref().is_marked() }
            });
            return Unreachables {
                strong: strong_dead,
                weak: Vec::new(),
            };
        }

        // === Weak mark phase ===
        //
        // 1. Get the naive list of ephemerons that are supposedly dead or whose key is dead, and
        // trace all the ephemerons that have roots and live keys. Also remove from this list the
        // ephemerons that are marked but have dead values.
        while let Some(eph) = weak.get() {
            // SAFETY: node must be valid as this phase cannot drop any node.
            let eph_ref = unsafe { eph.as_ref() };
            // SAFETY: the garbage collector ensures `eph_ref` always points to valid data.
            if unsafe { !eph_ref.trace() } {
                pending_ephemerons.push(eph);
            }
            weak = &eph_ref.header().next;
        }

        // 2. Trace all the weak pointers in the live weak maps to make sure they do not get swept.
        while let Some(w) = weak_map.get() {
            // SAFETY: node must be valid as this phase cannot drop any node.
            let node_ref = unsafe { w.as_ref() };

            // SAFETY: The garbage collector ensures that all nodes are valid.
            unsafe { node_ref.trace() };

            weak_map = node_ref.next();
        }

        // 3. Iterate through all pending ephemerons, removing the ones which have been successfully
        // traced. If there are no changes in the pending ephemerons list, it means that there are no
        // more reachable ephemerons from the remaining ephemeron values.
        let mut previous_len = pending_ephemerons.len();
        loop {
            pending_ephemerons.retain_mut(|eph| {
                // SAFETY: node must be valid as this phase cannot drop any node.
                let eph_ref = unsafe { eph.as_ref() };
                // SAFETY: the garbage collector ensures `eph_ref` always points to valid data.
                unsafe { !eph_ref.trace() }
            });

            if previous_len == pending_ephemerons.len() {
                break;
            }

            previous_len = pending_ephemerons.len();
        }

        // 4. The remaining list should contain the ephemerons that are either unreachable or whose
        // key is dead. Clean up the strong pointers, since this procedure could have marked some
        // more strong pointers.
        strong_dead.retain_mut(|node| {
            // SAFETY: node must be valid as this phase cannot drop any node.
            unsafe { !node.as_ref().is_marked() }
        });

        Unreachables {
            strong: strong_dead,
            weak: pending_ephemerons,
        }
    }

    /// # Safety
    ///
    /// Passing a `strong` or a `weak` vec with invalid pointers will result in Undefined Behavior.
    unsafe fn finalize(unreachables: Unreachables) {
        let _timer = Profiler::global().start_event("Gc Finalization", "gc");
        for node in unreachables.strong {
            // SAFETY: The caller must ensure all pointers inside `unreachables.strong` are valid.
            let node = unsafe { node.as_ref() };
            Trace::run_finalizer(node.value());
        }
        for node in unreachables.weak {
            // SAFETY: The caller must ensure all pointers inside `unreachables.weak` are valid.
            let node = unsafe { node.as_ref() };
            node.finalize_and_clear();
        }
    }

    /// # Safety
    ///
    /// - Providing an invalid pointer in the `heap_start` or in any of the headers of each
    ///   node will result in Undefined Behavior.
    /// - Providing a list of pointers that weren't allocated by `Box::into_raw(Box::new(..))`
    ///   will result in Undefined Behavior.
    unsafe fn sweep(
        mut strong: &Cell<Option<NonNull<GcBox<dyn Trace>>>>,
        mut weak: &Cell<Option<NonNull<dyn ErasedEphemeronBox>>>,
        total_allocated: &mut usize,
    ) {
        let _timer = Profiler::global().start_event("Gc Sweeping", "gc");
        let _guard = DropGuard::new();

        while let Some(node) = strong.get() {
            // SAFETY: The caller must ensure the validity of every node of `heap_start`.
            let node_ref = unsafe { node.as_ref() };
            if node_ref.header.roots() > 0 || node_ref.is_marked() {
                node_ref.header.unmark();
                strong = &node_ref.header.next;
            } else {
                // SAFETY: The algorithm ensures only unmarked/unreachable pointers are dropped.
                // The caller must ensure all pointers were allocated by `Box::into_raw(Box::new(..))`.
                let unmarked_node = unsafe { Box::from_raw(node.as_ptr()) };
                let unallocated_bytes = mem::size_of_val(&*unmarked_node);
                *total_allocated -= unallocated_bytes;
                strong.set(unmarked_node.header.next.take());
            }
        }

        while let Some(eph) = weak.get() {
            // SAFETY: The caller must ensure the validity of every node of `heap_start`.
            let eph_ref = unsafe { eph.as_ref() };
            let header = eph_ref.header();
            if header.roots() > 0 || header.is_marked() {
                header.unmark();
                weak = &header.next;
            } else {
                // SAFETY: The algorithm ensures only unmarked/unreachable pointers are dropped.
                // The caller must ensure all pointers were allocated by `Box::into_raw(Box::new(..))`.
                let unmarked_eph = unsafe { Box::from_raw(eph.as_ptr()) };
                let unallocated_bytes = mem::size_of_val(&*unmarked_eph);
                *total_allocated -= unallocated_bytes;
                weak.set(unmarked_eph.header().next.take());
            }
        }
    }

    // Clean up the heap when BoaGc is dropped.
    fn dump(gc: &mut BoaGc) {
        // Weak maps have to be dropped first, since the process dereferences GcBoxes.
        // This can be done without initializing a dropguard, since no GcBoxes are dropped here.
        let weak_map_head = &gc.weak_map_start;
        while let Some(node) = weak_map_head.get() {
            // SAFETY:
            // The `Allocator` must always ensure its start node is a valid, non-null pointer that
            // was allocated by `Box::into_raw(Box::new(..))`.
            let unmarked_node = unsafe { Box::from_raw(node.as_ptr()) };
            weak_map_head.set(unmarked_node.next().take());
        }

        // From this point on GcBoxes are dropped, so initialize a dropguard to keep their
        // finalizers from dereferencing other, possibly dead GcBoxes.
        let _guard = DropGuard::new();

        let strong_head = &gc.strong_start;
        while let Some(node) = strong_head.get() {
            // SAFETY:
            // The `Allocator` must always ensure its start node is a valid, non-null pointer that
            // was allocated by `Box::into_raw(Box::new(..))`.
            let unmarked_node = unsafe { Box::from_raw(node.as_ptr()) };
            strong_head.set(unmarked_node.header.next.take());
        }

        let eph_head = &gc.weak_start;
        while let Some(node) = eph_head.get() {
            // SAFETY:
            // The `Allocator` must always ensure its start node is a valid, non-null pointer that
            // was allocated by `Box::into_raw(Box::new(..))`.
            let unmarked_node = unsafe { Box::from_raw(node.as_ptr()) };
            eph_head.set(unmarked_node.header().next.take());
        }
    }
}

/// Forcefully runs a garbage collection of all inaccessible nodes.
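///
/// A minimal usage sketch:
///
/// ```
/// use boa_gc::{force_collect, Gc};
///
/// let number = Gc::new(5);
/// drop(number);
/// // Reclaim the now-unreachable allocation without waiting for the allocation
/// // threshold to be crossed.
/// force_collect();
/// ```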
pub fn force_collect() {
    BOA_GC.with(|current| {
        let mut gc = current.borrow_mut();

        if gc.runtime.bytes_allocated > 0 {
            Collector::collect(&mut gc);
        }
    });
}

#[cfg(test)]
mod test;

/// Returns `true` if any weak maps are currently allocated.
#[cfg(test)]
#[must_use]
pub fn has_weak_maps() -> bool {
    BOA_GC.with(|current| {
        let gc = current.borrow();

        gc.weak_map_start.get().is_some()
    })
}