//! Boa's **`boa_gc`** crate implements a garbage collector.
//!
//! # Crate Overview
//! **`boa_gc`** is a mark-sweep garbage collector that exposes the `Trace` and `Finalize` traits
//! for garbage-collected values.
//!
//! # About Boa
//! Boa is an open-source, experimental ECMAScript Engine written in Rust for lexing, parsing and
//! executing ECMAScript/JavaScript. Currently, Boa supports some of the [language][boa-conformance].
//! More information can be viewed at [Boa's website][boa-web].
//!
//! Try out the most recent release with Boa's live demo [playground][boa-playground].
//!
//! # Boa Crates
//! - **`boa_ast`** - Boa's ECMAScript Abstract Syntax Tree.
//! - **`boa_engine`** - Boa's implementation of ECMAScript builtin objects and execution.
//! - **`boa_gc`** - Boa's garbage collector.
//! - **`boa_interner`** - Boa's string interner.
//! - **`boa_parser`** - Boa's lexer and parser.
//! - **`boa_profiler`** - Boa's code profiler.
//! - **`boa_unicode`** - Boa's Unicode identifier utilities.
//! - **`boa_icu_provider`** - Boa's ICU4X data provider.
//!
//! [boa-conformance]: https://boa-dev.github.io/boa/test262/
//! [boa-web]: https://boa-dev.github.io/
//! [boa-playground]: https://boa-dev.github.io/boa/playground/
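//!
//! # Example
//!
//! A minimal usage sketch. It assumes the re-exported `Gc`, `GcRefCell` and the `Trace`/`Finalize`
//! derive macros behave as documented in their own modules; the `Counter` type is purely
//! illustrative.
//!
//! ```
//! use boa_gc::{Finalize, Gc, GcRefCell, Trace};
//!
//! // Garbage-collected types implement `Trace` and `Finalize`, usually via the
//! // derive macros re-exported from `boa_macros`.
//! #[derive(Trace, Finalize)]
//! struct Counter {
//!     count: GcRefCell<u64>,
//! }
//!
//! // Allocate the value on the GC heap and mutate it through the cell.
//! let counter = Gc::new(Counter {
//!     count: GcRefCell::new(0),
//! });
//! *counter.count.borrow_mut() += 1;
//! assert_eq!(*counter.count.borrow(), 1);
//! ```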
#![doc(
    html_logo_url = "https://raw.githubusercontent.com/boa-dev/boa/main/assets/logo.svg",
    html_favicon_url = "https://raw.githubusercontent.com/boa-dev/boa/main/assets/logo.svg"
)]
#![cfg_attr(not(test), forbid(clippy::unwrap_used))]
#![warn(missing_docs, clippy::dbg_macro)]
#![deny(
    // rustc lint groups https://doc.rust-lang.org/rustc/lints/groups.html
    warnings,
    future_incompatible,
    let_underscore,
    nonstandard_style,
    rust_2018_compatibility,
    rust_2018_idioms,
    rust_2021_compatibility,
    unused,

    // rustc allowed-by-default lints https://doc.rust-lang.org/rustc/lints/listing/allowed-by-default.html
    macro_use_extern_crate,
    meta_variable_misuse,
    missing_abi,
    missing_copy_implementations,
    missing_debug_implementations,
    non_ascii_idents,
    noop_method_call,
    single_use_lifetimes,
    trivial_casts,
    trivial_numeric_casts,
    unreachable_pub,
    unsafe_op_in_unsafe_fn,
    unused_crate_dependencies,
    unused_import_braces,
    unused_lifetimes,
    unused_qualifications,
    unused_tuple_struct_fields,
    variant_size_differences,

    // rustdoc lints https://doc.rust-lang.org/rustdoc/lints.html
    rustdoc::broken_intra_doc_links,
    rustdoc::private_intra_doc_links,
    rustdoc::missing_crate_level_docs,
    rustdoc::private_doc_tests,
    rustdoc::invalid_codeblock_attributes,
    rustdoc::invalid_rust_codeblocks,
    rustdoc::bare_urls,

    // clippy categories https://doc.rust-lang.org/clippy/
    clippy::all,
    clippy::correctness,
    clippy::suspicious,
    clippy::style,
    clippy::complexity,
    clippy::perf,
    clippy::pedantic,
    clippy::nursery,

    clippy::undocumented_unsafe_blocks
)]
#![allow(
    clippy::module_name_repetitions,
    clippy::redundant_pub_crate,
    clippy::let_unit_value
)]

extern crate self as boa_gc;

mod cell;
mod pointers;
mod trace;

pub(crate) mod internals;

use boa_profiler::Profiler;
use internals::{EphemeronBox, ErasedEphemeronBox};

use std::{
    cell::{Cell, RefCell},
    mem,
    ptr::NonNull,
};

pub use crate::trace::{Finalize, Trace};
pub use boa_macros::{Finalize, Trace};
pub use cell::{GcRef, GcRefCell, GcRefMut};
pub use internals::GcBox;
pub use pointers::{Ephemeron, Gc, WeakGc};

type GcPointer = NonNull<GcBox<dyn Trace>>;
type EphemeronPointer = NonNull<dyn ErasedEphemeronBox>;

thread_local!(static GC_DROPPING: Cell<bool> = Cell::new(false));
thread_local!(static BOA_GC: RefCell<BoaGc> = RefCell::new(BoaGc {
    config: GcConfig::default(),
    runtime: GcRuntimeData::default(),
    strong_start: Cell::new(None),
    weak_start: Cell::new(None),
}));

#[derive(Debug, Clone, Copy)]
struct GcConfig {
    threshold: usize,
    used_space_percentage: usize,
}

// The defaults are arbitrary values for now.
//
// TODO: Add a way to configure this later.
impl Default for GcConfig {
    fn default() -> Self {
        Self {
            threshold: 1024,
            used_space_percentage: 80,
        }
    }
}

#[derive(Default, Debug, Clone, Copy)]
struct GcRuntimeData {
    collections: usize,
    bytes_allocated: usize,
}

#[derive(Debug)]
struct BoaGc {
    config: GcConfig,
    runtime: GcRuntimeData,
    strong_start: Cell<Option<GcPointer>>,
    weak_start: Cell<Option<EphemeronPointer>>,
}

impl Drop for BoaGc {
    fn drop(&mut self) {
        Collector::dump(self);
    }
}

// Whether or not the thread is currently in the sweep phase of garbage collection.
// During this phase, attempts to dereference a `Gc<T>` pointer will trigger a panic.
/// `DropGuard` flags whether the Collector is currently running `Collector::sweep()` or
/// `Collector::dump()`.
///
/// While a `DropGuard` is active, no `GcBox` may be dereferenced or accessed, since doing so
/// could cause Undefined Behavior.
#[derive(Debug, Clone)]
struct DropGuard;

impl DropGuard {
    fn new() -> Self {
        GC_DROPPING.with(|dropping| dropping.set(true));
        Self
    }
}

impl Drop for DropGuard {
    fn drop(&mut self) {
        GC_DROPPING.with(|dropping| dropping.set(false));
    }
}

/// Returns `true` if it is safe for a type to run [`Finalize::finalize`].
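///
/// This is `false` only while the collector is sweeping or dumping the heap, which is exactly
/// when other garbage-collected values may already have been dropped.
///
/// # Example
///
/// A minimal sketch of a custom finalizer consulting this flag; the `Resource` type is purely
/// illustrative.
///
/// ```
/// use boa_gc::{finalizer_safe, Finalize};
///
/// struct Resource;
///
/// impl Finalize for Resource {
///     fn finalize(&self) {
///         // During the sweep phase it is not safe to touch other GC'd values,
///         // so guard any such access behind `finalizer_safe()`.
///         if finalizer_safe() {
///             println!("finalizing a Resource");
///         }
///     }
/// }
///
/// let _res = Resource;
/// ```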
#[must_use]
#[inline]
pub fn finalizer_safe() -> bool {
    GC_DROPPING.with(|dropping| !dropping.get())
}

/// The Allocator handles allocation of garbage collected values.
///
/// The allocator can trigger a garbage collection.
#[derive(Debug, Clone, Copy)]
struct Allocator;

impl Allocator {
    /// Allocate a new garbage collected value on the Garbage Collector's heap.
    fn alloc_gc<T: Trace>(value: GcBox<T>) -> NonNull<GcBox<T>> {
        let _timer = Profiler::global().start_event("New GcBox", "BoaAlloc");
        let element_size = mem::size_of_val::<GcBox<T>>(&value);
        BOA_GC.with(|st| {
            let mut gc = st.borrow_mut();

            Self::manage_state(&mut gc);
            value.header.next.set(gc.strong_start.take());
            // Safety: value cannot be a null pointer, since `Box` cannot return null pointers.
            let ptr = unsafe { NonNull::new_unchecked(Box::into_raw(Box::new(value))) };
            let erased: NonNull<GcBox<dyn Trace>> = ptr;

            gc.strong_start.set(Some(erased));
            gc.runtime.bytes_allocated += element_size;

            ptr
        })
    }

    fn alloc_ephemeron<K: Trace, V: Trace>(
        value: EphemeronBox<K, V>,
    ) -> NonNull<EphemeronBox<K, V>> {
        let _timer = Profiler::global().start_event("New EphemeronBox", "BoaAlloc");
        let element_size = mem::size_of_val::<EphemeronBox<K, V>>(&value);
        BOA_GC.with(|st| {
            let mut gc = st.borrow_mut();

            Self::manage_state(&mut gc);
            value.header.next.set(gc.weak_start.take());
            // Safety: value cannot be a null pointer, since `Box` cannot return null pointers.
            let ptr = unsafe { NonNull::new_unchecked(Box::into_raw(Box::new(value))) };
            let erased: NonNull<dyn ErasedEphemeronBox> = ptr;

            gc.weak_start.set(Some(erased));
            gc.runtime.bytes_allocated += element_size;

            ptr
        })
    }

    fn manage_state(gc: &mut BoaGc) {
        if gc.runtime.bytes_allocated > gc.config.threshold {
            Collector::collect(gc);

            if gc.runtime.bytes_allocated
                > gc.config.threshold / 100 * gc.config.used_space_percentage
            {
                gc.config.threshold =
                    gc.runtime.bytes_allocated / gc.config.used_space_percentage * 100;
            }
        }
    }
}

struct Unreachables {
    strong: Vec<NonNull<GcBox<dyn Trace>>>,
    weak: Vec<NonNull<dyn ErasedEphemeronBox>>,
}

/// This collector currently functions in four main phases:
///
/// Mark -> Finalize -> Mark -> Sweep
///
/// 1. Mark nodes as reachable.
/// 2. Finalize the unreachable nodes.
/// 3. Mark again, because `Finalize::finalize` can potentially resurrect dead nodes.
/// 4. Sweep and drop all dead nodes.
///
/// A better approach in a more concurrent structure may be to reorder:
///
/// Mark -> Sweep -> Finalize
struct Collector;

impl Collector {
    /// Run a collection on the full heap.
    fn collect(gc: &mut BoaGc) {
        let _timer = Profiler::global().start_event("Gc Full Collection", "gc");
        gc.runtime.collections += 1;
        let unreachables = Self::mark_heap(&gc.strong_start, &gc.weak_start);

        // Only finalize if there are any unreachable nodes.
        if !unreachables.strong.is_empty() || !unreachables.weak.is_empty() {
            // Finalize all the unreachable nodes.
            // SAFETY: All passed pointers are valid, since we won't deallocate until `Self::sweep`.
            unsafe { Self::finalize(unreachables) };

            let _final_unreachables = Self::mark_heap(&gc.strong_start, &gc.weak_start);
        }

        // SAFETY: The head of our linked list is always valid per the invariants of our GC.
        unsafe {
            Self::sweep(
                &gc.strong_start,
                &gc.weak_start,
                &mut gc.runtime.bytes_allocated,
            );
        }
    }

    /// Walk the heap and mark any nodes deemed reachable.
    fn mark_heap(
        mut strong: &Cell<Option<NonNull<GcBox<dyn Trace>>>>,
        mut weak: &Cell<Option<NonNull<dyn ErasedEphemeronBox>>>,
    ) -> Unreachables {
        let _timer = Profiler::global().start_event("Gc Marking", "gc");

        // Walk the list, tracing and marking the nodes.
        let mut strong_dead = Vec::new();
        let mut pending_ephemerons = Vec::new();

        // === Preliminary mark phase ===
        //
        // 0. Get the naive list of possibly dead nodes.
        while let Some(node) = strong.get() {
            // SAFETY: node must be valid as this phase cannot drop any node.
            let node_ref = unsafe { node.as_ref() };
            if node_ref.header.roots() > 0 {
                // SAFETY: the reference to node must be valid as it is rooted. Passing
                // invalid references can result in Undefined Behavior.
                unsafe {
                    node_ref.mark_and_trace();
                }
            } else if !node_ref.is_marked() {
                strong_dead.push(node);
            }
            strong = &node_ref.header.next;
        }

        // 0.1. Early return if there are no ephemerons in the GC.
        if weak.get().is_none() {
            strong_dead.retain_mut(|node| {
                // SAFETY: node must be valid as this phase cannot drop any node.
                unsafe { !node.as_ref().is_marked() }
            });
            return Unreachables {
                strong: strong_dead,
                weak: Vec::new(),
            };
        }

        // === Weak mark phase ===
        //
        // 1. Get the naive list of ephemerons that are supposedly dead or whose key is dead, and
        // trace all the ephemerons that have roots and live keys. Also remove from this list the
        // ephemerons that are marked but whose value is dead.
        while let Some(eph) = weak.get() {
            // SAFETY: node must be valid as this phase cannot drop any node.
            let eph_ref = unsafe { eph.as_ref() };
            // SAFETY: the garbage collector ensures `eph_ref` always points to valid data.
            if unsafe { !eph_ref.trace() } {
                pending_ephemerons.push(eph);
            }
            weak = &eph_ref.header().next;
        }

        // 2. Iterate through all pending ephemerons, removing the ones which have been successfully
        // traced. If there are no changes in the pending ephemerons list, it means that there are no
        // more reachable ephemerons from the remaining ephemeron values.
        let mut previous_len = pending_ephemerons.len();
        loop {
            pending_ephemerons.retain_mut(|eph| {
                // SAFETY: node must be valid as this phase cannot drop any node.
                let eph_ref = unsafe { eph.as_ref() };
                // SAFETY: the garbage collector ensures `eph_ref` always points to valid data.
                unsafe { !eph_ref.trace() }
            });

            if previous_len == pending_ephemerons.len() {
                break;
            }
            previous_len = pending_ephemerons.len();
        }

        // 3. The remaining list should contain the ephemerons that are either unreachable or whose
        // key is dead. Clean up the strong pointers, since this procedure could have marked some
        // more strong pointers.
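        // `retain_mut` keeps a node in the dead list only if it is still unmarked; nodes marked
        // by the ephemeron passes above are reachable and must not be finalized.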
        strong_dead.retain_mut(|node| {
            // SAFETY: node must be valid as this phase cannot drop any node.
            unsafe { !node.as_ref().is_marked() }
        });

        Unreachables {
            strong: strong_dead,
            weak: pending_ephemerons,
        }
    }

    /// # Safety
    ///
    /// Passing a `strong` or a `weak` vec with invalid pointers will result in Undefined Behaviour.
    unsafe fn finalize(unreachables: Unreachables) {
        let _timer = Profiler::global().start_event("Gc Finalization", "gc");
        for node in unreachables.strong {
            // SAFETY: The caller must ensure all pointers inside `unreachables.strong` are valid.
            let node = unsafe { node.as_ref() };
            Trace::run_finalizer(node.value());
        }
        for node in unreachables.weak {
            // SAFETY: The caller must ensure all pointers inside `unreachables.weak` are valid.
            let node = unsafe { node.as_ref() };
            node.finalize_and_clear();
        }
    }

    /// # Safety
    ///
    /// - Providing an invalid pointer in the `strong` or `weak` lists, or in any of the headers of
    /// each node, will result in Undefined Behaviour.
    /// - Providing a list of pointers that weren't allocated by `Box::into_raw(Box::new(..))`
    /// will result in Undefined Behaviour.
    unsafe fn sweep(
        mut strong: &Cell<Option<NonNull<GcBox<dyn Trace>>>>,
        mut weak: &Cell<Option<NonNull<dyn ErasedEphemeronBox>>>,
        total_allocated: &mut usize,
    ) {
        let _timer = Profiler::global().start_event("Gc Sweeping", "gc");
        let _guard = DropGuard::new();

        while let Some(node) = strong.get() {
            // SAFETY: The caller must ensure the validity of every node in the `strong` list.
            let node_ref = unsafe { node.as_ref() };
            if node_ref.header.roots() > 0 || node_ref.is_marked() {
                node_ref.header.unmark();
                strong = &node_ref.header.next;
            } else {
                // SAFETY: The algorithm ensures only unmarked/unreachable pointers are dropped.
                // The caller must ensure all pointers were allocated by `Box::into_raw(Box::new(..))`.
                let unmarked_node = unsafe { Box::from_raw(node.as_ptr()) };
                let unallocated_bytes = mem::size_of_val(&*unmarked_node);
                *total_allocated -= unallocated_bytes;
                strong.set(unmarked_node.header.next.take());
            }
        }

        while let Some(eph) = weak.get() {
            // SAFETY: The caller must ensure the validity of every node in the `weak` list.
            let eph_ref = unsafe { eph.as_ref() };
            let header = eph_ref.header();
            if header.roots() > 0 || header.is_marked() {
                header.unmark();
                weak = &header.next;
            } else {
                // SAFETY: The algorithm ensures only unmarked/unreachable pointers are dropped.
                // The caller must ensure all pointers were allocated by `Box::into_raw(Box::new(..))`.
                let unmarked_eph = unsafe { Box::from_raw(eph.as_ptr()) };
                let unallocated_bytes = mem::size_of_val(&*unmarked_eph);
                *total_allocated -= unallocated_bytes;
                weak.set(unmarked_eph.header().next.take());
            }
        }
    }

    // Clean up the heap when BoaGc is dropped.
    fn dump(gc: &mut BoaGc) {
        // Initialize a `DropGuard`: this is only invoked while `BOA_GC` is being dropped, and no
        // `GcBox` may be dereferenced while the heap is torn down.
        let _guard = DropGuard::new();

        let strong_head = &gc.strong_start;
        while let Some(node) = strong_head.get() {
            // SAFETY:
            // The `Allocator` must always ensure its start node is a valid, non-null pointer that
            // was allocated by `Box::into_raw(Box::new(..))`.
            let unmarked_node = unsafe { Box::from_raw(node.as_ptr()) };
            strong_head.set(unmarked_node.header.next.take());
        }

        let eph_head = &gc.weak_start;
        while let Some(node) = eph_head.get() {
            // SAFETY:
            // The `Allocator` must always ensure its start node is a valid, non-null pointer that
            // was allocated by `Box::into_raw(Box::new(..))`.
            let unmarked_node = unsafe { Box::from_raw(node.as_ptr()) };
            eph_head.set(unmarked_node.header().next.take());
        }
    }
}

/// Forcefully runs a garbage collection of all unreachable nodes.
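///
/// Collection normally happens automatically once the allocator's byte threshold is exceeded;
/// this function triggers it explicitly, and is a no-op when nothing has been allocated.
///
/// # Example
///
/// A minimal sketch, assuming `Gc::new` and the `Trace`/`Finalize` derives behave as documented
/// in their own modules; the `Data` type is purely illustrative.
///
/// ```
/// use boa_gc::{force_collect, Finalize, Gc, Trace};
///
/// #[derive(Trace, Finalize)]
/// struct Data {
///     value: u64,
/// }
///
/// let data = Gc::new(Data { value: 42 });
/// assert_eq!(data.value, 42);
///
/// // Dropping the only strong handle makes the allocation unreachable,
/// // so a forced collection can reclaim it.
/// drop(data);
/// force_collect();
/// ```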
pub fn force_collect() {
    BOA_GC.with(|current| {
        let mut gc = current.borrow_mut();

        if gc.runtime.bytes_allocated > 0 {
            Collector::collect(&mut gc);
        }
    });
}

#[cfg(test)]
mod test;