Browse Source

Use perfect hash function for static strings

refactor/interner
Haled Odat 1 year ago
parent
commit
78e7d32aa1
  1. 1
      Cargo.lock
  2. 1
      boa_engine/Cargo.toml
  3. 2
      boa_engine/src/builtins/array/mod.rs
  4. 278
      boa_engine/src/string/common.rs
  5. 2
      boa_engine/src/string/str.rs

1
Cargo.lock generated

@ -428,6 +428,7 @@ dependencies = [
"num_enum", "num_enum",
"once_cell", "once_cell",
"paste", "paste",
"phf",
"pollster", "pollster",
"portable-atomic", "portable-atomic",
"rand", "rand",

1
boa_engine/Cargo.toml

@ -92,6 +92,7 @@ bytemuck = { version = "1.14.0", features = ["derive"] }
arrayvec = "0.7.4" arrayvec = "0.7.4"
intrusive-collections = "0.9.6" intrusive-collections = "0.9.6"
cfg-if = "1.0.0" cfg-if = "1.0.0"
phf.workspace = true
# intl deps # intl deps
boa_icu_provider = {workspace = true, features = ["std"], optional = true } boa_icu_provider = {workspace = true, features = ["std"], optional = true }

2
boa_engine/src/builtins/array/mod.rs

@ -134,7 +134,7 @@ impl IntrinsicObject for Array {
.method(Self::unshift, "unshift", 1) .method(Self::unshift, "unshift", 1)
.method(Self::with, "with", 2) .method(Self::with, "with", 2)
.property( .property(
utf16!("toString"), "toString",
to_string_function, to_string_function,
Attribute::WRITABLE | Attribute::NON_ENUMERABLE | Attribute::CONFIGURABLE, Attribute::WRITABLE | Attribute::NON_ENUMERABLE | Attribute::CONFIGURABLE,
) )

278
boa_engine/src/string/common.rs

@ -1,18 +1,13 @@
use std::hash::BuildHasherDefault;
use crate::tagged::Tagged; use crate::tagged::Tagged;
use super::JsString; use super::JsString;
use paste::paste; use paste::paste;
use rustc_hash::{FxHashMap, FxHasher};
macro_rules! well_known_statics { macro_rules! well_known_statics {
( $( $(#[$attr:meta])* ($name:ident, $string:literal) ),+$(,)? ) => { ( $( $(#[$attr:meta])* ($name:ident, $string:literal) ),+$(,)? ) => {
$( $(
paste!{ paste!{
// TODO: doc #[doc = "Gets the static `JsString` for `\"" $string "\"`."]
// #[doc = "Gets the static `JsString` for `\"" $string "\"`."]
#[doc = "Gets the static `JsString`."]
#[allow(unused)] #[allow(unused)]
pub(crate) const $name: JsString = JsString { pub(crate) const $name: JsString = JsString {
ptr: Tagged::from_tag( ptr: Tagged::from_tag(
@ -47,15 +42,18 @@ impl StaticJsStrings {
} }
true true
} }
let len = RAW_STATICS.len();
let mut i = 0; let mut i = 0;
while i < RAW_STATICS.len() { while i < len {
let s = RAW_STATICS[i]; // TOOD: Because `get_index` is not const, we are accessing doc hidden stuff, that may change.
// assert!(s.is_ascii()); let s = RAW_STATICS.map.entries[i].0;
if const_eq(s, candidate) { if const_eq(s, candidate) {
return i; return i;
} }
i += 1; i += 1;
} }
panic!("couldn't find the required string on the common string array"); panic!("couldn't find the required string on the common string array");
} }
@ -66,7 +64,7 @@ impl StaticJsStrings {
return None; return None;
} }
let index = RAW_STATICS_CACHE.with(|map| map.get(string).copied())?; let index = RAW_STATICS.get_index(string)?;
Some(JsString { Some(JsString {
ptr: Tagged::from_tag(index), ptr: Tagged::from_tag(index),
@ -76,7 +74,7 @@ impl StaticJsStrings {
/// Gets the `&[u16]` slice corresponding to the provided index, or `None` if the index /// Gets the `&[u16]` slice corresponding to the provided index, or `None` if the index
/// provided exceeds the size of the static array. /// provided exceeds the size of the static array.
pub(crate) fn get(index: usize) -> Option<&'static str> { pub(crate) fn get(index: usize) -> Option<&'static str> {
RAW_STATICS.get(index).copied() RAW_STATICS.index(index).copied()
} }
// Some consts are only used on certain features, which triggers the unused lint. // Some consts are only used on certain features, which triggers the unused lint.
@ -192,11 +190,12 @@ impl StaticJsStrings {
} }
} }
static MAX_STATIC_LENGTH: usize = { const MAX_STATIC_LENGTH: usize = {
let mut max = 0; let mut max = 0;
let mut i = 0; let mut i = 0;
while i < RAW_STATICS.len() { while i < RAW_STATICS.len() {
let len = RAW_STATICS[i].len(); // TOOD: Because `get_index` is not const, we are accessing doc hidden stuff, that may change.
let len = RAW_STATICS.map.entries[i].0.len();
if len > max { if len > max {
max = len; max = len;
} }
@ -205,24 +204,8 @@ static MAX_STATIC_LENGTH: usize = {
max max
}; };
thread_local! {
/// Map from a string inside [`RAW_STATICS`] to its corresponding static index on `RAW_STATICS`.
static RAW_STATICS_CACHE: FxHashMap<&'static str, usize> = {
let mut constants = FxHashMap::with_capacity_and_hasher(
RAW_STATICS.len(),
BuildHasherDefault::<FxHasher>::default(),
);
for (idx, &s) in RAW_STATICS.iter().enumerate() {
constants.insert(s, idx);
}
constants
};
}
/// Array of raw static strings that aren't reference counted. /// Array of raw static strings that aren't reference counted.
const RAW_STATICS: &[&str] = &[ const RAW_STATICS: phf::OrderedSet<&'static str> = phf::phf_ordered_set!(
"", "",
// Well known symbols // Well known symbols
"Symbol.asyncIterator", "Symbol.asyncIterator",
@ -251,6 +234,8 @@ const RAW_STATICS: &[&str] = &[
"[Symbol.toStringTag]", "[Symbol.toStringTag]",
"Symbol.unscopables", "Symbol.unscopables",
"[Symbol.unscopables]", "[Symbol.unscopables]",
"get [Symbol.species]",
"get [Symbol.toStringTag]",
// Well known builtins // Well known builtins
"Array", "Array",
"ArrayBuffer", "ArrayBuffer",
@ -361,7 +346,6 @@ const RAW_STATICS: &[&str] = &[
"enumerable", "enumerable",
"configurable", "configurable",
// Object object // Object object
"Object",
"assign", "assign",
"create", "create",
"toString", "toString",
@ -386,10 +370,26 @@ const RAW_STATICS: &[&str] = &[
"values", "values",
"entries", "entries",
"fromEntries", "fromEntries",
"propertyIsEnumerable",
"preventExtensions",
"getOwnPropertyDescriptor",
"getOwnPropertyDescriptors",
"getOwnPropertyNames",
"getOwnPropertySymbols",
"__defineGetter__",
"__defineSetter__",
"__lookupGetter__",
"__lookupSetter__",
"__proto__",
"get __proto__",
"set __proto__",
// Function object // Function object
"apply", "apply",
"bind", "bind",
"call", "call",
"caller",
// Arguments object
"callee",
// Array object // Array object
"at", "at",
"from", "from",
@ -422,6 +422,11 @@ const RAW_STATICS: &[&str] = &[
"unshift", "unshift",
"push", "push",
"pop", "pop",
"groupBy",
"toReversed",
"toSorted",
"toSpliced",
"with",
// String object // String object
"charAt", "charAt",
"charCodeAt", "charCodeAt",
@ -451,6 +456,26 @@ const RAW_STATICS: &[&str] = &[
"trim", "trim",
"trimEnd", "trimEnd",
"trimStart", "trimStart",
"isWellFormed",
"localeCompare",
"toWellFormed",
"toLocaleLowerCase",
"toLocaleUpperCase",
"trimLeft",
"trimRight",
"anchor",
"big",
"blink",
"bold",
"fixed",
"fontcolor",
"fontsize",
"italics",
"link",
"small",
"strike",
"sub",
"sup",
// Number object // Number object
"Infinity", "Infinity",
"NaN", "NaN",
@ -459,6 +484,8 @@ const RAW_STATICS: &[&str] = &[
"MIN_SAFE_INTEGER", "MIN_SAFE_INTEGER",
"MAX_VALUE", "MAX_VALUE",
"MIN_VALUE", "MIN_VALUE",
"NEGATIVE_INFINITY",
"POSITIVE_INFINITY",
"isSafeInteger", "isSafeInteger",
"isInteger", "isInteger",
"toExponential", "toExponential",
@ -470,6 +497,7 @@ const RAW_STATICS: &[&str] = &[
// RegExp object // RegExp object
"exec", "exec",
"test", "test",
"compile",
"flags", "flags",
"index", "index",
"lastIndex", "lastIndex",
@ -500,6 +528,7 @@ const RAW_STATICS: &[&str] = &[
"iterator", "iterator",
"toStringTag", "toStringTag",
"toPrimitive", "toPrimitive",
"isConcatSpreadable",
"get description", "get description",
// Map object // Map object
"clear", "clear",
@ -554,15 +583,35 @@ const RAW_STATICS: &[&str] = &[
"toUTCString", "toUTCString",
"now", "now",
"UTC", "UTC",
"getTimezoneOffset",
"getUTCMilliseconds",
"setUTCMilliseconds",
"toLocaleDateString",
"toLocaleTimeString",
// JSON object // JSON object
"parse", "parse",
"stringify", "stringify",
// Promise object
"promise",
"resolve",
"reject",
"all",
"allSettled",
"any",
"race",
"then",
"catch",
"finally",
"withResolvers",
// Iterator object // Iterator object
"Array Iterator", "Array Iterator",
"Set Iterator", "Set Iterator",
"String Iterator", "String Iterator",
"Map Iterator", "Map Iterator",
"For In Iterator", "For In Iterator",
"RegExp String Iterator",
// Iterator result object
"done",
// Math object // Math object
"LN10", "LN10",
"LN2", "LN2",
@ -607,6 +656,7 @@ const RAW_STATICS: &[&str] = &[
"tanh", "tanh",
"trunc", "trunc",
// TypedArray object // TypedArray object
"BYTES_PER_ELEMENT",
"buffer", "buffer",
"byteLength", "byteLength",
"byteOffset", "byteOffset",
@ -638,6 +688,168 @@ const RAW_STATICS: &[&str] = &[
"setUint8", "setUint8",
"setUint16", "setUint16",
"setUint32", "setUint32",
// WeakRef object
"deref",
// Atomic object
"and",
"compareExchange",
"exchange",
"isLockFree",
"load",
"or",
"store",
"wait",
"notify",
"xor",
// Intl object
"getCanonicalLocales",
"get compare",
"supportedLocalesOf",
"Intl.Collator",
"compare",
"resolvedOptions",
"Intl.ListFormat",
"format",
"formatToParts",
"get baseName",
"get calendar",
"get caseFirst",
"get collation",
"get hourCycle",
"get numeric",
"get numberingSystem",
"get language",
"get script",
"get region",
"Intl.Locale",
"maximize",
"minimize",
"baseName",
"calendar",
"caseFirst",
"collation",
"hourCycle",
"numeric",
"numberingSystem",
"language",
"script",
"region",
"Intl.Segmenter",
"segment",
"containing",
"Segmenter String Iterator",
"Intl.PluralRules",
"select",
// Temporal object
"get Id",
"getOffsetNanosecondsFor",
"getOffsetStringFor",
"getPlainDateTimeFor",
"getInstantFor",
"getPossibleInstantFor",
"getNextTransition",
"getPreviousTransition",
"id",
"Now",
"Calendar",
"Duration",
"Instant",
"PlainDate",
"PlainDateTime",
"PlainMonthDay",
"PlainTime",
"PlainYearMonth",
"TimeZone",
"ZonedDateTime",
"timeZoneId",
"instant",
"plainDateTime",
"plainDateTimeISO",
"zonedDateTime",
"zonedDateTimeISO",
"plainDate",
"plainDateISO",
"get epochSeconds",
"get epochMilliseconds",
"get epochMicroseconds",
"get epochNanoseconds",
"epochSeconds",
"epochMilliseconds",
"epochMicroseconds",
"epochNanoseconds",
"subtract",
"until",
"since",
"equals",
"toZonedDateTime",
"toZonedDateTimeISO",
"get Years",
"get Months",
"get Weeks",
"get Days",
"get Hours",
"get Minutes",
"get Seconds",
"get Milliseconds",
"get Microseconds",
"get Nanoseconds",
"get Sign",
"get blank",
"years",
"months",
"weeks",
"days",
"hours",
"minutes",
"seconds",
"milliseconds",
"microseconds",
"nanoseconds",
"blank",
"negated",
"total",
"get calendarId",
"get year",
"get month",
"get monthCode",
"get day",
"get dayOfWeek",
"get dayOfYear",
"get weekOfYear",
"get yearOfWeek",
"get daysInWeek",
"get daysInMonth",
"get daysInYear",
"get monthsInYear",
"get inLeapYear",
"calendarId",
"year",
"month",
"monthCode",
"day",
"dayOfWeek",
"dayOfYear",
"weekOfYear",
"yearOfWeek",
"daysInWeek",
"daysInMonth",
"daysInYear",
"monthsInYear",
"inLeapYear",
"toPlainYearMonth",
"toPlainMonthDay",
"getISOFields",
"getCalendar",
"withCalendar",
"dateFromFields",
"yearMonthFromFields",
"monthDayFromFields",
"dateAdd",
"dateUntil",
"era",
"eraYear",
"fields",
"mergeFields",
// Console object // Console object
"console", "console",
"assert", "assert",
@ -659,7 +871,6 @@ const RAW_STATICS: &[&str] = &[
"dirxml", "dirxml",
// Minified name // Minified name
"a", "a",
"",
"c", "c",
"d", "d",
"e", "e",
@ -685,7 +896,6 @@ const RAW_STATICS: &[&str] = &[
"y", "y",
"z", "z",
"A", "A",
"",
"C", "C",
"D", "D",
"E", "E",
@ -712,4 +922,4 @@ const RAW_STATICS: &[&str] = &[
"Z", "Z",
"_", "_",
"$", "$",
]; );

2
boa_engine/src/string/str.rs

@ -1,6 +1,6 @@
use std::slice::SliceIndex; use std::slice::SliceIndex;
use crate::{builtins::string::is_trimmable_whitespace, string::Iter}; use crate::string::Iter;
use boa_interner::JStrRef; use boa_interner::JStrRef;
use super::JsStringSlice; use super::JsStringSlice;

Loading…
Cancel
Save