Browse Source

Redesign Intl API and implement some services (#2478)

This Pull Request fixes/closes #1180. (I'll open a tracking issue for the progress)

It changes the following:

- Redesigns the internal API of Intl to (hopefully!) make it easier to implement a service.
- Implements the `Intl.Locale` service. 
- Implements the `Intl.Collator` service.
- Implements the `Intl.ListFormat` service.

On the subject of the failing tests: some of them are caused by missing locale data in the `icu_testdata` crate; we would need to regenerate that data with the missing locales, or vendor custom default data.

On the other hand, some of the failing tests are caused by bugs in the ICU4X crates. The repo https://github.com/jedel1043/icu4x-test262 currently tracks the bugs found when running test262. I'll sync with the ICU4X team to try to fix those.

cc @sffc
pull/2509/head
José Julián Espina 2 years ago
parent
commit
3bf5de2929
  1. 2821
      Cargo.lock
  2. 2
      Cargo.toml
  3. 19
      boa_ast/src/lib.rs
  4. 2
      boa_cli/Cargo.toml
  5. 30
      boa_engine/Cargo.toml
  6. 68
      boa_engine/src/builtins/array/mod.rs
  7. 23
      boa_engine/src/builtins/console/mod.rs
  8. 548
      boa_engine/src/builtins/intl/collator/mod.rs
  9. 80
      boa_engine/src/builtins/intl/collator/options.rs
  10. 17
      boa_engine/src/builtins/intl/date_time_format.rs
  11. 495
      boa_engine/src/builtins/intl/list_format/mod.rs
  12. 53
      boa_engine/src/builtins/intl/list_format/options.rs
  13. 730
      boa_engine/src/builtins/intl/locale/mod.rs
  14. 21
      boa_engine/src/builtins/intl/locale/options.rs
  15. 126
      boa_engine/src/builtins/intl/locale/tests.rs
  16. 604
      boa_engine/src/builtins/intl/locale/utils.rs
  17. 842
      boa_engine/src/builtins/intl/mod.rs
  18. 252
      boa_engine/src/builtins/intl/options.rs
  19. 41
      boa_engine/src/builtins/intl/segmenter/mod.rs
  20. 29
      boa_engine/src/builtins/intl/segmenter/options.rs
  21. 547
      boa_engine/src/builtins/intl/tests.rs
  22. 63
      boa_engine/src/builtins/string/mod.rs
  23. 200
      boa_engine/src/context/icu.rs
  24. 64
      boa_engine/src/context/intrinsics.rs
  25. 61
      boa_engine/src/context/mod.rs
  26. 17
      boa_engine/src/lib.rs
  27. 131
      boa_engine/src/object/mod.rs
  28. 5
      boa_engine/src/object/operations.rs
  29. 10
      boa_engine/src/string/mod.rs
  30. 19
      boa_gc/src/lib.rs
  31. 28
      boa_icu_provider/Cargo.toml
  32. 12
      boa_icu_provider/README.md
  33. BIN
      boa_icu_provider/data/icudata.postcard
  34. 21
      boa_icu_provider/src/bin/datagen.rs
  35. 93
      boa_icu_provider/src/lib.rs
  36. 17
      boa_interner/src/lib.rs
  37. 17
      boa_parser/src/lib.rs
  38. 11
      boa_profiler/src/lib.rs
  39. 3
      boa_tester/Cargo.toml
  40. 19
      boa_tester/src/exec/mod.rs
  41. 17
      boa_unicode/src/lib.rs
  42. 10
      test_ignore.toml

2821
Cargo.lock generated

File diff suppressed because it is too large Load Diff

2
Cargo.toml

@ -12,6 +12,7 @@ members = [
"boa_wasm",
"boa_examples",
"boa_macros",
"boa_icu_provider",
]
[workspace.package]
@ -32,6 +33,7 @@ boa_unicode = { version = "0.16.0", path = "boa_unicode" }
boa_macros = { version = "0.16.0", path = "boa_macros" }
boa_ast = { version = "0.16.0", path = "boa_ast" }
boa_parser = { version = "0.16.0", path = "boa_parser" }
boa_icu_provider = { version = "0.16.0", path = "boa_icu_provider" }
[workspace.metadata.workspaces]
allow_branch = "main"

19
boa_ast/src/lib.rs

@ -1,7 +1,7 @@
//! Boa's **boa_ast** crate implements an ECMAScript abstract syntax tree.
//! Boa's **`boa_ast`** crate implements an ECMAScript abstract syntax tree.
//!
//! # Crate Overview
//! **boa_ast** contains representations of [**Parse Nodes**][grammar] as defined by the ECMAScript
//! **`boa_ast`** contains representations of [**Parse Nodes**][grammar] as defined by the ECMAScript
//! spec. Some `Parse Node`s are not represented by Boa's AST, because a lot of grammar productions
//! are only used to throw [**Early Errors**][early], and don't influence the evaluation of the AST
//! itself.
@ -17,13 +17,14 @@
//! Try out the most recent release with Boa's live demo [playground][boa-playground].
//!
//! # Boa Crates
//! - **boa_ast** - Boa's ECMAScript Abstract Syntax Tree.
//! - **boa_engine** - Boa's implementation of ECMAScript builtin objects and execution.
//! - **boa_gc** - Boa's garbage collector
//! - **boa_interner** - Boa's string interner
//! - **boa_parser** - Boa's lexer and parser
//! - **boa_profiler** - Boa's code profiler
//! - **boa_unicode** - Boa's Unicode identifier
//! - **`boa_ast`** - Boa's ECMAScript Abstract Syntax Tree.
//! - **`boa_engine`** - Boa's implementation of ECMAScript builtin objects and execution.
//! - **`boa_gc`** - Boa's garbage collector.
//! - **`boa_interner`** - Boa's string interner.
//! - **`boa_parser`** - Boa's lexer and parser.
//! - **`boa_profiler`** - Boa's code profiler.
//! - **`boa_unicode`** - Boa's Unicode identifier.
//! - **`boa_icu_provider`** - Boa's ICU4X data provider.
//!
//! [grammar]: https://tc39.es/ecma262/#sec-syntactic-grammar
//! [early]: https://tc39.es/ecma262/#sec-static-semantic-rules

2
boa_cli/Cargo.toml

@ -12,7 +12,7 @@ repository.workspace = true
rust-version.workspace = true
[dependencies]
boa_engine = { workspace = true, features = ["deser", "console", "flowgraph"] }
boa_engine = { workspace = true, features = ["deser", "console", "flowgraph", "intl"] }
boa_ast = { workspace = true, features = ["serde"]}
boa_parser.workspace = true
rustyline = "10.0.0"

30
boa_engine/Cargo.toml

@ -15,13 +15,18 @@ rust-version.workspace = true
profiler = ["boa_profiler/profiler"]
deser = ["boa_interner/serde", "boa_ast/serde"]
intl = [
"dep:icu_locale_canonicalizer",
"dep:boa_icu_provider",
"dep:icu_locid_transform",
"dep:icu_locid",
"dep:icu_datetime",
"dep:icu_plurals",
"dep:icu_provider",
"dep:icu_testdata",
"dep:sys-locale"
"dep:icu_provider_adapters",
"dep:icu_calendar",
"dep:icu_collator",
"dep:icu_list",
"dep:writeable",
"dep:sys-locale",
]
fuzz = ["boa_ast/fuzz", "boa_interner/fuzz"]
@ -59,12 +64,19 @@ tap = "1.0.1"
sptr = "0.3.2"
static_assertions = "1.1.0"
thiserror = "1.0.38"
icu_locale_canonicalizer = { version = "0.6.0", features = ["serde"], optional = true }
icu_locid = { version = "0.6.0", features = ["serde"], optional = true }
icu_datetime = { version = "0.6.0", features = ["serde"], optional = true }
icu_plurals = { version = "0.6.0", features = ["serde"], optional = true }
icu_provider = { version = "0.6.0", optional = true }
icu_testdata = { version = "0.6.0", optional = true }
# intl deps
boa_icu_provider = { workspace = true, optional = true }
icu_locid_transform = { version = "1.0.0", features = ["serde"], optional = true }
icu_locid = { version = "1.0.0", features = ["serde"], optional = true }
icu_datetime = { version = "1.0.0", features = ["serde", "experimental"], optional = true }
icu_calendar = { version = "1.0.0", optional = true }
icu_collator = { version = "1.0.1", features = ["serde"], optional = true }
icu_plurals = { version = "1.0.0", features = ["serde"], optional = true }
icu_provider = { version = "1.0.1", optional = true }
icu_provider_adapters = { version = "1.0.0", features = ["serde"], optional = true }
icu_list = { version = "1.0.0", features = ["serde"], optional = true }
writeable = { version = "0.5.0", optional = true }
sys-locale = { version = "0.2.3", optional = true }
[dev-dependencies]

68
boa_engine/src/builtins/array/mod.rs

@ -13,6 +13,7 @@ pub mod array_iterator;
#[cfg(test)]
mod tests;
use boa_macros::utf16;
use boa_profiler::Profiler;
use tap::{Conv, Pipe};
@ -117,6 +118,7 @@ impl BuiltIn for Array {
.method(Self::some, "some", 1)
.method(Self::sort, "sort", 1)
.method(Self::splice, "splice", 2)
.method(Self::to_locale_string, "toLocaleString", 0)
.method(Self::reduce, "reduce", 1)
.method(Self::reduce_right, "reduceRight", 1)
.method(Self::keys, "keys", 0)
@ -2027,6 +2029,72 @@ impl Array {
Ok(a.into())
}
/// [`Array.prototype.toLocaleString ( [ locales [ , options ] ] )`][spec].
///
/// Produces one string out of the elements of the array. Every element that is neither
/// `undefined` nor `null` is converted by invoking its own `toLocaleString` method with the
/// received arguments, and the converted elements are joined with a list separator
/// (currently always `", "`).
///
/// More information:
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma402/#sup-array.prototype.tolocalestring
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/toLocaleString
pub(crate) fn to_locale_string(
    this: &JsValue,
    args: &[JsValue],
    context: &mut Context,
) -> JsResult<JsValue> {
    // 1. Let array be ? ToObject(this value).
    let array = this.to_object(context)?;

    // 2. Let len be ? ToLength(? Get(array, "length")).
    let len = array.length_of_array_like(context)?;

    // 3. Let separator be the implementation-defined list-separator String value appropriate for the host environment's current locale (such as ", ").
    // TODO: with the `intl` feature this should eventually return a locale-sensitive separator.
    #[cfg(feature = "intl")]
    let separator = utf16!(", ");
    #[cfg(not(feature = "intl"))]
    let separator = utf16!(", ");

    // 4. Let R be the empty String.
    let mut joined = Vec::new();

    // 5. Let k be 0.
    // 6. Repeat, while k < len,
    for index in 0..len {
        // a. If k > 0, then
        //     i. Set R to the string-concatenation of R and separator.
        if index != 0 {
            joined.extend_from_slice(separator);
        }

        // b. Let nextElement be ? Get(array, ! ToString(k)).
        let element = array.get(index, context)?;

        // c. If nextElement is not undefined or null, then
        if element.is_null_or_undefined() {
            // d. Increase k by 1.
            continue;
        }

        // i. Let S be ? ToString(? Invoke(nextElement, "toLocaleString", « locales, options »)).
        let localized = element
            .invoke("toLocaleString", args, context)?
            .to_string(context)?;

        // ii. Set R to the string-concatenation of R and S.
        joined.extend_from_slice(&localized);

        // d. Increase k by 1.
    }

    // 7. Return R.
    Ok(js_string!(joined).into())
}
/// `Array.prototype.splice ( start, [deleteCount[, ...items]] )`
///
/// Splices an array by following

23
boa_engine/src/builtins/console/mod.rs

@ -52,19 +52,16 @@ fn logger(msg: LogMessage, console_state: &Console) {
/// This represents the `console` formatter.
pub fn formatter(data: &[JsValue], context: &mut Context) -> JsResult<String> {
let target = data
.get(0)
.cloned()
.unwrap_or_default()
.to_string(context)?;
match data.len() {
0 => Ok(String::new()),
1 => Ok(target.to_std_string_escaped()),
_ => {
match data {
[] => Ok(String::new()),
[val] => Ok(val.to_string(context)?.to_std_string_escaped()),
data => {
let mut formatted = String::new();
let mut arg_index = 1;
let target = target.to_std_string_escaped();
let target = data
.get_or_undefined(0)
.to_string(context)?
.to_std_string_escaped();
let mut chars = target.chars();
while let Some(c) = chars.next() {
if c == '%' {
@ -94,9 +91,7 @@ pub fn formatter(data: &[JsValue], context: &mut Context) -> JsResult<String> {
/* string */
's' => {
let arg = data
.get(arg_index)
.cloned()
.unwrap_or_default()
.get_or_undefined(arg_index)
.to_string(context)?
.to_std_string_escaped();
formatted.push_str(&arg);

548
boa_engine/src/builtins/intl/collator/mod.rs

@ -0,0 +1,548 @@
use boa_gc::{custom_trace, Finalize, Trace};
use boa_profiler::Profiler;
use icu_collator::{
provider::CollationMetadataV1Marker, AlternateHandling, CaseFirst, MaxVariable, Numeric,
};
use icu_locid::{
extensions::unicode::Value, extensions_unicode_key as key, extensions_unicode_value as value,
Locale,
};
use icu_provider::{DataLocale, DataProvider};
use tap::{Conv, Pipe};
use crate::{
builtins::{BuiltIn, JsArgs},
context::{intrinsics::StandardConstructors, BoaProvider},
object::{
internal_methods::get_prototype_from_constructor, ConstructorBuilder, FunctionBuilder,
JsFunction, JsObject, ObjectData,
},
property::Attribute,
symbol::WellKnownSymbols,
Context, JsNativeError, JsResult, JsValue,
};
use super::{
locale::{canonicalize_locale_list, resolve_locale, supported_locales, validate_extension},
options::{coerce_options_to_object, get_option, IntlOptions, LocaleMatcher},
Service,
};
mod options;
pub(crate) use options::*;
/// Internal state of an `Intl.Collator` object.
///
/// Holds the options resolved by the constructor together with the ICU4X collator that was
/// built from them and the lazily created bound `compare` function.
pub struct Collator {
// Resolved locale; reported as `locale` by `resolvedOptions`.
locale: Locale,
// Resolved collation type; the constructor stores `default` when none was resolved.
collation: Value,
// Whether numeric collation was requested (passed to ICU4X as `Numeric::On`/`Numeric::Off`).
numeric: bool,
// Resolved case ordering; `None` when neither the options nor the locale specified one.
case_first: Option<CaseFirst>,
// Requested usage (`sort` or `search`).
usage: Usage,
// Resolved sensitivity; the constructor stores `Variant` when none was resolved.
sensitivity: Sensitivity,
// Value of the `ignorePunctuation` option (`false` when absent).
ignore_punctuation: bool,
// ICU4X collator that performs the actual string comparisons.
collator: icu_collator::Collator,
// Function returned by the `compare` getter; created on first access and then reused.
bound_compare: Option<JsFunction>,
}
// Uses the default `Finalize` behaviour; no custom finalizer is defined for `Collator`.
impl Finalize for Collator {}
// SAFETY: only `bound_compare` is a traceable object.
unsafe impl Trace for Collator {
// Only `bound_compare` is handed to the tracer; the remaining fields are not marked.
custom_trace!(this, mark(&this.bound_compare));
}
impl Collator {
/// Gets the inner [`icu_collator::Collator`] comparator.
///
/// Returns a shared reference to the ICU4X collator stored in this object.
pub(crate) const fn collator(&self) -> &icu_collator::Collator {
&self.collator
}
}
// Manual `Debug` implementation: every field is printed as-is except `collator`, which is
// replaced by the placeholder string `"ICUCollator"`.
impl std::fmt::Debug for Collator {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Collator")
.field("locale", &self.locale)
.field("collation", &self.collation)
.field("numeric", &self.numeric)
.field("case_first", &self.case_first)
.field("usage", &self.usage)
.field("sensitivity", &self.sensitivity)
.field("ignore_punctuation", &self.ignore_punctuation)
// Placeholder instead of the inner ICU4X collator.
.field("collator", &"ICUCollator")
.field("bound_compare", &self.bound_compare)
.finish()
}
}
/// Locale-related options of `Intl.Collator` that take part in locale resolution.
///
/// Each field starts as the value read from the `options` argument of the constructor and is
/// overwritten with the resolved value by `Service::resolve`.
#[derive(Debug, Clone)]
pub(in crate::builtins::intl) struct CollatorLocaleOptions {
// Collation type; maps to the `co` Unicode extension key.
collation: Option<Value>,
// Numeric collation flag; maps to the `kn` Unicode extension key.
numeric: Option<bool>,
// Case ordering; maps to the `kf` Unicode extension key.
case_first: Option<CaseFirst>,
}
impl<P> Service<P> for Collator
where
P: DataProvider<CollationMetadataV1Marker>,
{
type LangMarker = CollationMetadataV1Marker;
type LocaleOptions = CollatorLocaleOptions;
/// Resolves the `co`, `kn` and `kf` values of `locale` against `options`.
///
/// For every key, the value from `options` takes precedence over the Unicode extension
/// keyword of `locale`. On return, the Unicode extensions of `locale` contain only the
/// resolved `co`/`kn`/`kf` keywords, and `options` holds the same resolved values.
fn resolve(locale: &mut Locale, options: &mut Self::LocaleOptions, provider: &P) {
// Prefer the collation from the options if it passes `validate_extension`; otherwise
// fall back to the locale's `co` keyword under the same check. A resolved `search`
// collation is always discarded.
let collation = options
.collation
.take()
.filter(|co| validate_extension(locale.id.clone(), key!("co"), co, provider))
.or_else(|| {
locale
.extensions
.unicode
.keywords
.get(&key!("co"))
.cloned()
.filter(|co| validate_extension(locale.id.clone(), key!("co"), co, provider))
})
.filter(|co| co != &value!("search"));
// `kn`: the option wins; otherwise a keyword equal to `true` enables numeric collation
// and any other present value disables it.
let numeric =
options.numeric.or_else(
|| match locale.extensions.unicode.keywords.get(&key!("kn")) {
Some(a) if a == &value!("true") => Some(true),
Some(_) => Some(false),
_ => None,
},
);
// `kf`: the option wins; otherwise `upper`/`lower` select the matching ordering and any
// other present value maps to `CaseFirst::Off`.
let case_first = options.case_first.or_else(|| {
match locale.extensions.unicode.keywords.get(&key!("kf")) {
Some(a) if a == &value!("upper") => Some(CaseFirst::UpperFirst),
Some(a) if a == &value!("lower") => Some(CaseFirst::LowerFirst),
Some(_) => Some(CaseFirst::Off),
_ => None,
}
});
// Drop every Unicode extension of the locale, then write back only the resolved keys.
locale.extensions.unicode.clear();
if let Some(co) = collation.clone() {
locale.extensions.unicode.keywords.set(key!("co"), co);
}
if let Some(kn) = numeric.map(|kn| if kn { value!("true") } else { value!("false") }) {
locale.extensions.unicode.keywords.set(key!("kn"), kn);
}
if let Some(kf) = case_first.map(|kf| match kf {
CaseFirst::Off => value!("false"),
CaseFirst::LowerFirst => value!("lower"),
CaseFirst::UpperFirst => value!("upper"),
_ => unreachable!(),
}) {
locale.extensions.unicode.keywords.set(key!("kf"), kf);
}
// Hand the resolved values back to the caller through `options`.
options.collation = collation;
options.numeric = numeric;
options.case_first = case_first;
}
}
impl BuiltIn for Collator {
const NAME: &'static str = "Collator";
/// Builds the `Collator` constructor with its static method, prototype accessor and
/// prototype method, and returns it as a `JsValue`.
fn init(context: &mut Context) -> Option<JsValue> {
let _timer = Profiler::global().start_event(Self::NAME, "init");
// Native getter function backing the `compare` accessor property.
let compare = FunctionBuilder::native(context, Self::compare)
.name("get compare")
.constructor(false)
.build();
ConstructorBuilder::with_standard_constructor(
context,
Self::constructor,
context.intrinsics().constructors().collator().clone(),
)
.name(Self::NAME)
.length(Self::LENGTH)
.static_method(Self::supported_locales_of, "supportedLocalesOf", 1)
.property(
WellKnownSymbols::to_string_tag(),
"Intl.Collator",
Attribute::CONFIGURABLE,
)
// `compare` is an accessor with a getter only (no setter).
.accessor("compare", Some(compare), None, Attribute::CONFIGURABLE)
.method(Self::resolved_options, "resolvedOptions", 0)
.build()
.conv::<JsValue>()
.pipe(Some)
}
}
impl Collator {
pub(crate) const LENGTH: usize = 0;
/// Constructor [`Intl.Collator ( [ locales [ , options ] ] )`][spec].
///
/// Constructor for `Collator` objects.
///
/// More information:
/// - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma402/#sec-intl.collator
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Collator
pub(crate) fn constructor(
new_target: &JsValue,
args: &[JsValue],
context: &mut Context,
) -> JsResult<JsValue> {
// 1. If NewTarget is undefined, let newTarget be the active function object, else let newTarget be NewTarget.
// 2. Let internalSlotsList be « [[InitializedCollator]], [[Locale]], [[Usage]], [[Sensitivity]], [[IgnorePunctuation]], [[Collation]], [[BoundCompare]] ».
// 3. If %Collator%.[[RelevantExtensionKeys]] contains "kn", then
// a. Append [[Numeric]] as the last element of internalSlotsList.
// 4. If %Collator%.[[RelevantExtensionKeys]] contains "kf", then
// a. Append [[CaseFirst]] as the last element of internalSlotsList.
// 5. Let collator be ? OrdinaryCreateFromConstructor(newTarget, "%Collator.prototype%", internalSlotsList).
// 6. Return ? InitializeCollator(collator, locales, options).
let locales = args.get_or_undefined(0);
let options = args.get_or_undefined(1);
// Abstract operation `InitializeCollator ( collator, locales, options )`
// https://tc39.es/ecma402/#sec-initializecollator
// 1. Let requestedLocales be ? CanonicalizeLocaleList(locales).
let requested_locales = canonicalize_locale_list(locales, context)?;
// 2. Set options to ? CoerceOptionsToObject(options).
let options = coerce_options_to_object(options, context)?;
// 3. Let usage be ? GetOption(options, "usage", string, « "sort", "search" », "sort").
// 4. Set collator.[[Usage]] to usage.
// 5. If usage is "sort", then
// a. Let localeData be %Collator%.[[SortLocaleData]].
// 6. Else,
// a. Let localeData be %Collator%.[[SearchLocaleData]].
let usage = get_option::<Usage>(&options, "usage", false, context)?.unwrap_or_default();
// 7. Let opt be a new Record.
// 8. Let matcher be ? GetOption(options, "localeMatcher", string, « "lookup", "best fit" », "best fit").
// 9. Set opt.[[localeMatcher]] to matcher.
let matcher = get_option::<LocaleMatcher>(&options, "localeMatcher", false, context)?
.unwrap_or_default();
// 10. Let collation be ? GetOption(options, "collation", string, empty, undefined).
// 11. If collation is not undefined, then
// a. If collation does not match the Unicode Locale Identifier type nonterminal, throw a RangeError exception.
// 12. Set opt.[[co]] to collation.
let collation = get_option::<Value>(&options, "collation", false, context)?;
// 13. Let numeric be ? GetOption(options, "numeric", boolean, empty, undefined).
// 14. If numeric is not undefined, then
// a. Let numeric be ! ToString(numeric).
// 15. Set opt.[[kn]] to numeric.
let numeric = get_option::<bool>(&options, "numeric", false, context)?;
// 16. Let caseFirst be ? GetOption(options, "caseFirst", string, « "upper", "lower", "false" », undefined).
// 17. Set opt.[[kf]] to caseFirst.
let case_first = get_option::<CaseFirst>(&options, "caseFirst", false, context)?;
let mut intl_options = IntlOptions {
matcher,
service_options: CollatorLocaleOptions {
collation,
numeric,
case_first,
},
};
// 18. Let relevantExtensionKeys be %Collator%.[[RelevantExtensionKeys]].
// 19. Let r be ResolveLocale(%Collator%.[[AvailableLocales]], requestedLocales, opt, relevantExtensionKeys, localeData).
let mut locale =
resolve_locale::<Self, _>(&requested_locales, &mut intl_options, context.icu());
let collator_locale = {
// `collator_locale` needs to be different from the resolved locale because ECMA402 doesn't
// define `search` as a resolvable extension of a locale, so we need to add that extension
// only to the locale passed to the collator.
let mut col_loc = DataLocale::from(&locale);
if usage == Usage::Search {
intl_options.service_options.collation = None;
locale.extensions.unicode.keywords.remove(key!("co"));
col_loc.set_unicode_ext(key!("co"), value!("search"));
}
col_loc
};
// 20. Set collator.[[Locale]] to r.[[locale]].
// 21. Let collation be r.[[co]].
// 22. If collation is null, let collation be "default".
// 23. Set collator.[[Collation]] to collation.
let collation = intl_options
.service_options
.collation
.unwrap_or(value!("default"));
// 24. If relevantExtensionKeys contains "kn", then
// a. Set collator.[[Numeric]] to SameValue(r.[[kn]], "true").
let numeric = intl_options.service_options.numeric.unwrap_or_default();
// 25. If relevantExtensionKeys contains "kf", then
// a. Set collator.[[CaseFirst]] to r.[[kf]].
let case_first = intl_options.service_options.case_first;
// 26. Let sensitivity be ? GetOption(options, "sensitivity", string, « "base", "accent", "case", "variant" », undefined).
// 28. Set collator.[[Sensitivity]] to sensitivity.
let sensitivity = get_option::<Sensitivity>(&options, "sensitivity", false, context)?
// 27. If sensitivity is undefined, then
// a. If usage is "sort", then
// i. Let sensitivity be "variant".
// b. Else,
// i. Let dataLocale be r.[[dataLocale]].
// ii. Let dataLocaleData be localeData.[[<dataLocale>]].
// iii. Let sensitivity be dataLocaleData.[[sensitivity]].
.or_else(|| (usage == Usage::Sort).then_some(Sensitivity::Variant));
// 29. Let ignorePunctuation be ? GetOption(options, "ignorePunctuation", boolean, empty, false).
// 30. Set collator.[[IgnorePunctuation]] to ignorePunctuation.
let ignore_punctuation =
get_option::<bool>(&options, "ignorePunctuation", false, context)?.unwrap_or_default();
let (strength, case_level) = sensitivity.map(Sensitivity::to_collator_options).unzip();
let (alternate_handling, max_variable) = ignore_punctuation
.then_some((AlternateHandling::Shifted, MaxVariable::Punctuation))
.unzip();
let collator = context
.icu()
.provider()
.try_new_collator(&collator_locale, {
let mut options = icu_collator::CollatorOptions::new();
options.strength = strength;
options.case_level = case_level;
options.case_first = case_first;
options.numeric = Some(if numeric { Numeric::On } else { Numeric::Off });
options.alternate_handling = alternate_handling;
options.max_variable = max_variable;
options
})
.map_err(|e| JsNativeError::typ().with_message(e.to_string()))?;
let prototype =
get_prototype_from_constructor(new_target, StandardConstructors::collator, context)?;
let collator = JsObject::from_proto_and_data(
prototype,
ObjectData::collator(Collator {
locale,
collation,
numeric,
case_first,
usage,
sensitivity: sensitivity.unwrap_or(Sensitivity::Variant),
ignore_punctuation,
collator,
bound_compare: None,
}),
);
// 31. Return collator.
Ok(collator.into())
}
/// [`Intl.Collator.supportedLocalesOf ( locales [ , options ] )`][spec].
///
/// Canonicalizes the requested `locales` (first argument) and returns the subset of them that
/// collation supports without falling back to the runtime's default locale. The second
/// argument is forwarded unchanged as the `options` of the lookup.
///
/// More information:
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma402/#sec-intl.collator.supportedlocalesof
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Collator/supportedLocalesOf
fn supported_locales_of(
    _: &JsValue,
    args: &[JsValue],
    context: &mut Context,
) -> JsResult<JsValue> {
    // 1. Let availableLocales be %Collator%.[[AvailableLocales]].
    // 2. Let requestedLocales be ? CanonicalizeLocaleList(locales).
    let requested = canonicalize_locale_list(args.get_or_undefined(0), context)?;

    // 3. Return ? SupportedLocales(availableLocales, requestedLocales, options).
    let supported = supported_locales::<<Self as Service<BoaProvider>>::LangMarker>(
        &requested,
        args.get_or_undefined(1),
        context,
    )?;

    Ok(JsValue::from(supported))
}
/// [`get Intl.Collator.prototype.compare`][spec].
///
/// Getter that returns the function comparing two strings according to the sort order of
/// this `Intl.Collator` object. The function is created on first access, stored in the
/// collator and returned again by every later access.
///
/// # Errors
///
/// Returns a `TypeError` if `this` is not an object holding `Collator` data.
///
/// More information:
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma402/#sec-intl.collator.prototype.compare
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Collator/compare
fn compare(this: &JsValue, _: &[JsValue], context: &mut Context) -> JsResult<JsValue> {
    // 1. Let collator be the this value.
    // 2. Perform ? RequireInternalSlot(collator, [[InitializedCollator]]).
    let this = this.as_object().ok_or_else(|| {
        JsNativeError::typ()
            .with_message("`compare` can only be accessed on a `Collator` object")
    })?;
    // Handle to the collator object that gets captured by the bound function.
    let collator_obj = this.clone();
    let mut collator = this.borrow_mut();
    let collator = collator.as_collator_mut().ok_or_else(|| {
        JsNativeError::typ()
            .with_message("`compare` can only be accessed on a `Collator` object")
    })?;

    // 3. If collator.[[BoundCompare]] is undefined, then
    //     a. Let F be a new built-in function object as defined in 10.3.3.1.
    //     b. Set F.[[Collator]] to collator.
    //     c. Set collator.[[BoundCompare]] to F.
    let bound_compare = if let Some(f) = collator.bound_compare.clone() {
        f
    } else {
        let bound_compare = FunctionBuilder::closure_with_captures(
            context,
            // 10.3.3.1. Collator Compare Functions
            // https://tc39.es/ecma402/#sec-collator-compare-functions
            |_, args, collator, context| {
                // 1. Let collator be F.[[Collator]].
                // 2. Assert: Type(collator) is Object and collator has an [[InitializedCollator]] internal slot.
                let collator = collator.borrow();
                let collator = collator
                    .as_collator()
                    .expect("checked above that the object was a collator object");

                // 3. If x is not provided, let x be undefined.
                // 5. Let X be ? ToString(x).
                let x = args.get_or_undefined(0).to_string(context)?;

                // 4. If y is not provided, let y be undefined.
                // 6. Let Y be ? ToString(y).
                let y = args.get_or_undefined(1).to_string(context)?;

                // 7. Return CompareStrings(collator, X, Y).
                // The comparison result is cast to an integer before being returned.
                let result = collator.collator.compare_utf16(&x, &y) as i32;

                Ok(result.into())
            },
            collator_obj,
        )
        .length(2)
        .build();

        collator.bound_compare = Some(bound_compare.clone());
        bound_compare
    };

    // 4. Return collator.[[BoundCompare]].
    Ok(bound_compare.into())
}
/// [`Intl.Collator.prototype.resolvedOptions ( )`][spec].
///
/// Creates a fresh ordinary object whose properties mirror the locale and collation options
/// stored in this `Intl.Collator` object. `caseFirst` is only present when a case ordering
/// was resolved.
///
/// More information:
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma402/#sec-intl.collator.prototype.resolvedoptions
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Collator/resolvedOptions
fn resolved_options(this: &JsValue, _: &[JsValue], context: &mut Context) -> JsResult<JsValue> {
    let not_a_collator = || {
        JsNativeError::typ()
            .with_message("`resolvedOptions` can only be called on a `Collator` object")
    };

    // 1. Let collator be the this value.
    // 2. Perform ? RequireInternalSlot(collator, [[InitializedCollator]]).
    let object = this.as_object().ok_or_else(not_a_collator)?;
    let object = object.borrow();
    let collator = object.as_collator().ok_or_else(not_a_collator)?;

    // 3. Let options be OrdinaryObjectCreate(%Object.prototype%).
    let options = JsObject::from_proto_and_data(
        context.intrinsics().constructors().object().prototype(),
        ObjectData::ordinary(),
    );

    // String forms of the enum-valued slots.
    let usage = match collator.usage {
        Usage::Sort => "sort",
        Usage::Search => "search",
    };
    let sensitivity = match collator.sensitivity {
        Sensitivity::Base => "base",
        Sensitivity::Accent => "accent",
        Sensitivity::Case => "case",
        Sensitivity::Variant => "variant",
    };
    let case_first = collator.case_first.map(|kf| match kf {
        CaseFirst::Off => "false",
        CaseFirst::LowerFirst => "lower",
        CaseFirst::UpperFirst => "upper",
        _ => unreachable!(),
    });

    // 4. For each row of Table 4, except the header row, in table order, do
    //     a. Let p be the Property value of the current row.
    //     b. Let v be the value of collator's internal slot whose name is the Internal Slot value of the current row.
    //     c. If the current row has an Extension Key value, then
    //         i. Let extensionKey be the Extension Key value of the current row.
    //         ii. If %Collator%.[[RelevantExtensionKeys]] does not contain extensionKey, then
    //             1. Let v be undefined.
    //     d. If v is not undefined, then
    //         i. Perform ! CreateDataPropertyOrThrow(options, p, v).
    options
        .create_data_property_or_throw("locale", collator.locale.to_string(), context)
        .expect("operation must not fail per the spec");
    options
        .create_data_property_or_throw("usage", usage, context)
        .expect("operation must not fail per the spec");
    options
        .create_data_property_or_throw("sensitivity", sensitivity, context)
        .expect("operation must not fail per the spec");
    options
        .create_data_property_or_throw(
            "ignorePunctuation",
            collator.ignore_punctuation,
            context,
        )
        .expect("operation must not fail per the spec");
    options
        .create_data_property_or_throw("collation", collator.collation.to_string(), context)
        .expect("operation must not fail per the spec");
    options
        .create_data_property_or_throw("numeric", collator.numeric, context)
        .expect("operation must not fail per the spec");
    if let Some(case_first) = case_first {
        options
            .create_data_property_or_throw("caseFirst", case_first, context)
            .expect("operation must not fail per the spec");
    }

    // 5. Return options.
    Ok(options.into())
}
}

80
boa_engine/src/builtins/intl/collator/options.rs

@ -0,0 +1,80 @@
use std::str::FromStr;
use icu_collator::{CaseLevel, Strength};
use crate::builtins::intl::options::OptionTypeParsable;
/// Values accepted by the `sensitivity` option of `Intl.Collator`.
#[derive(Debug, Clone, Copy)]
pub(crate) enum Sensitivity {
    Base,
    Accent,
    Case,
    Variant,
}

impl Sensitivity {
    /// Maps the sensitivity to the ICU4X `(Strength, CaseLevel)` pair that implements it.
    pub(crate) const fn to_collator_options(self) -> (Strength, CaseLevel) {
        let strength = match self {
            Self::Base | Self::Case => Strength::Primary,
            Self::Accent => Strength::Secondary,
            Self::Variant => Strength::Tertiary,
        };
        let case_level = match self {
            Self::Base | Self::Accent => CaseLevel::Off,
            Self::Case | Self::Variant => CaseLevel::On,
        };
        (strength, case_level)
    }
}

/// Error returned when a string is not a valid [`Sensitivity`].
#[derive(Debug)]
pub(crate) struct ParseSensitivityError;

impl std::fmt::Display for ParseSensitivityError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "provided string was not `base`, `accent`, `case` or `variant`"
        )
    }
}

impl FromStr for Sensitivity {
    type Err = ParseSensitivityError;

    /// Parses one of the exact strings `base`, `accent`, `case` or `variant`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let sensitivity = match s {
            "base" => Self::Base,
            "accent" => Self::Accent,
            "case" => Self::Case,
            "variant" => Self::Variant,
            _ => return Err(ParseSensitivityError),
        };
        Ok(sensitivity)
    }
}

// NOTE(review): presumably this marker impl is what lets `get_option` parse the option through
// `FromStr` — confirm against `intl::options`.
impl OptionTypeParsable for Sensitivity {}
/// Values accepted by the `usage` option of `Intl.Collator`; defaults to [`Usage::Sort`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub(crate) enum Usage {
    #[default]
    Sort,
    Search,
}

/// Error returned when a string is not a valid [`Usage`].
#[derive(Debug)]
pub(crate) struct ParseUsageError;

impl std::fmt::Display for ParseUsageError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "provided string was not `sort` or `search`")
    }
}

impl FromStr for Usage {
    type Err = ParseUsageError;

    /// Parses one of the exact strings `sort` or `search`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if s == "sort" {
            Ok(Self::Sort)
        } else if s == "search" {
            Ok(Self::Search)
        } else {
            Err(ParseUsageError)
        }
    }
}
// NOTE(review): presumably this marker impl is what lets `get_option` parse the option through
// `FromStr` — confirm against `intl::options`.
impl OptionTypeParsable for Usage {}

17
boa_engine/src/builtins/intl/date_time_format.rs

@ -20,6 +20,23 @@ use crate::{
use boa_gc::{Finalize, Trace};
use boa_profiler::Profiler;
use icu_datetime::options::preferences::HourCycle;
use super::options::OptionType;
impl OptionType for HourCycle {
    /// Converts `value` to a string and maps `h11`, `h12`, `h23` or `h24` to the matching
    /// hour cycle; any other string results in a `RangeError`.
    fn from_value(value: JsValue, context: &mut Context) -> JsResult<Self> {
        let raw = value.to_string(context)?.to_std_string_escaped();

        let hour_cycle = match raw.as_str() {
            "h11" => Self::H11,
            "h12" => Self::H12,
            "h23" => Self::H23,
            "h24" => Self::H24,
            _ => {
                return Err(JsNativeError::range()
                    .with_message("provided string was not `h11`, `h12`, `h23` or `h24`")
                    .into())
            }
        };

        Ok(hour_cycle)
    }
}
/// JavaScript `Intl.DateTimeFormat` object.
#[derive(Debug, Clone, Trace, Finalize)]

495
boa_engine/src/builtins/intl/list_format/mod.rs

@ -0,0 +1,495 @@
use std::fmt::Write;
use boa_profiler::Profiler;
use icu_list::{provider::AndListV1Marker, ListFormatter, ListLength};
use icu_locid::Locale;
use icu_provider::DataLocale;
use tap::{Conv, Pipe};
use crate::{
builtins::{Array, BuiltIn, JsArgs},
context::{intrinsics::StandardConstructors, BoaProvider},
object::{
internal_methods::get_prototype_from_constructor, ConstructorBuilder, JsObject, ObjectData,
},
property::Attribute,
symbol::WellKnownSymbols,
Context, JsNativeError, JsResult, JsString, JsValue,
};
use super::{
locale::{canonicalize_locale_list, resolve_locale, supported_locales},
options::{get_option, get_options_object, IntlOptions, LocaleMatcher},
Service,
};
mod options;
pub(crate) use options::*;
/// Internal data of a JavaScript `Intl.ListFormat` object.
///
/// All fields are filled in once by the constructor and only read afterwards
/// by `format`, `formatToParts` and `resolvedOptions`.
pub struct ListFormat {
// Locale chosen by `resolve_locale` in the constructor; reported by `resolvedOptions`.
locale: Locale,
// The `type` option (`conjunction`, `disjunction` or `unit`).
typ: ListFormatType,
// The `style` option, stored as an ICU4X `ListLength` (`long` maps to `Wide`).
style: ListLength,
// ICU4X formatter built from `locale`, `typ` and `style`; does the actual formatting.
formatter: ListFormatter,
}
impl std::fmt::Debug for ListFormat {
    /// Formats every field except `formatter`, which is represented by the
    /// placeholder string `"ListFormatter"` instead of its contents.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            locale,
            typ,
            style,
            formatter: _,
        } = self;

        let mut repr = f.debug_struct("ListFormat");
        repr.field("locale", locale);
        repr.field("typ", typ);
        repr.field("style", style);
        repr.field("formatter", &"ListFormatter");
        repr.finish()
    }
}
// Registers `ListFormat` as an Intl service for any provider type `P`.
impl<P> Service<P> for ListFormat {
// Data marker used for locale lookups; `supported_locales_of` passes it to `supported_locales`.
type LangMarker = AndListV1Marker;
// NOTE(review): `()` presumably means this service has no locale-extension options to resolve —
// confirm against the `Service` trait definition.
type LocaleOptions = ();
}
impl BuiltIn for ListFormat {
    const NAME: &'static str = "ListFormat";

    /// Builds the `Intl.ListFormat` constructor: its static
    /// `supportedLocalesOf`, the `@@toStringTag` property and the prototype
    /// methods `format`, `formatToParts` and `resolvedOptions`.
    fn init(context: &mut Context) -> Option<JsValue> {
        // Kept alive for the whole function so the profiler event spans the initialization.
        let _timer = Profiler::global().start_event(Self::NAME, "init");

        let standard_constructor = context.intrinsics().constructors().list_format().clone();

        let constructor = ConstructorBuilder::with_standard_constructor(
            context,
            Self::constructor,
            standard_constructor,
        )
        .name(Self::NAME)
        .length(Self::LENGTH)
        .static_method(Self::supported_locales_of, "supportedLocalesOf", 1)
        .property(
            WellKnownSymbols::to_string_tag(),
            "Intl.ListFormat",
            Attribute::CONFIGURABLE,
        )
        .method(Self::format, "format", 1)
        .method(Self::format_to_parts, "formatToParts", 1)
        .method(Self::resolved_options, "resolvedOptions", 0)
        .build();

        let value: JsValue = constructor.into();
        Some(value)
    }
}
impl ListFormat {
/// Value passed to `ConstructorBuilder::length` for the `Intl.ListFormat` constructor.
pub(crate) const LENGTH: usize = 0;
/// Constructor [`Intl.ListFormat ( [ locales [ , options ] ] )`][spec].
///
/// Constructor for `ListFormat` objects.
///
/// Errors:
/// - `TypeError` if called without `new`.
/// - Any error raised while looking up the prototype of `NewTarget`, canonicalizing
///   `locales`, or reading `options`.
/// - `TypeError` if the ICU4X list formatter cannot be created for the resolved locale.
///
/// More information:
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma402/#sec-Intl.ListFormat
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/ListFormat
pub(crate) fn constructor(
    new_target: &JsValue,
    args: &[JsValue],
    context: &mut Context,
) -> JsResult<JsValue> {
    // 1. If NewTarget is undefined, throw a TypeError exception.
    if new_target.is_undefined() {
        return Err(JsNativeError::typ()
            .with_message("cannot call `Intl.ListFormat` constructor without `new`")
            .into());
    }

    // 2. Let listFormat be ? OrdinaryCreateFromConstructor(NewTarget, "%ListFormat.prototype%", « [[InitializedListFormat]], [[Locale]], [[Type]], [[Style]], [[Templates]] »).
    //
    // The prototype lookup is observable from script (it reads `NewTarget.prototype`), so it
    // has to happen before `locales` and `options` are inspected. The object itself is only
    // allocated at the end, once all of its internal data is known.
    let prototype =
        get_prototype_from_constructor(new_target, StandardConstructors::list_format, context)?;

    let locales = args.get_or_undefined(0);
    let options = args.get_or_undefined(1);

    // 3. Let requestedLocales be ? CanonicalizeLocaleList(locales).
    let requested_locales = canonicalize_locale_list(locales, context)?;

    // 4. Set options to ? GetOptionsObject(options).
    let options = get_options_object(options)?;

    // 5. Let opt be a new Record.
    // 6. Let matcher be ? GetOption(options, "localeMatcher", string, « "lookup", "best fit" », "best fit").
    let matcher = get_option::<LocaleMatcher>(&options, "localeMatcher", false, context)?
        .unwrap_or_default();

    // 7. Set opt.[[localeMatcher]] to matcher.
    // 8. Let localeData be %ListFormat%.[[LocaleData]].
    // 9. Let r be ResolveLocale(%ListFormat%.[[AvailableLocales]], requestedLocales, opt, %ListFormat%.[[RelevantExtensionKeys]], localeData).
    // 10. Set listFormat.[[Locale]] to r.[[locale]].
    let locale = resolve_locale::<Self, _>(
        &requested_locales,
        &mut IntlOptions {
            matcher,
            ..Default::default()
        },
        context.icu(),
    );

    // 11. Let type be ? GetOption(options, "type", string, « "conjunction", "disjunction", "unit" », "conjunction").
    // 12. Set listFormat.[[Type]] to type.
    let typ =
        get_option::<ListFormatType>(&options, "type", false, context)?.unwrap_or_default();

    // 13. Let style be ? GetOption(options, "style", string, « "long", "short", "narrow" », "long").
    // 14. Set listFormat.[[Style]] to style.
    let style = get_option::<ListLength>(&options, "style", false, context)?
        .unwrap_or(ListLength::Wide);

    // 15. Let dataLocale be r.[[dataLocale]].
    // 16. Let dataLocaleData be localeData.[[<dataLocale>]].
    // 17. Let dataLocaleTypes be dataLocaleData.[[<type>]].
    // 18. Set listFormat.[[Templates]] to dataLocaleTypes.[[<style>]].
    let formatter = context
        .icu()
        .provider()
        .try_new_list_formatter(&DataLocale::from(&locale), typ, style)
        .map_err(|e| JsNativeError::typ().with_message(e.to_string()))?;

    let list_format = JsObject::from_proto_and_data(
        prototype,
        ObjectData::list_format(ListFormat {
            formatter,
            locale,
            typ,
            style,
        }),
    );

    // 19. Return listFormat.
    Ok(list_format.into())
}
/// [`Intl.ListFormat.supportedLocalesOf ( locales [ , options ] )`][spec].
///
/// Canonicalizes `locales` and returns the result of `supported_locales` for the
/// `ListFormat` language marker, converted to a `JsValue`. Errors from either step
/// are propagated.
///
/// More information:
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma402/#sec-Intl.ListFormat.supportedLocalesOf
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/ListFormat/supportedLocalesOf
fn supported_locales_of(
    _: &JsValue,
    args: &[JsValue],
    context: &mut Context,
) -> JsResult<JsValue> {
    // 1. Let availableLocales be %ListFormat%.[[AvailableLocales]].
    // 2. Let requestedLocales be ? CanonicalizeLocaleList(locales).
    let requested = canonicalize_locale_list(args.get_or_undefined(0), context)?;

    // 3. Return ? SupportedLocales(availableLocales, requestedLocales, options).
    let supported = supported_locales::<<Self as Service<BoaProvider>>::LangMarker>(
        &requested,
        args.get_or_undefined(1),
        context,
    )?;

    Ok(JsValue::from(supported))
}
/// [`Intl.ListFormat.prototype.format ( list )`][spec].
///
/// Returns a language-specific formatted string representing the elements of the list.
///
/// Errors:
/// - `TypeError` if `this` is not a `ListFormat` object.
/// - Any error raised by `string_list_from_iterable` while iterating `list`.
///
/// More information:
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma402/#sec-Intl.ListFormat.prototype.format
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/ListFormat/format
fn format(this: &JsValue, args: &[JsValue], context: &mut Context) -> JsResult<JsValue> {
    let not_list_format = || {
        JsNativeError::typ().with_message("`format` can only be called on a `ListFormat` object")
    };

    // 1. Let lf be the this value.
    // 2. Perform ? RequireInternalSlot(lf, [[InitializedListFormat]]).
    let object = this.as_object().ok_or_else(not_list_format)?;
    // The borrow taken for this check ends with the condition: iterating `list` below runs
    // arbitrary user code, which may need to mutably borrow this very object.
    if object.borrow().as_list_format().is_none() {
        return Err(not_list_format().into());
    }

    // 3. Let stringList be ? StringListFromIterable(list).
    // TODO: support for UTF-16 unpaired surrogates formatting
    let strings = string_list_from_iterable(args.get_or_undefined(0), context)?;

    // 4. Return ! FormatList(lf, stringList).
    // No user code runs from here on, so holding the borrow is safe.
    let lf = object.borrow();
    let lf = lf.as_list_format().ok_or_else(not_list_format)?;
    Ok(lf
        .formatter
        .format_to_string(strings.into_iter().map(|s| s.to_std_string_escaped()))
        .into())
}
/// [`Intl.ListFormat.prototype.formatToParts ( list )`][spec].
///
/// Returns an array of `{ type, value }` objects describing the language-specific
/// formatting of the list, where `type` is either `"element"` or `"literal"`.
///
/// Errors:
/// - `TypeError` if `this` is not a `ListFormat` object.
/// - Any error raised by `string_list_from_iterable` while iterating `list`.
/// - `TypeError` if writing the formatted parts fails.
///
/// More information:
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma402/#sec-Intl.ListFormat.prototype.formatToParts
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/ListFormat/formatToParts
fn format_to_parts(
    this: &JsValue,
    args: &[JsValue],
    context: &mut Context,
) -> JsResult<JsValue> {
    // TODO: maybe try to move this into icu4x?

    use writeable::{PartsWrite, Writeable};

    /// A single formatted segment, tagged with the kind reported to script.
    #[derive(Debug, Clone)]
    enum Part {
        Literal(String),
        Element(String),
    }

    impl Part {
        /// The string stored in the `type` property of the resulting object.
        const fn typ(&self) -> &'static str {
            match self {
                Part::Literal(_) => "literal",
                Part::Element(_) => "element",
            }
        }

        /// The text stored in the `value` property of the resulting object.
        #[allow(clippy::missing_const_for_fn)]
        fn value(self) -> String {
            match self {
                Part::Literal(s) | Part::Element(s) => s,
            }
        }
    }

    /// Sink that accumulates the text of one part; nested parts are flattened into it.
    #[derive(Debug, Clone)]
    struct WriteString(String);

    impl Write for WriteString {
        fn write_str(&mut self, s: &str) -> std::fmt::Result {
            self.0.write_str(s)
        }

        fn write_char(&mut self, c: char) -> std::fmt::Result {
            self.0.write_char(c)
        }
    }

    impl PartsWrite for WriteString {
        type SubPartsWrite = WriteString;

        fn with_part(
            &mut self,
            _part: writeable::Part,
            mut f: impl FnMut(&mut Self::SubPartsWrite) -> std::fmt::Result,
        ) -> std::fmt::Result {
            f(self)
        }
    }

    /// Sink that records one `Part` per top-level part emitted by the formatter.
    #[derive(Debug, Clone)]
    struct PartsCollector(Vec<Part>);

    impl Write for PartsCollector {
        // Text written outside of any part is discarded.
        fn write_str(&mut self, _: &str) -> std::fmt::Result {
            Ok(())
        }
    }

    impl PartsWrite for PartsCollector {
        type SubPartsWrite = WriteString;

        fn with_part(
            &mut self,
            part: writeable::Part,
            mut f: impl FnMut(&mut Self::SubPartsWrite) -> core::fmt::Result,
        ) -> core::fmt::Result {
            assert!(part.category == "list");
            let mut string = WriteString(String::new());
            f(&mut string)?;
            // Empty parts are not recorded.
            if !string.0.is_empty() {
                match part.value {
                    "element" => self.0.push(Part::Element(string.0)),
                    "literal" => self.0.push(Part::Literal(string.0)),
                    _ => unreachable!(),
                };
            }
            Ok(())
        }
    }

    let not_list_format = || {
        JsNativeError::typ()
            .with_message("`formatToParts` can only be called on a `ListFormat` object")
    };

    // 1. Let lf be the this value.
    // 2. Perform ? RequireInternalSlot(lf, [[InitializedListFormat]]).
    let object = this.as_object().ok_or_else(not_list_format)?;
    // The borrow taken for this check ends with the condition: iterating `list` below runs
    // arbitrary user code, which may need to mutably borrow this very object.
    if object.borrow().as_list_format().is_none() {
        return Err(not_list_format().into());
    }

    // 3. Let stringList be ? StringListFromIterable(list).
    // TODO: support for UTF-16 unpaired surrogates formatting
    let strings = string_list_from_iterable(args.get_or_undefined(0), context)?
        .into_iter()
        .map(|s| s.to_std_string_escaped());

    // 4. Return ! FormatListToParts(lf, stringList).

    // Abstract operation `FormatListToParts ( listFormat, list )`
    // https://tc39.es/ecma402/#sec-formatlisttoparts

    // 1. Let parts be ! CreatePartsFromList(listFormat, list).
    let mut parts = PartsCollector(Vec::new());
    {
        // No user code runs while formatting, so the borrow is safe within this scope.
        let lf = object.borrow();
        let lf = lf.as_list_format().ok_or_else(not_list_format)?;
        lf.formatter
            .format(strings)
            .write_to_parts(&mut parts)
            .map_err(|e| JsNativeError::typ().with_message(e.to_string()))?;
    }

    // 2. Let result be ! ArrayCreate(0).
    let result = Array::array_create(0, None, context)
        .expect("creating an empty array with default proto must not fail");

    // 3. Let n be 0.
    // 4. For each Record { [[Type]], [[Value]] } part in parts, do
    for (n, part) in parts.0.into_iter().enumerate() {
        // a. Let O be OrdinaryObjectCreate(%Object.prototype%).
        let o = JsObject::from_proto_and_data(
            context.intrinsics().constructors().object().prototype(),
            ObjectData::ordinary(),
        );

        // b. Perform ! CreateDataPropertyOrThrow(O, "type", part.[[Type]]).
        o.create_data_property_or_throw("type", part.typ(), context)
            .expect("operation must not fail per the spec");

        // c. Perform ! CreateDataPropertyOrThrow(O, "value", part.[[Value]]).
        o.create_data_property_or_throw("value", part.value(), context)
            .expect("operation must not fail per the spec");

        // d. Perform ! CreateDataPropertyOrThrow(result, ! ToString(n), O).
        result
            .create_data_property_or_throw(n, o, context)
            .expect("operation must not fail per the spec");

        // e. Increment n by 1.
    }

    // 5. Return result.
    Ok(result.into())
}
/// [`Intl.ListFormat.prototype.resolvedOptions ( )`][spec].
///
/// Builds a fresh ordinary object whose `locale`, `type` and `style` properties mirror the
/// values stored on this `Intl.ListFormat` object at construction time.
///
/// Returns a `TypeError` if `this` is not a `ListFormat` object.
///
/// More information:
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma402/#sec-Intl.ListFormat.prototype.resolvedoptions
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/ListFormat/resolvedOptions
fn resolved_options(this: &JsValue, _: &[JsValue], context: &mut Context) -> JsResult<JsValue> {
    let wrong_receiver = || {
        JsNativeError::typ()
            .with_message("`resolvedOptions` can only be called on a `ListFormat` object")
    };

    // 1. Let lf be the this value.
    // 2. Perform ? RequireInternalSlot(lf, [[InitializedListFormat]]).
    let object = this
        .as_object()
        .map(JsObject::borrow)
        .ok_or_else(wrong_receiver)?;
    let lf = object.as_list_format().ok_or_else(wrong_receiver)?;

    // Script-visible spellings of the stored options.
    let typ = match lf.typ {
        ListFormatType::Conjunction => "conjunction",
        ListFormatType::Disjunction => "disjunction",
        ListFormatType::Unit => "unit",
    };
    let style = match lf.style {
        ListLength::Wide => "long",
        ListLength::Short => "short",
        ListLength::Narrow => "narrow",
        _ => unreachable!(),
    };

    // 3. Let options be OrdinaryObjectCreate(%Object.prototype%).
    let options = JsObject::from_proto_and_data(
        context.intrinsics().constructors().object().prototype(),
        ObjectData::ordinary(),
    );

    // 4. For each row of Table 11, except the header row, in table order, do
    //     a. Let p be the Property value of the current row.
    //     b. Let v be the value of lf's internal slot whose name is the Internal Slot value of the current row.
    //     c. Assert: v is not undefined.
    //     d. Perform ! CreateDataPropertyOrThrow(options, p, v).
    options
        .create_data_property_or_throw("locale", lf.locale.to_string(), context)
        .expect("operation must not fail per the spec");
    for (property, value) in [("type", typ), ("style", style)] {
        options
            .create_data_property_or_throw(property, value, context)
            .expect("operation must not fail per the spec");
    }

    // 5. Return options.
    Ok(options.into())
}
}
/// Abstract operation [`StringListFromIterable ( iterable )`][spec]
///
/// Collects every value produced by `iterable` into a list of strings. `undefined` yields an
/// empty list. The first non-string value closes the iterator and results in a `TypeError`;
/// errors thrown by the iterator protocol itself are propagated unchanged.
///
/// [spec]: https://tc39.es/ecma402/#sec-createstringlistfromiterable
fn string_list_from_iterable(iterable: &JsValue, context: &mut Context) -> JsResult<Vec<JsString>> {
    // 1. If iterable is undefined, then
    //     a. Return a new empty List.
    if iterable.is_undefined() {
        return Ok(Vec::new());
    }

    // 2. Let iteratorRecord be ? GetIterator(iterable).
    let iterator = iterable.get_iterator(context, None, None)?;

    // 3. Let list be a new empty List.
    let mut strings = Vec::new();

    // 4. Let next be true.
    // 5. Repeat, while next is not false,
    loop {
        // a. Set next to ? IteratorStep(iteratorRecord).
        // b. If next is not false, then
        let Some(next) = iterator.step(context)? else {
            break;
        };

        // i. Let nextValue be ? IteratorValue(next).
        let value = next.value(context)?;

        match value.as_string() {
            // iii. Append nextValue to the end of the List list.
            Some(string) => strings.push(string.clone()),
            // ii. If Type(nextValue) is not String, then
            None => {
                // 1. Let error be ThrowCompletion(a newly created TypeError object).
                // 2. Return ? IteratorClose(iteratorRecord, error).
                return Err(iterator
                    .close(
                        Err(JsNativeError::typ()
                            .with_message("StringListFromIterable: can only format strings into a list")
                            .into()),
                        context,
                    )
                    .expect_err("Should return the provided error"));
            }
        }
    }

    // 6. Return list.
    Ok(strings)
}

53
boa_engine/src/builtins/intl/list_format/options.rs

@ -0,0 +1,53 @@
use std::str::FromStr;
use icu_list::ListLength;
use crate::{
builtins::intl::options::{OptionType, OptionTypeParsable},
Context, JsNativeError, JsResult, JsValue,
};
/// The `type` option of `Intl.ListFormat`, parsed from the strings `conjunction`,
/// `disjunction` and `unit`.
///
/// `Conjunction` is the default variant. The type is comparable (`PartialEq`/`Eq`) so that
/// callers and tests can check the parsed or resolved value directly.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub(crate) enum ListFormatType {
    /// Parsed from the string `conjunction`; the default.
    #[default]
    Conjunction,
    /// Parsed from the string `disjunction`.
    Disjunction,
    /// Parsed from the string `unit`.
    Unit,
}
/// Error returned by the `FromStr` impl of `ListFormatType` when the input is not one of
/// `conjunction`, `disjunction` or `unit`.
#[derive(Debug)]
pub(crate) struct ParseListFormatTypeError;
impl std::fmt::Display for ParseListFormatTypeError {
    /// Writes the fixed, human-readable description of the parse failure.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        const MESSAGE: &str = "provided string was not `conjunction`, `disjunction` or `unit`";
        f.write_str(MESSAGE)
    }
}
impl FromStr for ListFormatType {
    type Err = ParseListFormatTypeError;

    /// Parses a `type` option string into the matching variant.
    ///
    /// Only the exact strings `conjunction`, `disjunction` and `unit` are accepted; every
    /// other input yields `ParseListFormatTypeError`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let parsed = match s {
            "conjunction" => Self::Conjunction,
            "disjunction" => Self::Disjunction,
            "unit" => Self::Unit,
            _ => return Err(ParseListFormatTypeError),
        };
        Ok(parsed)
    }
}
// Marker impl: opts `ListFormatType` into `OptionTypeParsable` (the trait has no required items here).
// NOTE(review): presumably this is what lets `get_option::<ListFormatType>` go through the
// `FromStr` impl — confirm against the trait definition in `intl/options.rs`.
impl OptionTypeParsable for ListFormatType {}
impl OptionType for ListLength {
    /// Converts a JavaScript value into a `ListLength`.
    ///
    /// The value is first converted with `to_string` (which may fail and propagate its
    /// error), then matched: `long` maps to `Wide`, `short` to `Short` and `narrow` to
    /// `Narrow`. Any other string produces a `RangeError`.
    fn from_value(value: JsValue, context: &mut Context) -> JsResult<Self> {
        let raw = value.to_string(context)?.to_std_string_escaped();

        let length = match raw.as_str() {
            "long" => Self::Wide,
            "short" => Self::Short,
            "narrow" => Self::Narrow,
            _ => {
                return Err(JsNativeError::range()
                    .with_message("provided string was not `long`, `short` or `narrow`")
                    .into());
            }
        };

        Ok(length)
    }
}

730
boa_engine/src/builtins/intl/locale/mod.rs

@ -0,0 +1,730 @@
use boa_profiler::Profiler;
use icu_collator::CaseFirst;
use icu_datetime::options::preferences::HourCycle;
use icu_locid::{
extensions::unicode::Value,
extensions_unicode_key as key, extensions_unicode_value as value,
subtags::{Language, Region, Script},
};
use tap::{Conv, Pipe};
#[cfg(test)]
mod tests;
mod utils;
pub(crate) use utils::*;
mod options;
use crate::{
builtins::{BuiltIn, JsArgs},
context::intrinsics::StandardConstructors,
js_string,
object::{
internal_methods::get_prototype_from_constructor, ConstructorBuilder, FunctionBuilder,
JsObject, ObjectData,
},
property::Attribute,
symbol::WellKnownSymbols,
Context, JsNativeError, JsResult, JsString, JsValue,
};
use super::options::{coerce_options_to_object, get_option};
/// Implementation of the JavaScript `Intl.Locale` builtin.
///
/// This unit struct carries no data: it only hosts the constructor and the prototype
/// methods. The locale value of each object is stored through `ObjectData::locale` and read
/// back with `as_locale`.
#[derive(Debug, Clone)]
pub(crate) struct Locale;
impl BuiltIn for Locale {
const NAME: &'static str = "Locale";
fn init(context: &mut Context) -> Option<JsValue> {
let _timer = Profiler::global().start_event(Self::NAME, "init");
let base_name = FunctionBuilder::native(context, Self::base_name)
.name("get baseName")
.constructor(false)
.build();
let calendar = FunctionBuilder::native(context, Self::calendar)
.name("get calendar")
.constructor(false)
.build();
let case_first = FunctionBuilder::native(context, Self::case_first)
.name("get caseFirst")
.constructor(false)
.build();
let collation = FunctionBuilder::native(context, Self::collation)
.name("get collation")
.constructor(false)
.build();
let hour_cycle = FunctionBuilder::native(context, Self::hour_cycle)
.name("get hourCycle")
.constructor(false)
.build();
let numeric = FunctionBuilder::native(context, Self::numeric)
.name("get numeric")
.constructor(false)
.build();
let numbering_system = FunctionBuilder::native(context, Self::numbering_system)
.name("get numberingSystem")
.constructor(false)
.build();
let language = FunctionBuilder::native(context, Self::language)
.name("get language")
.constructor(false)
.build();
let script = FunctionBuilder::native(context, Self::script)
.name("get script")
.constructor(false)
.build();
let region = FunctionBuilder::native(context, Self::region)
.name("get region")
.constructor(false)
.build();
ConstructorBuilder::with_standard_constructor(
context,
Self::constructor,
context.intrinsics().constructors().locale().clone(),
)
.name(Self::NAME)
.length(Self::LENGTH)
.property(
WellKnownSymbols::to_string_tag(),
"Intl.Locale",
Attribute::CONFIGURABLE,
)
.method(Self::maximize, "maximize", 0)
.method(Self::minimize, "minimize", 0)
.method(Self::to_string, "toString", 0)
.accessor("baseName", Some(base_name), None, Attribute::CONFIGURABLE)
.accessor("calendar", Some(calendar), None, Attribute::CONFIGURABLE)
.accessor("caseFirst", Some(case_first), None, Attribute::CONFIGURABLE)
.accessor("collation", Some(collation), None, Attribute::CONFIGURABLE)
.accessor("hourCycle", Some(hour_cycle), None, Attribute::CONFIGURABLE)
.accessor("numeric", Some(numeric), None, Attribute::CONFIGURABLE)
.accessor(
"numberingSystem",
Some(numbering_system),
None,
Attribute::CONFIGURABLE,
)
.accessor("language", Some(language), None, Attribute::CONFIGURABLE)
.accessor("script", Some(script), None, Attribute::CONFIGURABLE)
.accessor("region", Some(region), None, Attribute::CONFIGURABLE)
.build()
.conv::<JsValue>()
.pipe(Some)
}
}
impl Locale {
/// Value passed to `ConstructorBuilder::length` for the `Intl.Locale` constructor.
pub(crate) const LENGTH: usize = 1;
/// Constructor [`Intl.Locale ( tag [ , options ] )`][spec].
///
/// Constructor for `Locale` objects.
///
/// More information:
/// - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma402/#sec-Intl.Locale
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Locale
pub(crate) fn constructor(
new_target: &JsValue,
args: &[JsValue],
context: &mut Context,
) -> JsResult<JsValue> {
// 1. If NewTarget is undefined, throw a TypeError exception.
if new_target.is_undefined() {
return Err(JsNativeError::typ()
.with_message("cannot call `Intl.Locale` constructor without `new`")
.into());
}
let tag = args.get_or_undefined(0);
let options = args.get_or_undefined(1);
// 2. Let relevantExtensionKeys be %Locale%.[[RelevantExtensionKeys]].
// 3. Let internalSlotsList be « [[InitializedLocale]], [[Locale]], [[Calendar]], [[Collation]], [[HourCycle]], [[NumberingSystem]] ».
// 4. If relevantExtensionKeys contains "kf", then
// a. Append [[CaseFirst]] as the last element of internalSlotsList.
// 5. If relevantExtensionKeys contains "kn", then
// a. Append [[Numeric]] as the last element of internalSlotsList.
// 7. If Type(tag) is not String or Object, throw a TypeError exception.
if !(tag.is_object() || tag.is_string()) {
return Err(JsNativeError::typ()
.with_message("Intl.Locale: `tag` should be a String or Object")
.into());
}
// 8. If Type(tag) is Object and tag has an [[InitializedLocale]] internal slot, then
let mut tag = if let Some(tag) = tag
.as_object()
.and_then(|obj| obj.borrow().as_locale().cloned())
{
// a. Let tag be tag.[[Locale]].
tag
}
// 9. Else,
else {
// a. Let tag be ? ToString(tag).
tag.to_string(context)?
.to_std_string_escaped()
.parse()
.map_err(|_| {
JsNativeError::range()
.with_message("Intl.Locale: `tag` is not a structurally valid language tag")
})?
};
// 10. Set options to ? CoerceOptionsToObject(options).
let options = &coerce_options_to_object(options, context)?;
// 11. Set tag to ? ApplyOptionsToTag(tag, options).
// Abstract operation [`ApplyOptionsToTag ( tag, options )`][https://tc39.es/ecma402/#sec-apply-options-to-tag]
{
// 1. Assert: Type(tag) is String.
// 2. Assert: Type(options) is Object.
// 3. If ! IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
// 4. Let language be ? GetOption(options, "language", string, empty, undefined).
// 5. If language is not undefined, then
let language = get_option::<JsString>(options, "language", false, context)?
// a. If language does not match the unicode_language_subtag production, throw a RangeError exception.
.map(|s| s.to_std_string_escaped().parse::<Language>())
.transpose()
.map_err(|e| JsNativeError::range().with_message(e.to_string()))?;
// 6. Let script be ? GetOption(options, "script", string, empty, undefined).
// 7. If script is not undefined, then
let script = get_option::<JsString>(options, "script", false, context)?
.map(|s| s.to_std_string_escaped().parse::<Script>())
.transpose()
// a. If script does not match the unicode_script_subtag production, throw a RangeError exception.
.map_err(|e| JsNativeError::range().with_message(e.to_string()))?;
// 8. Let region be ? GetOption(options, "region", string, empty, undefined).
// 9. If region is not undefined, then
let region = get_option::<JsString>(options, "region", false, context)?
.map(|s| s.to_std_string_escaped().parse::<Region>())
.transpose()
// a. If region does not match the unicode_region_subtag production, throw a RangeError exception.
.map_err(|e| JsNativeError::range().with_message(e.to_string()))?;
// 10. Set tag to ! CanonicalizeUnicodeLocaleId(tag).
context.icu().locale_canonicalizer().canonicalize(&mut tag);
// Skipping some boilerplate since this is easier to do using the `Locale` type, but putting the
// spec for completion.
// 11. Assert: tag matches the unicode_locale_id production.
// 12. Let languageId be the substring of tag corresponding to the unicode_language_id production.
// 13. If language is not undefined, then
// a. Set languageId to languageId with the substring corresponding to the unicode_language_subtag production replaced by the string language.
// 14. If script is not undefined, then
// a. If languageId does not contain a unicode_script_subtag production, then
// i. Set languageId to the string-concatenation of the unicode_language_subtag production of languageId, "-", script, and the rest of languageId.
// b. Else,
// i. Set languageId to languageId with the substring corresponding to the unicode_script_subtag production replaced by the string script.
// 15. If region is not undefined, then
// a. If languageId does not contain a unicode_region_subtag production, then
// i. Set languageId to the string-concatenation of the unicode_language_subtag production of languageId, the substring corresponding to "-"` and the `unicode_script_subtag` production if present, `"-", region, and the rest of languageId.
// b. Else,
// i. Set languageId to languageId with the substring corresponding to the unicode_region_subtag production replaced by the string region.
// 16. Set tag to tag with the substring corresponding to the unicode_language_id production replaced by the string languageId.
if let Some(language) = language {
tag.id.language = language;
}
if let Some(script) = script {
tag.id.script = Some(script);
}
if let Some(region) = region {
tag.id.region = Some(region);
}
// 17. Return ! CanonicalizeUnicodeLocaleId(tag).
context.icu().locale_canonicalizer().canonicalize(&mut tag);
}
// 12. Let opt be a new Record.
// 13. Let calendar be ? GetOption(options, "calendar", string, empty, undefined).
// 14. If calendar is not undefined, then
// 15. Set opt.[[ca]] to calendar.
// a. If calendar does not match the Unicode Locale Identifier type nonterminal, throw a RangeError exception.
let ca = get_option::<Value>(options, "calendar", false, context)?;
// 16. Let collation be ? GetOption(options, "collation", string, empty, undefined).
// 17. If collation is not undefined, then
// 18. Set opt.[[co]] to collation.
// a. If collation does not match the Unicode Locale Identifier type nonterminal, throw a RangeError exception.
let co = get_option::<Value>(options, "collation", false, context)?;
// 19. Let hc be ? GetOption(options, "hourCycle", string, « "h11", "h12", "h23", "h24" », undefined).
// 20. Set opt.[[hc]] to hc.
let hc =
get_option::<HourCycle>(options, "hourCycle", false, context)?.map(|hc| match hc {
HourCycle::H24 => value!("h24"),
HourCycle::H23 => value!("h23"),
HourCycle::H12 => value!("h12"),
HourCycle::H11 => value!("h11"),
});
// 21. Let kf be ? GetOption(options, "caseFirst", string, « "upper", "lower", "false" », undefined).
// 22. Set opt.[[kf]] to kf.
let kf =
get_option::<CaseFirst>(options, "caseFirst", false, context)?.map(|kf| match kf {
CaseFirst::UpperFirst => value!("upper"),
CaseFirst::LowerFirst => value!("lower"),
CaseFirst::Off => value!("false"),
_ => unreachable!(),
});
// 23. Let kn be ? GetOption(options, "numeric", boolean, empty, undefined).
// 24. If kn is not undefined, set kn to ! ToString(kn).
// 25. Set opt.[[kn]] to kn.
let kn = get_option::<bool>(options, "numeric", false, context)?.map(|b| {
if b {
value!("true")
} else {
value!("false")
}
});
// 26. Let numberingSystem be ? GetOption(options, "numberingSystem", string, empty, undefined).
// 27. If numberingSystem is not undefined, then
// 28. Set opt.[[nu]] to numberingSystem.
// a. If numberingSystem does not match the Unicode Locale Identifier type nonterminal, throw a RangeError exception.
let nu = get_option::<Value>(options, "numberingSystem", false, context)?;
// 29. Let r be ! ApplyUnicodeExtensionToTag(tag, opt, relevantExtensionKeys).
// 30. Set locale.[[Locale]] to r.[[locale]].
if let Some(ca) = ca {
// 31. Set locale.[[Calendar]] to r.[[ca]].
tag.extensions.unicode.keywords.set(key!("ca"), ca);
}
if let Some(co) = co {
// 32. Set locale.[[Collation]] to r.[[co]].
tag.extensions.unicode.keywords.set(key!("co"), co);
}
if let Some(hc) = hc {
// 33. Set locale.[[HourCycle]] to r.[[hc]].
tag.extensions.unicode.keywords.set(key!("hc"), hc);
}
if let Some(kf) = kf {
// 34. If relevantExtensionKeys contains "kf", then
// a. Set locale.[[CaseFirst]] to r.[[kf]].
tag.extensions.unicode.keywords.set(key!("kf"), kf);
}
if let Some(kn) = kn {
// 35. If relevantExtensionKeys contains "kn", then
// a. If SameValue(r.[[kn]], "true") is true or r.[[kn]] is the empty String, then
// i. Set locale.[[Numeric]] to true.
// b. Else,
// i. Set locale.[[Numeric]] to false.
tag.extensions.unicode.keywords.set(key!("kn"), kn);
}
if let Some(nu) = nu {
// 36. Set locale.[[NumberingSystem]] to r.[[nu]].
tag.extensions.unicode.keywords.set(key!("nu"), nu);
}
context.icu().locale_canonicalizer().canonicalize(&mut tag);
// 6. Let locale be ? OrdinaryCreateFromConstructor(NewTarget, "%Locale.prototype%", internalSlotsList).
let prototype =
get_prototype_from_constructor(new_target, StandardConstructors::locale, context)?;
let locale = JsObject::from_proto_and_data(prototype, ObjectData::locale(tag));
// 37. Return locale.
Ok(locale.into())
}
/// [`Intl.Locale.prototype.maximize ( )`][spec].
///
/// Returns a new `Locale` object holding a copy of this locale after running the locale
/// expander's `maximize` on it. The receiver is left untouched.
///
/// Returns a `TypeError` if `this` is not a `Locale` object.
///
/// More information:
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma402/#sec-Intl.Locale.prototype.maximize
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Locale/maximize
pub(crate) fn maximize(
    this: &JsValue,
    _: &[JsValue],
    context: &mut Context,
) -> JsResult<JsValue> {
    let wrong_receiver = || {
        JsNativeError::typ().with_message("`maximize` can only be called on a `Locale` object")
    };

    // 1. Let loc be the this value.
    // 2. Perform ? RequireInternalSlot(loc, [[InitializedLocale]]).
    let object = this
        .as_object()
        .map(JsObject::borrow)
        .ok_or_else(wrong_receiver)?;
    let mut maximal = object.as_locale().ok_or_else(wrong_receiver)?.clone();

    // 3. Let maximal be the result of the Add Likely Subtags algorithm applied to loc.[[Locale]]. If an error is signaled, set maximal to loc.[[Locale]].
    context.icu().locale_expander().maximize(&mut maximal);

    // 4. Return ! Construct(%Locale%, maximal).
    let prototype = context.intrinsics().constructors().locale().prototype();
    let result = JsObject::from_proto_and_data(prototype, ObjectData::locale(maximal));
    Ok(result.into())
}
/// [`Intl.Locale.prototype.minimize ( )`][spec]
///
/// Returns a new `Locale` object holding a copy of this locale after running the locale
/// expander's `minimize` on it. The receiver is left untouched.
///
/// Returns a `TypeError` if `this` is not a `Locale` object.
///
/// More information:
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma402/#sec-Intl.Locale.prototype.minimize
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Locale/minimize
pub(crate) fn minimize(
    this: &JsValue,
    _: &[JsValue],
    context: &mut Context,
) -> JsResult<JsValue> {
    let wrong_receiver = || {
        JsNativeError::typ().with_message("`minimize` can only be called on a `Locale` object")
    };

    // 1. Let loc be the this value.
    // 2. Perform ? RequireInternalSlot(loc, [[InitializedLocale]]).
    let object = this
        .as_object()
        .map(JsObject::borrow)
        .ok_or_else(wrong_receiver)?;
    let mut minimal = object.as_locale().ok_or_else(wrong_receiver)?.clone();

    // 3. Let minimal be the result of the Remove Likely Subtags algorithm applied to loc.[[Locale]]. If an error is signaled, set minimal to loc.[[Locale]].
    context.icu().locale_expander().minimize(&mut minimal);

    // 4. Return ! Construct(%Locale%, minimal).
    let prototype = context.intrinsics().constructors().locale().prototype();
    let result = JsObject::from_proto_and_data(prototype, ObjectData::locale(minimal));
    Ok(result.into())
}
/// [`Intl.Locale.prototype.toString ( )`][spec].
///
/// Returns the string form of the stored locale.
///
/// Returns a `TypeError` if `this` is not a `Locale` object.
///
/// More information:
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma402/#sec-Intl.Locale.prototype.toString
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Locale/toString
pub(crate) fn to_string(this: &JsValue, _: &[JsValue], _: &mut Context) -> JsResult<JsValue> {
    let wrong_receiver = || {
        JsNativeError::typ().with_message("`toString` can only be called on a `Locale` object")
    };

    // 1. Let loc be the this value.
    // 2. Perform ? RequireInternalSlot(loc, [[InitializedLocale]]).
    let object = this
        .as_object()
        .map(JsObject::borrow)
        .ok_or_else(wrong_receiver)?;
    let locale = object.as_locale().ok_or_else(wrong_receiver)?;

    // 3. Return loc.[[Locale]].
    let text = js_string!(locale.to_string());
    Ok(text.into())
}
/// [`get Intl.Locale.prototype.baseName`][spec].
///
/// Returns the string form of the language identifier part (`id`) of the stored locale,
/// i.e. the locale without its extensions.
///
/// Returns a `TypeError` if `this` is not a `Locale` object.
///
/// More information:
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma402/#sec-Intl.Locale.prototype.baseName
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Locale/baseName
pub(crate) fn base_name(this: &JsValue, _: &[JsValue], _: &mut Context) -> JsResult<JsValue> {
    let wrong_receiver = || {
        JsNativeError::typ()
            .with_message("`get baseName` can only be called on a `Locale` object")
    };

    // 1. Let loc be the this value.
    // 2. Perform ? RequireInternalSlot(loc, [[InitializedLocale]]).
    let object = this
        .as_object()
        .map(JsObject::borrow)
        .ok_or_else(wrong_receiver)?;
    let locale = object.as_locale().ok_or_else(wrong_receiver)?;

    // 3. Let locale be loc.[[Locale]].
    // 4. Return the substring of locale corresponding to the unicode_language_id production.
    let base = js_string!(locale.id.to_string());
    Ok(base.into())
}
/// [`get Intl.Locale.prototype.calendar`][spec].
///
/// More information:
/// - [MDN documentation][mdn]
///
/// [spec]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Locale/calendar
/// [mdn]: https://tc39.es/ecma402/#sec-Intl.Locale.prototype.calendar
pub(crate) fn calendar(this: &JsValue, _: &[JsValue], _: &mut Context) -> JsResult<JsValue> {
// 1. Let loc be the this value.
// 2. Perform ? RequireInternalSlot(loc, [[InitializedLocale]]).
let loc = this.as_object().map(JsObject::borrow).ok_or_else(|| {
JsNativeError::typ()
.with_message("`get calendar` can only be called on a `Locale` object")
})?;
let loc = loc.as_locale().ok_or_else(|| {
JsNativeError::typ()
.with_message("`get calendar` can only be called on a `Locale` object")
})?;
// 3. Return loc.[[Calendar]].
Ok(loc
.extensions
.unicode
.keywords
.get(&key!("ca"))
.map(|v| js_string!(v.to_string()).into())
.unwrap_or_default())
}
/// [`get Intl.Locale.prototype.caseFirst`][spec].
///
/// More information:
/// - [MDN documentation][mdn]
///
/// [spec]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Locale/calendar
/// [mdn]: https://tc39.es/ecma402/#sec-Intl.Locale.prototype.calendar
pub(crate) fn case_first(this: &JsValue, _: &[JsValue], _: &mut Context) -> JsResult<JsValue> {
// 1. Let loc be the this value.
// 2. Perform ? RequireInternalSlot(loc, [[InitializedLocale]]).
let loc = this.as_object().map(JsObject::borrow).ok_or_else(|| {
JsNativeError::typ()
.with_message("`get caseFirst` can only be called on a `Locale` object")
})?;
let loc = loc.as_locale().ok_or_else(|| {
JsNativeError::typ()
.with_message("`get caseFirst` can only be called on a `Locale` object")
})?;
// 3. Return loc.[[CaseFirst]].
Ok(loc
.extensions
.unicode
.keywords
.get(&key!("kf"))
.map(|v| js_string!(v.to_string()).into())
.unwrap_or_default())
}
/// [`get Intl.Locale.prototype.collation`][spec].
///
/// More information:
/// - [MDN documentation][mdn]
///
/// [spec]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Locale/collation
/// [mdn]: https://tc39.es/ecma402/#sec-Intl.Locale.prototype.collation
pub(crate) fn collation(this: &JsValue, _: &[JsValue], _: &mut Context) -> JsResult<JsValue> {
// 1. Let loc be the this value.
// 2. Perform ? RequireInternalSlot(loc, [[InitializedLocale]]).
let loc = this.as_object().map(JsObject::borrow).ok_or_else(|| {
JsNativeError::typ()
.with_message("`get collation` can only be called on a `Locale` object")
})?;
let loc = loc.as_locale().ok_or_else(|| {
JsNativeError::typ()
.with_message("`get collation` can only be called on a `Locale` object")
})?;
// 3. Return loc.[[Collation]].
Ok(loc
.extensions
.unicode
.keywords
.get(&key!("co"))
.map(|v| js_string!(v.to_string()).into())
.unwrap_or_default())
}
/// [`get Intl.Locale.prototype.hourCycle`][spec].
///
/// More information:
/// - [MDN documentation][mdn]
///
/// [spec]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Locale/hourCycle
/// [mdn]: https://tc39.es/ecma402/#sec-Intl.Locale.prototype.hourCycle
pub(crate) fn hour_cycle(this: &JsValue, _: &[JsValue], _: &mut Context) -> JsResult<JsValue> {
// 1. Let loc be the this value.
// 2. Perform ? RequireInternalSlot(loc, [[InitializedLocale]]).
let loc = this.as_object().map(JsObject::borrow).ok_or_else(|| {
JsNativeError::typ()
.with_message("`get hourCycle` can only be called on a `Locale` object")
})?;
let loc = loc.as_locale().ok_or_else(|| {
JsNativeError::typ()
.with_message("`get hourCycle` can only be called on a `Locale` object")
})?;
// 3. Return loc.[[HourCycle]].
Ok(loc
.extensions
.unicode
.keywords
.get(&key!("hc"))
.map(|v| js_string!(v.to_string()).into())
.unwrap_or_default())
}
/// [`get Intl.Locale.prototype.numeric`][spec].
///
/// Returns `true` when the `kn` Unicode extension keyword is present with
/// either no subtags or the single subtag `true`; returns `false` otherwise.
/// Throws a `TypeError` if `this` is not a `Locale` object.
///
/// More information:
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma402/#sec-Intl.Locale.prototype.numeric
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Locale/numeric
pub(crate) fn numeric(this: &JsValue, _: &[JsValue], _: &mut Context) -> JsResult<JsValue> {
    // Error shared by both receiver checks; only built when a check fails.
    let invalid_receiver = || {
        JsNativeError::typ()
            .with_message("`get numeric` can only be called on a `Locale` object")
    };

    // 1. Let loc be the this value.
    // 2. Perform ? RequireInternalSlot(loc, [[InitializedLocale]]).
    let object = this.as_object().ok_or_else(invalid_receiver)?.borrow();
    let locale = object.as_locale().ok_or_else(invalid_receiver)?;

    // 3. Return loc.[[Numeric]].
    let subtags = locale
        .extensions
        .unicode
        .keywords
        .get(&key!("kn"))
        .map(Value::as_tinystr_slice);
    let is_numeric = match subtags {
        // Keyword present without any subtag.
        Some([]) => true,
        // Keyword present with exactly one subtag: only `true` enables it.
        Some([subtag]) => subtag == "true",
        _ => false,
    };
    Ok(JsValue::Boolean(is_numeric))
}
/// [`get Intl.Locale.prototype.numberingSystem`][spec].
///
/// More information:
/// - [MDN documentation][mdn]
///
/// [spec]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Locale/numeric
/// [mdn]: https://tc39.es/ecma402/#sec-Intl.Locale.prototype.numeric
pub(crate) fn numbering_system(
this: &JsValue,
_: &[JsValue],
_: &mut Context,
) -> JsResult<JsValue> {
// 1. Let loc be the this value.
// 2. Perform ? RequireInternalSlot(loc, [[InitializedLocale]]).
let loc = this.as_object().map(JsObject::borrow).ok_or_else(|| {
JsNativeError::typ()
.with_message("`get numberingSystem` can only be called on a `Locale` object")
})?;
let loc = loc.as_locale().ok_or_else(|| {
JsNativeError::typ()
.with_message("`get numberingSystem` can only be called on a `Locale` object")
})?;
// 3. Return loc.[[NumberingSystem]].
Ok(loc
.extensions
.unicode
.keywords
.get(&key!("nu"))
.map(|v| js_string!(v.to_string()).into())
.unwrap_or_default())
}
/// [`get Intl.Locale.prototype.language`][spec].
///
/// Returns the language subtag of the locale as a string.
/// Throws a `TypeError` if `this` is not a `Locale` object.
///
/// More information:
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma402/#sec-Intl.Locale.prototype.language
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Locale/language
pub(crate) fn language(this: &JsValue, _: &[JsValue], _: &mut Context) -> JsResult<JsValue> {
    // Error shared by both receiver checks; only built when a check fails.
    let invalid_receiver = || {
        JsNativeError::typ()
            .with_message("`get language` can only be called on a `Locale` object")
    };

    // 1. Let loc be the this value.
    // 2. Perform ? RequireInternalSlot(loc, [[InitializedLocale]]).
    let object = this.as_object().ok_or_else(invalid_receiver)?.borrow();
    let locale = object.as_locale().ok_or_else(invalid_receiver)?;

    // 3. Let locale be loc.[[Locale]].
    // 4. Assert: locale matches the unicode_locale_id production.
    // 5. Return the substring of locale corresponding to the unicode_language_subtag
    //    production of the unicode_language_id.
    Ok(js_string!(locale.id.language.to_string()).into())
}
/// [`get Intl.Locale.prototype.script`][spec].
///
/// More information:
/// - [MDN documentation][mdn]
///
/// [spec]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Locale/script
/// [mdn]: https://tc39.es/ecma402/#sec-Intl.Locale.prototype.script
pub(crate) fn script(this: &JsValue, _: &[JsValue], _: &mut Context) -> JsResult<JsValue> {
// 1. Let loc be the this value.
// 2. Perform ? RequireInternalSlot(loc, [[InitializedLocale]]).
let loc = this.as_object().map(JsObject::borrow).ok_or_else(|| {
JsNativeError::typ()
.with_message("`get script` can only be called on a `Locale` object")
})?;
let loc = loc.as_locale().ok_or_else(|| {
JsNativeError::typ()
.with_message("`get script` can only be called on a `Locale` object")
})?;
// 3. Let locale be loc.[[Locale]].
// 4. Assert: locale matches the unicode_locale_id production.
// 5. If the unicode_language_id production of locale does not contain the ["-" unicode_script_subtag] sequence, return undefined.
// 6. Return the substring of locale corresponding to the unicode_script_subtag production of the unicode_language_id.
Ok(loc
.id
.script
.map(|sc| js_string!(sc.to_string()).into())
.unwrap_or_default())
}
/// [`get Intl.Locale.prototype.region`][spec].
///
/// More information:
/// - [MDN documentation][mdn]
///
/// [spec]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Locale/region
/// [mdn]: https://tc39.es/ecma402/#sec-Intl.Locale.prototype.region
pub(crate) fn region(this: &JsValue, _: &[JsValue], _: &mut Context) -> JsResult<JsValue> {
// 1. Let loc be the this value.
// 2. Perform ? RequireInternalSlot(loc, [[InitializedLocale]]).
let loc = this.as_object().map(JsObject::borrow).ok_or_else(|| {
JsNativeError::typ()
.with_message("`get region` can only be called on a `Locale` object")
})?;
let loc = loc.as_locale().ok_or_else(|| {
JsNativeError::typ()
.with_message("`get region` can only be called on a `Locale` object")
})?;
// 3. Let locale be loc.[[Locale]].
// 4. Assert: locale matches the unicode_locale_id production.
// 5. If the unicode_language_id production of locale does not contain the ["-" unicode_region_subtag] sequence, return undefined.
// 6. Return the substring of locale corresponding to the unicode_region_subtag production of the unicode_language_id.
Ok(loc
.id
.region
.map(|sc| js_string!(sc.to_string()).into())
.unwrap_or_default())
}
}

21
boa_engine/src/builtins/intl/locale/options.rs

@ -0,0 +1,21 @@
use icu_locid::extensions::unicode::Value;
use crate::{builtins::intl::options::OptionType, JsNativeError};
/// Lets Unicode extension values be read directly from an options object.
impl OptionType for Value {
    /// Converts `value` to a string and parses it as a Unicode extension value.
    ///
    /// Fails with a `RangeError` when the string cannot be parsed, or when the
    /// parsed value contains no subtags. Errors raised by the string conversion
    /// itself are propagated unchanged.
    fn from_value(value: crate::JsValue, context: &mut crate::Context) -> crate::JsResult<Self> {
        let text = value.to_string(context)?.to_std_string_escaped();

        let parsed = match text.parse::<Value>() {
            Ok(parsed) => parsed,
            Err(err) => {
                return Err(JsNativeError::range()
                    .with_message(err.to_string())
                    .into())
            }
        };

        // A value without subtags parses successfully but is not accepted here.
        if parsed.as_tinystr_slice().is_empty() {
            Err(JsNativeError::range()
                .with_message("Unicode Locale Identifier `type` cannot be empty")
                .into())
        } else {
            Ok(parsed)
        }
    }
}

126
boa_engine/src/builtins/intl/locale/tests.rs

@ -0,0 +1,126 @@
use icu_datetime::{
options::preferences::HourCycle, pattern::CoarseHourCycle,
provider::calendar::TimeLengthsV1Marker,
};
use icu_locid::{
extensions::unicode::Value, extensions_unicode_key as key, extensions_unicode_value as value,
locale, Locale,
};
use icu_plurals::provider::CardinalV1Marker;
use icu_provider::{DataLocale, DataProvider, DataRequest, DataRequestMetadata};
use icu_provider_adapters::fallback::LocaleFallbackProvider;
use crate::{
builtins::intl::{
locale::{best_locale_for_provider, default_locale, resolve_locale},
options::{IntlOptions, LocaleMatcher},
Service,
},
context::icu::{BoaProvider, Icu},
};
/// Service options used by `TestService` in the locale resolution tests.
#[derive(Debug)]
struct TestOptions {
// Requested hour cycle. `TestService::resolve` falls back to the locale's
// `hc` keyword (and then to provider data) when this is `None`, and always
// writes the chosen cycle back into this field.
hc: Option<HourCycle>,
}
/// Minimal `Service` implementation used to exercise `resolve_locale`; it only
/// resolves the `hc` (hour cycle) extension keyword.
struct TestService;
impl<P> Service<P> for TestService
where
    P: DataProvider<TimeLengthsV1Marker>,
{
    type LangMarker = CardinalV1Marker;

    type LocaleOptions = TestOptions;

    /// Resolves the hour cycle for `locale`.
    ///
    /// Precedence: explicit option, then a recognized `hc` keyword on the
    /// locale, then the preferred hour cycle reported by `provider`. The chosen
    /// cycle is written back to both the locale keyword and `options.hc`.
    fn resolve(locale: &mut Locale, options: &mut Self::LocaleOptions, provider: &P) {
        // Hour cycle requested through the locale's `hc` keyword, if recognized.
        let requested_subtag = locale
            .extensions
            .unicode
            .keywords
            .get(&key!("hc"))
            .and_then(Value::as_single_subtag);
        let extension_hc = match requested_subtag {
            Some(subtag) => match &**subtag {
                "h11" => Some(HourCycle::H11),
                "h12" => Some(HourCycle::H12),
                "h23" => Some(HourCycle::H23),
                "h24" => Some(HourCycle::H24),
                _ => None,
            },
            None => None,
        };

        let resolved = match options.hc.or(extension_hc) {
            Some(hc) => hc,
            // Nothing was requested: ask the provider for the locale's preference.
            None => {
                let request = DataRequest {
                    locale: &DataLocale::from(&*locale),
                    metadata: DataRequestMetadata::default(),
                };
                let preferred = DataProvider::<TimeLengthsV1Marker>::load(provider, request)
                    .unwrap()
                    .take_payload()
                    .unwrap()
                    .get()
                    .preferred_hour_cycle;
                match preferred {
                    CoarseHourCycle::H11H12 => HourCycle::H11,
                    CoarseHourCycle::H23H24 => HourCycle::H23,
                }
            }
        };

        let keyword_value = match resolved {
            HourCycle::H11 => value!("h11"),
            HourCycle::H12 => value!("h12"),
            HourCycle::H23 => value!("h23"),
            HourCycle::H24 => value!("h24"),
        };
        locale
            .extensions
            .unicode
            .keywords
            .set(key!("hc"), keyword_value);
        options.hc = Some(resolved);
    }
}
/// Checks `resolve_locale` with both matchers on an empty request list, and
/// with the lookup matcher on a locale that needs truncation.
#[test]
fn locale_resolution() {
    let fallback_provider =
        LocaleFallbackProvider::try_new_with_buffer_provider(boa_icu_provider::blob()).unwrap();
    let icu = Icu::new(BoaProvider::Buffer(Box::new(fallback_provider))).unwrap();

    // Host default locale, carrying the `hc` keyword the service will add.
    let mut expected_default = default_locale(icu.locale_canonicalizer());
    expected_default
        .extensions
        .unicode
        .keywords
        .set(key!("hc"), value!("h11"));

    // Lookup matcher with no requested locales: resolves to the default locale.
    let mut lookup_options = IntlOptions {
        matcher: LocaleMatcher::Lookup,
        service_options: TestOptions {
            hc: Some(HourCycle::H11),
        },
    };
    let resolved = resolve_locale::<TestService, _>(&[], &mut lookup_options, &icu);
    assert_eq!(resolved, expected_default);

    // Best fit matcher with no requested locales: resolves to whatever the
    // provider reports for the default language identifier.
    let mut best_fit_options = IntlOptions {
        matcher: LocaleMatcher::BestFit,
        service_options: TestOptions {
            hc: Some(HourCycle::H11),
        },
    };
    let resolved = resolve_locale::<TestService, _>(&[], &mut best_fit_options, &icu);
    let provider_id = best_locale_for_provider::<<TestService as Service<BoaProvider>>::LangMarker>(
        expected_default.id.clone(),
        icu.provider(),
    )
    .unwrap();
    let mut expected_best = Locale::from(provider_id);
    expected_best.extensions = resolved.extensions.clone();
    assert_eq!(resolved, expected_best);

    // requested: [es-AR], no explicit hour cycle
    let mut unset_options = IntlOptions {
        matcher: LocaleMatcher::Lookup,
        service_options: TestOptions { hc: None },
    };
    let resolved = resolve_locale::<TestService, _>(&[locale!("es-AR")], &mut unset_options, &icu);
    assert_eq!(resolved, "es-u-hc-h23".parse().unwrap());
}

604
boa_engine/src/builtins/intl/locale/utils.rs

@ -0,0 +1,604 @@
use crate::{
builtins::{
intl::{
options::{coerce_options_to_object, get_option, IntlOptions, LocaleMatcher},
Service,
},
Array,
},
context::{icu::Icu, BoaProvider},
object::JsObject,
Context, JsNativeError, JsResult, JsValue,
};
use icu_collator::provider::CollationMetadataV1Marker;
use icu_locid::{
extensions::unicode::{Key, Value},
subtags::Variants,
LanguageIdentifier, Locale,
};
use icu_locid_transform::LocaleCanonicalizer;
use icu_provider::{DataLocale, DataProvider, DataRequest, DataRequestMetadata, KeyedDataMarker};
use indexmap::IndexSet;
use tap::TapOptional;
/// Abstract operation `DefaultLocale ( )`
///
/// Returns the canonicalized locale reported by the host environment.
/// When the host reports no locale, or the reported tag cannot be parsed as a
/// [`Locale`], the default `Locale` value is returned instead (and is not
/// passed through `canonicalizer`).
///
/// More information:
///  - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-defaultlocale
pub(crate) fn default_locale(canonicalizer: &LocaleCanonicalizer) -> Locale {
    let host_tag = sys_locale::get_locale();
    let host_locale = host_tag.and_then(|tag| tag.parse::<Locale>().ok());

    host_locale
        .tap_some_mut(|locale| {
            canonicalizer.canonicalize(locale);
        })
        .unwrap_or_default()
}
/// Abstract operation `CanonicalizeLocaleList ( locales )`
///
/// Converts an array of [`JsValue`]s containing structurally valid
/// [Unicode BCP 47 locale identifiers][bcp-47] into their [canonical form][canon].
///
/// For efficiency, this returns [`Locale`]s instead of [`String`]s, since
/// `Locale` allows us to modify individual parts of the locale without scanning
/// the whole string again.
///
/// The returned list contains each canonicalized locale once, in the order it
/// was first encountered. An `undefined` argument yields an empty list.
///
/// # Errors
///
/// - `TypeError` if an element of `locales` is neither a String nor an Object.
/// - `RangeError` if an element cannot be parsed as a locale.
/// - Any error thrown while converting `locales` to an object, reading its
///   length or elements, or converting an element to a string.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-canonicalizelocalelist
/// [bcp-47]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
/// [canon]: https://unicode.org/reports/tr35/#LocaleId_Canonicalization
pub(crate) fn canonicalize_locale_list(
locales: &JsValue,
context: &mut Context,
) -> JsResult<Vec<Locale>> {
// 1. If locales is undefined, then
if locales.is_undefined() {
// a. Return a new empty List.
return Ok(Vec::default());
}
// 2. Let seen be a new empty List.
// An `IndexSet` keeps insertion order and ignores repeated values, which
// gives the "append if not already present" behavior of step 7.c.vii.
let mut seen = IndexSet::new();
// 3. If Type(locales) is String or Type(locales) is Object and locales has an [[InitializedLocale]] internal slot, then
let o = if locales.is_string()
|| locales
.as_object()
.map_or(false, |o| o.borrow().is_locale())
{
// a. Let O be CreateArrayFromList(« locales »).
Array::create_array_from_list([locales.clone()], context)
} else {
// 4. Else,
// a. Let O be ? ToObject(locales).
locales.to_object(context)?
};
// 5. Let len be ? ToLength(? Get(O, "length")).
let len = o.length_of_array_like(context)?;
// 6 Let k be 0.
// 7. Repeat, while k < len,
for k in 0..len {
// a. Let Pk be ToString(k).
// b. Let kPresent be ? HasProperty(O, Pk).
let k_present = o.has_property(k, context)?;
// c. If kPresent is true, then
if k_present {
// i. Let kValue be ? Get(O, Pk).
let k_value = o.get(k, context)?;
// ii. If Type(kValue) is not String or Object, throw a TypeError exception.
if !(k_value.is_object() || k_value.is_string()) {
return Err(JsNativeError::typ()
.with_message("locale should be a String or Object")
.into());
}
// iii. If Type(kValue) is Object and kValue has an [[InitializedLocale]] internal slot, then
let mut tag = if let Some(tag) = k_value
.as_object()
.and_then(|obj| obj.borrow().as_locale().cloned())
{
// 1. Let tag be kValue.[[Locale]].
tag
}
// iv. Else,
else {
// 1. Let tag be ? ToString(kValue).
k_value
.to_string(context)?
.to_std_string_escaped()
.parse()
// v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
.map_err(|_| {
JsNativeError::range()
.with_message("locale is not a structurally valid language tag")
})?
};
// vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
// The tag is canonicalized in place.
context.icu().locale_canonicalizer().canonicalize(&mut tag);
// vii. If canonicalizedTag is not an element of seen, append canonicalizedTag as the last element of seen.
seen.insert(tag);
}
// d. Increase k by 1.
}
// 8. Return seen.
Ok(seen.into_iter().collect())
}
/// Abstract operation `BestAvailableLocale ( availableLocales, locale )`
///
/// Compares the provided argument `locale`, which must be a String value with a
/// structurally valid and canonicalized Unicode BCP 47 locale identifier, against
/// the locales in `availableLocales` and returns either the longest non-empty prefix
/// of `locale` that is an element of `availableLocales`, or undefined if there is no
/// such element.
///
/// We only work with language identifiers, which have the same semantics
/// but are a bit easier to manipulate.
///
/// Availability is checked by issuing data requests for marker `M` against
/// `provider`. After each unsuccessful check the candidate is shortened by
/// removing its last variant, then its region, then its script. `None` is
/// returned once nothing is left to remove and no match was found.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-bestavailablelocale
pub(crate) fn best_available_locale<M: KeyedDataMarker>(
candidate: LanguageIdentifier,
provider: &(impl DataProvider<M> + ?Sized),
) -> Option<LanguageIdentifier> {
// 1. Let candidate be locale.
// Converted into the locale type that `DataRequest` expects.
let mut candidate = candidate.into();
// 2. Repeat
loop {
// a. If availableLocales contains an element equal to candidate, return candidate.
// ICU4X requires doing data requests in order to check if a locale
// is part of the set of supported locales.
let response = DataProvider::<M>::load(
provider,
DataRequest {
locale: &candidate,
metadata: DataRequestMetadata::default(),
},
);
// A failed request is treated the same as "not available".
if let Ok(req) = response {
// `metadata.locale` returns None when the provider doesn't have a fallback mechanism,
// but supports the required locale. However, if the provider has a fallback mechanism,
// this will return `Some(locale)`, where the locale is the used locale after applying
// the fallback algorithm, even if the used locale is exactly the same as the required
// locale.
match req.metadata.locale {
Some(loc)
if loc == candidate
// TODO: ugly hack to accept locales that fallback to "und" in the collator service
|| (loc.is_empty() && M::KEY.path() == CollationMetadataV1Marker::KEY.path()) =>
{
return Some(candidate.into_locale().id)
}
None => return Some(candidate.into_locale().id),
// The data was resolved for a different locale: keep shortening.
_ => {}
}
}
// b. Let pos be the character index of the last occurrence of "-" (U+002D) within candidate. If that character does not occur, return undefined.
// c. If pos ≥ 2 and the character "-" occurs at index pos-2 of candidate, decrease pos by 2.
// d. Let candidate be the substring of candidate from position 0, inclusive, to position pos, exclusive.
//
// Since the definition of `LanguageIdentifier` allows us to manipulate it
// without using strings, we can replace these steps by a simpler
// algorithm.
if candidate.has_variants() {
// Remove only the last variant and put the remaining ones back.
let mut variants = candidate
.clear_variants()
.iter()
.copied()
.collect::<Vec<_>>();
variants.pop();
candidate.set_variants(Variants::from_vec_unchecked(variants));
} else if candidate.region().is_some() {
candidate.set_region(None);
} else if candidate.script().is_some() {
candidate.set_script(None);
} else {
// Only the language is left and it did not match.
return None;
}
}
}
/// Returns the language identifier that `provider` resolves `candidate` to
/// when loading data for marker `M`, using the provider's own fallback
/// algorithm if it has one.
///
/// Returns `None` when the data request fails, or when a `candidate` other
/// than `und` ends up resolved to `und`. A `candidate` of `und` is returned
/// unchanged as long as the data request succeeds.
pub(crate) fn best_locale_for_provider<M: KeyedDataMarker>(
    candidate: LanguageIdentifier,
    provider: &(impl DataProvider<M> + ?Sized),
) -> Option<LanguageIdentifier> {
    let request = DataRequest {
        locale: &DataLocale::from(&candidate),
        metadata: DataRequestMetadata::default(),
    };
    let response = DataProvider::<M>::load(provider, request).ok()?;

    if candidate == LanguageIdentifier::UND {
        return Some(LanguageIdentifier::UND);
    }

    let resolved = match response.metadata.locale {
        // TODO: ugly hack to accept locales that fallback to "und" in the collator service
        Some(data_locale)
            if M::KEY.path() == CollationMetadataV1Marker::KEY.path()
                && data_locale.is_empty() =>
        {
            candidate
        }
        Some(data_locale) => data_locale.into_locale().id,
        // No resolved locale reported: the candidate itself was used.
        None => candidate,
    };

    if resolved == LanguageIdentifier::UND {
        None
    } else {
        Some(resolved)
    }
}
/// Abstract operation [`LookupMatcher ( availableLocales, requestedLocales )`][spec]
///
/// Walks `requested_locales` in order and returns the first one for which
/// `best_available_locale` finds a match, with its language identifier
/// replaced by that match and its extensions kept. If none matches, the
/// default locale is returned.
///
/// # Note
///
/// This differs a bit from the spec, since we don't have an `[[AvailableLocales]]`
/// list to compare with. However, we can do data requests to a [`DataProvider`]
/// in order to see if a certain [`Locale`] is supported.
///
/// [spec]: https://tc39.es/ecma402/#sec-lookupmatcher
fn lookup_matcher<M: KeyedDataMarker>(
    requested_locales: &[Locale],
    icu: &Icu<impl DataProvider<M>>,
) -> Locale {
    // 1. Let result be a new Record.
    // 2. For each element locale of requestedLocales, do
    let matched = requested_locales.iter().find_map(|requested| {
        // a. Let noExtensionsLocale be the String value that is locale with any Unicode
        //    locale extension sequences removed.
        // Taking the id out leaves the extensions behind in `result`.
        let mut result = requested.clone();
        let no_extensions = std::mem::take(&mut result.id);

        // b. Let availableLocale be ! BestAvailableLocale(availableLocales, noExtensionsLocale).
        // c. If availableLocale is not undefined, then
        let available = best_available_locale::<M>(no_extensions, icu.provider())?;

        //     i. Set result.[[locale]] to availableLocale.
        //     ii. If locale and noExtensionsLocale are not the same String value, then
        //         1. Let extension be the String value consisting of the substring of the
        //            Unicode locale extension sequence within locale.
        //         2. Set result.[[extension]] to extension.
        result.id = available;

        //     iii. Return result.
        Some(result)
    });

    // 3. Let defLocale be ! DefaultLocale().
    // 4. Set result.[[locale]] to defLocale.
    // 5. Return result.
    matched.unwrap_or_else(|| default_locale(icu.locale_canonicalizer()))
}
/// Abstract operation [`BestFitMatcher ( availableLocales, requestedLocales )`][spec]
///
/// Tries each requested locale in order, followed by the default locale, and
/// returns the first one that `best_locale_for_provider` can resolve. The
/// returned locale carries the resolved language identifier and the
/// extensions of the locale it was derived from. If nothing resolves, the
/// default `Locale` value is returned.
///
/// [spec]: https://tc39.es/ecma402/#sec-bestfitmatcher
fn best_fit_matcher<M: KeyedDataMarker>(
    requested_locales: &[Locale],
    icu: &Icu<impl DataProvider<M>>,
) -> Locale {
    // Computed lazily: only needed when no requested locale resolves.
    let host_default = std::iter::once_with(|| default_locale(icu.locale_canonicalizer()));

    requested_locales
        .iter()
        .cloned()
        .chain(host_default)
        .find_map(|mut locale| {
            let id = std::mem::take(&mut locale.id);
            let available = best_locale_for_provider(id, icu.provider())?;
            locale.id = available;
            Some(locale)
        })
        .unwrap_or_default()
}
/// Abstract operation `ResolveLocale ( availableLocales, requestedLocales, options, relevantExtensionKeys, localeData )`
///
/// Picks a locale for service `S` from `requested_locales` using the matcher
/// selected in `options`, lets the service resolve its own extensions and
/// options through `S::resolve`, and returns the canonicalized result.
///
/// More information:
///  - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-resolvelocale
pub(in crate::builtins::intl) fn resolve_locale<S, P>(
    requested_locales: &[Locale],
    options: &mut IntlOptions<S::LocaleOptions>,
    icu: &Icu<P>,
) -> Locale
where
    S: Service<P>,
    P: DataProvider<S::LangMarker>,
{
    // 1. Let matcher be options.[[localeMatcher]].
    // 2. If matcher is "lookup", then
    //    a. Let r be ! LookupMatcher(availableLocales, requestedLocales).
    // 3. Else,
    //    a. Let r be ! BestFitMatcher(availableLocales, requestedLocales).
    // 4. Let foundLocale be r.[[locale]].
    let mut found_locale = match options.matcher {
        LocaleMatcher::Lookup => lookup_matcher::<S::LangMarker>(requested_locales, icu),
        LocaleMatcher::BestFit => best_fit_matcher::<S::LangMarker>(requested_locales, icu),
    };

    // From here, the spec differs significantly from the implementation,
    // since ICU4X allows us to skip some steps and modularize the
    // extension resolution algorithm. However, the original spec is left here
    // for completion purposes.

    // 5. Let result be a new Record.
    // 6. Set result.[[dataLocale]] to foundLocale.
    // 7. If r has an [[extension]] field, then
    //    a. Let components be ! UnicodeExtensionComponents(r.[[extension]]).
    //    b. Let keywords be components.[[Keywords]].
    // 9. For each element key of relevantExtensionKeys, do
    //    a. Let foundLocaleData be localeData.[[<foundLocale>]].
    //    b. Assert: Type(foundLocaleData) is Record.
    //    c. Let keyLocaleData be foundLocaleData.[[<key>]].
    //    d. Assert: Type(keyLocaleData) is List.
    //    e. Let value be keyLocaleData[0].
    //    f. Assert: Type(value) is either String or Null.
    //    g. Let supportedExtensionAddition be "".
    //    h. If r has an [[extension]] field, then
    //        i. If keywords contains an element whose [[Key]] is the same as key, then
    //            1. Let entry be the element of keywords whose [[Key]] is the same as key.
    //            2. Let requestedValue be entry.[[Value]].
    //            3. If requestedValue is not the empty String, then
    //                a. If keyLocaleData contains requestedValue, then
    //                    i. Let value be requestedValue.
    //                    ii. Let supportedExtensionAddition be the string-concatenation of "-", key, "-", and value.
    //            4. Else if keyLocaleData contains "true", then
    //                a. Let value be "true".
    //                b. Let supportedExtensionAddition be the string-concatenation of "-" and key.
    //    i. If options has a field [[<key>]], then
    //        i. Let optionsValue be options.[[<key>]].
    //        ii. Assert: Type(optionsValue) is either String, Undefined, or Null.
    //        iii. If Type(optionsValue) is String, then
    //            1. Let optionsValue be the string optionsValue after performing the algorithm steps to transform
    //               Unicode extension values to canonical syntax per Unicode Technical Standard #35 LDML § 3.2.1
    //               Canonical Unicode Locale Identifiers, treating key as ukey and optionsValue as uvalue productions.
    //            2. Let optionsValue be the string optionsValue after performing the algorithm steps to replace
    //               Unicode extension values with their canonical form per Unicode Technical Standard #35 LDML § 3.2.1
    //               Canonical Unicode Locale Identifiers, treating key as ukey and optionsValue as uvalue productions.
    //            3. If optionsValue is the empty String, then
    //                a. Let optionsValue be "true".
    //        iv. If keyLocaleData contains optionsValue, then
    //            1. If SameValue(optionsValue, value) is false, then
    //                a. Let value be optionsValue.
    //                b. Let supportedExtensionAddition be "".
    //    j. Set result.[[<key>]] to value.
    //    k. Append supportedExtensionAddition to supportedExtension.
    // 10. If the number of elements in supportedExtension is greater than 2, then
    //     a. Let foundLocale be InsertUnicodeExtensionAndCanonicalize(foundLocale, supportedExtension).
    // 11. Set result.[[locale]] to foundLocale.
    // 12. Return result.
    let service_options = &mut options.service_options;
    S::resolve(&mut found_locale, service_options, icu.provider());

    // The service may have edited the extensions, so canonicalize once more.
    icu.locale_canonicalizer().canonicalize(&mut found_locale);
    found_locale
}
/// Abstract operation [`LookupSupportedLocales ( availableLocales, requestedLocales )`][spec]
///
/// Returns, in their original order, the requested locales whose language
/// identifier has a match according to `best_available_locale`. The returned
/// locales are copies of the requested ones, extensions included.
///
/// # Note
///
/// This differs a bit from the spec, since we don't have an `[[AvailableLocales]]`
/// list to compare with. However, we can do data requests to a [`DataProvider`]
/// in order to see if a certain [`Locale`] is supported.
///
/// [spec]: https://tc39.es/ecma402/#sec-lookupsupportedlocales
fn lookup_supported_locales<M: KeyedDataMarker>(
    requested_locales: &[Locale],
    provider: &impl DataProvider<M>,
) -> Vec<Locale> {
    // 1. Let subset be a new empty List.
    let mut subset = Vec::new();

    // 2. For each element locale of requestedLocales, do
    for locale in requested_locales {
        // a. Let noExtensionsLocale be the String value that is locale with any Unicode
        //    locale extension sequences removed.
        // b. Let availableLocale be ! BestAvailableLocale(availableLocales, noExtensionsLocale).
        let available = best_available_locale(locale.id.clone(), provider);

        // c. If availableLocale is not undefined, append locale to the end of subset.
        if available.is_some() {
            subset.push(locale.clone());
        }
    }

    // 3. Return subset.
    subset
}
/// Abstract operation [`BestFitSupportedLocales ( availableLocales, requestedLocales )`][spec]
///
/// Returns, in their original order, the requested locales whose language
/// identifier can be resolved by `best_locale_for_provider`. The returned
/// locales are copies of the requested ones, extensions included.
///
/// [spec]: https://tc39.es/ecma402/#sec-bestfitsupportedlocales
fn best_fit_supported_locales<M: KeyedDataMarker>(
    requested_locales: &[Locale],
    provider: &impl DataProvider<M>,
) -> Vec<Locale> {
    let mut subset = Vec::new();
    for locale in requested_locales {
        let resolved = best_locale_for_provider(locale.id.clone(), provider);
        if resolved.is_some() {
            subset.push(locale.clone());
        }
    }
    subset
}
/// Abstract operation [`SupportedLocales ( availableLocales, requestedLocales, options )`][spec]
///
/// Reads the `localeMatcher` option from `options`, filters
/// `requested_locales` with the corresponding algorithm and returns the
/// surviving locales as a JavaScript array of strings.
///
/// Errors from coercing `options` or reading `localeMatcher` are propagated.
///
/// [spec]: https://tc39.es/ecma402/#sec-supportedlocales
pub(in crate::builtins::intl) fn supported_locales<M: KeyedDataMarker>(
    requested_locales: &[Locale],
    options: &JsValue,
    context: &mut Context,
) -> JsResult<JsObject>
where
    BoaProvider: DataProvider<M>,
{
    // 1. Set options to ? CoerceOptionsToObject(options).
    let options = coerce_options_to_object(options, context)?;

    // 2. Let matcher be ? GetOption(options, "localeMatcher", string, « "lookup", "best fit" », "best fit").
    let requested_matcher = get_option::<LocaleMatcher>(&options, "localeMatcher", false, context)?;
    let matcher = requested_matcher.unwrap_or_default();

    let supported = if matcher == LocaleMatcher::Lookup {
        // 4. Else,
        //    a. Let supportedLocales be LookupSupportedLocales(availableLocales, requestedLocales).
        lookup_supported_locales(requested_locales, context.icu().provider())
    } else {
        // 3. If matcher is "best fit", then
        //    a. Let supportedLocales be BestFitSupportedLocales(availableLocales, requestedLocales).
        best_fit_supported_locales(requested_locales, context.icu().provider())
    };

    // 5. Return CreateArrayFromList(supportedLocales).
    let tags = supported.into_iter().map(|loc| loc.to_string().into());
    Ok(Array::create_array_from_list(tags, context))
}
/// Validates that the unicode extension `key` with `value` is a valid extension value for the
/// `language`.
///
/// A data locale is built from `language` plus the `key`/`value` extension and requested from
/// `provider`. The extension is considered valid when the load succeeds and the response either
/// reports no locale in its metadata or reports exactly the requested locale.
pub(in crate::builtins::intl) fn validate_extension<M: KeyedDataMarker>(
    language: LanguageIdentifier,
    key: Key,
    value: &Value,
    provider: &impl DataProvider<M>,
) -> bool {
    let mut locale = DataLocale::from(language);
    locale.set_unicode_ext(key, value.clone());

    let request = DataRequest {
        locale: &locale,
        metadata: DataRequestMetadata::default(),
    };

    match DataProvider::load(provider, request) {
        // A response without a reported locale counts as an exact match.
        Ok(response) => response
            .metadata
            .locale
            .map_or(true, |resolved| resolved == locale),
        Err(_) => false,
    }
}
#[cfg(test)]
mod tests {
    use icu_locid::{langid, locale, Locale};
    use icu_plurals::provider::CardinalV1Marker;
    use icu_provider::AsDeserializingBufferProvider;
    use icu_provider_adapters::fallback::LocaleFallbackProvider;

    use crate::{
        builtins::intl::locale::utils::{
            best_available_locale, best_fit_matcher, default_locale, lookup_matcher,
        },
        context::icu::{BoaProvider, Icu},
    };

    /// Checks `best_available_locale` for an exact match, a truncated match and no match.
    #[test]
    fn best_avail_loc() {
        let blob = boa_icu_provider::blob();
        let provider = blob.as_deserializing();

        let cases = [
            (langid!("en"), Some(langid!("en"))),
            (langid!("es-ES"), Some(langid!("es"))),
            (langid!("kr"), None),
        ];

        for (requested, expected) in cases {
            assert_eq!(
                best_available_locale::<CardinalV1Marker>(requested, &provider),
                expected
            );
        }
    }

    /// Checks the matchers against an empty request, a single request and a priority list.
    #[test]
    fn lookup_match() {
        let fallback_provider =
            LocaleFallbackProvider::try_new_with_buffer_provider(boa_icu_provider::blob())
                .unwrap();
        let icu = Icu::new(BoaProvider::Buffer(Box::new(fallback_provider))).unwrap();

        // requested: []
        let empty_match = lookup_matcher::<CardinalV1Marker>(&[], &icu);
        assert_eq!(empty_match, default_locale(icu.locale_canonicalizer()));
        assert!(empty_match.extensions.is_empty());

        // requested: [fr-FR-u-hc-h12]
        let french: Locale = "fr-FR-u-hc-h12".parse().unwrap();
        let french_match = lookup_matcher::<CardinalV1Marker>(&[french.clone()], &icu);
        assert_eq!(french_match.id, langid!("fr"));
        assert_eq!(french_match.extensions, french.extensions);

        // requested: [kr-KR-u-hc-h12, gr-GR-u-hc-h24-x-4a, es-ES-valencia-u-ca-gregory, uz-Cyrl]
        let spanish: Locale = "es-ES-valencia-u-ca-gregory".parse().unwrap();
        let priority_list: Vec<Locale> = vec![
            "kr-KR-u-hc-h12".parse().unwrap(),
            "gr-GR-u-hc-h24-x-4a".parse().unwrap(),
            spanish.clone(),
            locale!("uz-Cyrl"),
        ];
        let best_fit = best_fit_matcher::<CardinalV1Marker>(&priority_list, &icu);
        assert_eq!(best_fit.id, langid!("es"));
        assert_eq!(best_fit.extensions, spanish.extensions);
    }
}

842
boa_engine/src/builtins/intl/mod.rs

@ -7,26 +7,30 @@
//!
//! [spec]: https://tc39.es/ecma402/#intl-object
#![allow(clippy::string_lit_as_bytes)]
use super::JsArgs;
use crate::{
builtins::intl::date_time_format::DateTimeFormat,
builtins::{Array, BuiltIn, JsArgs},
error::JsNativeError,
object::{JsObject, ObjectInitializer},
builtins::{Array, BuiltIn},
object::ObjectInitializer,
property::Attribute,
symbol::WellKnownSymbols,
Context, JsResult, JsValue,
};
pub mod date_time_format;
#[cfg(test)]
mod tests;
use boa_profiler::Profiler;
use icu_locale_canonicalizer::LocaleCanonicalizer;
use icu_locid::{locale, Locale};
use indexmap::IndexSet;
use rustc_hash::FxHashMap;
use tap::{Conv, Pipe, TapOptional};
use icu_provider::KeyedDataMarker;
use tap::{Conv, Pipe};
pub(crate) mod collator;
pub(crate) mod date_time_format;
pub(crate) mod list_format;
pub(crate) mod locale;
mod options;
pub(crate) mod segmenter;
use self::{collator::Collator, list_format::ListFormat, locale::Locale, segmenter::Segmenter};
/// JavaScript `Intl` object.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
@ -38,20 +42,50 @@ impl BuiltIn for Intl {
fn init(context: &mut Context) -> Option<JsValue> {
let _timer = Profiler::global().start_event(Self::NAME, "init");
let string_tag = WellKnownSymbols::to_string_tag();
let collator = Collator::init(context).expect("initialization should return a constructor");
let list_format =
ListFormat::init(context).expect("initialization should return a constructor");
let locale = Locale::init(context).expect("initialization should return a constructor");
let segmenter =
Segmenter::init(context).expect("initialization should return a constructor");
let date_time_format = DateTimeFormat::init(context);
ObjectInitializer::new(context)
.function(Self::get_canonical_locales, "getCanonicalLocales", 1)
.property(
string_tag,
WellKnownSymbols::to_string_tag(),
Self::NAME,
Attribute::READONLY | Attribute::NON_ENUMERABLE | Attribute::CONFIGURABLE,
)
.property(
"Collator",
collator,
Attribute::WRITABLE | Attribute::NON_ENUMERABLE | Attribute::CONFIGURABLE,
)
.property(
"ListFormat",
list_format,
Attribute::WRITABLE | Attribute::NON_ENUMERABLE | Attribute::CONFIGURABLE,
)
.property(
"DateTimeFormat",
date_time_format,
Attribute::WRITABLE | Attribute::NON_ENUMERABLE | Attribute::CONFIGURABLE,
)
.property(
"Locale",
locale,
Attribute::WRITABLE | Attribute::NON_ENUMERABLE | Attribute::CONFIGURABLE,
)
.property(
"Segmenter",
segmenter,
Attribute::WRITABLE | Attribute::NON_ENUMERABLE | Attribute::CONFIGURABLE,
)
.function(Self::get_canonical_locales, "getCanonicalLocales", 1)
.build()
.conv::<JsValue>()
.pipe(Some)
@ -74,8 +108,10 @@ impl Intl {
args: &[JsValue],
context: &mut Context,
) -> JsResult<JsValue> {
let locales = args.get_or_undefined(0);
// 1. Let ll be ? CanonicalizeLocaleList(locales).
let ll = canonicalize_locale_list(args, context)?;
let ll = locale::canonicalize_locale_list(locales, context)?;
// 2. Return CreateArrayFromList(ll).
Ok(JsValue::Object(Array::create_array_from_list(
@ -85,758 +121,30 @@ impl Intl {
}
}
/// `MatcherRecord` type aggregates unicode `locale` string and unicode locale `extension`.
///
/// This is a return value for `lookup_matcher` and `best_fit_matcher` subroutines.
#[derive(Debug)]
struct MatcherRecord {
/// The matched locale: `lookup_matcher` stores either the available locale it found or the
/// string form of the default locale.
locale: String,
/// The extensions of the requested locale rendered as a string; empty when the requested
/// locale is equal to its extension-less form or when the default locale was used.
extension: String,
}
/// Abstract operation `DefaultLocale ( )`
///
/// Returns the canonicalized locale reported by the host environment. When the host reports no
/// locale, or the reported tag cannot be parsed as a [`Locale`], `en-US` is returned instead.
///
/// More information:
///  - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-defaultlocale
#[allow(clippy::string_lit_as_bytes)]
fn default_locale(canonicalizer: &LocaleCanonicalizer) -> Locale {
    let host_locale = sys_locale::get_locale().and_then(|tag| tag.parse::<Locale>().ok());

    match host_locale {
        Some(mut parsed) => {
            // Only a locale obtained from the host is canonicalized; the fallback is used as is.
            canonicalize_unicode_locale_id(&mut parsed, canonicalizer);
            parsed
        }
        None => locale!("en-US"),
    }
}
/// Abstract operation `BestAvailableLocale ( availableLocales, locale )`
///
/// Compares the provided argument `locale`, which must be a String value with a
/// structurally valid and canonicalized Unicode BCP 47 locale identifier, against
/// the locales in `availableLocales` and returns either the longest non-empty prefix
/// of `locale` that is an element of `availableLocales`, or undefined if there is no
/// such element.
///
/// The returned slice borrows from `locale`. `None` is returned once the candidate has no
/// `-` left to truncate at.
///
/// More information:
///  - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-bestavailablelocale
fn best_available_locale<'a>(available_locales: &'_ [&'_ str], locale: &'a str) -> Option<&'a str> {
    // 1. Let candidate be locale.
    let mut candidate = locale;

    // 2. Repeat
    loop {
        // a. If availableLocales contains an element equal to candidate, return candidate.
        if available_locales.contains(&candidate) {
            return Some(candidate);
        }

        // b. Let pos be the character index of the last occurrence of "-" (U+002D) within
        //    candidate. If that character does not occur, return undefined.
        let mut pos = candidate.rfind('-')?;

        // c. If pos ≥ 2 and the character "-" occurs at index pos-2 of candidate, decrease pos by 2.
        //    Checking the byte directly avoids allocating a temporary string on every iteration;
        //    `-` is ASCII, so a hit is always on a character boundary.
        if pos >= 2 && candidate.as_bytes()[pos - 2] == b'-' {
            pos -= 2;
        }

        // d. Let candidate be the substring of candidate from position 0, inclusive, to
        //    position pos, exclusive.
        candidate = &candidate[..pos];
    }
}
/// Abstract operation `LookupMatcher ( availableLocales, requestedLocales )`
///
/// Walks `requested_locales` in order and returns a record for the first one whose
/// extension-less form has a match in `available_locales` according to
/// `best_available_locale`. When no requested locale matches, the record holds the default
/// locale and an empty extension.
///
/// # Panics
///
/// Panics if an element of `requested_locales` cannot be parsed as a [`Locale`].
///
/// More information:
///  - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-lookupmatcher
fn lookup_matcher(
    available_locales: &[&str],
    requested_locales: &[&str],
    canonicalizer: &LocaleCanonicalizer,
) -> MatcherRecord {
    // 1. Let result be a new Record.
    // 2. For each element locale of requestedLocales, do
    for &requested in requested_locales {
        // a. Let noExtensionsLocale be the String value that is locale with any Unicode locale
        //    extension sequences removed.
        let parsed: Locale = requested.parse().expect("Locale parsing failed");
        let stripped = parsed.id.to_string();

        // b. Let availableLocale be ! BestAvailableLocale(availableLocales, noExtensionsLocale).
        // c. If availableLocale is not undefined, then
        if let Some(found) = best_available_locale(available_locales, &stripped) {
            // ii. If locale and noExtensionsLocale are not the same String value, then
            //     1. Let extension be the String value consisting of the substring of the
            //        Unicode locale extension sequence within locale.
            //     2. Set result.[[extension]] to extension.
            let extension = if requested == stripped {
                String::new()
            } else {
                parsed.extensions.to_string()
            };

            // i. Set result.[[locale]] to availableLocale.
            // iii. Return result.
            return MatcherRecord {
                locale: found.into(),
                extension,
            };
        }
    }

    // 3. Let defLocale be ! DefaultLocale().
    // 4. Set result.[[locale]] to defLocale.
    // 5. Return result.
    MatcherRecord {
        locale: default_locale(canonicalizer).to_string(),
        extension: String::new(),
    }
}
/// Abstract operation `BestFitMatcher ( availableLocales, requestedLocales )`
///
/// Compares `requestedLocales`, which must be a `List` as returned by `CanonicalizeLocaleList`,
/// against the locales in `availableLocales` and determines the best available language to
/// meet the request. The algorithm is implementation dependent, but should produce results
/// that a typical user of the requested locales would perceive as at least as good as those
/// produced by the `LookupMatcher` abstract operation.
///
/// This implementation forwards its arguments unchanged to `lookup_matcher`, so both matchers
/// return the same record.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-bestfitmatcher
fn best_fit_matcher(
available_locales: &[&str],
requested_locales: &[&str],
canonicalizer: &LocaleCanonicalizer,
) -> MatcherRecord {
// No dedicated best-fit algorithm: reuse the lookup matcher.
lookup_matcher(available_locales, requested_locales, canonicalizer)
}
/// `Keyword` structure is a pair of keyword key and keyword value.
///
/// `unicode_extension_components` creates one for every two-character subtag it finds.
#[derive(Debug)]
struct Keyword {
/// The two-character subtag that introduced the keyword.
key: String,
/// The subtags that followed the key, joined with `-`; empty when none followed.
value: String,
}
/// `UniExtRecord` structure represents unicode extension records.
///
/// It contains the list of unicode `extension` attributes and the list of `keywords`.
///
/// For example:
///
/// - `-u-nu-thai` has no attributes and the list of keywords contains `(nu:thai)` pair.
///
/// Both lists are free of duplicates as built by `unicode_extension_components`: an attribute
/// or a keyword key is only stored the first time it is seen.
#[allow(dead_code)]
#[derive(Debug)]
struct UniExtRecord {
attributes: Vec<String>, // never read at this point
keywords: Vec<Keyword>,
}
/// Abstract operation `UnicodeExtensionComponents ( extension )`
///
/// Splits `extension` into its attributes and keywords. Scanning starts at byte index 3 and
/// proceeds one `-`-separated subtag at a time:
///
/// - a two-character subtag starts a new keyword,
/// - any other subtag is appended to the value of the current keyword, or recorded as an
///   attribute when no keyword has been started yet.
///
/// Duplicate attributes and duplicate keyword keys are dropped; the first occurrence wins.
///
/// More information:
///  - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-unicode-extension-components
fn unicode_extension_components(extension: &str) -> UniExtRecord {
    // 1. Let attributes be a new empty List.
    let mut attributes: Vec<String> = Vec::new();
    // 2. Let keywords be a new empty List.
    let mut keywords: Vec<Keyword> = Vec::new();
    // 3. Let keyword be undefined.
    let mut pending: Option<Keyword> = None;

    // 4. Let size be the length of extension.
    let size = extension.len();
    // 5. Let k be 3.
    let mut k = 3;

    // 6. Repeat, while k < size,
    while k < size {
        // a. Let e be ! StringIndexOf(extension, "-", k).
        // b. If e = -1, let len be size - k; else let len be e - k.
        let rest = &extension[k..];
        let len = rest.find('-').unwrap_or(rest.len());

        // c. Let subtag be the String value equal to the substring of extension consisting of
        //    the code units at indices k (inclusive) through k + len (exclusive).
        let subtag = &rest[..len];

        if len == 2 {
            // e. Else if len = 2, then
            //     i. If keyword is not undefined and keywords does not contain an element
            //        whose [[Key]] is the same as keyword.[[Key]], then
            //         1. Append keyword to keywords.
            if let Some(finished) = pending.take() {
                if keywords.iter().all(|kw| kw.key != finished.key) {
                    keywords.push(finished);
                }
            }
            //     ii. Set keyword to the Record { [[Key]]: subtag, [[Value]]: "" }.
            pending = Some(Keyword {
                key: subtag.into(),
                value: String::new(),
            });
        } else if let Some(current) = pending.as_mut() {
            // f. Else,
            //     i. If keyword.[[Value]] is the empty String, then
            //         1. Set keyword.[[Value]] to subtag.
            //     ii. Else,
            //         1. Set keyword.[[Value]] to the string-concatenation of
            //            keyword.[[Value]], "-", and subtag.
            if !current.value.is_empty() {
                current.value.push('-');
            }
            current.value.push_str(subtag);
        } else if !attributes.iter().any(|attr| attr == subtag) {
            // d. If keyword is undefined and len ≠ 2, then
            //     i. If subtag is not an element of attributes, then
            //         1. Append subtag to attributes.
            attributes.push(subtag.to_string());
        }

        // g. Let k be k + len + 1.
        k += len + 1;
    }

    // 7. If keyword is not undefined and keywords does not contain an element whose [[Key]] is
    //    the same as keyword.[[Key]], then
    //     a. Append keyword to keywords.
    if let Some(finished) = pending {
        if keywords.iter().all(|kw| kw.key != finished.key) {
            keywords.push(finished);
        }
    }

    // 8. Return the Record { [[Attributes]]: attributes, [[Keywords]]: keywords }.
    UniExtRecord {
        attributes,
        keywords,
    }
}
/// Abstract operation `InsertUnicodeExtensionAndCanonicalize ( locale, extension )`
///
/// Inserts `extension`, which must be a Unicode locale extension sequence, into
/// `locale`, which must be a String value with a structurally valid and canonicalized
/// Unicode BCP 47 locale identifier.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-insert-unicode-extension-and-canonicalize
fn insert_unicode_extension_and_canonicalize(
locale: &str,
extension: &str,
canonicalizer: &LocaleCanonicalizer,
) -> String {
// TODO 1. Assert: locale does not contain a substring that is a Unicode locale extension sequence.
// TODO 2. Assert: extension is a Unicode locale extension sequence.
// TODO 3. Assert: tag matches the unicode_locale_id production.
// 4. Let privateIndex be ! StringIndexOf(locale, "-x-", 0).
let private_index = locale.find("-x-");
let new_locale = match private_index {
// 5. If privateIndex = -1, then
None => {
// a. Let locale be the string-concatenation of locale and extension.
locale.to_owned() + extension
}
// 6. Else,
Some(idx) => {
// a. Let preExtension be the substring of locale from position 0, inclusive,
// to position privateIndex, exclusive.
let pre_extension = &locale[0..idx];
// b. Let postExtension be the substring of locale from position privateIndex to
// the end of the string.
let post_extension = &locale[idx..];
// c. Let locale be the string-concatenation of preExtension, extension,
// and postExtension.
pre_extension.to_owned() + extension + post_extension
}
};
// 7. Assert: ! IsStructurallyValidLanguageTag(locale) is true.
let mut new_locale = new_locale
.parse()
.expect("Assert: ! IsStructurallyValidLanguageTag(locale) is true.");
// 8. Return ! CanonicalizeUnicodeLocaleId(locale).
canonicalize_unicode_locale_id(&mut new_locale, canonicalizer);
new_locale.to_string()
}
/// Abstract operation `CanonicalizeLocaleList ( locales )`
///
/// Reads the locales argument from `args[0]` and converts it into a list of canonicalized
/// [`Locale`]s without duplicates, keeping the order of first appearance.
///
/// For efficiency, this returns a [`Vec`] of [`Locale`]s instead of a [`Vec`] of
/// [`String`]s, since [`Locale`] allows us to modify individual parts of the locale
/// without scanning the whole string again.
///
/// # Errors
///
/// - `TypeError` if an element is neither a string nor an object.
/// - `RangeError` if an element does not parse as a [`Locale`].
/// - Any error raised while converting or reading the locales object.
///
/// More information:
///  - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-canonicalizelocalelist
/// [bcp-47]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
/// [canon]: https://unicode.org/reports/tr35/#LocaleId_Canonicalization
fn canonicalize_locale_list(args: &[JsValue], context: &mut Context) -> JsResult<Vec<Locale>> {
    // 1. If locales is undefined, then
    //     a. Return a new empty List.
    let locales = args.get_or_undefined(0);
    if locales.is_undefined() {
        return Ok(Vec::new());
    }

    // 2. Let seen be a new empty List.
    let mut seen = IndexSet::new();

    // 3. If Type(locales) is String or Type(locales) is Object and locales has an
    //    [[InitializedLocale]] internal slot, then
    //     a. Let O be CreateArrayFromList(« locales »).
    // 4. Else,
    //     a. Let O be ? ToObject(locales).
    // TODO: check if Type(locales) is object and handle the internal slots
    let o = if locales.is_string() {
        Array::create_array_from_list([locales.clone()], context)
    } else {
        locales.to_object(context)?
    };

    // 5. Let len be ? ToLength(? Get(O, "length")).
    let len = o.length_of_array_like(context)?;

    // 6. Let k be 0.
    // 7. Repeat, while k < len,
    for k in 0..len {
        // a. Let Pk be ToString(k).
        // b. Let kPresent be ? HasProperty(O, Pk).
        // c. If kPresent is true, then
        if !o.has_property(k, context)? {
            continue;
        }

        // i. Let kValue be ? Get(O, Pk).
        let k_value = o.get(k, context)?;

        // ii. If Type(kValue) is not String or Object, throw a TypeError exception.
        if !k_value.is_object() && !k_value.is_string() {
            return Err(JsNativeError::typ()
                .with_message("locale should be a String or Object")
                .into());
        }

        // iii. If Type(kValue) is Object and kValue has an [[InitializedLocale]] internal slot, then
        //     TODO: handle checks for InitializedLocale internal slot (there should be an if statement here)
        //     1. Let tag be kValue.[[Locale]].
        // iv. Else,
        //     1. Let tag be ? ToString(kValue).
        let tag_string = k_value.to_string(context)?.to_std_string().ok();

        // v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
        let mut tag: Locale = match tag_string.and_then(|tag| tag.parse().ok()) {
            Some(tag) => tag,
            None => {
                return Err(JsNativeError::range()
                    .with_message("locale is not a structurally valid language tag")
                    .into());
            }
        };

        // vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
        canonicalize_unicode_locale_id(&mut tag, context.icu().locale_canonicalizer());

        // vii. If canonicalizedTag is not an element of seen, append canonicalizedTag as the
        //      last element of seen. The set ignores tags that are already present.
        seen.insert(tag);

        // d. Increase k by 1.
    }

    // 8. Return seen.
    Ok(seen.into_iter().collect())
}
/// `LocaleDataRecord` is the type of `locale_data` argument in `resolve_locale` subroutine.
///
/// It is an alias for a map where key is a string and value is another map.
///
/// Value of that inner map is a vector of strings representing locale parameters.
///
/// `resolve_locale` indexes the outer map with the found locale and the inner map with a
/// relevant extension key; the first element of the resulting vector is used as the
/// initial value for that key.
type LocaleDataRecord = FxHashMap<String, FxHashMap<String, Vec<String>>>;
/// `DateTimeFormatRecord` type aggregates `locale_matcher` selector and `properties` map.
///
/// It is used as a type of `options` parameter in `resolve_locale` subroutine.
#[derive(Debug)]
struct DateTimeFormatRecord {
/// Matcher name; `resolve_locale` uses the lookup matcher when this is `"lookup"` and the
/// best fit matcher for any other value.
pub(crate) locale_matcher: String,
/// Option values, looked up by `resolve_locale` with each relevant extension key.
pub(crate) properties: FxHashMap<String, JsValue>,
}
/// `ResolveLocaleRecord` type consists of unicode `locale` string, `data_locale` string and `properties` map.
///
/// This is a return value for `resolve_locale` subroutine.
#[derive(Debug)]
struct ResolveLocaleRecord {
/// The locale found by the matcher, with the supported extension inserted when there is one.
pub(crate) locale: String,
/// The resolved value for every relevant extension key.
pub(crate) properties: FxHashMap<String, JsValue>,
/// The locale found by the matcher, before any extension is inserted.
pub(crate) data_locale: String,
}
/// Abstract operation `ResolveLocale ( availableLocales, requestedLocales, options, relevantExtensionKeys, localeData )`
///
/// Compares a BCP 47 language priority list `requestedLocales` against the locales
/// in `availableLocales` and determines the best available language to meet the request.
/// `availableLocales`, `requestedLocales`, and `relevantExtensionKeys` must be provided as
/// `List` values, options and `localeData` as Records.
///
/// The returned record holds the matched locale in `data_locale`, the matched locale with the
/// supported extension keywords inserted in `locale`, and one entry per relevant extension key
/// in `properties`.
///
/// Missing data is tolerated instead of asserted: a locale or key absent from `locale_data`
/// is treated as an empty record/list, in which case the value for the key starts as `null`.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-resolvelocale
#[allow(dead_code)]
fn resolve_locale(
available_locales: &[&str],
requested_locales: &[&str],
options: &DateTimeFormatRecord,
relevant_extension_keys: &[&str],
locale_data: &LocaleDataRecord,
context: &mut Context,
) -> ResolveLocaleRecord {
// 1. Let matcher be options.[[localeMatcher]].
let matcher = &options.locale_matcher;
// 2. If matcher is "lookup", then
// a. Let r be ! LookupMatcher(availableLocales, requestedLocales).
// 3. Else,
// a. Let r be ! BestFitMatcher(availableLocales, requestedLocales).
let r = if matcher == "lookup" {
lookup_matcher(
available_locales,
requested_locales,
context.icu().locale_canonicalizer(),
)
} else {
best_fit_matcher(
available_locales,
requested_locales,
context.icu().locale_canonicalizer(),
)
};
// 4. Let foundLocale be r.[[locale]].
let mut found_locale = r.locale;
// 5. Let result be a new Record.
let mut result = ResolveLocaleRecord {
locale: String::new(),
properties: FxHashMap::default(),
data_locale: String::new(),
};
// 6. Set result.[[dataLocale]] to foundLocale.
result.data_locale = found_locale.clone();
// 7. If r has an [[extension]] field, then
// An empty extension string stands for "no [[extension]] field".
let keywords = if r.extension.is_empty() {
Vec::<Keyword>::new()
} else {
// a. Let components be ! UnicodeExtensionComponents(r.[[extension]]).
let components = unicode_extension_components(&r.extension);
// b. Let keywords be components.[[Keywords]].
components.keywords
};
// 8. Let supportedExtension be "-u".
let mut supported_extension = String::from("-u");
// 9. For each element key of relevantExtensionKeys, do
for &key in relevant_extension_keys {
// a. Let foundLocaleData be localeData.[[<foundLocale>]].
// TODO b. Assert: Type(foundLocaleData) is Record.
// NOTE(review): `found_locale` is not modified inside this loop, so this lookup and
// clone repeat the same work for every key.
let found_locale_data = locale_data
.get(&found_locale)
.map_or_else(FxHashMap::default, Clone::clone);
// c. Let keyLocaleData be foundLocaleData.[[<key>]].
// TODO d. Assert: Type(keyLocaleData) is List.
let key_locale_data = found_locale_data
.get(key)
.map_or_else(Vec::new, Clone::clone);
// e. Let value be keyLocaleData[0].
// TODO f. Assert: Type(value) is either String or Null.
let mut value = key_locale_data
.get(0)
.map_or_else(JsValue::null, |first_elt| first_elt.clone().into());
// g. Let supportedExtensionAddition be "".
let mut supported_extension_addition = String::new();
// h. If r has an [[extension]] field, then
if !r.extension.is_empty() {
// i. If keywords contains an element whose [[Key]] is the same as key, then
// 1. Let entry be the element of keywords whose [[Key]] is the same as key.
let maybe_entry = keywords.iter().find(|elem| key.eq(&elem.key));
if let Some(entry) = maybe_entry {
// 2. Let requestedValue be entry.[[Value]].
let requested_value = &entry.value;
// 3. If requestedValue is not the empty String, then
if !requested_value.is_empty() {
// a. If keyLocaleData contains requestedValue, then
if key_locale_data.iter().any(|s| s == requested_value) {
// i. Let value be requestedValue.
value = requested_value.clone().into();
// ii. Let supportedExtensionAddition be the string-concatenation
// of "-", key, "-", and value.
supported_extension_addition = format!("-{key}-{requested_value}");
}
// 4. Else if keyLocaleData contains "true", then
} else if key_locale_data.iter().any(|s| s == "true") {
// a. Let value be "true".
value = "true".into();
// b. Let supportedExtensionAddition be the string-concatenation of "-" and key.
supported_extension_addition = format!("-{key}");
}
}
}
// i. If options has a field [[<key>]], then
if options.properties.contains_key(key) {
// i. Let optionsValue be options.[[<key>]].
// TODO ii. Assert: Type(optionsValue) is either String, Undefined, or Null.
let mut options_value = options
.properties
.get(key)
.unwrap_or(&JsValue::undefined())
.clone();
// iii. If Type(optionsValue) is String, then
if options_value.is_string() {
// TODO 1. Let optionsValue be the string optionsValue after performing the
// algorithm steps to transform Unicode extension values to canonical syntax
// per Unicode Technical Standard #35 LDML § 3.2.1 Canonical Unicode Locale
// Identifiers, treating key as ukey and optionsValue as uvalue productions.
// TODO 2. Let optionsValue be the string optionsValue after performing the
// algorithm steps to replace Unicode extension values with their canonical
// form per Unicode Technical Standard #35 LDML § 3.2.1 Canonical Unicode
// Locale Identifiers, treating key as ukey and optionsValue as uvalue
// productions.
// 3. If optionsValue is the empty String, then
if let Some(options_val_str) = options_value.as_string() {
if options_val_str.is_empty() {
// a. Let optionsValue be "true".
options_value = "true".into();
}
}
}
// iv. If keyLocaleData contains optionsValue, then
// A failed string conversion is replaced by the empty string rather than reported,
// since this function has no error return.
let options_val_str = options_value
.to_string(context)
.unwrap_or_else(|_| "".into())
.to_std_string_escaped();
if key_locale_data.iter().any(|s| s == &options_val_str) {
// 1. If SameValue(optionsValue, value) is false, then
if !options_value.eq(&value) {
// a. Let value be optionsValue.
value = options_value;
// b. Let supportedExtensionAddition be "".
supported_extension_addition = String::new();
}
}
}
// j. Set result.[[<key>]] to value.
result.properties.insert(key.to_string(), value);
// k. Append supportedExtensionAddition to supportedExtension.
supported_extension.push_str(&supported_extension_addition);
}
// 10. If the number of elements in supportedExtension is greater than 2, then
// A length above 2 means something was appended after the initial "-u".
if supported_extension.len() > 2 {
// a. Let foundLocale be InsertUnicodeExtensionAndCanonicalize(foundLocale, supportedExtension).
found_locale = insert_unicode_extension_and_canonicalize(
&found_locale,
&supported_extension,
context.icu().locale_canonicalizer(),
);
}
// 11. Set result.[[locale]] to foundLocale.
result.locale = found_locale;
// 12. Return result.
result
}
/// The target type requested from `get_option` for an option value.
#[allow(unused)]
pub(crate) enum GetOptionType {
/// The value is converted to a string and checked against the allowed values.
String,
/// The value is converted to a boolean.
Boolean,
}
/// Abstract operation `GetOption ( options, property, type, values, fallback )`
///
/// Reads `property` from `options` and converts it according to `type`:
///
/// - an undefined property yields a clone of `fallback`;
/// - `GetOptionType::Boolean` yields the boolean conversion of the value;
/// - `GetOptionType::String` yields the string conversion of the value, which must be one of
///   `values` unless `values` is empty.
///
/// # Errors
///
/// Returns a `RangeError` when a string value is not contained in a non-empty `values`, and
/// propagates any error raised while reading the property or converting it to a string.
///
/// More information:
///  - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-getoption
#[allow(unused)]
pub(crate) fn get_option(
    options: &JsObject,
    property: &str,
    r#type: &GetOptionType,
    values: &[&str],
    fallback: &JsValue,
    context: &mut Context,
) -> JsResult<JsValue> {
    // 1. Assert: Type(options) is Object.
    // 2. Let value be ? Get(options, property).
    let raw = options.get(property, context)?;

    // 3. If value is undefined, return fallback.
    if raw.is_undefined() {
        return Ok(fallback.clone());
    }

    // 4. Assert: type is "boolean" or "string".
    match r#type {
        // 5. If type is "boolean", then
        //     a. Set value to ! ToBoolean(value).
        GetOptionType::Boolean => Ok(JsValue::Boolean(raw.to_boolean())),
        // 6. If type is "string", then
        //     a. Set value to ? ToString(value).
        GetOptionType::String => {
            let text = raw.to_string(context)?.to_std_string_escaped();

            // 7. If values is not undefined and values does not contain an element equal to
            //    value, throw a RangeError exception.
            let allowed = values.is_empty() || values.contains(&text.as_str());
            if allowed {
                // 8. Return value.
                Ok(JsValue::String(text.into()))
            } else {
                Err(JsNativeError::range()
                    .with_message("GetOption: values array does not contain value")
                    .into())
            }
        }
    }
}
/// Abstract operation `GetNumberOption ( options, property, minimum, maximum, fallback )`
///
/// Extracts the value of the property named `property` from the provided `options`
/// object, converts it to a `Number value`, checks whether it is in the allowed range,
/// and fills in a `fallback` value if necessary.
///
/// The conversion, range check and fallback handling are all performed by
/// `default_number_option`; this function only reads the property. An error raised while
/// reading the property is propagated.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-getnumberoption
#[allow(unused)]
pub(crate) fn get_number_option(
options: &JsObject,
property: &str,
minimum: f64,
maximum: f64,
fallback: Option<f64>,
context: &mut Context,
) -> JsResult<Option<f64>> {
// 1. Assert: Type(options) is Object.
// 2. Let value be ? Get(options, property).
let value = options.get(property, context)?;
// 3. Return ? DefaultNumberOption(value, minimum, maximum, fallback).
default_number_option(&value, minimum, maximum, fallback, context)
}
/// Abstract operation `DefaultNumberOption ( value, minimum, maximum, fallback )`
///
/// Converts `value` to a `Number value`, checks whether it is in the allowed range,
/// and fills in a `fallback` value if necessary.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-defaultnumberoption
#[allow(unused)]
pub(crate) fn default_number_option(
value: &JsValue,
minimum: f64,
maximum: f64,
fallback: Option<f64>,
context: &mut Context,
) -> JsResult<Option<f64>> {
// 1. If value is undefined, return fallback.
if value.is_undefined() {
return Ok(fallback);
}
// Making `provider: &BoaProvider` instead of a type parameter `P` makes it so that we need
// to copy-paste the bounds of `impl<M> DataProvider<M> for BoaProvider` every time we need
// to use `provider`. The type parameter solves this by delegating simpler bounds to every
// implementor of `Service`.
trait Service<P> {
/// The data marker used by [`resolve_locale`][locale::resolve_locale] to decide
/// which locales are supported by this service.
type LangMarker: KeyedDataMarker;
// 2. Set value to ? ToNumber(value).
let value = value.to_number(context)?;
/// The set of options used in the [`Service::resolve`] method to resolve the provided
/// locale.
type LocaleOptions;
// 3. If value is NaN or less than minimum or greater than maximum, throw a RangeError exception.
if value.is_nan() || value < minimum || value > maximum {
return Err(JsNativeError::range()
.with_message("DefaultNumberOption: value is out of range.")
.into());
/// Resolves the final value of `locale` from a set of `options`.
///
/// The provided `options` will also be modified with the final values, in case there were
/// changes in the resolution algorithm.
///
/// # Note
///
/// - A correct implementation must ensure `locale` and `options` are both written with the
/// new final values.
/// - If the implementor service doesn't contain any `[[RelevantExtensionKeys]]`, this can be
/// skipped.
fn resolve(_locale: &mut icu_locid::Locale, _options: &mut Self::LocaleOptions, _provider: &P) {
}
// 4. Return floor(value).
Ok(Some(value.floor()))
}
/// Abstract operation `CanonicalizeUnicodeLocaleId ( locale )`.
///
/// This function differs slightly from the specification by modifying in-place
/// the provided [`Locale`] instead of creating a new canonicalized copy.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-canonicalizeunicodelocaleid
fn canonicalize_unicode_locale_id(locale: &mut Locale, canonicalizer: &LocaleCanonicalizer) {
// The work is delegated to the canonicalizer; whatever `canonicalize` returns is discarded.
canonicalizer.canonicalize(locale);
}

252
boa_engine/src/builtins/intl/options.rs

@ -0,0 +1,252 @@
use std::{fmt::Display, str::FromStr};
use icu_collator::CaseFirst;
use crate::{
object::{JsObject, ObjectData},
Context, JsNativeError, JsResult, JsString, JsValue,
};
/// `IntlOptions` aggregates the `locale_matcher` selector and any other object
/// property needed for `Intl` object constructors.
///
/// It is used as the type of the `options` parameter in the operation `resolve_locale`.
///
/// The type parameter `O` is the type of the service-specific options.
#[derive(Debug, Default)]
pub(super) struct IntlOptions<O> {
/// The requested locale matching algorithm.
pub(super) matcher: LocaleMatcher,
/// Options specific to the service, of the caller-chosen type `O`.
pub(super) service_options: O,
}
/// A type used as an option parameter inside the `Intl` [spec].
///
/// Implementing this trait makes a type usable as the target type of [`get_option`].
///
/// [spec]: https://tc39.es/ecma402
pub(super) trait OptionType: Sized {
/// Parses a [`JsValue`] into an instance of `Self`.
///
/// Roughly equivalent to the algorithm steps of [9.12.13.3-7][spec], but allows for parsing
/// steps instead of returning a pure string, number or boolean.
///
/// Returns an error if the value cannot be converted into `Self`.
///
/// [spec]: https://tc39.es/ecma402/#sec-getoption
fn from_value(value: JsValue, context: &mut Context) -> JsResult<Self>;
}
/// Marker trait for option types that are parsed from a string through their [`FromStr`]
/// implementation.
///
/// Implementors whose `FromStr::Err` implements [`Display`] receive the blanket
/// [`OptionType`] implementation.
pub(super) trait OptionTypeParsable: FromStr {}
/// Blanket [`OptionType`] implementation for every [`OptionTypeParsable`] type.
///
/// The value is converted to a string and then parsed with the type's [`FromStr`]
/// implementation. A parse failure is reported as a `RangeError` carrying the
/// message of the parse error.
impl<T: OptionTypeParsable> OptionType for T
where
    T::Err: Display,
{
    fn from_value(value: JsValue, context: &mut Context) -> JsResult<Self> {
        // Stringify first; any error thrown by the conversion is propagated as is.
        let text = value.to_string(context)?.to_std_string_escaped();

        match text.parse::<Self>() {
            Ok(parsed) => Ok(parsed),
            Err(err) => Err(JsNativeError::range()
                .with_message(err.to_string())
                .into()),
        }
    }
}
/// Boolean options are obtained by coercing the value with `ToBoolean`, which never fails.
impl OptionType for bool {
    fn from_value(value: JsValue, _: &mut Context) -> JsResult<Self> {
        // 5. If type is "boolean", then
        //     a. Set value to ! ToBoolean(value).
        let coerced = value.to_boolean();
        Ok(coerced)
    }
}
/// String options are obtained by converting the value with `ToString`.
impl OptionType for JsString {
    fn from_value(value: JsValue, context: &mut Context) -> JsResult<Self> {
        // 6. If type is "string", then
        //     a. Set value to ? ToString(value).
        let text = value.to_string(context)?;
        Ok(text)
    }
}
/// The locale matching algorithm requested through the `localeMatcher` option.
///
/// Defaults to [`LocaleMatcher::BestFit`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
pub(super) enum LocaleMatcher {
    /// Parsed from the string `"lookup"`.
    Lookup,
    /// Parsed from the string `"best fit"`.
    #[default]
    BestFit,
}

/// Error returned when a string is not a valid [`LocaleMatcher`].
#[derive(Debug)]
pub(super) struct ParseLocaleMatcherError;

impl Display for ParseLocaleMatcherError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `pad` honours the formatter's width/precision, exactly like `str`'s `Display`.
        f.pad("provided string was not `lookup` or `best fit`")
    }
}

impl FromStr for LocaleMatcher {
    type Err = ParseLocaleMatcherError;

    /// Accepts exactly `"lookup"` or `"best fit"`; anything else is an error.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if s == "lookup" {
            return Ok(Self::Lookup);
        }
        if s == "best fit" {
            return Ok(Self::BestFit);
        }
        Err(ParseLocaleMatcherError)
    }
}
// Opts `LocaleMatcher` into the blanket `OptionType` implementation, which parses it through `FromStr`.
impl OptionTypeParsable for LocaleMatcher {}
/// Parses the `caseFirst` option from the strings `upper`, `lower` or `false`.
///
/// Any other string is rejected with a `RangeError`.
impl OptionType for CaseFirst {
    fn from_value(value: JsValue, context: &mut Context) -> JsResult<Self> {
        let text = value.to_string(context)?.to_std_string_escaped();

        let case_first = match text.as_str() {
            "upper" => Self::UpperFirst,
            "lower" => Self::LowerFirst,
            "false" => Self::Off,
            _ => {
                return Err(JsNativeError::range()
                    .with_message("provided string was not `upper`, `lower` or `false`")
                    .into())
            }
        };

        Ok(case_first)
    }
}
/// Abstract operation [`GetOption ( options, property, type, values, fallback )`][spec]
///
/// Reads the property named `property` from the `options` object and converts it into
/// the requested option type `T`.
///
/// - If the property is `undefined` and `required` is `true`, a `RangeError` is returned.
/// - If the property is `undefined` and `required` is `false`, `Ok(None)` is returned;
///   use [`Option::unwrap_or`] and friends to apply a default value.
/// - Otherwise the value is converted with [`OptionType::from_value`].
///
/// This is a safer alternative to the specification's `GetOption`: there are no `type`
/// or `values` parameters, since each implementation of [`OptionType`] decides how to
/// convert the value and which values are valid.
///
/// [spec]: https://tc39.es/ecma402/#sec-getoption
pub(super) fn get_option<T: OptionType>(
    options: &JsObject,
    property: &str,
    required: bool,
    context: &mut Context,
) -> JsResult<Option<T>> {
    // 1. Let value be ? Get(options, property).
    let value = options.get(property, context)?;

    // The steps 3 to 7 are delegated to each `OptionType` implementation.
    if !value.is_undefined() {
        return T::from_value(value, context).map(Some);
    }

    // 2. If value is undefined, then
    if required {
        // a. If default is required, throw a RangeError exception.
        return Err(JsNativeError::range()
            .with_message("GetOption: option value cannot be undefined")
            .into());
    }

    // b. Return default.
    Ok(None)
}
/// Abstract operation `GetNumberOption ( options, property, minimum, maximum, fallback )`
///
/// Reads the property named `property` from `options` and hands it to
/// [`default_number_option`], which converts it to a number, checks it against the
/// `minimum..=maximum` range and substitutes `fallback` when the property is `undefined`.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-getnumberoption
#[allow(unused)]
pub(super) fn get_number_option(
    options: &JsObject,
    property: &str,
    minimum: f64,
    maximum: f64,
    fallback: Option<f64>,
    context: &mut Context,
) -> JsResult<Option<f64>> {
    // 1. Assert: Type(options) is Object.
    // 2. Let value be ? Get(options, property).
    // 3. Return ? DefaultNumberOption(value, minimum, maximum, fallback).
    options
        .get(property, context)
        .and_then(|value| default_number_option(&value, minimum, maximum, fallback, context))
}
/// Abstract operation [`DefaultNumberOption ( value, minimum, maximum, fallback )`][spec]
///
/// Returns `fallback` when `value` is `undefined`. Otherwise converts `value` to a number
/// and returns its floor, or a `RangeError` if the number is `NaN`, below `minimum` or
/// above `maximum`.
///
/// [spec]: https://tc39.es/ecma402/#sec-defaultnumberoption
#[allow(unused)]
pub(super) fn default_number_option(
    value: &JsValue,
    minimum: f64,
    maximum: f64,
    fallback: Option<f64>,
    context: &mut Context,
) -> JsResult<Option<f64>> {
    // 1. If value is undefined, return fallback.
    if value.is_undefined() {
        return Ok(fallback);
    }

    // 2. Set value to ? ToNumber(value).
    let number = value.to_number(context)?;

    // 3. If value is NaN or less than minimum or greater than maximum, throw a RangeError exception.
    let rejected = number.is_nan() || number < minimum || number > maximum;
    if rejected {
        Err(JsNativeError::range()
            .with_message("DefaultNumberOption: value is out of range.")
            .into())
    } else {
        // 4. Return floor(value).
        Ok(Some(number.floor()))
    }
}
/// Abstract operation [`GetOptionsObject ( options )`][spec]
///
/// Returns a [`JsObject`] suitable for use with [`get_option`], either `options` itself or a default empty
/// `JsObject`. It throws a `TypeError` if `options` is not undefined and not a `JsObject`.
///
/// [spec]: https://tc39.es/ecma402/#sec-getoptionsobject
pub(super) fn get_options_object(options: &JsValue) -> JsResult<JsObject> {
match options {
// If options is undefined, then
JsValue::Undefined => {
// a. Return OrdinaryObjectCreate(null).
Ok(JsObject::from_proto_and_data(None, ObjectData::ordinary()))
}
// 2. If Type(options) is Object, then
JsValue::Object(obj) => {
// a. Return options.
Ok(obj.clone())
}
// 3. Throw a TypeError exception.
_ => Err(JsNativeError::typ()
.with_message("GetOptionsObject: provided options is not an object")
.into()),
}
}
/// Abstract operation [`CoerceOptionsToObject ( options )`][spec]
///
/// Produces a [`JsObject`] suitable for use with [`get_option`]: `undefined` yields a
/// fresh ordinary object without a prototype, and every other value goes through
/// `ToObject`.
///
/// Because it coerces non-null primitive values into objects, its use is discouraged for
/// new functionality in favour of [`get_options_object`].
///
/// [spec]: https://tc39.es/ecma402/#sec-coerceoptionstoobject
pub(super) fn coerce_options_to_object(
    options: &JsValue,
    context: &mut Context,
) -> JsResult<JsObject> {
    if options.is_undefined() {
        // 1. If options is undefined, then
        //     a. Return OrdinaryObjectCreate(null).
        Ok(JsObject::from_proto_and_data(None, ObjectData::ordinary()))
    } else {
        // 2. Return ? ToObject(options).
        options.to_object(context)
    }
}

41
boa_engine/src/builtins/intl/segmenter/mod.rs

@ -0,0 +1,41 @@
// TODO: implement `Segmenter` when https://github.com/unicode-org/icu4x/issues/2259 closes.
use boa_profiler::Profiler;
use tap::{Conv, Pipe};
use crate::{builtins::BuiltIn, object::ConstructorBuilder, Context, JsResult, JsValue};
mod options;
#[allow(unused)]
pub(crate) use options::*;
/// Placeholder type for the `Intl.Segmenter` builtin.
///
/// It carries no state; see the `TODO` at the top of this module.
#[derive(Debug, Clone)]
pub(crate) struct Segmenter;
impl BuiltIn for Segmenter {
const NAME: &'static str = "Segmenter";
/// Builds the `Segmenter` constructor object and returns it wrapped in `Some`.
fn init(context: &mut Context) -> Option<JsValue> {
let _timer = Profiler::global().start_event(Self::NAME, "init");
// Build a standard constructor backed by `Self::constructor`, using the
// `segmenter` entry of the context's intrinsic constructors.
ConstructorBuilder::with_standard_constructor(
context,
Self::constructor,
context.intrinsics().constructors().segmenter().clone(),
)
.name(Self::NAME)
.length(Self::LENGTH)
.build()
// Convert the built constructor into a `JsValue` and wrap it in `Some`.
.conv::<JsValue>()
.pipe(Some)
}
}
impl Segmenter {
/// The `length` given to the constructor function.
pub(crate) const LENGTH: usize = 0;
/// Stub constructor: ignores all of its arguments and always returns `undefined`.
///
/// The `JsResult` wrapper is kept (hence the `unnecessary_wraps` allowance) even though
/// this stub never fails.
#[allow(clippy::unnecessary_wraps)]
pub(crate) fn constructor(_: &JsValue, _: &[JsValue], _: &mut Context) -> JsResult<JsValue> {
Ok(JsValue::Undefined)
}
}

29
boa_engine/src/builtins/intl/segmenter/options.rs

@ -0,0 +1,29 @@
/// The segmentation granularity, parsed from `grapheme`, `word` or `sentence`.
///
/// Defaults to [`Granularity::Grapheme`].
#[derive(Debug, Clone, Copy, Default)]
pub(crate) enum Granularity {
    #[default]
    Grapheme,
    Word,
    Sentence,
}

/// Error returned when a string is not a valid [`Granularity`].
#[derive(Debug)]
pub(crate) struct ParseGranularityError;

impl std::fmt::Display for ParseGranularityError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "provided string was not `grapheme`, `word` or `sentence`")
    }
}

impl std::str::FromStr for Granularity {
    type Err = ParseGranularityError;

    /// Accepts exactly `"grapheme"`, `"word"` or `"sentence"`; anything else is an error.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Table of the accepted spellings and the variant each one maps to.
        let table = [
            ("grapheme", Self::Grapheme),
            ("word", Self::Word),
            ("sentence", Self::Sentence),
        ];

        table
            .iter()
            .find(|(name, _)| *name == s)
            .map(|(_, granularity)| *granularity)
            .ok_or(ParseGranularityError)
    }
}

547
boa_engine/src/builtins/intl/tests.rs

@ -1,547 +0,0 @@
use crate::{
builtins::intl::date_time_format::{to_date_time_options, DateTimeReqs},
builtins::intl::{
best_available_locale, best_fit_matcher, default_locale, default_number_option,
get_number_option, get_option, insert_unicode_extension_and_canonicalize, lookup_matcher,
resolve_locale, unicode_extension_components, DateTimeFormatRecord, GetOptionType,
},
object::JsObject,
Context, JsValue,
};
use icu_locale_canonicalizer::LocaleCanonicalizer;
use rustc_hash::FxHashMap;
/// Exercises `best_available_locale` with empty, exact, truncated and extension-bearing inputs.
#[test]
fn best_avail_loc() {
// No available locales: nothing can be matched.
let no_extensions_locale = "en-US";
let available_locales = Vec::new();
assert_eq!(
best_available_locale(&available_locales, no_extensions_locale),
None
);
// The requested locale is itself available: it is returned unchanged.
let no_extensions_locale = "de-DE";
let available_locales = vec![no_extensions_locale];
assert_eq!(
best_available_locale(&available_locales, no_extensions_locale),
Some(no_extensions_locale)
);
// Only the language part is available: `fr-CA` is expected to resolve to `fr`.
let locale_part = "fr";
let no_extensions_locale = locale_part.to_string() + "-CA";
let available_locales = vec![locale_part];
assert_eq!(
best_available_locale(&available_locales, &no_extensions_locale),
Some(locale_part)
);
// A longer candidate is requested: `ja-Kana-JP` is expected rather than `ja-Kana-JP-t`.
let ja_kana_t = "ja-Kana-JP-t";
let ja_kana = "ja-Kana-JP";
let no_extensions_locale = "ja-Kana-JP-t-it-latn-it";
let available_locales = vec![ja_kana_t, ja_kana];
assert_eq!(
best_available_locale(&available_locales, no_extensions_locale),
Some(ja_kana)
);
}
/// Exercises `lookup_matcher` (and, in the last case, `best_fit_matcher`) against small
/// sets of available and requested locales.
#[test]
fn lookup_match() {
let provider = icu_testdata::get_provider();
let canonicalizer =
LocaleCanonicalizer::new(&provider).expect("Could not create canonicalizer");
// available: [], requested: []
// Expected: the default locale, with no extension.
let available_locales = Vec::new();
let requested_locales = Vec::new();
let matcher = lookup_matcher(&available_locales, &requested_locales, &canonicalizer);
assert_eq!(
matcher.locale,
default_locale(&canonicalizer).to_string().as_str()
);
assert_eq!(matcher.extension, "");
// available: [de-DE], requested: []
// Expected: still the default locale, since nothing was requested.
let available_locales = vec!["de-DE"];
let requested_locales = Vec::new();
let matcher = lookup_matcher(&available_locales, &requested_locales, &canonicalizer);
assert_eq!(
matcher.locale,
default_locale(&canonicalizer).to_string().as_str()
);
assert_eq!(matcher.extension, "");
// available: [fr-FR], requested: [fr-FR-u-hc-h12]
// Expected: the locale and its unicode extension are reported separately.
let available_locales = vec!["fr-FR"];
let requested_locales = vec!["fr-FR-u-hc-h12"];
let matcher = lookup_matcher(&available_locales, &requested_locales, &canonicalizer);
assert_eq!(matcher.locale, "fr-FR");
assert_eq!(matcher.extension, "u-hc-h12");
// available: [es-ES], requested: [es-ES]
// NOTE: this case goes through `best_fit_matcher`, not `lookup_matcher`.
let available_locales = vec!["es-ES"];
let requested_locales = vec!["es-ES"];
let matcher = best_fit_matcher(&available_locales, &requested_locales, &canonicalizer);
assert_eq!(matcher.locale, "es-ES");
assert_eq!(matcher.extension, "");
}
/// Exercises `insert_unicode_extension_and_canonicalize` with an empty extension, a plain
/// locale and a locale that carries a private-use section.
#[test]
fn insert_unicode_ext() {
let provider = icu_testdata::get_provider();
let canonicalizer =
LocaleCanonicalizer::new(&provider).expect("Could not create canonicalizer");
// Empty extension: the locale is expected back unchanged.
let locale = "hu-HU";
let ext = "";
assert_eq!(
insert_unicode_extension_and_canonicalize(locale, ext, &canonicalizer),
locale
);
// Plain locale: the extension is expected to be appended.
let locale = "hu-HU";
let ext = "-u-hc-h12";
assert_eq!(
insert_unicode_extension_and_canonicalize(locale, ext, &canonicalizer),
"hu-HU-u-hc-h12"
);
// Private-use section: the extension is expected before `-x-`, and the result lowercases `PRIVATE`.
let locale = "hu-HU-x-PRIVATE";
let ext = "-u-hc-h12";
assert_eq!(
insert_unicode_extension_and_canonicalize(locale, ext, &canonicalizer),
"hu-HU-u-hc-h12-x-private"
);
}
/// Exercises `unicode_extension_components` on extensions with keywords only, with an
/// attribute, with a valueless keyword and with a multi-part value.
#[test]
fn uni_ext_comp() {
// Two keywords, no attributes.
let ext = "-u-ca-japanese-hc-h12";
let components = unicode_extension_components(ext);
assert!(components.attributes.is_empty());
assert_eq!(components.keywords.len(), 2);
assert_eq!(components.keywords[0].key, "ca");
assert_eq!(components.keywords[0].value, "japanese");
assert_eq!(components.keywords[1].key, "hc");
assert_eq!(components.keywords[1].value, "h12");
// One attribute (`alias`) followed by two keywords.
let ext = "-u-alias-co-phonebk-ka-shifted";
let components = unicode_extension_components(ext);
assert_eq!(components.attributes, vec![String::from("alias")]);
assert_eq!(components.keywords.len(), 2);
assert_eq!(components.keywords[0].key, "co");
assert_eq!(components.keywords[0].value, "phonebk");
assert_eq!(components.keywords[1].key, "ka");
assert_eq!(components.keywords[1].value, "shifted");
// The `kk` keyword has no value: an empty string is expected.
let ext = "-u-ca-buddhist-kk-nu-thai";
let components = unicode_extension_components(ext);
assert!(components.attributes.is_empty());
assert_eq!(components.keywords.len(), 3);
assert_eq!(components.keywords[0].key, "ca");
assert_eq!(components.keywords[0].value, "buddhist");
assert_eq!(components.keywords[1].key, "kk");
assert_eq!(components.keywords[1].value, "");
assert_eq!(components.keywords[2].key, "nu");
assert_eq!(components.keywords[2].value, "thai");
// A value made of several subtags is expected to stay joined (`islamic-civil`).
let ext = "-u-ca-islamic-civil";
let components = unicode_extension_components(ext);
assert!(components.attributes.is_empty());
assert_eq!(components.keywords.len(), 1);
assert_eq!(components.keywords[0].key, "ca");
assert_eq!(components.keywords[0].value, "islamic-civil");
}
/// Exercises `resolve_locale` with both matchers and with empty and non-empty locale lists.
///
/// Every case uses empty relevant extension keys and empty locale data, and expects the
/// resulting `properties` to be empty.
#[test]
fn locale_resolution() {
let mut context = Context::default();
// test lookup
// Nothing available and nothing requested: the default locale is expected.
let available_locales = Vec::new();
let requested_locales = Vec::new();
let relevant_extension_keys = Vec::new();
let locale_data = FxHashMap::default();
let options = DateTimeFormatRecord {
locale_matcher: "lookup".into(),
properties: FxHashMap::default(),
};
let locale_record = resolve_locale(
&available_locales,
&requested_locales,
&options,
&relevant_extension_keys,
&locale_data,
&mut context,
);
assert_eq!(
locale_record.locale,
default_locale(context.icu().locale_canonicalizer())
.to_string()
.as_str()
);
assert_eq!(
locale_record.data_locale,
default_locale(context.icu().locale_canonicalizer())
.to_string()
.as_str()
);
assert!(locale_record.properties.is_empty());
// test best fit
// NOTE(review): the matcher string here is "best-fit", while the ECMA-402 option value is
// "best fit" — confirm how `resolve_locale` treats strings other than "lookup".
let available_locales = Vec::new();
let requested_locales = Vec::new();
let relevant_extension_keys = Vec::new();
let locale_data = FxHashMap::default();
let options = DateTimeFormatRecord {
locale_matcher: "best-fit".into(),
properties: FxHashMap::default(),
};
let locale_record = resolve_locale(
&available_locales,
&requested_locales,
&options,
&relevant_extension_keys,
&locale_data,
&mut context,
);
assert_eq!(
locale_record.locale,
default_locale(context.icu().locale_canonicalizer())
.to_string()
.as_str()
);
assert_eq!(
locale_record.data_locale,
default_locale(context.icu().locale_canonicalizer())
.to_string()
.as_str()
);
assert!(locale_record.properties.is_empty());
// available: [es-ES], requested: [es-ES]
// Expected: the requested locale is resolved as is.
let available_locales = vec!["es-ES"];
let requested_locales = vec!["es-ES"];
let relevant_extension_keys = Vec::new();
let locale_data = FxHashMap::default();
let options = DateTimeFormatRecord {
locale_matcher: "lookup".into(),
properties: FxHashMap::default(),
};
let locale_record = resolve_locale(
&available_locales,
&requested_locales,
&options,
&relevant_extension_keys,
&locale_data,
&mut context,
);
assert_eq!(locale_record.locale, "es-ES");
assert_eq!(locale_record.data_locale, "es-ES");
assert!(locale_record.properties.is_empty());
// available: [zh-CN], requested: []
// Expected: the default locale, since nothing was requested.
let available_locales = vec!["zh-CN"];
let requested_locales = Vec::new();
let relevant_extension_keys = Vec::new();
let locale_data = FxHashMap::default();
let options = DateTimeFormatRecord {
locale_matcher: "lookup".into(),
properties: FxHashMap::default(),
};
let locale_record = resolve_locale(
&available_locales,
&requested_locales,
&options,
&relevant_extension_keys,
&locale_data,
&mut context,
);
assert_eq!(
locale_record.locale,
default_locale(context.icu().locale_canonicalizer())
.to_string()
.as_str()
);
assert_eq!(
locale_record.data_locale,
default_locale(context.icu().locale_canonicalizer())
.to_string()
.as_str()
);
assert!(locale_record.properties.is_empty());
}
/// Exercises `get_option`, `default_number_option` and `get_number_option`.
///
/// The first part covers string and boolean options (fallback, present value, allowed
/// values, rejected value); the second part covers the numeric range checks.
#[test]
fn get_opt() {
let mut context = Context::default();
// Missing property: the fallback is expected.
let values = Vec::new();
let fallback = JsValue::String("fallback".into());
let options_obj = JsObject::empty();
let option_type = GetOptionType::String;
let get_option_result = get_option(
&options_obj,
"",
&option_type,
&values,
&fallback,
&mut context,
)
.expect("GetOption should not fail on fallback test");
assert_eq!(get_option_result, fallback);
// Present string property with no list of allowed values: the value is expected back.
let values = Vec::new();
let fallback = JsValue::String("fallback".into());
let options_obj = JsObject::empty();
let locale_value = JsValue::String("en-US".into());
options_obj
.set("Locale", locale_value.clone(), true, &mut context)
.expect("Setting a property should not fail");
let option_type = GetOptionType::String;
let get_option_result = get_option(
&options_obj,
"Locale",
&option_type,
&values,
&fallback,
&mut context,
)
.expect("GetOption should not fail on string test");
assert_eq!(get_option_result, locale_value);
// Present string property that is included in the allowed values.
let fallback = JsValue::String("fallback".into());
let options_obj = JsObject::empty();
let locale_string = "en-US";
let locale_value = JsValue::String(locale_string.into());
let values = vec![locale_string];
options_obj
.set("Locale", locale_value.clone(), true, &mut context)
.expect("Setting a property should not fail");
let option_type = GetOptionType::String;
let get_option_result = get_option(
&options_obj,
"Locale",
&option_type,
&values,
&fallback,
&mut context,
)
.expect("GetOption should not fail on values test");
assert_eq!(get_option_result, locale_value);
// Present boolean property.
let fallback = JsValue::new(false);
let options_obj = JsObject::empty();
let boolean_value = JsValue::new(true);
let values = Vec::new();
options_obj
.set("boolean_val", boolean_value.clone(), true, &mut context)
.expect("Setting a property should not fail");
let option_type = GetOptionType::Boolean;
let get_option_result = get_option(
&options_obj,
"boolean_val",
&option_type,
&values,
&fallback,
&mut context,
)
.expect("GetOption should not fail on boolean test");
assert_eq!(get_option_result, boolean_value);
// Present string property that is NOT in the allowed values: an error is expected.
let fallback = JsValue::String("fallback".into());
let options_obj = JsObject::empty();
let locale_value = JsValue::String("en-US".into());
let other_locale_str = "de-DE";
let values = vec![other_locale_str];
options_obj
.set("Locale", locale_value, true, &mut context)
.expect("Setting a property should not fail");
let option_type = GetOptionType::String;
let get_option_result = get_option(
&options_obj,
"Locale",
&option_type,
&values,
&fallback,
&mut context,
);
assert!(get_option_result.is_err());
// `default_number_option`: undefined yields the fallback.
let value = JsValue::undefined();
let minimum = 1.0;
let maximum = 10.0;
let fallback_val = 5.0;
let fallback = Some(fallback_val);
let get_option_result =
default_number_option(&value, minimum, maximum, fallback, &mut context).unwrap();
assert_eq!(get_option_result, fallback);
// `default_number_option`: NaN is rejected.
let value = JsValue::nan();
let minimum = 1.0;
let maximum = 10.0;
let fallback = Some(5.0);
let get_option_result = default_number_option(&value, minimum, maximum, fallback, &mut context);
assert!(get_option_result.is_err());
// `default_number_option`: a value below the minimum is rejected.
let value = JsValue::new(0);
let minimum = 1.0;
let maximum = 10.0;
let fallback = Some(5.0);
let get_option_result = default_number_option(&value, minimum, maximum, fallback, &mut context);
assert!(get_option_result.is_err());
// `default_number_option`: a value above the maximum is rejected.
let value = JsValue::new(11);
let minimum = 1.0;
let maximum = 10.0;
let fallback = Some(5.0);
let get_option_result = default_number_option(&value, minimum, maximum, fallback, &mut context);
assert!(get_option_result.is_err());
// `default_number_option`: an in-range value is returned.
let value_f64 = 7.0;
let value = JsValue::new(value_f64);
let minimum = 1.0;
let maximum = 10.0;
let fallback = Some(5.0);
let get_option_result =
default_number_option(&value, minimum, maximum, fallback, &mut context).unwrap();
assert_eq!(get_option_result, Some(value_f64));
// `get_number_option`: a missing property yields the fallback.
let options = JsObject::empty();
let property = "fractionalSecondDigits";
let minimum = 1.0;
let maximum = 10.0;
let fallback_val = 5.0;
let fallback = Some(fallback_val);
let get_option_result =
get_number_option(&options, property, minimum, maximum, fallback, &mut context).unwrap();
assert_eq!(get_option_result, fallback);
// `get_number_option`: a present in-range property is returned.
let options = JsObject::empty();
let value_f64 = 8.0;
let value = JsValue::new(value_f64);
let property = "fractionalSecondDigits";
options
.set(property, value, true, &mut context)
.expect("Setting a property should not fail");
let minimum = 1.0;
let maximum = 10.0;
let fallback = Some(5.0);
let get_option_result =
get_number_option(&options, property, minimum, maximum, fallback, &mut context).unwrap();
assert_eq!(get_option_result, Some(value_f64));
}
/// Exercises `to_date_time_options` with conflicting style options and with the defaults
/// produced for the `Date`, `Time` and `AnyAll` requirements.
#[test]
fn to_date_time_opts() {
let mut context = Context::default();
// `timeStyle` set while only date components are required: an error is expected.
let options_obj = JsObject::empty();
options_obj
.set("timeStyle", JsObject::empty(), true, &mut context)
.expect("Setting a property should not fail");
let date_time_opts = to_date_time_options(
&JsValue::new(options_obj),
&DateTimeReqs::Date,
&DateTimeReqs::Date,
&mut context,
);
assert!(date_time_opts.is_err());
// `dateStyle` set while only time components are required: an error is expected.
let options_obj = JsObject::empty();
options_obj
.set("dateStyle", JsObject::empty(), true, &mut context)
.expect("Setting a property should not fail");
let date_time_opts = to_date_time_options(
&JsValue::new(options_obj),
&DateTimeReqs::Time,
&DateTimeReqs::Time,
&mut context,
);
assert!(date_time_opts.is_err());
// Undefined options with `Date`: year, month and day are expected to be "numeric".
let date_time_opts = to_date_time_options(
&JsValue::undefined(),
&DateTimeReqs::Date,
&DateTimeReqs::Date,
&mut context,
)
.expect("toDateTimeOptions should not fail in date test");
let numeric_jsstring = JsValue::String("numeric".into());
assert_eq!(
date_time_opts.get("year", &mut context).unwrap(),
numeric_jsstring
);
assert_eq!(
date_time_opts.get("month", &mut context).unwrap(),
numeric_jsstring
);
assert_eq!(
date_time_opts.get("day", &mut context).unwrap(),
numeric_jsstring
);
// Undefined options with `Time`: hour, minute and second are expected to be "numeric".
let date_time_opts = to_date_time_options(
&JsValue::undefined(),
&DateTimeReqs::Time,
&DateTimeReqs::Time,
&mut context,
)
.expect("toDateTimeOptions should not fail in time test");
let numeric_jsstring = JsValue::String("numeric".into());
assert_eq!(
date_time_opts.get("hour", &mut context).unwrap(),
numeric_jsstring
);
assert_eq!(
date_time_opts.get("minute", &mut context).unwrap(),
numeric_jsstring
);
assert_eq!(
date_time_opts.get("second", &mut context).unwrap(),
numeric_jsstring
);
// Undefined options with `AnyAll`: all six date and time components are expected to be "numeric".
let date_time_opts = to_date_time_options(
&JsValue::undefined(),
&DateTimeReqs::AnyAll,
&DateTimeReqs::AnyAll,
&mut context,
)
.expect("toDateTimeOptions should not fail when testing required = 'any'");
let numeric_jsstring = JsValue::String("numeric".into());
assert_eq!(
date_time_opts.get("year", &mut context).unwrap(),
numeric_jsstring
);
assert_eq!(
date_time_opts.get("month", &mut context).unwrap(),
numeric_jsstring
);
assert_eq!(
date_time_opts.get("day", &mut context).unwrap(),
numeric_jsstring
);
assert_eq!(
date_time_opts.get("hour", &mut context).unwrap(),
numeric_jsstring
);
assert_eq!(
date_time_opts.get("minute", &mut context).unwrap(),
numeric_jsstring
);
assert_eq!(
date_time_opts.get("second", &mut context).unwrap(),
numeric_jsstring
);
}

63
boa_engine/src/builtins/string/mod.rs

@ -95,6 +95,7 @@ impl BuiltIn for String {
.method(Self::includes, "includes", 1)
.method(Self::index_of, "indexOf", 1)
.method(Self::last_index_of, "lastIndexOf", 1)
.method(Self::locale_compare, "localeCompare", 1)
.method(Self::r#match, "match", 1)
.method(Self::normalize, "normalize", 1)
.method(Self::pad_end, "padEnd", 1)
@ -1268,6 +1269,68 @@ impl String {
Ok(JsValue::new(-1))
}
/// [`String.prototype.localeCompare ( that [ , locales [ , options ] ] )`][spec]
///
/// Returns a number indicating whether a reference string comes before, or after, or is the
/// same as the given string in sort order.
///
/// With the `intl` feature enabled, the comparison is delegated to a `Collator` constructed
/// from the `locales` and `options` arguments (`args[1..]`). Without it, the strings are
/// compared with their plain `cmp` ordering. In both cases the result is the ordering
/// converted to a number (`-1`, `0` or `1`).
///
/// More information:
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma402/#sup-String.prototype.localeCompare
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/localeCompare
pub(crate) fn locale_compare(
    this: &JsValue,
    args: &[JsValue],
    context: &mut Context,
) -> JsResult<JsValue> {
    // 1. Let O be ? RequireObjectCoercible(this value).
    let o = this.require_object_coercible()?;

    // 2. Let S be ? ToString(O).
    let s = o.to_string(context)?;

    // 3. Let thatValue be ? ToString(that).
    let that_value = args.get_or_undefined(0).to_string(context)?;

    let cmp = {
        #[cfg(feature = "intl")]
        {
            // 4. Let collator be ? Construct(%Collator%, « locales, options »).
            let collator = crate::builtins::intl::collator::Collator::constructor(
                &context
                    .intrinsics()
                    .constructors()
                    .collator()
                    .constructor()
                    .into(),
                args.get(1..).unwrap_or_default(),
                context,
            )?;

            let collator = collator
                .as_object()
                .map(JsObject::borrow)
                .expect("constructor must return a JsObject");
            let collator = collator
                .as_collator()
                .expect("constructor must return a `Collator` object")
                .collator();

            collator.compare_utf16(&s, &that_value) as i8
        }

        // Default to common comparison if the user doesn't have `Intl` enabled.
        #[cfg(not(feature = "intl"))]
        {
            // This must be the tail expression of the block (no trailing `;`), otherwise
            // the ordering is discarded and `cmp` becomes `()` instead of a number.
            s.cmp(&that_value) as i8
        }
    };

    // 5. Return CompareStrings(collator, S, thatValue).
    Ok(cmp.into())
}
/// `String.prototype.match( regexp )`
///
/// The `match()` method retrieves the result of matching a **string** against a [`regular expression`][regex].

200
boa_engine/src/context/icu.rs

@ -1,79 +1,175 @@
use icu_datetime::provider::{
calendar::{DatePatternsV1Marker, DateSkeletonPatternsV1Marker, DateSymbolsV1Marker},
week_data::WeekDataV1Marker,
};
use icu_locale_canonicalizer::{
provider::{AliasesV1Marker, LikelySubtagsV1Marker},
LocaleCanonicalizer,
use std::fmt::Debug;
use icu_collator::{Collator, CollatorError, CollatorOptions};
use icu_list::{ListError, ListFormatter, ListLength};
use icu_locid_transform::{LocaleCanonicalizer, LocaleExpander, LocaleTransformError};
use icu_provider::{
yoke::{trait_hack::YokeTraitHack, Yokeable},
zerofrom::ZeroFrom,
AnyProvider, AsDeserializingBufferProvider, AsDowncastingAnyProvider, BufferProvider,
DataError, DataLocale, DataProvider, DataRequest, DataResponse, KeyedDataMarker, MaybeSendSync,
};
use icu_plurals::provider::OrdinalV1Marker;
use icu_provider::prelude::*;
use serde::Deserialize;
/// Trait encompassing all the required implementations that define
/// a valid icu data provider.
pub trait BoaProvider:
ResourceProvider<AliasesV1Marker>
+ ResourceProvider<LikelySubtagsV1Marker>
+ ResourceProvider<DateSymbolsV1Marker>
+ ResourceProvider<DatePatternsV1Marker>
+ ResourceProvider<DateSkeletonPatternsV1Marker>
+ ResourceProvider<OrdinalV1Marker>
+ ResourceProvider<WeekDataV1Marker>
{
use crate::builtins::intl::list_format::ListFormatType;
/// ICU4X data provider used in boa.
///
/// Providers can be either [`BufferProvider`]s or [`AnyProvider`]s.
///
/// The [`icu_provider`] documentation has more information about data providers.
pub enum BoaProvider {
/// A [`BufferProvider`] data provider.
Buffer(Box<dyn BufferProvider>),
/// An [`AnyProvider`] data provider.
Any(Box<dyn AnyProvider>),
}
impl<T> BoaProvider for T where
T: ResourceProvider<AliasesV1Marker>
+ ResourceProvider<LikelySubtagsV1Marker>
+ ResourceProvider<DateSymbolsV1Marker>
+ ResourceProvider<DatePatternsV1Marker>
+ ResourceProvider<DateSkeletonPatternsV1Marker>
+ ResourceProvider<OrdinalV1Marker>
+ ResourceProvider<WeekDataV1Marker>
+ ?Sized
impl Debug for BoaProvider {
    /// Prints only the variant name, with a `"_"` placeholder standing in for the
    /// boxed provider.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let variant = match self {
            Self::Buffer(_) => "Buffer",
            Self::Any(_) => "Any",
        };
        f.debug_tuple(variant).field(&"_").finish()
    }
}
// Makes `BoaProvider` usable as a `DataProvider` for any keyed data marker `M` that
// satisfies the bounds below; both variants are handled in `load`.
impl<M> DataProvider<M> for BoaProvider
where
M: KeyedDataMarker + 'static,
for<'de> YokeTraitHack<<M::Yokeable as Yokeable<'de>>::Output>: Deserialize<'de>,
for<'a> YokeTraitHack<<M::Yokeable as Yokeable<'a>>::Output>: Clone,
M::Yokeable: ZeroFrom<'static, M::Yokeable> + MaybeSendSync,
{
/// Loads the data for `req` by delegating to the wrapped provider.
fn load(&self, req: DataRequest<'_>) -> Result<DataResponse<M>, DataError> {
match self {
// Buffer providers are adapted with `as_deserializing` before loading.
BoaProvider::Buffer(provider) => provider.as_deserializing().load(req),
// Any providers are adapted with `as_downcasting` before loading.
BoaProvider::Any(provider) => provider.as_downcasting().load(req),
}
}
}
/// Collection of tools initialized from a [`BoaProvider`] that are used
impl BoaProvider {
/// Creates a new [`LocaleCanonicalizer`] from the provided [`DataProvider`].
pub(crate) fn try_new_locale_canonicalizer(
&self,
) -> Result<LocaleCanonicalizer, LocaleTransformError> {
match self {
BoaProvider::Buffer(buffer) => {
LocaleCanonicalizer::try_new_with_buffer_provider(&**buffer)
}
BoaProvider::Any(any) => LocaleCanonicalizer::try_new_with_any_provider(&**any),
}
}
/// Creates a new [`LocaleExpander`] from the provided [`DataProvider`].
pub(crate) fn try_new_locale_expander(&self) -> Result<LocaleExpander, LocaleTransformError> {
match self {
BoaProvider::Buffer(buffer) => LocaleExpander::try_new_with_buffer_provider(&**buffer),
BoaProvider::Any(any) => LocaleExpander::try_new_with_any_provider(&**any),
}
}
/// Creates a new [`ListFormatter`] from the provided [`DataProvider`] and options.
pub(crate) fn try_new_list_formatter(
&self,
locale: &DataLocale,
typ: ListFormatType,
style: ListLength,
) -> Result<ListFormatter, ListError> {
match self {
BoaProvider::Buffer(buf) => match typ {
ListFormatType::Conjunction => {
ListFormatter::try_new_and_with_length_with_buffer_provider(
&**buf, locale, style,
)
}
ListFormatType::Disjunction => {
ListFormatter::try_new_or_with_length_with_buffer_provider(
&**buf, locale, style,
)
}
ListFormatType::Unit => {
ListFormatter::try_new_unit_with_length_with_buffer_provider(
&**buf, locale, style,
)
}
},
BoaProvider::Any(any) => match typ {
ListFormatType::Conjunction => {
ListFormatter::try_new_and_with_length_with_any_provider(&**any, locale, style)
}
ListFormatType::Disjunction => {
ListFormatter::try_new_or_with_length_with_any_provider(&**any, locale, style)
}
ListFormatType::Unit => {
ListFormatter::try_new_unit_with_length_with_any_provider(&**any, locale, style)
}
},
}
}
/// Creates a new [`Collator`] from the provided [`DataProvider`] and options.
pub(crate) fn try_new_collator(
&self,
locale: &DataLocale,
options: CollatorOptions,
) -> Result<Collator, CollatorError> {
match self {
BoaProvider::Buffer(buf) => {
Collator::try_new_with_buffer_provider(&**buf, locale, options)
}
BoaProvider::Any(any) => Collator::try_new_with_any_provider(&**any, locale, options),
}
}
}
/// Collection of tools initialized from a [`DataProvider`] that are used
/// for the functionality of `Intl`.
#[allow(unused)]
pub(crate) struct Icu {
provider: Box<dyn BoaProvider>,
pub(crate) struct Icu<P> {
provider: P,
locale_canonicalizer: LocaleCanonicalizer,
locale_expander: LocaleExpander,
}
impl std::fmt::Debug for Icu {
impl<P: Debug> Debug for Icu<P> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
#[derive(Debug)]
struct Canonicalizer;
f.debug_struct("Icu")
.field("locale_canonicalizer", &Canonicalizer)
.field("provider", &self.provider)
.field("locale_canonicalizer", &"LocaleCanonicalizer")
.finish()
}
}
impl Icu {
/// Create a new [`Icu`] from a valid [`BoaProvider`]
impl<P> Icu<P> {
/// Gets the [`LocaleCanonicalizer`] tool.
pub(crate) const fn locale_canonicalizer(&self) -> &LocaleCanonicalizer {
&self.locale_canonicalizer
}
/// Gets the inner icu data provider
pub(crate) const fn provider(&self) -> &P {
&self.provider
}
/// Gets the [`LocaleExpander`] tool.
pub(crate) const fn locale_expander(&self) -> &LocaleExpander {
&self.locale_expander
}
}
impl Icu<BoaProvider> {
/// Creates a new [`Icu`] from a valid [`BoaProvider`]
///
/// # Errors
///
/// This method will return an error if any of the tools
/// required cannot be constructed.
pub(crate) fn new(provider: Box<dyn BoaProvider>) -> Result<Self, DataError> {
pub(crate) fn new(provider: BoaProvider) -> Result<Self, LocaleTransformError> {
Ok(Self {
locale_canonicalizer: LocaleCanonicalizer::new(&*provider)?,
locale_canonicalizer: provider.try_new_locale_canonicalizer()?,
locale_expander: provider.try_new_locale_expander()?,
provider,
})
}
/// Get the [`LocaleCanonicalizer`] tool.
pub(crate) const fn locale_canonicalizer(&self) -> &LocaleCanonicalizer {
&self.locale_canonicalizer
}
/// Get the inner icu data provider
#[allow(unused)]
pub(crate) fn provider(&self) -> &dyn BoaProvider {
self.provider.as_ref()
}
}

64
boa_engine/src/context/intrinsics.rs

@ -120,6 +120,14 @@ pub struct StandardConstructors {
date_time_format: StandardConstructor,
promise: StandardConstructor,
weak_ref: StandardConstructor,
#[cfg(feature = "intl")]
collator: StandardConstructor,
#[cfg(feature = "intl")]
list_format: StandardConstructor,
#[cfg(feature = "intl")]
locale: StandardConstructor,
#[cfg(feature = "intl")]
segmenter: StandardConstructor,
}
impl Default for StandardConstructors {
@ -180,6 +188,14 @@ impl Default for StandardConstructors {
date_time_format: StandardConstructor::default(),
promise: StandardConstructor::default(),
weak_ref: StandardConstructor::default(),
#[cfg(feature = "intl")]
collator: StandardConstructor::default(),
#[cfg(feature = "intl")]
list_format: StandardConstructor::default(),
#[cfg(feature = "intl")]
locale: StandardConstructor::default(),
#[cfg(feature = "intl")]
segmenter: StandardConstructor::default(),
};
// The value of `Array.prototype` is the Array prototype object.
@ -670,6 +686,54 @@ impl StandardConstructors {
pub const fn weak_ref(&self) -> &StandardConstructor {
&self.weak_ref
}
/// Returns the `Intl.Collator` constructor.
///
/// This function is only available if the `intl` feature is enabled.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-intl.collator
#[inline]
#[cfg(feature = "intl")]
pub const fn collator(&self) -> &StandardConstructor {
&self.collator
}
/// Returns the `Intl.ListFormat` constructor.
///
/// This function is only available if the `intl` feature is enabled.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-Intl.ListFormat
#[inline]
#[cfg(feature = "intl")]
pub const fn list_format(&self) -> &StandardConstructor {
&self.list_format
}
/// Returns the `Intl.Locale` constructor.
///
/// This function is only available if the `intl` feature is enabled.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-Intl.Locale
#[inline]
#[cfg(feature = "intl")]
pub const fn locale(&self) -> &StandardConstructor {
&self.locale
}
/// Returns the `Intl.Segmenter` constructor.
///
/// This function is only available if the `intl` feature is enabled.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-intl.segmenter
#[inline]
#[cfg(feature = "intl")]
pub const fn segmenter(&self) -> &StandardConstructor {
&self.segmenter
}
}
/// Cached intrinsic objects

61
boa_engine/src/context/mod.rs

@ -3,11 +3,13 @@
pub mod intrinsics;
#[cfg(feature = "intl")]
mod icu;
pub(crate) mod icu;
use std::collections::VecDeque;
#[cfg(feature = "intl")]
pub use icu::BoaProvider;
use intrinsics::{IntrinsicObjects, Intrinsics};
use std::collections::VecDeque;
#[cfg(feature = "console")]
use crate::builtins::console::Console;
@ -30,13 +32,6 @@ use boa_interner::{Interner, Sym};
use boa_parser::{Error as ParseError, Parser};
use boa_profiler::Profiler;
#[cfg(feature = "intl")]
use icu_provider::DataError;
#[doc(inline)]
#[cfg(all(feature = "intl", doc))]
pub use icu::BoaProvider;
/// ECMAScript context. It is the primary way to interact with the runtime.
///
/// `Context`s constructed in a thread share the same runtime, therefore it
@ -78,7 +73,6 @@ pub use icu::BoaProvider;
///
/// assert_eq!(value.as_number(), Some(12.0))
/// ```
#[derive(Debug)]
pub struct Context {
/// realm holds both the global object and the environment
pub(crate) realm: Realm,
@ -95,7 +89,7 @@ pub struct Context {
/// ICU related utilities
#[cfg(feature = "intl")]
icu: icu::Icu,
icu: icu::Icu<BoaProvider>,
/// Number of instructions remaining before a forced exit
#[cfg(feature = "fuzz")]
@ -108,6 +102,29 @@ pub struct Context {
pub(crate) kept_alive: Vec<JsObject>,
}
impl std::fmt::Debug for Context {
    /// Formats the main components of the context.
    ///
    /// The `console` and `icu` fields are only printed when their respective
    /// features are enabled. Not every field of [`Context`] is printed (for
    /// example `kept_alive` is skipped), so the output is finished with
    /// `finish_non_exhaustive` to make the omission visible as a trailing `..`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut debug = f.debug_struct("Context");

        debug
            .field("realm", &self.realm)
            .field("interner", &self.interner);

        #[cfg(feature = "console")]
        debug.field("console", &self.console);

        debug
            .field("intrinsics", &self.intrinsics)
            .field("vm", &self.vm)
            .field("promise_job_queue", &self.promise_job_queue);

        #[cfg(feature = "intl")]
        debug.field("icu", &self.icu);

        // Some fields are intentionally left out, so do not pretend the
        // listing is complete.
        debug.finish_non_exhaustive()
    }
}
impl Default for Context {
fn default() -> Self {
ContextBuilder::default().build()
@ -552,7 +569,7 @@ impl Context {
#[cfg(feature = "intl")]
/// Get the ICU related utilities
pub(crate) const fn icu(&self) -> &icu::Icu {
pub(crate) const fn icu(&self) -> &icu::Icu<BoaProvider> {
&self.icu
}
@ -591,11 +608,11 @@ impl Context {
feature = "intl",
doc = "The required data in a valid provider is specified in [`BoaProvider`]"
)]
#[derive(Debug, Default)]
#[derive(Default, Debug)]
pub struct ContextBuilder {
interner: Option<Interner>,
#[cfg(feature = "intl")]
icu: Option<icu::Icu>,
icu: Option<icu::Icu<BoaProvider>>,
#[cfg(feature = "fuzz")]
instructions_remaining: usize,
}
@ -615,8 +632,11 @@ impl ContextBuilder {
/// Provides an icu data provider to the [`Context`].
///
/// This function is only available if the `intl` feature is enabled.
#[cfg(any(feature = "intl", docs))]
pub fn icu_provider(mut self, provider: Box<dyn icu::BoaProvider>) -> Result<Self, DataError> {
#[cfg(feature = "intl")]
pub fn icu_provider(
mut self,
provider: BoaProvider,
) -> Result<Self, icu_locid_transform::LocaleTransformError> {
self.icu = Some(icu::Icu::new(provider)?);
Ok(self)
}
@ -657,9 +677,12 @@ impl ContextBuilder {
},
#[cfg(feature = "intl")]
icu: self.icu.unwrap_or_else(|| {
// TODO: Replace with a more fitting default
icu::Icu::new(Box::new(icu_testdata::get_provider()))
.expect("Failed to initialize default icu data.")
use icu_provider_adapters::fallback::LocaleFallbackProvider;
let provider = BoaProvider::Buffer(Box::new(
LocaleFallbackProvider::try_new_with_buffer_provider(boa_icu_provider::blob())
.expect("boa_icu_provider should return a valid provider"),
));
icu::Icu::new(provider).expect("Failed to initialize default icu data.")
}),
#[cfg(feature = "fuzz")]
instructions_remaining: self.instructions_remaining,

17
boa_engine/src/lib.rs

@ -1,4 +1,4 @@
//! Boa's **boa_engine** crate implements ECMAScript's standard library of builtin objects
//! Boa's **`boa_engine`** crate implements ECMAScript's standard library of builtin objects
//! and an ECMAScript context, bytecompiler, and virtual machine for code execution.
//!
//! # Crate Features
@ -14,13 +14,14 @@
//! Try out the most recent release with Boa's live demo [playground][boa-playground].
//!
//! # Boa Crates
//! - **boa_ast** - Boa's ECMAScript Abstract Syntax Tree.
//! - **boa_engine** - Boa's implementation of ECMAScript builtin objects and execution.
//! - **boa_gc** - Boa's garbage collector
//! - **boa_interner** - Boa's string interner
//! - **boa_parser** - Boa's lexer and parser
//! - **boa_profiler** - Boa's code profiler
//! - **boa_unicode** - Boa's Unicode identifier
//! - **`boa_ast`** - Boa's ECMAScript Abstract Syntax Tree.
//! - **`boa_engine`** - Boa's implementation of ECMAScript builtin objects and execution.
//! - **`boa_gc`** - Boa's garbage collector.
//! - **`boa_interner`** - Boa's string interner.
//! - **`boa_parser`** - Boa's lexer and parser.
//! - **`boa_profiler`** - Boa's code profiler.
//! - **`boa_unicode`** - Boa's Unicode identifier.
//! - **`boa_icu_provider`** - Boa's ICU4X data provider.
//!
//! [whatwg]: https://console.spec.whatwg.org
//! [ecma-402]: https://tc39.es/ecma402

131
boa_engine/src/object/mod.rs

@ -23,7 +23,9 @@ use self::internal_methods::{
InternalObjectMethods, ORDINARY_INTERNAL_METHODS,
};
#[cfg(feature = "intl")]
use crate::builtins::intl::date_time_format::DateTimeFormat;
use crate::builtins::intl::{
collator::Collator, date_time_format::DateTimeFormat, list_format::ListFormat,
};
use crate::{
builtins::{
array::array_iterator::ArrayIterator,
@ -270,9 +272,21 @@ pub enum ObjectKind {
/// The `WeakRef` object kind.
WeakRef(WeakGc<GcCell<Object>>),
/// The `Intl.Collator` object kind.
#[cfg(feature = "intl")]
Collator(Box<Collator>),
/// The `Intl.DateTimeFormat` object kind.
#[cfg(feature = "intl")]
DateTimeFormat(Box<DateTimeFormat>),
/// The `Intl.ListFormat` object kind.
#[cfg(feature = "intl")]
ListFormat(Box<ListFormat>),
/// The `Intl.Locale` object kind.
#[cfg(feature = "intl")]
Locale(Box<icu_locid::Locale>),
}
unsafe impl Trace for ObjectKind {
@ -296,11 +310,15 @@ unsafe impl Trace for ObjectKind {
Self::Arguments(a) => mark(a),
Self::NativeObject(o) => mark(o),
Self::IntegerIndexed(i) => mark(i),
#[cfg(feature = "intl")]
Self::DateTimeFormat(f) => mark(f),
Self::Promise(p) => mark(p),
Self::AsyncGenerator(g) => mark(g),
Self::WeakRef(wr) => mark(wr),
#[cfg(feature = "intl")]
Self::DateTimeFormat(f) => mark(f),
#[cfg(feature = "intl")]
Self::Collator(co) => mark(co),
#[cfg(feature = "intl")]
Self::ListFormat(_) | Self::Locale(_) => {}
Self::RegExp(_)
| Self::BigInt(_)
| Self::Boolean(_)
@ -628,6 +646,16 @@ impl ObjectData {
}
}
/// Create the `Collator` object data
///
/// The collator is boxed and stored in an [`ObjectKind::Collator`]; the resulting
/// object uses the ordinary internal methods.
#[cfg(feature = "intl")]
#[must_use]
pub fn collator(collator: Collator) -> Self {
    Self {
        kind: ObjectKind::Collator(Box::new(collator)),
        internal_methods: &ORDINARY_INTERNAL_METHODS,
    }
}
/// Create the `DateTimeFormat` object data
#[cfg(feature = "intl")]
#[must_use]
@ -637,6 +665,26 @@ impl ObjectData {
internal_methods: &ORDINARY_INTERNAL_METHODS,
}
}
/// Create the `ListFormat` object data
///
/// The formatter is boxed and stored in an [`ObjectKind::ListFormat`]; the resulting
/// object uses the ordinary internal methods.
#[cfg(feature = "intl")]
#[must_use]
pub fn list_format(list_format: ListFormat) -> Self {
    let kind = ObjectKind::ListFormat(Box::new(list_format));
    Self {
        kind,
        internal_methods: &ORDINARY_INTERNAL_METHODS,
    }
}
/// Create the `Locale` object data
///
/// The locale is boxed and stored in an [`ObjectKind::Locale`]; the resulting
/// object uses the ordinary internal methods.
#[cfg(feature = "intl")]
#[must_use]
pub fn locale(locale: icu_locid::Locale) -> Self {
    let kind = ObjectKind::Locale(Box::new(locale));
    Self {
        kind,
        internal_methods: &ORDINARY_INTERNAL_METHODS,
    }
}
}
impl Display for ObjectKind {
@ -674,10 +722,16 @@ impl Display for ObjectKind {
Self::NativeObject(_) => "NativeObject",
Self::IntegerIndexed(_) => "TypedArray",
Self::DataView(_) => "DataView",
#[cfg(feature = "intl")]
Self::DateTimeFormat(_) => "DateTimeFormat",
Self::Promise(_) => "Promise",
Self::WeakRef(_) => "WeakRef",
#[cfg(feature = "intl")]
Self::Collator(_) => "Collator",
#[cfg(feature = "intl")]
Self::DateTimeFormat(_) => "DateTimeFormat",
#[cfg(feature = "intl")]
Self::ListFormat(_) => "ListFormat",
#[cfg(feature = "intl")]
Self::Locale(_) => "Locale",
})
}
}
@ -1572,7 +1626,7 @@ impl Object {
}
}
/// Gets the `WeakRef`data if the object is a `WeakRef`.
/// Gets the `WeakRef` data if the object is a `WeakRef`.
#[inline]
pub const fn as_weak_ref(&self) -> Option<&WeakGc<GcCell<Self>>> {
match self.data {
@ -1584,6 +1638,71 @@ impl Object {
}
}
/// Gets the `Collator` data if the object is a `Collator`.
///
/// Returns `None` for every other kind of object.
#[inline]
#[cfg(feature = "intl")]
pub const fn as_collator(&self) -> Option<&Collator> {
    if let ObjectKind::Collator(ref collator) = self.data.kind {
        Some(collator)
    } else {
        None
    }
}
/// Gets a mutable reference to the `Collator` data if the object is a `Collator`.
///
/// Returns `None` for every other kind of object.
#[inline]
#[cfg(feature = "intl")]
pub fn as_collator_mut(&mut self) -> Option<&mut Collator> {
    if let ObjectKind::Collator(ref mut collator) = self.data.kind {
        Some(collator)
    } else {
        None
    }
}
/// Checks if it is a `Locale` object.
///
/// Only the kind of the object data is inspected.
#[inline]
#[cfg(feature = "intl")]
pub const fn is_locale(&self) -> bool {
    matches!(self.data.kind, ObjectKind::Locale(_))
}
/// Gets the `Locale` data if the object is a `Locale`.
///
/// Returns `None` for every other kind of object.
#[inline]
#[cfg(feature = "intl")]
pub const fn as_locale(&self) -> Option<&icu_locid::Locale> {
    match self.data.kind {
        ObjectKind::Locale(ref locale) => Some(locale),
        _ => None,
    }
}
/// Gets the `ListFormat` data if the object is a `ListFormat`.
///
/// Returns `None` for every other kind of object.
#[inline]
#[cfg(feature = "intl")]
pub const fn as_list_format(&self) -> Option<&ListFormat> {
    match self.data.kind {
        ObjectKind::ListFormat(ref list_format) => Some(list_format),
        _ => None,
    }
}
/// Return `true` if it is a native object and the native type is `T`.
pub fn is<T>(&self) -> bool
where

5
boa_engine/src/object/operations.rs

@ -459,6 +459,11 @@ impl JsObject {
Ok(true)
}
/// Abstract operation [`LengthOfArrayLike ( obj )`][spec].
///
/// Returns the value of the "length" property of an array-like object.
///
/// [spec]: https://tc39.es/ecma262/#sec-lengthofarraylike
pub(crate) fn length_of_array_like(&self, context: &mut Context) -> JsResult<u64> {
// 1. Assert: Type(obj) is Object.
// 2. Return ℝ(? ToLength(? Get(obj, "length"))).

10
boa_engine/src/string/mod.rs

@ -31,10 +31,12 @@ use std::{
alloc::{alloc, dealloc, Layout},
borrow::Borrow,
cell::Cell,
convert::Infallible,
hash::{Hash, Hasher},
ops::{Deref, Index},
ptr::{self, NonNull},
slice::SliceIndex,
str::FromStr,
};
use self::common::{COMMON_STRINGS, COMMON_STRINGS_CACHE, MAX_COMMON_STRING_LENGTH};
@ -882,6 +884,14 @@ impl PartialOrd for JsString {
}
}
impl FromStr for JsString {
type Err = Infallible;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(JsString::from(s))
}
}
/// Utility trait that adds trimming functionality to every `UTF-16` string.
pub(crate) trait Utf16Trim {
/// Trims both leading and trailing space from `self`.

19
boa_gc/src/lib.rs

@ -1,7 +1,7 @@
//! Boa's **boa_gc** crate implements a garbage collector.
//! Boa's **`boa_gc`** crate implements a garbage collector.
//!
//! # Crate Overview
//! **boa_gc** is a mark-sweep garbage collector that implements a Trace and Finalize trait
//! **`boa_gc`** is a mark-sweep garbage collector that implements a Trace and Finalize trait
//! for garbage collected values.
//!
//! # About Boa
@ -11,13 +11,14 @@
//! Try out the most recent release with Boa's live demo [playground][boa-playground].
//!
//! # Boa Crates
//! - **boa_ast** - Boa's ECMAScript Abstract Syntax Tree.
//! - **boa_engine** - Boa's implementation of ECMAScript builtin objects and execution.
//! - **boa_gc** - Boa's garbage collector
//! - **boa_interner** - Boa's string interner
//! - **boa_parser** - Boa's lexer and parser
//! - **boa_profiler** - Boa's code profiler
//! - **boa_unicode** - Boa's Unicode identifier
//! - **`boa_ast`** - Boa's ECMAScript Abstract Syntax Tree.
//! - **`boa_engine`** - Boa's implementation of ECMAScript builtin objects and execution.
//! - **`boa_gc`** - Boa's garbage collector.
//! - **`boa_interner`** - Boa's string interner.
//! - **`boa_parser`** - Boa's lexer and parser.
//! - **`boa_profiler`** - Boa's code profiler.
//! - **`boa_unicode`** - Boa's Unicode identifier.
//! - **`boa_icu_provider`** - Boa's ICU4X data provider.
//!
//! [boa-conformance]: https://boa-dev.github.io/boa/test262/
//! [boa-web]: https://boa-dev.github.io/

28
boa_icu_provider/Cargo.toml

@ -0,0 +1,28 @@
[package]
name = "boa_icu_provider"
description = "ICU4X data provider for the Boa JavaScript engine."
keywords = ["javascript", "cldr", "unicode"]
categories = ["internationalization"]
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
repository.workspace = true
rust-version.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
icu_provider = { version = "1.0.1", features = ["serde", "sync"] }
icu_provider_blob = "1.0.0"
icu_datagen = { version = "1.0.2", optional = true }
log = { version = "0.4.17", optional = true }
simple_logger = { version = "4.0.0", optional = true }
[features]
bin = ["dep:icu_datagen", "dep:simple_logger", "dep:log"]
[[bin]]
name = "boa-datagen"
path = "src/bin/datagen.rs"
required-features = ["bin"]

12
boa_icu_provider/README.md

@ -0,0 +1,12 @@
# boa_icu_provider
`boa_icu_provider` generates and defines the [ICU4X](https://github.com/unicode-org/icu4x) data provider
used in the Boa engine to enable internationalization functionality.
## Datagen
To regenerate the data:
```bash
$ cargo run --bin boa-datagen --features bin
```

BIN
boa_icu_provider/data/icudata.postcard

Binary file not shown.

21
boa_icu_provider/src/bin/datagen.rs

@ -0,0 +1,21 @@
use std::{error::Error, fs::File};
use boa_icu_provider::data_root;
use icu_datagen::{all_keys, datagen, CldrLocaleSubset, Out, SourceData};
/// Regenerates the data blob stored at `icudata.postcard` inside [`data_root`].
///
/// # Errors
///
/// Returns an error if the logger cannot be initialized, if the source data
/// cannot be set up, if the output file cannot be created, or if `datagen` fails.
fn main() -> Result<(), Box<dyn Error>> {
    simple_logger::SimpleLogger::new()
        .env()
        .with_level(log::LevelFilter::Info)
        .init()?;

    let source_data = SourceData::default()
        .with_cldr_latest(CldrLocaleSubset::Modern)?
        .with_icuexport_latest()?;

    // The output file is created (or truncated) only after the sources are ready.
    let output_path = data_root().join("icudata.postcard");
    let blob_file = File::create(output_path)?;
    let blob_out = Out::Blob(Box::new(blob_file));

    datagen(None, &all_keys(), &source_data, [blob_out].into())?;
    Ok(())
}

93
boa_icu_provider/src/lib.rs

@ -0,0 +1,93 @@
//! Boa's **`boa_icu_provider`** exports the default data provider used by its `Intl` implementation.
//!
//! # Crate Overview
//! This crate exports the function [`blob`], which contains an extensive dataset of locale data to
//! enable `Intl` functionality in the engine. The set of locales included is precisely the ["modern"]
//! subset of locales in the [Unicode Common Locale Data Repository][cldr].
//!
//! If you need to support the full set of locales, you can check out the [ICU4X guide] about
//! generating custom data providers. Boa supports plugging in either [`BufferProvider`]s or [`AnyProvider`]s
//! generated by the tool.
//!
//! ["modern"]: https://github.com/unicode-org/cldr-json/tree/main/cldr-json/cldr-localenames-modern/main
//! [cldr]: https://github.com/unicode-org/
//! [ICU4X guide]: https://github.com/unicode-org/icu4x/blob/main/docs/tutorials/data_management.md
//! [`BufferProvider`]: icu_provider::BufferProvider
//! [`AnyProvider`]: icu_provider::AnyProvider
#![deny(
// rustc lint groups https://doc.rust-lang.org/rustc/lints/groups.html
warnings,
future_incompatible,
let_underscore,
nonstandard_style,
rust_2018_compatibility,
rust_2018_idioms,
rust_2021_compatibility,
unused,
// rustc allowed-by-default lints https://doc.rust-lang.org/rustc/lints/listing/allowed-by-default.html
macro_use_extern_crate,
meta_variable_misuse,
missing_abi,
missing_copy_implementations,
missing_debug_implementations,
non_ascii_idents,
noop_method_call,
single_use_lifetimes,
trivial_casts,
trivial_numeric_casts,
unreachable_pub,
unsafe_op_in_unsafe_fn,
unused_import_braces,
unused_lifetimes,
unused_qualifications,
unused_tuple_struct_fields,
variant_size_differences,
// rustdoc lints https://doc.rust-lang.org/rustdoc/lints.html
rustdoc::broken_intra_doc_links,
rustdoc::private_intra_doc_links,
rustdoc::missing_crate_level_docs,
rustdoc::private_doc_tests,
rustdoc::invalid_codeblock_attributes,
rustdoc::invalid_rust_codeblocks,
rustdoc::bare_urls,
// clippy categories https://doc.rust-lang.org/clippy/
clippy::all,
clippy::correctness,
clippy::suspicious,
clippy::style,
clippy::complexity,
clippy::perf,
clippy::pedantic,
clippy::nursery,
)]
/// Gets the path to the directory where the generated data is stored.
///
/// The path is the `data` directory directly under this crate's manifest
/// directory, as recorded in `CARGO_MANIFEST_DIR` at compile time.
#[must_use]
#[doc(hidden)]
pub fn data_root() -> std::path::PathBuf {
    let manifest_dir = std::path::Path::new(std::env!("CARGO_MANIFEST_DIR"));
    manifest_dir.join("data")
}
use icu_provider_blob::BlobDataProvider;
/// Gets a data provider that is stored as a Postcard blob.
///
/// The blob is the `data/icudata.postcard` file of this crate, embedded into the
/// binary at compile time.
///
/// This provider does NOT execute locale fallback. Use `LocaleFallbackProvider` from
/// the `icu_provider_adapters` crate for this functionality.
///
/// # Note
///
/// The returned provider internally uses [`Arc`][std::sync::Arc] to share the data between instances,
/// so it is preferable to clone the provider instead of calling `blob()` multiple times.
///
/// # Panics
///
/// Panics if the embedded data file cannot be loaded as a blob provider.
#[must_use]
pub fn blob() -> BlobDataProvider {
BlobDataProvider::try_new_from_static_blob(include_bytes!(concat!(
env!("CARGO_MANIFEST_DIR"),
"/data/icudata.postcard"
)))
.expect("The statically compiled data file should be valid.")
}

17
boa_interner/src/lib.rs

@ -1,4 +1,4 @@
//! Boa's **boa_interner** is a string interner for compiler performance.
//! Boa's **`boa_interner`** is a string interner for compiler performance.
//!
//! # Crate Overview
//! The idea behind using a string interner is that in most of the code, strings such as
@ -16,13 +16,14 @@
//! Try out the most recent release with Boa's live demo [playground][boa-playground].
//!
//! # Boa Crates
//! - **boa_ast** - Boa's ECMAScript Abstract Syntax Tree.
//! - **boa_engine** - Boa's implementation of ECMAScript builtin objects and execution.
//! - **boa_gc** - Boa's garbage collector
//! - **boa_interner** - Boa's string interner
//! - **boa_parser** - Boa's lexer and parser
//! - **boa_profiler** - Boa's code profiler
//! - **boa_unicode** - Boa's Unicode identifier
//! - **`boa_ast`** - Boa's ECMAScript Abstract Syntax Tree.
//! - **`boa_engine`** - Boa's implementation of ECMAScript builtin objects and execution.
//! - **`boa_gc`** - Boa's garbage collector.
//! - **`boa_interner`** - Boa's string interner.
//! - **`boa_parser`** - Boa's lexer and parser.
//! - **`boa_profiler`** - Boa's code profiler.
//! - **`boa_unicode`** - Boa's Unicode identifier.
//! - **`boa_icu_provider`** - Boa's ICU4X data provider.
//!
//! [boa-conformance]: https://boa-dev.github.io/boa/test262/
//! [boa-web]: https://boa-dev.github.io/

17
boa_parser/src/lib.rs

@ -1,4 +1,4 @@
//! Boa's **boa_parser** crate is a parser targeting the latest [ECMAScript language specification][spec].
//! Boa's **`boa_parser`** crate is a parser targeting the latest [ECMAScript language specification][spec].
//!
//! # Crate Overview
//! This crate contains implementations of a [`Lexer`] and a [`Parser`] for the **ECMAScript**
@ -12,13 +12,14 @@
//! Try out the most recent release with Boa's live demo [playground][boa-playground].
//!
//! # Boa Crates
//! - **boa_ast** - Boa's ECMAScript Abstract Syntax Tree.
//! - **boa_engine** - Boa's implementation of ECMAScript builtin objects and execution.
//! - **boa_gc** - Boa's garbage collector
//! - **boa_interner** - Boa's string interner
//! - **boa_parser** - Boa's lexer and parser
//! - **boa_profiler** - Boa's code profiler
//! - **boa_unicode** - Boa's Unicode identifier
//! - **`boa_ast`** - Boa's ECMAScript Abstract Syntax Tree.
//! - **`boa_engine`** - Boa's implementation of ECMAScript builtin objects and execution.
//! - **`boa_gc`** - Boa's garbage collector.
//! - **`boa_interner`** - Boa's string interner.
//! - **`boa_parser`** - Boa's lexer and parser.
//! - **`boa_profiler`** - Boa's code profiler.
//! - **`boa_unicode`** - Boa's Unicode identifier.
//! - **`boa_icu_provider`** - Boa's ICU4X data provider.
//!
//! [spec]: https://tc39.es/ecma262
//! [lex]: https://tc39.es/ecma262/#sec-ecmascript-language-lexical-grammar

11
boa_profiler/src/lib.rs

@ -13,11 +13,12 @@
//! # Boa Crates
//! - **`boa_ast`** - Boa's ECMAScript Abstract Syntax Tree.
//! - **`boa_engine`** - Boa's implementation of ECMAScript builtin objects and execution.
//! - **`boa_gc`** - Boa's garbage collector
//! - **`boa_interner`** - Boa's string interner
//! - **`boa_parser`** - Boa's lexer and parser
//! - **`boa_profiler`** - Boa's code profiler
//! - **`boa_unicode`** - Boa's Unicode identifier
//! - **`boa_gc`** - Boa's garbage collector.
//! - **`boa_interner`** - Boa's string interner.
//! - **`boa_parser`** - Boa's lexer and parser.
//! - **`boa_profiler`** - Boa's code profiler.
//! - **`boa_unicode`** - Boa's Unicode identifier.
//! - **`boa_icu_provider`** - Boa's ICU4X data provider.
//!
//! [profiler-md]: https://github.com/boa-dev/boa/blob/main/docs/profiling.md
//! [boa-conformance]: https://boa-dev.github.io/boa/test262/

3
boa_tester/Cargo.toml

@ -15,6 +15,9 @@ rust-version.workspace = true
boa_engine = { workspace = true, features = ["intl"] }
boa_gc.workspace = true
boa_parser.workspace = true
boa_icu_provider.workspace = true
icu_provider_adapters = { version = "1.0.0", features = ["serde"]}
icu_provider_blob = "1.0.0"
clap = { version = "4.0.32", features = ["derive"] }
serde = { version = "1.0.151", features = ["derive"] }
serde_yaml = "0.9.16"

19
boa_tester/src/exec/mod.rs

@ -7,12 +7,18 @@ use super::{
};
use crate::read::ErrorType;
use boa_engine::{
builtins::JsArgs, object::FunctionBuilder, property::Attribute, Context, JsNativeErrorKind,
JsResult, JsValue,
builtins::JsArgs,
context::{BoaProvider, ContextBuilder},
object::FunctionBuilder,
property::Attribute,
Context, JsNativeErrorKind, JsResult, JsValue,
};
use boa_gc::{Finalize, Gc, GcCell, Trace};
use boa_parser::Parser;
use colored::Colorize;
use icu_provider_adapters::fallback::LocaleFallbackProvider;
use icu_provider_blob::BlobDataProvider;
use once_cell::sync::Lazy;
use rayon::prelude::*;
use std::borrow::Cow;
@ -129,6 +135,7 @@ impl Test {
/// Runs the test once, in strict or non-strict mode
fn run_once(&self, harness: &Harness, strict: bool, verbose: u8) -> TestResult {
static LOCALE_DATA: Lazy<BlobDataProvider> = Lazy::new(boa_icu_provider::blob);
if self.ignored {
if verbose > 1 {
println!(
@ -163,7 +170,13 @@ impl Test {
let result = std::panic::catch_unwind(|| match self.expected_outcome {
Outcome::Positive => {
let mut context = Context::default();
let mut context = ContextBuilder::default()
.icu_provider(BoaProvider::Buffer(Box::new(
LocaleFallbackProvider::try_new_with_buffer_provider(LOCALE_DATA.clone())
.expect("default locale data should be valid"),
)))
.expect("default locale data should be valid")
.build();
let async_result = AsyncResult::default();
if let Err(e) = self.set_up_env(harness, &mut context, async_result.clone()) {

17
boa_unicode/src/lib.rs

@ -1,4 +1,4 @@
//! Boa's **boa_unicode** crate for query valid Unicode identifiers.
//! Boa's **`boa_unicode`** crate for query valid Unicode identifiers.
//!
//! # Crate Overview
//! This crate implements the extension to query if a char belongs to a particular unicode identifier property.
@ -16,13 +16,14 @@
//! Try out the most recent release with Boa's live demo [playground][boa-playground].
//!
//! # Boa Crates
//! - **boa_ast** - Boa's ECMAScript Abstract Syntax Tree.
//! - **boa_engine** - Boa's implementation of ECMAScript builtin objects and execution.
//! - **boa_gc** - Boa's garbage collector
//! - **boa_interner** - Boa's string interner
//! - **boa_parser** - Boa's lexer and parser
//! - **boa_profiler** - Boa's code profiler
//! - **boa_unicode** - Boa's Unicode identifier
//! - **`boa_ast`** - Boa's ECMAScript Abstract Syntax Tree.
//! - **`boa_engine`** - Boa's implementation of ECMAScript builtin objects and execution.
//! - **`boa_gc`** - Boa's garbage collector.
//! - **`boa_interner`** - Boa's string interner.
//! - **`boa_parser`** - Boa's lexer and parser.
//! - **`boa_profiler`** - Boa's code profiler.
//! - **`boa_unicode`** - Boa's Unicode identifier.
//! - **`boa_icu_provider`** - Boa's ICU4X data provider.
//!
//! [uax31]: http://unicode.org/reports/tr31
//! [boa-conformance]: https://boa-dev.github.io/boa/test262/

10
test_ignore.toml

@ -19,18 +19,18 @@ features = [
# Non-implemented Intl features
"intl-normative-optional",
"Intl.DurationFormat",
"Intl.NumberFormat-v3",
"Intl.NumberFormat-unified",
"Intl.ListFormat",
"Intl.DisplayNames",
"Intl.RelativeTimeFormat",
"Intl.Segmenter",
"Intl.Locale",
# Stage 3 proposals
# https://github.com/tc39/proposal-symbols-as-weakmap-keys
"symbols-as-weakmap-keys",
# https://github.com/tc39/proposal-intl-locale-info
"Intl.Locale-info",
# https://github.com/tc39/proposal-intl-enumeration
"Intl-enumeration",
# Non-standard
"caller",
@ -42,4 +42,4 @@ features = [
# RegExp tests that check individual codepoints.
# They are not useful considering the cpu time they waste.
tests = ["CharacterClassEscapes"]
tests = ["CharacterClassEscapes", "NumberFormat"]

Loading…
Cancel
Save