Browse Source

Bump ICU4X to 1.5 and cleanup Intl (#3868)

pull/3869/head
José Julián Espina 6 months ago committed by GitHub
parent
commit
8c727f6d52
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 1407
      Cargo.lock
  2. 46
      Cargo.toml
  3. 1
      core/engine/Cargo.toml
  4. 15
      core/engine/src/builtins/intl/collator/mod.rs
  5. 8
      core/engine/src/builtins/intl/list_format/mod.rs
  6. 2
      core/engine/src/builtins/intl/locale/mod.rs
  7. 17
      core/engine/src/builtins/intl/locale/tests.rs
  8. 420
      core/engine/src/builtins/intl/locale/utils.rs
  9. 10
      core/engine/src/builtins/intl/mod.rs
  10. 8
      core/engine/src/builtins/intl/number_format/mod.rs
  11. 8
      core/engine/src/builtins/intl/plural_rules/mod.rs
  12. 19
      core/engine/src/builtins/intl/segmenter/mod.rs
  13. 34
      core/engine/src/builtins/string/mod.rs
  14. 22
      core/engine/src/context/mod.rs
  15. BIN
      core/icu_provider/data/icudata.postcard
  16. 2
      test262_config.toml
  17. 4
      tests/tester/src/edition.rs
  18. 18
      tools/gen-icu4x-data/Cargo.toml
  19. 124
      tools/gen-icu4x-data/src/main.rs

1407
Cargo.lock generated

File diff suppressed because it is too large Load Diff

46
Cargo.toml

@ -75,7 +75,7 @@ thin-vec = "0.2.13"
time = {version = "0.3.36", default-features = false, features = ["local-offset", "large-dates", "wasm-bindgen", "parsing", "formatting", "macros"]}
tinystr = "0.7.5"
log = "0.4.21"
simple_logger = "4.3.3"
simple_logger = "5.0.0"
cargo_metadata = "0.18.1"
trybuild = "1.0.95"
rayon = "1.10.0"
@ -107,7 +107,7 @@ tap = "1.0.1"
thiserror = "1.0.60"
dashmap = "5.5.3"
num_enum = "0.7.2"
itertools = { version = "0.12.1", default-features = false }
itertools = { version = "0.13.0", default-features = false }
portable-atomic = "1.6.0"
bytemuck = { version = "1.15.0", default-features = false }
arrayvec = "0.7.4"
@ -115,7 +115,7 @@ intrusive-collections = "0.9.6"
cfg-if = "1.0.0"
either = "1.12.0"
sys-locale = "0.3.1"
temporal_rs = "0.0.2"
temporal_rs = { git = "https://github.com/boa-dev/temporal.git", rev = "61a05fbb7c72353deda72a3df0e6887d65b840d2" }
web-time = "1.1.0"
criterion = "0.5.1"
float-cmp = "0.9.0"
@ -125,26 +125,26 @@ winapi = { version = "0.3.9", default-features = false }
# ICU4X
icu_provider = { version = "~1.4.0", default-features = false }
icu_locid = { version = "~1.4.0", default-features = false }
icu_locid_transform = { version = "~1.4.0", default-features = false }
icu_datetime = { version = "~1.4.0", default-features = false }
icu_calendar = { version = "~1.4.0", default-features = false }
icu_collator = { version = "~1.4.0", default-features = false }
icu_plurals = { version = "~1.4.0", default-features = false }
icu_list = { version = "~1.4.0", default-features = false }
icu_casemap = { version = "~1.4.0", default-features = false }
icu_segmenter = { version = "~1.4.0", default-features = false }
icu_datagen = { version = "~1.4.1", default-features = false }
icu_provider_adapters = { version = "~1.4.0", default-features = false }
icu_provider_blob = { version = "~1.4.0", default-features = false }
icu_properties = { version = "~1.4.1", default-features = true }
icu_normalizer = { version = "~1.4.2", default-features = false }
icu_decimal = { version = "~1.4.0", default-features = false }
writeable = "~0.5.4"
yoke = "~0.7.3"
zerofrom = "~0.1.3"
fixed_decimal = "~0.5.5"
icu_provider = { version = "~1.5.0", default-features = false }
icu_locid = { version = "~1.5.0", default-features = false }
icu_locid_transform = { version = "~1.5.0", default-features = false }
icu_datetime = { version = "~1.5.0", default-features = false }
icu_calendar = { version = "~1.5.0", default-features = false }
icu_collator = { version = "~1.5.0", default-features = false }
icu_plurals = { version = "~1.5.0", default-features = false }
icu_list = { version = "~1.5.0", default-features = false }
icu_casemap = { version = "~1.5.0", default-features = false }
icu_segmenter = { version = "~1.5.0", default-features = false }
icu_datagen = { version = "~1.5.0", default-features = false }
icu_provider_adapters = { version = "~1.5.0", default-features = false }
icu_provider_blob = { version = "~1.5.0", default-features = false }
icu_properties = { version = "~1.5.0", default-features = true }
icu_normalizer = { version = "~1.5.0", default-features = false }
icu_decimal = { version = "~1.5.0", default-features = false }
writeable = "~0.5.5"
yoke = "~0.7.4"
zerofrom = "~0.1.4"
fixed_decimal = "~0.5.6"
[workspace.metadata.workspaces]
allow_branch = "main"

1
core/engine/Cargo.toml

@ -105,6 +105,7 @@ cfg-if.workspace = true
time.workspace = true
hashbrown.workspace = true
either = { workspace = true, optional = true }
static_assertions.workspace = true
# intl deps
boa_icu_provider = { workspace = true, features = ["std"], optional = true }

15
core/engine/src/builtins/intl/collator/mod.rs

@ -2,8 +2,7 @@ use boa_gc::{custom_trace, Finalize, Trace};
use boa_macros::js_str;
use boa_profiler::Profiler;
use icu_collator::{
provider::CollationMetadataV1Marker, AlternateHandling, CaseFirst, Collator as NativeCollator,
MaxVariable, Numeric,
provider::CollationMetadataV1Marker, AlternateHandling, CaseFirst, MaxVariable, Numeric,
};
use icu_locid::{
@ -35,7 +34,7 @@ use crate::{
};
use super::{
locale::{canonicalize_locale_list, resolve_locale, supported_locales, validate_extension},
locale::{canonicalize_locale_list, filter_locales, resolve_locale, validate_extension},
options::{coerce_options_to_object, IntlOptions},
Service,
};
@ -53,7 +52,7 @@ pub(crate) struct Collator {
usage: Usage,
sensitivity: Sensitivity,
ignore_punctuation: bool,
collator: NativeCollator,
collator: icu_collator::Collator,
bound_compare: Option<JsFunction>,
}
@ -277,7 +276,7 @@ impl BuiltInConstructor for Collator {
// 18. Let relevantExtensionKeys be %Collator%.[[RelevantExtensionKeys]].
// 19. Let r be ResolveLocale(%Collator%.[[AvailableLocales]], requestedLocales, opt, relevantExtensionKeys, localeData).
let mut locale = resolve_locale::<Self>(
&requested_locales,
requested_locales,
&mut intl_options,
context.intl_provider(),
);
@ -337,7 +336,7 @@ impl BuiltInConstructor for Collator {
.unzip();
let collator =
NativeCollator::try_new_unstable(context.intl_provider(), &collator_locale, {
icu_collator::Collator::try_new_unstable(context.intl_provider(), &collator_locale, {
let mut options = icu_collator::CollatorOptions::new();
options.strength = strength;
options.case_level = case_level;
@ -395,8 +394,8 @@ impl Collator {
// 2. Let requestedLocales be ? CanonicalizeLocaleList(locales).
let requested_locales = canonicalize_locale_list(locales, context)?;
// 3. Return ? SupportedLocales(availableLocales, requestedLocales, options).
supported_locales::<<Self as Service>::LangMarker>(&requested_locales, options, context)
// 3. Return ? FilterLocales(availableLocales, requestedLocales, options).
filter_locales::<<Self as Service>::LangMarker>(requested_locales, options, context)
.map(JsValue::from)
}

8
core/engine/src/builtins/intl/list_format/mod.rs

@ -23,7 +23,7 @@ use crate::{
};
use super::{
locale::{canonicalize_locale_list, resolve_locale, supported_locales},
locale::{canonicalize_locale_list, filter_locales, resolve_locale},
options::IntlOptions,
Service,
};
@ -122,7 +122,7 @@ impl BuiltInConstructor for ListFormat {
// 9. Let r be ResolveLocale(%ListFormat%.[[AvailableLocales]], requestedLocales, opt, %ListFormat%.[[RelevantExtensionKeys]], localeData).
// 10. Set listFormat.[[Locale]] to r.[[locale]].
let locale = resolve_locale::<Self>(
&requested_locales,
requested_locales,
&mut IntlOptions {
matcher,
..Default::default()
@ -204,8 +204,8 @@ impl ListFormat {
// 2. Let requestedLocales be ? CanonicalizeLocaleList(locales).
let requested_locales = canonicalize_locale_list(locales, context)?;
// 3. Return ? SupportedLocales(availableLocales, requestedLocales, options).
supported_locales::<<Self as Service>::LangMarker>(&requested_locales, options, context)
// 3. Return ? FilterLocales(availableLocales, requestedLocales, options).
filter_locales::<<Self as Service>::LangMarker>(requested_locales, options, context)
.map(JsValue::from)
}

2
core/engine/src/builtins/intl/locale/mod.rs

@ -7,7 +7,7 @@ use icu_locid::{
extensions::unicode::Value, extensions_unicode_key as key, extensions_unicode_value as value,
};
#[cfg(test)]
#[cfg(all(test, feature = "intl_bundled"))]
mod tests;
mod utils;

17
core/engine/src/builtins/intl/locale/tests.rs

@ -11,7 +11,7 @@ use icu_provider::{DataLocale, DataProvider, DataRequest, DataRequestMetadata};
use crate::{
builtins::intl::{
locale::{best_locale_for_provider, default_locale, resolve_locale},
locale::{default_locale, resolve_locale},
options::{IntlOptions, LocaleMatcher},
Service,
},
@ -88,7 +88,7 @@ fn locale_resolution() {
hc: Some(HourCycle::H11),
},
};
let locale = resolve_locale::<TestService>(&[], &mut options, &provider);
let locale = resolve_locale::<TestService>([], &mut options, &provider);
assert_eq!(locale, default);
// test best fit
@ -99,15 +99,8 @@ fn locale_resolution() {
},
};
let locale = resolve_locale::<TestService>(&[], &mut options, &provider);
let best = best_locale_for_provider::<<TestService as Service>::LangMarker>(
default.id.clone(),
&provider,
)
.unwrap();
let mut best = Locale::from(best);
best.extensions = locale.extensions.clone();
assert_eq!(locale, best);
let locale = resolve_locale::<TestService>([], &mut options, &provider);
assert_eq!(locale, default);
// requested: [es-ES]
let mut options = IntlOptions {
@ -115,6 +108,6 @@ fn locale_resolution() {
service_options: TestOptions { hc: None },
};
let locale = resolve_locale::<TestService>(&[locale!("es-AR")], &mut options, &provider);
let locale = resolve_locale::<TestService>([locale!("es-AR")], &mut options, &provider);
assert_eq!(locale, "es-u-hc-h23".parse().unwrap());
}

420
core/engine/src/builtins/intl/locale/utils.rs

@ -14,18 +14,13 @@ use crate::{
};
use boa_macros::js_str;
use icu_collator::provider::CollationMetadataV1Marker;
use icu_locid::{
extensions::unicode::{Key, Value},
subtags::Variants,
LanguageIdentifier, Locale,
};
use icu_locid_transform::LocaleCanonicalizer;
use icu_provider::{
DataError, DataErrorKind, DataLocale, DataProvider, DataRequest, DataRequestMetadata,
KeyedDataMarker,
};
use icu_segmenter::provider::WordBreakDataV1Marker;
use icu_provider::{DataLocale, DataProvider, DataRequest, DataRequestMetadata, KeyedDataMarker};
use indexmap::IndexSet;
use tap::TapOptional;
@ -153,36 +148,55 @@ pub(crate) fn canonicalize_locale_list(
Ok(seen.into_iter().collect())
}
/// Abstract operation `BestAvailableLocale ( availableLocales, locale )`
/// Abstract operation [`LookupMatchingLocaleByPrefix ( availableLocales, requestedLocales )`][prefix]
/// and [`LookupMatchingLocaleByBestFit ( availableLocales, requestedLocales )`][best]
///
/// Compares the provided argument `locale`, which must be a String value with a
/// structurally valid and canonicalized Unicode BCP 47 locale identifier, against
/// the locales in `availableLocales` and returns either the longest non-empty prefix
/// of `locale` that is an element of `availableLocales`, or undefined if there is no
/// such element.
/// Compares `requestedLocales`, which must be a `List` as returned by `CanonicalizeLocaleList`,
/// against the locales in `availableLocales` and determines the best available language to
/// meet the request.
///
/// We only work with language identifiers, which have the same semantics
/// but are a bit easier to manipulate.
/// # Notes
///
/// More information:
/// - [ECMAScript reference][spec]
/// - This differs a bit from the spec, since we don't have an `[[AvailableLocales]]`
/// list to compare with. However, we can do data requests to a [`DataProvider`]
/// in order to see if a certain [`Locale`] is supported.
///
/// [spec]: https://tc39.es/ecma402/#sec-bestavailablelocale
pub(crate) fn best_available_locale<M: KeyedDataMarker>(
candidate: LanguageIdentifier,
provider: &(impl DataProvider<M> + ?Sized),
) -> Option<LanguageIdentifier> {
// 1. Let candidate be locale.
let mut candidate = candidate.into();
// 2. Repeat
/// - Calling this function with a singleton `KeyedDataMarker` will always return `None`.
///
/// [prefix]: https://tc39.es/ecma402/#sec-lookupmatchinglocalebyprefix
/// [best]: https://tc39.es/ecma402/#sec-lookupmatchinglocalebybestfit
pub(crate) fn lookup_matching_locale_by_prefix<M: KeyedDataMarker>(
requested_locales: impl IntoIterator<Item = Locale>,
provider: &IntlProvider,
) -> Option<Locale>
where
IntlProvider: DataProvider<M>,
{
// 1. For each element locale of requestedLocales, do
for locale in requested_locales {
// a. Let extension be empty.
// b. If locale contains a Unicode locale extension sequence, then
// i. Set extension to the Unicode locale extension sequence of locale.
// ii. Set locale to the String value that is locale with any Unicode locale extension sequences removed.
let mut locale = locale.clone();
let id = std::mem::take(&mut locale.id);
locale.extensions.transform.clear();
locale.extensions.private.clear();
// c. Let prefix be locale.
let mut prefix = id.into();
// d. Repeat, while prefix is not the empty String,
// We don't use a `while !prefix.is_und()` because it could be that prefix is und at the start,
// so we need to make the request at least once.
loop {
// a. If availableLocales contains an element equal to candidate, return candidate.
// i. If availableLocales contains prefix, return the Record { [[locale]]: prefix, [[extension]]: extension }.
// ICU4X requires doing data requests in order to check if a locale
// is part of the set of supported locales.
let response = DataProvider::<M>::load(
provider,
DataRequest {
locale: &candidate,
locale: &prefix,
metadata: {
let mut metadata = DataRequestMetadata::default();
metadata.silent = true;
@ -191,174 +205,51 @@ pub(crate) fn best_available_locale<M: KeyedDataMarker>(
},
);
match response {
Ok(req) => {
if let Ok(req) = response {
// `metadata.locale` returns None when the provider doesn't have a fallback mechanism,
// but supports the required locale. However, if the provider has a fallback mechanism,
// this will return `Some(locale)`, where the locale is the used locale after applying
// the fallback algorithm, even if the used locale is exactly the same as the required
// locale.
match req.metadata.locale {
// TODO: ugly hack to accept locales that fallback to "und" in the collator/segmenter services
Some(loc)
if loc == candidate
|| (loc.is_empty()
&& [
CollationMetadataV1Marker::KEY.path(),
WordBreakDataV1Marker::KEY.path(),
]
.contains(&M::KEY.path())) =>
{
return Some(candidate.into_locale().id)
}
None => return Some(candidate.into_locale().id),
_ => {}
Some(loc) if loc.get_langid() == prefix.get_langid() => {
locale.id = loc.into_locale().id;
return Some(locale);
}
None => {
locale.id = prefix.into_locale().id;
return Some(locale);
}
Err(DataError {
kind: DataErrorKind::ExtraneousLocale,
..
}) => {
// This is essentially the same hack as above but for singleton keys
return Some(candidate.into_locale().id);
_ => {}
}
Err(_) => {}
}
// b. Let pos be the character index of the last occurrence of "-" (U+002D) within candidate. If that character does not occur, return undefined.
// c. If pos ≥ 2 and the character "-" occurs at index pos-2 of candidate, decrease pos by 2.
// d. Let candidate be the substring of candidate from position 0, inclusive, to position pos, exclusive.
//
// ii. If prefix contains "-" (code unit 0x002D HYPHEN-MINUS), let pos be the index into prefix of the last occurrence of "-"; else let pos be 0.
// iii. Repeat, while pos ≥ 2 and the substring of prefix from pos - 2 to pos - 1 is "-",
// 1. Set pos to pos - 2.
// iv. Set prefix to the substring of prefix from 0 to pos.
// Since the definition of `LanguageIdentifier` allows us to manipulate it
// without using strings, we can replace these steps by a simpler
// algorithm.
if candidate.has_variants() {
let mut variants = candidate
.clear_variants()
.iter()
.copied()
.collect::<Vec<_>>();
if prefix.has_variants() {
let mut variants = prefix.clear_variants().iter().copied().collect::<Vec<_>>();
variants.pop();
candidate.set_variants(Variants::from_vec_unchecked(variants));
} else if candidate.region().is_some() {
candidate.set_region(None);
} else if candidate.script().is_some() {
candidate.set_script(None);
prefix.set_variants(Variants::from_vec_unchecked(variants));
} else if prefix.region().is_some() {
prefix.set_region(None);
} else if prefix.script().is_some() {
prefix.set_script(None);
} else {
return None;
break;
}
}
}
/// Returns the locale resolved by the `provider` after using the ICU4X fallback
/// algorithm with `candidate` (if the provider supports this), or None if the locale is not
/// supported.
pub(crate) fn best_locale_for_provider<M: KeyedDataMarker>(
candidate: LanguageIdentifier,
provider: &(impl DataProvider<M> + ?Sized),
) -> Option<LanguageIdentifier> {
// another hack to the list...
// This time is because markers like `WordBreakDataV1Marker` throw an error if they receive
// a request with a locale, because they don't really need it. In this case, we can
// check if the key is one of those kinds and return the candidate as it is.
if M::KEY.metadata().singleton {
return Some(candidate);
}
let response = DataProvider::<M>::load(
provider,
DataRequest {
locale: &DataLocale::from(&candidate),
metadata: {
let mut md = DataRequestMetadata::default();
md.silent = true;
md
},
},
)
.ok()?;
if candidate == LanguageIdentifier::UND {
return Some(LanguageIdentifier::UND);
}
response
.metadata
.locale
.map(|dl| {
// TODO: ugly hack to accept locales that fallback to "und" in the collator/segmenter services
if [
CollationMetadataV1Marker::KEY.path(),
WordBreakDataV1Marker::KEY.path(),
]
.contains(&M::KEY.path())
&& dl.is_empty()
{
candidate.clone()
} else {
dl.into_locale().id
}
})
.or(Some(candidate))
.filter(|loc| loc != &LanguageIdentifier::UND)
}
/// Abstract operation [`LookupMatcher ( availableLocales, requestedLocales )`][spec]
///
/// Compares `requestedLocales`, which must be a `List` as returned by `CanonicalizeLocaleList`,
/// against the locales in `availableLocales` and determines the best available language to
/// meet the request.
///
/// # Note
///
/// This differs a bit from the spec, since we don't have an `[[AvailableLocales]]`
/// list to compare with. However, we can do data requests to a [`DataProvider`]
/// in order to see if a certain [`Locale`] is supported.
///
/// [spec]: https://tc39.es/ecma402/#sec-lookupmatcher
fn lookup_matcher<M: KeyedDataMarker>(
requested_locales: &[Locale],
provider: &IntlProvider,
) -> Locale
where
IntlProvider: DataProvider<M>,
{
// 1. Let result be a new Record.
// 2. For each element locale of requestedLocales, do
for locale in requested_locales {
// a. Let noExtensionsLocale be the String value that is locale with any Unicode locale
// extension sequences removed.
let mut locale = locale.clone();
let id = std::mem::take(&mut locale.id);
locale.extensions.transform.clear();
locale.extensions.private.clear();
// b. Let availableLocale be ! BestAvailableLocale(availableLocales, noExtensionsLocale).
let available_locale = best_available_locale::<M>(id, provider);
// c. If availableLocale is not undefined, then
if let Some(available_locale) = available_locale {
// i. Set result.[[locale]] to availableLocale.
// Assignment deferred. See return statement below.
// ii. If locale and noExtensionsLocale are not the same String value, then
// 1. Let extension be the String value consisting of the substring of the Unicode
// locale extension sequence within locale.
// 2. Set result.[[extension]] to extension.
locale.id = available_locale;
// iii. Return result.
return locale;
}
}
// 3. Let defLocale be ! DefaultLocale().
// 4. Set result.[[locale]] to defLocale.
// 5. Return result.
default_locale(provider.locale_canonicalizer())
// 2. Return undefined.
None
}
/// Abstract operation [`BestFitMatcher ( availableLocales, requestedLocales )`][spec]
/// Abstract operation [`LookupMatchingLocaleByBestFit ( availableLocales, requestedLocales )`][spec]
///
/// Compares `requestedLocales`, which must be a `List` as returned by `CanonicalizeLocaleList`,
/// against the locales in `availableLocales` and determines the best available language to
@ -367,31 +258,50 @@ where
/// produced by the `LookupMatchingLocaleByPrefix` abstract operation.
///
/// [spec]: https://tc39.es/ecma402/#sec-lookupmatchinglocalebybestfit
fn best_fit_matcher<M: KeyedDataMarker>(
requested_locales: &[Locale],
fn lookup_matching_locale_by_best_fit<M: KeyedDataMarker>(
requested_locales: impl IntoIterator<Item = Locale>,
provider: &IntlProvider,
) -> Locale
) -> Option<Locale>
where
IntlProvider: DataProvider<M>,
{
for mut locale in requested_locales
.iter()
.cloned()
.chain(std::iter::once_with(|| {
default_locale(provider.locale_canonicalizer())
}))
{
for mut locale in requested_locales {
let id = std::mem::take(&mut locale.id);
// Only leave unicode extensions when returning the locale.
locale.extensions.transform.clear();
locale.extensions.private.clear();
if let Some(available) = best_locale_for_provider(id, provider) {
locale.id = available;
let Ok(response) = DataProvider::<M>::load(
provider,
DataRequest {
locale: &DataLocale::from(&id),
metadata: {
let mut md = DataRequestMetadata::default();
md.silent = true;
md
},
},
) else {
continue;
};
return locale;
if id == LanguageIdentifier::UND {
return Some(locale);
}
if let Some(id) = response
.metadata
.locale
.map(|dl| dl.into_locale().id)
.or(Some(id))
.filter(|loc| loc != &LanguageIdentifier::UND)
{
locale.id = id;
return Some(locale);
}
}
Locale::default()
None
}
/// Abstract operation `ResolveLocale ( availableLocales, requestedLocales, options, relevantExtensionKeys, localeData )`
@ -406,7 +316,7 @@ where
///
/// [spec]: https://tc39.es/ecma402/#sec-resolvelocale
pub(in crate::builtins::intl) fn resolve_locale<S>(
requested_locales: &[Locale],
requested_locales: impl IntoIterator<Item = Locale>,
options: &mut IntlOptions<S::LocaleOptions>,
provider: &IntlProvider,
) -> Locale
@ -416,15 +326,16 @@ where
{
// 1. Let matcher be options.[[localeMatcher]].
// 2. If matcher is "lookup", then
// a. Let r be ! LookupMatcher(availableLocales, requestedLocales).
// a. Let r be LookupMatchingLocaleByPrefix(availableLocales, requestedLocales).
// 3. Else,
// a. Let r be ! BestFitMatcher(availableLocales, requestedLocales).
// 4. Let foundLocale be r.[[locale]].
// a. Let r be LookupMatchingLocaleByBestFit(availableLocales, requestedLocales).
// 4. If r is undefined, set r to the Record { [[locale]]: DefaultLocale(), [[extension]]: empty }.
let mut found_locale = if options.matcher == LocaleMatcher::Lookup {
lookup_matcher::<S::LangMarker>(requested_locales, provider)
lookup_matching_locale_by_prefix::<S::LangMarker>(requested_locales, provider)
} else {
best_fit_matcher::<S::LangMarker>(requested_locales, provider)
};
lookup_matching_locale_by_best_fit::<S::LangMarker>(requested_locales, provider)
}
.unwrap_or_else(|| default_locale(provider.locale_canonicalizer()));
// From here, the spec differs significantly from the implementation,
// since ICU4X allows us to skip some steps and modularize the
@ -485,62 +396,18 @@ where
found_locale
}
/// Abstract operation [`LookupSupportedLocales ( availableLocales, requestedLocales )`][spec]
/// Abstract operation [`FilterLocales ( availableLocales, requestedLocales, options )`][spec]
///
/// Returns the subset of the provided BCP 47 language priority list requestedLocales for which
/// `availableLocales` has a matching locale when using the BCP 47 Lookup algorithm. Locales appear
/// in the same order in the returned list as in `requestedLocales`.
/// availableLocales has a matching locale.
///
/// # Note
///
/// This differs a bit from the spec, since we don't have an `[[AvailableLocales]]`
/// list to compare with. However, we can do data requests to a [`DataProvider`]
/// in order to see if a certain [`Locale`] is supported.
///
/// [spec]: https://tc39.es/ecma402/#sec-lookupsupportedlocales
fn lookup_supported_locales<M: KeyedDataMarker>(
requested_locales: &[Locale],
provider: &(impl DataProvider<M> + ?Sized),
) -> Vec<Locale> {
// 1. Let subset be a new empty List.
// 2. For each element locale of requestedLocales, do
// a. Let noExtensionsLocale be the String value that is locale with any Unicode locale extension sequences removed.
// b. Let availableLocale be ! BestAvailableLocale(availableLocales, noExtensionsLocale).
// c. If availableLocale is not undefined, append locale to the end of subset.
// 3. Return subset.
requested_locales
.iter()
.filter(|loc| best_available_locale(loc.id.clone(), provider).is_some())
.cloned()
.collect()
}
/// Abstract operation [`BestFitSupportedLocales ( availableLocales, requestedLocales )`][spec]
///
/// Returns the subset of the provided BCP 47 language priority list `requestedLocales` for which
/// `availableLocales` has a matching locale when using the Best Fit Matcher algorithm. Locales appear
/// in the same order in the returned list as in requestedLocales.
///
/// [spec]: https://tc39.es/ecma402/#sec-bestfitsupportedlocales
fn best_fit_supported_locales<M: KeyedDataMarker>(
requested_locales: &[Locale],
provider: &(impl DataProvider<M> + ?Sized),
) -> Vec<Locale> {
requested_locales
.iter()
.filter(|loc| best_locale_for_provider(loc.id.clone(), provider).is_some())
.cloned()
.collect()
}
/// Abstract operation [`SupportedLocales ( availableLocales, requestedLocales, options )`][spec]
///
/// Returns the subset of the provided BCP 47 language priority list requestedLocales for which
/// availableLocales has a matching locale
/// Calling this function with a singleton `KeyedDataMarker` will always return an empty array.
///
/// [spec]: https://tc39.es/ecma402/#sec-supportedlocales
pub(in crate::builtins::intl) fn supported_locales<M: KeyedDataMarker>(
requested_locales: &[Locale],
pub(in crate::builtins::intl) fn filter_locales<M: KeyedDataMarker>(
requested_locales: Vec<Locale>,
options: &JsValue,
context: &mut Context,
) -> JsResult<JsObject>
@ -553,22 +420,36 @@ where
// 2. Let matcher be ? GetOption(options, "localeMatcher", string, « "lookup", "best fit" », "best fit").
let matcher = get_option(&options, js_str!("localeMatcher"), context)?.unwrap_or_default();
let elements = match matcher {
// 4. Else,
// a. Let supportedLocales be LookupSupportedLocales(availableLocales, requestedLocales).
// 3. Let subset be a new empty List.
let mut subset = Vec::with_capacity(requested_locales.len());
// 4. For each element locale of requestedLocales, do
for locale in requested_locales {
// a. Let noExtensionsLocale be the String value that is locale with any Unicode locale extension sequences removed.
let mut no_ext_loc = locale.clone();
no_ext_loc.extensions.unicode.clear();
let loc_match = match matcher {
// b. If matcher is "lookup", then
// i. Let match be LookupMatchingLocaleByPrefix(availableLocales, noExtensionsLocale).
LocaleMatcher::Lookup => {
lookup_supported_locales(requested_locales, context.intl_provider())
lookup_matching_locale_by_prefix([no_ext_loc], context.intl_provider())
}
// 3. If matcher is "best fit", then
// a. Let supportedLocales be BestFitSupportedLocales(availableLocales, requestedLocales).
// c. Else,
// i. Let match be LookupMatchingLocaleByBestFit(availableLocales, noExtensionsLocale).
LocaleMatcher::BestFit => {
best_fit_supported_locales(requested_locales, context.intl_provider())
lookup_matching_locale_by_best_fit([no_ext_loc], context.intl_provider())
}
};
// 5. Return CreateArrayFromList(supportedLocales).
// d. If match is not undefined, append locale to subset.
if loc_match.is_some() {
subset.push(locale);
}
}
// 5. Return CreateArrayFromList(subset).
Ok(Array::create_array_from_list(
elements
subset
.into_iter()
.map(|loc| js_string!(loc.to_string()).into()),
context,
@ -577,6 +458,10 @@ where
/// Validates that the unicode extension `key` with `value` is a valid extension value for the
/// `language`.
///
/// # Note
///
/// Calling this function with a singleton `KeyedDataMarker` will always return `false`.
pub(in crate::builtins::intl) fn validate_extension<M: KeyedDataMarker>(
language: LanguageIdentifier,
key: Key,
@ -597,54 +482,47 @@ pub(in crate::builtins::intl) fn validate_extension<M: KeyedDataMarker>(
.is_some()
}
#[cfg(test)]
#[cfg(all(test, feature = "intl_bundled"))]
mod tests {
use icu_locid::{langid, locale, Locale};
use icu_plurals::provider::CardinalV1Marker;
use icu_provider::AsDeserializingBufferProvider;
use crate::{
builtins::intl::locale::utils::{
best_available_locale, best_fit_matcher, default_locale, lookup_matcher,
lookup_matching_locale_by_best_fit, lookup_matching_locale_by_prefix,
},
context::icu::IntlProvider,
};
#[test]
fn best_avail_loc() {
let provider = boa_icu_provider::buffer();
let provider = provider.as_deserializing();
fn best_fit() {
let icu = &IntlProvider::try_new_with_buffer_provider(boa_icu_provider::buffer()).unwrap();
assert_eq!(
best_available_locale::<CardinalV1Marker>(langid!("en"), &provider),
Some(langid!("en"))
lookup_matching_locale_by_best_fit::<CardinalV1Marker>([locale!("en")], icu),
Some(locale!("en"))
);
assert_eq!(
best_available_locale::<CardinalV1Marker>(langid!("es-ES"), &provider),
Some(langid!("es"))
lookup_matching_locale_by_best_fit::<CardinalV1Marker>([locale!("es-ES")], icu),
Some(locale!("es"))
);
assert_eq!(
best_available_locale::<CardinalV1Marker>(langid!("kr"), &provider),
lookup_matching_locale_by_best_fit::<CardinalV1Marker>([locale!("kr")], icu),
None
);
}
#[test]
fn lookup_match() {
let icu = IntlProvider::try_new_with_buffer_provider(boa_icu_provider::buffer()).unwrap();
// requested: []
let res = lookup_matcher::<CardinalV1Marker>(&[], &icu);
assert_eq!(res, default_locale(icu.locale_canonicalizer()));
assert!(res.extensions.is_empty());
let icu = &IntlProvider::try_new_with_buffer_provider(boa_icu_provider::buffer()).unwrap();
// requested: [fr-FR-u-hc-h12]
let requested: Locale = "fr-FR-u-hc-h12".parse().unwrap();
let result = lookup_matcher::<CardinalV1Marker>(&[requested.clone()], &icu);
let result =
lookup_matching_locale_by_prefix::<CardinalV1Marker>([requested.clone()], icu).unwrap();
assert_eq!(result.id, langid!("fr"));
assert_eq!(result.extensions, requested.extensions);
@ -655,7 +533,7 @@ mod tests {
let uz = locale!("uz-Cyrl");
let requested = vec![kr, gr, es.clone(), uz];
let res = best_fit_matcher::<CardinalV1Marker>(&requested, &icu);
let res = lookup_matching_locale_by_best_fit::<CardinalV1Marker>(requested, icu).unwrap();
assert_eq!(res.id, langid!("es"));
assert_eq!(res.extensions, es.extensions);
}

10
core/engine/src/builtins/intl/mod.rs

@ -28,6 +28,7 @@ use crate::{
use boa_gc::{Finalize, Trace};
use boa_profiler::Profiler;
use icu_provider::KeyedDataMarker;
use static_assertions::const_assert;
pub(crate) mod collator;
pub(crate) mod date_time_format;
@ -44,6 +45,15 @@ pub(crate) use self::{
mod options;
// No singletons are allowed as lang markers.
// Hopefully, we'll be able to migrate this to the definition of `Service` in the future
// (https://github.com/rust-lang/rust/issues/76560)
const_assert! {!<Collator as Service>::LangMarker::KEY.metadata().singleton}
const_assert! {!<ListFormat as Service>::LangMarker::KEY.metadata().singleton}
const_assert! {!<NumberFormat as Service>::LangMarker::KEY.metadata().singleton}
const_assert! {!<PluralRules as Service>::LangMarker::KEY.metadata().singleton}
const_assert! {!<Segmenter as Service>::LangMarker::KEY.metadata().singleton}
/// JavaScript `Intl` object.
#[derive(Debug, Clone, Trace, Finalize, JsData)]
#[boa_gc(unsafe_empty_trace)]

8
core/engine/src/builtins/intl/number_format/mod.rs

@ -39,7 +39,7 @@ use crate::{
};
use super::{
locale::{canonicalize_locale_list, resolve_locale, supported_locales, validate_extension},
locale::{canonicalize_locale_list, filter_locales, resolve_locale, validate_extension},
options::{coerce_options_to_object, IntlOptions},
Service,
};
@ -237,7 +237,7 @@ impl BuiltInConstructor for NumberFormat {
// 9. Let localeData be %Intl.NumberFormat%.[[LocaleData]].
// 10. Let r be ResolveLocale(%Intl.NumberFormat%.[[AvailableLocales]], requestedLocales, opt, %Intl.NumberFormat%.[[RelevantExtensionKeys]], localeData).
let locale = resolve_locale::<Self>(
&requested_locales,
requested_locales,
&mut intl_options,
context.intl_provider(),
);
@ -465,8 +465,8 @@ impl NumberFormat {
// 2. Let requestedLocales be ? CanonicalizeLocaleList(locales).
let requested_locales = canonicalize_locale_list(locales, context)?;
// 3. Return ? SupportedLocales(availableLocales, requestedLocales, options).
supported_locales::<<Self as Service>::LangMarker>(&requested_locales, options, context)
// 3. Return ? FilterLocales(availableLocales, requestedLocales, options).
filter_locales::<<Self as Service>::LangMarker>(requested_locales, options, context)
.map(JsValue::from)
}

8
core/engine/src/builtins/intl/plural_rules/mod.rs

@ -26,7 +26,7 @@ use crate::{
};
use super::{
locale::{canonicalize_locale_list, resolve_locale, supported_locales},
locale::{canonicalize_locale_list, filter_locales, resolve_locale},
number_format::{DigitFormatOptions, Extrema, NotationKind},
options::{coerce_options_to_object, IntlOptions},
Service,
@ -136,7 +136,7 @@ impl BuiltInConstructor for PluralRules {
// 10. Let r be ResolveLocale(%PluralRules%.[[AvailableLocales]], requestedLocales, opt, %PluralRules%.[[RelevantExtensionKeys]], localeData).
// 11. Set pluralRules.[[Locale]] to r.[[locale]].
let locale = resolve_locale::<Self>(
&requested_locales,
requested_locales,
&mut IntlOptions {
matcher,
..Default::default()
@ -292,8 +292,8 @@ impl PluralRules {
// 2. Let requestedLocales be ? CanonicalizeLocaleList(locales).
let requested_locales = canonicalize_locale_list(locales, context)?;
// 3. Return ? SupportedLocales(availableLocales, requestedLocales, options).
supported_locales::<<Self as Service>::LangMarker>(&requested_locales, options, context)
// 3. Return ? FilterLocales(availableLocales, requestedLocales, options).
filter_locales::<<Self as Service>::LangMarker>(requested_locales, options, context)
.map(JsValue::from)
}

19
core/engine/src/builtins/intl/segmenter/mod.rs

@ -3,10 +3,9 @@ use std::ops::Range;
use boa_gc::{Finalize, Trace};
use boa_macros::js_str;
use boa_profiler::Profiler;
use icu_collator::provider::CollationDiacriticsV1Marker;
use icu_locid::Locale;
use icu_segmenter::{
provider::WordBreakDataV1Marker, GraphemeClusterSegmenter, SentenceSegmenter, WordSegmenter,
};
use icu_segmenter::{GraphemeClusterSegmenter, SentenceSegmenter, WordSegmenter};
use crate::{
builtins::{
@ -30,7 +29,7 @@ pub(crate) use options::*;
pub(crate) use segments::*;
use super::{
locale::{canonicalize_locale_list, resolve_locale, supported_locales},
locale::{canonicalize_locale_list, filter_locales, resolve_locale},
options::IntlOptions,
Service,
};
@ -79,7 +78,9 @@ impl NativeSegmenter {
}
impl Service for Segmenter {
type LangMarker = WordBreakDataV1Marker;
// TODO: Track https://github.com/unicode-org/icu4x/issues/3284
// and replace when segmenters are locale-aware.
type LangMarker = CollationDiacriticsV1Marker;
type LocaleOptions = ();
}
@ -134,7 +135,7 @@ impl BuiltInConstructor for Segmenter {
let options = args.get_or_undefined(1);
// 4. Let requestedLocales be ? CanonicalizeLocaleList(locales).
let locales = canonicalize_locale_list(locales, context)?;
let requested_locales = canonicalize_locale_list(locales, context)?;
// 5. Set options to ? GetOptionsObject(options).
let options = get_options_object(options)?;
@ -148,7 +149,7 @@ impl BuiltInConstructor for Segmenter {
// 10. Let r be ResolveLocale(%Segmenter%.[[AvailableLocales]], requestedLocales, opt, %Segmenter%.[[RelevantExtensionKeys]], localeData).
// 11. Set segmenter.[[Locale]] to r.[[locale]].
let locale = resolve_locale::<Self>(
&locales,
requested_locales,
&mut IntlOptions {
matcher,
..Default::default()
@ -214,8 +215,8 @@ impl Segmenter {
// 2. Let requestedLocales be ? CanonicalizeLocaleList(locales).
let requested_locales = canonicalize_locale_list(locales, context)?;
// 3. Return ? SupportedLocales(availableLocales, requestedLocales, options).
supported_locales::<<Self as Service>::LangMarker>(&requested_locales, options, context)
// 3. Return ? FilterLocales(availableLocales, requestedLocales, options).
filter_locales::<<Self as Service>::LangMarker>(requested_locales, options, context)
.map(JsValue::from)
}

34
core/engine/src/builtins/string/mod.rs

@ -1743,10 +1743,12 @@ impl String {
#[cfg(feature = "intl")]
{
use super::intl::locale::{
best_available_locale, canonicalize_locale_list, default_locale,
canonicalize_locale_list, default_locale, lookup_matching_locale_by_prefix,
};
use icu_casemap::provider::CaseMapV1Marker;
use icu_locid::LanguageIdentifier;
// TODO: Small hack to make lookups behave.
// We would really like to be able to use `icu_casemap::provider::CaseMapV1Marker` here instead of
// `icu_plurals::provider::OrdinalV1Marker`.
use icu_locid::Locale;
use icu_plurals::provider::OrdinalV1Marker;
// 1. Let O be ? RequireObjectCoercible(this value).
let this = this.require_object_coercible()?;
@ -1762,19 +1764,25 @@ impl String {
// 1. Let requestedLocales be ? CanonicalizeLocaleList(locales).
// 2. If requestedLocales is not an empty List, then
// a. Let requestedLocale be requestedLocales[0].
let lang = canonicalize_locale_list(args.get_or_undefined(0), context)?
let mut requested_locale = canonicalize_locale_list(args.get_or_undefined(0), context)?
.into_iter()
.next()
// 3. Else,
// a. Let requestedLocale be ! DefaultLocale().
.unwrap_or_else(|| default_locale(context.intl_provider().locale_canonicalizer()))
.id;
.unwrap_or_else(|| default_locale(context.intl_provider().locale_canonicalizer()));
// 4. Let noExtensionsLocale be the String value that is requestedLocale with any Unicode locale extension sequences (6.2.1) removed.
// 5. Let availableLocales be a List with language tags that includes the languages for which the Unicode Character Database contains language sensitive case mappings. Implementations may add additional language tags if they support case mapping for additional locales.
// 6. Let locale be ! BestAvailableLocale(availableLocales, noExtensionsLocale).
// 7. If locale is undefined, set locale to "und".
let lang = best_available_locale::<CaseMapV1Marker>(lang, context.intl_provider())
.unwrap_or(LanguageIdentifier::UND);
requested_locale.extensions.unicode.clear();
// 5. Let availableLocales be a List with language tags that includes the languages for which the Unicode
// Character Database contains language sensitive case mappings. Implementations may add additional
// language tags if they support case mapping for additional locales.
// 6. Let match be LookupMatchingLocaleByPrefix(availableLocales, noExtensionsLocale).
// 7. If match is not undefined, let locale be match.[[locale]]; else let locale be "und".
let locale = lookup_matching_locale_by_prefix::<OrdinalV1Marker>(
[requested_locale],
context.intl_provider(),
)
.unwrap_or(Locale::UND);
let casemapper = context.intl_provider().case_mapper();
@ -1784,11 +1792,11 @@ impl String {
// 10. Else,
// a. Assert: targetCase is upper.
// b. Let newCodePoints be a List whose elements are the result of an uppercase transformation of codePoints according to an implementation-derived algorithm using locale or the Unicode Default Case Conversion algorithm.
casemapper.uppercase_to_string(&segment, &lang)
casemapper.uppercase_to_string(&segment, &locale.id)
} else {
// 9. If targetCase is lower, then
// a. Let newCodePoints be a List whose elements are the result of a lowercase transformation of codePoints according to an implementation-derived algorithm using locale or the Unicode Default Case Conversion algorithm.
casemapper.lowercase_to_string(&segment, &lang)
casemapper.lowercase_to_string(&segment, &locale.id)
}
});

22
core/engine/src/context/mod.rs

@ -940,6 +940,14 @@ impl ContextBuilder {
///
/// This function is only available if the `intl` feature is enabled.
///
/// # Additional considerations
///
/// If the data was generated using `icu_datagen`, make sure that the deduplication strategy is
/// not set to [`Maximal`]. Otherwise, `icu_datagen` will delete base locales such as "en" from
/// the list of supported locales if the required data for "en" is the same as "und".
/// We recommend [`RetainBaseLanguages`] as a nice default, which will only deduplicate locales
/// if the deduplication target is not "und".
///
/// # Errors
///
/// This returns `Err` if the provided provider doesn't have the required locale information
@ -947,6 +955,9 @@ impl ContextBuilder {
/// mean that the provider will successfully construct all `Intl` services; that check is deferred
/// until an instance of a service is created.
///
/// [`Maximal`]: https://docs.rs/icu_datagen/latest/icu_datagen/enum.DeduplicationStrategy.html#variant.Maximal
/// [`RetainBaseLanguages`]: https://docs.rs/icu_datagen/latest/icu_datagen/enum.DeduplicationStrategy.html#variant.RetainBaseLanguages
/// [`ResolveLocale`]: https://tc39.es/ecma402/#sec-resolvelocale
/// [`LocaleCanonicalizer`]: icu_locid_transform::LocaleCanonicalizer
/// [`LocaleExpander`]: icu_locid_transform::LocaleExpander
/// [`BufferProvider`]: icu_provider::BufferProvider
@ -963,6 +974,14 @@ impl ContextBuilder {
///
/// This function is only available if the `intl` feature is enabled.
///
/// # Additional considerations
///
/// If the data was generated using `icu_datagen`, make sure that the deduplication strategy is
/// not set to [`Maximal`]. Otherwise, `icu_datagen` will delete base locales such as "en" from
/// the list of supported locales if the required data for "en" is the same as "und".
/// We recommend [`RetainBaseLanguages`] as a nice default, which will only deduplicate locales
/// if the deduplication target is not "und".
///
/// # Errors
///
/// This returns `Err` if the provided provider doesn't have the required locale information
@ -970,6 +989,9 @@ impl ContextBuilder {
/// mean that the provider will successfully construct all `Intl` services; that check is deferred
/// until an instance of a service is created.
///
/// [`Maximal`]: https://docs.rs/icu_datagen/latest/icu_datagen/enum.DeduplicationStrategy.html#variant.Maximal
/// [`RetainBaseLanguages`]: https://docs.rs/icu_datagen/latest/icu_datagen/enum.DeduplicationStrategy.html#variant.RetainBaseLanguages
/// [`ResolveLocale`]: https://tc39.es/ecma402/#sec-resolvelocale
/// [`LocaleCanonicalizer`]: icu_locid_transform::LocaleCanonicalizer
/// [`LocaleExpander`]: icu_locid_transform::LocaleExpander
/// [`AnyProvider`]: icu_provider::AnyProvider

BIN
core/icu_provider/data/icudata.postcard

Binary file not shown.

2
test262_config.toml

@ -1,4 +1,4 @@
commit = "b73f7d662d51584bfee6d3ed274b676d313b646a"
commit = "c00830acef42bdb0e917b5fdec76ed9d399c0eea"
[ignored]
# Not implemented yet:

4
tests/tester/src/edition.rs

@ -81,6 +81,10 @@ static FEATURE_EDITION: phf::Map<&'static str, SpecEdition> = phf::phf_map! {
// https://github.com/tc39/proposal-iterator-helpers
"iterator-helpers" => SpecEdition::ESNext,
// Promise.try
// https://github.com/tc39/proposal-promise-try
"promise-try" => SpecEdition::ESNext,
// Set methods
// https://github.com/tc39/proposal-set-methods
"set-methods" => SpecEdition::ESNext,

18
tools/gen-icu4x-data/Cargo.toml

@ -10,9 +10,15 @@ license.workspace = true
description.workspace = true
[dependencies]
icu_provider = { workspace = true, features = ["datagen"] }
icu_provider_blob = { workspace = true, features = ["export"] }
icu_datagen = { workspace = true, features = ["networking", "use_wasm"] }
icu_provider.workspace = true
icu_datagen = { workspace = true, features = [
"networking",
"use_wasm",
"provider",
"blob_exporter",
"experimental_components",
"rayon",
] }
log.workspace = true
simple_logger.workspace = true
@ -28,12 +34,6 @@ icu_normalizer = { workspace = true, features = ["datagen"] }
icu_plurals = { workspace = true, features = ["datagen", "experimental"] }
icu_segmenter = { workspace = true, features = ["datagen"] }
[target.'cfg(windows)'.dependencies]
# wasmer-wasi apparently has a wrong deps config...
# This dep patches that.
winapi = { workspace = true, features = ["sysinfoapi"] }
[lints]
workspace = true

124
tools/gen-icu4x-data/src/main.rs

@ -2,86 +2,18 @@
use std::{error::Error, fs::File, path::Path};
use icu_datagen::{CoverageLevel, DatagenDriver, DatagenProvider};
use icu_plurals::provider::{PluralRangesV1, PluralRangesV1Marker};
use icu_provider::{
datagen::{ExportMarker, IterableDynamicDataProvider},
dynutil::UpcastDataPayload,
prelude::*,
};
use icu_provider_blob::export::BlobExporter;
#[cfg(target_os = "windows")] // wasmer-wasi is a really fun dependency to maintain :)
use winapi as _;
/// Hack that associates the `und` locale with an empty plural ranges data.
/// This enables the default behaviour for all locales without data.
#[derive(Debug)]
struct PluralRangesFallbackHack(DatagenProvider);
// We definitely don't want to import dependencies just to do `T::default`.
#[allow(clippy::default_trait_access)]
impl DynamicDataProvider<AnyMarker> for PluralRangesFallbackHack {
fn load_data(
&self,
key: DataKey,
req: DataRequest<'_>,
) -> Result<DataResponse<AnyMarker>, DataError> {
if req.locale.is_und() && key.hashed() == PluralRangesV1Marker::KEY.hashed() {
let payload = <AnyMarker as UpcastDataPayload<PluralRangesV1Marker>>::upcast(
DataPayload::from_owned(PluralRangesV1 {
ranges: Default::default(),
}),
);
Ok(DataResponse {
metadata: DataResponseMetadata::default(),
payload: Some(payload),
})
} else {
self.0.load_data(key, req)
}
}
}
use icu_datagen::blob_exporter::BlobExporter;
use icu_datagen::prelude::*;
use icu_provider::data_key;
#[allow(clippy::default_trait_access)]
impl DynamicDataProvider<ExportMarker> for PluralRangesFallbackHack {
fn load_data(
&self,
key: DataKey,
req: DataRequest<'_>,
) -> Result<DataResponse<ExportMarker>, DataError> {
if req.locale.is_und() && key.hashed() == PluralRangesV1Marker::KEY.hashed() {
let payload = <ExportMarker as UpcastDataPayload<PluralRangesV1Marker>>::upcast(
DataPayload::from_owned(PluralRangesV1 {
ranges: Default::default(),
}),
);
Ok(DataResponse {
metadata: DataResponseMetadata::default(),
payload: Some(payload),
})
} else {
self.0.load_data(key, req)
}
}
}
impl IterableDynamicDataProvider<ExportMarker> for PluralRangesFallbackHack {
fn supported_locales_for_key(&self, key: DataKey) -> Result<Vec<DataLocale>, DataError> {
if key.hashed() == PluralRangesV1Marker::KEY.hashed() {
let mut locales = self.0.supported_locales_for_key(key)?;
locales.push(DataLocale::default());
Ok(locales)
} else {
self.0.supported_locales_for_key(key)
}
}
}
const KEYS_LEN: usize = 129;
/// List of keys used by `Intl` components.
///
/// This must be kept in sync with the list of implemented components of `Intl`.
const KEYS: [&[DataKey]; 9] = [
const KEYS: [DataKey; KEYS_LEN] = {
const CENTINEL_KEY: DataKey = data_key!("centinel@1");
const SERVICES: [&[DataKey]; 9] = [
icu_casemap::provider::KEYS,
icu_collator::provider::KEYS,
icu_datetime::provider::KEYS,
@ -91,7 +23,29 @@ const KEYS: [&[DataKey]; 9] = [
icu_normalizer::provider::KEYS,
icu_plurals::provider::KEYS,
icu_segmenter::provider::KEYS,
];
];
let mut array = [CENTINEL_KEY; KEYS_LEN];
let mut offset = 0;
let mut service_idx = 0;
while service_idx < SERVICES.len() {
let service = SERVICES[service_idx];
let mut idx = 0;
while idx < service.len() {
array[offset + idx] = service[idx];
idx += 1;
}
offset += service.len();
service_idx += 1;
}
assert!(offset == array.len());
array
};
fn main() -> Result<(), Box<dyn Error>> {
simple_logger::SimpleLogger::new()
@ -106,18 +60,26 @@ fn main() -> Result<(), Box<dyn Error>> {
let _unused = std::fs::remove_dir_all(path);
std::fs::create_dir_all(path)?;
log::info!("Generating ICU4X data for keys: {:?}", KEYS);
log::info!("Generating ICU4X data for keys: {:#?}", KEYS);
let provider = DatagenProvider::new_latest_tested();
let locales = provider
.locales_for_coverage_levels([CoverageLevel::Modern])?
.into_iter()
.chain([langid!("en-US")]);
DatagenDriver::new()
.with_keys(KEYS.into_iter().flatten().copied())
.with_locales(provider.locales_for_coverage_levels([CoverageLevel::Modern])?)
.with_keys(KEYS)
.with_locales_and_fallback(locales.map(LocaleFamily::with_descendants), {
let mut options = FallbackOptions::default();
options.deduplication_strategy = Some(DeduplicationStrategy::None);
options
})
.with_additional_collations([String::from("search*")])
.with_recommended_segmenter_models()
.export(
&PluralRangesFallbackHack(provider),
BlobExporter::new_with_sink(Box::new(File::create(path.join("icudata.postcard"))?)),
&provider,
BlobExporter::new_v2_with_sink(Box::new(File::create(path.join("icudata.postcard"))?)),
)?;
Ok(())

Loading…
Cancel
Save