Browse Source

Split default icu data into lazily deserialized parts (#3948)

* Split default icu data into lazily deserialized parts

* Fix no_std compilation

* Lazily load more ICU tools

* Fix regressions and use more stable constructors
pull/3798/merge
José Julián Espina 3 months ago committed by GitHub
parent
commit
00f8e00492
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 11
      Cargo.lock
  2. 17
      core/engine/src/builtins/intl/collator/mod.rs
  3. 44
      core/engine/src/builtins/intl/list_format/mod.rs
  4. 16
      core/engine/src/builtins/intl/locale/mod.rs
  5. 11
      core/engine/src/builtins/intl/locale/tests.rs
  6. 23
      core/engine/src/builtins/intl/locale/utils.rs
  7. 26
      core/engine/src/builtins/intl/number_format/mod.rs
  8. 28
      core/engine/src/builtins/intl/plural_rules/mod.rs
  9. 41
      core/engine/src/builtins/intl/segmenter/mod.rs
  10. 15
      core/engine/src/builtins/string/mod.rs
  11. 123
      core/engine/src/context/icu.rs
  12. 5
      core/engine/src/context/mod.rs
  13. 10
      core/icu_provider/Cargo.toml
  14. BIN
      core/icu_provider/data/icu_casemap.postcard
  15. BIN
      core/icu_provider/data/icu_collator.postcard
  16. BIN
      core/icu_provider/data/icu_datetime.postcard
  17. BIN
      core/icu_provider/data/icu_decimal.postcard
  18. BIN
      core/icu_provider/data/icu_list.postcard
  19. BIN
      core/icu_provider/data/icu_locid_transform.postcard
  20. BIN
      core/icu_provider/data/icu_normalizer.postcard
  21. BIN
      core/icu_provider/data/icu_plurals.postcard
  22. BIN
      core/icu_provider/data/icu_segmenter.postcard
  23. 95
      core/icu_provider/src/lib.rs
  24. 1
      tools/gen-icu4x-data/Cargo.toml
  25. 94
      tools/gen-icu4x-data/src/main.rs

11
Cargo.lock generated

@ -443,10 +443,20 @@ dependencies = [
name = "boa_icu_provider" name = "boa_icu_provider"
version = "0.19.0" version = "0.19.0"
dependencies = [ dependencies = [
"icu_casemap",
"icu_collator",
"icu_datetime",
"icu_decimal",
"icu_list",
"icu_locid_transform",
"icu_normalizer",
"icu_plurals",
"icu_provider", "icu_provider",
"icu_provider_adapters", "icu_provider_adapters",
"icu_provider_blob", "icu_provider_blob",
"icu_segmenter",
"once_cell", "once_cell",
"paste",
] ]
[[package]] [[package]]
@ -1364,7 +1374,6 @@ dependencies = [
"icu_locid_transform", "icu_locid_transform",
"icu_normalizer", "icu_normalizer",
"icu_plurals", "icu_plurals",
"icu_provider",
"icu_segmenter", "icu_segmenter",
"log", "log",
"simple_logger", "simple_logger",

17
core/engine/src/builtins/intl/collator/mod.rs

@ -17,7 +17,7 @@ use crate::{
OrdinaryObject, OrdinaryObject,
}, },
context::{ context::{
icu::IntlProvider, icu::{ErasedProvider, IntlProvider},
intrinsics::{Intrinsics, StandardConstructor, StandardConstructors}, intrinsics::{Intrinsics, StandardConstructor, StandardConstructors},
}, },
js_string, js_string,
@ -279,7 +279,7 @@ impl BuiltInConstructor for Collator {
requested_locales, requested_locales,
&mut intl_options, &mut intl_options,
context.intl_provider(), context.intl_provider(),
); )?;
let collator_locale = { let collator_locale = {
// `collator_locale` needs to be different from the resolved locale because ECMA402 doesn't // `collator_locale` needs to be different from the resolved locale because ECMA402 doesn't
@ -335,8 +335,6 @@ impl BuiltInConstructor for Collator {
.then_some((AlternateHandling::Shifted, MaxVariable::Punctuation)) .then_some((AlternateHandling::Shifted, MaxVariable::Punctuation))
.unzip(); .unzip();
let collator =
icu_collator::Collator::try_new_unstable(context.intl_provider(), &collator_locale, {
let mut options = icu_collator::CollatorOptions::new(); let mut options = icu_collator::CollatorOptions::new();
options.strength = strength; options.strength = strength;
options.case_level = case_level; options.case_level = case_level;
@ -344,8 +342,15 @@ impl BuiltInConstructor for Collator {
options.numeric = Some(if numeric { Numeric::On } else { Numeric::Off }); options.numeric = Some(if numeric { Numeric::On } else { Numeric::Off });
options.alternate_handling = alternate_handling; options.alternate_handling = alternate_handling;
options.max_variable = max_variable; options.max_variable = max_variable;
options
}) let collator = match context.intl_provider().erased_provider() {
ErasedProvider::Any(a) => {
icu_collator::Collator::try_new_with_any_provider(a, &collator_locale, options)
}
ErasedProvider::Buffer(b) => {
icu_collator::Collator::try_new_with_buffer_provider(b, &collator_locale, options)
}
}
.map_err(|e| JsNativeError::typ().with_message(e.to_string()))?; .map_err(|e| JsNativeError::typ().with_message(e.to_string()))?;
let prototype = let prototype =

44
core/engine/src/builtins/intl/list_format/mod.rs

@ -12,7 +12,10 @@ use crate::{
options::{get_option, get_options_object}, options::{get_option, get_options_object},
Array, BuiltInBuilder, BuiltInConstructor, BuiltInObject, IntrinsicObject, OrdinaryObject, Array, BuiltInBuilder, BuiltInConstructor, BuiltInObject, IntrinsicObject, OrdinaryObject,
}, },
context::intrinsics::{Intrinsics, StandardConstructor, StandardConstructors}, context::{
icu::ErasedProvider,
intrinsics::{Intrinsics, StandardConstructor, StandardConstructors},
},
js_string, js_string,
object::{internal_methods::get_prototype_from_constructor, JsObject}, object::{internal_methods::get_prototype_from_constructor, JsObject},
property::Attribute, property::Attribute,
@ -128,7 +131,7 @@ impl BuiltInConstructor for ListFormat {
..Default::default() ..Default::default()
}, },
context.intl_provider(), context.intl_provider(),
); )?;
// 11. Let type be ? GetOption(options, "type", string, « "conjunction", "disjunction", "unit" », "conjunction"). // 11. Let type be ? GetOption(options, "type", string, « "conjunction", "disjunction", "unit" », "conjunction").
// 12. Set listFormat.[[Type]] to type. // 12. Set listFormat.[[Type]] to type.
@ -142,23 +145,26 @@ impl BuiltInConstructor for ListFormat {
// 16. Let dataLocaleData be localeData.[[<dataLocale>]]. // 16. Let dataLocaleData be localeData.[[<dataLocale>]].
// 17. Let dataLocaleTypes be dataLocaleData.[[<type>]]. // 17. Let dataLocaleTypes be dataLocaleData.[[<type>]].
// 18. Set listFormat.[[Templates]] to dataLocaleTypes.[[<style>]]. // 18. Set listFormat.[[Templates]] to dataLocaleTypes.[[<style>]].
let data_locale = DataLocale::from(&locale); let data_locale = &DataLocale::from(&locale);
let formatter = match typ { let formatter = match (typ, context.intl_provider().erased_provider()) {
ListFormatType::Conjunction => ListFormatter::try_new_and_with_length_unstable( (ListFormatType::Conjunction, ErasedProvider::Any(a)) => {
context.intl_provider(), ListFormatter::try_new_and_with_length_with_any_provider(a, data_locale, style)
&data_locale, }
style, (ListFormatType::Disjunction, ErasedProvider::Any(a)) => {
), ListFormatter::try_new_or_with_length_with_any_provider(a, data_locale, style)
ListFormatType::Disjunction => ListFormatter::try_new_or_with_length_unstable( }
context.intl_provider(), (ListFormatType::Unit, ErasedProvider::Any(a)) => {
&data_locale, ListFormatter::try_new_unit_with_length_with_any_provider(a, data_locale, style)
style, }
), (ListFormatType::Conjunction, ErasedProvider::Buffer(b)) => {
ListFormatType::Unit => ListFormatter::try_new_unit_with_length_unstable( ListFormatter::try_new_and_with_length_with_buffer_provider(b, data_locale, style)
context.intl_provider(), }
&data_locale, (ListFormatType::Disjunction, ErasedProvider::Buffer(b)) => {
style, ListFormatter::try_new_or_with_length_with_buffer_provider(b, data_locale, style)
), }
(ListFormatType::Unit, ErasedProvider::Buffer(b)) => {
ListFormatter::try_new_unit_with_length_with_buffer_provider(b, data_locale, style)
}
} }
.map_err(|e| JsNativeError::typ().with_message(e.to_string()))?; .map_err(|e| JsNativeError::typ().with_message(e.to_string()))?;

16
core/engine/src/builtins/intl/locale/mod.rs

@ -248,7 +248,7 @@ impl BuiltInConstructor for Locale {
// 10. Set tag to ! CanonicalizeUnicodeLocaleId(tag). // 10. Set tag to ! CanonicalizeUnicodeLocaleId(tag).
context context
.intl_provider() .intl_provider()
.locale_canonicalizer() .locale_canonicalizer()?
.canonicalize(&mut tag); .canonicalize(&mut tag);
// Skipping some boilerplate since this is easier to do using the `Locale` type, but putting the // Skipping some boilerplate since this is easier to do using the `Locale` type, but putting the
@ -282,7 +282,7 @@ impl BuiltInConstructor for Locale {
// 17. Return ! CanonicalizeUnicodeLocaleId(tag). // 17. Return ! CanonicalizeUnicodeLocaleId(tag).
context context
.intl_provider() .intl_provider()
.locale_canonicalizer() .locale_canonicalizer()?
.canonicalize(&mut tag); .canonicalize(&mut tag);
} }
@ -368,7 +368,7 @@ impl BuiltInConstructor for Locale {
context context
.intl_provider() .intl_provider()
.locale_canonicalizer() .locale_canonicalizer()?
.canonicalize(&mut tag); .canonicalize(&mut tag);
// 6. Let locale be ? OrdinaryCreateFromConstructor(NewTarget, "%Locale.prototype%", internalSlotsList). // 6. Let locale be ? OrdinaryCreateFromConstructor(NewTarget, "%Locale.prototype%", internalSlotsList).
@ -409,7 +409,10 @@ impl Locale {
.clone(); .clone();
// 3. Let maximal be the result of the Add Likely Subtags algorithm applied to loc.[[Locale]]. If an error is signaled, set maximal to loc.[[Locale]]. // 3. Let maximal be the result of the Add Likely Subtags algorithm applied to loc.[[Locale]]. If an error is signaled, set maximal to loc.[[Locale]].
context.intl_provider().locale_expander().maximize(&mut loc); context
.intl_provider()
.locale_expander()?
.maximize(&mut loc);
// 4. Return ! Construct(%Locale%, maximal). // 4. Return ! Construct(%Locale%, maximal).
let prototype = context.intrinsics().constructors().locale().prototype(); let prototype = context.intrinsics().constructors().locale().prototype();
@ -445,7 +448,10 @@ impl Locale {
.clone(); .clone();
// 3. Let minimal be the result of the Remove Likely Subtags algorithm applied to loc.[[Locale]]. If an error is signaled, set minimal to loc.[[Locale]]. // 3. Let minimal be the result of the Remove Likely Subtags algorithm applied to loc.[[Locale]]. If an error is signaled, set minimal to loc.[[Locale]].
context.intl_provider().locale_expander().minimize(&mut loc); context
.intl_provider()
.locale_expander()?
.minimize(&mut loc);
// 4. Return ! Construct(%Locale%, minimal). // 4. Return ! Construct(%Locale%, minimal).
let prototype = context.intrinsics().constructors().locale().prototype(); let prototype = context.intrinsics().constructors().locale().prototype();

11
core/engine/src/builtins/intl/locale/tests.rs

@ -73,8 +73,8 @@ impl Service for TestService {
#[test] #[test]
fn locale_resolution() { fn locale_resolution() {
let provider = IntlProvider::try_new_with_buffer_provider(boa_icu_provider::buffer()).unwrap(); let provider = IntlProvider::try_new_with_buffer_provider(boa_icu_provider::buffer());
let mut default = default_locale(provider.locale_canonicalizer()); let mut default = default_locale(provider.locale_canonicalizer().unwrap());
default default
.extensions .extensions
.unicode .unicode
@ -88,7 +88,7 @@ fn locale_resolution() {
hc: Some(HourCycle::H11), hc: Some(HourCycle::H11),
}, },
}; };
let locale = resolve_locale::<TestService>([], &mut options, &provider); let locale = resolve_locale::<TestService>([], &mut options, &provider).unwrap();
assert_eq!(locale, default); assert_eq!(locale, default);
// test best fit // test best fit
@ -99,7 +99,7 @@ fn locale_resolution() {
}, },
}; };
let locale = resolve_locale::<TestService>([], &mut options, &provider); let locale = resolve_locale::<TestService>([], &mut options, &provider).unwrap();
assert_eq!(locale, default); assert_eq!(locale, default);
// requested: [es-ES] // requested: [es-ES]
@ -108,6 +108,7 @@ fn locale_resolution() {
service_options: TestOptions { hc: None }, service_options: TestOptions { hc: None },
}; };
let locale = resolve_locale::<TestService>([locale!("es-AR")], &mut options, &provider); let locale =
resolve_locale::<TestService>([locale!("es-AR")], &mut options, &provider).unwrap();
assert_eq!(locale, "es-u-hc-h23".parse().unwrap()); assert_eq!(locale, "es-u-hc-h23".parse().unwrap());
} }

23
core/engine/src/builtins/intl/locale/utils.rs

@ -132,7 +132,7 @@ pub(crate) fn canonicalize_locale_list(
// vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag). // vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
context context
.intl_provider() .intl_provider()
.locale_canonicalizer() .locale_canonicalizer()?
.canonicalize(&mut tag); .canonicalize(&mut tag);
// vii. If canonicalizedTag is not an element of seen, append canonicalizedTag as the last element of seen. // vii. If canonicalizedTag is not an element of seen, append canonicalizedTag as the last element of seen.
@ -316,7 +316,7 @@ pub(in crate::builtins::intl) fn resolve_locale<S>(
requested_locales: impl IntoIterator<Item = Locale>, requested_locales: impl IntoIterator<Item = Locale>,
options: &mut IntlOptions<S::LocaleOptions>, options: &mut IntlOptions<S::LocaleOptions>,
provider: &IntlProvider, provider: &IntlProvider,
) -> Locale ) -> JsResult<Locale>
where where
S: Service, S: Service,
IntlProvider: DataProvider<S::LangMarker>, IntlProvider: DataProvider<S::LangMarker>,
@ -327,12 +327,17 @@ where
// 3. Else, // 3. Else,
// a. Let r be LookupMatchingLocaleByBestFit(availableLocales, requestedLocales). // a. Let r be LookupMatchingLocaleByBestFit(availableLocales, requestedLocales).
// 4. If r is undefined, set r to the Record { [[locale]]: DefaultLocale(), [[extension]]: empty }. // 4. If r is undefined, set r to the Record { [[locale]]: DefaultLocale(), [[extension]]: empty }.
let mut found_locale = if options.matcher == LocaleMatcher::Lookup { let found_locale = if options.matcher == LocaleMatcher::Lookup {
lookup_matching_locale_by_prefix::<S::LangMarker>(requested_locales, provider) lookup_matching_locale_by_prefix::<S::LangMarker>(requested_locales, provider)
} else { } else {
lookup_matching_locale_by_best_fit::<S::LangMarker>(requested_locales, provider) lookup_matching_locale_by_best_fit::<S::LangMarker>(requested_locales, provider)
} };
.unwrap_or_else(|| default_locale(provider.locale_canonicalizer()));
let mut found_locale = if let Some(loc) = found_locale {
loc
} else {
default_locale(provider.locale_canonicalizer()?)
};
// From here, the spec differs significantly from the implementation, // From here, the spec differs significantly from the implementation,
// since ICU4X allows us to skip some steps and modularize the // since ICU4X allows us to skip some steps and modularize the
@ -388,9 +393,9 @@ where
// 12. Return result. // 12. Return result.
S::resolve(&mut found_locale, &mut options.service_options, provider); S::resolve(&mut found_locale, &mut options.service_options, provider);
provider provider
.locale_canonicalizer() .locale_canonicalizer()?
.canonicalize(&mut found_locale); .canonicalize(&mut found_locale);
found_locale Ok(found_locale)
} }
/// Abstract operation [`FilterLocales ( availableLocales, requestedLocales, options )`][spec] /// Abstract operation [`FilterLocales ( availableLocales, requestedLocales, options )`][spec]
@ -493,7 +498,7 @@ mod tests {
#[test] #[test]
fn best_fit() { fn best_fit() {
let icu = &IntlProvider::try_new_with_buffer_provider(boa_icu_provider::buffer()).unwrap(); let icu = &IntlProvider::try_new_with_buffer_provider(boa_icu_provider::buffer());
assert_eq!( assert_eq!(
lookup_matching_locale_by_best_fit::<CardinalV1Marker>([locale!("en")], icu), lookup_matching_locale_by_best_fit::<CardinalV1Marker>([locale!("en")], icu),
@ -513,7 +518,7 @@ mod tests {
#[test] #[test]
fn lookup_match() { fn lookup_match() {
let icu = &IntlProvider::try_new_with_buffer_provider(boa_icu_provider::buffer()).unwrap(); let icu = &IntlProvider::try_new_with_buffer_provider(boa_icu_provider::buffer());
// requested: [fr-FR-u-hc-h12] // requested: [fr-FR-u-hc-h12]
let requested: Locale = "fr-FR-u-hc-h12".parse().unwrap(); let requested: Locale = "fr-FR-u-hc-h12".parse().unwrap();

26
core/engine/src/builtins/intl/number_format/mod.rs

@ -15,6 +15,7 @@ use icu_locid::{
extensions::unicode::{key, Value}, extensions::unicode::{key, Value},
Locale, Locale,
}; };
use icu_provider::DataLocale;
use num_bigint::BigInt; use num_bigint::BigInt;
use num_traits::Num; use num_traits::Num;
pub(crate) use options::*; pub(crate) use options::*;
@ -24,7 +25,10 @@ use crate::{
builder::BuiltInBuilder, options::get_option, string::is_trimmable_whitespace, builder::BuiltInBuilder, options::get_option, string::is_trimmable_whitespace,
BuiltInConstructor, BuiltInObject, IntrinsicObject, BuiltInConstructor, BuiltInObject, IntrinsicObject,
}, },
context::intrinsics::{Intrinsics, StandardConstructor, StandardConstructors}, context::{
icu::ErasedProvider,
intrinsics::{Intrinsics, StandardConstructor, StandardConstructors},
},
js_string, js_string,
object::{ object::{
internal_methods::get_prototype_from_constructor, FunctionObjectBuilder, JsFunction, internal_methods::get_prototype_from_constructor, FunctionObjectBuilder, JsFunction,
@ -240,7 +244,7 @@ impl BuiltInConstructor for NumberFormat {
requested_locales, requested_locales,
&mut intl_options, &mut intl_options,
context.intl_provider(), context.intl_provider(),
); )?;
// 11. Set numberFormat.[[Locale]] to r.[[locale]]. // 11. Set numberFormat.[[Locale]] to r.[[locale]].
// 12. Set numberFormat.[[DataLocale]] to r.[[dataLocale]]. // 12. Set numberFormat.[[DataLocale]] to r.[[dataLocale]].
@ -365,15 +369,19 @@ impl BuiltInConstructor for NumberFormat {
let sign_display = let sign_display =
get_option(&options, js_str!("signDisplay"), context)?.unwrap_or(SignDisplay::Auto); get_option(&options, js_str!("signDisplay"), context)?.unwrap_or(SignDisplay::Auto);
let formatter = FixedDecimalFormatter::try_new_unstable(
context.intl_provider(),
&locale.clone().into(),
{
let mut options = FixedDecimalFormatterOptions::default(); let mut options = FixedDecimalFormatterOptions::default();
options.grouping_strategy = use_grouping; options.grouping_strategy = use_grouping;
options
}, let data_locale = &DataLocale::from(&locale);
)
let formatter = match context.intl_provider().erased_provider() {
ErasedProvider::Any(a) => {
FixedDecimalFormatter::try_new_with_any_provider(a, data_locale, options)
}
ErasedProvider::Buffer(b) => {
FixedDecimalFormatter::try_new_with_buffer_provider(b, data_locale, options)
}
}
.map_err(|err| JsNativeError::typ().with_message(err.to_string()))?; .map_err(|err| JsNativeError::typ().with_message(err.to_string()))?;
let number_format = JsObject::from_proto_and_data_with_shared_shape( let number_format = JsObject::from_proto_and_data_with_shared_shape(

28
core/engine/src/builtins/intl/plural_rules/mod.rs

@ -16,7 +16,10 @@ use crate::{
options::get_option, Array, BuiltInBuilder, BuiltInConstructor, BuiltInObject, options::get_option, Array, BuiltInBuilder, BuiltInConstructor, BuiltInObject,
IntrinsicObject, IntrinsicObject,
}, },
context::intrinsics::{Intrinsics, StandardConstructor, StandardConstructors}, context::{
icu::ErasedProvider,
intrinsics::{Intrinsics, StandardConstructor, StandardConstructors},
},
js_string, js_string,
object::{internal_methods::get_prototype_from_constructor, ObjectInitializer}, object::{internal_methods::get_prototype_from_constructor, ObjectInitializer},
property::Attribute, property::Attribute,
@ -142,21 +145,16 @@ impl BuiltInConstructor for PluralRules {
..Default::default() ..Default::default()
}, },
context.intl_provider(), context.intl_provider(),
); )?;
let native = match rule_type { let data_locale = &DataLocale::from(&locale);
PluralRuleType::Cardinal => PluralRulesWithRanges::try_new_cardinal_unstable(
context.intl_provider(), let native = match context.intl_provider().erased_provider() {
&DataLocale::from(&locale), ErasedProvider::Any(a) => {
), PluralRulesWithRanges::try_new_with_any_provider(a, data_locale, rule_type)
PluralRuleType::Ordinal => PluralRulesWithRanges::try_new_ordinal_unstable( }
context.intl_provider(), ErasedProvider::Buffer(b) => {
&DataLocale::from(&locale), PluralRulesWithRanges::try_new_with_buffer_provider(b, data_locale, rule_type)
),
_ => {
return Err(JsNativeError::typ()
.with_message("unimplemented plural rule type")
.into())
} }
} }
.map_err(|e| JsNativeError::typ().with_message(e.to_string()))?; .map_err(|e| JsNativeError::typ().with_message(e.to_string()))?;

41
core/engine/src/builtins/intl/segmenter/mod.rs

@ -12,7 +12,10 @@ use crate::{
options::{get_option, get_options_object}, options::{get_option, get_options_object},
BuiltInBuilder, BuiltInConstructor, BuiltInObject, IntrinsicObject, BuiltInBuilder, BuiltInConstructor, BuiltInObject, IntrinsicObject,
}, },
context::intrinsics::{Intrinsics, StandardConstructor, StandardConstructors}, context::{
icu::ErasedProvider,
intrinsics::{Intrinsics, StandardConstructor, StandardConstructors},
},
js_string, js_string,
object::{internal_methods::get_prototype_from_constructor, JsObject, ObjectInitializer}, object::{internal_methods::get_prototype_from_constructor, JsObject, ObjectInitializer},
property::Attribute, property::Attribute,
@ -155,24 +158,38 @@ impl BuiltInConstructor for Segmenter {
..Default::default() ..Default::default()
}, },
context.intl_provider(), context.intl_provider(),
); )?;
// 12. Let granularity be ? GetOption(options, "granularity", string, « "grapheme", "word", "sentence" », "grapheme"). // 12. Let granularity be ? GetOption(options, "granularity", string, « "grapheme", "word", "sentence" », "grapheme").
let granularity = let granularity =
get_option(&options, js_str!("granularity"), context)?.unwrap_or_default(); get_option(&options, js_str!("granularity"), context)?.unwrap_or_default();
// 13. Set segmenter.[[SegmenterGranularity]] to granularity.
let native = match granularity { // 13. Set segmenter.[[SegmenterGranularity]] to granularity.
Granularity::Grapheme => { let native = match (granularity, context.intl_provider().erased_provider()) {
GraphemeClusterSegmenter::try_new_unstable(context.intl_provider()) (Granularity::Grapheme, ErasedProvider::Any(a)) => {
GraphemeClusterSegmenter::try_new_with_any_provider(a)
.map(|s| NativeSegmenter::Grapheme(Box::new(s))) .map(|s| NativeSegmenter::Grapheme(Box::new(s)))
} }
(Granularity::Word, ErasedProvider::Any(a)) => {
Granularity::Word => WordSegmenter::try_new_auto_unstable(context.intl_provider()) WordSegmenter::try_new_auto_with_any_provider(a)
.map(|s| NativeSegmenter::Word(Box::new(s))), .map(|s| NativeSegmenter::Word(Box::new(s)))
}
Granularity::Sentence => SentenceSegmenter::try_new_unstable(context.intl_provider()) (Granularity::Sentence, ErasedProvider::Any(a)) => {
.map(|s| NativeSegmenter::Sentence(Box::new(s))), SentenceSegmenter::try_new_with_any_provider(a)
.map(|s| NativeSegmenter::Sentence(Box::new(s)))
}
(Granularity::Grapheme, ErasedProvider::Buffer(b)) => {
GraphemeClusterSegmenter::try_new_with_buffer_provider(b)
.map(|s| NativeSegmenter::Grapheme(Box::new(s)))
}
(Granularity::Word, ErasedProvider::Buffer(b)) => {
WordSegmenter::try_new_auto_with_buffer_provider(b)
.map(|s| NativeSegmenter::Word(Box::new(s)))
}
(Granularity::Sentence, ErasedProvider::Buffer(b)) => {
SentenceSegmenter::try_new_with_buffer_provider(b)
.map(|s| NativeSegmenter::Sentence(Box::new(s)))
}
} }
.map_err(|err| JsNativeError::typ().with_message(err.to_string()))?; .map_err(|err| JsNativeError::typ().with_message(err.to_string()))?;

15
core/engine/src/builtins/string/mod.rs

@ -1763,13 +1763,18 @@ impl String {
// 1. Let requestedLocales be ? CanonicalizeLocaleList(locales). // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales).
// 2. If requestedLocales is not an empty List, then // 2. If requestedLocales is not an empty List, then
// a. Let requestedLocale be requestedLocales[0]. let mut requested_locale = if let Some(locale) =
let mut requested_locale = canonicalize_locale_list(args.get_or_undefined(0), context)? canonicalize_locale_list(args.get_or_undefined(0), context)?
.into_iter() .into_iter()
.next() .next()
{
// a. Let requestedLocale be requestedLocales[0].
locale
} else {
// 3. Else, // 3. Else,
// a. Let requestedLocale be ! DefaultLocale(). // a. Let requestedLocale be ! DefaultLocale().
.unwrap_or_else(|| default_locale(context.intl_provider().locale_canonicalizer())); default_locale(context.intl_provider().locale_canonicalizer()?)
};
// 4. Let noExtensionsLocale be the String value that is requestedLocale with any Unicode locale extension sequences (6.2.1) removed. // 4. Let noExtensionsLocale be the String value that is requestedLocale with any Unicode locale extension sequences (6.2.1) removed.
requested_locale.extensions.unicode.clear(); requested_locale.extensions.unicode.clear();
@ -1784,7 +1789,7 @@ impl String {
) )
.unwrap_or(Locale::UND); .unwrap_or(Locale::UND);
let casemapper = context.intl_provider().case_mapper(); let casemapper = context.intl_provider().case_mapper()?;
// 8. Let codePoints be StringToCodePoints(S). // 8. Let codePoints be StringToCodePoints(S).
let result = string.map_valid_segments(|segment| { let result = string.map_valid_segments(|segment| {
@ -2165,7 +2170,7 @@ impl String {
} }
#[cfg(feature = "intl")] #[cfg(feature = "intl")]
{ {
context.intl_provider().string_normalizers() context.intl_provider().string_normalizers()?
} }
}; };

123
core/engine/src/context/icu.rs

@ -1,4 +1,4 @@
use std::fmt::Debug; use std::{cell::OnceCell, fmt::Debug};
use icu_casemap::CaseMapper; use icu_casemap::CaseMapper;
use icu_locid_transform::{LocaleCanonicalizer, LocaleExpander, LocaleTransformError}; use icu_locid_transform::{LocaleCanonicalizer, LocaleExpander, LocaleTransformError};
@ -12,10 +12,10 @@ use thiserror::Error;
use yoke::{trait_hack::YokeTraitHack, Yokeable}; use yoke::{trait_hack::YokeTraitHack, Yokeable};
use zerofrom::ZeroFrom; use zerofrom::ZeroFrom;
use crate::builtins::string::StringNormalizers; use crate::{builtins::string::StringNormalizers, JsError, JsNativeError};
/// A [`DataProvider`] that can be either a [`BufferProvider`] or an [`AnyProvider`]. /// A [`DataProvider`] that can be either a [`BufferProvider`] or an [`AnyProvider`].
enum ErasedProvider { pub(crate) enum ErasedProvider {
Any(Box<dyn AnyProvider>), Any(Box<dyn AnyProvider>),
Buffer(Box<dyn BufferProvider>), Buffer(Box<dyn BufferProvider>),
} }
@ -34,13 +34,25 @@ pub enum IcuError {
CaseMap(#[from] DataError), CaseMap(#[from] DataError),
} }
impl From<IcuError> for JsNativeError {
fn from(value: IcuError) -> Self {
JsNativeError::typ().with_message(value.to_string())
}
}
impl From<IcuError> for JsError {
fn from(value: IcuError) -> Self {
JsNativeError::from(value).into()
}
}
/// Custom [`DataProvider`] for `Intl` that caches some utilities. /// Custom [`DataProvider`] for `Intl` that caches some utilities.
pub(crate) struct IntlProvider { pub(crate) struct IntlProvider {
inner_provider: ErasedProvider, inner_provider: ErasedProvider,
locale_canonicalizer: LocaleCanonicalizer, locale_canonicalizer: OnceCell<LocaleCanonicalizer>,
locale_expander: LocaleExpander, locale_expander: OnceCell<LocaleExpander>,
string_normalizers: StringNormalizers, string_normalizers: OnceCell<StringNormalizers>,
case_mapper: CaseMapper, case_mapper: OnceCell<CaseMapper>,
} }
impl<M> DataProvider<M> for IntlProvider impl<M> DataProvider<M> for IntlProvider
@ -76,19 +88,14 @@ impl IntlProvider {
/// Returns an error if any of the tools required cannot be constructed. /// Returns an error if any of the tools required cannot be constructed.
pub(crate) fn try_new_with_buffer_provider( pub(crate) fn try_new_with_buffer_provider(
provider: (impl BufferProvider + 'static), provider: (impl BufferProvider + 'static),
) -> Result<IntlProvider, IcuError> { ) -> IntlProvider {
Ok(Self { Self {
locale_canonicalizer: LocaleCanonicalizer::try_new_with_buffer_provider(&provider)?, locale_canonicalizer: OnceCell::new(),
locale_expander: LocaleExpander::try_new_with_buffer_provider(&provider)?, locale_expander: OnceCell::new(),
string_normalizers: StringNormalizers { string_normalizers: OnceCell::new(),
nfc: ComposingNormalizer::try_new_nfc_with_buffer_provider(&provider)?, case_mapper: OnceCell::new(),
nfkc: ComposingNormalizer::try_new_nfkc_with_buffer_provider(&provider)?,
nfd: DecomposingNormalizer::try_new_nfd_with_buffer_provider(&provider)?,
nfkd: DecomposingNormalizer::try_new_nfkd_with_buffer_provider(&provider)?,
},
case_mapper: CaseMapper::try_new_with_buffer_provider(&provider)?,
inner_provider: ErasedProvider::Buffer(Box::new(provider)), inner_provider: ErasedProvider::Buffer(Box::new(provider)),
}) }
} }
/// Creates a new [`IntlProvider`] from an [`AnyProvider`]. /// Creates a new [`IntlProvider`] from an [`AnyProvider`].
@ -98,38 +105,76 @@ impl IntlProvider {
/// Returns an error if any of the tools required cannot be constructed. /// Returns an error if any of the tools required cannot be constructed.
pub(crate) fn try_new_with_any_provider( pub(crate) fn try_new_with_any_provider(
provider: (impl AnyProvider + 'static), provider: (impl AnyProvider + 'static),
) -> Result<IntlProvider, IcuError> { ) -> IntlProvider {
Ok(Self { Self {
locale_canonicalizer: LocaleCanonicalizer::try_new_with_any_provider(&provider)?, locale_canonicalizer: OnceCell::new(),
locale_expander: LocaleExpander::try_new_extended_with_any_provider(&provider)?, locale_expander: OnceCell::new(),
string_normalizers: StringNormalizers { string_normalizers: OnceCell::new(),
nfc: ComposingNormalizer::try_new_nfc_with_any_provider(&provider)?, case_mapper: OnceCell::new(),
nfkc: ComposingNormalizer::try_new_nfkc_with_any_provider(&provider)?,
nfd: DecomposingNormalizer::try_new_nfd_with_any_provider(&provider)?,
nfkd: DecomposingNormalizer::try_new_nfkd_with_any_provider(&provider)?,
},
case_mapper: CaseMapper::try_new_with_any_provider(&provider)?,
inner_provider: ErasedProvider::Any(Box::new(provider)), inner_provider: ErasedProvider::Any(Box::new(provider)),
}) }
} }
/// Gets the [`LocaleCanonicalizer`] tool. /// Gets the [`LocaleCanonicalizer`] tool.
pub(crate) const fn locale_canonicalizer(&self) -> &LocaleCanonicalizer { pub(crate) fn locale_canonicalizer(&self) -> Result<&LocaleCanonicalizer, IcuError> {
&self.locale_canonicalizer if let Some(lc) = self.locale_canonicalizer.get() {
return Ok(lc);
}
let lc = match &self.inner_provider {
ErasedProvider::Any(a) => LocaleCanonicalizer::try_new_with_any_provider(a)?,
ErasedProvider::Buffer(b) => LocaleCanonicalizer::try_new_with_buffer_provider(b)?,
};
Ok(self.locale_canonicalizer.get_or_init(|| lc))
} }
/// Gets the [`LocaleExpander`] tool. /// Gets the [`LocaleExpander`] tool.
pub(crate) const fn locale_expander(&self) -> &LocaleExpander { pub(crate) fn locale_expander(&self) -> Result<&LocaleExpander, IcuError> {
&self.locale_expander if let Some(le) = self.locale_expander.get() {
return Ok(le);
}
let le = match &self.inner_provider {
ErasedProvider::Any(a) => LocaleExpander::try_new_with_any_provider(a)?,
ErasedProvider::Buffer(b) => LocaleExpander::try_new_with_buffer_provider(b)?,
};
Ok(self.locale_expander.get_or_init(|| le))
} }
/// Gets the [`StringNormalizers`] tools. /// Gets the [`StringNormalizers`] tools.
pub(crate) const fn string_normalizers(&self) -> &StringNormalizers { pub(crate) fn string_normalizers(&self) -> Result<&StringNormalizers, IcuError> {
&self.string_normalizers if let Some(sn) = self.string_normalizers.get() {
return Ok(sn);
}
let sn = match &self.inner_provider {
ErasedProvider::Any(a) => StringNormalizers {
nfc: ComposingNormalizer::try_new_nfc_with_any_provider(a)?,
nfkc: ComposingNormalizer::try_new_nfkc_with_any_provider(a)?,
nfd: DecomposingNormalizer::try_new_nfd_with_any_provider(a)?,
nfkd: DecomposingNormalizer::try_new_nfkd_with_any_provider(a)?,
},
ErasedProvider::Buffer(b) => StringNormalizers {
nfc: ComposingNormalizer::try_new_nfc_with_buffer_provider(b)?,
nfkc: ComposingNormalizer::try_new_nfkc_with_buffer_provider(b)?,
nfd: DecomposingNormalizer::try_new_nfd_with_buffer_provider(b)?,
nfkd: DecomposingNormalizer::try_new_nfkd_with_buffer_provider(b)?,
},
};
Ok(self.string_normalizers.get_or_init(|| sn))
} }
/// Gets the [`CaseMapper`] tool. /// Gets the [`CaseMapper`] tool.
pub(crate) const fn case_mapper(&self) -> &CaseMapper { pub(crate) fn case_mapper(&self) -> Result<&CaseMapper, IcuError> {
&self.case_mapper if let Some(cm) = self.case_mapper.get() {
return Ok(cm);
}
let cm = match &self.inner_provider {
ErasedProvider::Any(a) => CaseMapper::try_new_with_any_provider(a)?,
ErasedProvider::Buffer(b) => CaseMapper::try_new_with_buffer_provider(b)?,
};
Ok(self.case_mapper.get_or_init(|| cm))
}
/// Gets the inner erased provider.
pub(crate) fn erased_provider(&self) -> &ErasedProvider {
&self.inner_provider
} }
} }

5
core/engine/src/context/mod.rs

@ -958,7 +958,7 @@ impl ContextBuilder {
mut self, mut self,
provider: T, provider: T,
) -> Result<Self, IcuError> { ) -> Result<Self, IcuError> {
self.icu = Some(icu::IntlProvider::try_new_with_buffer_provider(provider)?); self.icu = Some(icu::IntlProvider::try_new_with_buffer_provider(provider));
Ok(self) Ok(self)
} }
@ -992,7 +992,7 @@ impl ContextBuilder {
mut self, mut self,
provider: T, provider: T,
) -> Result<Self, IcuError> { ) -> Result<Self, IcuError> {
self.icu = Some(icu::IntlProvider::try_new_with_any_provider(provider)?); self.icu = Some(icu::IntlProvider::try_new_with_any_provider(provider));
Ok(self) Ok(self)
} }
@ -1093,7 +1093,6 @@ impl ContextBuilder {
cfg_if::cfg_if! { cfg_if::cfg_if! {
if #[cfg(feature = "intl_bundled")] { if #[cfg(feature = "intl_bundled")] {
icu::IntlProvider::try_new_with_buffer_provider(boa_icu_provider::buffer()) icu::IntlProvider::try_new_with_buffer_provider(boa_icu_provider::buffer())
.expect("Failed to initialize default icu data.")
} else { } else {
return Err(JsNativeError::typ() return Err(JsNativeError::typ()
.with_message("missing Intl provider for context") .with_message("missing Intl provider for context")

10
core/icu_provider/Cargo.toml

@ -14,7 +14,17 @@ rust-version.workspace = true
icu_provider = { workspace = true, features = ["sync"] } icu_provider = { workspace = true, features = ["sync"] }
icu_provider_blob.workspace = true icu_provider_blob.workspace = true
icu_provider_adapters = { workspace = true, features = ["serde"] } icu_provider_adapters = { workspace = true, features = ["serde"] }
icu_casemap = { workspace = true, features = ["serde", "datagen"] }
icu_collator = { workspace = true, features = ["serde", "datagen"] }
icu_datetime = { workspace = true, features = ["serde", "datagen"] }
icu_decimal = { workspace = true, features = ["serde", "datagen"] }
icu_list = { workspace = true, features = ["serde", "datagen"] }
icu_locid_transform = { workspace = true, features = ["serde", "datagen"] }
icu_normalizer = { workspace = true, features = ["serde", "datagen"] }
icu_plurals = { workspace = true, features = ["serde", "datagen", "experimental"] }
icu_segmenter = { workspace = true, features = ["serde", "datagen"] }
once_cell = { workspace = true, default-features = false, features = ["critical-section"] } once_cell = { workspace = true, default-features = false, features = ["critical-section"] }
paste.workspace = true
[features] [features]
default = ["std"] default = ["std"]

BIN
core/icu_provider/data/icu_casemap.postcard

Binary file not shown.

BIN
core/icu_provider/data/icu_collator.postcard

Binary file not shown.

BIN
core/icu_provider/data/icudata.postcard → core/icu_provider/data/icu_datetime.postcard

Binary file not shown.

BIN
core/icu_provider/data/icu_decimal.postcard

Binary file not shown.

BIN
core/icu_provider/data/icu_list.postcard

Binary file not shown.

BIN
core/icu_provider/data/icu_locid_transform.postcard

Binary file not shown.

BIN
core/icu_provider/data/icu_normalizer.postcard

Binary file not shown.

BIN
core/icu_provider/data/icu_plurals.postcard

Binary file not shown.

BIN
core/icu_provider/data/icu_segmenter.postcard

Binary file not shown.

95
core/icu_provider/src/lib.rs

@ -21,24 +21,95 @@
)] )]
#![cfg_attr(not(feature = "std"), no_std)] #![cfg_attr(not(feature = "std"), no_std)]
use icu_provider_adapters::fallback::LocaleFallbackProvider; extern crate alloc;
use core::fmt::Debug;
use icu_provider::{BufferMarker, BufferProvider, DataError, DataErrorKind, DataKey, DataResponse};
use icu_provider_adapters::{fallback::LocaleFallbackProvider, fork::MultiForkByKeyProvider};
use icu_provider_blob::BlobDataProvider; use icu_provider_blob::BlobDataProvider;
use once_cell::sync::Lazy; use once_cell::sync::{Lazy, OnceCell};
/// Gets the default data provider stored as a [`BufferProvider`]. /// A buffer provider that is lazily deserialized at the first data request.
/// ///
/// [`BufferProvider`]: icu_provider::BufferProvider /// The provider must specify the list of keys it supports, to avoid deserializing the
#[must_use] /// buffer for unknown keys.
pub fn buffer() -> &'static impl icu_provider::BufferProvider { struct LazyBufferProvider {
static PROVIDER: Lazy<LocaleFallbackProvider<BlobDataProvider>> = Lazy::new(|| { provider: OnceCell<BlobDataProvider>,
let blob = BlobDataProvider::try_new_from_static_blob(include_bytes!(concat!( bytes: &'static [u8],
valid_keys: &'static [DataKey],
}
impl Debug for LazyBufferProvider {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
f.debug_struct("LazyBufferProvider")
.field("provider", &self.provider)
.field("bytes", &"[...]")
.field("valid_keys", &self.valid_keys)
.finish()
}
}
impl BufferProvider for LazyBufferProvider {
fn load_buffer(
&self,
key: DataKey,
req: icu_provider::DataRequest<'_>,
) -> Result<DataResponse<BufferMarker>, DataError> {
if !self.valid_keys.contains(&key) {
return Err(DataErrorKind::MissingDataKey.with_key(key));
}
let Ok(provider) = self
.provider
.get_or_try_init(|| BlobDataProvider::try_new_from_static_blob(self.bytes))
else {
return Err(DataErrorKind::Custom.with_str_context("invalid blob data provider"));
};
provider.load_buffer(key, req)
}
}
/// A macro that creates a [`LazyBufferProvider`] from an icu4x crate.
macro_rules! provider_from_icu_crate {
($service:path) => {
paste::paste! {
LazyBufferProvider {
provider: OnceCell::new(),
bytes: include_bytes!(concat!(
env!("CARGO_MANIFEST_DIR"), env!("CARGO_MANIFEST_DIR"),
"/data/icudata.postcard" "/data/",
))) stringify!($service),
.expect("The statically compiled data file should be valid."); ".postcard",
LocaleFallbackProvider::try_new_with_buffer_provider(blob) )),
valid_keys: $service::provider::KEYS,
}
}
};
}
/// Boa's default buffer provider.
static PROVIDER: Lazy<LocaleFallbackProvider<MultiForkByKeyProvider<LazyBufferProvider>>> =
Lazy::new(|| {
let provider = MultiForkByKeyProvider::new(alloc::vec![
provider_from_icu_crate!(icu_casemap),
provider_from_icu_crate!(icu_collator),
provider_from_icu_crate!(icu_datetime),
provider_from_icu_crate!(icu_decimal),
provider_from_icu_crate!(icu_list),
provider_from_icu_crate!(icu_locid_transform),
provider_from_icu_crate!(icu_normalizer),
provider_from_icu_crate!(icu_plurals),
provider_from_icu_crate!(icu_segmenter),
]);
LocaleFallbackProvider::try_new_with_buffer_provider(provider)
.expect("The statically compiled data file should be valid.") .expect("The statically compiled data file should be valid.")
}); });
/// Gets the default data provider stored as a [`BufferProvider`].
///
/// [`BufferProvider`]: icu_provider::BufferProvider
#[must_use]
pub fn buffer() -> &'static impl BufferProvider {
&*PROVIDER &*PROVIDER
} }

1
tools/gen-icu4x-data/Cargo.toml

@ -10,7 +10,6 @@ license.workspace = true
description.workspace = true description.workspace = true
[dependencies] [dependencies]
icu_provider.workspace = true
icu_datagen = { workspace = true, features = [ icu_datagen = { workspace = true, features = [
"networking", "networking",
"use_wasm", "use_wasm",

94
tools/gen-icu4x-data/src/main.rs

@ -1,86 +1,80 @@
#![allow(missing_docs, rustdoc::missing_crate_level_docs)] #![allow(missing_docs, rustdoc::missing_crate_level_docs)]
use std::{error::Error, fs::File, path::Path}; use std::path::Path;
use std::{error::Error, fs::File};
use icu_datagen::blob_exporter::BlobExporter; use icu_datagen::blob_exporter::BlobExporter;
use icu_datagen::prelude::*; use icu_datagen::prelude::*;
use icu_provider::data_key;
const KEYS_LEN: usize = 129; /// Path to the directory where the exported data lives.
const EXPORT_PATH: &str = "core/icu_provider/data";
/// List of keys used by `Intl` components. /// List of services used by `Intl` components.
/// ///
/// This must be kept in sync with the list of implemented components of `Intl`. /// This must be kept in sync with the list of implemented services of `Intl`.
const KEYS: [DataKey; KEYS_LEN] = { const SERVICES: &[(&str, &[DataKey])] = &[
const CENTINEL_KEY: DataKey = data_key!("centinel@1"); ("icu_casemap", icu_casemap::provider::KEYS),
const SERVICES: [&[DataKey]; 9] = [ ("icu_collator", icu_collator::provider::KEYS),
icu_casemap::provider::KEYS, ("icu_datetime", icu_datetime::provider::KEYS),
icu_collator::provider::KEYS, ("icu_decimal", icu_decimal::provider::KEYS),
icu_datetime::provider::KEYS, ("icu_list", icu_list::provider::KEYS),
icu_decimal::provider::KEYS, ("icu_locid_transform", icu_locid_transform::provider::KEYS),
icu_list::provider::KEYS, ("icu_normalizer", icu_normalizer::provider::KEYS),
icu_locid_transform::provider::KEYS, ("icu_plurals", icu_plurals::provider::KEYS),
icu_normalizer::provider::KEYS, ("icu_segmenter", icu_segmenter::provider::KEYS),
icu_plurals::provider::KEYS,
icu_segmenter::provider::KEYS,
]; ];
let mut array = [CENTINEL_KEY; KEYS_LEN]; fn export_for_service(
service: &str,
let mut offset = 0; keys: &[DataKey],
let mut service_idx = 0; provider: &DatagenProvider,
driver: DatagenDriver,
while service_idx < SERVICES.len() { ) -> Result<(), Box<dyn Error>> {
let service = SERVICES[service_idx]; log::info!(
let mut idx = 0; "Generating ICU4X data for service `{service}` with keys: {:#?}",
while idx < service.len() { keys
array[offset + idx] = service[idx]; );
idx += 1;
} let export_path = Path::new(EXPORT_PATH);
let export_file = export_path.join(format!("{service}.postcard"));
driver.with_keys(keys.iter().copied()).export(
provider,
BlobExporter::new_v2_with_sink(Box::new(File::create(export_file)?)),
)?;
offset += service.len(); Ok(())
service_idx += 1;
} }
assert!(offset == array.len());
array
};
fn main() -> Result<(), Box<dyn Error>> { fn main() -> Result<(), Box<dyn Error>> {
simple_logger::SimpleLogger::new() simple_logger::SimpleLogger::new()
.env() .env()
.with_level(log::LevelFilter::Info) .with_level(log::LevelFilter::Info)
.init()?; .init()?;
let path = Path::new("core/icu_provider/data");
// Removal will throw an error if the directory doesn't exist, hence // Removal will throw an error if the directory doesn't exist, hence
// why we can ignore the error. // why we can ignore the error.
let _unused = std::fs::remove_dir_all(path); let _unused = std::fs::remove_dir_all(EXPORT_PATH);
std::fs::create_dir_all(path)?; std::fs::create_dir_all(EXPORT_PATH)?;
log::info!("Generating ICU4X data for keys: {:#?}", KEYS);
let provider = DatagenProvider::new_latest_tested(); let provider = &DatagenProvider::new_latest_tested();
let locales = provider let locales = provider
.locales_for_coverage_levels([CoverageLevel::Modern])? .locales_for_coverage_levels([CoverageLevel::Modern])?
.into_iter() .into_iter()
.chain([langid!("en-US")]); .chain([langid!("en-US")]);
DatagenDriver::new() let driver = DatagenDriver::new()
.with_keys(KEYS)
.with_locales_and_fallback(locales.map(LocaleFamily::with_descendants), { .with_locales_and_fallback(locales.map(LocaleFamily::with_descendants), {
let mut options = FallbackOptions::default(); let mut options = FallbackOptions::default();
options.deduplication_strategy = Some(DeduplicationStrategy::None); options.deduplication_strategy = Some(DeduplicationStrategy::None);
options options
}) })
.with_additional_collations([String::from("search*")]) .with_additional_collations([String::from("search*")])
.with_recommended_segmenter_models() .with_recommended_segmenter_models();
.export(
&provider, for (service, keys) in SERVICES {
BlobExporter::new_v2_with_sink(Box::new(File::create(path.join("icudata.postcard"))?)), export_for_service(service, keys, provider, driver.clone())?;
)?; }
Ok(()) Ok(())
} }

Loading…
Cancel
Save