Browse Source

Optimize `String.prototype.normalize` (#2848)

We currently use `unicode_normalization` to implement the `String.prototype.normalize` method. However, that crate doesn't support UTF-16 strings as a first-class input, so we had to resort to a workaround: converting the valid parts of a string to UTF-8, normalizing each one, encoding the results back to UTF-16, and concatenating everything with the unpaired surrogates in between. All of this is clearly suboptimal for performance, which is why I replaced our current implementation with the `icu_normalizer` crate, which does support UTF-16 input.

Additionally, when the `intl` feature is enabled, this allows users to override the default normalization data by supplying the required data through the `BoaProvider` data provider.
pull/2865/head
José Julián Espina 2 years ago
parent
commit
739bd5a9cb
  1. 34
      Cargo.lock
  2. 10
      boa_engine/Cargo.toml
  3. 122
      boa_engine/src/builtins/string/mod.rs
  4. 55
      boa_engine/src/context/icu.rs
  5. 4
      boa_engine/src/context/mod.rs
  6. 11
      boa_icu_provider/Cargo.toml
  7. 2
      boa_icu_provider/data/min/any.rs
  8. 6
      boa_icu_provider/data/min/fallback/likelysubtags_v1/mod.rs
  9. 66
      boa_icu_provider/data/min/fallback/likelysubtags_v1/und.rs.data
  10. 4
      boa_icu_provider/data/min/fallback/mod.rs
  11. 6
      boa_icu_provider/data/min/fallback/parents_v1/mod.rs
  12. 13
      boa_icu_provider/data/min/fallback/parents_v1/und.rs.data
  13. 6
      boa_icu_provider/data/min/fallback/supplement/co_v1/mod.rs
  14. 22
      boa_icu_provider/data/min/fallback/supplement/co_v1/und.rs.data
  15. 2
      boa_icu_provider/data/min/fallback/supplement/mod.rs
  16. 216
      boa_icu_provider/data/min/mod.rs
  17. 6
      boa_icu_provider/data/min/normalizer/comp_v1/mod.rs
  18. 7
      boa_icu_provider/data/min/normalizer/comp_v1/und.rs.data
  19. 6
      boa_icu_provider/data/min/normalizer/mod.rs
  20. 6
      boa_icu_provider/data/min/normalizer/nfd_v1/mod.rs
  21. 19
      boa_icu_provider/data/min/normalizer/nfd_v1/und.rs.data
  22. 6
      boa_icu_provider/data/min/normalizer/nfdex_v1/mod.rs
  23. 8
      boa_icu_provider/data/min/normalizer/nfdex_v1/und.rs.data
  24. 6
      boa_icu_provider/data/min/normalizer/nfkd_v1/mod.rs
  25. 21
      boa_icu_provider/data/min/normalizer/nfkd_v1/und.rs.data
  26. 6
      boa_icu_provider/data/min/normalizer/nfkdex_v1/mod.rs
  27. 8
      boa_icu_provider/data/min/normalizer/nfkdex_v1/und.rs.data
  28. 50
      boa_icu_provider/src/bin/datagen.rs
  29. 42
      boa_icu_provider/src/lib.rs

34
Cargo.lock generated

@ -413,6 +413,7 @@ dependencies = [
"icu_list",
"icu_locid",
"icu_locid_transform",
"icu_normalizer",
"icu_plurals",
"icu_provider",
"icu_segmenter",
@ -439,7 +440,6 @@ dependencies = [
"textwrap",
"thin-vec",
"thiserror",
"unicode-normalization",
"writeable",
"yoke",
"zerofrom",
@ -471,13 +471,16 @@ dependencies = [
name = "boa_icu_provider"
version = "0.16.0"
dependencies = [
"icu_collections",
"icu_datagen",
"icu_normalizer",
"icu_provider",
"icu_provider_adapters",
"icu_provider_blob",
"log",
"once_cell",
"simple_logger 4.1.0",
"simple_logger",
"zerovec",
]
[[package]]
@ -1974,9 +1977,9 @@ dependencies = [
[[package]]
name = "icu_datagen"
version = "1.2.1"
version = "1.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f474bd38639e5af76d2a70ce60ccb553454a3d3257677de15570c359d2116c5b"
checksum = "3c20e7886449669257f4dfdad042b939a1a32ad618902a6b83936f7d9f7e8146"
dependencies = [
"cached-path",
"clap 4.2.4",
@ -2014,11 +2017,10 @@ dependencies = [
"proc-macro2",
"quote",
"rayon",
"rust-format",
"serde",
"serde-aux",
"serde_json",
"simple_logger 1.16.0",
"simple_logger",
"syn 1.0.109",
"tinystr",
"toml 0.5.11",
@ -3412,15 +3414,6 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "rust-format"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60e7c00b6c3bf5e38a880eec01d7e829d12ca682079f8238a464def3c4b31627"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rustc-demangle"
version = "0.1.23"
@ -3770,17 +3763,6 @@ version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a"
[[package]]
name = "simple_logger"
version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45b60258a35dc3cb8a16890b8fd6723349bfa458d7960e25e633f1b1c19d7b5e"
dependencies = [
"atty",
"log",
"winapi",
]
[[package]]
name = "simple_logger"
version = "4.1.0"

10
boa_engine/Cargo.toml

@ -15,7 +15,9 @@ rust-version.workspace = true
profiler = ["boa_profiler/profiler"]
deser = ["boa_interner/serde", "boa_ast/serde"]
intl = [
"dep:boa_icu_provider",
"boa_icu_provider/full",
"icu_normalizer/serde",
"icu_normalizer/std",
"dep:icu_locid_transform",
"dep:icu_locid",
"dep:icu_datetime",
@ -53,6 +55,7 @@ boa_profiler.workspace = true
boa_macros.workspace = true
boa_ast.workspace = true
boa_parser.workspace = true
boa_icu_provider.workspace = true
serde = { version = "1.0.160", features = ["derive", "rc"] }
serde_json = "1.0.96"
rand = "0.8.5"
@ -66,7 +69,6 @@ indexmap = "1.9.3"
ryu-js = "0.2.2"
chrono = { version = "0.4.24", default-features = false, features = ["clock", "std"] }
fast-float = "0.2.0"
unicode-normalization = "0.1.22"
once_cell = "1.17.1"
tap = "1.0.1"
sptr = "0.3.2"
@ -77,10 +79,10 @@ num_enum = "0.6.1"
pollster = "0.3.0"
thin-vec = "0.2.12"
itertools = { version = "0.10.5", default-features = false }
icu_normalizer = "1.2.0"
# intl deps
boa_icu_provider = { workspace = true, optional = true }
icu_locid_transform = { version = "1.2.1", features = ["serde"], optional = true }
icu_locid_transform = { version = "1.2.1", features = ["std", "serde"], optional = true }
icu_locid = { version = "1.2.0", features = ["serde"], optional = true }
icu_datetime = { version = "1.2.0", features = ["serde", "experimental"], optional = true }
icu_calendar = { version = "1.2.0", optional = true }

122
boa_engine/src/builtins/string/mod.rs

@ -24,6 +24,7 @@ use crate::{
Context, JsArgs, JsResult, JsString, JsValue,
};
use boa_profiler::Profiler;
use icu_normalizer::{ComposingNormalizer, DecomposingNormalizer};
use std::cmp::{max, min};
use super::{BuiltInBuilder, BuiltInConstructor, IntrinsicObject};
@ -31,6 +32,15 @@ use super::{BuiltInBuilder, BuiltInConstructor, IntrinsicObject};
mod string_iterator;
pub(crate) use string_iterator::StringIterator;
/// The set of normalizers required for the `String.prototype.normalize` function.
///
/// Holds one normalizer per normalization form accepted by
/// `String.prototype.normalize` (`"NFC"`, `"NFKC"`, `"NFD"` and `"NFKD"`).
#[derive(Debug)]
pub(crate) struct StringNormalizers {
/// Normalizer applied when the requested form is `"NFC"`.
pub(crate) nfc: ComposingNormalizer,
/// Normalizer applied when the requested form is `"NFKC"`.
pub(crate) nfkc: ComposingNormalizer,
/// Normalizer applied when the requested form is `"NFD"`.
pub(crate) nfd: DecomposingNormalizer,
/// Normalizer applied when the requested form is `"NFKD"`.
pub(crate) nfkd: DecomposingNormalizer,
}
#[cfg(test)]
mod tests;
@ -2024,7 +2034,6 @@ impl String {
args: &[JsValue],
context: &mut Context<'_>,
) -> JsResult<JsValue> {
use unicode_normalization::UnicodeNormalization;
/// Represents the type of normalization applied to a [`JsString`]
#[derive(Clone, Copy)]
pub(crate) enum Normalization {
@ -2033,79 +2042,72 @@ impl String {
Nfkc,
Nfkd,
}
// 1. Let O be ? RequireObjectCoercible(this value).
let this = this.require_object_coercible()?;
// 2. Let S be ? ToString(O).
let s = this.to_string(context)?;
let f = match args.get_or_undefined(0) {
// 3. If form is undefined, let f be "NFC".
&JsValue::Undefined => js_string!("NFC"),
// 4. Else, let f be ? ToString(form).
form => form.to_string(context)?,
};
// 6. Let ns be the String value that is the result of normalizing S
// into the normalization form named by f as specified in
// https://unicode.org/reports/tr15/.
let normalization = match f {
ntype if &ntype == utf16!("NFC") => Normalization::Nfc,
ntype if &ntype == utf16!("NFD") => Normalization::Nfd,
ntype if &ntype == utf16!("NFKC") => Normalization::Nfkc,
ntype if &ntype == utf16!("NFKD") => Normalization::Nfkd,
// 5. If f is not one of "NFC", "NFD", "NFKC", or "NFKD", throw a RangeError exception.
_ => {
return Err(JsNativeError::range()
.with_message("The normalization form should be one of NFC, NFD, NFKC, NFKD.")
.into());
}
};
let mut code_points = s.code_points();
let mut result = Vec::with_capacity(s.len());
let mut next_unpaired_surrogate = None;
let mut buf = [0; 2];
loop {
let only_chars = code_points.by_ref().map_while(|cpoint| match cpoint {
CodePoint::Unicode(c) => Some(c),
CodePoint::UnpairedSurrogate(s) => {
next_unpaired_surrogate = Some(s);
None
let normalization = match args.get_or_undefined(0) {
// 3. If form is undefined, let f be "NFC".
&JsValue::Undefined => Normalization::Nfc,
// 4. Else, let f be ? ToString(form).
f => match f.to_string(context)? {
ntype if &ntype == utf16!("NFC") => Normalization::Nfc,
ntype if &ntype == utf16!("NFD") => Normalization::Nfd,
ntype if &ntype == utf16!("NFKC") => Normalization::Nfkc,
ntype if &ntype == utf16!("NFKD") => Normalization::Nfkd,
// 5. If f is not one of "NFC", "NFD", "NFKC", or "NFKD", throw a RangeError exception.
_ => {
return Err(JsNativeError::range()
.with_message(
"The normalization form should be one of NFC, NFD, NFKC, NFKD.",
)
.into());
}
});
},
};
match normalization {
Normalization::Nfc => {
for mapped in only_chars.nfc() {
result.extend_from_slice(mapped.encode_utf16(&mut buf));
}
}
Normalization::Nfd => {
for mapped in only_chars.nfd() {
result.extend_from_slice(mapped.encode_utf16(&mut buf));
}
}
Normalization::Nfkc => {
for mapped in only_chars.nfkc() {
result.extend_from_slice(mapped.encode_utf16(&mut buf));
}
}
Normalization::Nfkd => {
for mapped in only_chars.nfkd() {
result.extend_from_slice(mapped.encode_utf16(&mut buf));
let normalizers = {
#[cfg(not(feature = "intl"))]
{
use once_cell::sync::Lazy;
static NORMALIZERS: Lazy<StringNormalizers> = Lazy::new(|| {
let provider = &boa_icu_provider::minimal();
let nfc = ComposingNormalizer::try_new_nfc_unstable(provider)
.expect("minimal data should always be updated");
let nfkc = ComposingNormalizer::try_new_nfkc_unstable(provider)
.expect("minimal data should always be updated");
let nfd = DecomposingNormalizer::try_new_nfd_unstable(provider)
.expect("minimal data should always be updated");
let nfkd = DecomposingNormalizer::try_new_nfkd_unstable(provider)
.expect("minimal data should always be updated");
StringNormalizers {
nfc,
nfkc,
nfd,
nfkd,
}
}
});
&*NORMALIZERS
}
if let Some(surr) = next_unpaired_surrogate.take() {
result.push(surr);
} else {
break;
#[cfg(feature = "intl")]
{
context.icu().string_normalizers()
}
}
};
let result = match normalization {
Normalization::Nfc => normalizers.nfc.normalize_utf16(&s),
Normalization::Nfd => normalizers.nfd.normalize_utf16(&s),
Normalization::Nfkc => normalizers.nfkc.normalize_utf16(&s),
Normalization::Nfkd => normalizers.nfkd.normalize_utf16(&s),
};
// 7. Return ns.
Ok(js_string!(result).into())

55
boa_engine/src/context/icu.rs

@ -3,18 +3,23 @@ use std::fmt::Debug;
use icu_collator::{Collator, CollatorError, CollatorOptions};
use icu_list::{ListError, ListFormatter, ListLength};
use icu_locid_transform::{LocaleCanonicalizer, LocaleExpander, LocaleTransformError};
use icu_normalizer::{ComposingNormalizer, DecomposingNormalizer, NormalizerError};
use icu_provider::{
AnyProvider, AsDeserializingBufferProvider, AsDowncastingAnyProvider, BufferProvider,
DataError, DataLocale, DataProvider, DataRequest, DataResponse, KeyedDataMarker, MaybeSendSync,
};
use icu_segmenter::{GraphemeClusterSegmenter, SegmenterError, SentenceSegmenter, WordSegmenter};
use serde::Deserialize;
use thiserror::Error;
use yoke::{trait_hack::YokeTraitHack, Yokeable};
use zerofrom::ZeroFrom;
use crate::builtins::intl::{
list_format::ListFormatType,
segmenter::{Granularity, NativeSegmenter},
use crate::builtins::{
intl::{
list_format::ListFormatType,
segmenter::{Granularity, NativeSegmenter},
},
string::StringNormalizers,
};
/// ICU4X data provider used in boa.
@ -147,6 +152,34 @@ impl BoaProvider<'_> {
.map(|seg| NativeSegmenter::Sentence(Box::new(seg))),
}
}
/// Creates the [`StringNormalizers`] used by `String.prototype.normalize` from this provider.
///
/// The normalizers are constructed in the order NFC, NFKC, NFD, NFKD, using the
/// constructor family that matches the kind of provider being wrapped.
///
/// # Errors
///
/// Returns the first [`NormalizerError`] reported while constructing a normalizer.
pub(crate) fn try_new_string_normalizers(&self) -> Result<StringNormalizers, NormalizerError> {
    let normalizers = match *self {
        BoaProvider::Buffer(buffer) => {
            let nfc = ComposingNormalizer::try_new_nfc_with_buffer_provider(buffer)?;
            let nfkc = ComposingNormalizer::try_new_nfkc_with_buffer_provider(buffer)?;
            let nfd = DecomposingNormalizer::try_new_nfd_with_buffer_provider(buffer)?;
            let nfkd = DecomposingNormalizer::try_new_nfkd_with_buffer_provider(buffer)?;
            StringNormalizers {
                nfc,
                nfkc,
                nfd,
                nfkd,
            }
        }
        BoaProvider::Any(erased) => {
            let nfc = ComposingNormalizer::try_new_nfc_with_any_provider(erased)?;
            let nfkc = ComposingNormalizer::try_new_nfkc_with_any_provider(erased)?;
            let nfd = DecomposingNormalizer::try_new_nfd_with_any_provider(erased)?;
            let nfkd = DecomposingNormalizer::try_new_nfkd_with_any_provider(erased)?;
            StringNormalizers {
                nfc,
                nfkc,
                nfd,
                nfkd,
            }
        }
    };
    Ok(normalizers)
}
}
/// Error thrown when the engine cannot initialize the ICU tools from a data provider.
///
/// Each variant wraps the error of the ICU component that failed to build; the
/// `#[from]` attributes let those errors be converted with the `?` operator.
#[derive(Debug, Error)]
pub enum IcuError {
/// Failed to create the locale transform tools.
///
/// Wraps the underlying [`LocaleTransformError`].
#[error("could not construct the locale transform tools")]
LocaleTransform(#[from] LocaleTransformError),
/// Failed to create the string normalization tools.
///
/// Wraps the underlying [`NormalizerError`].
#[error("could not construct the string normalization tools")]
Normalizer(#[from] NormalizerError),
}
/// Collection of tools initialized from a [`DataProvider`] that are used for the functionality of
@ -155,14 +188,16 @@ pub(crate) struct Icu<'provider> {
provider: BoaProvider<'provider>,
locale_canonicalizer: LocaleCanonicalizer,
locale_expander: LocaleExpander,
string_normalizers: StringNormalizers,
}
impl Debug for Icu<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Icu")
.field("provider", &self.provider)
.field("locale_canonicalizer", &"LocaleCanonicalizer")
.field("locale_expander", &"LocaleExpander")
.field("locale_canonicalizer", &self.locale_canonicalizer)
.field("locale_expander", &self.locale_expander)
.field("string_normalizers", &self.string_normalizers)
.finish()
}
}
@ -173,12 +208,11 @@ impl<'provider> Icu<'provider> {
/// # Errors
///
/// Returns an error if any of the tools required cannot be constructed.
pub(crate) fn new(
provider: BoaProvider<'provider>,
) -> Result<Icu<'provider>, LocaleTransformError> {
pub(crate) fn new(provider: BoaProvider<'provider>) -> Result<Icu<'provider>, IcuError> {
Ok(Self {
locale_canonicalizer: provider.try_new_locale_canonicalizer()?,
locale_expander: provider.try_new_locale_expander()?,
string_normalizers: provider.try_new_string_normalizers()?,
provider,
})
}
@ -193,6 +227,11 @@ impl<'provider> Icu<'provider> {
&self.locale_expander
}
/// Gets the [`StringNormalizers`] tools.
///
/// Returns a shared reference to the normalizers stored in this `Icu`; nothing is
/// constructed or cloned by this call.
pub(crate) const fn string_normalizers(&self) -> &StringNormalizers {
&self.string_normalizers
}
/// Gets the inner icu data provider
pub(crate) const fn provider(&self) -> BoaProvider<'provider> {
self.provider

4
boa_engine/src/context/mod.rs

@ -8,7 +8,7 @@ mod maybe_shared;
pub use hooks::{DefaultHooks, HostHooks};
#[cfg(feature = "intl")]
pub use icu::BoaProvider;
pub use icu::{BoaProvider, IcuError};
use intrinsics::Intrinsics;
pub use maybe_shared::MaybeShared;
@ -633,7 +633,7 @@ impl<'icu, 'hooks, 'queue> ContextBuilder<'icu, 'hooks, 'queue> {
pub fn icu_provider(
self,
provider: BoaProvider<'_>,
) -> Result<ContextBuilder<'_, 'hooks, 'queue>, icu_locid_transform::LocaleTransformError> {
) -> Result<ContextBuilder<'_, 'hooks, 'queue>, IcuError> {
Ok(ContextBuilder {
icu: Some(icu::Icu::new(provider)?),
..self

11
boa_icu_provider/Cargo.toml

@ -16,14 +16,19 @@ rust-version.workspace = true
icu_provider = { version = "1.2.0", features = ["serde", "sync"] }
icu_provider_blob = "1.2.0"
icu_provider_adapters = { version = "1.2.0", features = ["serde"] }
once_cell = {version = "1.17.1", default-features = false, features = ["critical-section"]}
icu_datagen = { version = "1.2.1", optional = true }
icu_collections = "1.2.0"
icu_normalizer = "1.2.0"
zerovec = "0.9.4"
icu_datagen = { version = "1.2.3", optional = true }
once_cell = {version = "1.17.1", default-features = false, features = ["critical-section"], optional = true }
log = { version = "0.4.17", optional = true }
simple_logger = { version = "4.1.0", optional = true }
[features]
default = ["std"]
std = ["once_cell/std"]
full = ["dep:once_cell"]
std = ["once_cell?/std"]
bin = ["dep:icu_datagen", "dep:simple_logger", "dep:log"]
[[bin]]

2
boa_icu_provider/data/min/any.rs

@ -0,0 +1,2 @@
// @generated
impl_any_provider!(BakedDataProvider);

6
boa_icu_provider/data/min/fallback/likelysubtags_v1/mod.rs

@ -0,0 +1,6 @@
// @generated
type DataStruct = < :: icu_provider_adapters :: fallback :: provider :: LocaleFallbackLikelySubtagsV1Marker as :: icu_provider :: DataMarker > :: Yokeable ;
pub fn lookup(locale: &icu_provider::DataLocale) -> Option<&'static DataStruct> {
locale.is_empty().then(|| &UND)
}
static UND: DataStruct = include!("und.rs.data");

66
boa_icu_provider/data/min/fallback/likelysubtags_v1/und.rs.data

@ -0,0 +1,66 @@
::icu_provider_adapters::fallback::provider::LocaleFallbackLikelySubtagsV1 {
l2s: unsafe {
#[allow(unused_unsafe)]
::zerovec::ZeroMap::from_parts_unchecked(
unsafe {
:: zerovec :: ZeroVec :: from_bytes_unchecked (b"am\0ar\0as\0be\0bg\0bgcbhobn\0brxchrcv\0doiel\0fa\0gu\0he\0hi\0hy\0ja\0ka\0kk\0km\0kn\0ko\0kokks\0ky\0lo\0maimk\0ml\0mn\0mnimr\0my\0ne\0or\0pa\0ps\0rajru\0sa\0satsd\0si\0sr\0ta\0te\0tg\0th\0ti\0tt\0uk\0ur\0yuezh\0")
},
unsafe {
:: zerovec :: ZeroVec :: from_bytes_unchecked (b"EthiArabBengCyrlCyrlDevaDevaBengDevaCherCyrlDevaGrekArabGujrHebrDevaArmnJpanGeorCyrlKhmrKndaKoreDevaArabCyrlLaooDevaCyrlMlymCyrlBengDevaMymrDevaOryaGuruArabDevaCyrlDevaOlckArabSinhCyrlTamlTeluCyrlThaiEthiCyrlCyrlArabHantHans")
},
)
},
lr2s: unsafe {
#[allow(unused_unsafe)]
::zerovec::ZeroMap2d::from_parts_unchecked(
unsafe {
::zerovec::ZeroVec::from_bytes_unchecked(
b"az\0ha\0kk\0ky\0mn\0ms\0pa\0sd\0sr\0tg\0uz\0yuezh\0",
)
},
unsafe {
:: zerovec :: ZeroVec :: from_bytes_unchecked (b"\x03\0\0\0\x05\0\0\0\t\0\0\0\x0B\0\0\0\x0C\0\0\0\r\0\0\0\x0E\0\0\0\x0F\0\0\0\x13\0\0\0\x14\0\0\0\x16\0\0\0\x17\0\0\0&\0\0\0")
},
unsafe {
:: zerovec :: ZeroVec :: from_bytes_unchecked (b"IQ\0IR\0RU\0CM\0SD\0AF\0CN\0IR\0MN\0CN\0TR\0CN\0CC\0PK\0IN\0ME\0RO\0RU\0TR\0PK\0AF\0CN\0CN\0AU\0BN\0GB\0GF\0HK\0ID\0MO\0PA\0PF\0PH\0SR\0TH\0TW\0US\0VN\0")
},
unsafe {
:: zerovec :: ZeroVec :: from_bytes_unchecked (b"ArabArabCyrlArabArabArabArabArabArabArabLatnMongArabArabDevaLatnLatnLatnLatnArabArabCyrlHansHantHantHantHantHantHantHantHantHantHantHantHantHantHantHant")
},
)
},
l2r: unsafe {
#[allow(unused_unsafe)]
::zerovec::ZeroMap::from_parts_unchecked(
unsafe {
:: zerovec :: ZeroVec :: from_bytes_unchecked (b"af\0am\0ar\0as\0astaz\0be\0bg\0bgcbhobn\0br\0brxbs\0ca\0cebchrcs\0cv\0cy\0da\0de\0doidsbel\0en\0es\0et\0eu\0fa\0ff\0fi\0filfo\0fr\0ga\0gd\0gl\0gu\0ha\0he\0hi\0hr\0hsbhu\0hy\0ia\0id\0ig\0is\0it\0ja\0jv\0ka\0keakgpkk\0km\0kn\0ko\0kokks\0ky\0lo\0lt\0lv\0maimi\0mk\0ml\0mn\0mnimr\0ms\0my\0ne\0nl\0nn\0no\0or\0pa\0pcmpl\0ps\0pt\0qu\0rajrm\0ro\0ru\0sa\0satsc\0sd\0si\0sk\0sl\0so\0sq\0sr\0su\0sv\0sw\0ta\0te\0tg\0th\0ti\0tk\0to\0tr\0tt\0uk\0ur\0uz\0vi\0wo\0xh\0yo\0yrlyuezh\0zu\0")
},
unsafe {
:: zerovec :: ZeroVec :: from_bytes_unchecked (b"ZA\0ET\0EG\0IN\0ES\0AZ\0BY\0BG\0IN\0IN\0BD\0FR\0IN\0BA\0ES\0PH\0US\0CZ\0RU\0GB\0DK\0DE\0IN\0DE\0GR\0US\0ES\0EE\0ES\0IR\0SN\0FI\0PH\0FO\0FR\0IE\0GB\0ES\0IN\0NG\0IL\0IN\0HR\0DE\0HU\0AM\x00001ID\0NG\0IS\0IT\0JP\0ID\0GE\0CV\0BR\0KZ\0KH\0IN\0KR\0IN\0IN\0KG\0LA\0LT\0LV\0IN\0NZ\0MK\0IN\0MN\0IN\0IN\0MY\0MM\0NP\0NL\0NO\0NO\0IN\0IN\0NG\0PL\0AF\0BR\0PE\0IN\0CH\0RO\0RU\0IN\0IN\0IT\0PK\0LK\0SK\0SI\0SO\0AL\0RS\0ID\0SE\0TZ\0IN\0IN\0TJ\0TH\0ET\0TM\0TO\0TR\0RU\0UA\0PK\0UZ\0VN\0SN\0ZA\0NG\0BR\0HK\0CN\0ZA\0")
},
)
},
ls2r: unsafe {
#[allow(unused_unsafe)]
::zerovec::ZeroMap2d::from_parts_unchecked(
unsafe {
::zerovec::ZeroVec::from_bytes_unchecked(
b"az\0en\0ff\0kk\0ky\0mn\0pa\0sd\0tg\0uz\0yuezh\0",
)
},
unsafe {
:: zerovec :: ZeroVec :: from_bytes_unchecked (b"\x01\0\0\0\x02\0\0\0\x03\0\0\0\x04\0\0\0\x06\0\0\0\x07\0\0\0\x08\0\0\0\x0B\0\0\0\x0C\0\0\0\r\0\0\0\x0E\0\0\0\x11\0\0\0")
},
unsafe {
::zerovec::ZeroVec::from_bytes_unchecked(
b"ArabShawAdlmArabArabLatnMongArabDevaKhojSindArabArabHansBopoHanbHant",
)
},
unsafe {
::zerovec::ZeroVec::from_bytes_unchecked(
b"IR\0GB\0GN\0CN\0CN\0TR\0CN\0PK\0IN\0IN\0IN\0PK\0AF\0CN\0TW\0TW\0TW\0",
)
},
)
},
}

4
boa_icu_provider/data/min/fallback/mod.rs

@ -0,0 +1,4 @@
// @generated
pub mod likelysubtags_v1;
pub mod parents_v1;
pub mod supplement;

6
boa_icu_provider/data/min/fallback/parents_v1/mod.rs

@ -0,0 +1,6 @@
// @generated
type DataStruct = < :: icu_provider_adapters :: fallback :: provider :: LocaleFallbackParentsV1Marker as :: icu_provider :: DataMarker > :: Yokeable ;
pub fn lookup(locale: &icu_provider::DataLocale) -> Option<&'static DataStruct> {
locale.is_empty().then(|| &UND)
}
static UND: DataStruct = include!("und.rs.data");

13
boa_icu_provider/data/min/fallback/parents_v1/und.rs.data

@ -0,0 +1,13 @@
::icu_provider_adapters::fallback::provider::LocaleFallbackParentsV1 {
parents: unsafe {
#[allow(unused_unsafe)]
::zerovec::ZeroMap::from_parts_unchecked(
unsafe {
:: zerovec :: VarZeroVec :: from_bytes_unchecked (b"\x84\0\0\0\0\0\x06\0\x0B\0\x10\0\x15\0\x1A\0\x1F\0$\0)\0.\x003\08\0=\0B\0G\0L\0Q\0V\0[\0`\0e\0j\0o\0t\0y\0~\0\x83\0\x88\0\x8D\0\x92\0\x97\0\x9C\0\xA1\0\xA6\0\xAB\0\xB0\0\xB5\0\xBA\0\xBF\0\xC4\0\xC9\0\xCE\0\xD3\0\xD8\0\xDD\0\xE2\0\xE7\0\xEC\0\xF1\0\xF6\0\xFB\0\0\x01\x05\x01\n\x01\x0F\x01\x14\x01\x19\x01\x1E\x01#\x01(\x01-\x012\x017\x01<\x01A\x01F\x01K\x01P\x01U\x01Z\x01_\x01d\x01i\x01n\x01s\x01x\x01}\x01\x82\x01\x87\x01\x8C\x01\x91\x01\x96\x01\x9B\x01\xA0\x01\xA5\x01\xAA\x01\xAF\x01\xB4\x01\xB9\x01\xBE\x01\xC3\x01\xC8\x01\xCD\x01\xD2\x01\xD7\x01\xDC\x01\xE1\x01\xE6\x01\xEB\x01\xF0\x01\xF5\x01\xFA\x01\xFF\x01\x04\x02\t\x02\x0E\x02\x13\x02\x18\x02\x1D\x02\"\x02'\x02,\x021\x026\x02;\x02@\x02G\x02I\x02K\x02M\x02R\x02W\x02\\\x02a\x02f\x02k\x02p\x02u\x02z\x02\x7F\x02\x84\x02\x89\x02en-150en-AGen-AIen-ATen-AUen-BBen-BEen-BMen-BSen-BWen-BZen-CCen-CHen-CKen-CMen-CXen-CYen-DEen-DGen-DKen-DMen-ERen-FIen-FJen-FKen-FMen-GBen-GDen-GGen-GHen-GIen-GMen-GYen-HKen-IEen-ILen-IMen-INen-IOen-JEen-JMen-KEen-KIen-KNen-KYen-LCen-LRen-LSen-MGen-MOen-MSen-MTen-MUen-MVen-MWen-MYen-NAen-NFen-NGen-NLen-NRen-NUen-NZen-PGen-PKen-PNen-PWen-RWen-SBen-SCen-SDen-SEen-SGen-SHen-SIen-SLen-SSen-SXen-SZen-TCen-TKen-TOen-TTen-TVen-TZen-UGen-VCen-VGen-VUen-WSen-ZAen-ZMen-ZWes-ARes-BOes-BRes-BZes-CLes-COes-CRes-CUes-DOes-ECes-GTes-HNes-MXes-NIes-PAes-PEes-PRes-PYes-SVes-USes-UYes-VEhi-Latnhtnbnnno-NOpt-AOpt-CHpt-CVpt-FRpt-GQpt-GWpt-LUpt-MOpt-MZpt-STpt-TLzh-Hant-MO")
},
unsafe {
:: zerovec :: ZeroVec :: from_bytes_unchecked (b"en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x010
01es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419en\0\0\0\0\0\0\x01IN\0fr\0\0\0\0\0\0\x01HT\0no\0\0\0\0\0\0\0\0\0\0no\0\0\0\0\0\0\0\0\0\0no\0\0\0\0\0\0\0\0\0\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0zh\0\x01Hant\x01HK\0")
},
)
},
}

6
boa_icu_provider/data/min/fallback/supplement/co_v1/mod.rs

@ -0,0 +1,6 @@
// @generated
type DataStruct = < :: icu_provider_adapters :: fallback :: provider :: CollationFallbackSupplementV1Marker as :: icu_provider :: DataMarker > :: Yokeable ;
pub fn lookup(locale: &icu_provider::DataLocale) -> Option<&'static DataStruct> {
locale.is_empty().then(|| &UND)
}
static UND: DataStruct = include!("und.rs.data");

22
boa_icu_provider/data/min/fallback/supplement/co_v1/und.rs.data

@ -0,0 +1,22 @@
::icu_provider_adapters::fallback::provider::LocaleFallbackSupplementV1 {
parents: unsafe {
#[allow(unused_unsafe)]
::zerovec::ZeroMap::from_parts_unchecked(
unsafe { ::zerovec::VarZeroVec::from_bytes_unchecked(b"\x01\0\0\0\0\0yue") },
unsafe { ::zerovec::ZeroVec::from_bytes_unchecked(b"zh\0\x01Hant\0\0\0\0") },
)
},
unicode_extension_defaults: unsafe {
#[allow(unused_unsafe)]
::zerovec::ZeroMap2d::from_parts_unchecked(
unsafe { ::zerovec::ZeroVec::from_bytes_unchecked(b"co") },
unsafe { ::zerovec::ZeroVec::from_bytes_unchecked(b"\x02\0\0\0") },
unsafe {
::zerovec::VarZeroVec::from_bytes_unchecked(b"\x02\0\0\0\0\0\x02\0zhzh-Hant")
},
unsafe {
::zerovec::VarZeroVec::from_bytes_unchecked(b"\x02\0\0\0\0\0\x06\0pinyinstroke")
},
)
},
}

2
boa_icu_provider/data/min/fallback/supplement/mod.rs

@ -0,0 +1,2 @@
// @generated
pub mod co_v1;

216
boa_icu_provider/data/min/mod.rs

@ -0,0 +1,216 @@
// @generated
#[clippy::msrv = "1.61"]
mod fallback;
#[clippy::msrv = "1.61"]
mod normalizer;
#[clippy::msrv = "1.61"]
use icu_provider::prelude::*;
/// Implement [`DataProvider<M>`] on the given struct using the data
/// hardcoded in this module. This allows the struct to be used with
/// `icu`'s `_unstable` constructors.
///
/// This macro can only be called from its definition-site, i.e. right
/// after `include!`-ing the generated module.
///
/// ```compile_fail
/// struct MyDataProvider;
/// include!("/path/to/generated/mod.rs");
/// impl_data_provider!(MyDataProvider);
/// ```
#[allow(unused_macros)]
macro_rules! impl_data_provider {
($ provider : path) => {
#[clippy::msrv = "1.61"]
impl DataProvider<::icu_normalizer::provider::CanonicalCompositionsV1Marker> for $provider {
fn load(&self, req: DataRequest) -> Result<DataResponse<::icu_normalizer::provider::CanonicalCompositionsV1Marker>, DataError> {
normalizer::comp_v1::lookup(&req.locale)
.map(zerofrom::ZeroFrom::zero_from)
.map(DataPayload::from_owned)
.map(|payload| DataResponse {
metadata: Default::default(),
payload: Some(payload),
})
.ok_or_else(|| DataErrorKind::MissingLocale.with_req(::icu_normalizer::provider::CanonicalCompositionsV1Marker::KEY, req))
}
}
#[clippy::msrv = "1.61"]
impl DataProvider<::icu_normalizer::provider::CanonicalDecompositionDataV1Marker> for $provider {
fn load(&self, req: DataRequest) -> Result<DataResponse<::icu_normalizer::provider::CanonicalDecompositionDataV1Marker>, DataError> {
normalizer::nfd_v1::lookup(&req.locale)
.map(zerofrom::ZeroFrom::zero_from)
.map(DataPayload::from_owned)
.map(|payload| DataResponse {
metadata: Default::default(),
payload: Some(payload),
})
.ok_or_else(|| DataErrorKind::MissingLocale.with_req(::icu_normalizer::provider::CanonicalDecompositionDataV1Marker::KEY, req))
}
}
#[clippy::msrv = "1.61"]
impl DataProvider<::icu_normalizer::provider::CanonicalDecompositionTablesV1Marker> for $provider {
fn load(&self, req: DataRequest) -> Result<DataResponse<::icu_normalizer::provider::CanonicalDecompositionTablesV1Marker>, DataError> {
normalizer::nfdex_v1::lookup(&req.locale)
.map(zerofrom::ZeroFrom::zero_from)
.map(DataPayload::from_owned)
.map(|payload| DataResponse {
metadata: Default::default(),
payload: Some(payload),
})
.ok_or_else(|| DataErrorKind::MissingLocale.with_req(::icu_normalizer::provider::CanonicalDecompositionTablesV1Marker::KEY, req))
}
}
#[clippy::msrv = "1.61"]
impl DataProvider<::icu_normalizer::provider::CompatibilityDecompositionSupplementV1Marker> for $provider {
fn load(
&self,
req: DataRequest,
) -> Result<DataResponse<::icu_normalizer::provider::CompatibilityDecompositionSupplementV1Marker>, DataError> {
normalizer::nfkd_v1::lookup(&req.locale)
.map(zerofrom::ZeroFrom::zero_from)
.map(DataPayload::from_owned)
.map(|payload| DataResponse {
metadata: Default::default(),
payload: Some(payload),
})
.ok_or_else(|| {
DataErrorKind::MissingLocale.with_req(::icu_normalizer::provider::CompatibilityDecompositionSupplementV1Marker::KEY, req)
})
}
}
#[clippy::msrv = "1.61"]
impl DataProvider<::icu_normalizer::provider::CompatibilityDecompositionTablesV1Marker> for $provider {
fn load(
&self,
req: DataRequest,
) -> Result<DataResponse<::icu_normalizer::provider::CompatibilityDecompositionTablesV1Marker>, DataError> {
normalizer::nfkdex_v1::lookup(&req.locale)
.map(zerofrom::ZeroFrom::zero_from)
.map(DataPayload::from_owned)
.map(|payload| DataResponse {
metadata: Default::default(),
payload: Some(payload),
})
.ok_or_else(|| {
DataErrorKind::MissingLocale.with_req(::icu_normalizer::provider::CompatibilityDecompositionTablesV1Marker::KEY, req)
})
}
}
#[clippy::msrv = "1.61"]
impl DataProvider<::icu_provider_adapters::fallback::provider::CollationFallbackSupplementV1Marker> for $provider {
fn load(
&self,
req: DataRequest,
) -> Result<DataResponse<::icu_provider_adapters::fallback::provider::CollationFallbackSupplementV1Marker>, DataError> {
fallback::supplement::co_v1::lookup(&req.locale)
.map(zerofrom::ZeroFrom::zero_from)
.map(DataPayload::from_owned)
.map(|payload| DataResponse {
metadata: Default::default(),
payload: Some(payload),
})
.ok_or_else(|| {
DataErrorKind::MissingLocale.with_req(
::icu_provider_adapters::fallback::provider::CollationFallbackSupplementV1Marker::KEY,
req,
)
})
}
}
#[clippy::msrv = "1.61"]
impl DataProvider<::icu_provider_adapters::fallback::provider::LocaleFallbackLikelySubtagsV1Marker> for $provider {
fn load(
&self,
req: DataRequest,
) -> Result<DataResponse<::icu_provider_adapters::fallback::provider::LocaleFallbackLikelySubtagsV1Marker>, DataError> {
fallback::likelysubtags_v1::lookup(&req.locale)
.map(zerofrom::ZeroFrom::zero_from)
.map(DataPayload::from_owned)
.map(|payload| DataResponse {
metadata: Default::default(),
payload: Some(payload),
})
.ok_or_else(|| {
DataErrorKind::MissingLocale.with_req(
::icu_provider_adapters::fallback::provider::LocaleFallbackLikelySubtagsV1Marker::KEY,
req,
)
})
}
}
#[clippy::msrv = "1.61"]
impl DataProvider<::icu_provider_adapters::fallback::provider::LocaleFallbackParentsV1Marker> for $provider {
    // Answers a request for the locale-parents fallback data from the data
    // baked into `fallback::parents_v1`.
    //
    // Returns a `MissingLocale` error (annotated with the key and the request)
    // when the baked module has no entry for `req.locale`.
    fn load(
        &self,
        req: DataRequest,
    ) -> Result<DataResponse<::icu_provider_adapters::fallback::provider::LocaleFallbackParentsV1Marker>, DataError> {
        let baked = fallback::parents_v1::lookup(&req.locale);
        match baked {
            // Re-borrow the static data as the payload's yokeable type and
            // wrap it without attaching any extra metadata.
            Some(data) => Ok(DataResponse {
                metadata: Default::default(),
                payload: Some(DataPayload::from_owned(zerofrom::ZeroFrom::zero_from(data))),
            }),
            None => Err(DataErrorKind::MissingLocale
                .with_req(::icu_provider_adapters::fallback::provider::LocaleFallbackParentsV1Marker::KEY, req)),
        }
    }
}
};
}
/// Implements [`AnyProvider`] for the given struct, backed by the data
/// hardcoded in this module, so that the struct can be handed to `icu`'s
/// `_any` constructors.
///
/// The macro is only usable at its definition site, i.e. directly after
/// `include!`-ing the generated module.
///
/// ```compile_fail
/// struct MyAnyProvider;
/// include!("/path/to/generated/mod.rs");
/// impl_any_provider(MyAnyProvider);
/// ```
#[allow(unused_macros)]
macro_rules! impl_any_provider {
    ($ provider : path) => {
        #[clippy::msrv = "1.61"]
        impl AnyProvider for $provider {
            fn load_any(&self, key: DataKey, req: DataRequest) -> Result<AnyResponse, DataError> {
                // Hashes of every key this provider can serve, usable as `match` patterns.
                const COMPOSITIONS: ::icu_provider::DataKeyHash =
                    ::icu_normalizer::provider::CanonicalCompositionsV1Marker::KEY.hashed();
                const DECOMPOSITION_DATA: ::icu_provider::DataKeyHash =
                    ::icu_normalizer::provider::CanonicalDecompositionDataV1Marker::KEY.hashed();
                const DECOMPOSITION_TABLES: ::icu_provider::DataKeyHash =
                    ::icu_normalizer::provider::CanonicalDecompositionTablesV1Marker::KEY.hashed();
                const COMPAT_SUPPLEMENT: ::icu_provider::DataKeyHash =
                    ::icu_normalizer::provider::CompatibilityDecompositionSupplementV1Marker::KEY.hashed();
                const COMPAT_TABLES: ::icu_provider::DataKeyHash =
                    ::icu_normalizer::provider::CompatibilityDecompositionTablesV1Marker::KEY.hashed();
                const COLLATION_SUPPLEMENT: ::icu_provider::DataKeyHash =
                    ::icu_provider_adapters::fallback::provider::CollationFallbackSupplementV1Marker::KEY.hashed();
                const LIKELY_SUBTAGS: ::icu_provider::DataKeyHash =
                    ::icu_provider_adapters::fallback::provider::LocaleFallbackLikelySubtagsV1Marker::KEY.hashed();
                const PARENTS: ::icu_provider::DataKeyHash =
                    ::icu_provider_adapters::fallback::provider::LocaleFallbackParentsV1Marker::KEY.hashed();

                // Route the key to its baked module; a key outside the list
                // above is reported as `MissingDataKey` straight away.
                let baked = match key.hashed() {
                    COMPOSITIONS => normalizer::comp_v1::lookup(&req.locale).map(AnyPayload::from_static_ref),
                    DECOMPOSITION_DATA => normalizer::nfd_v1::lookup(&req.locale).map(AnyPayload::from_static_ref),
                    DECOMPOSITION_TABLES => normalizer::nfdex_v1::lookup(&req.locale).map(AnyPayload::from_static_ref),
                    COMPAT_SUPPLEMENT => normalizer::nfkd_v1::lookup(&req.locale).map(AnyPayload::from_static_ref),
                    COMPAT_TABLES => normalizer::nfkdex_v1::lookup(&req.locale).map(AnyPayload::from_static_ref),
                    COLLATION_SUPPLEMENT => {
                        fallback::supplement::co_v1::lookup(&req.locale).map(AnyPayload::from_static_ref)
                    }
                    LIKELY_SUBTAGS => fallback::likelysubtags_v1::lookup(&req.locale).map(AnyPayload::from_static_ref),
                    PARENTS => fallback::parents_v1::lookup(&req.locale).map(AnyPayload::from_static_ref),
                    _ => return Err(DataErrorKind::MissingDataKey.with_req(key, req)),
                };

                // A known key without data for this locale is a `MissingLocale` error.
                match baked {
                    Some(payload) => Ok(AnyResponse {
                        payload: Some(payload),
                        metadata: Default::default(),
                    }),
                    None => Err(DataErrorKind::MissingLocale.with_req(key, req)),
                }
            }
        }
    };
}
/// Unit struct that is handed to `impl_data_provider!` right below, so that it
/// receives the `DataProvider` implementations for the data hardcoded in this
/// module.
#[clippy::msrv = "1.61"]
pub struct BakedDataProvider;
impl_data_provider!(BakedDataProvider);

6
boa_icu_provider/data/min/normalizer/comp_v1/mod.rs

@ -0,0 +1,6 @@
// @generated
/// Payload type associated with `CanonicalCompositionsV1Marker`.
type DataStruct = <::icu_normalizer::provider::CanonicalCompositionsV1Marker as ::icu_provider::DataMarker>::Yokeable;

/// Returns the baked canonical-composition data for `locale`.
///
/// Data exists only for a locale for which `is_empty()` holds; any other
/// locale yields `None`.
pub fn lookup(locale: &icu_provider::DataLocale) -> Option<&'static DataStruct> {
    if locale.is_empty() {
        Some(&UND)
    } else {
        None
    }
}

/// The only baked payload of this module, read from `und.rs.data`.
static UND: DataStruct = include!("und.rs.data");

7
boa_icu_provider/data/min/normalizer/comp_v1/und.rs.data

File diff suppressed because one or more lines are too long

6
boa_icu_provider/data/min/normalizer/mod.rs

@ -0,0 +1,6 @@
// @generated
pub mod comp_v1;
pub mod nfd_v1;
pub mod nfdex_v1;
pub mod nfkd_v1;
pub mod nfkdex_v1;

6
boa_icu_provider/data/min/normalizer/nfd_v1/mod.rs

@ -0,0 +1,6 @@
// @generated
/// Payload type associated with `CanonicalDecompositionDataV1Marker`.
type DataStruct =
    <::icu_normalizer::provider::CanonicalDecompositionDataV1Marker as ::icu_provider::DataMarker>::Yokeable;

/// Returns the baked canonical-decomposition data for `locale`.
///
/// Data exists only for a locale for which `is_empty()` holds; any other
/// locale yields `None`.
pub fn lookup(locale: &icu_provider::DataLocale) -> Option<&'static DataStruct> {
    if locale.is_empty() {
        Some(&UND)
    } else {
        None
    }
}

/// The only baked payload of this module, read from `und.rs.data`.
static UND: DataStruct = include!("und.rs.data");

19
boa_icu_provider/data/min/normalizer/nfd_v1/und.rs.data

File diff suppressed because one or more lines are too long

6
boa_icu_provider/data/min/normalizer/nfdex_v1/mod.rs

@ -0,0 +1,6 @@
// @generated
/// Payload type associated with `CanonicalDecompositionTablesV1Marker`.
type DataStruct =
    <::icu_normalizer::provider::CanonicalDecompositionTablesV1Marker as ::icu_provider::DataMarker>::Yokeable;

/// Returns the baked canonical-decomposition tables for `locale`.
///
/// Data exists only for a locale for which `is_empty()` holds; any other
/// locale yields `None`.
pub fn lookup(locale: &icu_provider::DataLocale) -> Option<&'static DataStruct> {
    if locale.is_empty() {
        Some(&UND)
    } else {
        None
    }
}

/// The only baked payload of this module, read from `und.rs.data`.
static UND: DataStruct = include!("und.rs.data");

8
boa_icu_provider/data/min/normalizer/nfdex_v1/und.rs.data

File diff suppressed because one or more lines are too long

6
boa_icu_provider/data/min/normalizer/nfkd_v1/mod.rs

@ -0,0 +1,6 @@
// @generated
/// Payload type associated with `CompatibilityDecompositionSupplementV1Marker`.
type DataStruct =
    <::icu_normalizer::provider::CompatibilityDecompositionSupplementV1Marker as ::icu_provider::DataMarker>::Yokeable;

/// Returns the baked compatibility-decomposition supplement for `locale`.
///
/// Data exists only for a locale for which `is_empty()` holds; any other
/// locale yields `None`.
pub fn lookup(locale: &icu_provider::DataLocale) -> Option<&'static DataStruct> {
    if locale.is_empty() {
        Some(&UND)
    } else {
        None
    }
}

/// The only baked payload of this module, read from `und.rs.data`.
static UND: DataStruct = include!("und.rs.data");

21
boa_icu_provider/data/min/normalizer/nfkd_v1/und.rs.data

File diff suppressed because one or more lines are too long

6
boa_icu_provider/data/min/normalizer/nfkdex_v1/mod.rs

@ -0,0 +1,6 @@
// @generated
/// Payload type associated with `CompatibilityDecompositionTablesV1Marker`.
type DataStruct =
    <::icu_normalizer::provider::CompatibilityDecompositionTablesV1Marker as ::icu_provider::DataMarker>::Yokeable;

/// Returns the baked compatibility-decomposition tables for `locale`.
///
/// Data exists only for a locale for which `is_empty()` holds; any other
/// locale yields `None`.
pub fn lookup(locale: &icu_provider::DataLocale) -> Option<&'static DataStruct> {
    if locale.is_empty() {
        Some(&UND)
    } else {
        None
    }
}

/// The only baked payload of this module, read from `und.rs.data`.
static UND: DataStruct = include!("und.rs.data");

8
boa_icu_provider/data/min/normalizer/nfkdex_v1/und.rs.data

File diff suppressed because one or more lines are too long

50
boa_icu_provider/src/bin/datagen.rs

@ -6,7 +6,19 @@
use std::{error::Error, fs::File};
use boa_icu_provider::data_root;
use icu_datagen::{all_keys_with_experimental, datagen, CldrLocaleSubset, Out, SourceData};
use icu_datagen::{
all_keys_with_experimental, datagen, BakedOptions, CldrLocaleSubset, Out, SourceData,
};
use icu_normalizer::provider::{
CanonicalCompositionsV1Marker, CanonicalDecompositionDataV1Marker,
CanonicalDecompositionTablesV1Marker, CompatibilityDecompositionSupplementV1Marker,
CompatibilityDecompositionTablesV1Marker,
};
use icu_provider::KeyedDataMarker;
use icu_provider_adapters::fallback::provider::{
CollationFallbackSupplementV1Marker, LocaleFallbackLikelySubtagsV1Marker,
LocaleFallbackParentsV1Marker,
};
fn main() -> Result<(), Box<dyn Error>> {
simple_logger::SimpleLogger::new()
@ -19,15 +31,43 @@ fn main() -> Result<(), Box<dyn Error>> {
.with_icuexport_for_tag(SourceData::LATEST_TESTED_ICUEXPORT_TAG)?
.with_collations(vec![String::from("search*")]);
let blob_out = Out::Blob(Box::new(File::create(
let full_blob_out = Out::Blob(Box::new(File::create(
data_root().join("icudata.postcard"),
)?));
let normalization_out = Out::Baked {
mod_directory: data_root().join("min"),
options: {
let mut opt = BakedOptions::default();
opt.use_separate_crates = true;
opt.overwrite = true;
opt.pretty = true;
opt
},
};
datagen(
None,
&[
CanonicalDecompositionDataV1Marker::KEY,
CanonicalDecompositionTablesV1Marker::KEY,
CanonicalCompositionsV1Marker::KEY,
CompatibilityDecompositionSupplementV1Marker::KEY,
CompatibilityDecompositionTablesV1Marker::KEY,
LocaleFallbackLikelySubtagsV1Marker::KEY,
LocaleFallbackParentsV1Marker::KEY,
CollationFallbackSupplementV1Marker::KEY,
],
&source_data,
[normalization_out].into(),
)?;
datagen(
None,
&all_keys_with_experimental(),
&source_data,
[blob_out].into(),
)
.map_err(Into::into)
[full_blob_out].into(),
)?;
Ok(())
}

42
boa_icu_provider/src/lib.rs

@ -41,7 +41,6 @@
single_use_lifetimes,
trivial_casts,
trivial_numeric_casts,
unreachable_pub,
unsafe_op_in_unsafe_fn,
unused_import_braces,
unused_lifetimes,
@ -68,6 +67,7 @@
clippy::pedantic,
clippy::nursery,
)]
#![allow(elided_lifetimes_in_paths)]
#![cfg_attr(not(feature = "bin"), no_std)]
/// Gets the path to the directory where the generated data is stored.
@ -78,14 +78,25 @@ pub fn data_root() -> std::path::PathBuf {
std::path::PathBuf::from(std::env!("CARGO_MANIFEST_DIR")).join("data")
}
use icu_provider::BufferProvider;
use icu_provider_adapters::fallback::LocaleFallbackProvider;
use icu_provider_blob::BlobDataProvider;
use once_cell::sync::Lazy;
/// Gets a minimal data provider that is used when the `intl` feature of `boa_engine` is
/// disabled.
///
/// The returned [`MinimalDataProvider`] serves the data baked into this crate's `data/min`
/// directory.
// Deliberately not a `const fn`, hence the `missing_const_for_fn` allowance below:
// Could use `LocaleFallbackProvider` in the future, which would disallow the `const`.
#[must_use]
#[allow(clippy::missing_const_for_fn)]
pub fn minimal() -> MinimalDataProvider {
MinimalDataProvider
}
/// Gets a data provider that is stored as a [`BufferProvider`]
/// Gets the default data provider stored as a [`BufferProvider`].
///
/// [`BufferProvider`]: icu_provider::BufferProvider
#[cfg(feature = "full")]
#[must_use]
pub fn buffer() -> &'static impl BufferProvider {
pub fn buffer() -> &'static impl icu_provider::BufferProvider {
use icu_provider_adapters::fallback::LocaleFallbackProvider;
use icu_provider_blob::BlobDataProvider;
use once_cell::sync::Lazy;
static PROVIDER: Lazy<LocaleFallbackProvider<BlobDataProvider>> = Lazy::new(|| {
let blob = BlobDataProvider::try_new_from_static_blob(include_bytes!(concat!(
env!("CARGO_MANIFEST_DIR"),
@ -98,3 +109,20 @@ pub fn buffer() -> &'static impl BufferProvider {
&*PROVIDER
}
/// Unit struct handed out by [`minimal`].
///
/// Its provider implementations are attached in the private `baked` module
/// through `impl_data_provider!`.
#[doc(hidden)]
#[non_exhaustive]
#[derive(Clone, Copy, Debug)]
pub struct MinimalDataProvider;
// Private wrapper around the baked data under `data/min`.
// NOTE(review): the lints below are presumably relaxed because the included
// code is generated and should not be edited by hand; confirm before removing any.
#[allow(
unreachable_pub,
clippy::unreadable_literal,
clippy::unnecessary_lazy_evaluations,
clippy::module_name_repetitions,
rustdoc::private_doc_tests
)]
mod baked {
// Pulls the contents of the baked `mod.rs` into this module.
include!("../data/min/mod.rs");
// Attaches the baked provider implementations to the public unit struct.
impl_data_provider!(super::MinimalDataProvider);
}

Loading…
Cancel
Save