Browse Source

Split ICU4X data generation from `boa_icu_provider` (#3682)

* Split ICU4X data generation from `boa_icu_provider`

* npx prettier

* Revert to previous blob version

* Rename new feature to `intl_bundled`

* Replace missing `intl_core` configs

* Add missing `intl_bundled` cfg

* Enable `intl_bundled` for `boa_wasm`
pull/3700/head
José Julián Espina 9 months ago committed by GitHub
parent
commit
d346bf8364
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 8
      .github/workflows/rust.yml
  2. 24
      Cargo.lock
  3. 5
      Cargo.toml
  4. 2
      cli/Cargo.toml
  5. 17
      core/engine/Cargo.toml
  6. 21
      core/engine/src/context/mod.rs
  7. 17
      core/icu_provider/Cargo.toml
  8. 10
      core/icu_provider/README.md
  9. BIN
      core/icu_provider/data/icudata.postcard
  10. 17
      core/icu_provider/src/lib.rs
  11. 2
      ffi/wasm/Cargo.toml
  12. 2
      tests/tester/Cargo.toml
  13. 32
      tools/gen-icu4x-data/Cargo.toml
  14. 12
      tools/gen-icu4x-data/README.md
  15. 46
      tools/gen-icu4x-data/src/main.rs

8
.github/workflows/rust.yml

@ -36,7 +36,7 @@ jobs:
uses: actions-rs/cargo@v1
with:
command: tarpaulin
args: --workspace --features annex-b,intl,experimental --ignore-tests --engine llvm --out xml
args: --workspace --features annex-b,intl_bundled,experimental --ignore-tests --engine llvm --out xml
- name: Upload to codecov.io
uses: codecov/codecov-action@v4
@ -61,13 +61,13 @@ jobs:
run: cargo test --no-run --profile ci
# this order is faster according to rust-analyzer
- name: Build
run: cargo build --all-targets --quiet --profile ci --features annex-b,intl,experimental
run: cargo build --all-targets --quiet --profile ci --features annex-b,intl_bundled,experimental
- name: Install latest nextest
uses: taiki-e/install-action@nextest
- name: Test with nextest
run: cargo nextest run --profile ci --cargo-profile ci --features annex-b,intl,experimental
run: cargo nextest run --profile ci --cargo-profile ci --features annex-b,intl_bundled,experimental
- name: Test docs
run: cargo test --doc --profile ci --features annex-b,intl,experimental
run: cargo test --doc --profile ci --features annex-b,intl_bundled,experimental
msrv:
name: MSRV

24
Cargo.lock generated

@ -489,14 +489,10 @@ dependencies = [
name = "boa_icu_provider"
version = "0.17.0"
dependencies = [
"icu_datagen",
"icu_plurals",
"icu_provider",
"icu_provider_adapters",
"icu_provider_blob",
"log",
"once_cell",
"simple_logger",
]
[[package]]
@ -1560,6 +1556,26 @@ dependencies = [
"slab",
]
[[package]]
name = "gen-icu4x-data"
version = "0.17.0"
dependencies = [
"icu_casemap",
"icu_collator",
"icu_datagen",
"icu_datetime",
"icu_decimal",
"icu_list",
"icu_locid_transform",
"icu_normalizer",
"icu_plurals",
"icu_provider",
"icu_provider_blob",
"icu_segmenter",
"log",
"simple_logger",
]
[[package]]
name = "generational-arena"
version = "0.2.9"

5
Cargo.toml

@ -10,6 +10,9 @@ members = [
# TESTS
"tests/*",
# TOOLS
"tools/*",
# OTHERS
"examples",
"cli",
@ -89,7 +92,7 @@ icu_datagen = { version = "~1.4.1", default-features = false }
icu_provider_adapters = { version = "~1.4.0", default-features = false }
icu_provider_blob = { version = "~1.4.0", default-features = false }
icu_properties = { version = "~1.4.0", default-features = true }
icu_normalizer = { version = "~1.4.1", default-features = true }
icu_normalizer = { version = "~1.4.1", default-features = false }
icu_decimal = { version = "~1.4.0", default-features = false }
writeable = "~0.5.4"
yoke = "~0.7.3"

2
cli/Cargo.toml

@ -26,7 +26,7 @@ pollster.workspace = true
dhat = { version = "0.3.3", optional = true }
[features]
default = ["boa_engine/annex-b", "boa_engine/experimental", "boa_engine/intl"]
default = ["boa_engine/annex-b", "boa_engine/experimental", "boa_engine/intl_bundled"]
dhat = ["dep:dhat"]
[target.x86_64-unknown-linux-gnu.dependencies]

17
core/engine/Cargo.toml

@ -14,10 +14,19 @@ rust-version.workspace = true
[features]
profiler = ["boa_profiler/profiler"]
deser = ["boa_interner/serde", "boa_ast/serde"]
# Enables the `Intl` builtin object and bundles a default ICU4X data provider.
# Prefer this over `intl` if you just want to enable `Intl` without dealing with the
# generation of ICU4X data.
intl_bundled = ["intl", "dep:boa_icu_provider"]
# Enables Boa's `Intl` builtin implementation.
# Prefer this over `intl_bundled` if you want to reduce the size of the final binary
# by providing a smaller ICU4X data provider.
intl = [
"boa_gc/icu",
"icu_normalizer/serde",
"icu_normalizer/std",
"dep:boa_icu_provider",
"dep:icu_locid_transform",
"dep:icu_locid",
"dep:icu_datetime",
@ -48,7 +57,7 @@ trace = ["js"]
# Enable Boa's additional ECMAScript features for web browsers.
annex-b = ["boa_parser/annex-b"]
# Stage 3 proposals
# Enable Boa's Temporal proposal implementation
temporal = ["dep:icu_calendar"]
# Enable experimental features, like Stage 3 proposals.
@ -59,7 +68,7 @@ js = ["dep:web-time"]
[dependencies]
boa_interner.workspace = true
boa_gc = { workspace = true, features = [ "thin-vec", "icu" ] }
boa_gc = { workspace = true, features = [ "thin-vec" ] }
boa_profiler.workspace = true
boa_macros.workspace = true
boa_ast.workspace = true
@ -86,7 +95,7 @@ num_enum = "0.7.2"
pollster.workspace = true
thin-vec.workspace = true
itertools = { version = "0.12.1", default-features = false }
icu_normalizer.workspace = true
icu_normalizer = { workspace = true, features = ["compiled_data"] }
paste = "1.0"
portable-atomic = "1.6.0"
bytemuck = { version = "1.14.3", features = ["derive"] }

21
core/engine/src/context/mod.rs

@ -13,8 +13,6 @@ pub use icu::IcuError;
use intrinsics::Intrinsics;
#[cfg(not(feature = "intl"))]
pub use std::marker::PhantomData;
use std::{cell::Cell, path::Path, rc::Rc};
use crate::{
@ -1050,10 +1048,21 @@ impl ContextBuilder {
vm,
strict: false,
#[cfg(feature = "intl")]
intl_provider: self.icu.unwrap_or_else(|| {
icu::IntlProvider::try_new_with_buffer_provider(boa_icu_provider::buffer())
.expect("Failed to initialize default icu data.")
}),
intl_provider: if let Some(icu) = self.icu {
icu
} else {
cfg_if::cfg_if! {
if #[cfg(feature = "intl_bundled")] {
icu::IntlProvider::try_new_with_buffer_provider(boa_icu_provider::buffer())
.expect("Failed to initialize default icu data.")
} else {
return Err(JsNativeError::typ()
.with_message("missing Intl provider for context")
.into()
);
}
}
},
#[cfg(feature = "fuzz")]
instructions_remaining: self.instructions_remaining,
kept_alive: Vec::new(),

17
core/icu_provider/Cargo.toml

@ -10,28 +10,15 @@ license.workspace = true
repository.workspace = true
rust-version.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
icu_provider = { workspace = true, features = ["serde", "sync", "datagen"] }
icu_provider_blob = { workspace = true, features = ["export"] }
icu_provider = { workspace = true, features = ["sync"] }
icu_provider_blob.workspace = true
icu_provider_adapters = { workspace = true, features = ["serde"] }
once_cell = { workspace = true, default-features = false, features = ["critical-section"] }
icu_datagen = { workspace = true, optional = true, features = ["networking", "use_wasm"] }
icu_plurals = { workspace = true, optional = true, features = ["datagen", "experimental"] }
log = { workspace = true, optional = true }
simple_logger = { workspace = true, optional = true }
[features]
default = ["std"]
std = ["once_cell/std"]
bin = ["dep:icu_datagen", "dep:simple_logger", "dep:log", "dep:icu_plurals"]
[[bin]]
name = "boa_datagen"
path = "src/bin/datagen.rs"
required-features = ["bin"]
[lints]
workspace = true

10
core/icu_provider/README.md

@ -1,12 +1,4 @@
# boa_icu_provider
`boa_icu_provider` generates and defines the [ICU4X](https://github.com/unicode-org/icu4x) data provider
`boa_icu_provider` defines the [ICU4X](https://github.com/unicode-org/icu4x) data provider
used in the Boa engine to enable internationalization functionality.
## Datagen
To regenerate the data:
```bash
$ cargo run --release --bin boa_datagen --features bin
```

BIN
core/icu_provider/data/icudata.postcard

Binary file not shown.

17
core/icu_provider/src/lib.rs

@ -19,26 +19,17 @@
html_logo_url = "https://raw.githubusercontent.com/boa-dev/boa/main/assets/logo.svg",
html_favicon_url = "https://raw.githubusercontent.com/boa-dev/boa/main/assets/logo.svg"
)]
#![cfg_attr(not(feature = "bin"), no_std)]
#![allow(unused_crate_dependencies)]
#![cfg_attr(not(feature = "std"), no_std)]
/// Gets the path to the directory where the generated data is stored.
#[cfg(feature = "bin")]
#[must_use]
#[doc(hidden)]
pub fn data_root() -> std::path::PathBuf {
std::path::PathBuf::from(std::env!("CARGO_MANIFEST_DIR")).join("data")
}
use icu_provider_adapters::fallback::LocaleFallbackProvider;
use icu_provider_blob::BlobDataProvider;
use once_cell::sync::Lazy;
/// Gets the default data provider stored as a [`BufferProvider`].
///
/// [`BufferProvider`]: icu_provider::BufferProvider
#[must_use]
pub fn buffer() -> &'static impl icu_provider::BufferProvider {
use icu_provider_adapters::fallback::LocaleFallbackProvider;
use icu_provider_blob::BlobDataProvider;
use once_cell::sync::Lazy;
static PROVIDER: Lazy<LocaleFallbackProvider<BlobDataProvider>> = Lazy::new(|| {
let blob = BlobDataProvider::try_new_from_static_blob(include_bytes!(concat!(
env!("CARGO_MANIFEST_DIR"),

2
ffi/wasm/Cargo.toml

@ -18,7 +18,7 @@ getrandom = { version = "0.2.12", features = ["js"] }
console_error_panic_hook = "0.1.7"
[features]
default = ["boa_engine/annex-b", "boa_engine/intl", "boa_engine/experimental"]
default = ["boa_engine/annex-b", "boa_engine/intl_bundled", "boa_engine/experimental"]
[lib]
crate-type = ["cdylib", "lib"]

2
tests/tester/Cargo.toml

@ -34,7 +34,7 @@ bus = "2.4.1"
time.workspace = true
[features]
default = ["boa_engine/intl", "boa_engine/experimental", "boa_engine/annex-b"]
default = ["boa_engine/intl_bundled", "boa_engine/experimental", "boa_engine/annex-b"]
[lints]
workspace = true

32
tools/gen-icu4x-data/Cargo.toml

@ -0,0 +1,32 @@
[package]
name = "gen-icu4x-data"
publish = false
edition.workspace = true
version.workspace = true
rust-version.workspace = true
authors.workspace = true
repository.workspace = true
license.workspace = true
description.workspace = true
[dependencies]
icu_provider = { workspace = true, features = ["datagen"] }
icu_provider_blob = { workspace = true, features = ["export"] }
icu_datagen = { workspace = true, features = ["networking", "use_wasm"] }
log.workspace = true
simple_logger.workspace = true
# Components
icu_casemap = { workspace = true, features = ["datagen"] }
icu_collator = { workspace = true, features = ["datagen"] }
icu_datetime = { workspace = true, features = ["datagen"] }
icu_decimal = { workspace = true, features = ["datagen"] }
icu_list = { workspace = true, features = ["datagen"] }
icu_locid_transform = { workspace = true, features = ["datagen"] }
icu_normalizer = { workspace = true, features = ["datagen"] }
icu_plurals = { workspace = true, features = ["datagen", "experimental"] }
icu_segmenter = { workspace = true, features = ["datagen"] }
[lints]
workspace = true

12
tools/gen-icu4x-data/README.md

@ -0,0 +1,12 @@
# gen-icu4x-data
`gen-icu4x-data` generates the [ICU4X](https://github.com/unicode-org/icu4x) data provider
for `boa_icu_provider`.
## Datagen
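To regenerate the data, run the following from the root of the repository (the output path `core/icu_provider/data` is resolved relative to the current directory):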
```bash
$ cargo run --release --bin gen-icu4x-data
```

46
core/icu_provider/src/bin/datagen.rs → tools/gen-icu4x-data/src/main.rs

@ -1,17 +1,8 @@
#![doc(
html_logo_url = "https://raw.githubusercontent.com/boa-dev/boa/main/assets/logo.svg",
html_favicon_url = "https://raw.githubusercontent.com/boa-dev/boa/main/assets/logo.svg"
)]
#![allow(
unused_crate_dependencies,
missing_docs,
rustdoc::missing_crate_level_docs
)]
#![allow(missing_docs, rustdoc::missing_crate_level_docs)]
use std::{error::Error, fs::File};
use std::{error::Error, fs::File, path::Path};
use boa_icu_provider::data_root;
use icu_datagen::{all_keys, CoverageLevel, DatagenDriver, DatagenProvider};
use icu_datagen::{CoverageLevel, DatagenDriver, DatagenProvider};
use icu_plurals::provider::{PluralRangesV1, PluralRangesV1Marker};
use icu_provider::{
datagen::{ExportMarker, IterableDynamicDataProvider},
@ -84,23 +75,46 @@ impl IterableDynamicDataProvider<ExportMarker> for PluralRangesFallbackHack {
}
}
/// List of keys used by `Intl` components.
///
/// Each entry is the `KEYS` slice exported by the `provider` module of one ICU4X
/// component crate; `main` flattens these slices into the single key set that is
/// handed to the datagen driver.
///
/// This must be kept in sync with the list of implemented components of `Intl`.
/// When adding or removing an entry, update the array length in the type as well.
const KEYS: [&[DataKey]; 9] = [
icu_casemap::provider::KEYS,
icu_collator::provider::KEYS,
icu_datetime::provider::KEYS,
icu_decimal::provider::KEYS,
icu_list::provider::KEYS,
icu_locid_transform::provider::KEYS,
icu_normalizer::provider::KEYS,
icu_plurals::provider::KEYS,
icu_segmenter::provider::KEYS,
];
fn main() -> Result<(), Box<dyn Error>> {
simple_logger::SimpleLogger::new()
.env()
.with_level(log::LevelFilter::Info)
.init()?;
let path = Path::new("core/icu_provider/data");
// `remove_dir_all` returns an error if the directory doesn't exist. That case is
// harmless because the directory is recreated right below, so the result is
// deliberately ignored.
let _unused = std::fs::remove_dir_all(path);
std::fs::create_dir_all(path)?;
log::info!("Generating ICU4X data for keys: {:?}", KEYS);
let provider = DatagenProvider::new_latest_tested();
DatagenDriver::new()
.with_keys(all_keys())
.with_keys(KEYS.into_iter().flatten().copied())
.with_locales(provider.locales_for_coverage_levels([CoverageLevel::Modern])?)
.with_additional_collations([String::from("search*")])
.with_recommended_segmenter_models()
.export(
&PluralRangesFallbackHack(provider),
BlobExporter::new_with_sink(Box::new(File::create(
data_root().join("icudata.postcard"),
)?)),
BlobExporter::new_with_sink(Box::new(File::create(path.join("icudata.postcard"))?)),
)?;
Ok(())
Loading…
Cancel
Save