diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index d738f65927..99f5ed0d71 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -36,7 +36,7 @@ jobs: uses: actions-rs/cargo@v1 with: command: tarpaulin - args: --workspace --features annex-b,intl,experimental --ignore-tests --engine llvm --out xml + args: --workspace --features annex-b,intl_bundled,experimental --ignore-tests --engine llvm --out xml - name: Upload to codecov.io uses: codecov/codecov-action@v4 @@ -61,13 +61,13 @@ jobs: run: cargo test --no-run --profile ci # this order is faster according to rust-analyzer - name: Build - run: cargo build --all-targets --quiet --profile ci --features annex-b,intl,experimental + run: cargo build --all-targets --quiet --profile ci --features annex-b,intl_bundled,experimental - name: Install latest nextest uses: taiki-e/install-action@nextest - name: Test with nextest - run: cargo nextest run --profile ci --cargo-profile ci --features annex-b,intl,experimental + run: cargo nextest run --profile ci --cargo-profile ci --features annex-b,intl_bundled,experimental - name: Test docs - run: cargo test --doc --profile ci --features annex-b,intl,experimental + run: cargo test --doc --profile ci --features annex-b,intl_bundled,experimental msrv: name: MSRV diff --git a/Cargo.lock b/Cargo.lock index b0bf0fd12c..fda01722a4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -489,14 +489,10 @@ dependencies = [ name = "boa_icu_provider" version = "0.17.0" dependencies = [ - "icu_datagen", - "icu_plurals", "icu_provider", "icu_provider_adapters", "icu_provider_blob", - "log", "once_cell", - "simple_logger", ] [[package]] @@ -1560,6 +1556,26 @@ dependencies = [ "slab", ] +[[package]] +name = "gen-icu4x-data" +version = "0.17.0" +dependencies = [ + "icu_casemap", + "icu_collator", + "icu_datagen", + "icu_datetime", + "icu_decimal", + "icu_list", + "icu_locid_transform", + "icu_normalizer", + "icu_plurals", + "icu_provider", + "icu_provider_blob", + 
"icu_segmenter", + "log", + "simple_logger", +] + [[package]] name = "generational-arena" version = "0.2.9" diff --git a/Cargo.toml b/Cargo.toml index b940e37b60..2f294bad36 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,9 @@ members = [ # TESTS "tests/*", + # TOOLS + "tools/*", + # OTHERS "examples", "cli", @@ -89,7 +92,7 @@ icu_datagen = { version = "~1.4.1", default-features = false } icu_provider_adapters = { version = "~1.4.0", default-features = false } icu_provider_blob = { version = "~1.4.0", default-features = false } icu_properties = { version = "~1.4.0", default-features = true } -icu_normalizer = { version = "~1.4.1", default-features = true } +icu_normalizer = { version = "~1.4.1", default-features = false } icu_decimal = { version = "~1.4.0", default-features = false } writeable = "~0.5.4" yoke = "~0.7.3" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index a94023c942..e1ecf2e5ae 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -26,7 +26,7 @@ pollster.workspace = true dhat = { version = "0.3.3", optional = true } [features] -default = ["boa_engine/annex-b", "boa_engine/experimental", "boa_engine/intl"] +default = ["boa_engine/annex-b", "boa_engine/experimental", "boa_engine/intl_bundled"] dhat = ["dep:dhat"] [target.x86_64-unknown-linux-gnu.dependencies] diff --git a/core/engine/Cargo.toml b/core/engine/Cargo.toml index 4d22119329..32a778d304 100644 --- a/core/engine/Cargo.toml +++ b/core/engine/Cargo.toml @@ -14,10 +14,19 @@ rust-version.workspace = true [features] profiler = ["boa_profiler/profiler"] deser = ["boa_interner/serde", "boa_ast/serde"] + +# Enables the `Intl` builtin object and bundles a default ICU4X data provider. +# Prefer this over `intl` if you just want to enable `Intl` without dealing with the +# generation of ICU4X data. +intl_bundled = ["intl", "dep:boa_icu_provider"] + +# Enables Boa's `Intl` builtin implementation. 
+# Prefer this over `intl_bundled` if you want to reduce the size of the final binary +# by providing a smaller ICU4X data provider. intl = [ + "boa_gc/icu", "icu_normalizer/serde", "icu_normalizer/std", - "dep:boa_icu_provider", "dep:icu_locid_transform", "dep:icu_locid", "dep:icu_datetime", @@ -48,7 +57,7 @@ trace = ["js"] # Enable Boa's additional ECMAScript features for web browsers. annex-b = ["boa_parser/annex-b"] -# Stage 3 proposals +# Enable Boa's Temporal proposal implementation temporal = ["dep:icu_calendar"] # Enable experimental features, like Stage 3 proposals. @@ -59,7 +68,7 @@ js = ["dep:web-time"] [dependencies] boa_interner.workspace = true -boa_gc = { workspace = true, features = [ "thin-vec", "icu" ] } +boa_gc = { workspace = true, features = [ "thin-vec" ] } boa_profiler.workspace = true boa_macros.workspace = true boa_ast.workspace = true @@ -86,7 +95,7 @@ num_enum = "0.7.2" pollster.workspace = true thin-vec.workspace = true itertools = { version = "0.12.1", default-features = false } -icu_normalizer.workspace = true +icu_normalizer = { workspace = true, features = ["compiled_data"] } paste = "1.0" portable-atomic = "1.6.0" bytemuck = { version = "1.14.3", features = ["derive"] } diff --git a/core/engine/src/context/mod.rs b/core/engine/src/context/mod.rs index baff917997..727349a414 100644 --- a/core/engine/src/context/mod.rs +++ b/core/engine/src/context/mod.rs @@ -13,8 +13,6 @@ pub use icu::IcuError; use intrinsics::Intrinsics; -#[cfg(not(feature = "intl"))] -pub use std::marker::PhantomData; use std::{cell::Cell, path::Path, rc::Rc}; use crate::{ @@ -1050,10 +1048,21 @@ impl ContextBuilder { vm, strict: false, #[cfg(feature = "intl")] - intl_provider: self.icu.unwrap_or_else(|| { - icu::IntlProvider::try_new_with_buffer_provider(boa_icu_provider::buffer()) - .expect("Failed to initialize default icu data.") - }), + intl_provider: if let Some(icu) = self.icu { + icu + } else { + cfg_if::cfg_if! 
{ + if #[cfg(feature = "intl_bundled")] { + icu::IntlProvider::try_new_with_buffer_provider(boa_icu_provider::buffer()) + .expect("Failed to initialize default icu data.") + } else { + return Err(JsNativeError::typ() + .with_message("missing Intl provider for context") + .into() + ); + } + } + }, #[cfg(feature = "fuzz")] instructions_remaining: self.instructions_remaining, kept_alive: Vec::new(), diff --git a/core/icu_provider/Cargo.toml b/core/icu_provider/Cargo.toml index 4182bb9b03..c6f23f666d 100644 --- a/core/icu_provider/Cargo.toml +++ b/core/icu_provider/Cargo.toml @@ -10,28 +10,15 @@ license.workspace = true repository.workspace = true rust-version.workspace = true -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - [dependencies] -icu_provider = { workspace = true, features = ["serde", "sync", "datagen"] } -icu_provider_blob = { workspace = true, features = ["export"] } +icu_provider = { workspace = true, features = ["sync"] } +icu_provider_blob.workspace = true icu_provider_adapters = { workspace = true, features = ["serde"] } once_cell = { workspace = true, default-features = false, features = ["critical-section"] } -icu_datagen = { workspace = true, optional = true, features = ["networking", "use_wasm"] } -icu_plurals = { workspace = true, optional = true, features = ["datagen", "experimental"] } -log = { workspace = true, optional = true } -simple_logger = { workspace = true, optional = true } - [features] default = ["std"] std = ["once_cell/std"] -bin = ["dep:icu_datagen", "dep:simple_logger", "dep:log", "dep:icu_plurals"] - -[[bin]] -name = "boa_datagen" -path = "src/bin/datagen.rs" -required-features = ["bin"] [lints] workspace = true diff --git a/core/icu_provider/README.md b/core/icu_provider/README.md index 3444b5f37f..21fe8bc326 100644 --- a/core/icu_provider/README.md +++ b/core/icu_provider/README.md @@ -1,12 +1,4 @@ # boa_icu_provider -`boa_icu_provider` generates and defines the 
[ICU4X](https://github.com/unicode-org/icu4x) data provider +`boa_icu_provider` defines the [ICU4X](https://github.com/unicode-org/icu4x) data provider used in the Boa engine to enable internationalization functionality. - -## Datagen - -To regenerate the data: - -```bash -$ cargo run --release --bin boa_datagen --features bin -``` diff --git a/core/icu_provider/data/icudata.postcard b/core/icu_provider/data/icudata.postcard index 8343eeb070..cf7fab9f0a 100644 Binary files a/core/icu_provider/data/icudata.postcard and b/core/icu_provider/data/icudata.postcard differ diff --git a/core/icu_provider/src/lib.rs b/core/icu_provider/src/lib.rs index abf29ee919..0ab13a4daa 100644 --- a/core/icu_provider/src/lib.rs +++ b/core/icu_provider/src/lib.rs @@ -19,26 +19,17 @@ html_logo_url = "https://raw.githubusercontent.com/boa-dev/boa/main/assets/logo.svg", html_favicon_url = "https://raw.githubusercontent.com/boa-dev/boa/main/assets/logo.svg" )] -#![cfg_attr(not(feature = "bin"), no_std)] -#![allow(unused_crate_dependencies)] +#![cfg_attr(not(feature = "std"), no_std)] -/// Gets the path to the directory where the generated data is stored. -#[cfg(feature = "bin")] -#[must_use] -#[doc(hidden)] -pub fn data_root() -> std::path::PathBuf { - std::path::PathBuf::from(std::env!("CARGO_MANIFEST_DIR")).join("data") -} +use icu_provider_adapters::fallback::LocaleFallbackProvider; +use icu_provider_blob::BlobDataProvider; +use once_cell::sync::Lazy; /// Gets the default data provider stored as a [`BufferProvider`]. 
/// /// [`BufferProvider`]: icu_provider::BufferProvider #[must_use] pub fn buffer() -> &'static impl icu_provider::BufferProvider { - use icu_provider_adapters::fallback::LocaleFallbackProvider; - use icu_provider_blob::BlobDataProvider; - use once_cell::sync::Lazy; - static PROVIDER: Lazy<LocaleFallbackProvider<BlobDataProvider>> = Lazy::new(|| { let blob = BlobDataProvider::try_new_from_static_blob(include_bytes!(concat!( env!("CARGO_MANIFEST_DIR"), diff --git a/ffi/wasm/Cargo.toml b/ffi/wasm/Cargo.toml index 880c4f4fd4..bda0d5bc58 100644 --- a/ffi/wasm/Cargo.toml +++ b/ffi/wasm/Cargo.toml @@ -18,7 +18,7 @@ getrandom = { version = "0.2.12", features = ["js"] } console_error_panic_hook = "0.1.7" [features] -default = ["boa_engine/annex-b", "boa_engine/intl", "boa_engine/experimental"] +default = ["boa_engine/annex-b", "boa_engine/intl_bundled", "boa_engine/experimental"] [lib] crate-type = ["cdylib", "lib"] diff --git a/tests/tester/Cargo.toml b/tests/tester/Cargo.toml index 73f9bcc1d3..8c344c0d7e 100644 --- a/tests/tester/Cargo.toml +++ b/tests/tester/Cargo.toml @@ -34,7 +34,7 @@ bus = "2.4.1" time.workspace = true [features] -default = ["boa_engine/intl", "boa_engine/experimental", "boa_engine/annex-b"] +default = ["boa_engine/intl_bundled", "boa_engine/experimental", "boa_engine/annex-b"] [lints] workspace = true diff --git a/tools/gen-icu4x-data/Cargo.toml b/tools/gen-icu4x-data/Cargo.toml new file mode 100644 index 0000000000..b1f85b5052 --- /dev/null +++ b/tools/gen-icu4x-data/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "gen-icu4x-data" +publish = false +edition.workspace = true +version.workspace = true +rust-version.workspace = true +authors.workspace = true +repository.workspace = true +license.workspace = true +description.workspace = true + +[dependencies] +icu_provider = { workspace = true, features = ["datagen"] } +icu_provider_blob = { workspace = true, features = ["export"] } +icu_datagen = { workspace = true, features = ["networking", "use_wasm"] } +log.workspace = true 
+simple_logger.workspace = true + +# Components + +icu_casemap = { workspace = true, features = ["datagen"] } +icu_collator = { workspace = true, features = ["datagen"] } +icu_datetime = { workspace = true, features = ["datagen"] } +icu_decimal = { workspace = true, features = ["datagen"] } +icu_list = { workspace = true, features = ["datagen"] } +icu_locid_transform = { workspace = true, features = ["datagen"] } +icu_normalizer = { workspace = true, features = ["datagen"] } +icu_plurals = { workspace = true, features = ["datagen", "experimental"] } +icu_segmenter = { workspace = true, features = ["datagen"] } + +[lints] +workspace = true diff --git a/tools/gen-icu4x-data/README.md b/tools/gen-icu4x-data/README.md new file mode 100644 index 0000000000..86ab8f14cc --- /dev/null +++ b/tools/gen-icu4x-data/README.md @@ -0,0 +1,12 @@ +# gen-icu4x-data + +`gen-icu4x-data` generates the [ICU4X](https://github.com/unicode-org/icu4x) data provider +for `boa_icu_provider`. + +## Datagen + +To regenerate the data: + +```bash +$ cargo run --release --bin gen-icu4x-data +``` diff --git a/core/icu_provider/src/bin/datagen.rs b/tools/gen-icu4x-data/src/main.rs similarity index 72% rename from core/icu_provider/src/bin/datagen.rs rename to tools/gen-icu4x-data/src/main.rs index 5da227f591..4226025a3f 100644 --- a/core/icu_provider/src/bin/datagen.rs +++ b/tools/gen-icu4x-data/src/main.rs @@ -1,17 +1,8 @@ -#![doc( - html_logo_url = "https://raw.githubusercontent.com/boa-dev/boa/main/assets/logo.svg", - html_favicon_url = "https://raw.githubusercontent.com/boa-dev/boa/main/assets/logo.svg" -)] -#![allow( - unused_crate_dependencies, - missing_docs, - rustdoc::missing_crate_level_docs -)] +#![allow(missing_docs, rustdoc::missing_crate_level_docs)] -use std::{error::Error, fs::File}; +use std::{error::Error, fs::File, path::Path}; -use boa_icu_provider::data_root; -use icu_datagen::{all_keys, CoverageLevel, DatagenDriver, DatagenProvider}; +use icu_datagen::{CoverageLevel, 
DatagenDriver, DatagenProvider}; use icu_plurals::provider::{PluralRangesV1, PluralRangesV1Marker}; use icu_provider::{ datagen::{ExportMarker, IterableDynamicDataProvider}, @@ -84,23 +75,46 @@ impl IterableDynamicDataProvider<ExportMarker> for PluralRangesFallbackHack { } } +/// List of keys used by `Intl` components. +/// +/// This must be kept in sync with the list of implemented components of `Intl`. +const KEYS: [&[DataKey]; 9] = [ + icu_casemap::provider::KEYS, + icu_collator::provider::KEYS, + icu_datetime::provider::KEYS, + icu_decimal::provider::KEYS, + icu_list::provider::KEYS, + icu_locid_transform::provider::KEYS, + icu_normalizer::provider::KEYS, + icu_plurals::provider::KEYS, + icu_segmenter::provider::KEYS, +]; + fn main() -> Result<(), Box<dyn Error>> { simple_logger::SimpleLogger::new() .env() .with_level(log::LevelFilter::Info) .init()?; + let path = Path::new("core/icu_provider/data"); + + // Removal returns an error if the directory doesn't exist, which is + // why the error can be safely ignored. + let _unused = std::fs::remove_dir_all(path); + std::fs::create_dir_all(path)?; + + log::info!("Generating ICU4X data for keys: {:?}", KEYS); + let provider = DatagenProvider::new_latest_tested(); DatagenDriver::new() - .with_keys(all_keys()) + .with_keys(KEYS.into_iter().flatten().copied()) .with_locales(provider.locales_for_coverage_levels([CoverageLevel::Modern])?) .with_additional_collations([String::from("search*")]) + .with_recommended_segmenter_models() .export( &PluralRangesFallbackHack(provider), - BlobExporter::new_with_sink(Box::new(File::create( - data_root().join("icudata.postcard"), - )?)), + BlobExporter::new_with_sink(Box::new(File::create(path.join("icudata.postcard"))?)), )?; Ok(())