diff --git a/Cargo.lock b/Cargo.lock index d032b019e7..0375385ea7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -100,6 +100,7 @@ dependencies = [ "fast-float", "float-cmp", "gc", + "icu", "indexmap", "jemallocator", "num-bigint", @@ -445,6 +446,17 @@ dependencies = [ "winapi", ] +[[package]] +name = "displaydoc" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bf95dc3f046b9da4f2d51833c0d3547d8564ef6910f5c1ed130306a75b92886" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "dyn-clone" version = "1.0.5" @@ -511,6 +523,18 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "fixed_decimal" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "857766d9808ad8f9033fe4e6499fcc6504ae485cc99ef2f3a98b4850c23b895a" +dependencies = [ + "displaydoc", + "smallvec", + "static_assertions", + "writeable", +] + [[package]] name = "float-cmp" version = "0.9.0" @@ -608,6 +632,189 @@ dependencies = [ "libc", ] +[[package]] +name = "icu" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "904a80ce690d14859a54e02d803eefba4ecd021882b8ab0492e455951d4df9a5" +dependencies = [ + "fixed_decimal", + "icu_calendar", + "icu_datetime", + "icu_decimal", + "icu_locale_canonicalizer", + "icu_locid", + "icu_locid_macros", + "icu_plurals", + "icu_properties", +] + +[[package]] +name = "icu_calendar" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07549f5d96acacdee05054133bd74ab1179211185ebe886f953fe1a0dd634096" +dependencies = [ + "displaydoc", + "icu_provider", + "litemap", + "tinystr", +] + +[[package]] +name = "icu_codepointtrie" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b54c854632ef55b92a825f9f26038a3be7f72116036aee05c522b6a65323beb" +dependencies = [ + "displaydoc", + "serde", + "yoke", + "zerovec", +] + +[[package]] +name 
= "icu_datetime" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6cd9ae537546879a4e57a09d3fc7f0ae48a1c2d8f605ce14536b04f99afe5f3e" +dependencies = [ + "displaydoc", + "either", + "icu_calendar", + "icu_locid", + "icu_plurals", + "icu_provider", + "litemap", + "num_enum", + "serde", + "smallvec", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_decimal" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bb632b080fd9944616933eb659279228d43801d4c58152e2938fac6f94ecc01" +dependencies = [ + "displaydoc", + "fixed_decimal", + "icu_locid", + "icu_provider", + "serde", + "writeable", +] + +[[package]] +name = "icu_locale_canonicalizer" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "293bbe9de3262a23f0dac731e1c52927f54347d0fd845a1f5aa7de3cc26f3358" +dependencies = [ + "icu_locid", + "icu_provider", + "litemap", + "serde", + "tinystr", +] + +[[package]] +name = "icu_locid" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83a3b8515791232b1be02c21467c123216a0e60fd024f4a74cc119ef5b1af79e" +dependencies = [ + "displaydoc", + "serde", + "tinystr", + "writeable", +] + +[[package]] +name = "icu_locid_macros" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c40e3635df35d94a0294b07efadb52b58017a3e16fe86a5ffddfc3e56a36ae1" +dependencies = [ + "icu_locid", + "proc-macro-crate", + "tinystr", +] + +[[package]] +name = "icu_plurals" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34c03a32a72566b65e6245ad4e9f8e4d470e1decb1fcd10a9cab061bb9fbce92" +dependencies = [ + "displaydoc", + "fixed_decimal", + "icu_locid", + "icu_provider", + "num_enum", + "serde", + "zerovec", +] + +[[package]] +name = "icu_properties" +version = "0.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db71ea31fe47694e165989886826e23f9defbae5dbd002e678d756086d1f5c7" +dependencies = [ + "displaydoc", + "icu_codepointtrie", + "icu_provider", + "icu_uniset", + "num_enum", + "serde", + "zerovec", +] + +[[package]] +name = "icu_provider" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "496b8bc2f7c2fe705191442201ea0db4c86153cabe3f39dbddb95cad3aca6454" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "litemap", + "serde", + "tinystr", + "writeable", + "yoke", +] + +[[package]] +name = "icu_provider_macros" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea1a9a3f206d9be3e027c3d1448aa8edd690c0aa789d8dca01a17fa76fb79524" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "icu_uniset" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce83f199e54f8e0a948d7b737a62a7e4075760d6aef671ef85e52734a05db66" +dependencies = [ + "displaydoc", + "litemap", + "serde", + "tinystr", + "yoke", + "zerovec", +] + [[package]] name = "indexmap" version = "1.8.1" @@ -708,6 +915,16 @@ version = "0.0.42" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5284f00d480e1c39af34e72f8ad60b94f47007e3481cd3b731c1d67190ddc7b7" +[[package]] +name = "litemap" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5568cf6856c84333bda775ba41b834b20f75d64888067c8c2e26a09154a5730e" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "lock_api" version = "0.4.7" @@ -844,6 +1061,26 @@ dependencies = [ "libc", ] +[[package]] +name = "num_enum" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf5395665662ef45796a4ff5486c5d41d29e0c09640af4c5f17fd94ee2c119c9" +dependencies = [ + "num_enum_derive", +] + +[[package]] 
+name = "num_enum_derive" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0498641e53dd6ac1a4f22547548caa6864cc4933784319cd1775271c5a46ce" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "once_cell" version = "1.10.0" @@ -974,6 +1211,16 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" +[[package]] +name = "proc-macro-crate" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e17d47ce914bf4de440332250b0edd23ce48c005f59fab39d3335866b114f11a" +dependencies = [ + "thiserror", + "toml", +] + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -1298,6 +1545,21 @@ name = "smallvec" version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" +dependencies = [ + "serde", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "str-buf" @@ -1436,6 +1698,32 @@ dependencies = [ "winapi", ] +[[package]] +name = "tinystr" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "954af207a9e273b644c2c0d29d79d7cba9c22a66e633535d0d0d3712d7e50563" +dependencies = [ + "serde", + "tinystr-macros", + "tinystr-raw", +] + +[[package]] +name = "tinystr-macros" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f65be51117c325c2b58eec529be7a0857d11527a9029973b58810a4c63e77a6" 
+dependencies = [ + "tinystr-raw", +] + +[[package]] +name = "tinystr-raw" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10f87ef8b0485e4efff5cac95608adc3251e412fef6039ecd56c5618c8003895" + [[package]] name = "tinytemplate" version = "1.2.1" @@ -1461,6 +1749,15 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" +[[package]] +name = "toml" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d82e1a7758622a465f8cee077614c73484dac5b836c02ff6a40d5d1010324d7" +dependencies = [ + "serde", +] + [[package]] name = "unicode-general-category" version = "0.5.1" @@ -1667,6 +1964,12 @@ version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08cabc9f0066848fef4bc6a1c1668e6efce38b661d2aeec75d18d8617eebb5f1" +[[package]] +name = "writeable" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "063dba1bbc4c97dfb9b9e57cf4bcf3bac1ebfd2ef60bc609fc08dcd0228c3de5" + [[package]] name = "yaml-rust" version = "0.4.5" @@ -1675,3 +1978,36 @@ checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" dependencies = [ "linked-hash-map", ] + +[[package]] +name = "yoke" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a32bb18a3c6c8c2726ad84153babdf8f5336a7d5551a9177a79d006f0ce789f" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", +] + +[[package]] +name = "yoke-derive" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77b70271795226acc88b2dfe0651e91e6402b64b1887c60722e04be55ff15941" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerovec" +version = "0.6.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ae959aabd79f0f4dec802b32404cf41776d03757dd5b6e815d3d1ca076fa939" +dependencies = [ + "serde", + "yoke", +] diff --git a/boa_engine/Cargo.toml b/boa_engine/Cargo.toml index ef0eb03eb0..5d278e53d7 100644 --- a/boa_engine/Cargo.toml +++ b/boa_engine/Cargo.toml @@ -41,6 +41,7 @@ unicode-normalization = "0.1.19" dyn-clone = "1.0.5" once_cell = "1.10.0" tap = "1.0.1" +icu = "0.5.0" [dev-dependencies] criterion = "0.3.5" diff --git a/boa_engine/src/builtins/intl/mod.rs b/boa_engine/src/builtins/intl/mod.rs index bba8384180..2c2472122c 100644 --- a/boa_engine/src/builtins/intl/mod.rs +++ b/boa_engine/src/builtins/intl/mod.rs @@ -17,11 +17,16 @@ use crate::{ }; pub mod date_time_format; +#[cfg(test)] +mod tests; use boa_profiler::Profiler; use indexmap::IndexSet; +use rustc_hash::FxHashMap; use tap::{Conv, Pipe}; +use icu::locid::Locale; + /// JavaScript `Intl` object. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub(crate) struct Intl; @@ -144,3 +149,507 @@ impl Intl { ))) } } + +/// `MatcherRecord` type aggregates unicode `locale` string and unicode locale `extension`. +/// +/// This is a return value for `lookup_matcher` and `best_fit_matcher` subroutines. +#[derive(Debug)] +struct MatcherRecord { + locale: JsString, + extension: JsString, +} + +/// The `DefaultLocale` abstract operation returns a String value representing the structurally +/// valid and canonicalized Unicode BCP 47 locale identifier for the host environment's current +/// locale. 
+/// +/// More information: +/// - [ECMAScript reference][spec] +/// +/// [spec]: https://tc39.es/ecma402/#sec-defaultlocale +fn default_locale() -> JsString { + // FIXME get locale from environment + JsString::new("en-US") +} + +/// The `BestAvailableLocale` abstract operation compares the provided argument `locale`, +/// which must be a String value with a structurally valid and canonicalized Unicode BCP 47 +/// locale identifier, against the locales in `availableLocales` and returns either the longest +/// non-empty prefix of `locale` that is an element of `availableLocales`, or undefined if +/// there is no such element. +/// +/// More information: +/// - [ECMAScript reference][spec] +/// +/// [spec]: https://tc39.es/ecma402/#sec-bestavailablelocale +fn best_available_locale(available_locales: &[JsString], locale: &JsString) -> Option { + // 1. Let candidate be locale. + let mut candidate = locale.clone(); + // 2. Repeat + loop { + // a. If availableLocales contains an element equal to candidate, return candidate. + if available_locales.contains(&candidate) { + return Some(candidate); + } + + // b. Let pos be the character index of the last occurrence of "-" (U+002D) within candidate. If that character does not occur, return undefined. + let pos = candidate.rfind('-'); + match pos { + Some(ind) => { + // c. If pos ≥ 2 and the character "-" occurs at index pos-2 of candidate, decrease pos by 2. + let tmp_candidate = candidate[..ind].to_string(); + let prev_dash = tmp_candidate.rfind('-').unwrap_or(ind); + let trim_ind = if ind >= 2 && prev_dash == ind - 2 { + ind - 2 + } else { + ind + }; + // d. Let candidate be the substring of candidate from position 0, inclusive, to position pos, exclusive. 
+ candidate = JsString::new(&candidate[..trim_ind]); + } + None => return None, + } + } +} + +/// The `LookupMatcher` abstract operation compares `requestedLocales`, which must be a `List` +/// as returned by `CanonicalizeLocaleList`, against the locales in `availableLocales` and +/// determines the best available language to meet the request. +/// +/// More information: +/// - [ECMAScript reference][spec] +/// +/// [spec]: https://tc39.es/ecma402/#sec-lookupmatcher +fn lookup_matcher(available_locales: &[JsString], requested_locales: &[JsString]) -> MatcherRecord { + // 1. Let result be a new Record. + // 2. For each element locale of requestedLocales, do + for locale_str in requested_locales { + // a. Let noExtensionsLocale be the String value that is locale with any Unicode locale + // extension sequences removed. + let parsed_locale = + Locale::from_bytes(locale_str.as_bytes()).expect("Locale parsing failed"); + let no_extensions_locale = JsString::new(parsed_locale.id.to_string()); + + // b. Let availableLocale be ! BestAvailableLocale(availableLocales, noExtensionsLocale). + let available_locale = best_available_locale(available_locales, &no_extensions_locale); + + // c. If availableLocale is not undefined, then + if let Some(available_locale) = available_locale { + // i. Set result.[[locale]] to availableLocale. + // Assignment deferred. See return statement below. + // ii. If locale and noExtensionsLocale are not the same String value, then + let maybe_ext = if locale_str.eq(&no_extensions_locale) { + JsString::empty() + } else { + // 1. Let extension be the String value consisting of the substring of the Unicode + // locale extension sequence within locale. + // 2. Set result.[[extension]] to extension. + JsString::new(parsed_locale.extensions.to_string()) + }; + + // iii. Return result. + return MatcherRecord { + locale: available_locale, + extension: maybe_ext, + }; + } + } + + // 3. Let defLocale be ! DefaultLocale(). + // 4. 
Set result.[[locale]] to defLocale. + // 5. Return result. + MatcherRecord { + locale: default_locale(), + extension: JsString::empty(), + } +} + +/// The `BestFitMatcher` abstract operation compares `requestedLocales`, which must be a `List` +/// as returned by `CanonicalizeLocaleList`, against the locales in `availableLocales` and +/// determines the best available language to meet the request. The algorithm is implementation +/// dependent, but should produce results that a typical user of the requested locales would +/// perceive as at least as good as those produced by the `LookupMatcher` abstract operation. +/// +/// More information: +/// - [ECMAScript reference][spec] +/// +/// [spec]: https://tc39.es/ecma402/#sec-bestfitmatcher +fn best_fit_matcher( + available_locales: &[JsString], + requested_locales: &[JsString], +) -> MatcherRecord { + lookup_matcher(available_locales, requested_locales) +} + +/// `Keyword` structure is a pair of keyword key and keyword value. +#[derive(Debug)] +struct Keyword { + key: JsString, + value: JsString, +} + +/// `UniExtRecord` structure represents unicode extension records. +/// +/// It contains the list of unicode `extension` attributes and the list of `keywords`. +/// +/// For example: +/// +/// - `-u-nu-thai` has no attributes and the list of keywords contains `(nu:thai)` pair. +#[allow(dead_code)] +#[derive(Debug)] +struct UniExtRecord { + attributes: Vec, // never read at this point + keywords: Vec, +} + +/// The `UnicodeExtensionComponents` abstract operation returns the attributes and keywords from +/// `extension`, which must be a String value whose contents are a `Unicode locale extension` +/// sequence. +/// +/// More information: +/// - [ECMAScript reference][spec] +/// +/// [spec]: https://tc39.es/ecma402/#sec-unicode-extension-components +fn unicode_extension_components(extension: &JsString) -> UniExtRecord { + // 1. Let attributes be a new empty List. + let mut attributes = Vec::::new(); + + // 2. 
Let keywords be a new empty List. + let mut keywords = Vec::::new(); + + // 3. Let keyword be undefined. + let mut keyword: Option = None; + + // 4. Let size be the length of extension. + let size = extension.len(); + + // 5. Let k be 3. + let mut k = 3; + + // 6. Repeat, while k < size, + while k < size { + // a. Let e be ! StringIndexOf(extension, "-", k). + let e = extension.index_of(&JsString::new("-"), k); + + // b. If e = -1, let len be size - k; else let len be e - k. + let len = match e { + Some(pos) => pos - k, + None => size - k, + }; + + // c. Let subtag be the String value equal to the substring of extension consisting of the + // code units at indices k (inclusive) through k + len (exclusive). + let subtag = JsString::new(&extension[k..k + len]); + + // d. If keyword is undefined and len ≠ 2, then + if keyword.is_none() && len != 2 { + // i. If subtag is not an element of attributes, then + if !attributes.contains(&subtag) { + // 1. Append subtag to attributes. + attributes.push(subtag); + } + // e. Else if len = 2, then + } else if len == 2 { + // i. If keyword is not undefined and keywords does not contain an element + // whose [[Key]] is the same as keyword.[[Key]], then + // 1. Append keyword to keywords. + if let Some(keyword_val) = keyword { + let has_key = keywords.iter().any(|elem| elem.key == keyword_val.key); + if !has_key { + keywords.push(keyword_val); + } + }; + + // ii. Set keyword to the Record { [[Key]]: subtag, [[Value]]: "" }. + keyword = Some(Keyword { + key: subtag, + value: JsString::empty(), + }); + // f. Else, + } else { + // i. If keyword.[[Value]] is the empty String, then + // 1. Set keyword.[[Value]] to subtag. + // ii. Else, + // 1. Set keyword.[[Value]] to the string-concatenation of keyword.[[Value]], "-", and subtag. 
+ if let Some(keyword_val) = keyword { + let new_keyword_val = if keyword_val.value.is_empty() { + subtag + } else { + JsString::new(format!("{}-{subtag}", keyword_val.value)) + }; + + keyword = Some(Keyword { + key: keyword_val.key, + value: new_keyword_val, + }); + }; + } + + // g. Let k be k + len + 1. + k = k + len + 1; + } + + // 7. If keyword is not undefined and keywords does not contain an element whose [[Key]] is + // the same as keyword.[[Key]], then + // a. Append keyword to keywords. + if let Some(keyword_val) = keyword { + let has_key = keywords.iter().any(|elem| elem.key == keyword_val.key); + if !has_key { + keywords.push(keyword_val); + } + }; + + // 8. Return the Record { [[Attributes]]: attributes, [[Keywords]]: keywords }. + UniExtRecord { + attributes, + keywords, + } +} + +/// The `InsertUnicodeExtensionAndCanonicalize` abstract operation inserts `extension`, which must +/// be a Unicode locale extension sequence, into `locale`, which must be a String value with a +/// structurally valid and canonicalized Unicode BCP 47 locale identifier. +/// +/// More information: +/// - [ECMAScript reference][spec] +/// +/// [spec]: https://tc39.es/ecma402/#sec-insert-unicode-extension-and-canonicalize +fn insert_unicode_extension_and_canonicalize(locale: &str, extension: &str) -> JsString { + // TODO 1. Assert: locale does not contain a substring that is a Unicode locale extension sequence. + // TODO 2. Assert: extension is a Unicode locale extension sequence. + // TODO 3. Assert: tag matches the unicode_locale_id production. + // 4. Let privateIndex be ! StringIndexOf(locale, "-x-", 0). + let private_index = locale.find("-x-"); + let new_locale = match private_index { + // 5. If privateIndex = -1, then + None => { + // a. Let locale be the string-concatenation of locale and extension. + locale.to_owned() + extension + } + // 6. Else, + Some(idx) => { + // a. 
Let preExtension be the substring of locale from position 0, inclusive, + // to position privateIndex, exclusive. + let pre_extension = &locale[0..idx]; + + // b. Let postExtension be the substring of locale from position privateIndex to + // the end of the string. + let post_extension = &locale[idx..]; + + // c. Let locale be the string-concatenation of preExtension, extension, + // and postExtension. + pre_extension.to_owned() + extension + post_extension + } + }; + + // TODO 7. Assert: ! IsStructurallyValidLanguageTag(locale) is true. + // 8. Return ! CanonicalizeUnicodeLocaleId(locale). + Intl::canonicalize_locale(&new_locale) +} + +/// `LocaleDataRecord` is the type of `locale_data` argument in `resolve_locale` subroutine. +/// +/// It is an alias for a map where key is a string and value is another map. +/// +/// Value of that inner map is a vector of strings representing locale parameters. +type LocaleDataRecord = FxHashMap>>; + +/// `DateTimeFormatRecord` type aggregates `locale_matcher` selector and `properties` map. +/// +/// It is used as a type of `options` parameter in `resolve_locale` subroutine. +#[derive(Debug)] +struct DateTimeFormatRecord { + pub(crate) locale_matcher: JsString, + pub(crate) properties: FxHashMap, +} + +/// `ResolveLocaleRecord` type consists of unicode `locale` string, `data_locale` string and `properties` map. +/// +/// This is a return value for `resolve_locale` subroutine. +#[derive(Debug)] +struct ResolveLocaleRecord { + pub(crate) locale: JsString, + pub(crate) properties: FxHashMap, + pub(crate) data_locale: JsString, +} + +/// The `ResolveLocale` abstract operation compares a BCP 47 language priority list +/// `requestedLocales` against the locales in `availableLocales` and determines the best +/// available language to meet the request. `availableLocales`, `requestedLocales`, and +/// `relevantExtensionKeys` must be provided as `List` values, options and `localeData` as Records. 
+/// +/// More information: +/// - [ECMAScript reference][spec] +/// +/// [spec]: https://tc39.es/ecma402/#sec-resolvelocale +#[allow(dead_code)] +fn resolve_locale( + available_locales: &[JsString], + requested_locales: &[JsString], + options: &DateTimeFormatRecord, + relevant_extension_keys: &[JsString], + locale_data: &LocaleDataRecord, + context: &mut Context, +) -> ResolveLocaleRecord { + // 1. Let matcher be options.[[localeMatcher]]. + let matcher = &options.locale_matcher; + // 2. If matcher is "lookup", then + // a. Let r be ! LookupMatcher(availableLocales, requestedLocales). + // 3. Else, + // a. Let r be ! BestFitMatcher(availableLocales, requestedLocales). + let r = if matcher.eq(&JsString::new("lookup")) { + lookup_matcher(available_locales, requested_locales) + } else { + best_fit_matcher(available_locales, requested_locales) + }; + + // 4. Let foundLocale be r.[[locale]]. + let mut found_locale = r.locale; + + // 5. Let result be a new Record. + let mut result = ResolveLocaleRecord { + locale: JsString::empty(), + properties: FxHashMap::default(), + data_locale: JsString::empty(), + }; + + // 6. Set result.[[dataLocale]] to foundLocale. + result.data_locale = found_locale.clone(); + + // 7. If r has an [[extension]] field, then + let keywords = if r.extension.is_empty() { + Vec::::new() + } else { + // a. Let components be ! UnicodeExtensionComponents(r.[[extension]]). + let components = unicode_extension_components(&r.extension); + // b. Let keywords be components.[[Keywords]]. + components.keywords + }; + + // 8. Let supportedExtension be "-u". + let mut supported_extension = JsString::new("-u"); + + // 9. For each element key of relevantExtensionKeys, do + for key in relevant_extension_keys { + // a. Let foundLocaleData be localeData.[[]]. + // TODO b. Assert: Type(foundLocaleData) is Record. 
+ let found_locale_data = match locale_data.get(&found_locale) { + Some(locale_value) => locale_value.clone(), + None => FxHashMap::default(), + }; + + // c. Let keyLocaleData be foundLocaleData.[[]]. + // TODO d. Assert: Type(keyLocaleData) is List. + let key_locale_data = match found_locale_data.get(key) { + Some(locale_vec) => locale_vec.clone(), + None => Vec::new(), + }; + + // e. Let value be keyLocaleData[0]. + // TODO f. Assert: Type(value) is either String or Null. + let mut value = match key_locale_data.get(0) { + Some(first_elt) => JsValue::String(first_elt.clone()), + None => JsValue::null(), + }; + + // g. Let supportedExtensionAddition be "". + let mut supported_extension_addition = JsString::empty(); + + // h. If r has an [[extension]] field, then + if !r.extension.is_empty() { + // i. If keywords contains an element whose [[Key]] is the same as key, then + // 1. Let entry be the element of keywords whose [[Key]] is the same as key. + let maybe_entry = keywords.iter().find(|elem| key.eq(&elem.key)); + if let Some(entry) = maybe_entry { + // 2. Let requestedValue be entry.[[Value]]. + let requested_value = &entry.value; + + // 3. If requestedValue is not the empty String, then + if !requested_value.is_empty() { + // a. If keyLocaleData contains requestedValue, then + if key_locale_data.contains(requested_value) { + // i. Let value be requestedValue. + value = JsValue::String(JsString::new(requested_value)); + // ii. Let supportedExtensionAddition be the string-concatenation + // of "-", key, "-", and value. + supported_extension_addition = + JsString::concat_array(&["-", key, "-", requested_value]); + } + // 4. Else if keyLocaleData contains "true", then + } else if key_locale_data.contains(&JsString::new("true")) { + // a. Let value be "true". + value = JsValue::String(JsString::new("true")); + // b. Let supportedExtensionAddition be the string-concatenation of "-" and key. 
+ supported_extension_addition = JsString::concat_array(&["-", key]); + } + } + } + + // i. If options has a field [[]], then + if options.properties.contains_key(key) { + // i. Let optionsValue be options.[[]]. + // TODO ii. Assert: Type(optionsValue) is either String, Undefined, or Null. + let mut options_value = options + .properties + .get(key) + .unwrap_or(&JsValue::undefined()) + .clone(); + + // iii. If Type(optionsValue) is String, then + if options_value.is_string() { + // TODO 1. Let optionsValue be the string optionsValue after performing the + // algorithm steps to transform Unicode extension values to canonical syntax + // per Unicode Technical Standard #35 LDML § 3.2.1 Canonical Unicode Locale + // Identifiers, treating key as ukey and optionsValue as uvalue productions. + + // TODO 2. Let optionsValue be the string optionsValue after performing the + // algorithm steps to replace Unicode extension values with their canonical + // form per Unicode Technical Standard #35 LDML § 3.2.1 Canonical Unicode + // Locale Identifiers, treating key as ukey and optionsValue as uvalue + // productions. + + // 3. If optionsValue is the empty String, then + if let Some(options_val_str) = options_value.as_string() { + if options_val_str.is_empty() { + // a. Let optionsValue be "true". + options_value = JsValue::String(JsString::new("true")); + } + } + } + + // iv. If keyLocaleData contains optionsValue, then + let options_val_str = options_value + .to_string(context) + .unwrap_or_else(|_| JsString::empty()); + if key_locale_data.contains(&options_val_str) { + // 1. If SameValue(optionsValue, value) is false, then + if !options_value.eq(&value) { + // a. Let value be optionsValue. + value = options_value; + + // b. Let supportedExtensionAddition be "". + supported_extension_addition = JsString::empty(); + } + } + } + + // j. Set result.[[]] to value. + result.properties.insert(key.clone(), value); + + // k. Append supportedExtensionAddition to supportedExtension. 
+        supported_extension = JsString::concat(supported_extension, &supported_extension_addition);
+    }
+
+    // 10. If the number of elements in supportedExtension is greater than 2, then
+    if supported_extension.len() > 2 {
+        // a. Let foundLocale be InsertUnicodeExtensionAndCanonicalize(foundLocale, supportedExtension).
+        found_locale =
+            insert_unicode_extension_and_canonicalize(&found_locale, &supported_extension);
+    }
+
+    // 11. Set result.[[locale]] to foundLocale.
+    result.locale = found_locale;
+
+    // 12. Return result.
+    result
+}
diff --git a/boa_engine/src/builtins/intl/tests.rs b/boa_engine/src/builtins/intl/tests.rs
new file mode 100644
index 0000000000..b0a3d8de36
--- /dev/null
+++ b/boa_engine/src/builtins/intl/tests.rs
@@ -0,0 +1,263 @@
+use crate::{Context, JsString};
+
+use rustc_hash::FxHashMap;
+
+// Covers `best_available_locale`: empty list, exact match, and fallback to a shorter tag.
+#[test]
+fn best_avail_loc() {
+    // No available locales: expect `None`.
+    let no_extensions_locale = JsString::new("en-US");
+    let available_locales = Vec::<JsString>::new();
+    assert_eq!(
+        crate::builtins::intl::best_available_locale(&available_locales, &no_extensions_locale,),
+        None
+    );
+
+    // The requested tag itself is available.
+    let no_extensions_locale = JsString::new("de-DE");
+    let available_locales = vec![no_extensions_locale.clone()];
+    assert_eq!(
+        crate::builtins::intl::best_available_locale(&available_locales, &no_extensions_locale,),
+        Some(no_extensions_locale)
+    );
+
+    // Only the bare language `fr` is available for `fr-CA`.
+    let locale_part = "fr";
+    let no_extensions_locale = JsString::new(format!("{}-CA", locale_part));
+    let available_locales = vec![JsString::new(locale_part)];
+    assert_eq!(
+        crate::builtins::intl::best_available_locale(&available_locales, &no_extensions_locale,),
+        Some(JsString::new(locale_part))
+    );
+
+    // `ja-Kana-JP-t` is also available, but the expected match is `ja-Kana-JP`.
+    let ja_kana_t = JsString::new("ja-Kana-JP-t");
+    let ja_kana = JsString::new("ja-Kana-JP");
+    let no_extensions_locale = JsString::new("ja-Kana-JP-t-it-latn-it");
+    let available_locales = vec![ja_kana_t, ja_kana.clone()];
+    assert_eq!(
+        crate::builtins::intl::best_available_locale(&available_locales, &no_extensions_locale,),
+        Some(ja_kana)
+    );
+}
+
+// Checks the `locale` and `extension` fields produced by the locale matchers.
+#[test]
+fn lookup_match() {
+    // available: [], requested: []
+    let available_locales = Vec::<JsString>::new();
+    let requested_locales = Vec::<JsString>::new();
+
+    let matcher = crate::builtins::intl::lookup_matcher(&available_locales, &requested_locales);
+    assert_eq!(matcher.locale, crate::builtins::intl::default_locale());
+    assert_eq!(matcher.extension, "");
+
+    // available: [de-DE], requested: []
+    let available_locales = vec![JsString::new("de-DE")];
+    let requested_locales = Vec::<JsString>::new();
+
+    let matcher = crate::builtins::intl::lookup_matcher(&available_locales, &requested_locales);
+    assert_eq!(matcher.locale, crate::builtins::intl::default_locale());
+    assert_eq!(matcher.extension, "");
+
+    // available: [fr-FR], requested: [fr-FR-u-hc-h12]
+    let available_locales = vec![JsString::new("fr-FR")];
+    let requested_locales = vec![JsString::new("fr-FR-u-hc-h12")];
+
+    let matcher = crate::builtins::intl::lookup_matcher(&available_locales, &requested_locales);
+    assert_eq!(matcher.locale, "fr-FR");
+    assert_eq!(matcher.extension, "-u-hc-h12");
+
+    // available: [es-ES], requested: [es-ES]
+    // NOTE(review): this case goes through `best_fit_matcher`, not `lookup_matcher` - confirm intent.
+    let available_locales = vec![JsString::new("es-ES")];
+    let requested_locales = vec![JsString::new("es-ES")];
+
+    let matcher = crate::builtins::intl::best_fit_matcher(&available_locales, &requested_locales);
+    assert_eq!(matcher.locale, "es-ES");
+    assert_eq!(matcher.extension, "");
+}
+
+// Checks where `insert_unicode_extension_and_canonicalize` places the extension in the tag.
+#[test]
+fn insert_unicode_ext() {
+    // Empty extension: the locale is expected back unchanged.
+    let locale = JsString::new("hu-HU");
+    let ext = JsString::empty();
+    assert_eq!(
+        crate::builtins::intl::insert_unicode_extension_and_canonicalize(&locale, &ext),
+        locale
+    );
+
+    // Extension is appended to a plain tag.
+    let locale = JsString::new("hu-HU");
+    let ext = JsString::new("-u-hc-h12");
+    assert_eq!(
+        crate::builtins::intl::insert_unicode_extension_and_canonicalize(&locale, &ext),
+        JsString::new("hu-HU-u-hc-h12")
+    );
+
+    // Extension is expected ahead of the trailing `-x-PRIVATE` part.
+    let locale = JsString::new("hu-HU-x-PRIVATE");
+    let ext = JsString::new("-u-hc-h12");
+    assert_eq!(
+        crate::builtins::intl::insert_unicode_extension_and_canonicalize(&locale, &ext),
+        JsString::new("hu-HU-u-hc-h12-x-PRIVATE")
+    );
+}
+
+// Checks the attributes and keywords parsed by `unicode_extension_components`.
+#[test]
+fn uni_ext_comp() {
+    // Two keywords, no attributes.
+    let ext = JsString::new("-u-ca-japanese-hc-h12");
+    let components = crate::builtins::intl::unicode_extension_components(&ext);
+    assert!(components.attributes.is_empty());
+    assert_eq!(components.keywords.len(), 2);
+    assert_eq!(components.keywords[0].key, "ca");
+    assert_eq!(components.keywords[0].value, "japanese");
+    assert_eq!(components.keywords[1].key, "hc");
+    assert_eq!(components.keywords[1].value, "h12");
+
+    // One attribute followed by two keywords.
+    let ext = JsString::new("-u-alias-co-phonebk-ka-shifted");
+    let components = crate::builtins::intl::unicode_extension_components(&ext);
+    assert_eq!(components.attributes, vec![JsString::new("alias")]);
+    assert_eq!(components.keywords.len(), 2);
+    assert_eq!(components.keywords[0].key, "co");
+    assert_eq!(components.keywords[0].value, "phonebk");
+    assert_eq!(components.keywords[1].key, "ka");
+    assert_eq!(components.keywords[1].value, "shifted");
+
+    // `kk` has no value and is expected to yield an empty string.
+    let ext = JsString::new("-u-ca-buddhist-kk-nu-thai");
+    let components = crate::builtins::intl::unicode_extension_components(&ext);
+    assert!(components.attributes.is_empty());
+    assert_eq!(components.keywords.len(), 3);
+    assert_eq!(components.keywords[0].key, "ca");
+    assert_eq!(components.keywords[0].value, "buddhist");
+    assert_eq!(components.keywords[1].key, "kk");
+    assert_eq!(components.keywords[1].value, "");
+    assert_eq!(components.keywords[2].key, "nu");
+    assert_eq!(components.keywords[2].value, "thai");
+
+    // A multi-part value stays joined with `-`.
+    let ext = JsString::new("-u-ca-islamic-civil");
+    let components = crate::builtins::intl::unicode_extension_components(&ext);
+    assert!(components.attributes.is_empty());
+    assert_eq!(components.keywords.len(), 1);
+    assert_eq!(components.keywords[0].key, "ca");
+    assert_eq!(components.keywords[0].value, "islamic-civil");
+}
+
+// Checks `resolve_locale` with the "lookup" and "best-fit" matchers and no extension keys.
+#[test]
+fn locale_resolution() {
+    let mut context = Context::default();
+
+    // test lookup
+    let available_locales = Vec::<JsString>::new();
+    let requested_locales = Vec::<JsString>::new();
+    let relevant_extension_keys = Vec::<JsString>::new();
+    let locale_data = FxHashMap::default();
+    let options = crate::builtins::intl::DateTimeFormatRecord {
+        locale_matcher: JsString::new("lookup"),
+        properties: FxHashMap::default(),
+    };
+
+    let locale_record = crate::builtins::intl::resolve_locale(
+        &available_locales,
+        &requested_locales,
+        &options,
+        &relevant_extension_keys,
+        &locale_data,
+        &mut context,
+    );
+    assert_eq!(
+        locale_record.locale,
+        crate::builtins::intl::default_locale()
+    );
+    assert_eq!(
+        locale_record.data_locale,
+        crate::builtins::intl::default_locale()
+    );
+    assert!(locale_record.properties.is_empty());
+
+    // test best fit
+    let available_locales = Vec::<JsString>::new();
+    let requested_locales = Vec::<JsString>::new();
+    let relevant_extension_keys = Vec::<JsString>::new();
+    let locale_data = FxHashMap::default();
+    let options = crate::builtins::intl::DateTimeFormatRecord {
+        locale_matcher: JsString::new("best-fit"),
+        properties: FxHashMap::default(),
+    };
+
+    let locale_record = crate::builtins::intl::resolve_locale(
+        &available_locales,
+        &requested_locales,
+        &options,
+        &relevant_extension_keys,
+        &locale_data,
+        &mut context,
+    );
+    assert_eq!(
+        locale_record.locale,
+        crate::builtins::intl::default_locale()
+    );
+    assert_eq!(
+        locale_record.data_locale,
+        crate::builtins::intl::default_locale()
+    );
+    assert!(locale_record.properties.is_empty());
+
+    // available: [es-ES], requested: [es-ES]
+    let available_locales = vec![JsString::new("es-ES")];
+    let requested_locales = vec![JsString::new("es-ES")];
+    let relevant_extension_keys = Vec::<JsString>::new();
+    let locale_data = FxHashMap::default();
+    let options = crate::builtins::intl::DateTimeFormatRecord {
+        locale_matcher: JsString::new("lookup"),
+        properties: FxHashMap::default(),
+    };
+
+    let locale_record = crate::builtins::intl::resolve_locale(
+        &available_locales,
+        &requested_locales,
+        &options,
+        &relevant_extension_keys,
+        &locale_data,
+        &mut context,
+    );
+    assert_eq!(locale_record.locale, "es-ES");
+    assert_eq!(locale_record.data_locale, "es-ES");
+    assert!(locale_record.properties.is_empty());
+
+    // available: [zh-CN], requested: []
+    let available_locales = vec![JsString::new("zh-CN")];
+    let requested_locales = Vec::<JsString>::new();
+    let relevant_extension_keys = Vec::<JsString>::new();
+    let locale_data = FxHashMap::default();
+    let options = crate::builtins::intl::DateTimeFormatRecord {
+        locale_matcher: JsString::new("lookup"),
+        properties: FxHashMap::default(),
+    };
+
+    let locale_record = crate::builtins::intl::resolve_locale(
+        &available_locales,
+        &requested_locales,
+        &options,
+        &relevant_extension_keys,
+        &locale_data,
+        &mut context,
+    );
+    assert_eq!(
+        locale_record.locale,
+        crate::builtins::intl::default_locale()
+    );
+    assert_eq!(
+        locale_record.data_locale,
+        crate::builtins::intl::default_locale()
+    );
+    assert!(locale_record.properties.is_empty());
+}