Browse Source

Implement ResolveLocale helper (#2036)

<!---
Thank you for contributing to Boa! Please fill out the template below, and remove or add any
information as you feel necessary.
--->

This Pull Request implements the `ResolveLocale` abstract operation, which is required for further `InitializeDateTimeFormat` development.

It changes the following:

- Adds several helpers that operate on locale extensions
- Adds DefaultLocale placeholder
- Implements BestAvailableLocale and locale matchers
- Implements UnicodeExtensionComponents
- Introduces testing
pull/2059/head
NorbertGarfield 2 years ago
parent
commit
d40d938d5a
  1. 336
      Cargo.lock
  2. 1
      boa_engine/Cargo.toml
  3. 509
      boa_engine/src/builtins/intl/mod.rs
  4. 246
      boa_engine/src/builtins/intl/tests.rs

336
Cargo.lock generated

@ -100,6 +100,7 @@ dependencies = [
"fast-float", "fast-float",
"float-cmp", "float-cmp",
"gc", "gc",
"icu",
"indexmap", "indexmap",
"jemallocator", "jemallocator",
"num-bigint", "num-bigint",
@ -445,6 +446,17 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "displaydoc"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3bf95dc3f046b9da4f2d51833c0d3547d8564ef6910f5c1ed130306a75b92886"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "dyn-clone" name = "dyn-clone"
version = "1.0.5" version = "1.0.5"
@ -511,6 +523,18 @@ dependencies = [
"windows-sys", "windows-sys",
] ]
[[package]]
name = "fixed_decimal"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "857766d9808ad8f9033fe4e6499fcc6504ae485cc99ef2f3a98b4850c23b895a"
dependencies = [
"displaydoc",
"smallvec",
"static_assertions",
"writeable",
]
[[package]] [[package]]
name = "float-cmp" name = "float-cmp"
version = "0.9.0" version = "0.9.0"
@ -608,6 +632,189 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "icu"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "904a80ce690d14859a54e02d803eefba4ecd021882b8ab0492e455951d4df9a5"
dependencies = [
"fixed_decimal",
"icu_calendar",
"icu_datetime",
"icu_decimal",
"icu_locale_canonicalizer",
"icu_locid",
"icu_locid_macros",
"icu_plurals",
"icu_properties",
]
[[package]]
name = "icu_calendar"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07549f5d96acacdee05054133bd74ab1179211185ebe886f953fe1a0dd634096"
dependencies = [
"displaydoc",
"icu_provider",
"litemap",
"tinystr",
]
[[package]]
name = "icu_codepointtrie"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b54c854632ef55b92a825f9f26038a3be7f72116036aee05c522b6a65323beb"
dependencies = [
"displaydoc",
"serde",
"yoke",
"zerovec",
]
[[package]]
name = "icu_datetime"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6cd9ae537546879a4e57a09d3fc7f0ae48a1c2d8f605ce14536b04f99afe5f3e"
dependencies = [
"displaydoc",
"either",
"icu_calendar",
"icu_locid",
"icu_plurals",
"icu_provider",
"litemap",
"num_enum",
"serde",
"smallvec",
"tinystr",
"writeable",
"zerovec",
]
[[package]]
name = "icu_decimal"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bb632b080fd9944616933eb659279228d43801d4c58152e2938fac6f94ecc01"
dependencies = [
"displaydoc",
"fixed_decimal",
"icu_locid",
"icu_provider",
"serde",
"writeable",
]
[[package]]
name = "icu_locale_canonicalizer"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "293bbe9de3262a23f0dac731e1c52927f54347d0fd845a1f5aa7de3cc26f3358"
dependencies = [
"icu_locid",
"icu_provider",
"litemap",
"serde",
"tinystr",
]
[[package]]
name = "icu_locid"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83a3b8515791232b1be02c21467c123216a0e60fd024f4a74cc119ef5b1af79e"
dependencies = [
"displaydoc",
"serde",
"tinystr",
"writeable",
]
[[package]]
name = "icu_locid_macros"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c40e3635df35d94a0294b07efadb52b58017a3e16fe86a5ffddfc3e56a36ae1"
dependencies = [
"icu_locid",
"proc-macro-crate",
"tinystr",
]
[[package]]
name = "icu_plurals"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34c03a32a72566b65e6245ad4e9f8e4d470e1decb1fcd10a9cab061bb9fbce92"
dependencies = [
"displaydoc",
"fixed_decimal",
"icu_locid",
"icu_provider",
"num_enum",
"serde",
"zerovec",
]
[[package]]
name = "icu_properties"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3db71ea31fe47694e165989886826e23f9defbae5dbd002e678d756086d1f5c7"
dependencies = [
"displaydoc",
"icu_codepointtrie",
"icu_provider",
"icu_uniset",
"num_enum",
"serde",
"zerovec",
]
[[package]]
name = "icu_provider"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "496b8bc2f7c2fe705191442201ea0db4c86153cabe3f39dbddb95cad3aca6454"
dependencies = [
"displaydoc",
"icu_locid",
"icu_provider_macros",
"litemap",
"serde",
"tinystr",
"writeable",
"yoke",
]
[[package]]
name = "icu_provider_macros"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea1a9a3f206d9be3e027c3d1448aa8edd690c0aa789d8dca01a17fa76fb79524"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "icu_uniset"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ce83f199e54f8e0a948d7b737a62a7e4075760d6aef671ef85e52734a05db66"
dependencies = [
"displaydoc",
"litemap",
"serde",
"tinystr",
"yoke",
"zerovec",
]
[[package]] [[package]]
name = "indexmap" name = "indexmap"
version = "1.8.1" version = "1.8.1"
@ -708,6 +915,16 @@ version = "0.0.42"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5284f00d480e1c39af34e72f8ad60b94f47007e3481cd3b731c1d67190ddc7b7" checksum = "5284f00d480e1c39af34e72f8ad60b94f47007e3481cd3b731c1d67190ddc7b7"
[[package]]
name = "litemap"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5568cf6856c84333bda775ba41b834b20f75d64888067c8c2e26a09154a5730e"
dependencies = [
"serde",
"serde_json",
]
[[package]] [[package]]
name = "lock_api" name = "lock_api"
version = "0.4.7" version = "0.4.7"
@ -844,6 +1061,26 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "num_enum"
version = "0.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf5395665662ef45796a4ff5486c5d41d29e0c09640af4c5f17fd94ee2c119c9"
dependencies = [
"num_enum_derive",
]
[[package]]
name = "num_enum_derive"
version = "0.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b0498641e53dd6ac1a4f22547548caa6864cc4933784319cd1775271c5a46ce"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "once_cell" name = "once_cell"
version = "1.10.0" version = "1.10.0"
@ -974,6 +1211,16 @@ version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872"
[[package]]
name = "proc-macro-crate"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e17d47ce914bf4de440332250b0edd23ce48c005f59fab39d3335866b114f11a"
dependencies = [
"thiserror",
"toml",
]
[[package]] [[package]]
name = "proc-macro-error" name = "proc-macro-error"
version = "1.0.4" version = "1.0.4"
@ -1298,6 +1545,21 @@ name = "smallvec"
version = "1.8.0" version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83"
dependencies = [
"serde",
]
[[package]]
name = "stable_deref_trait"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]] [[package]]
name = "str-buf" name = "str-buf"
@ -1436,6 +1698,32 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "tinystr"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "954af207a9e273b644c2c0d29d79d7cba9c22a66e633535d0d0d3712d7e50563"
dependencies = [
"serde",
"tinystr-macros",
"tinystr-raw",
]
[[package]]
name = "tinystr-macros"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f65be51117c325c2b58eec529be7a0857d11527a9029973b58810a4c63e77a6"
dependencies = [
"tinystr-raw",
]
[[package]]
name = "tinystr-raw"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10f87ef8b0485e4efff5cac95608adc3251e412fef6039ecd56c5618c8003895"
[[package]] [[package]]
name = "tinytemplate" name = "tinytemplate"
version = "1.2.1" version = "1.2.1"
@ -1461,6 +1749,15 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
[[package]]
name = "toml"
version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d82e1a7758622a465f8cee077614c73484dac5b836c02ff6a40d5d1010324d7"
dependencies = [
"serde",
]
[[package]] [[package]]
name = "unicode-general-category" name = "unicode-general-category"
version = "0.5.1" version = "0.5.1"
@ -1667,6 +1964,12 @@ version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08cabc9f0066848fef4bc6a1c1668e6efce38b661d2aeec75d18d8617eebb5f1" checksum = "08cabc9f0066848fef4bc6a1c1668e6efce38b661d2aeec75d18d8617eebb5f1"
[[package]]
name = "writeable"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "063dba1bbc4c97dfb9b9e57cf4bcf3bac1ebfd2ef60bc609fc08dcd0228c3de5"
[[package]] [[package]]
name = "yaml-rust" name = "yaml-rust"
version = "0.4.5" version = "0.4.5"
@ -1675,3 +1978,36 @@ checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85"
dependencies = [ dependencies = [
"linked-hash-map", "linked-hash-map",
] ]
[[package]]
name = "yoke"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a32bb18a3c6c8c2726ad84153babdf8f5336a7d5551a9177a79d006f0ce789f"
dependencies = [
"serde",
"stable_deref_trait",
"yoke-derive",
]
[[package]]
name = "yoke-derive"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77b70271795226acc88b2dfe0651e91e6402b64b1887c60722e04be55ff15941"
dependencies = [
"proc-macro2",
"quote",
"syn",
"synstructure",
]
[[package]]
name = "zerovec"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ae959aabd79f0f4dec802b32404cf41776d03757dd5b6e815d3d1ca076fa939"
dependencies = [
"serde",
"yoke",
]

1
boa_engine/Cargo.toml

@ -41,6 +41,7 @@ unicode-normalization = "0.1.19"
dyn-clone = "1.0.5" dyn-clone = "1.0.5"
once_cell = "1.10.0" once_cell = "1.10.0"
tap = "1.0.1" tap = "1.0.1"
icu = "0.5.0"
[dev-dependencies] [dev-dependencies]
criterion = "0.3.5" criterion = "0.3.5"

509
boa_engine/src/builtins/intl/mod.rs

@ -17,11 +17,16 @@ use crate::{
}; };
pub mod date_time_format; pub mod date_time_format;
#[cfg(test)]
mod tests;
use boa_profiler::Profiler; use boa_profiler::Profiler;
use indexmap::IndexSet; use indexmap::IndexSet;
use rustc_hash::FxHashMap;
use tap::{Conv, Pipe}; use tap::{Conv, Pipe};
use icu::locid::Locale;
/// JavaScript `Intl` object. /// JavaScript `Intl` object.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct Intl; pub(crate) struct Intl;
@ -144,3 +149,507 @@ impl Intl {
))) )))
} }
} }
/// `MatcherRecord` pairs a matched unicode `locale` string with the unicode locale `extension`
/// that was attached to the requested locale.
///
/// This is the return value of the `lookup_matcher` and `best_fit_matcher` subroutines.
#[derive(Debug)]
struct MatcherRecord {
/// The locale selected by the matcher.
locale: JsString,
/// The extension sequence of the requested locale; `lookup_matcher` leaves it as the empty
/// string when the request carried no extension or when it fell back to the default locale.
extension: JsString,
}
/// The `DefaultLocale` abstract operation returns a String value representing the structurally
/// valid and canonicalized Unicode BCP 47 locale identifier for the host environment's current
/// locale.
///
/// This is currently a placeholder: the host environment is not consulted and the function
/// always returns `"en-US"`.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-defaultlocale
fn default_locale() -> JsString {
// FIXME: get the locale from the host environment instead of hard-coding it.
JsString::new("en-US")
}
/// The `BestAvailableLocale` abstract operation compares the provided argument `locale`,
/// which must be a String value with a structurally valid and canonicalized Unicode BCP 47
/// locale identifier, against the locales in `availableLocales` and returns either the longest
/// non-empty prefix of `locale` that is an element of `availableLocales`, or undefined if
/// there is no such element.
///
/// Returns `Some(prefix)` for the first (longest) matching prefix and `None` once the candidate
/// contains no more `-` separators and is still not available.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-bestavailablelocale
fn best_available_locale(available_locales: &[JsString], locale: &JsString) -> Option<JsString> {
    // 1. Let candidate be locale.
    let mut candidate = locale.clone();

    // 2. Repeat
    loop {
        // a. If availableLocales contains an element equal to candidate, return candidate.
        if available_locales.contains(&candidate) {
            return Some(candidate);
        }

        // b. Let pos be the character index of the last occurrence of "-" (U+002D) within
        //    candidate. If that character does not occur, return undefined.
        let pos = candidate.rfind('-')?;

        // c. If pos ≥ 2 and the character "-" occurs at index pos-2 of candidate, decrease pos
        //    by 2. This drops a trailing singleton subtag (e.g. the `-t` in `ja-JP-t-it`)
        //    together with the subtag that follows it. The byte at `pos - 2` is inspected
        //    directly, so no temporary string has to be allocated.
        let pos = if pos >= 2 && candidate.as_bytes()[pos - 2] == b'-' {
            pos - 2
        } else {
            pos
        };

        // d. Let candidate be the substring of candidate from position 0, inclusive, to
        //    position pos, exclusive. `pos` always sits on an ASCII `-`, so the slice is on a
        //    character boundary.
        candidate = JsString::new(&candidate[..pos]);
    }
}
/// The `LookupMatcher` abstract operation compares `requestedLocales`, which must be a `List`
/// as returned by `CanonicalizeLocaleList`, against the locales in `availableLocales` and
/// determines the best available language to meet the request.
///
/// The first requested locale for which `best_available_locale` finds a match wins; if none
/// matches, the record carries `default_locale()` and an empty extension.
///
/// # Panics
///
/// Panics with "Locale parsing failed" if a requested locale cannot be parsed by
/// `Locale::from_bytes`.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-lookupmatcher
fn lookup_matcher(available_locales: &[JsString], requested_locales: &[JsString]) -> MatcherRecord {
    // 1. Let result be a new Record.
    // 2. For each element locale of requestedLocales, do
    for requested in requested_locales {
        // a. Let noExtensionsLocale be the String value that is locale with any Unicode locale
        //    extension sequences removed.
        let parsed = Locale::from_bytes(requested.as_bytes()).expect("Locale parsing failed");
        let stripped = JsString::new(parsed.id.to_string());

        // b. Let availableLocale be ! BestAvailableLocale(availableLocales, noExtensionsLocale).
        // c. If availableLocale is not undefined, then
        if let Some(locale) = best_available_locale(available_locales, &stripped) {
            // i. Set result.[[locale]] to availableLocale.
            // ii. If locale and noExtensionsLocale are not the same String value, then
            //     1. Let extension be the String value consisting of the substring of the
            //        Unicode locale extension sequence within locale.
            //     2. Set result.[[extension]] to extension.
            let extension = if requested.eq(&stripped) {
                JsString::empty()
            } else {
                JsString::new(parsed.extensions.to_string())
            };

            // iii. Return result.
            return MatcherRecord { locale, extension };
        }
    }

    // 3. Let defLocale be ! DefaultLocale().
    // 4. Set result.[[locale]] to defLocale.
    // 5. Return result.
    MatcherRecord {
        locale: default_locale(),
        extension: JsString::empty(),
    }
}
/// The `BestFitMatcher` abstract operation compares `requestedLocales`, which must be a `List`
/// as returned by `CanonicalizeLocaleList`, against the locales in `availableLocales` and
/// determines the best available language to meet the request. The algorithm is implementation
/// dependent, but should produce results that a typical user of the requested locales would
/// perceive as at least as good as those produced by the `LookupMatcher` abstract operation.
///
/// This implementation simply delegates to `lookup_matcher`, so both matchers currently return
/// identical results.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-bestfitmatcher
fn best_fit_matcher(
available_locales: &[JsString],
requested_locales: &[JsString],
) -> MatcherRecord {
// No dedicated best-fit algorithm yet: reuse the lookup matcher.
lookup_matcher(available_locales, requested_locales)
}
/// `Keyword` structure is a pair of a unicode extension keyword key and its value.
///
/// For example, the extension `-u-nu-thai` yields the key `nu` with the value `thai`.
#[derive(Debug)]
struct Keyword {
/// The two-character keyword key (e.g. `nu`).
key: JsString,
/// The keyword value; multiple value subtags are joined with `-`, and the value is the empty
/// string when the key has no value subtag.
value: JsString,
}
/// `UniExtRecord` structure represents the components of a unicode locale extension sequence.
///
/// It contains the list of unicode `extension` attributes and the list of `keywords`. It is the
/// return value of `unicode_extension_components`.
///
/// For example:
///
/// - `-u-nu-thai` has no attributes and the list of keywords contains `(nu:thai)` pair.
/// - `-u-alias-co-phonebk` has the attribute `alias` and the keyword `(co:phonebk)`.
// `dead_code` is allowed because `attributes` is populated but not read yet.
#[allow(dead_code)]
#[derive(Debug)]
struct UniExtRecord {
attributes: Vec<JsString>, // never read at this point
keywords: Vec<Keyword>,
}
/// The `UnicodeExtensionComponents` abstract operation returns the attributes and keywords from
/// `extension`, which must be a String value whose contents are a `Unicode locale extension`
/// sequence.
///
/// Scanning starts at index 3, i.e. right after the leading `-u-`. Subtags seen before the first
/// two-character subtag are collected as attributes; every two-character subtag opens a new
/// keyword, and the subtags that follow it are joined with `-` to form its value. Duplicate
/// attributes and duplicate keyword keys are dropped, keeping the first occurrence.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-unicode-extension-components
fn unicode_extension_components(extension: &JsString) -> UniExtRecord {
    /// Appends `candidate` unless a keyword with the same key is already present.
    fn push_unique(keywords: &mut Vec<Keyword>, candidate: Keyword) {
        if !keywords.iter().any(|known| known.key == candidate.key) {
            keywords.push(candidate);
        }
    }

    // 1. Let attributes be a new empty List.
    let mut attributes: Vec<JsString> = Vec::new();
    // 2. Let keywords be a new empty List.
    let mut keywords: Vec<Keyword> = Vec::new();
    // 3. Let keyword be undefined.
    let mut keyword: Option<Keyword> = None;
    // 4. Let size be the length of extension.
    let size = extension.len();
    let separator = JsString::new("-");
    // 5. Let k be 3.
    let mut k = 3;

    // 6. Repeat, while k < size,
    while k < size {
        // a. Let e be ! StringIndexOf(extension, "-", k).
        // b. If e = -1, let len be size - k; else let len be e - k.
        let len = extension
            .index_of(&separator, k)
            .map_or(size - k, |e| e - k);

        // c. Let subtag be the String value equal to the substring of extension consisting of
        //    the code units at indices k (inclusive) through k + len (exclusive).
        let subtag = JsString::new(&extension[k..k + len]);

        if len == 2 {
            // e. Else if len = 2, then
            //    i. If keyword is not undefined and keywords does not contain an element
            //       whose [[Key]] is the same as keyword.[[Key]], then
            //       1. Append keyword to keywords.
            if let Some(finished) = keyword.take() {
                push_unique(&mut keywords, finished);
            }
            //    ii. Set keyword to the Record { [[Key]]: subtag, [[Value]]: "" }.
            keyword = Some(Keyword {
                key: subtag,
                value: JsString::empty(),
            });
        } else if let Some(current) = keyword.as_mut() {
            // f. Else,
            //    i. If keyword.[[Value]] is the empty String, then
            //       1. Set keyword.[[Value]] to subtag.
            //    ii. Else,
            //       1. Set keyword.[[Value]] to the string-concatenation of
            //          keyword.[[Value]], "-", and subtag.
            current.value = if current.value.is_empty() {
                subtag
            } else {
                JsString::new(format!("{}-{subtag}", current.value))
            };
        } else if !attributes.contains(&subtag) {
            // d. If keyword is undefined and len ≠ 2, then
            //    i. If subtag is not an element of attributes, then
            //       1. Append subtag to attributes.
            attributes.push(subtag);
        }

        // g. Let k be k + len + 1.
        k += len + 1;
    }

    // 7. If keyword is not undefined and keywords does not contain an element whose [[Key]] is
    //    the same as keyword.[[Key]], then
    //    a. Append keyword to keywords.
    if let Some(finished) = keyword {
        push_unique(&mut keywords, finished);
    }

    // 8. Return the Record { [[Attributes]]: attributes, [[Keywords]]: keywords }.
    UniExtRecord {
        attributes,
        keywords,
    }
}
/// The `InsertUnicodeExtensionAndCanonicalize` abstract operation inserts `extension`, which must
/// be a Unicode locale extension sequence, into `locale`, which must be a String value with a
/// structurally valid and canonicalized Unicode BCP 47 locale identifier.
///
/// The extension is appended to `locale`, or spliced in right before the first `-x-` private-use
/// sequence when there is one. The combined tag is then passed to `Intl::canonicalize_locale`.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-insert-unicode-extension-and-canonicalize
fn insert_unicode_extension_and_canonicalize(locale: &str, extension: &str) -> JsString {
    // TODO 1. Assert: locale does not contain a substring that is a Unicode locale extension sequence.
    // TODO 2. Assert: extension is a Unicode locale extension sequence.
    // TODO 3. Assert: tag matches the unicode_locale_id production.

    // 4. Let privateIndex be ! StringIndexOf(locale, "-x-", 0).
    let combined = match locale.find("-x-") {
        // 6. Else,
        //    a. Let preExtension be the substring of locale from position 0, inclusive,
        //       to position privateIndex, exclusive.
        //    b. Let postExtension be the substring of locale from position privateIndex to
        //       the end of the string.
        //    c. Let locale be the string-concatenation of preExtension, extension,
        //       and postExtension.
        Some(private_index) => {
            let (pre_extension, post_extension) = locale.split_at(private_index);
            format!("{pre_extension}{extension}{post_extension}")
        }
        // 5. If privateIndex = -1, then
        //    a. Let locale be the string-concatenation of locale and extension.
        None => format!("{locale}{extension}"),
    };

    // TODO 7. Assert: ! IsStructurallyValidLanguageTag(locale) is true.
    // 8. Return ! CanonicalizeUnicodeLocaleId(locale).
    Intl::canonicalize_locale(&combined)
}
/// `LocaleDataRecord` is the type of the `locale_data` argument of the `resolve_locale`
/// subroutine.
///
/// It is an alias for a two-level map: the outer key is a locale string and the inner key is an
/// extension key.
///
/// The value of the inner map is a vector of strings representing the locale parameters for
/// that key; `resolve_locale` treats the first element as the default value.
type LocaleDataRecord = FxHashMap<JsString, FxHashMap<JsString, Vec<JsString>>>;
/// `DateTimeFormatRecord` type aggregates the `locale_matcher` selector and the `properties` map.
///
/// It is used as the type of the `options` parameter of the `resolve_locale` subroutine.
#[derive(Debug)]
struct DateTimeFormatRecord {
/// Matcher selector: `"lookup"` selects `lookup_matcher`, any other value selects
/// `best_fit_matcher`.
pub(crate) locale_matcher: JsString,
/// Option values keyed by extension key; `resolve_locale` consults them for each relevant
/// extension key.
pub(crate) properties: FxHashMap<JsString, JsValue>,
}
/// `ResolveLocaleRecord` type consists of the unicode `locale` string, the `data_locale` string
/// and the `properties` map.
///
/// This is the return value of the `resolve_locale` subroutine.
#[derive(Debug)]
struct ResolveLocaleRecord {
/// The resolved locale, including any supported unicode extension keywords.
pub(crate) locale: JsString,
/// The resolved value for each relevant extension key.
pub(crate) properties: FxHashMap<JsString, JsValue>,
/// The locale selected by the matcher, before any extension is inserted.
pub(crate) data_locale: JsString,
}
/// The `ResolveLocale` abstract operation compares a BCP 47 language priority list
/// `requestedLocales` against the locales in `availableLocales` and determines the best
/// available language to meet the request. `availableLocales`, `requestedLocales`, and
/// `relevantExtensionKeys` must be provided as `List` values, options and `localeData` as Records.
///
/// For every key in `relevant_extension_keys` the resolved value is, in increasing priority:
/// the first entry of the locale data for that key (or `null` when there is none), the value
/// requested through the locale's unicode extension, and the value given in
/// `options.properties`. Extension and option values are only honoured when the locale data for
/// the key lists them. `context` is used only to convert option values to strings; a failed
/// conversion is treated as the empty string.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-resolvelocale
// Not called from non-test code yet; it is a prerequisite for `InitializeDateTimeFormat`.
#[allow(dead_code)]
fn resolve_locale(
    available_locales: &[JsString],
    requested_locales: &[JsString],
    options: &DateTimeFormatRecord,
    relevant_extension_keys: &[JsString],
    locale_data: &LocaleDataRecord,
    context: &mut Context,
) -> ResolveLocaleRecord {
    // 1. Let matcher be options.[[localeMatcher]].
    let matcher = &options.locale_matcher;
    // 2. If matcher is "lookup", then
    //    a. Let r be ! LookupMatcher(availableLocales, requestedLocales).
    // 3. Else,
    //    a. Let r be ! BestFitMatcher(availableLocales, requestedLocales).
    let r = if matcher.eq(&JsString::new("lookup")) {
        lookup_matcher(available_locales, requested_locales)
    } else {
        best_fit_matcher(available_locales, requested_locales)
    };

    // 4. Let foundLocale be r.[[locale]].
    let mut found_locale = r.locale;

    // 5. Let result be a new Record.
    // 6. Set result.[[dataLocale]] to foundLocale.
    // The record itself is assembled at the end; its parts are collected in locals.
    let data_locale = found_locale.clone();
    let mut properties = FxHashMap::default();

    // 7. If r has an [[extension]] field, then
    //    a. Let components be ! UnicodeExtensionComponents(r.[[extension]]).
    //    b. Let keywords be components.[[Keywords]].
    let keywords = if r.extension.is_empty() {
        Vec::new()
    } else {
        unicode_extension_components(&r.extension).keywords
    };

    // 8. Let supportedExtension be "-u".
    let mut supported_extension = JsString::new("-u");

    // 9.a. Let foundLocaleData be localeData.[[<foundLocale>]].
    // TODO 9.b. Assert: Type(foundLocaleData) is Record.
    // `foundLocale` does not change inside the loop, so the lookup is done once and borrowed.
    let found_locale_data = locale_data.get(&found_locale);

    // 9. For each element key of relevantExtensionKeys, do
    for key in relevant_extension_keys {
        // c. Let keyLocaleData be foundLocaleData.[[<key>]].
        // TODO d. Assert: Type(keyLocaleData) is List.
        // Missing locale data is treated as an empty list.
        let key_locale_data: &[JsString] = found_locale_data
            .and_then(|data| data.get(key))
            .map(Vec::as_slice)
            .unwrap_or_default();

        // e. Let value be keyLocaleData[0].
        // TODO f. Assert: Type(value) is either String or Null.
        let mut value = match key_locale_data.first() {
            Some(first) => JsValue::String(first.clone()),
            None => JsValue::null(),
        };

        // g. Let supportedExtensionAddition be "".
        let mut supported_extension_addition = JsString::empty();

        // h. If r has an [[extension]] field, then
        //    i. If keywords contains an element whose [[Key]] is the same as key, then
        //       1. Let entry be the element of keywords whose [[Key]] is the same as key.
        // `keywords` is empty when there is no extension, so no separate check is needed.
        if let Some(entry) = keywords.iter().find(|entry| key.eq(&entry.key)) {
            // 2. Let requestedValue be entry.[[Value]].
            let requested_value = &entry.value;
            // 3. If requestedValue is not the empty String, then
            if !requested_value.is_empty() {
                // a. If keyLocaleData contains requestedValue, then
                if key_locale_data.contains(requested_value) {
                    // i. Let value be requestedValue.
                    value = JsValue::String(requested_value.clone());
                    // ii. Let supportedExtensionAddition be the string-concatenation
                    //     of "-", key, "-", and value.
                    supported_extension_addition =
                        JsString::concat_array(&["-", key, "-", requested_value]);
                }
            // 4. Else if keyLocaleData contains "true", then
            } else if key_locale_data.contains(&JsString::new("true")) {
                // a. Let value be "true".
                value = JsValue::String(JsString::new("true"));
                // b. Let supportedExtensionAddition be the string-concatenation of "-" and key.
                supported_extension_addition = JsString::concat_array(&["-", key]);
            }
        }

        // i. If options has a field [[<key>]], then
        //    i. Let optionsValue be options.[[<key>]].
        // TODO ii. Assert: Type(optionsValue) is either String, Undefined, or Null.
        if let Some(option) = options.properties.get(key) {
            let mut options_value = option.clone();

            // iii. If Type(optionsValue) is String, then
            // TODO 1. Let optionsValue be the string optionsValue after performing the
            //         algorithm steps to transform Unicode extension values to canonical syntax
            //         per Unicode Technical Standard #35 LDML § 3.2.1 Canonical Unicode Locale
            //         Identifiers, treating key as ukey and optionsValue as uvalue productions.
            // TODO 2. Let optionsValue be the string optionsValue after performing the
            //         algorithm steps to replace Unicode extension values with their canonical
            //         form per Unicode Technical Standard #35 LDML § 3.2.1 Canonical Unicode
            //         Locale Identifiers, treating key as ukey and optionsValue as uvalue
            //         productions.
            //      3. If optionsValue is the empty String, then
            //         a. Let optionsValue be "true".
            if options_value
                .as_string()
                .map_or(false, |text| text.is_empty())
            {
                options_value = JsValue::String(JsString::new("true"));
            }

            // iv. If keyLocaleData contains optionsValue, then
            let options_val_str = options_value
                .to_string(context)
                .unwrap_or_else(|_| JsString::empty());
            //     1. If SameValue(optionsValue, value) is false, then
            if key_locale_data.contains(&options_val_str) && !options_value.eq(&value) {
                // a. Let value be optionsValue.
                value = options_value;
                // b. Let supportedExtensionAddition be "".
                supported_extension_addition = JsString::empty();
            }
        }

        // j. Set result.[[<key>]] to value.
        properties.insert(key.clone(), value);
        // k. Append supportedExtensionAddition to supportedExtension.
        supported_extension = JsString::concat(supported_extension, &supported_extension_addition);
    }

    // 10. If the number of elements in supportedExtension is greater than 2, then
    if supported_extension.len() > 2 {
        // a. Let foundLocale be
        //    InsertUnicodeExtensionAndCanonicalize(foundLocale, supportedExtension).
        found_locale =
            insert_unicode_extension_and_canonicalize(&found_locale, &supported_extension);
    }

    // 11. Set result.[[locale]] to foundLocale.
    // 12. Return result.
    ResolveLocaleRecord {
        locale: found_locale,
        properties,
        data_locale,
    }
}

246
boa_engine/src/builtins/intl/tests.rs

@ -0,0 +1,246 @@
use crate::{Context, JsString};
use rustc_hash::FxHashMap;
/// Checks `best_available_locale` against an empty list, an exact match, a language-only
/// fallback, and a request whose trailing singleton subtag has to be skipped.
#[test]
fn best_avail_loc() {
    use crate::builtins::intl::best_available_locale;

    // Nothing is available, so nothing can match.
    let requested = JsString::new("en-US");
    let available: Vec<JsString> = Vec::new();
    assert_eq!(best_available_locale(&available, &requested), None);

    // The requested locale itself is available.
    let requested = JsString::new("de-DE");
    let available = vec![requested.clone()];
    assert_eq!(
        best_available_locale(&available, &requested),
        Some(requested)
    );

    // Only the bare language is available: the region is truncated away.
    let requested = JsString::new("fr-CA");
    let available = vec![JsString::new("fr")];
    assert_eq!(
        best_available_locale(&available, &requested),
        Some(JsString::new("fr"))
    );

    // Truncation must not stop at the dangling singleton `-t`.
    let requested = JsString::new("ja-Kana-JP-t-it-latn-it");
    let available = vec![JsString::new("ja-Kana-JP-t"), JsString::new("ja-Kana-JP")];
    assert_eq!(
        best_available_locale(&available, &requested),
        Some(JsString::new("ja-Kana-JP"))
    );
}
/// Checks the default-locale fallback and extension extraction of `lookup_matcher`, plus a
/// plain exact match through `best_fit_matcher`.
#[test]
fn lookup_match() {
    use crate::builtins::intl::{best_fit_matcher, default_locale, lookup_matcher};

    let no_locales: Vec<JsString> = Vec::new();

    // available: [], requested: []
    let record = lookup_matcher(&no_locales, &no_locales);
    assert_eq!(record.locale, default_locale());
    assert_eq!(record.extension, "");

    // available: [de-DE], requested: []
    let available = vec![JsString::new("de-DE")];
    let record = lookup_matcher(&available, &no_locales);
    assert_eq!(record.locale, default_locale());
    assert_eq!(record.extension, "");

    // available: [fr-FR], requested: [fr-FR-u-hc-h12]
    let available = vec![JsString::new("fr-FR")];
    let requested = vec![JsString::new("fr-FR-u-hc-h12")];
    let record = lookup_matcher(&available, &requested);
    assert_eq!(record.locale, "fr-FR");
    assert_eq!(record.extension, "-u-hc-h12");

    // available: [es-ES], requested: [es-ES]
    let available = vec![JsString::new("es-ES")];
    let requested = vec![JsString::new("es-ES")];
    let record = best_fit_matcher(&available, &requested);
    assert_eq!(record.locale, "es-ES");
    assert_eq!(record.extension, "");
}
/// Checks `insert_unicode_extension_and_canonicalize` with an empty extension, a
/// plain locale, and a locale that ends in a private-use (`-x-`) sequence.
#[test]
fn insert_unicode_ext() {
    let plain_locale = JsString::new("hu-HU");
    let hour_cycle_ext = JsString::new("-u-hc-h12");

    // An empty extension leaves the locale as it was.
    let empty_ext = JsString::empty();
    assert_eq!(
        crate::builtins::intl::insert_unicode_extension_and_canonicalize(
            &plain_locale,
            &empty_ext
        ),
        plain_locale
    );

    // Without a private-use part the extension is appended at the end.
    assert_eq!(
        crate::builtins::intl::insert_unicode_extension_and_canonicalize(
            &plain_locale,
            &hour_cycle_ext
        ),
        JsString::new("hu-HU-u-hc-h12")
    );

    // With a private-use part the extension goes in front of the `-x-` sequence.
    let private_use_locale = JsString::new("hu-HU-x-PRIVATE");
    assert_eq!(
        crate::builtins::intl::insert_unicode_extension_and_canonicalize(
            &private_use_locale,
            &hour_cycle_ext
        ),
        JsString::new("hu-HU-u-hc-h12-x-PRIVATE")
    );
}
/// Checks how `unicode_extension_components` splits a `-u-` extension string into
/// attributes and key/value keywords.
#[test]
fn uni_ext_comp() {
    // Two keywords, no attributes.
    let ext = JsString::new("-u-ca-japanese-hc-h12");
    let components = crate::builtins::intl::unicode_extension_components(&ext);
    assert!(components.attributes.is_empty());
    assert_eq!(components.keywords.len(), 2);
    assert_eq!(components.keywords[0].key, "ca");
    assert_eq!(components.keywords[0].value, "japanese");
    assert_eq!(components.keywords[1].key, "hc");
    assert_eq!(components.keywords[1].value, "h12");

    // A leading subtag that is not a two-character key is reported as an attribute.
    let ext = JsString::new("-u-alias-co-phonebk-ka-shifted");
    let components = crate::builtins::intl::unicode_extension_components(&ext);
    assert_eq!(components.attributes, vec![JsString::new("alias")]);
    assert_eq!(components.keywords.len(), 2);
    assert_eq!(components.keywords[0].key, "co");
    assert_eq!(components.keywords[0].value, "phonebk");
    assert_eq!(components.keywords[1].key, "ka");
    assert_eq!(components.keywords[1].value, "shifted");

    // A key immediately followed by another key (`kk`) gets an empty value.
    let ext = JsString::new("-u-ca-buddhist-kk-nu-thai");
    let components = crate::builtins::intl::unicode_extension_components(&ext);
    assert!(components.attributes.is_empty());
    assert_eq!(components.keywords.len(), 3);
    assert_eq!(components.keywords[0].key, "ca");
    assert_eq!(components.keywords[0].value, "buddhist");
    assert_eq!(components.keywords[1].key, "kk");
    assert_eq!(components.keywords[1].value, "");
    assert_eq!(components.keywords[2].key, "nu");
    assert_eq!(components.keywords[2].value, "thai");

    // A value made of several subtags is kept together, joined by `-`.
    let ext = JsString::new("-u-ca-islamic-civil");
    let components = crate::builtins::intl::unicode_extension_components(&ext);
    assert!(components.attributes.is_empty());
    assert_eq!(components.keywords.len(), 1);
    assert_eq!(components.keywords[0].key, "ca");
    assert_eq!(components.keywords[0].value, "islamic-civil");
}
/// Checks `resolve_locale` with both locale matchers, with and without a requested
/// locale. Every case uses no relevant extension keys, empty locale data and empty
/// option properties, so the resolved record is expected to carry no properties.
#[test]
fn locale_resolution() {
    let mut context = Context::default();

    // Shared driver: each case differs only in the available/requested locale lists
    // and the matcher name, so the rest of the setup lives here once.
    let mut resolve = |available_locales: Vec<JsString>,
                       requested_locales: Vec<JsString>,
                       locale_matcher: &str| {
        let relevant_extension_keys = Vec::<JsString>::new();
        let locale_data = FxHashMap::default();
        let options = crate::builtins::intl::DateTimeFormatRecord {
            locale_matcher: JsString::new(locale_matcher),
            properties: FxHashMap::default(),
        };
        crate::builtins::intl::resolve_locale(
            &available_locales,
            &requested_locales,
            &options,
            &relevant_extension_keys,
            &locale_data,
            &mut context,
        )
    };

    // test lookup
    // available: [], requested: []
    let locale_record = resolve(Vec::new(), Vec::new(), "lookup");
    assert_eq!(
        locale_record.locale,
        crate::builtins::intl::default_locale()
    );
    assert_eq!(
        locale_record.data_locale,
        crate::builtins::intl::default_locale()
    );
    assert!(locale_record.properties.is_empty());

    // test best fit
    // available: [], requested: []
    let locale_record = resolve(Vec::new(), Vec::new(), "best-fit");
    assert_eq!(
        locale_record.locale,
        crate::builtins::intl::default_locale()
    );
    assert_eq!(
        locale_record.data_locale,
        crate::builtins::intl::default_locale()
    );
    assert!(locale_record.properties.is_empty());

    // available: [es-ES], requested: [es-ES]
    let locale_record = resolve(
        vec![JsString::new("es-ES")],
        vec![JsString::new("es-ES")],
        "lookup",
    );
    assert_eq!(locale_record.locale, "es-ES");
    assert_eq!(locale_record.data_locale, "es-ES");
    assert!(locale_record.properties.is_empty());

    // available: [zh-CN], requested: []
    // Nothing was requested, so the default locale is expected even though another
    // locale is available.
    let locale_record = resolve(vec![JsString::new("zh-CN")], Vec::new(), "lookup");
    assert_eq!(
        locale_record.locale,
        crate::builtins::intl::default_locale()
    );
    assert_eq!(
        locale_record.data_locale,
        crate::builtins::intl::default_locale()
    );
    assert!(locale_record.properties.is_empty());
}
Loading…
Cancel
Save