@ -14,18 +14,13 @@ use crate::{
} ;
use boa_macros ::js_str ;
use icu_collator ::provider ::CollationMetadataV1Marker ;
use icu_locid ::{
extensions ::unicode ::{ Key , Value } ,
subtags ::Variants ,
LanguageIdentifier , Locale ,
} ;
use icu_locid_transform ::LocaleCanonicalizer ;
use icu_provider ::{
DataError , DataErrorKind , DataLocale , DataProvider , DataRequest , DataRequestMetadata ,
KeyedDataMarker ,
} ;
use icu_segmenter ::provider ::WordBreakDataV1Marker ;
use icu_provider ::{ DataLocale , DataProvider , DataRequest , DataRequestMetadata , KeyedDataMarker } ;
use indexmap ::IndexSet ;
use tap ::TapOptional ;
@ -153,36 +148,55 @@ pub(crate) fn canonicalize_locale_list(
Ok ( seen . into_iter ( ) . collect ( ) )
}
/// Abstract operation `BestAvailableLocale ( availableLocales, locale )`
/// Abstract operation [`LookupMatchingLocaleByPrefix ( availableLocales, requestedLocales )`][prefix]
/// and [`LookupMatchingLocaleByBestFit ( availableLocales, requestedLocales )`][best]
///
/// Compares the provided argument `locale`, which must be a String value with a
/// structurally valid and canonicalized Unicode BCP 47 locale identifier, against
/// the locales in `availableLocales` and returns either the longest non-empty prefix
/// of `locale` that is an element of `availableLocales`, or undefined if there is no
/// such element.
/// Compares `requestedLocales`, which must be a `List` as returned by `CanonicalizeLocaleList`,
/// against the locales in `availableLocales` and determines the best available language to
/// meet the request.
///
/// We only work with language identifiers, which have the same semantics
/// but are a bit easier to manipulate.
/// # Notes
///
/// More information:
/// - [ECMAScript reference][spec]
/// - This differs a bit from the spec, since we don't have an `[[AvailableLocales]]`
/// list to compare with. However, we can do data requests to a [`DataProvider`]
/// in order to see if a certain [`Locale`] is supported.
///
/// [spec]: https://tc39.es/ecma402/#sec-bestavailablelocale
pub ( crate ) fn best_available_locale < M : KeyedDataMarker > (
candidate : LanguageIdentifier ,
provider : & ( impl DataProvider < M > + ? Sized ) ,
) -> Option < LanguageIdentifier > {
// 1. Let candidate be locale.
let mut candidate = candidate . into ( ) ;
// 2. Repeat
/// - Calling this function with a singleton `KeyedDataMarker` will always return `None`.
///
/// [prefix]: https://tc39.es/ecma402/#sec-lookupmatchinglocalebyprefix
/// [best]: https://tc39.es/ecma402/#sec-lookupmatchinglocalebybestfit
pub ( crate ) fn lookup_matching_locale_by_prefix < M : KeyedDataMarker > (
requested_locales : impl IntoIterator < Item = Locale > ,
provider : & IntlProvider ,
) -> Option < Locale >
where
IntlProvider : DataProvider < M > ,
{
// 1. For each element locale of requestedLocales, do
for locale in requested_locales {
// a. Let extension be empty.
// b. If locale contains a Unicode locale extension sequence, then
// i. Set extension to the Unicode locale extension sequence of locale.
// ii. Set locale to the String value that is locale with any Unicode locale extension sequences removed.
let mut locale = locale . clone ( ) ;
let id = std ::mem ::take ( & mut locale . id ) ;
locale . extensions . transform . clear ( ) ;
locale . extensions . private . clear ( ) ;
// c. Let prefix be locale.
let mut prefix = id . into ( ) ;
// d. Repeat, while prefix is not the empty String,
// We don't use a `while !prefix.is_und()` because it could be that prefix is und at the start,
// so we need to make the request at least once.
loop {
// a. If availableLocales contains an element equal to candidate, return candidate.
// i. If availableLocales contains prefix, return the Record { [[locale]]: prefix, [[extension]]: extension } .
// ICU4X requires doing data requests in order to check if a locale
// is part of the set of supported locales.
let response = DataProvider ::< M > ::load (
provider ,
DataRequest {
locale : & candidate ,
locale : & prefix ,
metadata : {
let mut metadata = DataRequestMetadata ::default ( ) ;
metadata . silent = true ;
@ -191,174 +205,51 @@ pub(crate) fn best_available_locale<M: KeyedDataMarker>(
} ,
) ;
match response {
Ok ( req ) = > {
if let Ok ( req ) = response {
// `metadata.locale` returns None when the provider doesn't have a fallback mechanism,
// but supports the required locale. However, if the provider has a fallback mechanism,
// this will return `Some(locale)`, where the locale is the used locale after applying
// the fallback algorithm, even if the used locale is exactly the same as the required
// locale.
match req . metadata . locale {
// TODO: ugly hack to accept locales that fallback to "und" in the collator/segmenter services
Some ( loc )
if loc = = candidate
| | ( loc . is_empty ( )
& & [
CollationMetadataV1Marker ::KEY . path ( ) ,
WordBreakDataV1Marker ::KEY . path ( ) ,
]
. contains ( & M ::KEY . path ( ) ) ) = >
{
return Some ( candidate . into_locale ( ) . id )
}
None = > return Some ( candidate . into_locale ( ) . id ) ,
_ = > { }
Some ( loc ) if loc . get_langid ( ) = = prefix . get_langid ( ) = > {
locale . id = loc . into_locale ( ) . id ;
return Some ( locale ) ;
}
None = > {
locale . id = prefix . into_locale ( ) . id ;
return Some ( locale ) ;
}
Err ( DataError {
kind : DataErrorKind ::ExtraneousLocale ,
..
} ) = > {
// This is essentially the same hack as above but for singleton keys
return Some ( candidate . into_locale ( ) . id ) ;
_ = > { }
}
Err ( _ ) = > { }
}
// b. Let pos be the character index of the last occurrence of "-" (U+002D) within candidate. If that character does not occur, return undefined .
// c. If pos ≥ 2 and the character "-" occurs at index pos-2 of candidate, decrease pos by 2.
// d. Let candidate be the substring of candidate from position 0, inclusive, to position pos, exclusive .
//
// ii. If prefix contains "-" (code unit 0x002D HYPHEN-MINUS), let pos be the index into prefix of the last occurrence of "-"; else let pos be 0.
// iii. Repeat, while pos ≥ 2 and the substring of prefix from pos - 2 to pos - 1 is "-",
// 1. Set pos to pos - 2 .
// iv. Set prefix to the substring of prefix from 0 to pos.
// Since the definition of `LanguageIdentifier` allows us to manipulate it
// without using strings, we can replace these steps by a simpler
// algorithm.
if candidate . has_variants ( ) {
let mut variants = candidate
. clear_variants ( )
. iter ( )
. copied ( )
. collect ::< Vec < _ > > ( ) ;
if prefix . has_variants ( ) {
let mut variants = prefix . clear_variants ( ) . iter ( ) . copied ( ) . collect ::< Vec < _ > > ( ) ;
variants . pop ( ) ;
candidate . set_variants ( Variants ::from_vec_unchecked ( variants ) ) ;
} else if candidate . region ( ) . is_some ( ) {
candidate . set_region ( None ) ;
} else if candidate . script ( ) . is_some ( ) {
candidate . set_script ( None ) ;
prefix . set_variants ( Variants ::from_vec_unchecked ( variants ) ) ;
} else if prefix . region ( ) . is_some ( ) {
prefix . set_region ( None ) ;
} else if prefix . script ( ) . is_some ( ) {
prefix . set_script ( None ) ;
} else {
return None ;
break ;
}
}
}
/// Returns the locale resolved by the `provider` after using the ICU4X fallback
/// algorithm with `candidate` (if the provider supports this), or None if the locale is not
/// supported.
pub ( crate ) fn best_locale_for_provider < M : KeyedDataMarker > (
candidate : LanguageIdentifier ,
provider : & ( impl DataProvider < M > + ? Sized ) ,
) -> Option < LanguageIdentifier > {
// another hack to the list...
// This time is because markers like `WordBreakDataV1Marker` throw an error if they receive
// a request with a locale, because they don't really need it. In this case, we can
// check if the key is one of those kinds and return the candidate as it is.
if M ::KEY . metadata ( ) . singleton {
return Some ( candidate ) ;
}
let response = DataProvider ::< M > ::load (
provider ,
DataRequest {
locale : & DataLocale ::from ( & candidate ) ,
metadata : {
let mut md = DataRequestMetadata ::default ( ) ;
md . silent = true ;
md
} ,
} ,
)
. ok ( ) ? ;
if candidate = = LanguageIdentifier ::UND {
return Some ( LanguageIdentifier ::UND ) ;
}
response
. metadata
. locale
. map ( | dl | {
// TODO: ugly hack to accept locales that fallback to "und" in the collator/segmenter services
if [
CollationMetadataV1Marker ::KEY . path ( ) ,
WordBreakDataV1Marker ::KEY . path ( ) ,
]
. contains ( & M ::KEY . path ( ) )
& & dl . is_empty ( )
{
candidate . clone ( )
} else {
dl . into_locale ( ) . id
}
} )
. or ( Some ( candidate ) )
. filter ( | loc | loc ! = & LanguageIdentifier ::UND )
}
/// Abstract operation [`LookupMatcher ( availableLocales, requestedLocales )`][spec]
///
/// Compares `requestedLocales`, which must be a `List` as returned by `CanonicalizeLocaleList`,
/// against the locales in `availableLocales` and determines the best available language to
/// meet the request.
///
/// # Note
///
/// This differs a bit from the spec, since we don't have an `[[AvailableLocales]]`
/// list to compare with. However, we can do data requests to a [`DataProvider`]
/// in order to see if a certain [`Locale`] is supported.
///
/// [spec]: https://tc39.es/ecma402/#sec-lookupmatcher
fn lookup_matcher < M : KeyedDataMarker > (
requested_locales : & [ Locale ] ,
provider : & IntlProvider ,
) -> Locale
where
IntlProvider : DataProvider < M > ,
{
// 1. Let result be a new Record.
// 2. For each element locale of requestedLocales, do
for locale in requested_locales {
// a. Let noExtensionsLocale be the String value that is locale with any Unicode locale
// extension sequences removed.
let mut locale = locale . clone ( ) ;
let id = std ::mem ::take ( & mut locale . id ) ;
locale . extensions . transform . clear ( ) ;
locale . extensions . private . clear ( ) ;
// b. Let availableLocale be ! BestAvailableLocale(availableLocales, noExtensionsLocale).
let available_locale = best_available_locale ::< M > ( id , provider ) ;
// c. If availableLocale is not undefined, then
if let Some ( available_locale ) = available_locale {
// i. Set result.[[locale]] to availableLocale.
// Assignment deferred. See return statement below.
// ii. If locale and noExtensionsLocale are not the same String value, then
// 1. Let extension be the String value consisting of the substring of the Unicode
// locale extension sequence within locale.
// 2. Set result.[[extension]] to extension.
locale . id = available_locale ;
// iii. Return result.
return locale ;
}
}
// 3. Let defLocale be ! DefaultLocale().
// 4. Set result.[[locale]] to defLocale.
// 5. Return result.
default_locale ( provider . locale_canonicalizer ( ) )
// 2. Return undefined.
None
}
/// Abstract operation [`BestFitMatcher ( availableLocales, requestedLocales )`][spec]
/// Abstract operation [`LookupMatchingLocaleByBestFit ( availableLocales, requestedLocales )`][spec]
///
/// Compares `requestedLocales`, which must be a `List` as returned by `CanonicalizeLocaleList`,
/// against the locales in `availableLocales` and determines the best available language to
@ -367,31 +258,50 @@ where
/// produced by the `LookupMatcher` abstract operation.
///
/// [spec]: https://tc39.es/ecma402/#sec-bestfitmatcher
fn best_fit_matcher < M : KeyedDataMarker > (
requested_locales : & [ Locale ] ,
fn lookup_matching_locale_by_ best_fit< M : KeyedDataMarker > (
requested_locales : impl IntoIterator < Item = Locale > ,
provider : & IntlProvider ,
) -> Locale
) -> Option < Locale >
where
IntlProvider : DataProvider < M > ,
{
for mut locale in requested_locales
. iter ( )
. cloned ( )
. chain ( std ::iter ::once_with ( | | {
default_locale ( provider . locale_canonicalizer ( ) )
} ) )
{
for mut locale in requested_locales {
let id = std ::mem ::take ( & mut locale . id ) ;
// Only leave unicode extensions when returning the locale.
locale . extensions . transform . clear ( ) ;
locale . extensions . private . clear ( ) ;
if let Some ( available ) = best_locale_for_provider ( id , provider ) {
locale . id = available ;
let Ok ( response ) = DataProvider ::< M > ::load (
provider ,
DataRequest {
locale : & DataLocale ::from ( & id ) ,
metadata : {
let mut md = DataRequestMetadata ::default ( ) ;
md . silent = true ;
md
} ,
} ,
) else {
continue ;
} ;
return locale ;
if id = = LanguageIdentifier ::UND {
return Some ( locale ) ;
}
if let Some ( id ) = response
. metadata
. locale
. map ( | dl | dl . into_locale ( ) . id )
. or ( Some ( id ) )
. filter ( | loc | loc ! = & LanguageIdentifier ::UND )
{
locale . id = id ;
return Some ( locale ) ;
}
}
Locale ::default ( )
None
}
/// Abstract operation `ResolveLocale ( availableLocales, requestedLocales, options, relevantExtensionKeys, localeData )`
@ -406,7 +316,7 @@ where
///
/// [spec]: https://tc39.es/ecma402/#sec-resolvelocale
pub ( in crate ::builtins ::intl ) fn resolve_locale < S > (
requested_locales : & [ Locale ] ,
requested_locales : impl IntoIterator < Item = Locale > ,
options : & mut IntlOptions < S ::LocaleOptions > ,
provider : & IntlProvider ,
) -> Locale
@ -416,15 +326,16 @@ where
{
// 1. Let matcher be options.[[localeMatcher]].
// 2. If matcher is "lookup", then
// a. Let r be ! LookupMatcher(availableLocales, requestedLocales).
// a. Let r be LookupMatchingLocaleByPrefix(availableLocales, requestedLocales).
// 3. Else,
// a. Let r be ! BestFitMatcher (availableLocales, requestedLocales).
// 4. Let foundLocale be r.[[locale]] .
// a. Let r be LookupMatchingLocaleByBestFit (availableLocales, requestedLocales).
// 4. If r is undefined, set r to the Record { [[locale]]: DefaultLocale(), [[extension]]: empty } .
let mut found_locale = if options . matcher = = LocaleMatcher ::Lookup {
lookup_matcher ::< S ::LangMarker > ( requested_locales , provider )
lookup_matching_local e_by_p refix ::< S ::LangMarker > ( requested_locales , provider )
} else {
best_fit_matcher ::< S ::LangMarker > ( requested_locales , provider )
} ;
lookup_matching_locale_by_best_fit ::< S ::LangMarker > ( requested_locales , provider )
}
. unwrap_or_else ( | | default_locale ( provider . locale_canonicalizer ( ) ) ) ;
// From here, the spec differs significantly from the implementation,
// since ICU4X allows us to skip some steps and modularize the
@ -485,62 +396,18 @@ where
found_locale
}
/// Abstract operation [`LookupSupportedLocales ( availableLocales, requestedLocales )`][spec]
/// Abstract operation [`FilterLocales ( availableLocales, requestedLocales, options )`][spec]
///
/// Returns the subset of the provided BCP 47 language priority list requestedLocales for which
/// `availableLocales` has a matching locale when using the BCP 47 Lookup algorithm. Locales appear
/// in the same order in the returned list as in `requestedLocales`.
/// availableLocales has a matching locale.
///
/// # Note
///
/// This differs a bit from the spec, since we don't have an `[[AvailableLocales]]`
/// list to compare with. However, we can do data requests to a [`DataProvider`]
/// in order to see if a certain [`Locale`] is supported.
///
/// [spec]: https://tc39.es/ecma402/#sec-lookupsupportedlocales
fn lookup_supported_locales < M : KeyedDataMarker > (
requested_locales : & [ Locale ] ,
provider : & ( impl DataProvider < M > + ? Sized ) ,
) -> Vec < Locale > {
// 1. Let subset be a new empty List.
// 2. For each element locale of requestedLocales, do
// a. Let noExtensionsLocale be the String value that is locale with any Unicode locale extension sequences removed.
// b. Let availableLocale be ! BestAvailableLocale(availableLocales, noExtensionsLocale).
// c. If availableLocale is not undefined, append locale to the end of subset.
// 3. Return subset.
requested_locales
. iter ( )
. filter ( | loc | best_available_locale ( loc . id . clone ( ) , provider ) . is_some ( ) )
. cloned ( )
. collect ( )
}
/// Abstract operation [`BestFitSupportedLocales ( availableLocales, requestedLocales )`][spec]
///
/// Returns the subset of the provided BCP 47 language priority list `requestedLocales` for which
/// `availableLocales` has a matching locale when using the Best Fit Matcher algorithm. Locales appear
/// in the same order in the returned list as in requestedLocales.
///
/// [spec]: https://tc39.es/ecma402/#sec-bestfitsupportedlocales
fn best_fit_supported_locales < M : KeyedDataMarker > (
requested_locales : & [ Locale ] ,
provider : & ( impl DataProvider < M > + ? Sized ) ,
) -> Vec < Locale > {
requested_locales
. iter ( )
. filter ( | loc | best_locale_for_provider ( loc . id . clone ( ) , provider ) . is_some ( ) )
. cloned ( )
. collect ( )
}
/// Abstract operation [`SupportedLocales ( availableLocales, requestedLocales, options )`][spec]
///
/// Returns the subset of the provided BCP 47 language priority list requestedLocales for which
/// availableLocales has a matching locale
/// Calling this function with a singleton `KeyedDataMarker` will always return `None`.
///
/// [spec]: https://tc39.es/ecma402/#sec-supportedlocales
pub ( in crate ::builtins ::intl ) fn supported _locales< M : KeyedDataMarker > (
requested_locales : & [ Locale ] ,
pub ( in crate ::builtins ::intl ) fn filter_locales < M : KeyedDataMarker > (
requested_locales : Vec < Locale > ,
options : & JsValue ,
context : & mut Context ,
) -> JsResult < JsObject >
@ -553,22 +420,36 @@ where
// 2. Let matcher be ? GetOption(options, "localeMatcher", string, « "lookup", "best fit" », "best fit").
let matcher = get_option ( & options , js_str ! ( "localeMatcher" ) , context ) ? . unwrap_or_default ( ) ;
let elements = match matcher {
// 4. Else,
// a. Let supportedLocales be LookupSupportedLocales(availableLocales, requestedLocales).
// 3. Let subset be a new empty List.
let mut subset = Vec ::with_capacity ( requested_locales . len ( ) ) ;
// 4. For each element locale of requestedLocales, do
for locale in requested_locales {
// a. Let noExtensionsLocale be the String value that is locale with any Unicode locale extension sequences removed.
let mut no_ext_loc = locale . clone ( ) ;
no_ext_loc . extensions . unicode . clear ( ) ;
let loc_match = match matcher {
// b. If matcher is "lookup", then
// i. Let match be LookupMatchingLocaleByPrefix(availableLocales, noExtensionsLocale).
LocaleMatcher ::Lookup = > {
lookup_supported_locales ( requested_locales , context . intl_provider ( ) )
lookup_matching_locale_by_prefix ( [ no_ext_loc ] , context . intl_provider ( ) )
}
// 3. If matcher is "best fit", then
// a. Let supportedLocales be BestFitSupportedLocales(availableLocales, requestedLocales).
// c. Else,
// i. Let match be LookupMatchingLocaleByBestFit(availableLocales, noExtensionsLocale ).
LocaleMatcher ::BestFit = > {
best_fit_supported_locales ( requested_locales , context . intl_provider ( ) )
lookup_matching_locale_by_best_fit ( [ no_ext_loc ] , context . intl_provider ( ) )
}
} ;
// 5. Return CreateArrayFromList(supportedLocales).
// d. If match is not undefined, append locale to subset.
if loc_match . is_some ( ) {
subset . push ( locale ) ;
}
}
// 5. Return CreateArrayFromList(subset).
Ok ( Array ::create_array_from_list (
elements
subset
. into_iter ( )
. map ( | loc | js_string ! ( loc . to_string ( ) ) . into ( ) ) ,
context ,
@ -577,6 +458,10 @@ where
/// Validates that the unicode extension `key` with `value` is a valid extension value for the
/// `language`.
///
/// # Note
///
/// Calling this function with a singleton `KeyedDataMarker` will always return `None`.
pub ( in crate ::builtins ::intl ) fn validate_extension < M : KeyedDataMarker > (
language : LanguageIdentifier ,
key : Key ,
@ -597,54 +482,47 @@ pub(in crate::builtins::intl) fn validate_extension<M: KeyedDataMarker>(
. is_some ( )
}
#[ cfg(test) ]
#[ cfg(all( test, feature = " intl_bundled " ) ) ]
mod tests {
use icu_locid ::{ langid , locale , Locale } ;
use icu_plurals ::provider ::CardinalV1Marker ;
use icu_provider ::AsDeserializingBufferProvider ;
use crate ::{
builtins ::intl ::locale ::utils ::{
best_available_locale , best_fit_matcher , default_locale , lookup_matcher ,
lookup_matching_locale_by_best_fit , lookup_matching_local e_by_p refix ,
} ,
context ::icu ::IntlProvider ,
} ;
#[ test ]
fn best_avail_loc ( ) {
let provider = boa_icu_provider ::buffer ( ) ;
let provider = provider . as_deserializing ( ) ;
fn best_fit ( ) {
let icu = & IntlProvider ::try_new_with_buffer_provider ( boa_icu_provider ::buffer ( ) ) . unwrap ( ) ;
assert_eq! (
best_available_locale ::< CardinalV1Marker > ( langid ! ( "en" ) , & provider ) ,
Some ( langid ! ( "en" ) )
lookup_matching_locale_by_best_fit ::< CardinalV1Marker > ( [ locale ! ( "en" ) ] , icu ) ,
Some ( locale ! ( "en" ) )
) ;
assert_eq! (
best_available_locale ::< CardinalV1Marker > ( langid ! ( "es-ES" ) , & provider ) ,
Some ( langid ! ( "es" ) )
lookup_matching_locale_by_best_fit ::< CardinalV1Marker > ( [ locale ! ( "es-ES" ) ] , icu ) ,
Some ( locale ! ( "es" ) )
) ;
assert_eq! (
best_available_locale ::< CardinalV1Marker > ( langid ! ( "kr" ) , & provider ) ,
lookup_matching_locale_by_best_fit ::< CardinalV1Marker > ( [ locale ! ( "kr" ) ] , icu ) ,
None
) ;
}
#[ test ]
fn lookup_match ( ) {
let icu = IntlProvider ::try_new_with_buffer_provider ( boa_icu_provider ::buffer ( ) ) . unwrap ( ) ;
// requested: []
let res = lookup_matcher ::< CardinalV1Marker > ( & [ ] , & icu ) ;
assert_eq! ( res , default_locale ( icu . locale_canonicalizer ( ) ) ) ;
assert! ( res . extensions . is_empty ( ) ) ;
let icu = & IntlProvider ::try_new_with_buffer_provider ( boa_icu_provider ::buffer ( ) ) . unwrap ( ) ;
// requested: [fr-FR-u-hc-h12]
let requested : Locale = "fr-FR-u-hc-h12" . parse ( ) . unwrap ( ) ;
let result = lookup_matcher ::< CardinalV1Marker > ( & [ requested . clone ( ) ] , & icu ) ;
let result =
lookup_matching_locale_by_prefix ::< CardinalV1Marker > ( [ requested . clone ( ) ] , icu ) . unwrap ( ) ;
assert_eq! ( result . id , langid ! ( "fr" ) ) ;
assert_eq! ( result . extensions , requested . extensions ) ;
@ -655,7 +533,7 @@ mod tests {
let uz = locale ! ( "uz-Cyrl" ) ;
let requested = vec! [ kr , gr , es . clone ( ) , uz ] ;
let res = best_fit_matcher ::< CardinalV1Marker > ( & requested , & icu ) ;
let res = lookup_matching_locale_by_ best_fit ::< CardinalV1Marker > ( requested , icu ) . unwrap ( ) ;
assert_eq! ( res . id , langid ! ( "es" ) ) ;
assert_eq! ( res . extensions , es . extensions ) ;
}