From 1b67e5d6070f0d3e6da36eb3ccb193ce2f8656ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Juli=C3=A1n=20Espina?= Date: Thu, 20 Apr 2023 03:14:40 +0000 Subject: [PATCH] Implement `Intl.Segmenter` (#2840) The new ICU4X release stabilized the `icu_segmenter` component, so this PR implements `Intl.Segmenter` using that as a base. Also, I opted for importing `itertools` instead of copy-pasting the implementation of `TupleWindows` because its design is a lot more complex than `Intersperse`, which we copy-pasted previously. Though, I disabled all `std` features of `itertools` to make it a lot more lightweight, so it shouldn't make much difference in compilation times. --- Cargo.lock | 2 + boa_engine/Cargo.toml | 3 + boa_engine/src/builtins/intl/locale/utils.rs | 35 ++- .../src/builtins/intl/segmenter/iterator.rs | 152 +++++++++ boa_engine/src/builtins/intl/segmenter/mod.rs | 292 +++++++++++++++++- .../src/builtins/intl/segmenter/options.rs | 19 +- .../src/builtins/intl/segmenter/segments.rs | 142 +++++++++ boa_engine/src/builtins/iterable/mod.rs | 32 +- boa_engine/src/builtins/json/mod.rs | 121 ++------ boa_engine/src/builtins/mod.rs | 2 + boa_engine/src/context/icu.rs | 70 +++-- boa_engine/src/context/intrinsics.rs | 14 + boa_engine/src/object/mod.rs | 100 +++++- test_ignore.toml | 1 - 14 files changed, 828 insertions(+), 157 deletions(-) create mode 100644 boa_engine/src/builtins/intl/segmenter/iterator.rs create mode 100644 boa_engine/src/builtins/intl/segmenter/segments.rs diff --git a/Cargo.lock b/Cargo.lock index 3453731ec8..bd5f7bc649 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -415,8 +415,10 @@ dependencies = [ "icu_locid_transform", "icu_plurals", "icu_provider", + "icu_segmenter", "indexmap", "indoc", + "itertools", "jemallocator", "num-bigint", "num-integer", diff --git a/boa_engine/Cargo.toml b/boa_engine/Cargo.toml index 84a0991239..d14230fc9c 100644 --- a/boa_engine/Cargo.toml +++ b/boa_engine/Cargo.toml @@ -25,6 +25,7 @@ intl = [ "dep:icu_collator", "dep:icu_casemapping", "dep:icu_list", + "dep:icu_segmenter", "dep:writeable", "dep:sys-locale", "dep:yoke", @@ -75,6 +76,7 @@ dashmap = "5.4.0" num_enum = "0.6.1" pollster = "0.3.0" thin-vec = "0.2.12" +itertools = { version = "0.10.5", default-features = false } # intl deps boa_icu_provider = { workspace = true, optional = true } @@ -87,6 +89,7 @@ icu_plurals = { version = "1.2.0", features = ["serde"], optional = true } icu_provider = { version = "1.2.0", optional = true } icu_list = { version = "1.2.0", features = ["serde"], optional = true } icu_casemapping = { version = "0.7.2", features = ["serde"], optional = true} +icu_segmenter = { version = "1.2.1", features = ["serde"], optional = true } writeable = { version = "0.5.2", optional = true } yoke = { version = "0.7.1", optional = true } zerofrom = { version = "0.1.2", optional = true } diff --git a/boa_engine/src/builtins/intl/locale/utils.rs b/boa_engine/src/builtins/intl/locale/utils.rs index 9030def0a1..226cb126c3 100644 --- a/boa_engine/src/builtins/intl/locale/utils.rs +++ b/boa_engine/src/builtins/intl/locale/utils.rs @@ -20,6 +20,7 @@ use icu_locid::{ }; use icu_locid_transform::LocaleCanonicalizer; use icu_provider::{DataLocale, DataProvider, DataRequest, DataRequestMetadata, KeyedDataMarker}; +use icu_segmenter::provider::WordBreakDataV1Marker; use indexmap::IndexSet; use tap::TapOptional; @@ -115,9 +116,14 @@ pub(crate) fn canonicalize_locale_list( // iv. Else, else { // 1. Let tag be ? ToString(kValue). + let k_value = k_value.to_string(context)?.to_std_string_escaped(); + if k_value.contains('_') { + return Err(JsNativeError::range() + .with_message("locale is not a structurally valid language tag") + .into()); + } + k_value - .to_string(context)? - .to_std_string_escaped() .parse() // v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception. .map_err(|_| { @@ -169,7 +175,11 @@ pub(crate) fn best_available_locale( provider, DataRequest { locale: &candidate, - metadata: DataRequestMetadata::default(), + metadata: { + let mut metadata = DataRequestMetadata::default(); + metadata.silent = true; + metadata + }, }, ); @@ -180,10 +190,15 @@ pub(crate) fn best_available_locale( // the fallback algorithm, even if the used locale is exactly the same as the required // locale. match req.metadata.locale { + // TODO: ugly hack to accept locales that fallback to "und" in the collator/segmenter services Some(loc) if loc == candidate - // TODO: ugly hack to accept locales that fallback to "und" in the collator service - || (loc.is_empty() && M::KEY.path() == CollationMetadataV1Marker::KEY.path()) => + || (loc.is_empty() + && [ + CollationMetadataV1Marker::KEY.path(), + WordBreakDataV1Marker::KEY.path(), + ] + .contains(&M::KEY.path())) => { return Some(candidate.into_locale().id) } @@ -242,8 +257,14 @@ pub(crate) fn best_locale_for_provider( .metadata .locale .map(|dl| { - // TODO: ugly hack to accept locales that fallback to "und" in the collator service - if M::KEY.path() == CollationMetadataV1Marker::KEY.path() && dl.is_empty() { + // TODO: ugly hack to accept locales that fallback to "und" in the collator/segmenter services + if [ + CollationMetadataV1Marker::KEY.path(), + WordBreakDataV1Marker::KEY.path(), + ] + .contains(&M::KEY.path()) + && dl.is_empty() + { candidate.clone() } else { dl.into_locale().id diff --git a/boa_engine/src/builtins/intl/segmenter/iterator.rs b/boa_engine/src/builtins/intl/segmenter/iterator.rs new file mode 100644 index 0000000000..7b3601e407 --- /dev/null +++ b/boa_engine/src/builtins/intl/segmenter/iterator.rs @@ -0,0 +1,152 @@ +use boa_gc::{Finalize, Trace}; +use boa_profiler::Profiler; +use icu_segmenter::{ + GraphemeClusterBreakIteratorUtf16, SentenceBreakIteratorUtf16, WordBreakIteratorUtf16, +}; + +use crate::{ + builtins::{iterable::create_iter_result_object, BuiltInBuilder, IntrinsicObject}, + context::intrinsics::Intrinsics, + js_string, + object::ObjectData, + property::Attribute, + realm::Realm, + Context, JsNativeError, JsObject, JsResult, JsString, JsSymbol, JsValue, +}; + +use super::create_segment_data_object; + +pub(crate) enum NativeSegmentIterator<'l, 's> { + Grapheme(GraphemeClusterBreakIteratorUtf16<'l, 's>), + Word(WordBreakIteratorUtf16<'l, 's>), + Sentence(SentenceBreakIteratorUtf16<'l, 's>), +} + +impl Iterator for NativeSegmentIterator<'_, '_> { + type Item = usize; + + fn next(&mut self) -> Option { + match self { + NativeSegmentIterator::Grapheme(g) => g.next(), + NativeSegmentIterator::Word(w) => w.next(), + NativeSegmentIterator::Sentence(s) => s.next(), + } + } +} + +impl NativeSegmentIterator<'_, '_> { + /// If the iterator is a word break iterator, returns `Some(true)` when the segment preceding + /// the current boundary is word-like. + pub(crate) fn is_word_like(&self) -> Option { + if let Self::Word(w) = self { + Some(w.is_word_like()) + } else { + None + } + } +} + +#[derive(Debug, Trace, Finalize)] +pub struct SegmentIterator { + segmenter: JsObject, + string: JsString, + next_segment_index: usize, +} + +impl IntrinsicObject for SegmentIterator { + fn init(realm: &Realm) { + let _timer = Profiler::global().start_event("%SegmentIteratorPrototype%", "init"); + + BuiltInBuilder::with_intrinsic::(realm) + .static_property( + JsSymbol::to_string_tag(), + js_string!("Segmenter String Iterator"), + Attribute::CONFIGURABLE, + ) + .static_method(Self::next, js_string!("next"), 0) + .build(); + } + + fn get(intrinsics: &Intrinsics) -> JsObject { + intrinsics.objects().iterator_prototypes().segment() + } +} + +impl SegmentIterator { + /// [`CreateSegmentIterator ( segmenter, string )`][spec] + /// + /// [spec]: https://tc39.es/ecma402/#sec-createsegmentiterator + pub(crate) fn create( + segmenter: JsObject, + string: JsString, + context: &mut Context<'_>, + ) -> JsObject { + // 1. Let internalSlotsList be « [[IteratingSegmenter]], [[IteratedString]], [[IteratedStringNextSegmentCodeUnitIndex]] ». + // 2. Let iterator be OrdinaryObjectCreate(%SegmentIteratorPrototype%, internalSlotsList). + // 3. Set iterator.[[IteratingSegmenter]] to segmenter. + // 4. Set iterator.[[IteratedString]] to string. + // 5. Set iterator.[[IteratedStringNextSegmentCodeUnitIndex]] to 0. + // 6. Return iterator. + JsObject::from_proto_and_data( + context + .intrinsics() + .objects() + .iterator_prototypes() + .segment(), + ObjectData::segment_iterator(Self { + segmenter, + string, + next_segment_index: 0, + }), + ) + } + /// [`%SegmentIteratorPrototype%.next ( )`][spec] + /// + /// [spec]: https://tc39.es/ecma402/#sec-%segmentiteratorprototype%.next + fn next(this: &JsValue, _: &[JsValue], context: &mut Context<'_>) -> JsResult { + // 1. Let iterator be the this value. + // 2. Perform ? RequireInternalSlot(iterator, [[IteratingSegmenter]]). + let mut iter = this.as_object().map(JsObject::borrow_mut).ok_or_else(|| { + JsNativeError::typ() + .with_message("`next` can only be called on a `Segment Iterator` object") + })?; + let iter = iter.as_segment_iterator_mut().ok_or_else(|| { + JsNativeError::typ() + .with_message("`next` can only be called on a `Segment Iterator` object") + })?; + + // 5. Let startIndex be iterator.[[IteratedStringNextSegmentCodeUnitIndex]]. + let start = iter.next_segment_index; + + // 4. Let string be iterator.[[IteratedString]]. + // 6. Let endIndex be ! FindBoundary(segmenter, string, startIndex, after). + let Some((end, is_word_like)) = iter.string.get(start..).and_then(|string| { + // 3. Let segmenter be iterator.[[IteratingSegmenter]]. + let segmenter = iter.segmenter.borrow(); + let segmenter = segmenter + .as_segmenter() + .expect("segment iterator object should contain a segmenter"); + let mut segments = segmenter.native.segment(string); + // the first elem is always 0. + segments.next(); + segments.next().map(|end| (start + end, segments.is_word_like())) + }) else { + // 7. If endIndex is not finite, then + // a. Return CreateIterResultObject(undefined, true). + return Ok(create_iter_result_object(JsValue::undefined(), true, context)); + }; + // 8. Set iterator.[[IteratedStringNextSegmentCodeUnitIndex]] to endIndex. + iter.next_segment_index = end; + + // 9. Let segmentData be ! CreateSegmentDataObject(segmenter, string, startIndex, endIndex). + let segment_data = + create_segment_data_object(iter.string.clone(), start..end, is_word_like, context); + + // 10. Return CreateIterResultObject(segmentData, false). + Ok(create_iter_result_object( + segment_data.into(), + false, + context, + )) + } +} diff --git a/boa_engine/src/builtins/intl/segmenter/mod.rs b/boa_engine/src/builtins/intl/segmenter/mod.rs index bd32d380e9..f3fad7f25f 100644 --- a/boa_engine/src/builtins/intl/segmenter/mod.rs +++ b/boa_engine/src/builtins/intl/segmenter/mod.rs @@ -1,27 +1,89 @@ -// TODO: implement `Segmenter` when https://github.com/unicode-org/icu4x/issues/2259 closes. +use std::ops::Range; +use boa_macros::utf16; use boa_profiler::Profiler; +use icu_locid::Locale; +use icu_segmenter::provider::WordBreakDataV1Marker; use crate::{ builtins::{BuiltInBuilder, BuiltInConstructor, BuiltInObject, IntrinsicObject}, context::intrinsics::{Intrinsics, StandardConstructor, StandardConstructors}, - object::JsObject, + js_string, + object::{ + internal_methods::get_prototype_from_constructor, JsObject, ObjectData, ObjectInitializer, + }, + property::Attribute, realm::Realm, - Context, JsResult, JsValue, + Context, JsArgs, JsNativeError, JsResult, JsString, JsSymbol, JsValue, }; +mod iterator; mod options; -#[allow(unused)] +mod segments; +pub(crate) use iterator::*; pub(crate) use options::*; +pub(crate) use segments::*; -#[derive(Debug, Clone)] -pub(crate) struct Segmenter; +use super::{ + locale::{canonicalize_locale_list, resolve_locale, supported_locales}, + options::{get_option, get_options_object, IntlOptions, LocaleMatcher}, + Service, +}; + +#[derive(Debug)] +pub struct Segmenter { + locale: Locale, + native: NativeSegmenter, +} + +#[derive(Debug)] +pub(crate) enum NativeSegmenter { + Grapheme(Box), + Word(Box), + Sentence(Box), +} + +impl NativeSegmenter { + /// Gets the granularity level of this `NativeSegmenter`. + pub(crate) const fn granularity(&self) -> Granularity { + match self { + Self::Grapheme(_) => Granularity::Grapheme, + Self::Word(_) => Granularity::Word, + Self::Sentence(_) => Granularity::Sentence, + } + } + + /// Segment the passed string, returning an iterator with the index boundaries + /// of the segments. + pub(crate) fn segment<'l, 's>(&'l self, input: &'s [u16]) -> NativeSegmentIterator<'l, 's> { + match self { + NativeSegmenter::Grapheme(g) => NativeSegmentIterator::Grapheme(g.segment_utf16(input)), + NativeSegmenter::Word(w) => NativeSegmentIterator::Word(w.segment_utf16(input)), + NativeSegmenter::Sentence(s) => NativeSegmentIterator::Sentence(s.segment_utf16(input)), + } + } +} + +impl Service for Segmenter { + type LangMarker = WordBreakDataV1Marker; + + type LocaleOptions = (); +} impl IntrinsicObject for Segmenter { fn init(realm: &Realm) { let _timer = Profiler::global().start_event(Self::NAME, "init"); - BuiltInBuilder::from_standard_constructor::(realm).build(); + BuiltInBuilder::from_standard_constructor::(realm) + .static_method(Self::supported_locales_of, "supportedLocalesOf", 1) + .property( + JsSymbol::to_string_tag(), + "Intl.Segmenter", + Attribute::CONFIGURABLE, + ) + .method(Self::resolved_options, "resolvedOptions", 0) + .method(Self::segment, "segment", 1) + .build(); } fn get(intrinsics: &Intrinsics) -> JsObject { @@ -39,8 +101,218 @@ impl BuiltInConstructor for Segmenter { const STANDARD_CONSTRUCTOR: fn(&StandardConstructors) -> &StandardConstructor = StandardConstructors::segmenter; - #[allow(clippy::unnecessary_wraps)] - fn constructor(_: &JsValue, _: &[JsValue], _: &mut Context<'_>) -> JsResult { - Ok(JsValue::Undefined) + fn constructor( + new_target: &JsValue, + args: &[JsValue], + context: &mut Context<'_>, + ) -> JsResult { + // 1. If NewTarget is undefined, throw a TypeError exception. + if new_target.is_undefined() { + return Err(JsNativeError::typ() + .with_message("cannot call `Intl.Collator` constructor without `new`") + .into()); + } + let locales = args.get_or_undefined(0); + let options = args.get_or_undefined(1); + + // 4. Let requestedLocales be ? CanonicalizeLocaleList(locales). + let locales = canonicalize_locale_list(locales, context)?; + + // 5. Set options to ? GetOptionsObject(options). + let options = get_options_object(options)?; + + // 6. Let opt be a new Record. + // 7. Let matcher be ? GetOption(options, "localeMatcher", string, « "lookup", "best fit" », "best fit"). + let matcher = + get_option::(&options, utf16!("localeMatcher"), false, context)? + .unwrap_or_default(); + + // 8. Set opt.[[localeMatcher]] to matcher. + // 9. Let localeData be %Segmenter%.[[LocaleData]]. + // 10. Let r be ResolveLocale(%Segmenter%.[[AvailableLocales]], requestedLocales, opt, %Segmenter%.[[RelevantExtensionKeys]], localeData). + // 11. Set segmenter.[[Locale]] to r.[[locale]]. + let locale = resolve_locale::( + &locales, + &mut IntlOptions { + matcher, + ..Default::default() + }, + context.icu(), + ); + + // 12. Let granularity be ? GetOption(options, "granularity", string, « "grapheme", "word", "sentence" », "grapheme"). + let granularity = + get_option::(&options, utf16!("granularity"), false, context)? + .unwrap_or_default(); + // 13. Set segmenter.[[SegmenterGranularity]] to granularity. + + let kind = context + .icu() + .provider() + .try_new_segmenter(granularity) + .map_err(|err| JsNativeError::typ().with_message(err.to_string()))?; + + let segmenter = Segmenter { + locale, + native: kind, + }; + + // 2. Let internalSlotsList be « [[InitializedSegmenter]], [[Locale]], [[SegmenterGranularity]] ». + // 3. Let segmenter be ? OrdinaryCreateFromConstructor(NewTarget, "%Segmenter.prototype%", internalSlotsList). + + let proto = + get_prototype_from_constructor(new_target, StandardConstructors::segmenter, context)?; + + let segmenter = JsObject::from_proto_and_data(proto, ObjectData::segmenter(segmenter)); + + // 14. Return segmenter. + Ok(segmenter.into()) + } +} + +impl Segmenter { + /// [`Intl.Segmenter.supportedLocalesOf ( locales [ , options ] )`][spec]. + /// + /// Returns an array containing those of the provided locales that are supported in list + /// formatting without having to fall back to the runtime's default locale. + /// + /// More information: + /// - [MDN documentation][mdn] + /// + /// [spec]: https://tc39.es/ecma402/#sec-intl.segmenter.supportedlocalesof + /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Segmenter/supportedLocalesOf + fn supported_locales_of( + _: &JsValue, + args: &[JsValue], + context: &mut Context<'_>, + ) -> JsResult { + let locales = args.get_or_undefined(0); + let options = args.get_or_undefined(1); + + // 1. Let availableLocales be %Segmenter%.[[AvailableLocales]]. + // 2. Let requestedLocales be ? CanonicalizeLocaleList(locales). + let requested_locales = canonicalize_locale_list(locales, context)?; + + // 3. Return ? SupportedLocales(availableLocales, requestedLocales, options). + supported_locales::<::LangMarker>(&requested_locales, options, context) + .map(JsValue::from) } + + /// [`Intl.Segmenter.prototype.resolvedOptions ( )`][spec]. + /// + /// Returns a new object with properties reflecting the locale and style formatting options + /// computed during the construction of the current `Intl.Segmenter` object. + /// + /// More information: + /// - [MDN documentation][mdn] + /// + /// [spec]: https://tc39.es/ecma402/#sec-Intl.Segmenter.prototype.resolvedoptions + /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Segmenter/resolvedOptions + fn resolved_options( + this: &JsValue, + _: &[JsValue], + context: &mut Context<'_>, + ) -> JsResult { + // 1. Let segmenter be the this value. + // 2. Perform ? RequireInternalSlot(segmenter, [[InitializedSegmenter]]). + let segmenter = this.as_object().map(JsObject::borrow).ok_or_else(|| { + JsNativeError::typ() + .with_message("`resolved_options` can only be called on an `Intl.Segmenter` object") + })?; + let segmenter = segmenter.as_segmenter().ok_or_else(|| { + JsNativeError::typ() + .with_message("`resolved_options` can only be called on an `Intl.Segmenter` object") + })?; + + // 3. Let options be OrdinaryObjectCreate(%Object.prototype%). + // 4. For each row of Table 19, except the header row, in table order, do + // a. Let p be the Property value of the current row. + // b. Let v be the value of segmenter's internal slot whose name is the Internal Slot value of the current row. + // c. Assert: v is not undefined. + // d. Perform ! CreateDataPropertyOrThrow(options, p, v). + let options = ObjectInitializer::new(context) + .property( + js_string!("locale"), + segmenter.locale.to_string(), + Attribute::all(), + ) + .property( + js_string!("granularity"), + segmenter.native.granularity().to_string(), + Attribute::all(), + ) + .build(); + + // 5. Return options. + Ok(options.into()) + } + + /// [`Intl.Segmenter.prototype.segment ( string )`][spec]. + /// + /// Segments a string according to the locale and granularity of this `Intl.Segmenter` object. + /// + /// [spec]: https://tc39.es/ecma402/#sec-intl.segmenter.prototype.segment + fn segment(this: &JsValue, args: &[JsValue], context: &mut Context<'_>) -> JsResult { + // 1. Let segmenter be the this value. + // 2. Perform ? RequireInternalSlot(segmenter, [[InitializedSegmenter]]). + let segmenter = this + .as_object() + .filter(|o| o.borrow().is_segmenter()) + .ok_or_else(|| { + JsNativeError::typ().with_message( + "`resolved_options` can only be called on an `Intl.Segmenter` object", + ) + })?; + + // 3. Let string be ? ToString(string). + let string = args.get_or_undefined(0).to_string(context)?; + + // 4. Return ! CreateSegmentsObject(segmenter, string). + Ok(Segments::create(segmenter.clone(), string, context).into()) + } +} + +/// [`CreateSegmentDataObject ( segmenter, string, startIndex, endIndex )`][spec]. +/// +/// [spec]: https://tc39.es/ecma402/#sec-createsegmentdataobject +fn create_segment_data_object( + string: JsString, + range: Range, + is_word_like: Option, + context: &mut Context<'_>, +) -> JsObject { + // 1. Let len be the length of string. + // 2. Assert: startIndex ≥ 0. + // ensured by `usize`. + // 3. Assert: endIndex ≤ len. + assert!(range.end <= string.len()); + // 4. Assert: startIndex < endIndex. + assert!(range.start < range.end); + + let start = range.start; + + // 6. Let segment be the substring of string from startIndex to endIndex. + let segment = js_string!(&string[range]); + + // 5. Let result be OrdinaryObjectCreate(%Object.prototype%). + let object = &mut ObjectInitializer::new(context); + + object + // 7. Perform ! CreateDataPropertyOrThrow(result, "segment", segment). + .property(js_string!("segment"), segment, Attribute::all()) + // 8. Perform ! CreateDataPropertyOrThrow(result, "index", 𝔽(startIndex)). + .property(js_string!("index"), start, Attribute::all()) + // 9. Perform ! CreateDataPropertyOrThrow(result, "input", string). + .property(js_string!("input"), string, Attribute::all()); + + // 10. Let granularity be segmenter.[[SegmenterGranularity]]. + // 11. If granularity is "word", then + if let Some(is_word_like) = is_word_like { + // a. Let isWordLike be a Boolean value indicating whether the segment in string is "word-like" according to locale segmenter.[[Locale]]. + // b. Perform ! CreateDataPropertyOrThrow(result, "isWordLike", isWordLike). + object.property(js_string!("isWordLike"), is_word_like, Attribute::all()); + } + + // 12. Return result. + object.build() } diff --git a/boa_engine/src/builtins/intl/segmenter/options.rs b/boa_engine/src/builtins/intl/segmenter/options.rs index 55330e2f2b..944558b7db 100644 --- a/boa_engine/src/builtins/intl/segmenter/options.rs +++ b/boa_engine/src/builtins/intl/segmenter/options.rs @@ -1,3 +1,7 @@ +use std::fmt::Display; + +use crate::builtins::intl::options::OptionTypeParsable; + #[derive(Debug, Clone, Copy, Default)] pub(crate) enum Granularity { #[default] @@ -6,10 +10,21 @@ pub(crate) enum Granularity { Sentence, } +impl Display for Granularity { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Granularity::Grapheme => "grapheme", + Granularity::Word => "word", + Granularity::Sentence => "sentence", + } + .fmt(f) + } +} + #[derive(Debug)] pub(crate) struct ParseGranularityError; -impl std::fmt::Display for ParseGranularityError { +impl Display for ParseGranularityError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str("provided string was not `grapheme`, `word` or `sentence`") } @@ -27,3 +42,5 @@ impl std::str::FromStr for Granularity { } } } + +impl OptionTypeParsable for Granularity {} diff --git a/boa_engine/src/builtins/intl/segmenter/segments.rs b/boa_engine/src/builtins/intl/segmenter/segments.rs new file mode 100644 index 0000000000..24709b2a8c --- /dev/null +++ b/boa_engine/src/builtins/intl/segmenter/segments.rs @@ -0,0 +1,142 @@ +use boa_gc::{Finalize, Trace}; +use boa_profiler::Profiler; +use itertools::Itertools; + +use crate::{ + builtins::{BuiltInBuilder, IntrinsicObject}, + context::intrinsics::Intrinsics, + js_string, + object::ObjectData, + realm::Realm, + Context, JsArgs, JsNativeError, JsObject, JsResult, JsString, JsSymbol, JsValue, +}; + +use super::{create_segment_data_object, SegmentIterator}; + +#[derive(Debug, Trace, Finalize)] +pub struct Segments { + segmenter: JsObject, + string: JsString, +} + +impl IntrinsicObject for Segments { + fn init(realm: &Realm) { + let _timer = Profiler::global().start_event("%SegmentsPrototype%", "init"); + + BuiltInBuilder::with_intrinsic::(realm) + .static_method(Self::containing, "containing", 1) + .static_method( + Self::iterator, + (JsSymbol::iterator(), js_string!("[Symbol.iterator]")), + 0, + ) + .build(); + } + + fn get(intrinsics: &Intrinsics) -> JsObject { + intrinsics.objects().segments_prototype() + } +} + +impl Segments { + /// [`CreateSegmentsObject ( segmenter, string )`][spec] + /// + /// [spec]: https://tc39.es/ecma402/#sec-createsegmentsobject + pub(crate) fn create( + segmenter: JsObject, + string: JsString, + context: &mut Context<'_>, + ) -> JsObject { + // 1. Let internalSlotsList be « [[SegmentsSegmenter]], [[SegmentsString]] ». + // 2. Let segments be OrdinaryObjectCreate(%SegmentsPrototype%, internalSlotsList). + // 3. Set segments.[[SegmentsSegmenter]] to segmenter. + // 4. Set segments.[[SegmentsString]] to string. + // 5. Return segments. + JsObject::from_proto_and_data( + context.intrinsics().objects().segments_prototype(), + ObjectData::segments(Segments { segmenter, string }), + ) + } + + /// [`%SegmentsPrototype%.containing ( index )`][spec] + /// + /// [spec]: https://tc39.es/ecma402/#sec-%segmentsprototype%.containing + fn containing( + this: &JsValue, + args: &[JsValue], + context: &mut Context<'_>, + ) -> JsResult { + // 1. Let segments be the this value. + // 2. Perform ? RequireInternalSlot(segments, [[SegmentsSegmenter]]). + let segments = this.as_object().map(JsObject::borrow).ok_or_else(|| { + JsNativeError::typ() + .with_message("`containing` can only be called on a `Segments` object") + })?; + let segments = segments.as_segments().ok_or_else(|| { + JsNativeError::typ() + .with_message("`containing` can only be called on a `Segments` object") + })?; + + // 3. Let segmenter be segments.[[SegmentsSegmenter]]. + let segmenter = segments.segmenter.borrow(); + let segmenter = segmenter + .as_segmenter() + .expect("segments object should contain a segmenter"); + + // 4. Let string be segments.[[SegmentsString]]. + // 5. Let len be the length of string. + let len = segments.string.len() as i64; + + // 6. Let n be ? ToIntegerOrInfinity(index). + let Some(n) = args + .get_or_undefined(0) + .to_integer_or_infinity(context)? + .as_integer() + // 7. If n < 0 or n ≥ len, return undefined. + .filter(|i| (0..len).contains(i)) + .map(|n| n as usize) else { + return Ok(JsValue::undefined()); + }; + + // 8. Let startIndex be ! FindBoundary(segmenter, string, n, before). + // 9. Let endIndex be ! FindBoundary(segmenter, string, n, after). + let (range, is_word_like) = { + let mut segments = segmenter.native.segment(&segments.string); + std::iter::from_fn(|| segments.next().map(|i| (i, segments.is_word_like()))) + .tuple_windows() + .find(|((i, _), (j, _))| (*i..*j).contains(&n)) + .map(|((i, _), (j, word))| ((i..j), word)) + .expect("string should have at least a length of 1, and `n` must be in range") + }; + + // 10. Return ! CreateSegmentDataObject(segmenter, string, startIndex, endIndex). + Ok( + create_segment_data_object(segments.string.clone(), range, is_word_like, context) + .into(), + ) + } + + /// [`%SegmentsPrototype% [ @@iterator ] ( )`][spec] + /// + /// [spec]: https://tc39.es/ecma402/#sec-%segmentsprototype%-@@iterator + fn iterator(this: &JsValue, _: &[JsValue], context: &mut Context<'_>) -> JsResult { + // 1. Let segments be the this value. + // 2. Perform ? RequireInternalSlot(segments, [[SegmentsSegmenter]]). + let segments = this.as_object().map(JsObject::borrow).ok_or_else(|| { + JsNativeError::typ() + .with_message("`containing` can only be called on a `Segments` object") + })?; + let segments = segments.as_segments().ok_or_else(|| { + JsNativeError::typ() + .with_message("`containing` can only be called on a `Segments` object") + })?; + + // 3. Let segmenter be segments.[[SegmentsSegmenter]]. + // 4. Let string be segments.[[SegmentsString]]. + // 5. Return ! CreateSegmentIterator(segmenter, string). + Ok( + SegmentIterator::create(segments.segmenter.clone(), segments.string.clone(), context) + .into(), + ) + } +} diff --git a/boa_engine/src/builtins/iterable/mod.rs b/boa_engine/src/builtins/iterable/mod.rs index d54f5b760f..48e498d6a3 100644 --- a/boa_engine/src/builtins/iterable/mod.rs +++ b/boa_engine/src/builtins/iterable/mod.rs @@ -4,9 +4,9 @@ use crate::{ builtins::{BuiltInBuilder, IntrinsicObject}, context::intrinsics::Intrinsics, error::JsNativeError, + js_string, object::JsObject, realm::Realm, - string::utf16, symbol::JsSymbol, Context, JsResult, JsValue, }; @@ -68,6 +68,10 @@ pub struct IteratorPrototypes { /// The `ForInIteratorPrototype` prototype object. for_in: JsObject, + + /// The `%SegmentIteratorPrototype%` prototype object. + #[cfg(feature = "intl")] + segment: JsObject, } impl IteratorPrototypes { @@ -124,6 +128,13 @@ impl IteratorPrototypes { pub fn for_in(&self) -> JsObject { self.for_in.clone() } + + /// Returns the `%SegmentIteratorPrototype%` object. + #[inline] + #[cfg(feature = "intl")] + pub fn segment(&self) -> JsObject { + self.segment.clone() + } } /// `%IteratorPrototype%` object @@ -142,7 +153,7 @@ impl IntrinsicObject for Iterator { BuiltInBuilder::with_intrinsic::(realm) .static_method( |v, _, _| Ok(v.clone()), - (JsSymbol::iterator(), "[Symbol.iterator]"), + (JsSymbol::iterator(), js_string!("[Symbol.iterator]")), 0, ) .build(); @@ -168,7 +179,10 @@ impl IntrinsicObject for AsyncIterator { BuiltInBuilder::with_intrinsic::(realm) .static_method( |v, _, _| Ok(v.clone()), - (JsSymbol::async_iterator(), "[Symbol.asyncIterator]"), + ( + JsSymbol::async_iterator(), + js_string!("[Symbol.asyncIterator]"), + ), 0, ) .build(); @@ -190,10 +204,10 @@ pub fn create_iter_result_object(value: JsValue, done: bool, context: &mut Conte let obj = JsObject::with_object_proto(context.intrinsics()); // 3. Perform ! CreateDataPropertyOrThrow(obj, "value", value). - obj.create_data_property_or_throw(utf16!("value"), value, context) + obj.create_data_property_or_throw(js_string!("value"), value, context) .expect("this CreateDataPropertyOrThrow call must not fail"); // 4. Perform ! CreateDataPropertyOrThrow(obj, "done", done). - obj.create_data_property_or_throw(utf16!("done"), done, context) + obj.create_data_property_or_throw(js_string!("done"), done, context) .expect("this CreateDataPropertyOrThrow call must not fail"); // 5. Return obj. obj.into() @@ -267,7 +281,7 @@ impl JsValue { })?; // 5. Let nextMethod be ? GetV(iterator, "next"). - let next_method = iterator.get_v(utf16!("next"), context)?; + let next_method = iterator.get_v(js_string!("next"), context)?; // 6. Let iteratorRecord be the Record { [[Iterator]]: iterator, [[NextMethod]]: nextMethod, [[Done]]: false }. // 7. Return iteratorRecord. @@ -303,7 +317,7 @@ impl IteratorResult { #[inline] pub fn complete(&self, context: &mut Context<'_>) -> JsResult { // 1. Return ToBoolean(? Get(iterResult, "done")). - Ok(self.object.get(utf16!("done"), context)?.to_boolean()) + Ok(self.object.get(js_string!("done"), context)?.to_boolean()) } /// `IteratorValue ( iterResult )` @@ -319,7 +333,7 @@ impl IteratorResult { #[inline] pub fn value(&self, context: &mut Context<'_>) -> JsResult { // 1. Return ? Get(iterResult, "value"). - self.object.get(utf16!("value"), context) + self.object.get(js_string!("value"), context) } } @@ -474,7 +488,7 @@ impl IteratorRecord { let iterator = &self.iterator; // 3. Let innerResult be Completion(GetMethod(iterator, "return")). - let inner_result = iterator.get_method(utf16!("return"), context); + let inner_result = iterator.get_method(js_string!("return"), context); // 4. If innerResult.[[Type]] is normal, then let inner_result = match inner_result { diff --git a/boa_engine/src/builtins/json/mod.rs b/boa_engine/src/builtins/json/mod.rs index 1cba7255ab..8c07c8e99d 100644 --- a/boa_engine/src/builtins/json/mod.rs +++ b/boa_engine/src/builtins/json/mod.rs @@ -13,10 +13,9 @@ //! [json]: https://www.json.org/json-en.html //! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON -use std::{ - borrow::Cow, - iter::{once, FusedIterator}, -}; +use std::{borrow::Cow, iter::once}; + +use itertools::Itertools; use crate::{ builtins::BuiltInObject, @@ -42,100 +41,6 @@ use super::{BuiltInBuilder, IntrinsicObject}; #[cfg(test)] mod tests; -// `Intersperse` impl taken from `itertools` -#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] -#[derive(Clone, Debug)] -struct Intersperse -where - I: Iterator, -{ - element: I::Item, - iter: std::iter::Fuse, - peek: Option, -} - -fn intersperse(iter: I, element: I::Item) -> Intersperse -where - I: Iterator, -{ - let mut iter = iter.fuse(); - Intersperse { - peek: iter.next(), - iter, - element, - } -} - -impl Iterator for Intersperse -where - I: Iterator, - I::Item: Clone, -{ - type Item = I::Item; - fn next(&mut self) -> Option { - if self.peek.is_some() { - self.peek.take() - } else { - self.peek = self.iter.next(); - if self.peek.is_some() { - Some(self.element.clone()) - } else { - None - } - } - } - - fn size_hint(&self) -> (usize, Option) { - type SizeHint = (usize, Option); - const fn add(a: SizeHint, b: SizeHint) -> SizeHint { - let min = a.0.saturating_add(b.0); - let max = match (a.1, b.1) { - (Some(x), Some(y)) => x.checked_add(y), - _ => None, - }; - - (min, max) - } - - fn add_scalar(sh: SizeHint, x: usize) -> SizeHint { - let (mut low, mut hi) = sh; - low = low.saturating_add(x); - hi = hi.and_then(|elt| elt.checked_add(x)); - (low, hi) - } - // 2 * SH + { 1 or 0 } - let has_peek = usize::from(self.peek.is_some()); - let sh = self.iter.size_hint(); - add_scalar(add(sh, sh), has_peek) - } - - fn fold(mut self, init: B, mut f: F) -> B - where - Self: Sized, - F: FnMut(B, Self::Item) -> B, - { - let mut accum = init; - - if let Some(x) = self.peek.take() { - accum = f(accum, x); - } - - let element = &mut self.element; - - self.iter.fold(accum, |accum, x| { - let accum = f(accum, element.clone()); - f(accum, x) - }) - } -} - -impl FusedIterator for Intersperse -where - I: Iterator, - I::Item: Clone, -{ -} - /// JavaScript `JSON` global object. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub(crate) struct Json; @@ -744,7 +649,10 @@ impl Json { // ii. Let final be the string-concatenation of "{", properties, and "}". let separator = utf16!(","); let result = once(utf16!("{")) - .chain(intersperse(partial.iter().map(Vec::as_slice), separator)) + .chain(Itertools::intersperse( + partial.iter().map(Vec::as_slice), + separator, + )) .chain(once(utf16!("}"))) .flatten() .copied() @@ -764,7 +672,10 @@ impl Json { // the code unit 0x000A (LINE FEED), stepback, and "}". let result = [utf16!("{\n"), &state.indent[..]] .into_iter() - .chain(intersperse(partial.iter().map(Vec::as_slice), &separator)) + .chain(Itertools::intersperse( + partial.iter().map(Vec::as_slice), + &separator, + )) .chain([utf16!("\n"), &stepback[..], utf16!("}")].into_iter()) .flatten() .copied() @@ -854,7 +765,10 @@ impl Json { // ii. Let final be the string-concatenation of "[", properties, and "]". let separator = utf16!(","); let result = once(utf16!("[")) - .chain(intersperse(partial.iter().map(Cow::as_ref), separator)) + .chain(Itertools::intersperse( + partial.iter().map(Cow::as_ref), + separator, + )) .chain(once(utf16!("]"))) .flatten() .copied() @@ -872,7 +786,10 @@ impl Json { // iii. Let final be the string-concatenation of "[", the code unit 0x000A (LINE FEED), state.[[Indent]], properties, the code unit 0x000A (LINE FEED), stepback, and "]". let result = [utf16!("[\n"), &state.indent[..]] .into_iter() - .chain(intersperse(partial.iter().map(Cow::as_ref), &separator)) + .chain(Itertools::intersperse( + partial.iter().map(Cow::as_ref), + &separator, + )) .chain([utf16!("\n"), &stepback[..], utf16!("]")].into_iter()) .flatten() .copied() diff --git a/boa_engine/src/builtins/mod.rs b/boa_engine/src/builtins/mod.rs index c734e935c6..a6aa6e5f86 100644 --- a/boa_engine/src/builtins/mod.rs +++ b/boa_engine/src/builtins/mod.rs @@ -271,6 +271,8 @@ impl Realm { intl::Locale::init(self); intl::DateTimeFormat::init(self); intl::Segmenter::init(self); + intl::segmenter::Segments::init(self); + intl::segmenter::SegmentIterator::init(self); } } } diff --git a/boa_engine/src/context/icu.rs b/boa_engine/src/context/icu.rs index cfe918b5cf..a33cbd25f9 100644 --- a/boa_engine/src/context/icu.rs +++ b/boa_engine/src/context/icu.rs @@ -7,11 +7,15 @@ use icu_provider::{ AnyProvider, AsDeserializingBufferProvider, AsDowncastingAnyProvider, BufferProvider, DataError, DataLocale, DataProvider, DataRequest, DataResponse, KeyedDataMarker, MaybeSendSync, }; +use icu_segmenter::{GraphemeClusterSegmenter, SegmenterError, SentenceSegmenter, WordSegmenter}; use serde::Deserialize; use yoke::{trait_hack::YokeTraitHack, Yokeable}; use zerofrom::ZeroFrom; -use crate::builtins::intl::list_format::ListFormatType; +use crate::builtins::intl::{ + list_format::ListFormatType, + segmenter::{Granularity, NativeSegmenter}, +}; /// ICU4X data provider used in boa. /// @@ -55,19 +59,19 @@ impl BoaProvider<'_> { pub(crate) fn try_new_locale_canonicalizer( &self, ) -> Result { - match self { + match *self { BoaProvider::Buffer(buffer) => { - LocaleCanonicalizer::try_new_with_buffer_provider(&**buffer) + LocaleCanonicalizer::try_new_with_buffer_provider(buffer) } - BoaProvider::Any(any) => LocaleCanonicalizer::try_new_with_any_provider(&**any), + BoaProvider::Any(any) => LocaleCanonicalizer::try_new_with_any_provider(any), } } /// Creates a new [`LocaleExpander`] from the provided [`DataProvider`]. pub(crate) fn try_new_locale_expander(&self) -> Result { - match self { - BoaProvider::Buffer(buffer) => LocaleExpander::try_new_with_buffer_provider(&**buffer), - BoaProvider::Any(any) => LocaleExpander::try_new_with_any_provider(&**any), + match *self { + BoaProvider::Buffer(buffer) => LocaleExpander::try_new_with_buffer_provider(buffer), + BoaProvider::Any(any) => LocaleExpander::try_new_with_any_provider(any), } } @@ -78,33 +82,27 @@ impl BoaProvider<'_> { typ: ListFormatType, style: ListLength, ) -> Result { - match self { + match *self { BoaProvider::Buffer(buf) => match typ { ListFormatType::Conjunction => { - ListFormatter::try_new_and_with_length_with_buffer_provider( - &**buf, locale, style, - ) + ListFormatter::try_new_and_with_length_with_buffer_provider(buf, locale, style) } ListFormatType::Disjunction => { - ListFormatter::try_new_or_with_length_with_buffer_provider( - &**buf, locale, style, - ) + ListFormatter::try_new_or_with_length_with_buffer_provider(buf, locale, style) } ListFormatType::Unit => { - ListFormatter::try_new_unit_with_length_with_buffer_provider( - &**buf, locale, style, - ) + ListFormatter::try_new_unit_with_length_with_buffer_provider(buf, locale, style) } }, BoaProvider::Any(any) => match typ { ListFormatType::Conjunction => { - ListFormatter::try_new_and_with_length_with_any_provider(&**any, locale, style) + ListFormatter::try_new_and_with_length_with_any_provider(any, locale, style) } ListFormatType::Disjunction => { - ListFormatter::try_new_or_with_length_with_any_provider(&**any, locale, style) + ListFormatter::try_new_or_with_length_with_any_provider(any, locale, style) } ListFormatType::Unit => { - ListFormatter::try_new_unit_with_length_with_any_provider(&**any, locale, style) + ListFormatter::try_new_unit_with_length_with_any_provider(any, locale, style) } }, } @@ -116,11 +114,37 @@ impl BoaProvider<'_> { locale: &DataLocale, options: CollatorOptions, ) -> Result { - match self { + match *self { BoaProvider::Buffer(buf) => { - Collator::try_new_with_buffer_provider(&**buf, locale, options) + Collator::try_new_with_buffer_provider(buf, locale, options) + } + BoaProvider::Any(any) => Collator::try_new_with_any_provider(any, locale, options), + } + } + + /// Creates a new [`NativeSegmenter`] from the provided [`DataProvider`] and options. + pub(crate) fn try_new_segmenter( + &self, + granularity: Granularity, + ) -> Result { + match granularity { + Granularity::Grapheme => match *self { + BoaProvider::Buffer(buf) => { + GraphemeClusterSegmenter::try_new_with_buffer_provider(buf) + } + BoaProvider::Any(any) => GraphemeClusterSegmenter::try_new_with_any_provider(any), + } + .map(|seg| NativeSegmenter::Grapheme(Box::new(seg))), + Granularity::Word => match *self { + BoaProvider::Buffer(buf) => WordSegmenter::try_new_auto_with_buffer_provider(buf), + BoaProvider::Any(any) => WordSegmenter::try_new_auto_with_any_provider(any), + } + .map(|seg| NativeSegmenter::Word(Box::new(seg))), + Granularity::Sentence => match *self { + BoaProvider::Buffer(buf) => SentenceSegmenter::try_new_with_buffer_provider(buf), + BoaProvider::Any(any) => SentenceSegmenter::try_new_with_any_provider(any), } - BoaProvider::Any(any) => Collator::try_new_with_any_provider(&**any, locale, options), + .map(|seg| NativeSegmenter::Sentence(Box::new(seg))), } } } diff --git a/boa_engine/src/context/intrinsics.rs b/boa_engine/src/context/intrinsics.rs index ff0a09bcbb..50a684ea98 100644 --- a/boa_engine/src/context/intrinsics.rs +++ b/boa_engine/src/context/intrinsics.rs @@ -783,6 +783,10 @@ pub struct IntrinsicObjects { /// [`%Intl%`](https://tc39.es/ecma402/#intl-object) #[cfg(feature = "intl")] intl: JsObject, + + /// [`%SegmentsPrototype%`](https://tc39.es/ecma402/#sec-%segmentsprototype%-object) + #[cfg(feature = "intl")] + segments_prototype: JsObject, } impl Default for IntrinsicObjects { @@ -808,6 +812,8 @@ impl Default for IntrinsicObjects { unescape: JsFunction::empty_intrinsic_function(false), #[cfg(feature = "intl")] intl: JsObject::default(), + #[cfg(feature = "intl")] + segments_prototype: JsObject::default(), } } } @@ -935,4 +941,12 @@ impl IntrinsicObjects { pub fn intl(&self) -> JsObject { self.intl.clone() } + + /// Gets the [`%SegmentsPrototype%`][spec] intrinsic object. + /// + /// [spec]: https://tc39.es/ecma402/#sec-%segmentsprototype%-object + #[cfg(feature = "intl")] + pub fn segments_prototype(&self) -> JsObject { + self.segments_prototype.clone() + } } diff --git a/boa_engine/src/object/mod.rs b/boa_engine/src/object/mod.rs index 243e9b6cb6..b15f1a8dab 100644 --- a/boa_engine/src/object/mod.rs +++ b/boa_engine/src/object/mod.rs @@ -25,7 +25,10 @@ use self::internal_methods::{ }; #[cfg(feature = "intl")] use crate::builtins::intl::{ - collator::Collator, date_time_format::DateTimeFormat, list_format::ListFormat, + collator::Collator, + date_time_format::DateTimeFormat, + list_format::ListFormat, + segmenter::{SegmentIterator, Segmenter, Segments}, }; use crate::{ builtins::{ @@ -314,6 +317,18 @@ pub enum ObjectKind { /// The `Intl.Locale` object kind. #[cfg(feature = "intl")] Locale(Box), + + /// The `Intl.Segmenter` object kind. + #[cfg(feature = "intl")] + Segmenter(Segmenter), + + /// The `Segments` object kind. + #[cfg(feature = "intl")] + Segments(Segments), + + /// The `Segment Iterator` object kind. + #[cfg(feature = "intl")] + SegmentIterator(SegmentIterator), } unsafe impl Trace for ObjectKind { @@ -347,7 +362,11 @@ unsafe impl Trace for ObjectKind { #[cfg(feature = "intl")] Self::Collator(co) => mark(co), #[cfg(feature = "intl")] - Self::ListFormat(_) | Self::Locale(_) => {} + Self::Segments(seg) => mark(seg), + #[cfg(feature = "intl")] + Self::SegmentIterator(it) => mark(it), + #[cfg(feature = "intl")] + Self::ListFormat(_) | Self::Locale(_) | Self::Segmenter(_) => {} Self::RegExp(_) | Self::BigInt(_) | Self::Boolean(_) @@ -724,6 +743,36 @@ impl ObjectData { internal_methods: &ORDINARY_INTERNAL_METHODS, } } + + /// Create the `Segmenter` object data + #[cfg(feature = "intl")] + #[must_use] + pub fn segmenter(segmenter: Segmenter) -> Self { + Self { + kind: ObjectKind::Segmenter(segmenter), + internal_methods: &ORDINARY_INTERNAL_METHODS, + } + } + + /// Create the `Segments` object data + #[cfg(feature = "intl")] + #[must_use] + pub fn segments(segments: Segments) -> Self { + Self { + kind: ObjectKind::Segments(segments), + internal_methods: &ORDINARY_INTERNAL_METHODS, + } + } + + /// Create the `SegmentIterator` object data + #[cfg(feature = "intl")] + #[must_use] + pub fn segment_iterator(segment_iterator: SegmentIterator) -> Self { + Self { + kind: ObjectKind::SegmentIterator(segment_iterator), + internal_methods: &ORDINARY_INTERNAL_METHODS, + } + } } impl Debug for ObjectKind { @@ -773,6 +822,12 @@ impl Debug for ObjectKind { Self::ListFormat(_) => "ListFormat", #[cfg(feature = "intl")] Self::Locale(_) => "Locale", + #[cfg(feature = "intl")] + Self::Segmenter(_) => "Segmenter", + #[cfg(feature = "intl")] + Self::Segments(_) => "Segments", + #[cfg(feature = "intl")] + Self::SegmentIterator(_) => "SegmentIterator", }) } } @@ -1540,6 +1595,43 @@ impl Object { } } + /// Checks if it is a `Segmenter` object. + #[inline] + #[cfg(feature = "intl")] + pub const fn is_segmenter(&self) -> bool { + matches!(self.kind, ObjectKind::Segmenter(_)) + } + + /// Gets the `Segmenter` data if the object is a `Segmenter`. + #[inline] + #[cfg(feature = "intl")] + pub const fn as_segmenter(&self) -> Option<&Segmenter> { + match self.kind { + ObjectKind::Segmenter(ref seg) => Some(seg), + _ => None, + } + } + + /// Gets the `Segments` data if the object is a `Segments`. + #[inline] + #[cfg(feature = "intl")] + pub const fn as_segments(&self) -> Option<&Segments> { + match self.kind { + ObjectKind::Segments(ref seg) => Some(seg), + _ => None, + } + } + + /// Gets the `SegmentIterator` data if the object is a `SegmentIterator`. + #[inline] + #[cfg(feature = "intl")] + pub fn as_segment_iterator_mut(&mut self) -> Option<&mut SegmentIterator> { + match &mut self.kind { + ObjectKind::SegmentIterator(it) => Some(it), + _ => None, + } + } + /// Return `true` if it is a native object and the native type is `T`. pub fn is(&self) -> bool where @@ -1688,12 +1780,12 @@ impl From for FunctionBinding { impl From<(B, N)> for FunctionBinding where B: Into, - N: AsRef, + N: Into, { fn from((binding, name): (B, N)) -> Self { Self { binding: binding.into(), - name: name.as_ref().into(), + name: name.into(), } } } diff --git a/test_ignore.toml b/test_ignore.toml index 64ccdec7b4..56cf37445f 100644 --- a/test_ignore.toml +++ b/test_ignore.toml @@ -22,7 +22,6 @@ features = [ "Intl.DurationFormat", "Intl.DisplayNames", "Intl.RelativeTimeFormat", - "Intl.Segmenter", # Stage 3 proposals