Browse Source

Implement `Intl.Segmenter` (#2840)

The new ICU4X release stabilized the `icu_segmenter` component, so this PR implements `Intl.Segmenter` using that as a base.

Also, I opted for importing `itertools` instead of copy-pasting the implementation of `TupleWindows` because its design is a lot more complex than `Intersperse`, which we copy-pasted previously. Though, I disabled all `std` features of `itertools` to make it a lot more lightweight, so it shouldn't make much difference in compilation times.
pull/2852/head
José Julián Espina 1 year ago
parent
commit
1b67e5d607
  1. 2
      Cargo.lock
  2. 3
      boa_engine/Cargo.toml
  3. 35
      boa_engine/src/builtins/intl/locale/utils.rs
  4. 152
      boa_engine/src/builtins/intl/segmenter/iterator.rs
  5. 292
      boa_engine/src/builtins/intl/segmenter/mod.rs
  6. 19
      boa_engine/src/builtins/intl/segmenter/options.rs
  7. 142
      boa_engine/src/builtins/intl/segmenter/segments.rs
  8. 32
      boa_engine/src/builtins/iterable/mod.rs
  9. 121
      boa_engine/src/builtins/json/mod.rs
  10. 2
      boa_engine/src/builtins/mod.rs
  11. 70
      boa_engine/src/context/icu.rs
  12. 14
      boa_engine/src/context/intrinsics.rs
  13. 100
      boa_engine/src/object/mod.rs
  14. 1
      test_ignore.toml

2
Cargo.lock generated

@ -415,8 +415,10 @@ dependencies = [
"icu_locid_transform", "icu_locid_transform",
"icu_plurals", "icu_plurals",
"icu_provider", "icu_provider",
"icu_segmenter",
"indexmap", "indexmap",
"indoc", "indoc",
"itertools",
"jemallocator", "jemallocator",
"num-bigint", "num-bigint",
"num-integer", "num-integer",

3
boa_engine/Cargo.toml

@ -25,6 +25,7 @@ intl = [
"dep:icu_collator", "dep:icu_collator",
"dep:icu_casemapping", "dep:icu_casemapping",
"dep:icu_list", "dep:icu_list",
"dep:icu_segmenter",
"dep:writeable", "dep:writeable",
"dep:sys-locale", "dep:sys-locale",
"dep:yoke", "dep:yoke",
@ -75,6 +76,7 @@ dashmap = "5.4.0"
num_enum = "0.6.1" num_enum = "0.6.1"
pollster = "0.3.0" pollster = "0.3.0"
thin-vec = "0.2.12" thin-vec = "0.2.12"
itertools = { version = "0.10.5", default-features = false }
# intl deps # intl deps
boa_icu_provider = { workspace = true, optional = true } boa_icu_provider = { workspace = true, optional = true }
@ -87,6 +89,7 @@ icu_plurals = { version = "1.2.0", features = ["serde"], optional = true }
icu_provider = { version = "1.2.0", optional = true } icu_provider = { version = "1.2.0", optional = true }
icu_list = { version = "1.2.0", features = ["serde"], optional = true } icu_list = { version = "1.2.0", features = ["serde"], optional = true }
icu_casemapping = { version = "0.7.2", features = ["serde"], optional = true} icu_casemapping = { version = "0.7.2", features = ["serde"], optional = true}
icu_segmenter = { version = "1.2.1", features = ["serde"], optional = true }
writeable = { version = "0.5.2", optional = true } writeable = { version = "0.5.2", optional = true }
yoke = { version = "0.7.1", optional = true } yoke = { version = "0.7.1", optional = true }
zerofrom = { version = "0.1.2", optional = true } zerofrom = { version = "0.1.2", optional = true }

35
boa_engine/src/builtins/intl/locale/utils.rs

@ -20,6 +20,7 @@ use icu_locid::{
}; };
use icu_locid_transform::LocaleCanonicalizer; use icu_locid_transform::LocaleCanonicalizer;
use icu_provider::{DataLocale, DataProvider, DataRequest, DataRequestMetadata, KeyedDataMarker}; use icu_provider::{DataLocale, DataProvider, DataRequest, DataRequestMetadata, KeyedDataMarker};
use icu_segmenter::provider::WordBreakDataV1Marker;
use indexmap::IndexSet; use indexmap::IndexSet;
use tap::TapOptional; use tap::TapOptional;
@ -115,9 +116,14 @@ pub(crate) fn canonicalize_locale_list(
// iv. Else, // iv. Else,
else { else {
// 1. Let tag be ? ToString(kValue). // 1. Let tag be ? ToString(kValue).
let k_value = k_value.to_string(context)?.to_std_string_escaped();
if k_value.contains('_') {
return Err(JsNativeError::range()
.with_message("locale is not a structurally valid language tag")
.into());
}
k_value k_value
.to_string(context)?
.to_std_string_escaped()
.parse() .parse()
// v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception. // v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.
.map_err(|_| { .map_err(|_| {
@ -169,7 +175,11 @@ pub(crate) fn best_available_locale<M: KeyedDataMarker>(
provider, provider,
DataRequest { DataRequest {
locale: &candidate, locale: &candidate,
metadata: DataRequestMetadata::default(), metadata: {
let mut metadata = DataRequestMetadata::default();
metadata.silent = true;
metadata
},
}, },
); );
@ -180,10 +190,15 @@ pub(crate) fn best_available_locale<M: KeyedDataMarker>(
// the fallback algorithm, even if the used locale is exactly the same as the required // the fallback algorithm, even if the used locale is exactly the same as the required
// locale. // locale.
match req.metadata.locale { match req.metadata.locale {
// TODO: ugly hack to accept locales that fallback to "und" in the collator/segmenter services
Some(loc) Some(loc)
if loc == candidate if loc == candidate
// TODO: ugly hack to accept locales that fallback to "und" in the collator service || (loc.is_empty()
|| (loc.is_empty() && M::KEY.path() == CollationMetadataV1Marker::KEY.path()) => && [
CollationMetadataV1Marker::KEY.path(),
WordBreakDataV1Marker::KEY.path(),
]
.contains(&M::KEY.path())) =>
{ {
return Some(candidate.into_locale().id) return Some(candidate.into_locale().id)
} }
@ -242,8 +257,14 @@ pub(crate) fn best_locale_for_provider<M: KeyedDataMarker>(
.metadata .metadata
.locale .locale
.map(|dl| { .map(|dl| {
// TODO: ugly hack to accept locales that fallback to "und" in the collator service // TODO: ugly hack to accept locales that fallback to "und" in the collator/segmenter services
if M::KEY.path() == CollationMetadataV1Marker::KEY.path() && dl.is_empty() { if [
CollationMetadataV1Marker::KEY.path(),
WordBreakDataV1Marker::KEY.path(),
]
.contains(&M::KEY.path())
&& dl.is_empty()
{
candidate.clone() candidate.clone()
} else { } else {
dl.into_locale().id dl.into_locale().id

152
boa_engine/src/builtins/intl/segmenter/iterator.rs

@ -0,0 +1,152 @@
use boa_gc::{Finalize, Trace};
use boa_profiler::Profiler;
use icu_segmenter::{
GraphemeClusterBreakIteratorUtf16, SentenceBreakIteratorUtf16, WordBreakIteratorUtf16,
};
use crate::{
builtins::{iterable::create_iter_result_object, BuiltInBuilder, IntrinsicObject},
context::intrinsics::Intrinsics,
js_string,
object::ObjectData,
property::Attribute,
realm::Realm,
Context, JsNativeError, JsObject, JsResult, JsString, JsSymbol, JsValue,
};
use super::create_segment_data_object;
/// Boundary iterator over a UTF-16 string, wrapping one of the three `icu_segmenter`
/// UTF-16 break iterators (one variant per segmentation granularity).
///
/// `'l` is the borrow of the segmenter that produced the iterator and `'s` is the borrow
/// of the segmented input, mirroring the signature of `NativeSegmenter::segment`.
pub(crate) enum NativeSegmentIterator<'l, 's> {
/// Iterates over grapheme cluster boundaries.
Grapheme(GraphemeClusterBreakIteratorUtf16<'l, 's>),
/// Iterates over word boundaries.
Word(WordBreakIteratorUtf16<'l, 's>),
/// Iterates over sentence boundaries.
Sentence(SentenceBreakIteratorUtf16<'l, 's>),
}
impl Iterator for NativeSegmentIterator<'_, '_> {
type Item = usize;
fn next(&mut self) -> Option<Self::Item> {
match self {
NativeSegmentIterator::Grapheme(g) => g.next(),
NativeSegmentIterator::Word(w) => w.next(),
NativeSegmentIterator::Sentence(s) => s.next(),
}
}
}
impl NativeSegmentIterator<'_, '_> {
    /// Reports whether the segment preceding the current boundary is word-like.
    ///
    /// Only a word break iterator can answer this, so every other granularity
    /// yields `None`.
    pub(crate) fn is_word_like(&self) -> Option<bool> {
        match self {
            Self::Word(words) => Some(words.is_word_like()),
            Self::Grapheme(_) | Self::Sentence(_) => None,
        }
    }
}
/// Internal state of a segment iterator object, created by `SegmentIterator::create`.
///
/// The fields correspond to the `[[IteratingSegmenter]]`, `[[IteratedString]]` and
/// `[[IteratedStringNextSegmentCodeUnitIndex]]` internal slots of the specification.
#[derive(Debug, Trace, Finalize)]
pub struct SegmentIterator {
// `[[IteratingSegmenter]]`: the `Intl.Segmenter` object used to find boundaries.
segmenter: JsObject,
// `[[IteratedString]]`: the string being segmented.
string: JsString,
// `[[IteratedStringNextSegmentCodeUnitIndex]]`: code unit index where the next segment starts.
next_segment_index: usize,
}
impl IntrinsicObject for SegmentIterator {
    /// Populates `%SegmentIteratorPrototype%` with its `@@toStringTag` property and
    /// its `next` method.
    fn init(realm: &Realm) {
        // Bound to a named variable so the profiling event spans the whole function.
        let _profiling_guard =
            Profiler::global().start_event("%SegmentIteratorPrototype%", "init");

        let tag_key = JsSymbol::to_string_tag();
        let tag_value = js_string!("Segmenter String Iterator");
        let next_name = js_string!("next");

        BuiltInBuilder::with_intrinsic::<Self>(realm)
            .static_property(tag_key, tag_value, Attribute::CONFIGURABLE)
            .static_method(Self::next, next_name, 0)
            .build();
    }

    /// Returns the `%SegmentIteratorPrototype%` object stored in the intrinsics.
    fn get(intrinsics: &Intrinsics) -> JsObject {
        let prototypes = intrinsics.objects().iterator_prototypes();
        prototypes.segment()
    }
}
impl SegmentIterator {
/// [`CreateSegmentIterator ( segmenter, string )`][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-createsegmentiterator
pub(crate) fn create(
segmenter: JsObject,
string: JsString,
context: &mut Context<'_>,
) -> JsObject {
// 1. Let internalSlotsList be « [[IteratingSegmenter]], [[IteratedString]], [[IteratedStringNextSegmentCodeUnitIndex]] ».
// 2. Let iterator be OrdinaryObjectCreate(%SegmentIteratorPrototype%, internalSlotsList).
// 3. Set iterator.[[IteratingSegmenter]] to segmenter.
// 4. Set iterator.[[IteratedString]] to string.
// 5. Set iterator.[[IteratedStringNextSegmentCodeUnitIndex]] to 0.
// 6. Return iterator.
JsObject::from_proto_and_data(
context
.intrinsics()
.objects()
.iterator_prototypes()
.segment(),
ObjectData::segment_iterator(Self {
segmenter,
string,
next_segment_index: 0,
}),
)
}
/// [`%SegmentIteratorPrototype%.next ( )`][spec]
///
/// [spec]: https://tc39.es/ecma402/#sec-%segmentiteratorprototype%.next
fn next(this: &JsValue, _: &[JsValue], context: &mut Context<'_>) -> JsResult<JsValue> {
// 1. Let iterator be the this value.
// 2. Perform ? RequireInternalSlot(iterator, [[IteratingSegmenter]]).
let mut iter = this.as_object().map(JsObject::borrow_mut).ok_or_else(|| {
JsNativeError::typ()
.with_message("`next` can only be called on a `Segment Iterator` object")
})?;
let iter = iter.as_segment_iterator_mut().ok_or_else(|| {
JsNativeError::typ()
.with_message("`next` can only be called on a `Segment Iterator` object")
})?;
// 5. Let startIndex be iterator.[[IteratedStringNextSegmentCodeUnitIndex]].
let start = iter.next_segment_index;
// 4. Let string be iterator.[[IteratedString]].
// 6. Let endIndex be ! FindBoundary(segmenter, string, startIndex, after).
let Some((end, is_word_like)) = iter.string.get(start..).and_then(|string| {
// 3. Let segmenter be iterator.[[IteratingSegmenter]].
let segmenter = iter.segmenter.borrow();
let segmenter = segmenter
.as_segmenter()
.expect("segment iterator object should contain a segmenter");
let mut segments = segmenter.native.segment(string);
// the first elem is always 0.
segments.next();
segments.next().map(|end| (start + end, segments.is_word_like()))
}) else {
// 7. If endIndex is not finite, then
// a. Return CreateIterResultObject(undefined, true).
return Ok(create_iter_result_object(JsValue::undefined(), true, context));
};
// 8. Set iterator.[[IteratedStringNextSegmentCodeUnitIndex]] to endIndex.
iter.next_segment_index = end;
// 9. Let segmentData be ! CreateSegmentDataObject(segmenter, string, startIndex, endIndex).
let segment_data =
create_segment_data_object(iter.string.clone(), start..end, is_word_like, context);
// 10. Return CreateIterResultObject(segmentData, false).
Ok(create_iter_result_object(
segment_data.into(),
false,
context,
))
}
}

292
boa_engine/src/builtins/intl/segmenter/mod.rs

@ -1,27 +1,89 @@
// TODO: implement `Segmenter` when https://github.com/unicode-org/icu4x/issues/2259 closes. use std::ops::Range;
use boa_macros::utf16;
use boa_profiler::Profiler; use boa_profiler::Profiler;
use icu_locid::Locale;
use icu_segmenter::provider::WordBreakDataV1Marker;
use crate::{ use crate::{
builtins::{BuiltInBuilder, BuiltInConstructor, BuiltInObject, IntrinsicObject}, builtins::{BuiltInBuilder, BuiltInConstructor, BuiltInObject, IntrinsicObject},
context::intrinsics::{Intrinsics, StandardConstructor, StandardConstructors}, context::intrinsics::{Intrinsics, StandardConstructor, StandardConstructors},
object::JsObject, js_string,
object::{
internal_methods::get_prototype_from_constructor, JsObject, ObjectData, ObjectInitializer,
},
property::Attribute,
realm::Realm, realm::Realm,
Context, JsResult, JsValue, Context, JsArgs, JsNativeError, JsResult, JsString, JsSymbol, JsValue,
}; };
mod iterator;
mod options; mod options;
#[allow(unused)] mod segments;
pub(crate) use iterator::*;
pub(crate) use options::*; pub(crate) use options::*;
pub(crate) use segments::*;
#[derive(Debug, Clone)] use super::{
pub(crate) struct Segmenter; locale::{canonicalize_locale_list, resolve_locale, supported_locales},
options::{get_option, get_options_object, IntlOptions, LocaleMatcher},
Service,
};
/// Internal state of an `Intl.Segmenter` object.
#[derive(Debug)]
pub struct Segmenter {
// Locale resolved by the constructor; reported by `resolvedOptions` as `locale`.
locale: Locale,
// Segmenter built by the constructor for the requested granularity.
native: NativeSegmenter,
}
/// One of the three `icu_segmenter` segmenters; the variant in use determines the
/// granularity reported by `NativeSegmenter::granularity`.
// NOTE(review): the variants are boxed, presumably to keep the enum small — confirm.
#[derive(Debug)]
pub(crate) enum NativeSegmenter {
/// Segments a string into grapheme clusters.
Grapheme(Box<icu_segmenter::GraphemeClusterSegmenter>),
/// Segments a string into words.
Word(Box<icu_segmenter::WordSegmenter>),
/// Segments a string into sentences.
Sentence(Box<icu_segmenter::SentenceSegmenter>),
}
impl NativeSegmenter {
    /// Returns the [`Granularity`] matching the kind of segmenter stored in `self`.
    pub(crate) const fn granularity(&self) -> Granularity {
        match self {
            Self::Sentence(_) => Granularity::Sentence,
            Self::Word(_) => Granularity::Word,
            Self::Grapheme(_) => Granularity::Grapheme,
        }
    }

    /// Segments the UTF-16 `input`, returning an iterator over the code unit indices
    /// of its segment boundaries.
    ///
    /// The returned iterator borrows both the segmenter and the input.
    pub(crate) fn segment<'l, 's>(&'l self, input: &'s [u16]) -> NativeSegmentIterator<'l, 's> {
        match self {
            Self::Grapheme(segmenter) => {
                NativeSegmentIterator::Grapheme(segmenter.segment_utf16(input))
            }
            Self::Word(segmenter) => NativeSegmentIterator::Word(segmenter.segment_utf16(input)),
            Self::Sentence(segmenter) => {
                NativeSegmentIterator::Sentence(segmenter.segment_utf16(input))
            }
        }
    }
}
// Registers `Segmenter` as an `Intl` service.
impl Service for Segmenter {
// Data marker handed to `supported_locales` by `supported_locales_of` to decide which
// locales are supported.
type LangMarker = WordBreakDataV1Marker;
// NOTE(review): presumably `()` means the segmenter has no service-specific locale
// options to resolve — confirm against the `Service` trait.
type LocaleOptions = ();
}
impl IntrinsicObject for Segmenter { impl IntrinsicObject for Segmenter {
fn init(realm: &Realm) { fn init(realm: &Realm) {
let _timer = Profiler::global().start_event(Self::NAME, "init"); let _timer = Profiler::global().start_event(Self::NAME, "init");
BuiltInBuilder::from_standard_constructor::<Self>(realm).build(); BuiltInBuilder::from_standard_constructor::<Self>(realm)
.static_method(Self::supported_locales_of, "supportedLocalesOf", 1)
.property(
JsSymbol::to_string_tag(),
"Intl.Segmenter",
Attribute::CONFIGURABLE,
)
.method(Self::resolved_options, "resolvedOptions", 0)
.method(Self::segment, "segment", 1)
.build();
} }
fn get(intrinsics: &Intrinsics) -> JsObject { fn get(intrinsics: &Intrinsics) -> JsObject {
@ -39,8 +101,218 @@ impl BuiltInConstructor for Segmenter {
const STANDARD_CONSTRUCTOR: fn(&StandardConstructors) -> &StandardConstructor = const STANDARD_CONSTRUCTOR: fn(&StandardConstructors) -> &StandardConstructor =
StandardConstructors::segmenter; StandardConstructors::segmenter;
#[allow(clippy::unnecessary_wraps)] fn constructor(
fn constructor(_: &JsValue, _: &[JsValue], _: &mut Context<'_>) -> JsResult<JsValue> { new_target: &JsValue,
Ok(JsValue::Undefined) args: &[JsValue],
context: &mut Context<'_>,
) -> JsResult<JsValue> {
// 1. If NewTarget is undefined, throw a TypeError exception.
if new_target.is_undefined() {
return Err(JsNativeError::typ()
.with_message("cannot call `Intl.Collator` constructor without `new`")
.into());
}
let locales = args.get_or_undefined(0);
let options = args.get_or_undefined(1);
// 4. Let requestedLocales be ? CanonicalizeLocaleList(locales).
let locales = canonicalize_locale_list(locales, context)?;
// 5. Set options to ? GetOptionsObject(options).
let options = get_options_object(options)?;
// 6. Let opt be a new Record.
// 7. Let matcher be ? GetOption(options, "localeMatcher", string, « "lookup", "best fit" », "best fit").
let matcher =
get_option::<LocaleMatcher>(&options, utf16!("localeMatcher"), false, context)?
.unwrap_or_default();
// 8. Set opt.[[localeMatcher]] to matcher.
// 9. Let localeData be %Segmenter%.[[LocaleData]].
// 10. Let r be ResolveLocale(%Segmenter%.[[AvailableLocales]], requestedLocales, opt, %Segmenter%.[[RelevantExtensionKeys]], localeData).
// 11. Set segmenter.[[Locale]] to r.[[locale]].
let locale = resolve_locale::<Self>(
&locales,
&mut IntlOptions {
matcher,
..Default::default()
},
context.icu(),
);
// 12. Let granularity be ? GetOption(options, "granularity", string, « "grapheme", "word", "sentence" », "grapheme").
let granularity =
get_option::<Granularity>(&options, utf16!("granularity"), false, context)?
.unwrap_or_default();
// 13. Set segmenter.[[SegmenterGranularity]] to granularity.
let kind = context
.icu()
.provider()
.try_new_segmenter(granularity)
.map_err(|err| JsNativeError::typ().with_message(err.to_string()))?;
let segmenter = Segmenter {
locale,
native: kind,
};
// 2. Let internalSlotsList be « [[InitializedSegmenter]], [[Locale]], [[SegmenterGranularity]] ».
// 3. Let segmenter be ? OrdinaryCreateFromConstructor(NewTarget, "%Segmenter.prototype%", internalSlotsList).
let proto =
get_prototype_from_constructor(new_target, StandardConstructors::segmenter, context)?;
let segmenter = JsObject::from_proto_and_data(proto, ObjectData::segmenter(segmenter));
// 14. Return segmenter.
Ok(segmenter.into())
}
}
impl Segmenter {
/// [`Intl.Segmenter.supportedLocalesOf ( locales [ , options ] )`][spec].
///
/// Returns an array containing those of the provided locales that are supported in list
/// formatting without having to fall back to the runtime's default locale.
///
/// More information:
/// - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma402/#sec-intl.segmenter.supportedlocalesof
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Segmenter/supportedLocalesOf
fn supported_locales_of(
_: &JsValue,
args: &[JsValue],
context: &mut Context<'_>,
) -> JsResult<JsValue> {
let locales = args.get_or_undefined(0);
let options = args.get_or_undefined(1);
// 1. Let availableLocales be %Segmenter%.[[AvailableLocales]].
// 2. Let requestedLocales be ? CanonicalizeLocaleList(locales).
let requested_locales = canonicalize_locale_list(locales, context)?;
// 3. Return ? SupportedLocales(availableLocales, requestedLocales, options).
supported_locales::<<Self as Service>::LangMarker>(&requested_locales, options, context)
.map(JsValue::from)
} }
/// [`Intl.Segmenter.prototype.resolvedOptions ( )`][spec].
///
/// Returns a new object with properties reflecting the locale and style formatting options
/// computed during the construction of the current `Intl.Segmenter` object.
///
/// More information:
/// - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma402/#sec-Intl.Segmenter.prototype.resolvedoptions
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Segmenter/resolvedOptions
fn resolved_options(
this: &JsValue,
_: &[JsValue],
context: &mut Context<'_>,
) -> JsResult<JsValue> {
// 1. Let segmenter be the this value.
// 2. Perform ? RequireInternalSlot(segmenter, [[InitializedSegmenter]]).
let segmenter = this.as_object().map(JsObject::borrow).ok_or_else(|| {
JsNativeError::typ()
.with_message("`resolved_options` can only be called on an `Intl.Segmenter` object")
})?;
let segmenter = segmenter.as_segmenter().ok_or_else(|| {
JsNativeError::typ()
.with_message("`resolved_options` can only be called on an `Intl.Segmenter` object")
})?;
// 3. Let options be OrdinaryObjectCreate(%Object.prototype%).
// 4. For each row of Table 19, except the header row, in table order, do
// a. Let p be the Property value of the current row.
// b. Let v be the value of segmenter's internal slot whose name is the Internal Slot value of the current row.
// c. Assert: v is not undefined.
// d. Perform ! CreateDataPropertyOrThrow(options, p, v).
let options = ObjectInitializer::new(context)
.property(
js_string!("locale"),
segmenter.locale.to_string(),
Attribute::all(),
)
.property(
js_string!("granularity"),
segmenter.native.granularity().to_string(),
Attribute::all(),
)
.build();
// 5. Return options.
Ok(options.into())
}
/// [`Intl.Segmenter.prototype.segment ( string )`][spec].
///
/// Segments a string according to the locale and granularity of this `Intl.Segmenter` object.
///
/// [spec]: https://tc39.es/ecma402/#sec-intl.segmenter.prototype.segment
fn segment(this: &JsValue, args: &[JsValue], context: &mut Context<'_>) -> JsResult<JsValue> {
// 1. Let segmenter be the this value.
// 2. Perform ? RequireInternalSlot(segmenter, [[InitializedSegmenter]]).
let segmenter = this
.as_object()
.filter(|o| o.borrow().is_segmenter())
.ok_or_else(|| {
JsNativeError::typ().with_message(
"`resolved_options` can only be called on an `Intl.Segmenter` object",
)
})?;
// 3. Let string be ? ToString(string).
let string = args.get_or_undefined(0).to_string(context)?;
// 4. Return ! CreateSegmentsObject(segmenter, string).
Ok(Segments::create(segmenter.clone(), string, context).into())
}
}
/// [`CreateSegmentDataObject ( segmenter, string, startIndex, endIndex )`][spec].
///
/// [spec]: https://tc39.es/ecma402/#sec-createsegmentdataobject
fn create_segment_data_object(
string: JsString,
range: Range<usize>,
is_word_like: Option<bool>,
context: &mut Context<'_>,
) -> JsObject {
// 1. Let len be the length of string.
// 2. Assert: startIndex ≥ 0.
// ensured by `usize`.
// 3. Assert: endIndex ≤ len.
assert!(range.end <= string.len());
// 4. Assert: startIndex < endIndex.
assert!(range.start < range.end);
let start = range.start;
// 6. Let segment be the substring of string from startIndex to endIndex.
let segment = js_string!(&string[range]);
// 5. Let result be OrdinaryObjectCreate(%Object.prototype%).
let object = &mut ObjectInitializer::new(context);
object
// 7. Perform ! CreateDataPropertyOrThrow(result, "segment", segment).
.property(js_string!("segment"), segment, Attribute::all())
// 8. Perform ! CreateDataPropertyOrThrow(result, "index", 𝔽(startIndex)).
.property(js_string!("index"), start, Attribute::all())
// 9. Perform ! CreateDataPropertyOrThrow(result, "input", string).
.property(js_string!("input"), string, Attribute::all());
// 10. Let granularity be segmenter.[[SegmenterGranularity]].
// 11. If granularity is "word", then
if let Some(is_word_like) = is_word_like {
// a. Let isWordLike be a Boolean value indicating whether the segment in string is "word-like" according to locale segmenter.[[Locale]].
// b. Perform ! CreateDataPropertyOrThrow(result, "isWordLike", isWordLike).
object.property(js_string!("isWordLike"), is_word_like, Attribute::all());
}
// 12. Return result.
object.build()
} }

19
boa_engine/src/builtins/intl/segmenter/options.rs

@ -1,3 +1,7 @@
use std::fmt::Display;
use crate::builtins::intl::options::OptionTypeParsable;
#[derive(Debug, Clone, Copy, Default)] #[derive(Debug, Clone, Copy, Default)]
pub(crate) enum Granularity { pub(crate) enum Granularity {
#[default] #[default]
@ -6,10 +10,21 @@ pub(crate) enum Granularity {
Sentence, Sentence,
} }
impl Display for Granularity {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Granularity::Grapheme => "grapheme",
Granularity::Word => "word",
Granularity::Sentence => "sentence",
}
.fmt(f)
}
}
#[derive(Debug)] #[derive(Debug)]
pub(crate) struct ParseGranularityError; pub(crate) struct ParseGranularityError;
impl std::fmt::Display for ParseGranularityError { impl Display for ParseGranularityError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str("provided string was not `grapheme`, `word` or `sentence`") f.write_str("provided string was not `grapheme`, `word` or `sentence`")
} }
@ -27,3 +42,5 @@ impl std::str::FromStr for Granularity {
} }
} }
} }
// Empty impl: the trait is implemented without overriding anything.
// NOTE(review): presumably this is what lets `get_option::<Granularity>` parse the
// `granularity` option through `FromStr` — confirm against `OptionTypeParsable`.
impl OptionTypeParsable for Granularity {}

142
boa_engine/src/builtins/intl/segmenter/segments.rs

@ -0,0 +1,142 @@
use boa_gc::{Finalize, Trace};
use boa_profiler::Profiler;
use itertools::Itertools;
use crate::{
builtins::{BuiltInBuilder, IntrinsicObject},
context::intrinsics::Intrinsics,
js_string,
object::ObjectData,
realm::Realm,
Context, JsArgs, JsNativeError, JsObject, JsResult, JsString, JsSymbol, JsValue,
};
use super::{create_segment_data_object, SegmentIterator};
/// Internal state of a segments object, created by `Segments::create`.
///
/// The fields correspond to the `[[SegmentsSegmenter]]` and `[[SegmentsString]]`
/// internal slots of the specification.
#[derive(Debug, Trace, Finalize)]
pub struct Segments {
// `[[SegmentsSegmenter]]`: the `Intl.Segmenter` object that produced this object.
segmenter: JsObject,
// `[[SegmentsString]]`: the string being segmented.
string: JsString,
}
impl IntrinsicObject for Segments {
    /// Populates `%SegmentsPrototype%` with its `containing` and `[Symbol.iterator]`
    /// methods.
    fn init(realm: &Realm) {
        // Bound to a named variable so the profiling event spans the whole function.
        let _profiling_guard = Profiler::global().start_event("%SegmentsPrototype%", "init");

        let iterator_binding = (JsSymbol::iterator(), js_string!("[Symbol.iterator]"));

        BuiltInBuilder::with_intrinsic::<Self>(realm)
            .static_method(Self::containing, "containing", 1)
            .static_method(Self::iterator, iterator_binding, 0)
            .build();
    }

    /// Returns the `%SegmentsPrototype%` object stored in the intrinsics.
    fn get(intrinsics: &Intrinsics) -> JsObject {
        let objects = intrinsics.objects();
        objects.segments_prototype()
    }
}
impl Segments {
    /// [`CreateSegmentsObject ( segmenter, string )`][spec]
    ///
    /// Builds a segments object tying `string` to the `segmenter` that will segment it.
    ///
    /// [spec]: https://tc39.es/ecma402/#sec-createsegmentsobject
    pub(crate) fn create(
        segmenter: JsObject,
        string: JsString,
        context: &mut Context<'_>,
    ) -> JsObject {
        // 1. Let internalSlotsList be « [[SegmentsSegmenter]], [[SegmentsString]] ».
        // 2. Let segments be OrdinaryObjectCreate(%SegmentsPrototype%, internalSlotsList).
        // 3. Set segments.[[SegmentsSegmenter]] to segmenter.
        // 4. Set segments.[[SegmentsString]] to string.
        // 5. Return segments.
        JsObject::from_proto_and_data(
            context.intrinsics().objects().segments_prototype(),
            ObjectData::segments(Segments { segmenter, string }),
        )
    }

    /// [`%SegmentsPrototype%.containing ( index )`][spec]
    ///
    /// Returns the segment data object of the segment containing the code unit at
    /// `index`, or `undefined` if `index` is out of the bounds of the string.
    ///
    /// # Errors
    ///
    /// Returns a `TypeError` if `this` is not a segments object, and propagates any
    /// error raised while converting `index` to an integer.
    ///
    /// [spec]: https://tc39.es/ecma402/#sec-%segmentsprototype%.containing
    fn containing(
        this: &JsValue,
        args: &[JsValue],
        context: &mut Context<'_>,
    ) -> JsResult<JsValue> {
        // 1. Let segments be the this value.
        // 2. Perform ? RequireInternalSlot(segments, [[SegmentsSegmenter]]).
        let segments = this.as_object().map(JsObject::borrow).ok_or_else(|| {
            JsNativeError::typ()
                .with_message("`containing` can only be called on a `Segments` object")
        })?;
        let segments = segments.as_segments().ok_or_else(|| {
            JsNativeError::typ()
                .with_message("`containing` can only be called on a `Segments` object")
        })?;

        // 3. Let segmenter be segments.[[SegmentsSegmenter]].
        let segmenter = segments.segmenter.borrow();
        let segmenter = segmenter
            .as_segmenter()
            .expect("segments object should contain a segmenter");

        // 4. Let string be segments.[[SegmentsString]].
        // 5. Let len be the length of string.
        let len = segments.string.len() as i64;

        // 6. Let n be ? ToIntegerOrInfinity(index).
        let Some(n) = args
            .get_or_undefined(0)
            .to_integer_or_infinity(context)?
            .as_integer()
            // 7. If n < 0 or n ≥ len, return undefined.
            .filter(|i| (0..len).contains(i))
            .map(|n| n as usize) else {
                return Ok(JsValue::undefined());
            };

        // 8. Let startIndex be ! FindBoundary(segmenter, string, n, before).
        // 9. Let endIndex be ! FindBoundary(segmenter, string, n, after).
        let (range, is_word_like) = {
            let mut segments = segmenter.native.segment(&segments.string);
            // Each boundary is paired with the word-likeness reported right after it was
            // yielded; for a window `(i, j)` the flag that describes the segment `i..j`
            // is therefore the one attached to `j`.
            std::iter::from_fn(|| segments.next().map(|i| (i, segments.is_word_like())))
                .tuple_windows()
                .find(|((i, _), (j, _))| (*i..*j).contains(&n))
                .map(|((i, _), (j, word))| ((i..j), word))
                .expect("string should have at least a length of 1, and `n` must be in range")
        };

        // 10. Return ! CreateSegmentDataObject(segmenter, string, startIndex, endIndex).
        Ok(
            create_segment_data_object(segments.string.clone(), range, is_word_like, context)
                .into(),
        )
    }

    /// [`%SegmentsPrototype% [ @@iterator ] ( )`][spec]
    ///
    /// Returns a new segment iterator over the string of this segments object.
    ///
    /// # Errors
    ///
    /// Returns a `TypeError` if `this` is not a segments object.
    ///
    /// [spec]: https://tc39.es/ecma402/#sec-%segmentsprototype%-@@iterator
    fn iterator(this: &JsValue, _: &[JsValue], context: &mut Context<'_>) -> JsResult<JsValue> {
        // 1. Let segments be the this value.
        // 2. Perform ? RequireInternalSlot(segments, [[SegmentsSegmenter]]).
        let segments = this.as_object().map(JsObject::borrow).ok_or_else(|| {
            JsNativeError::typ()
                .with_message("`[Symbol.iterator]` can only be called on a `Segments` object")
        })?;
        let segments = segments.as_segments().ok_or_else(|| {
            JsNativeError::typ()
                .with_message("`[Symbol.iterator]` can only be called on a `Segments` object")
        })?;

        // 3. Let segmenter be segments.[[SegmentsSegmenter]].
        // 4. Let string be segments.[[SegmentsString]].
        // 5. Return ! CreateSegmentIterator(segmenter, string).
        Ok(
            SegmentIterator::create(segments.segmenter.clone(), segments.string.clone(), context)
                .into(),
        )
    }
}

32
boa_engine/src/builtins/iterable/mod.rs

@ -4,9 +4,9 @@ use crate::{
builtins::{BuiltInBuilder, IntrinsicObject}, builtins::{BuiltInBuilder, IntrinsicObject},
context::intrinsics::Intrinsics, context::intrinsics::Intrinsics,
error::JsNativeError, error::JsNativeError,
js_string,
object::JsObject, object::JsObject,
realm::Realm, realm::Realm,
string::utf16,
symbol::JsSymbol, symbol::JsSymbol,
Context, JsResult, JsValue, Context, JsResult, JsValue,
}; };
@ -68,6 +68,10 @@ pub struct IteratorPrototypes {
/// The `ForInIteratorPrototype` prototype object. /// The `ForInIteratorPrototype` prototype object.
for_in: JsObject, for_in: JsObject,
/// The `%SegmentIteratorPrototype%` prototype object.
#[cfg(feature = "intl")]
segment: JsObject,
} }
impl IteratorPrototypes { impl IteratorPrototypes {
@ -124,6 +128,13 @@ impl IteratorPrototypes {
pub fn for_in(&self) -> JsObject { pub fn for_in(&self) -> JsObject {
self.for_in.clone() self.for_in.clone()
} }
/// Returns the `%SegmentIteratorPrototype%` object.
#[inline]
#[cfg(feature = "intl")]
pub fn segment(&self) -> JsObject {
self.segment.clone()
}
} }
/// `%IteratorPrototype%` object /// `%IteratorPrototype%` object
@ -142,7 +153,7 @@ impl IntrinsicObject for Iterator {
BuiltInBuilder::with_intrinsic::<Self>(realm) BuiltInBuilder::with_intrinsic::<Self>(realm)
.static_method( .static_method(
|v, _, _| Ok(v.clone()), |v, _, _| Ok(v.clone()),
(JsSymbol::iterator(), "[Symbol.iterator]"), (JsSymbol::iterator(), js_string!("[Symbol.iterator]")),
0, 0,
) )
.build(); .build();
@ -168,7 +179,10 @@ impl IntrinsicObject for AsyncIterator {
BuiltInBuilder::with_intrinsic::<Self>(realm) BuiltInBuilder::with_intrinsic::<Self>(realm)
.static_method( .static_method(
|v, _, _| Ok(v.clone()), |v, _, _| Ok(v.clone()),
(JsSymbol::async_iterator(), "[Symbol.asyncIterator]"), (
JsSymbol::async_iterator(),
js_string!("[Symbol.asyncIterator]"),
),
0, 0,
) )
.build(); .build();
@ -190,10 +204,10 @@ pub fn create_iter_result_object(value: JsValue, done: bool, context: &mut Conte
let obj = JsObject::with_object_proto(context.intrinsics()); let obj = JsObject::with_object_proto(context.intrinsics());
// 3. Perform ! CreateDataPropertyOrThrow(obj, "value", value). // 3. Perform ! CreateDataPropertyOrThrow(obj, "value", value).
obj.create_data_property_or_throw(utf16!("value"), value, context) obj.create_data_property_or_throw(js_string!("value"), value, context)
.expect("this CreateDataPropertyOrThrow call must not fail"); .expect("this CreateDataPropertyOrThrow call must not fail");
// 4. Perform ! CreateDataPropertyOrThrow(obj, "done", done). // 4. Perform ! CreateDataPropertyOrThrow(obj, "done", done).
obj.create_data_property_or_throw(utf16!("done"), done, context) obj.create_data_property_or_throw(js_string!("done"), done, context)
.expect("this CreateDataPropertyOrThrow call must not fail"); .expect("this CreateDataPropertyOrThrow call must not fail");
// 5. Return obj. // 5. Return obj.
obj.into() obj.into()
@ -267,7 +281,7 @@ impl JsValue {
})?; })?;
// 5. Let nextMethod be ? GetV(iterator, "next"). // 5. Let nextMethod be ? GetV(iterator, "next").
let next_method = iterator.get_v(utf16!("next"), context)?; let next_method = iterator.get_v(js_string!("next"), context)?;
// 6. Let iteratorRecord be the Record { [[Iterator]]: iterator, [[NextMethod]]: nextMethod, [[Done]]: false }. // 6. Let iteratorRecord be the Record { [[Iterator]]: iterator, [[NextMethod]]: nextMethod, [[Done]]: false }.
// 7. Return iteratorRecord. // 7. Return iteratorRecord.
@ -303,7 +317,7 @@ impl IteratorResult {
#[inline] #[inline]
pub fn complete(&self, context: &mut Context<'_>) -> JsResult<bool> { pub fn complete(&self, context: &mut Context<'_>) -> JsResult<bool> {
// 1. Return ToBoolean(? Get(iterResult, "done")). // 1. Return ToBoolean(? Get(iterResult, "done")).
Ok(self.object.get(utf16!("done"), context)?.to_boolean()) Ok(self.object.get(js_string!("done"), context)?.to_boolean())
} }
/// `IteratorValue ( iterResult )` /// `IteratorValue ( iterResult )`
@ -319,7 +333,7 @@ impl IteratorResult {
#[inline] #[inline]
pub fn value(&self, context: &mut Context<'_>) -> JsResult<JsValue> { pub fn value(&self, context: &mut Context<'_>) -> JsResult<JsValue> {
// 1. Return ? Get(iterResult, "value"). // 1. Return ? Get(iterResult, "value").
self.object.get(utf16!("value"), context) self.object.get(js_string!("value"), context)
} }
} }
@ -474,7 +488,7 @@ impl IteratorRecord {
let iterator = &self.iterator; let iterator = &self.iterator;
// 3. Let innerResult be Completion(GetMethod(iterator, "return")). // 3. Let innerResult be Completion(GetMethod(iterator, "return")).
let inner_result = iterator.get_method(utf16!("return"), context); let inner_result = iterator.get_method(js_string!("return"), context);
// 4. If innerResult.[[Type]] is normal, then // 4. If innerResult.[[Type]] is normal, then
let inner_result = match inner_result { let inner_result = match inner_result {

121
boa_engine/src/builtins/json/mod.rs

@ -13,10 +13,9 @@
//! [json]: https://www.json.org/json-en.html //! [json]: https://www.json.org/json-en.html
//! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON //! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON
use std::{ use std::{borrow::Cow, iter::once};
borrow::Cow,
iter::{once, FusedIterator}, use itertools::Itertools;
};
use crate::{ use crate::{
builtins::BuiltInObject, builtins::BuiltInObject,
@ -42,100 +41,6 @@ use super::{BuiltInBuilder, IntrinsicObject};
#[cfg(test)] #[cfg(test)]
mod tests; mod tests;
// `Intersperse` impl taken from `itertools`
/// Iterator adaptor that yields a clone of `element` between every two
/// consecutive items produced by the wrapped iterator.
#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
#[derive(Clone, Debug)]
struct Intersperse<I>
where
    I: Iterator,
{
    /// Separator value, cloned each time it has to be emitted.
    element: I::Item,
    /// Fused source of the items that have not been pulled yet.
    iter: std::iter::Fuse<I>,
    /// Item already pulled from `iter` that must be yielded before anything else.
    peek: Option<I::Item>,
}

/// Builds an [`Intersperse`] over `iter`, eagerly pulling its first item so
/// that the adaptor starts with an item instead of a separator.
fn intersperse<I>(iter: I, element: I::Item) -> Intersperse<I>
where
    I: Iterator,
{
    let mut fused = iter.fuse();
    let first = fused.next();
    Intersperse {
        element,
        iter: fused,
        peek: first,
    }
}

impl<I> Iterator for Intersperse<I>
where
    I: Iterator,
    I::Item: Clone,
{
    type Item = I::Item;

    fn next(&mut self) -> Option<Self::Item> {
        // A pending item always goes out first.
        if let Some(pending) = self.peek.take() {
            return Some(pending);
        }

        // Nothing pending: stash the following item (if any) and emit the
        // separator that has to precede it.
        self.peek = self.iter.next();
        match self.peek {
            Some(_) => Some(self.element.clone()),
            None => None,
        }
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        // 2 * SH + { 1 or 0 }
        let pending = usize::from(self.peek.is_some());
        let (lower, upper) = self.iter.size_hint();

        // The lower bound saturates, the upper bound becomes unknown on overflow.
        let lower = lower.saturating_add(lower).saturating_add(pending);
        let upper = upper
            .and_then(|hi| hi.checked_add(hi))
            .and_then(|hi| hi.checked_add(pending));

        (lower, upper)
    }

    fn fold<B, F>(self, init: B, mut f: F) -> B
    where
        Self: Sized,
        F: FnMut(B, Self::Item) -> B,
    {
        let Intersperse {
            element,
            iter,
            peek,
        } = self;

        // The pending item, if present, is folded without a leading separator.
        let seed = match peek {
            Some(first) => f(init, first),
            None => init,
        };

        // Every remaining item is preceded by one separator.
        iter.fold(seed, |acc, item| {
            let acc = f(acc, element.clone());
            f(acc, item)
        })
    }
}

// Once `peek` is empty and the fused inner iterator is exhausted, `next`
// keeps returning `None`.
impl<I> FusedIterator for Intersperse<I>
where
    I: Iterator,
    I::Item: Clone,
{
}
/// JavaScript `JSON` global object. /// JavaScript `JSON` global object.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct Json; pub(crate) struct Json;
@ -744,7 +649,10 @@ impl Json {
// ii. Let final be the string-concatenation of "{", properties, and "}". // ii. Let final be the string-concatenation of "{", properties, and "}".
let separator = utf16!(","); let separator = utf16!(",");
let result = once(utf16!("{")) let result = once(utf16!("{"))
.chain(intersperse(partial.iter().map(Vec::as_slice), separator)) .chain(Itertools::intersperse(
partial.iter().map(Vec::as_slice),
separator,
))
.chain(once(utf16!("}"))) .chain(once(utf16!("}")))
.flatten() .flatten()
.copied() .copied()
@ -764,7 +672,10 @@ impl Json {
// the code unit 0x000A (LINE FEED), stepback, and "}". // the code unit 0x000A (LINE FEED), stepback, and "}".
let result = [utf16!("{\n"), &state.indent[..]] let result = [utf16!("{\n"), &state.indent[..]]
.into_iter() .into_iter()
.chain(intersperse(partial.iter().map(Vec::as_slice), &separator)) .chain(Itertools::intersperse(
partial.iter().map(Vec::as_slice),
&separator,
))
.chain([utf16!("\n"), &stepback[..], utf16!("}")].into_iter()) .chain([utf16!("\n"), &stepback[..], utf16!("}")].into_iter())
.flatten() .flatten()
.copied() .copied()
@ -854,7 +765,10 @@ impl Json {
// ii. Let final be the string-concatenation of "[", properties, and "]". // ii. Let final be the string-concatenation of "[", properties, and "]".
let separator = utf16!(","); let separator = utf16!(",");
let result = once(utf16!("[")) let result = once(utf16!("["))
.chain(intersperse(partial.iter().map(Cow::as_ref), separator)) .chain(Itertools::intersperse(
partial.iter().map(Cow::as_ref),
separator,
))
.chain(once(utf16!("]"))) .chain(once(utf16!("]")))
.flatten() .flatten()
.copied() .copied()
@ -872,7 +786,10 @@ impl Json {
// iii. Let final be the string-concatenation of "[", the code unit 0x000A (LINE FEED), state.[[Indent]], properties, the code unit 0x000A (LINE FEED), stepback, and "]". // iii. Let final be the string-concatenation of "[", the code unit 0x000A (LINE FEED), state.[[Indent]], properties, the code unit 0x000A (LINE FEED), stepback, and "]".
let result = [utf16!("[\n"), &state.indent[..]] let result = [utf16!("[\n"), &state.indent[..]]
.into_iter() .into_iter()
.chain(intersperse(partial.iter().map(Cow::as_ref), &separator)) .chain(Itertools::intersperse(
partial.iter().map(Cow::as_ref),
&separator,
))
.chain([utf16!("\n"), &stepback[..], utf16!("]")].into_iter()) .chain([utf16!("\n"), &stepback[..], utf16!("]")].into_iter())
.flatten() .flatten()
.copied() .copied()

2
boa_engine/src/builtins/mod.rs

@ -271,6 +271,8 @@ impl Realm {
intl::Locale::init(self); intl::Locale::init(self);
intl::DateTimeFormat::init(self); intl::DateTimeFormat::init(self);
intl::Segmenter::init(self); intl::Segmenter::init(self);
intl::segmenter::Segments::init(self);
intl::segmenter::SegmentIterator::init(self);
} }
} }
} }

70
boa_engine/src/context/icu.rs

@ -7,11 +7,15 @@ use icu_provider::{
AnyProvider, AsDeserializingBufferProvider, AsDowncastingAnyProvider, BufferProvider, AnyProvider, AsDeserializingBufferProvider, AsDowncastingAnyProvider, BufferProvider,
DataError, DataLocale, DataProvider, DataRequest, DataResponse, KeyedDataMarker, MaybeSendSync, DataError, DataLocale, DataProvider, DataRequest, DataResponse, KeyedDataMarker, MaybeSendSync,
}; };
use icu_segmenter::{GraphemeClusterSegmenter, SegmenterError, SentenceSegmenter, WordSegmenter};
use serde::Deserialize; use serde::Deserialize;
use yoke::{trait_hack::YokeTraitHack, Yokeable}; use yoke::{trait_hack::YokeTraitHack, Yokeable};
use zerofrom::ZeroFrom; use zerofrom::ZeroFrom;
use crate::builtins::intl::list_format::ListFormatType; use crate::builtins::intl::{
list_format::ListFormatType,
segmenter::{Granularity, NativeSegmenter},
};
/// ICU4X data provider used in boa. /// ICU4X data provider used in boa.
/// ///
@ -55,19 +59,19 @@ impl BoaProvider<'_> {
pub(crate) fn try_new_locale_canonicalizer( pub(crate) fn try_new_locale_canonicalizer(
&self, &self,
) -> Result<LocaleCanonicalizer, LocaleTransformError> { ) -> Result<LocaleCanonicalizer, LocaleTransformError> {
match self { match *self {
BoaProvider::Buffer(buffer) => { BoaProvider::Buffer(buffer) => {
LocaleCanonicalizer::try_new_with_buffer_provider(&**buffer) LocaleCanonicalizer::try_new_with_buffer_provider(buffer)
} }
BoaProvider::Any(any) => LocaleCanonicalizer::try_new_with_any_provider(&**any), BoaProvider::Any(any) => LocaleCanonicalizer::try_new_with_any_provider(any),
} }
} }
/// Creates a new [`LocaleExpander`] from the provided [`DataProvider`]. /// Creates a new [`LocaleExpander`] from the provided [`DataProvider`].
pub(crate) fn try_new_locale_expander(&self) -> Result<LocaleExpander, LocaleTransformError> { pub(crate) fn try_new_locale_expander(&self) -> Result<LocaleExpander, LocaleTransformError> {
match self { match *self {
BoaProvider::Buffer(buffer) => LocaleExpander::try_new_with_buffer_provider(&**buffer), BoaProvider::Buffer(buffer) => LocaleExpander::try_new_with_buffer_provider(buffer),
BoaProvider::Any(any) => LocaleExpander::try_new_with_any_provider(&**any), BoaProvider::Any(any) => LocaleExpander::try_new_with_any_provider(any),
} }
} }
@ -78,33 +82,27 @@ impl BoaProvider<'_> {
typ: ListFormatType, typ: ListFormatType,
style: ListLength, style: ListLength,
) -> Result<ListFormatter, ListError> { ) -> Result<ListFormatter, ListError> {
match self { match *self {
BoaProvider::Buffer(buf) => match typ { BoaProvider::Buffer(buf) => match typ {
ListFormatType::Conjunction => { ListFormatType::Conjunction => {
ListFormatter::try_new_and_with_length_with_buffer_provider( ListFormatter::try_new_and_with_length_with_buffer_provider(buf, locale, style)
&**buf, locale, style,
)
} }
ListFormatType::Disjunction => { ListFormatType::Disjunction => {
ListFormatter::try_new_or_with_length_with_buffer_provider( ListFormatter::try_new_or_with_length_with_buffer_provider(buf, locale, style)
&**buf, locale, style,
)
} }
ListFormatType::Unit => { ListFormatType::Unit => {
ListFormatter::try_new_unit_with_length_with_buffer_provider( ListFormatter::try_new_unit_with_length_with_buffer_provider(buf, locale, style)
&**buf, locale, style,
)
} }
}, },
BoaProvider::Any(any) => match typ { BoaProvider::Any(any) => match typ {
ListFormatType::Conjunction => { ListFormatType::Conjunction => {
ListFormatter::try_new_and_with_length_with_any_provider(&**any, locale, style) ListFormatter::try_new_and_with_length_with_any_provider(any, locale, style)
} }
ListFormatType::Disjunction => { ListFormatType::Disjunction => {
ListFormatter::try_new_or_with_length_with_any_provider(&**any, locale, style) ListFormatter::try_new_or_with_length_with_any_provider(any, locale, style)
} }
ListFormatType::Unit => { ListFormatType::Unit => {
ListFormatter::try_new_unit_with_length_with_any_provider(&**any, locale, style) ListFormatter::try_new_unit_with_length_with_any_provider(any, locale, style)
} }
}, },
} }
@ -116,11 +114,37 @@ impl BoaProvider<'_> {
locale: &DataLocale, locale: &DataLocale,
options: CollatorOptions, options: CollatorOptions,
) -> Result<Collator, CollatorError> { ) -> Result<Collator, CollatorError> {
match self { match *self {
BoaProvider::Buffer(buf) => { BoaProvider::Buffer(buf) => {
Collator::try_new_with_buffer_provider(&**buf, locale, options) Collator::try_new_with_buffer_provider(buf, locale, options)
}
BoaProvider::Any(any) => Collator::try_new_with_any_provider(any, locale, options),
}
}
/// Creates a new [`NativeSegmenter`] from the provided [`DataProvider`] and options.
pub(crate) fn try_new_segmenter(
&self,
granularity: Granularity,
) -> Result<NativeSegmenter, SegmenterError> {
match granularity {
Granularity::Grapheme => match *self {
BoaProvider::Buffer(buf) => {
GraphemeClusterSegmenter::try_new_with_buffer_provider(buf)
}
BoaProvider::Any(any) => GraphemeClusterSegmenter::try_new_with_any_provider(any),
}
.map(|seg| NativeSegmenter::Grapheme(Box::new(seg))),
Granularity::Word => match *self {
BoaProvider::Buffer(buf) => WordSegmenter::try_new_auto_with_buffer_provider(buf),
BoaProvider::Any(any) => WordSegmenter::try_new_auto_with_any_provider(any),
}
.map(|seg| NativeSegmenter::Word(Box::new(seg))),
Granularity::Sentence => match *self {
BoaProvider::Buffer(buf) => SentenceSegmenter::try_new_with_buffer_provider(buf),
BoaProvider::Any(any) => SentenceSegmenter::try_new_with_any_provider(any),
} }
BoaProvider::Any(any) => Collator::try_new_with_any_provider(&**any, locale, options), .map(|seg| NativeSegmenter::Sentence(Box::new(seg))),
} }
} }
} }

14
boa_engine/src/context/intrinsics.rs

@ -783,6 +783,10 @@ pub struct IntrinsicObjects {
/// [`%Intl%`](https://tc39.es/ecma402/#intl-object) /// [`%Intl%`](https://tc39.es/ecma402/#intl-object)
#[cfg(feature = "intl")] #[cfg(feature = "intl")]
intl: JsObject, intl: JsObject,
/// [`%SegmentsPrototype%`](https://tc39.es/ecma402/#sec-%segmentsprototype%-object)
#[cfg(feature = "intl")]
segments_prototype: JsObject,
} }
impl Default for IntrinsicObjects { impl Default for IntrinsicObjects {
@ -808,6 +812,8 @@ impl Default for IntrinsicObjects {
unescape: JsFunction::empty_intrinsic_function(false), unescape: JsFunction::empty_intrinsic_function(false),
#[cfg(feature = "intl")] #[cfg(feature = "intl")]
intl: JsObject::default(), intl: JsObject::default(),
#[cfg(feature = "intl")]
segments_prototype: JsObject::default(),
} }
} }
} }
@ -935,4 +941,12 @@ impl IntrinsicObjects {
pub fn intl(&self) -> JsObject { pub fn intl(&self) -> JsObject {
self.intl.clone() self.intl.clone()
} }
/// Returns a clone of the stored [`%SegmentsPrototype%`][spec] intrinsic object.
///
/// [spec]: https://tc39.es/ecma402/#sec-%segmentsprototype%-object
#[cfg(feature = "intl")]
pub fn segments_prototype(&self) -> JsObject {
    let prototype = &self.segments_prototype;
    prototype.clone()
}
} }

100
boa_engine/src/object/mod.rs

@ -25,7 +25,10 @@ use self::internal_methods::{
}; };
#[cfg(feature = "intl")] #[cfg(feature = "intl")]
use crate::builtins::intl::{ use crate::builtins::intl::{
collator::Collator, date_time_format::DateTimeFormat, list_format::ListFormat, collator::Collator,
date_time_format::DateTimeFormat,
list_format::ListFormat,
segmenter::{SegmentIterator, Segmenter, Segments},
}; };
use crate::{ use crate::{
builtins::{ builtins::{
@ -314,6 +317,18 @@ pub enum ObjectKind {
/// The `Intl.Locale` object kind. /// The `Intl.Locale` object kind.
#[cfg(feature = "intl")] #[cfg(feature = "intl")]
Locale(Box<icu_locid::Locale>), Locale(Box<icu_locid::Locale>),
/// The `Intl.Segmenter` object kind.
#[cfg(feature = "intl")]
Segmenter(Segmenter),
/// The `Segments` object kind.
#[cfg(feature = "intl")]
Segments(Segments),
/// The `Segment Iterator` object kind.
#[cfg(feature = "intl")]
SegmentIterator(SegmentIterator),
} }
unsafe impl Trace for ObjectKind { unsafe impl Trace for ObjectKind {
@ -347,7 +362,11 @@ unsafe impl Trace for ObjectKind {
#[cfg(feature = "intl")] #[cfg(feature = "intl")]
Self::Collator(co) => mark(co), Self::Collator(co) => mark(co),
#[cfg(feature = "intl")] #[cfg(feature = "intl")]
Self::ListFormat(_) | Self::Locale(_) => {} Self::Segments(seg) => mark(seg),
#[cfg(feature = "intl")]
Self::SegmentIterator(it) => mark(it),
#[cfg(feature = "intl")]
Self::ListFormat(_) | Self::Locale(_) | Self::Segmenter(_) => {}
Self::RegExp(_) Self::RegExp(_)
| Self::BigInt(_) | Self::BigInt(_)
| Self::Boolean(_) | Self::Boolean(_)
@ -724,6 +743,36 @@ impl ObjectData {
internal_methods: &ORDINARY_INTERNAL_METHODS, internal_methods: &ORDINARY_INTERNAL_METHODS,
} }
} }
/// Creates the object data for a `Segmenter` object.
///
/// The returned data wraps `segmenter` in [`ObjectKind::Segmenter`] and uses
/// the ordinary internal methods.
#[cfg(feature = "intl")]
#[must_use]
pub fn segmenter(segmenter: Segmenter) -> Self {
    let kind = ObjectKind::Segmenter(segmenter);
    Self {
        kind,
        internal_methods: &ORDINARY_INTERNAL_METHODS,
    }
}
/// Creates the object data for a `Segments` object.
///
/// The returned data wraps `segments` in [`ObjectKind::Segments`] and uses
/// the ordinary internal methods.
#[cfg(feature = "intl")]
#[must_use]
pub fn segments(segments: Segments) -> Self {
    let kind = ObjectKind::Segments(segments);
    Self {
        kind,
        internal_methods: &ORDINARY_INTERNAL_METHODS,
    }
}
/// Creates the object data for a `SegmentIterator` object.
///
/// The returned data wraps `segment_iterator` in [`ObjectKind::SegmentIterator`]
/// and uses the ordinary internal methods.
#[cfg(feature = "intl")]
#[must_use]
pub fn segment_iterator(segment_iterator: SegmentIterator) -> Self {
    let kind = ObjectKind::SegmentIterator(segment_iterator);
    Self {
        kind,
        internal_methods: &ORDINARY_INTERNAL_METHODS,
    }
}
} }
impl Debug for ObjectKind { impl Debug for ObjectKind {
@ -773,6 +822,12 @@ impl Debug for ObjectKind {
Self::ListFormat(_) => "ListFormat", Self::ListFormat(_) => "ListFormat",
#[cfg(feature = "intl")] #[cfg(feature = "intl")]
Self::Locale(_) => "Locale", Self::Locale(_) => "Locale",
#[cfg(feature = "intl")]
Self::Segmenter(_) => "Segmenter",
#[cfg(feature = "intl")]
Self::Segments(_) => "Segments",
#[cfg(feature = "intl")]
Self::SegmentIterator(_) => "SegmentIterator",
}) })
} }
} }
@ -1540,6 +1595,43 @@ impl Object {
} }
} }
/// Checks if it is a `Segmenter` object.
///
/// Returns `true` only when the object's kind is [`ObjectKind::Segmenter`].
#[inline]
#[cfg(feature = "intl")]
pub const fn is_segmenter(&self) -> bool {
matches!(self.kind, ObjectKind::Segmenter(_))
}
/// Gets a shared reference to the `Segmenter` data if the object is a `Segmenter`.
///
/// Returns `None` for every other object kind.
#[inline]
#[cfg(feature = "intl")]
pub const fn as_segmenter(&self) -> Option<&Segmenter> {
    if let ObjectKind::Segmenter(segmenter) = &self.kind {
        Some(segmenter)
    } else {
        None
    }
}
/// Gets a shared reference to the `Segments` data if the object is a `Segments`.
///
/// Returns `None` for every other object kind.
#[inline]
#[cfg(feature = "intl")]
pub const fn as_segments(&self) -> Option<&Segments> {
    if let ObjectKind::Segments(segments) = &self.kind {
        Some(segments)
    } else {
        None
    }
}
/// Gets a mutable reference to the `SegmentIterator` data if the object is a
/// `SegmentIterator`.
///
/// Returns `None` for every other object kind.
#[inline]
#[cfg(feature = "intl")]
pub fn as_segment_iterator_mut(&mut self) -> Option<&mut SegmentIterator> {
    if let ObjectKind::SegmentIterator(iterator) = &mut self.kind {
        Some(iterator)
    } else {
        None
    }
}
/// Return `true` if it is a native object and the native type is `T`. /// Return `true` if it is a native object and the native type is `T`.
pub fn is<T>(&self) -> bool pub fn is<T>(&self) -> bool
where where
@ -1688,12 +1780,12 @@ impl From<JsString> for FunctionBinding {
impl<B, N> From<(B, N)> for FunctionBinding impl<B, N> From<(B, N)> for FunctionBinding
where where
B: Into<PropertyKey>, B: Into<PropertyKey>,
N: AsRef<str>, N: Into<JsString>,
{ {
fn from((binding, name): (B, N)) -> Self { fn from((binding, name): (B, N)) -> Self {
Self { Self {
binding: binding.into(), binding: binding.into(),
name: name.as_ref().into(), name: name.into(),
} }
} }
} }

1
test_ignore.toml

@ -22,7 +22,6 @@ features = [
"Intl.DurationFormat", "Intl.DurationFormat",
"Intl.DisplayNames", "Intl.DisplayNames",
"Intl.RelativeTimeFormat", "Intl.RelativeTimeFormat",
"Intl.Segmenter",
# Stage 3 proposals # Stage 3 proposals

Loading…
Cancel
Save