Browse Source

Add regexp indices (`d` flag) support (#3094)

* Add regexp indices (`d` flag) support

Closes #3086

* Run rustfmt

* Fix clippy
pull/3124/head
Dirk de Visser 1 year ago committed by GitHub
parent
commit
1a2be79f83
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 191
      boa_engine/src/builtins/regexp/mod.rs

191
boa_engine/src/builtins/regexp/mod.rs

@ -27,7 +27,7 @@ use crate::{
}; };
use boa_parser::lexer::regex::RegExpFlags; use boa_parser::lexer::regex::RegExpFlags;
use boa_profiler::Profiler; use boa_profiler::Profiler;
use regress::{Flags, Regex}; use regress::{Flags, Range, Regex};
use std::str::FromStr; use std::str::FromStr;
use super::{BuiltInBuilder, BuiltInConstructor, IntrinsicObject}; use super::{BuiltInBuilder, BuiltInConstructor, IntrinsicObject};
@ -837,7 +837,7 @@ impl RegExp {
Self::abstract_builtin_exec(this, &input, context) Self::abstract_builtin_exec(this, &input, context)
} }
/// `22.2.5.2.2 RegExpBuiltinExec ( R, S )` /// `22.2.7.2 RegExpBuiltinExec ( R, S )`
/// ///
/// More information: /// More information:
/// - [ECMAScript reference][spec] /// - [ECMAScript reference][spec]
@ -848,7 +848,6 @@ impl RegExp {
input: &JsString, input: &JsString,
context: &mut Context<'_>, context: &mut Context<'_>,
) -> JsResult<Option<JsObject>> { ) -> JsResult<Option<JsObject>> {
// 1. Assert: R is an initialized RegExp instance.
let rx = { let rx = {
let obj = this.borrow(); let obj = this.borrow();
if let Some(rx) = obj.as_regexp() { if let Some(rx) = obj.as_regexp() {
@ -860,36 +859,41 @@ impl RegExp {
} }
}; };
// 2. Assert: Type(S) is String. // 1. Let length be the length of S.
// 3. Let length be the number of code units in S.
let length = input.len() as u64; let length = input.len() as u64;
// 4. Let lastIndex be ℝ(? ToLength(? Get(R, "lastIndex"))). // 2. Let lastIndex be ℝ(? ToLength(? Get(R, "lastIndex"))).
let mut last_index = this.get(utf16!("lastIndex"), context)?.to_length(context)?; let mut last_index = this.get(utf16!("lastIndex"), context)?.to_length(context)?;
// 5. Let flags be R.[[OriginalFlags]]. // 3. Let flags be R.[[OriginalFlags]].
let flags = &rx.original_flags; let flags = &rx.original_flags;
// 6. If flags contains "g", let global be true; else let global be false. // 4. If flags contains "g", let global be true; else let global be false.
let global = flags.contains(&('g' as u16)); let global = flags.contains(&('g' as u16));
// 7. If flags contains "y", let sticky be true; else let sticky be false. // 5. If flags contains "y", let sticky be true; else let sticky be false.
let sticky = flags.contains(&('y' as u16)); let sticky = flags.contains(&('y' as u16));
// 8. If global is false and sticky is false, set lastIndex to 0. // 6. If flags contains "d", let hasIndices be true; else let hasIndices be false.
let has_indices = flags.contains(&('d' as u16));
// 7. If global is false and sticky is false, set lastIndex to 0.
if !global && !sticky { if !global && !sticky {
last_index = 0; last_index = 0;
} }
// 9. Let matcher be R.[[RegExpMatcher]]. // 8. Let matcher be R.[[RegExpMatcher]].
let matcher = &rx.matcher; let matcher = &rx.matcher;
// 10. If flags contains "u", let fullUnicode be true; else let fullUnicode be false. // 9. If flags contains "u" or flags contains "v", let fullUnicode be true; else let fullUnicode be false.
let unicode = flags.contains(&('u' as u16)); let full_unicode = flags.contains(&('u' as u16)) || flags.contains(&('v' as u16));
// 11. Let matchSucceeded be false. // TODO:
// 12. Repeat, while matchSucceeded is false, // 11. If fullUnicode is true, let input be StringToCodePoints(S). Otherwise, let input be a List whose elements are the code units that are the elements of S.
// 12. NOTE: Each element of input is considered to be a character.
// 10. Let matchSucceeded be false.
// 13. Repeat, while matchSucceeded is false,
let lossy_input = input.to_std_string_escaped(); let lossy_input = input.to_std_string_escaped();
let (match_value, last_byte_index) = loop { let (match_value, last_byte_index) = loop {
// a. If lastIndex > length, then // a. If lastIndex > length, then
@ -904,7 +908,7 @@ impl RegExp {
return Ok(None); return Ok(None);
} }
// b. Let r be matcher(S, lastIndex). // b. Let inputIndex be the index into input of the character that was obtained from element lastIndex of S.
// Check if last_index is a valid utf8 index into input. // Check if last_index is a valid utf8 index into input.
// TODO: avoid converting to String // TODO: avoid converting to String
let last_byte_index = match String::from_utf16(&input[..last_index as usize]) { let last_byte_index = match String::from_utf16(&input[..last_index as usize]) {
@ -915,10 +919,11 @@ impl RegExp {
.into()) .into())
} }
}; };
// c. Let r be matcher(input, inputIndex).
let r = matcher.find_from(&lossy_input, last_byte_index).next(); let r = matcher.find_from(&lossy_input, last_byte_index).next();
match r { match r {
// c. If r is failure, then // d. If r is failure, then
None => { None => {
// i. If sticky is true, then // i. If sticky is true, then
if sticky { if sticky {
@ -930,11 +935,11 @@ impl RegExp {
} }
// ii. Set lastIndex to AdvanceStringIndex(S, lastIndex, fullUnicode). // ii. Set lastIndex to AdvanceStringIndex(S, lastIndex, fullUnicode).
last_index = advance_string_index(input, last_index, unicode); last_index = advance_string_index(input, last_index, full_unicode);
} }
Some(m) => { Some(m) => {
// c. If r is failure, then // d. If r is failure, then
#[allow(clippy::if_not_else)] #[allow(clippy::if_not_else)]
if m.start() != last_byte_index { if m.start() != last_byte_index {
// i. If sticky is true, then // i. If sticky is true, then
@ -947,21 +952,21 @@ impl RegExp {
} }
// ii. Set lastIndex to AdvanceStringIndex(S, lastIndex, fullUnicode). // ii. Set lastIndex to AdvanceStringIndex(S, lastIndex, fullUnicode).
last_index = advance_string_index(input, last_index, unicode); last_index = advance_string_index(input, last_index, full_unicode);
// d. Else, // e. Else,
} else { } else {
//i. Assert: r is a State. // i. Assert: r is a State.
//ii. Set matchSucceeded to true. // ii. Set matchSucceeded to true.
break (m, last_byte_index); break (m, last_byte_index);
} }
} }
} }
}; };
// 13. Let e be r's endIndex value. // 14. Let e be r's endIndex value.
let mut e = match_value.end(); let mut e = match_value.end();
// 14. If fullUnicode is true, then // 15. If fullUnicode is true, set e to GetStringIndex(S, e).
// TODO: disabled for now until we have UTF-16 support // TODO: disabled for now until we have UTF-16 support
if false { if false {
// e is an index into the Input character list, derived from S, matched by matcher. // e is an index into the Input character list, derived from S, matched by matcher.
@ -971,7 +976,7 @@ impl RegExp {
e = input.get(..e).map_or_else(|| input.len(), <[u16]>::len); e = input.get(..e).map_or_else(|| input.len(), <[u16]>::len);
} }
// 15. If global is true or sticky is true, then // 16. If global is true or sticky is true, then
if global || sticky { if global || sticky {
// a. Perform ? Set(R, "lastIndex", 𝔽(e), true). // a. Perform ? Set(R, "lastIndex", 𝔽(e), true).
this.set( this.set(
@ -982,41 +987,69 @@ impl RegExp {
)?; )?;
} }
// 16. Let n be the number of elements in r's captures List. (This is the same value as 22.2.2.1's NcapturingParens.) // 17. Let n be the number of elements in r's captures List.
let n = match_value.captures.len() as u64; let n = match_value.captures.len() as u64;
// 17. Assert: n < 23^2 - 1. // 18. Assert: n = R.[[RegExpRecord]].[[CapturingGroupsCount]].
// 19. Assert: n < 2^32 - 1.
debug_assert!(n < 23u64.pow(2) - 1); debug_assert!(n < 23u64.pow(2) - 1);
// 18. Let A be ! ArrayCreate(n + 1). // 20. Let A be ! ArrayCreate(n + 1).
// 19. Assert: The mathematical value of A's "length" property is n + 1. // 21. Assert: The mathematical value of A's "length" property is n + 1.
let a = Array::array_create(n + 1, None, context)?; let a = Array::array_create(n + 1, None, context)?;
// 20. Perform ! CreateDataPropertyOrThrow(A, "index", 𝔽(lastIndex)). // 22. Perform ! CreateDataPropertyOrThrow(A, "index", 𝔽(lastIndex)).
a.create_data_property_or_throw(utf16!("index"), last_index, context) a.create_data_property_or_throw(utf16!("index"), last_index, context)
.expect("this CreateDataPropertyOrThrow call must not fail"); .expect("this CreateDataPropertyOrThrow call must not fail");
// 21. Perform ! CreateDataPropertyOrThrow(A, "input", S). // 23. Perform ! CreateDataPropertyOrThrow(A, "input", S).
a.create_data_property_or_throw(utf16!("input"), input.clone(), context) a.create_data_property_or_throw(utf16!("input"), input.clone(), context)
.expect("this CreateDataPropertyOrThrow call must not fail"); .expect("this CreateDataPropertyOrThrow call must not fail");
// 22. Let matchedSubstr be the substring of S from lastIndex to e. // 24. Let match be the Match Record { [[StartIndex]]: lastIndex, [[EndIndex]]: e }.
// Immediately convert it to an array according to 22.2.7.7 GetMatchIndexPair(S, match)
// 1. Assert: match.[[StartIndex]] ≤ match.[[EndIndex]] ≤ the length of S.
// 2. Return CreateArrayFromList(« 𝔽(match.[[StartIndex]]), 𝔽(match.[[EndIndex]]) »).
let match_record = Array::create_array_from_list(
[match_value.start().into(), match_value.end().into()],
context,
);
// 25. Let indices be a new empty List.
let indices = Array::array_create(n + 1, None, context)?;
// 27. Append match to indices.
indices
.create_data_property_or_throw(0, match_record, context)
.expect("this CreateDataPropertyOrThrow call must not fail");
// 28. Let matchedSubstr be GetMatchString(S, match).
let matched_substr = js_string!(&lossy_input[last_byte_index..e]); let matched_substr = js_string!(&lossy_input[last_byte_index..e]);
// 23. Perform ! CreateDataPropertyOrThrow(A, "0", matchedSubstr). // 29. Perform ! CreateDataPropertyOrThrow(A, "0", matchedSubstr).
a.create_data_property_or_throw(0, matched_substr, context) a.create_data_property_or_throw(0, matched_substr, context)
.expect("this CreateDataPropertyOrThrow call must not fail"); .expect("this CreateDataPropertyOrThrow call must not fail");
// 24. If R contains any GroupName, then let mut named_groups = match_value
// 25. Else, .named_groups()
let named_groups = match_value.named_groups(); .collect::<Vec<(&str, Option<Range>)>>();
let groups = if named_groups.clone().count() > 0 { // Strict mode requires groups to be created in a sorted order
// a. Let groups be ! OrdinaryObjectCreate(null). named_groups.sort_by(|(name_x, _), (name_y, _)| name_x.cmp(name_y));
// Combines:
// 26. Let groupNames be a new empty List.
// 30. If R contains any GroupName, then
// 31. Else,
// 33. For each integer i such that 1 ≤ i ≤ n, in ascending order, do
#[allow(clippy::if_not_else)]
let (groups, group_names) = if !named_groups.clone().is_empty() {
// a. Let groups be OrdinaryObjectCreate(null).
let groups = JsObject::with_null_proto(); let groups = JsObject::with_null_proto();
let group_names = JsObject::with_null_proto();
// Perform 27.f here // e. If the ith capture of R was defined with a GroupName, then
// f. If the ith capture of R was defined with a GroupName, then // i. Let s be the CapturingGroupName of that GroupName.
// i. Let s be the CapturingGroupName of the corresponding RegExpIdentifierName.
// ii. Perform ! CreateDataPropertyOrThrow(groups, s, capturedValue). // ii. Perform ! CreateDataPropertyOrThrow(groups, s, capturedValue).
// iii. Append s to groupNames.
for (name, range) in named_groups { for (name, range) in named_groups {
if let Some(range) = range { if let Some(range) = range {
// TODO: Full UTF-16 regex support // TODO: Full UTF-16 regex support
@ -1025,15 +1058,50 @@ impl RegExp {
groups groups
.create_data_property_or_throw(name, value, context) .create_data_property_or_throw(name, value, context)
.expect("this CreateDataPropertyOrThrow call must not fail"); .expect("this CreateDataPropertyOrThrow call must not fail");
// 22.2.7.8 MakeMatchIndicesIndexPairArray ( S, indices, groupNames, hasGroups )
// a. Let matchIndices be indices[i].
// b. If matchIndices is not undefined, then
// i. Let matchIndexPair be GetMatchIndexPair(S, matchIndices).
// d. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(i)), matchIndexPair).
group_names
.create_data_property_or_throw(
name,
Array::create_array_from_list(
[range.start.into(), range.end.into()],
context,
),
context,
)
.expect("this CreateDataPropertyOrThrow call must not fail");
} else {
groups
.create_data_property_or_throw(name, JsValue::undefined(), context)
.expect("this CreateDataPropertyOrThrow call must not fail");
// 22.2.7.8 MakeMatchIndicesIndexPairArray ( S, indices, groupNames, hasGroups )
// c. Else,
// i. Let matchIndexPair be undefined.
// d. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(i)), matchIndexPair).
group_names
.create_data_property_or_throw(name, JsValue::undefined(), context)
.expect("this CreateDataPropertyOrThrow call must not fail");
} }
} }
groups.into()
(groups.into(), group_names.into())
} else { } else {
// a. Let groups be undefined. // a. Let groups be undefined.
JsValue::undefined() (JsValue::undefined(), JsValue::undefined())
}; };
// 26. Perform ! CreateDataPropertyOrThrow(A, "groups", groups). // 22.2.7.8 MakeMatchIndicesIndexPairArray ( S, indices, groupNames, hasGroups )
// 8. Perform ! CreateDataPropertyOrThrow(A, "groups", groups).
indices
.create_data_property_or_throw(utf16!("groups"), group_names, context)
.expect("this CreateDataPropertyOrThrow call must not fail");
// 32. Perform ! CreateDataPropertyOrThrow(A, "groups", groups).
a.create_data_property_or_throw(utf16!("groups"), groups, context) a.create_data_property_or_throw(utf16!("groups"), groups, context)
.expect("this CreateDataPropertyOrThrow call must not fail"); .expect("this CreateDataPropertyOrThrow call must not fail");
@ -1046,16 +1114,41 @@ impl RegExp {
// c. Else if fullUnicode is true, then // c. Else if fullUnicode is true, then
// d. Else, // d. Else,
// TODO: Full UTF-16 regex support // TODO: Full UTF-16 regex support
let captured_value = capture.map_or_else(JsValue::undefined, |range| { let captured_value = capture.clone().map_or_else(JsValue::undefined, |range| {
js_string!(&lossy_input[range]).into() js_string!(&lossy_input[range]).into()
}); });
// e. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(i)), capturedValue). // e. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(i)), capturedValue).
a.create_data_property_or_throw(i, captured_value, context) a.create_data_property_or_throw(i, captured_value.clone(), context)
.expect("this CreateDataPropertyOrThrow call must not fail");
// 22.2.7.8 MakeMatchIndicesIndexPairArray ( S, indices, groupNames, hasGroups )
if has_indices {
// b. If matchIndices is not undefined, then
// i. Let matchIndexPair be GetMatchIndexPair(S, matchIndices).
// c. Else,
// i. Let matchIndexPair be undefined.
let indices_range = capture.map_or_else(JsValue::undefined, |range| {
Array::create_array_from_list([range.start.into(), range.end.into()], context)
.into()
});
// d. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(i)), matchIndexPair).
indices
.create_data_property_or_throw(i, indices_range, context)
.expect("this CreateDataPropertyOrThrow call must not fail");
}
}
// 34. If hasIndices is true, then
// a. Let indicesArray be MakeMatchIndicesIndexPairArray(S, indices, groupNames, hasGroups).
// b. Perform ! CreateDataPropertyOrThrow(A, "indices", indicesArray).
if has_indices {
a.create_data_property_or_throw(utf16!("indices"), indices, context)
.expect("this CreateDataPropertyOrThrow call must not fail"); .expect("this CreateDataPropertyOrThrow call must not fail");
} }
// 28. Return A. // 35. Return A.
Ok(Some(a)) Ok(Some(a))
} }

Loading…
Cancel
Save