Browse Source

Fix `GetSubstitution` (#2933)

pull/2939/head
Haled Odat 2 years ago committed by GitHub
parent
commit
190eeb388b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 185
      boa_engine/src/builtins/regexp/mod.rs
  2. 102
      boa_engine/src/builtins/string/mod.rs

185
boa_engine/src/builtins/regexp/mod.rs

@ -1267,8 +1267,14 @@ impl RegExp {
args: &[JsValue], args: &[JsValue],
context: &mut Context<'_>, context: &mut Context<'_>,
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
// Helper enum: the two forms the `replaceValue` argument takes after steps 5-6,
// so the per-match code below can `match` on it instead of re-checking callability.
enum CallableOrString<'a> {
// `replaceValue` is callable: borrowed reference to the function object,
// which is called once per match with the replacer arguments.
FunctionalReplace(&'a JsObject),
// `replaceValue` is not callable: the string obtained by converting it with
// `to_string`, later passed to `get_substitution` as the replacement template.
ReplaceValue(JsString),
}
// 1. Let rx be the this value. // 1. Let rx be the this value.
// 2. If Type(rx) is not Object, throw a TypeError exception. // 2. If rx is not an Object, throw a TypeError exception.
let rx = this.as_object().ok_or_else(|| { let rx = this.as_object().ok_or_else(|| {
JsNativeError::typ().with_message( JsNativeError::typ().with_message(
"RegExp.prototype[Symbol.replace] method called on incompatible value", "RegExp.prototype[Symbol.replace] method called on incompatible value",
@ -1276,84 +1282,101 @@ impl RegExp {
})?; })?;
// 3. Let S be ? ToString(string). // 3. Let S be ? ToString(string).
let arg_str = args.get_or_undefined(0).to_string(context)?; let s = args.get_or_undefined(0).to_string(context)?;
// 4. Let lengthS be the number of code unit elements in S. // 4. Let lengthS be the length of S.
let length_arg_str = arg_str.len(); let length_s = s.len();
let replace_value = args.get_or_undefined(1);
// 5. Let functionalReplace be IsCallable(replaceValue). // 5. Let functionalReplace be IsCallable(replaceValue).
let replace_value = args.get_or_undefined(1).clone(); let functional_replace = replace_value.as_callable();
let replace = if let Some(f) = replace_value.as_callable() {
Ok(f) // 6. If functionalReplace is false, then
let replace_value = if let Some(callable) = functional_replace {
CallableOrString::FunctionalReplace(callable)
} else { } else {
// 6. If functionalReplace is false, then
// a. Set replaceValue to ? ToString(replaceValue). // a. Set replaceValue to ? ToString(replaceValue).
Err(replace_value.to_string(context)?) CallableOrString::ReplaceValue(replace_value.to_string(context)?)
}; };
// 7. Let global be ! ToBoolean(? Get(rx, "global")). // 7. Let flags be ? ToString(? Get(rx, "flags")).
let global = rx.get(utf16!("global"), context)?.to_boolean(); let flags = rx.get(utf16!("flags"), context)?.to_string(context)?;
// 8. If global is true, then // 8. If flags contains "g", let global be true. Otherwise, let global be false.
let mut unicode = false; let global = flags.as_slice().contains(&u16::from(b'g'));
if global {
// a. Let fullUnicode be ! ToBoolean(? Get(rx, "unicode")). // 9. If global is true, then
unicode = rx.get(utf16!("unicode"), context)?.to_boolean(); let full_unicode = if global {
// a. If flags contains "u", let fullUnicode be true. Otherwise, let fullUnicode be false.
let full_unicode = flags.contains(&u16::from(b'u'));
// b. Perform ? Set(rx, "lastIndex", +0𝔽, true). // b. Perform ? Set(rx, "lastIndex", +0𝔽, true).
rx.set(utf16!("lastIndex"), 0, true, context)?; rx.set(utf16!("lastIndex"), 0, true, context)?;
}
// 9. Let results be a new empty List. full_unicode
} else {
false
};
// 10. Let results be a new empty List.
let mut results = Vec::new(); let mut results = Vec::new();
// 10. Let done be false. // SKIPPED: 11. Let done be false.
// 11. Repeat, while done is false, //
// NOTE(HalidOdat): We don't keep track of `done`; we simply `break` wherever the spec sets done to true.
// 12. Repeat, while done is false,
loop { loop {
// a. Let result be ? RegExpExec(rx, S). // a. Let result be ? RegExpExec(rx, S).
let result = Self::abstract_exec(rx, arg_str.clone(), context)?; let result = Self::abstract_exec(rx, s.clone(), context)?;
// b. If result is null, set done to true. // b. If result is null, set done to true.
let Some(result) = result else {
// SKIPPED: 1. Set done to true.
break;
};
// c. Else, // c. Else,
if let Some(result) = result { // i. Append result to results.
// i. Append result to the end of results. results.push(result.clone());
results.push(result.clone());
// ii. If global is false, set done to true. // ii. If global is false, then
if !global {
// SKIPPED: 1. Set done to true.
break;
}
if !global { // iii. Else,
break; // 1. Let matchStr be ? ToString(? Get(result, "0")).
} let match_str = result.get(0, context)?.to_string(context)?;
// iii. Else,
// 1. Let matchStr be ? ToString(? Get(result, "0")).
let match_str = result.get(0, context)?.to_string(context)?;
// 2. If matchStr is the empty String, then // 2. If matchStr is the empty String, then
if match_str.is_empty() { if match_str.is_empty() {
// a. Let thisIndex be ℝ(? ToLength(? Get(rx, "lastIndex"))). // a. Let thisIndex be ℝ(? ToLength(? Get(rx, "lastIndex"))).
let this_index = rx.get(utf16!("lastIndex"), context)?.to_length(context)?; let this_index = rx.get(utf16!("lastIndex"), context)?.to_length(context)?;
// b. Let nextIndex be AdvanceStringIndex(S, thisIndex, fullUnicode). // b. Let nextIndex be AdvanceStringIndex(S, thisIndex, fullUnicode).
let next_index = advance_string_index(&arg_str, this_index, unicode); let next_index = advance_string_index(&s, this_index, full_unicode);
// c. Perform ? Set(rx, "lastIndex", 𝔽(nextIndex), true). // c. Perform ? Set(rx, "lastIndex", 𝔽(nextIndex), true).
rx.set(utf16!("lastIndex"), JsValue::new(next_index), true, context)?; rx.set(utf16!("lastIndex"), JsValue::new(next_index), true, context)?;
}
} else {
break;
} }
} }
// 12. Let accumulatedResult be the empty String. // 16. If nextSourcePosition ≥ lengthS, return accumulatedResult.
// 17. Return the string-concatenation of accumulatedResult and the substring of S from nextSourcePosition.
// 13. Let accumulatedResult be the empty String.
let mut accumulated_result = vec![]; let mut accumulated_result = vec![];
// 13. Let nextSourcePosition be 0. // 14. Let nextSourcePosition be 0.
let mut next_source_position = 0; let mut next_source_position = 0;
// 14. For each element result of results, do // 15. For each element result of results, do
for result in results { for result in results {
// a. Let resultLength be ? LengthOfArrayLike(result). // a. Let resultLength be ? LengthOfArrayLike(result).
let result_length = result.length_of_array_like(context)? as isize; let result_length = result.length_of_array_like(context)? as i64;
// b. Let nCaptures be max(resultLength - 1, 0). // b. Let nCaptures be max(resultLength - 1, 0).
let n_captures = std::cmp::max(result_length - 1, 0); let n_captures = std::cmp::max(result_length - 1, 0);
@ -1361,7 +1384,7 @@ impl RegExp {
// c. Let matched be ? ToString(? Get(result, "0")). // c. Let matched be ? ToString(? Get(result, "0")).
let matched = result.get(0, context)?.to_string(context)?; let matched = result.get(0, context)?.to_string(context)?;
// d. Let matchLength be the number of code units in matched. // d. Let matchLength be the length of matched.
let match_length = matched.len(); let match_length = matched.len();
// e. Let position be ? ToIntegerOrInfinity(? Get(result, "index")). // e. Let position be ? ToIntegerOrInfinity(? Get(result, "index")).
@ -1370,13 +1393,12 @@ impl RegExp {
.to_integer_or_infinity(context)?; .to_integer_or_infinity(context)?;
// f. Set position to the result of clamping position between 0 and lengthS. // f. Set position to the result of clamping position between 0 and lengthS.
//position = position. let position = position.clamp_finite(0, length_s as i64) as usize;
let position = position.clamp_finite(0, length_arg_str as i64) as usize;
// h. Let captures be a new empty List. // g. Let captures be a new empty List.
let mut captures = Vec::new(); let mut captures = Vec::new();
// g. Let n be 1. // h. Let n be 1.
// i. Repeat, while n ≤ nCaptures, // i. Repeat, while n ≤ nCaptures,
for n in 1..=n_captures { for n in 1..=n_captures {
// i. Let capN be ? Get(result, ! ToString(𝔽(n))). // i. Let capN be ? Get(result, ! ToString(𝔽(n))).
@ -1388,42 +1410,43 @@ impl RegExp {
cap_n = cap_n.to_string(context)?.into(); cap_n = cap_n.to_string(context)?.into();
} }
// iii. Append capN as the last element of captures. // iii. Append capN to captures.
captures.push(cap_n); captures.push(cap_n);
// iv. Set n to n + 1. // iv. NOTE: When n = 1, the preceding step puts the first element into captures (at index 0).
// More generally, the nth capture (the characters captured by the nth set of capturing parentheses)
// is at captures[n - 1].
//
// v. Set n to n + 1.
} }
// j. Let namedCaptures be ? Get(result, "groups"). // j. Let namedCaptures be ? Get(result, "groups").
let mut named_captures = result.get(utf16!("groups"), context)?; let mut named_captures = result.get(utf16!("groups"), context)?;
// k. If functionalReplace is true, then let replacement = match replace_value {
let replacement = match replace { // k. If functionalReplace is true, then
Ok(replace_fn) => { CallableOrString::FunctionalReplace(replace_value) => {
// i. Let replacerArgs be « matched ». // i. Let replacerArgs be the list-concatenation of « matched », captures, and « 𝔽(position), S ».
let mut replacer_args = vec![JsValue::new(matched)]; let mut replacer_args = vec![JsValue::new(matched)];
// ii. Append in List order the elements of captures to the end of the List replacerArgs.
replacer_args.extend(captures); replacer_args.extend(captures);
// iii. Append 𝔽(position) and S to replacerArgs.
replacer_args.push(position.into()); replacer_args.push(position.into());
replacer_args.push(arg_str.clone().into()); replacer_args.push(s.clone().into());
// iv. If namedCaptures is not undefined, then // ii. If namedCaptures is not undefined, then
if !named_captures.is_undefined() { if !named_captures.is_undefined() {
// 1. Append namedCaptures as the last element of replacerArgs. // 1. Append namedCaptures to replacerArgs.
replacer_args.push(named_captures); replacer_args.push(named_captures);
} }
// v. Let replValue be ? Call(replaceValue, undefined, replacerArgs). // iii. Let replValue be ? Call(replaceValue, undefined, replacerArgs).
// vi. Let replacement be ? ToString(replValue). let repl_value =
replace_fn replace_value.call(&JsValue::undefined(), &replacer_args, context)?;
.call(&JsValue::undefined(), &replacer_args, context)?
.to_string(context)? // iv. Let replacement be ? ToString(replValue).
repl_value.to_string(context)?
} }
// l. Else, // l. Else,
Err(ref replace_str) => { CallableOrString::ReplaceValue(ref replace_value) => {
// i. If namedCaptures is not undefined, then // i. If namedCaptures is not undefined, then
if !named_captures.is_undefined() { if !named_captures.is_undefined() {
// 1. Set namedCaptures to ? ToObject(namedCaptures). // 1. Set namedCaptures to ? ToObject(namedCaptures).
@ -1433,11 +1456,11 @@ impl RegExp {
// ii. Let replacement be ? GetSubstitution(matched, S, position, captures, namedCaptures, replaceValue). // ii. Let replacement be ? GetSubstitution(matched, S, position, captures, namedCaptures, replaceValue).
string::get_substitution( string::get_substitution(
&matched, &matched,
&arg_str, &s,
position, position,
&captures, &captures,
&named_captures, &named_captures,
replace_str, replace_value,
context, context,
)? )?
} }
@ -1446,12 +1469,12 @@ impl RegExp {
// m. If position ≥ nextSourcePosition, then // m. If position ≥ nextSourcePosition, then
if position >= next_source_position { if position >= next_source_position {
// i. NOTE: position should not normally move backwards. // i. NOTE: position should not normally move backwards.
// If it does, it is an indication of an ill-behaving RegExp subclass // If it does, it is an indication of an ill-behaving RegExp subclass or use of
// or use of an access triggered side-effect to change the global flag or other characteristics of rx. // an access triggered side-effect to change the global flag or other characteristics of rx.
// In such cases, the corresponding substitution is ignored. // In such cases, the corresponding substitution is ignored.
// ii. Set accumulatedResult to the string-concatenation of accumulatedResult,
// the substring of S from nextSourcePosition to position, and replacement. // ii. Set accumulatedResult to the string-concatenation of accumulatedResult, the substring of S from nextSourcePosition to position, and replacement.
accumulated_result.extend_from_slice(&arg_str[next_source_position..position]); accumulated_result.extend_from_slice(&s[next_source_position..position]);
accumulated_result.extend_from_slice(&replacement); accumulated_result.extend_from_slice(&replacement);
// iii. Set nextSourcePosition to position + matchLength. // iii. Set nextSourcePosition to position + matchLength.
@ -1459,13 +1482,13 @@ impl RegExp {
} }
} }
// 15. If nextSourcePosition ≥ lengthS, return accumulatedResult. // 16. If nextSourcePosition ≥ lengthS, return accumulatedResult.
if next_source_position >= length_arg_str { if next_source_position >= length_s {
return Ok(js_string!(accumulated_result).into()); return Ok(js_string!(accumulated_result).into());
} }
// 16. Return the string-concatenation of accumulatedResult and the substring of S from nextSourcePosition. // 17. Return the string-concatenation of accumulatedResult and the substring of S from nextSourcePosition.
Ok(js_string!(&accumulated_result[..], &arg_str[next_source_position..]).into()) Ok(js_string!(&accumulated_result[..], &s[next_source_position..]).into())
} }
/// `RegExp.prototype[ @@search ]( string )` /// `RegExp.prototype[ @@search ]( string )`

102
boa_engine/src/builtins/string/mod.rs

@ -2599,9 +2599,6 @@ pub(crate) fn get_substitution(
// 8. Let tailPos be position + matchLength. // 8. Let tailPos be position + matchLength.
let tail_pos = position + match_length; let tail_pos = position + match_length;
// 9. Let m be the number of elements in captures.
let m = captures.len();
// 10. Let result be the String value derived from replacement by copying code unit elements // 10. Let result be the String value derived from replacement by copying code unit elements
// from replacement to result while performing replacements as specified in Table 58. // from replacement to result while performing replacements as specified in Table 58.
// These $ replacements are done left-to-right, and, once such a replacement is performed, // These $ replacements are done left-to-right, and, once such a replacement is performed,
@ -2616,35 +2613,25 @@ pub(crate) fn get_substitution(
.and_then(CodePoint::as_char) .and_then(CodePoint::as_char)
.as_ref() .as_ref()
.map_or(false, char::is_ascii_digit); .map_or(false, char::is_ascii_digit);
// we use peek so that it is still in the iterator if not used
let third = if second_is_digit {
chars.peek().copied()
} else {
None
};
let third_is_digit = third
.and_then(CodePoint::as_char)
.as_ref()
.map_or(false, char::is_ascii_digit);
match (second, third) { match second {
// $$ // $$
(Some(CodePoint::Unicode('$')), _) => { Some(CodePoint::Unicode('$')) => {
// $ // $
result.push('$' as u16); result.push('$' as u16);
} }
// $& // $&
(Some(CodePoint::Unicode('&')), _) => { Some(CodePoint::Unicode('&')) => {
// matched // matched
result.extend_from_slice(matched); result.extend_from_slice(matched);
} }
// $` // $`
(Some(CodePoint::Unicode('`')), _) => { Some(CodePoint::Unicode('`')) => {
// The replacement is the substring of str from 0 to position. // The replacement is the substring of str from 0 to position.
result.extend_from_slice(&str[..position]); result.extend_from_slice(&str[..position]);
} }
// $' // $'
(Some(CodePoint::Unicode('\'')), _) => { Some(CodePoint::Unicode('\'')) => {
// If tailPos ≥ stringLength, the replacement is the empty String. // If tailPos ≥ stringLength, the replacement is the empty String.
// Otherwise the replacement is the substring of str from tailPos. // Otherwise the replacement is the substring of str from tailPos.
if tail_pos < str_length { if tail_pos < str_length {
@ -2652,52 +2639,61 @@ pub(crate) fn get_substitution(
} }
} }
// $nn // $nn
(Some(CodePoint::Unicode(second)), Some(CodePoint::Unicode(third))) // f. Else if templateRemainder starts with "$" followed by 1 or more decimal digits, then
if second_is_digit && third_is_digit => Some(CodePoint::Unicode(second)) if second_is_digit => {
{ // i. If templateRemainder starts with "$" followed by 2 or more decimal digits, let digitCount be 2. Otherwise, let digitCount be 1.
// The nnth element of captures, where nn is a two-digit decimal number in the range 01 to 99. // ii. Let ref be the substring of templateRemainder from 0 to 1 + digitCount.
let tens = second // iii. Let digits be the substring of templateRemainder from 1 to 1 + digitCount.
.to_digit(10) // iv. Let index be ℝ(StringToNumber(digits)).
.expect("could not convert character to digit after checking it") let mut index = second
as usize;
let units = third
.to_digit(10) .to_digit(10)
.expect("could not convert character to digit after checking it") .expect("could not convert character to digit after checking it")
as usize; as usize;
let nn = 10 * tens + units;
// vi. Let captureLen be the number of elements in captures.
// If nn ≤ m and the nnth element of captures is undefined, use the empty String instead. let capture_len = captures.len();
// If nn is 00 or nn > m, no replacement is done.
if nn == 0 || nn > m { // NOTE(HalidOdat): We deviate from the spec because of a bug in GetSubstitution.
result.extend_from_slice(&['$' as u16, second as u16, third as u16]); //
} else if let Some(capture) = captures.get(nn - 1) { // See: https://github.com/tc39/ecma262/issues/1426
if let Some(s) = capture.as_string() { if let Some(digit) = chars
result.extend_from_slice(s); .peek()
.copied()
.and_then(CodePoint::as_char)
.and_then(|n| n.to_digit(10))
{
// If there are two digits but the two-digit index is not in range, fall back to the one-digit index.
let two_digit_index = index * 10 + digit as usize;
if (1..=capture_len).contains(&two_digit_index) {
index = two_digit_index;
chars.next();
} }
} }
chars.next(); // v. Assert: 0 ≤ index ≤ 99.
} debug_assert!((0..=99).contains(&index));
// $n
(Some(CodePoint::Unicode(second)), _) if second_is_digit => { // vii. If 1 ≤ index ≤ captureLen, then
// The nth element of captures, where n is a single digit in the range 1 to 9. if (1..=capture_len).contains(&index) {
let n = second // 1. Let capture be captures[index - 1].
.to_digit(10) // 2. If capture is undefined, then
.expect("could not convert character to digit after checking it") // a. Let refReplacement be the empty String.
as usize; // 3. Else,
// a. Let refReplacement be capture.
if let Some(capture) = captures.get(index - 1) {
if let Some(s) = capture.as_string() {
result.extend_from_slice(s);
}
}
// If n ≤ m and the nth element of captures is undefined, use the empty String instead. // viii. Else,
// If n > m, no replacement is done. } else {
if n == 0 || n > m { // 1. Let refReplacement be ref.
result.extend_from_slice(&['$' as u16, second as u16]); result.extend_from_slice(&['$' as u16, second as u16]);
} else if let Some(capture) = captures.get(n - 1) {
if let Some(s) = capture.as_string() {
result.extend_from_slice(s);
}
} }
} }
// $< // $<
(Some(CodePoint::Unicode('<')), _) => { Some(CodePoint::Unicode('<')) => {
// 1. If namedCaptures is undefined, the replacement text is the String "$<". // 1. If namedCaptures is undefined, the replacement text is the String "$<".
// 2. Else, // 2. Else,
if named_captures.is_undefined() { if named_captures.is_undefined() {

Loading…
Cancel
Save