Browse Source

Handle surrogates in `String.fromCodePoint` (#2659)

This Pull Request fixes #2657.

It changes the following:

- Handles surrogates when trying to convert f64 codepoints to u16.
- Replaces `abs().floor()` with `trunc()` in `is_float_integer`.
pull/2667/head
José Julián Espina 2 years ago
parent
commit
5a3186d633
  1. 2
      boa_engine/src/builtins/number/mod.rs
  2. 18
      boa_engine/src/builtins/string/mod.rs
  3. 54
      boa_engine/src/builtins/string/tests.rs

2
boa_engine/src/builtins/number/mod.rs

@ -850,7 +850,7 @@ impl Number {
/// Checks if the float argument is an integer.
///
/// Returns `true` only when `number` is finite and has no fractional part.
/// `NaN` and both infinities fail the `is_finite` check, so they yield `false`.
// Exact float equality is intended here (a value is compared against its own
// rounded form), hence the `float_cmp` lint is silenced.
#[allow(clippy::float_cmp)]
pub(crate) fn is_float_integer(number: f64) -> bool {
// Old form (the line this change removes): floors the magnitude and compares magnitudes.
number.is_finite() && number.abs().floor() == number.abs()
// New form (the line this change adds): `trunc` drops the fractional part directly,
// so the `abs` calls are no longer needed.
number.is_finite() && number.trunc() == number
}
/// The abstract operation `Number::equal` takes arguments

18
boa_engine/src/builtins/string/mod.rs

@ -255,22 +255,30 @@ impl String {
// b. If ! IsIntegralNumber(nextCP) is false, throw a RangeError exception.
if !Number::is_float_integer(nextcp) {
return Err(JsNativeError::range()
.with_message(format!("invalid code point: {nextcp}"))
.with_message(format!("codepoint `{nextcp}` is not an integer"))
.into());
}
// c. If ℝ(nextCP) < 0 or ℝ(nextCP) > 0x10FFFF, throw a RangeError exception.
if nextcp < 0.0 || nextcp > f64::from(0x0010_FFFF) {
return Err(JsNativeError::range()
.with_message(format!("invalid code point: {nextcp}"))
.with_message(format!("codepoint `{nextcp}` outside of Unicode range"))
.into());
}
let nextcp =
char::from_u32(nextcp as u32).expect("Checked above the range of `nextcp`");
// SAFETY:
// - `nextcp` is not NaN (by the call to `is_float_integer`).
// - `nextcp` is not infinite (by the call to `is_float_integer`).
// - `nextcp` is in the u32 range (by the check above).
let nextcp = unsafe { nextcp.to_int_unchecked::<u32>() };
// d. Set result to the string-concatenation of result and ! UTF16EncodeCodePoint(ℝ(nextCP)).
result.extend_from_slice(nextcp.encode_utf16(&mut buf));
result.extend_from_slice(match u16::try_from(nextcp) {
Ok(ref cp) => std::slice::from_ref(cp),
Err(_) => char::from_u32(nextcp)
.expect("u32 is in range and cannot be a surrogate by the conversion above")
.encode_utf16(&mut buf),
});
}
// 3. Assert: If codePoints is empty, then result is the empty String.

54
boa_engine/src/builtins/string/tests.rs

@ -856,3 +856,57 @@ fn search() {
TestAction::assert_eq("'ba'.search(/a/)", 1),
]);
}
/// Exercises `String.fromCodePoint` for BMP code points, astral code points
/// (encoded as surrogate pairs), lone surrogate code points, and every
/// `RangeError` path (non-integer, `NaN`, infinite and negative arguments).
#[test]
fn from_code_point() {
    // Taken from https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/fromCodePoint
    run_test_actions([
        TestAction::assert_eq("String.fromCodePoint(42)", "*"),
        TestAction::assert_eq("String.fromCodePoint(65, 90)", "AZ"),
        TestAction::assert_eq("String.fromCodePoint(0x404)", "Є"),
        // Code points above 0xFFFF must be split into a surrogate pair.
        TestAction::assert_eq(
            "String.fromCodePoint(0x2f804)",
            js_string!(&[0xD87E, 0xDC04]),
        ),
        TestAction::assert_eq(
            "String.fromCodePoint(0x1D306, 0x1D307)",
            js_string!(&[0xD834, 0xDF06, 0xD834, 0xDF07]),
        ),
        // Should encode to unpaired surrogates.
        // This must go through `fromCodePoint` (not `fromCharCode`) so that the
        // surrogate handling of the function under test is actually exercised.
        TestAction::assert_eq(
            "String.fromCodePoint(0xD800, 0xD8FF)",
            js_string!(&[0xD800, 0xD8FF]),
        ),
        TestAction::assert_eq("String.fromCodePoint(9731, 9733, 9842, 0x4F60)", "☃★♲你"),
        // Non-numeric input coerces to NaN, which is not an integer.
        TestAction::assert_native_error(
            "String.fromCodePoint('_')",
            ErrorKind::Range,
            "codepoint `NaN` is not an integer",
        ),
        // The message embeds Rust's `f64` formatting, hence `inf` rather than `Infinity`.
        TestAction::assert_native_error(
            "String.fromCodePoint(Infinity)",
            ErrorKind::Range,
            "codepoint `inf` is not an integer",
        ),
        // Integral but below the valid code point range.
        TestAction::assert_native_error(
            "String.fromCodePoint(-1)",
            ErrorKind::Range,
            "codepoint `-1` outside of Unicode range",
        ),
        TestAction::assert_native_error(
            "String.fromCodePoint(3.14)",
            ErrorKind::Range,
            "codepoint `3.14` is not an integer",
        ),
        TestAction::assert_native_error(
            "String.fromCodePoint(3e-2)",
            ErrorKind::Range,
            "codepoint `0.03` is not an integer",
        ),
        TestAction::assert_native_error(
            "String.fromCodePoint(NaN)",
            ErrorKind::Range,
            "codepoint `NaN` is not an integer",
        ),
    ]);
}

Loading…
Cancel
Save