From 00a19005e46a91eb95a2dd75b41a8f7a13df7684 Mon Sep 17 00:00:00 2001 From: jedel1043 Date: Wed, 2 Mar 2022 22:07:23 +0000 Subject: [PATCH] Remove `toInteger` and document the `string` builtin (#1884) The ECMAScript 2022 specification removes the `toInteger` method, and replaces it with `toIntegerOrInfinity`, which is arguably better for us since the `JsValue::toInteger` returns an `f64`, which is pretty confusing at times. This pull request removes the `JsValue::to_integer` method, replaces all its calls by `JsValue::to_integer_or_infinity` or others per the spec and documents several methods from the `string` builtin. --- boa_engine/src/builtins/console/mod.rs | 19 +- boa_engine/src/builtins/number/mod.rs | 68 +- boa_engine/src/builtins/regexp/mod.rs | 3 +- boa_engine/src/builtins/string/mod.rs | 848 +++++++++++------- .../src/builtins/string/string_iterator.rs | 3 +- boa_engine/src/builtins/string/tests.rs | 9 +- boa_engine/src/builtins/symbol/mod.rs | 5 +- boa_engine/src/symbol.rs | 10 + boa_engine/src/tests.rs | 68 +- boa_engine/src/value/integer.rs | 69 ++ boa_engine/src/value/mod.rs | 121 +-- 11 files changed, 744 insertions(+), 479 deletions(-) create mode 100644 boa_engine/src/value/integer.rs diff --git a/boa_engine/src/builtins/console/mod.rs b/boa_engine/src/builtins/console/mod.rs index c06476af2b..ed9b24a83f 100644 --- a/boa_engine/src/builtins/console/mod.rs +++ b/boa_engine/src/builtins/console/mod.rs @@ -20,7 +20,7 @@ use crate::{ builtins::{BuiltIn, JsArgs}, object::ObjectInitializer, property::Attribute, - value::{display::display_obj, JsValue}, + value::{display::display_obj, JsValue, Numeric}, Context, JsResult, JsString, }; use boa_profiler::Profiler; @@ -71,21 +71,16 @@ pub fn formatter(data: &[JsValue], context: &mut Context) -> JsResult { match fmt { /* integer */ 'd' | 'i' => { - let arg = data - .get(arg_index) - .cloned() - .unwrap_or_default() - .to_integer(context)?; - formatted.push_str(&arg.to_string()); + let arg = match 
data.get_or_undefined(arg_index).to_numeric(context)? { + Numeric::Number(r) => (r.floor() + 0.0).to_string(), + Numeric::BigInt(int) => int.to_string(), + }; + formatted.push_str(&arg); arg_index += 1; } /* float */ 'f' => { - let arg = data - .get(arg_index) - .cloned() - .unwrap_or_default() - .to_number(context)?; + let arg = data.get_or_undefined(arg_index).to_number(context)?; formatted.push_str(&format!("{arg:.6}")); arg_index += 1; } diff --git a/boa_engine/src/builtins/number/mod.rs b/boa_engine/src/builtins/number/mod.rs index 7afcda0eb2..1c84fc27e9 100644 --- a/boa_engine/src/builtins/number/mod.rs +++ b/boa_engine/src/builtins/number/mod.rs @@ -192,7 +192,7 @@ impl Number { let precision = match args.get(0) { None | Some(JsValue::Undefined) => None, // 2. Let f be ? ToIntegerOrInfinity(fractionDigits). - Some(n) => Some(n.to_integer(context)? as i32), + Some(n) => Some(n.to_integer_or_infinity(context)?), }; // 4. If x is not finite, return ! Number::toString(x). if !this_num.is_finite() { @@ -200,15 +200,17 @@ impl Number { } // Get rid of the '-' sign for -0.0 let this_num = if this_num == 0. { 0. } else { this_num }; - let this_str_num = if let Some(precision) = precision { + let this_str_num = match precision { + None => f64_to_exponential(this_num), + Some(IntegerOrInfinity::Integer(precision)) if (0..=100).contains(&precision) => // 5. If f < 0 or f > 100, throw a RangeError exception. - if !(0..=100).contains(&precision) { + { + f64_to_exponential_with_precision(this_num, precision as usize) + } + _ => { return context - .throw_range_error("toExponential() argument must be between 0 and 100"); + .throw_range_error("toExponential() argument must be between 0 and 100") } - f64_to_exponential_with_precision(this_num, precision as usize) - } else { - f64_to_exponential(this_num) }; Ok(JsValue::new(this_str_num)) } @@ -231,19 +233,19 @@ impl Number { ) -> JsResult { // 1. Let this_num be ? thisNumberValue(this value). 
let this_num = Self::this_number_value(this, context)?; - let precision = match args.get(0) { - // 2. Let f be ? ToIntegerOrInfinity(fractionDigits). - Some(n) => match n.to_integer(context)? as i32 { - 0..=100 => n.to_integer(context)? as usize, - // 4, 5. If f < 0 or f > 100, throw a RangeError exception. - _ => { - return context - .throw_range_error("toFixed() digits argument must be between 0 and 100") - } - }, - // 3. If fractionDigits is undefined, then f is 0. - None => 0, - }; + + // 2. Let f be ? ToIntegerOrInfinity(fractionDigits). + // 3. Assert: If fractionDigits is undefined, then f is 0. + let precision = args.get_or_undefined(0).to_integer_or_infinity(context)?; + + // 4, 5. If f < 0 or f > 100, throw a RangeError exception. + let precision = precision + .as_integer() + .filter(|i| (0..=100).contains(i)) + .ok_or_else(|| { + context.construct_range_error("toFixed() digits argument must be between 0 and 100") + })? as usize; + // 6. If x is not finite, return ! Number::toString(x). if !this_num.is_finite() { Ok(JsValue::new(Self::to_native_string(this_num))) @@ -642,21 +644,23 @@ impl Number { // 1. Let x be ? thisNumberValue(this value). let x = Self::this_number_value(this, context)?; - // 2. If radix is undefined, let radixNumber be 10. let radix = args.get_or_undefined(0); let radix_number = if radix.is_undefined() { - 10.0 - // 3. Else, let radixNumber be ? ToInteger(radix). + // 2. If radix is undefined, let radixNumber be 10. + 10 } else { - radix.to_integer(context)? - }; - - // 4. If radixNumber < 2 or radixNumber > 36, throw a RangeError exception. - if !(2.0..=36.0).contains(&radix_number) { - return context - .throw_range_error("radix must be an integer at least 2 and no greater than 36"); - } - let radix_number = radix_number as u8; + // 3. Else, let radixMV be ? ToIntegerOrInfinity(radix). + radix + .to_integer_or_infinity(context)? + .as_integer() + // 4. If radixNumber < 2 or radixNumber > 36, throw a RangeError exception. 
+ .filter(|i| (2..=36).contains(i)) + .ok_or_else(|| { + context.construct_range_error( + "radix must be an integer at least 2 and no greater than 36", + ) + })? + } as u8; // 5. If radixNumber = 10, return ! ToString(x). if radix_number == 10 { diff --git a/boa_engine/src/builtins/regexp/mod.rs b/boa_engine/src/builtins/regexp/mod.rs index 915fdbe8bb..fc58e5785b 100644 --- a/boa_engine/src/builtins/regexp/mod.rs +++ b/boa_engine/src/builtins/regexp/mod.rs @@ -1707,8 +1707,7 @@ fn advance_string_index(s: &JsString, index: usize, unicode: bool) -> usize { } // 5. Let cp be ! CodePointAt(S, index). - let (_, offset, _) = - crate::builtins::string::code_point_at(s, index as i64).expect("Failed to get code point"); + let (_, offset, _) = crate::builtins::string::code_point_at(s, index); index + offset as usize } diff --git a/boa_engine/src/builtins/string/mod.rs b/boa_engine/src/builtins/string/mod.rs index 3698194da1..a62a6c5ff2 100644 --- a/boa_engine/src/builtins/string/mod.rs +++ b/boa_engine/src/builtins/string/mod.rs @@ -15,39 +15,53 @@ mod tests; use super::JsArgs; use crate::{ - builtins::{string::string_iterator::StringIterator, Array, BuiltIn, Number, RegExp, Symbol}, + builtins::{string::string_iterator::StringIterator, Array, BuiltIn, Number, RegExp}, context::StandardObjects, object::{ internal_methods::get_prototype_from_constructor, ConstructorBuilder, JsObject, ObjectData, }, property::{Attribute, PropertyDescriptor}, symbol::WellKnownSymbols, + value::IntegerOrInfinity, Context, JsResult, JsString, JsValue, }; use boa_profiler::Profiler; -use std::{char::from_u32, cmp::max, string::String as StdString}; +use std::{ + char::from_u32, + cmp::{max, min}, + string::String as StdString, +}; use unicode_normalization::UnicodeNormalization; -pub(crate) fn code_point_at(string: &JsString, position: i64) -> Option<(u32, u8, bool)> { - let size = string.encode_utf16().count(); - if position < 0 || position >= size as i64 { - return None; - } +#[derive(Clone, 
Copy, Eq, PartialEq)] +pub(crate) enum Placement { + Start, + End, +} +pub(crate) fn code_point_at(string: &JsString, position: usize) -> (u32, u8, bool) { let mut encoded = string.encode_utf16(); - let first = encoded.nth(position as usize)?; + let size = encoded.clone().count(); + + let first = encoded + .nth(position) + .expect("The callers of this function must've already checked bounds."); if !is_leading_surrogate(first) && !is_trailing_surrogate(first) { - return Some((u32::from(first), 1, false)); + return (u32::from(first), 1, false); } - if is_trailing_surrogate(first) || position + 1 == size as i64 { - return Some((u32::from(first), 1, true)); + + if is_trailing_surrogate(first) || position + 1 == size { + return (u32::from(first), 1, true); } - let second = encoded.next()?; + + let second = encoded + .next() + .expect("The callers of this function must've already checked bounds."); if !is_trailing_surrogate(second) { - return Some((u32::from(first), 1, true)); + return (u32::from(first), 1, true); } let cp = (u32::from(first) - 0xD800) * 0x400 + (u32::from(second) - 0xDC00) + 0x10000; - Some((cp, 2, false)) + (cp, 2, false) } /// Helper function to check if a `char` is trimmable. @@ -153,7 +167,7 @@ impl String { /// /// The resulting string can also not be larger than the maximum string size, /// which can differ in JavaScript engines. In Boa it is `2^32 - 1` - pub(crate) const MAX_STRING_LENGTH: f64 = u32::MAX as f64; + pub(crate) const MAX_STRING_LENGTH: usize = u32::MAX as usize; /// `String( value )` /// @@ -166,22 +180,26 @@ impl String { // This value is used by console.log and other routines to match Object type // to its Javascript Identifier (global constructor method name) let string = match args.get(0) { - Some(value) if value.is_symbol() && new_target.is_undefined() => { - Symbol::to_string(value, &[], context)? - .as_string() - .expect("'Symbol::to_string' returns 'Value::String'") - .clone() + // 2. Else, + // a. 
If NewTarget is undefined and Type(value) is Symbol, return SymbolDescriptiveString(value). + Some(JsValue::Symbol(ref sym)) if new_target.is_undefined() => { + return Ok(sym.descriptive_string().into()) } + // b. Let s be ? ToString(value). Some(value) => value.to_string(context)?, + // 1. If value is not present, let s be the empty String. None => JsString::default(), }; + // 3. If NewTarget is undefined, return s. if new_target.is_undefined() { return Ok(string.into()); } let prototype = get_prototype_from_constructor(new_target, StandardObjects::string_object, context)?; + + // 4. Return ! StringCreate(s, ? GetPrototypeFromConstructor(NewTarget, "%String.prototype%")). Ok(Self::string_create(string, prototype, context).into()) } @@ -222,10 +240,22 @@ impl String { s } + /// Abstract operation `thisStringValue( value )` + /// + /// More information: + /// - [ECMAScript reference][spec] + /// + /// [spec]: https://tc39.es/ecma262/#thisstringvalue fn this_string_value(this: &JsValue, context: &mut Context) -> JsResult { + // 1. If Type(value) is String, return value. this.as_string() .cloned() + // 2. If Type(value) is Object and value has a [[StringData]] internal slot, then + // a. Let s be value.[[StringData]]. + // b. Assert: Type(s) is String. + // c. Return s. .or_else(|| this.as_object().and_then(|obj| obj.borrow().as_string())) + // 3. Throw a TypeError exception. .ok_or_else(|| context.construct_type_error("'this' is not a string")) } @@ -363,7 +393,7 @@ impl String { for next in args { // 3a. Let nextCU be ℝ(? ToUint16(next)). // 3b. Append nextCU to the end of elements. - elements.push(next.to_u32(context)? as u16); + elements.push(next.to_uint16(context)?); } // 4. Return the String value whose code units are the elements in the List elements.
@@ -373,7 +403,12 @@ impl String { Ok(JsValue::String(JsString::new(s))) } - /// Get the string value to a primitive string + /// `String.prototype.toString ( )` + /// + /// More information: + /// - [ECMAScript reference][spec] + /// + /// [spec]: https://tc39.es/ecma262/#sec-string.prototype.tostring #[allow(clippy::wrong_self_convention)] #[inline] pub(crate) fn to_string( @@ -381,8 +416,8 @@ impl String { _: &[JsValue], context: &mut Context, ) -> JsResult { - // Get String from String Object and send it back as a new value - Ok(JsValue::new(Self::this_string_value(this, context)?)) + // 1. Return ? thisStringValue(this value). + Ok(Self::this_string_value(this, context)?.into()) } /// `String.prototype.charAt( index )` @@ -406,29 +441,32 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { + // 1. Let O be ? RequireObjectCoercible(this value). let this = this.require_object_coercible(context)?; + + // 2. Let S be ? ToString(O). let string = this.to_string(context)?; - let position = args - .get(0) - .cloned() - .unwrap_or_else(JsValue::undefined) - .to_integer(context)?; - // Fast path returning empty string when pos is obviously out of range - if position < 0.0 { - return Ok("".into()); - } + // 4. Let size be the length of S. + let size = string.encode_utf16().count() as i64; - // Calling .len() on a string would give the wrong result, as they are bytes not the number of - // unicode code points - // Note that this is an O(N) operation (because UTF-8 is complex) while getting the number of - // bytes is an O(1) operation. - if let Some(utf16_val) = string.encode_utf16().nth(position as usize) { - Ok(char::try_from(u32::from(utf16_val)) - .unwrap_or('\u{FFFD}' /* replacement char */) - .into()) - } else { - Ok("".into()) + // 3. Let position be ? ToIntegerOrInfinity(pos). + match args.get_or_undefined(0).to_integer_or_infinity(context)? { + IntegerOrInfinity::Integer(position) if (0..size).contains(&position) => { + // 6. 
Return the substring of S from position to position + 1. + let char = string + .encode_utf16() + .nth(position as usize) + .expect("Already checked bounds above"); + + Ok(char::try_from(u32::from(char)) + .unwrap_or('\u{FFFD}' /* replacement char */) + .into()) + } + _ => { + // 5. If position < 0 or position ≥ size, return the empty String. + Ok("".into()) + } } } @@ -444,20 +482,29 @@ impl String { /// [spec]: https://tc39.es/proposal-relative-indexing-method/#sec-string.prototype.at /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/at pub(crate) fn at(this: &JsValue, args: &[JsValue], context: &mut Context) -> JsResult { + // 1. Let O be ? RequireObjectCoercible(this value). let this = this.require_object_coercible(context)?; + + // 2. Let S be ? ToString(O). let s = this.to_string(context)?; - let len = s.encode_utf16().count(); - let relative_index = args - .get(0) - .cloned() - .unwrap_or_default() - .to_integer(context)?; - let k = if relative_index < 0_f64 { - len - (-relative_index as usize) - } else { - relative_index as usize + + // 3. Let len be the length of S. + let len = s.encode_utf16().count() as i64; + + // 4. Let relativeIndex be ? ToIntegerOrInfinity(index). + let relative_index = args.get_or_undefined(0).to_integer_or_infinity(context)?; + let k = match relative_index { + // 5. If relativeIndex ≥ 0, then + // a. Let k be relativeIndex. + IntegerOrInfinity::Integer(i) if i >= 0 && i < len => i as usize, + // 6. Else, + // a. Let k be len + relativeIndex. + IntegerOrInfinity::Integer(i) if i < 0 && (-i) <= len => (len + i) as usize, + // 7. If k < 0 or k ≥ len, return undefined. + _ => return Ok(JsValue::undefined()), }; + // 8. Return the substring of S from k to k + 1.
if let Some(utf16_val) = s.encode_utf16().nth(k) { Ok(JsValue::new( from_u32(u32::from(utf16_val)).expect("invalid utf-16 character"), @@ -486,25 +533,26 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value + // 1. Let O be ? RequireObjectCoercible(this value). let this = this.require_object_coercible(context)?; + + // 2. Let S be ? ToString(O). let string = this.to_string(context)?; - let position = args - .get(0) - .cloned() - .unwrap_or_else(JsValue::undefined) - .to_integer(context)?; - // Fast path returning undefined when pos is obviously out of range - if position < 0.0 { - return Ok(JsValue::undefined()); - } + // 3. Let position be ? ToIntegerOrInfinity(pos). + let position = args.get_or_undefined(0).to_integer_or_infinity(context)?; - if let Some((code_point, _, _)) = code_point_at(&string, position as i64) { - Ok(JsValue::new(code_point)) - } else { - Ok(JsValue::undefined()) + // 4. Let size be the length of S. + let size = string.encode_utf16().count() as i64; + + match position { + IntegerOrInfinity::Integer(position) if (0..size).contains(&position) => { + // 6. Let cp be ! CodePointAt(S, position). + // 7. Return 𝔽(cp.[[CodePoint]]). + Ok(code_point_at(&string, position as usize).0.into()) + } + // 5. If position < 0 or position ≥ size, return undefined. + _ => Ok(JsValue::undefined()), } } @@ -527,28 +575,31 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value + // 1. Let O be ? RequireObjectCoercible(this value). let this = this.require_object_coercible(context)?; + + // 2. Let S be ? ToString(O).
let string = this.to_string(context)?; - let position = args - .get(0) - .cloned() - .unwrap_or_else(JsValue::undefined) - .to_integer(context)?; - // Fast path returning NaN when pos is obviously out of range - if position < 0.0 || position >= string.len() as f64 { - return Ok(JsValue::nan()); - } + // 3. Let position be ? ToIntegerOrInfinity(pos). + let position = args.get_or_undefined(0).to_integer_or_infinity(context)?; - // Calling .len() on a string would give the wrong result, as they are bytes not the number of unicode code points - // Note that this is an O(N) operation (because UTF-8 is complex) while getting the number of bytes is an O(1) operation. - // If there is no element at that index, the result is NaN - if let Some(utf16_val) = string.encode_utf16().nth(position as usize) { - Ok(JsValue::new(f64::from(utf16_val))) - } else { - Ok(JsValue::nan()) + // 4. Let size be the length of S. + let size = string.encode_utf16().count() as i64; + + match position { + IntegerOrInfinity::Integer(position) if (0..size).contains(&position) => { + // 6. Return the Number value for the numeric value of the code unit at index position within the String S. + let char_code = u32::from( + string + .encode_utf16() + .nth(position as usize) + .expect("Already checked bounds above."), + ); + Ok(char_code.into()) + } + // 5. If position < 0 or position ≥ size, return NaN. + _ => Ok(JsValue::nan()), } } @@ -571,13 +622,21 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { + // 1. Let O be ? RequireObjectCoercible(this value). let this = this.require_object_coercible(context)?; + + // 2. Let S be ? ToString(O). let mut string = this.to_string(context)?.to_string(); + // 3. Let R be S. + // 4. For each element next of args, do for arg in args { + // a. Let nextString be ? ToString(next). + // b. Set R to the string-concatenation of R and nextString. string.push_str(&arg.to_string(context)?); } + // 5. Return R.
Ok(JsValue::new(string)) } @@ -597,26 +656,39 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { + // 1. Let O be ? RequireObjectCoercible(this value). let this = this.require_object_coercible(context)?; + + // 2. Let S be ? ToString(O). let string = this.to_string(context)?; - if let Some(arg) = args.get(0) { - let n = arg.to_integer(context)?; - if n < 0.0 { - return context.throw_range_error("repeat count cannot be a negative number"); - } + let len = string.encode_utf16().count(); - if n.is_infinite() { - return context.throw_range_error("repeat count cannot be infinity"); - } + // 3. Let n be ? ToIntegerOrInfinity(count). + match args.get_or_undefined(0).to_integer_or_infinity(context)? { + IntegerOrInfinity::Integer(n) + if n > 0 && (n as usize) * len <= Self::MAX_STRING_LENGTH => + { + if string.is_empty() { + return Ok("".into()); + } + let n = n as usize; + let mut result = std::string::String::with_capacity(n * len); + + std::iter::repeat(&string[..]) + .take(n) + .for_each(|s| result.push_str(s)); - if n * (string.len() as f64) > Self::MAX_STRING_LENGTH { - return context - .throw_range_error("repeat count must not overflow maximum string length"); + // 6. Return the String value that is made from n copies of S appended together. + Ok(result.into()) } - Ok(string.repeat(n as usize).into()) - } else { - Ok("".into()) + // 5. If n is 0, return the empty String. + IntegerOrInfinity::Integer(n) if n == 0 => Ok("".into()), + // 4. If n < 0 or n is +∞, throw a RangeError exception. + _ => context.throw_range_error( + "repeat count must be a positive finite number \ + that doesn't overflow the maximum string length (2^32 - 1)", + ), } } @@ -635,38 +707,52 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { + // 1. Let O be ? RequireObjectCoercible(this value). let this = this.require_object_coercible(context)?; + + // 2. Let S be ? ToString(O).
let string = this.to_string(context)?; - // Calling .len() on a string would give the wrong result, as they are bytes not the number of unicode code points - // Note that this is an O(N) operation (because UTF-8 is complex) while getting the number of bytes is an O(1) operation. - let len = string.encode_utf16().count(); - let from = match args - .get(0) - .cloned() - .unwrap_or_else(JsValue::undefined) - .to_integer(context)? - { - int_start if int_start.is_infinite() && int_start.is_sign_negative() => 0.0, - int_start if int_start < 0.0 => (len as f64 + int_start).max(0.0), - int_start => int_start.min(len as f64), + // 3. Let len be the length of S. + let len = string.encode_utf16().count() as i64; + + // 4. Let intStart be ? ToIntegerOrInfinity(start). + let from = match args.get_or_undefined(0).to_integer_or_infinity(context)? { + // 6. Else if intStart < 0, let from be max(len + intStart, 0). + IntegerOrInfinity::Integer(i) if i < 0 => max(len + i, 0), + + // 7. Else, let from be min(intStart, len). + IntegerOrInfinity::Integer(i) => min(i, len), + IntegerOrInfinity::PositiveInfinity => len, + + // 5. If intStart is -∞, let from be 0. + IntegerOrInfinity::NegativeInfinity => 0, } as usize; + // 8. If end is undefined, let intEnd be len; else let intEnd be ? ToIntegerOrInfinity(end). let to = match args .get(1) .filter(|end| !end.is_undefined()) - .map(|end| end.to_integer(context)) + .map(|end| end.to_integer_or_infinity(context)) .transpose()? - .unwrap_or(len as f64) + .unwrap_or(IntegerOrInfinity::Integer(len)) { - int_end if int_end.is_infinite() && int_end.is_sign_negative() => 0.0, - int_end if int_end < 0.0 => (len as f64 + int_end).max(0.0), - int_end => int_end.min(len as f64), + // 10. Else if intEnd < 0, let to be max(len + intEnd, 0). + IntegerOrInfinity::Integer(i) if i < 0 => max(len + i, 0), + + // 11. Else, let to be min(intEnd, len). + IntegerOrInfinity::Integer(i) => min(i, len), + IntegerOrInfinity::PositiveInfinity => len, + + // 9.
If intEnd is -∞, let to be 0. + IntegerOrInfinity::NegativeInfinity => 0, } as usize; + // 12. If from ≥ to, return the empty String. if from >= to { Ok("".into()) } else { + // 13. Return the substring of S from from to to. let span = to - from; let substring_utf16: Vec = string.encode_utf16().skip(from).take(span).collect(); let substring_lossy = StdString::from_utf16_lossy(&substring_utf16); @@ -689,39 +775,58 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { + // 1. Let O be ? RequireObjectCoercible(this value). let this = this.require_object_coercible(context)?; + + // 2. Let S be ? ToString(O). let string = this.to_string(context)?; - let search_string = args.get(0).cloned().unwrap_or_else(JsValue::undefined); + let search_string = args.get_or_undefined(0); - if Self::is_regexp_object(&search_string) { + // 3. Let isRegExp be ? IsRegExp(searchString). + // 4. If isRegExp is true, throw a TypeError exception. + if is_reg_exp(search_string, context)? { context.throw_type_error( "First argument to String.prototype.startsWith must not be a regular expression", )?; } - let search_str = search_string.to_string(context)?; + // 5. Let searchStr be ? ToString(searchString). + let search_string = search_string.to_string(context)?; - let len = string.encode_utf16().count(); - let search_length = search_str.encode_utf16().count(); + // 6. Let len be the length of S. + let len = string.encode_utf16().count() as i64; - // If less than 2 args specified, position is 'undefined', defaults to 0 - let pos = match args.get(1).cloned().unwrap_or_else(JsValue::undefined) { - position if position.is_undefined() => 0.0, - position => position.to_integer(context)?, + // 7. If position is undefined, let pos be 0; else let pos be ? ToIntegerOrInfinity(position).
+ let pos = match args.get_or_undefined(1) { + &JsValue::Undefined => IntegerOrInfinity::Integer(0), + position => position.to_integer_or_infinity(context)?, }; - let start = pos.min(len as f64).max(0.0); - let end = start + search_length as f64; + // 8. Let start be the result of clamping pos between 0 and len. + let start = pos.clamp_finite(0, len) as usize; - if end > len as f64 { + // 9. Let searchLength be the length of searchStr. + let search_length = search_string.encode_utf16().count(); + + // 10. If searchLength = 0, return true. + if search_length == 0 { + return Ok(JsValue::new(true)); + } + + // 11. Let end be start + searchLength. + let end = start + search_length; + + // 12. If end > len, return false. + if end > len as usize { Ok(JsValue::new(false)) } else { - let substring_utf16 = string - .encode_utf16() - .skip(start as usize) - .take(search_length); - let search_str_utf16 = search_str.encode_utf16(); + // 13. Let substring be the substring of S from start to end. + // 14. Return ! SameValueNonNumeric(substring, searchStr). + // `SameValueNonNumeric` forwards to `==`, so directly check + // equality to avoid converting to `JsValue` + let substring_utf16 = string.encode_utf16().skip(start).take(search_length); + let search_str_utf16 = search_string.encode_utf16(); Ok(JsValue::new(substring_utf16.eq(search_str_utf16))) } } @@ -741,44 +846,58 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { + // 1. Let O be ? RequireObjectCoercible(this value). let this = this.require_object_coercible(context)?; + + // 2. Let S be ? ToString(O). let string = this.to_string(context)?; - let search_str = match args.get(0).cloned().unwrap_or_else(JsValue::undefined) { - search_string if Self::is_regexp_object(&search_string) => { + let search_str = match args.get_or_undefined(0) { + // 3. Let isRegExp be ? IsRegExp(searchString). + // 4. If isRegExp is true, throw a TypeError exception. + search_string if is_reg_exp(search_string, context)? 
=> { return context.throw_type_error( "First argument to String.prototype.endsWith must not be a regular expression", ); } + // 5. Let searchStr be ? ToString(searchString). search_string => search_string.to_string(context)?, }; - let len = string.encode_utf16().count(); + // 6. Let len be the length of S. + let len = string.encode_utf16().count() as i64; - let pos = match args.get(1).cloned().unwrap_or_else(JsValue::undefined) { - end_position if end_position.is_undefined() => len as f64, - end_position => end_position.to_integer(context)?, + // 7. If endPosition is undefined, let pos be len; else let pos be ? ToIntegerOrInfinity(endPosition). + let end = match args.get_or_undefined(1) { + end_position if end_position.is_undefined() => IntegerOrInfinity::Integer(len), + end_position => end_position.to_integer_or_infinity(context)?, }; - let end = pos.max(0.0).min(len as f64) as usize; - - // If less than 2 args specified, end_position is 'undefined', defaults to - // length of this - if search_str.is_empty() { - return Ok(JsValue::new(true)); - } + // 8. Let end be the result of clamping pos between 0 and len. + let end = end.clamp_finite(0, len) as usize; + // 9. Let searchLength be the length of searchStr. let search_length = search_str.encode_utf16().count(); - if end < search_length { - Ok(JsValue::new(false)) - } else { - let start = end - search_length; + // 10. If searchLength = 0, return true. + if search_length == 0 { + return Ok(true.into()); + } + + // 11. Let start be end - searchLength. + if let Some(start) = end.checked_sub(search_length) { + // 13. Let substring be the substring of S from start to end. + // 14. Return ! SameValueNonNumeric(substring, searchStr). 
+ // `SameValueNonNumeric` forwards to `==`, so directly check + // equality to avoid converting to `JsValue` let substring_utf16 = string.encode_utf16().skip(start).take(search_length); let search_str_utf16 = search_str.encode_utf16(); Ok(JsValue::new(substring_utf16.eq(search_str_utf16))) + } else { + // 12. If start < 0, return false. + Ok(false.into()) } } @@ -797,39 +916,36 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { + // 1. Let O be ? RequireObjectCoercible(this value). let this = this.require_object_coercible(context)?; + + // 2. Let S be ? ToString(O). let string = this.to_string(context)?; - let search_str = match args.get(0).cloned().unwrap_or_else(JsValue::undefined) { - search_string if Self::is_regexp_object(&search_string) => { + let search_str = match args.get_or_undefined(0) { + // 3. Let isRegExp be ? IsRegExp(searchString). + search_string if is_reg_exp(search_string, context)? => { return context.throw_type_error( + // 4. If isRegExp is true, throw a TypeError exception. "First argument to String.prototype.includes must not be a regular expression", ); } + // 5. Let searchStr be ? ToString(searchString). search_string => search_string.to_string(context)?, }; - let pos = args - .get(1) - .cloned() - .unwrap_or_else(JsValue::undefined) - .to_integer(context)?; - let start = pos.max(0.0) as usize; + // 6. Let pos be ? ToIntegerOrInfinity(position). + // 7. Assert: If position is undefined, then pos is 0. + let pos = args.get_or_undefined(1).to_integer_or_infinity(context)?; - let substring_lossy = if start > 0 { - let substring_utf16: Vec = string.encode_utf16().skip(start).collect(); - StdString::from_utf16_lossy(&substring_utf16) - } else { - string.to_string() - }; - Ok(substring_lossy.contains(search_str.as_str()).into()) - } + // 8. Let len be the length of S. + // 9. Let start be the result of clamping pos between 0 and len. 
+ let start = pos.clamp_finite(0, string.encode_utf16().count() as i64) as usize; - fn is_regexp_object(value: &JsValue) -> bool { - value - .as_object() - .map(|obj| obj.borrow().is_regexp()) - .unwrap_or_default() + // 10. Let index be ! StringIndexOf(S, searchStr, start). + // 11. If index is not -1, return true. + // 12. Return false. + Ok(string.index_of(&search_str, start).is_some().into()) } /// `String.prototype.replace( regexp|substr, newSubstr|function )` @@ -977,9 +1093,9 @@ impl String { // 2. If searchValue is neither undefined nor null, then if !search_value.is_null_or_undefined() { // a. Let isRegExp be ? IsRegExp(searchValue). - if let Some(obj) = search_value.as_object().filter(|obj| obj.is_regexp()) { + if let Some(obj) = search_value.as_object() { // b. If isRegExp is true, then - if obj.is_regexp() { + if is_reg_exp_object(obj, context)? { // i. Let flags be ? Get(searchValue, "flags"). let flags = obj.get("flags", context)?; @@ -1017,13 +1133,12 @@ impl String { .map(JsObject::is_callable) .unwrap_or_default(); - // 6. If functionalReplace is false, then - #[allow(clippy::if_not_else)] - let replace_value_string = if !functional_replace { - // a. Set replaceValue to ? ToString(replaceValue). - replace_value.to_string(context)? + let replace_value_string = if functional_replace { + None } else { - JsString::empty() + // a. Set replaceValue to ? ToString(replaceValue). + // 6. If functionalReplace is false, then + Some(replace_value.to_string(context)?) }; // 7. Let searchLength be the length of searchString. @@ -1065,9 +1180,24 @@ impl String { .collect::>(), ); - // b. If functionalReplace is true, then // c. Else, - let replacement = if functional_replace { + let replacement = if let Some(ref replace_value) = replace_value_string { + // i. Assert: Type(replaceValue) is String. + // ii. Let captures be a new empty List. + // iii. Let replacement be ! GetSubstitution(searchString, string, p, captures, undefined, replaceValue). 
+ get_substitution( + &search_string, + &string, + p, + &[], + &JsValue::undefined(), + replace_value, + context, + ) + .expect("GetSubstitution should never fail here.") + } + // b. If functionalReplace is true, then + else { // i. Let replacement be ? ToString(? Call(replaceValue, undefined, « searchString, 𝔽(p), string »)). context .call( @@ -1080,21 +1210,8 @@ impl String { ], )? .to_string(context)? - } else { - // i. Assert: Type(replaceValue) is String. - // ii. Let captures be a new empty List. - // iii. Let replacement be ! GetSubstitution(searchString, string, p, captures, undefined, replaceValue). - get_substitution( - search_string.as_str(), - string.as_str(), - p, - &[], - &JsValue::undefined(), - &replace_value_string, - context, - ) - .expect("GetSubstitution should never fail here.") }; + // d. Set result to the string-concatenation of result, preserved, and replacement. result = JsString::new(format!("{}{preserved}{replacement}", result.as_str())); @@ -1139,45 +1256,30 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { + // 1. Let O be ? RequireObjectCoercible(this value). let this = this.require_object_coercible(context)?; - let string = this.to_string(context)?; - - let search_str = args - .get(0) - .cloned() - .unwrap_or_else(JsValue::undefined) - .to_string(context)?; - let pos = args - .get(1) - .cloned() - .unwrap_or_else(JsValue::undefined) - .to_integer(context)?; + // 2. Let S be ? ToString(O). + let string = this.to_string(context)?; - let len = string.encode_utf16().count(); - let start = pos.max(0.0); + // 3. Let searchStr be ? ToString(searchString). + let search_str = args.get_or_undefined(0).to_string(context)?; - if search_str.is_empty() { - return Ok(JsValue::new(start.min(len as f64))); - } + // 4. Let pos be ? ToIntegerOrInfinity(position). + // 5. Assert: If position is undefined, then pos is 0.
+ let pos = args.get_or_undefined(1).to_integer_or_infinity(context)?; - if start < len as f64 { - let start = start as usize; + // 6. Let len be the length of S. + let len = string.encode_utf16().count() as i64; - let substring_lossy = if start > 0 { - let substring_utf16: Vec = string.encode_utf16().skip(start).collect(); - StdString::from_utf16_lossy(&substring_utf16) - } else { - string.to_string() - }; + // 7. Let start be the result of clamping pos between 0 and len. + let start = pos.clamp_finite(0, len) as usize; - if let Some(position) = substring_lossy.find(search_str.as_str()) { - return Ok(JsValue::new( - substring_lossy[..position].encode_utf16().count() + start, - )); - } - } - Ok(JsValue::new(-1)) + // 8. Return 𝔽(! StringIndexOf(S, searchStr, start)). + Ok(string + .index_of(&search_str, start) + .map_or(-1, |i| i as i64) + .into()) } /// `String.prototype.lastIndexOf( searchValue[, fromIndex] )` @@ -1200,44 +1302,39 @@ impl String { ) -> JsResult { // 1. Let O be ? RequireObjectCoercible(this value). let this = this.require_object_coercible(context)?; + // 2. Let S be ? ToString(O). let string = this.to_string(context)?; // 3. Let searchStr be ? ToString(searchString). - let search_str = args - .get(0) - .cloned() - .unwrap_or_else(JsValue::undefined) - .to_string(context)?; + let search_str = args.get_or_undefined(0).to_string(context)?; // 4. Let numPos be ? ToNumber(position). // 5. Assert: If position is undefined, then numPos is NaN. - let num_pos = args - .get(1) - .cloned() - .unwrap_or_else(JsValue::undefined) - .to_number(context)?; + let num_pos = args.get_or_undefined(1).to_number(context)?; // 6. If numPos is NaN, let pos be +∞; otherwise, let pos be ! ToIntegerOrInfinity(numPos). let pos = if num_pos.is_nan() { - f64::INFINITY + IntegerOrInfinity::PositiveInfinity } else { - JsValue::new(num_pos).to_integer(context)?
+ JsValue::new(num_pos) + .to_integer_or_infinity(context) + .expect("Already called `to_number so this must not fail.") }; // 7. Let len be the length of S. let len = string.encode_utf16().count(); // 8. Let start be the result of clamping pos between 0 and len. - let start = pos.max(0.0).min(len as f64) as usize; + let start = pos.clamp_finite(0, len as i64) as usize; // 9. If searchStr is the empty String, return 𝔽(start). if search_str.is_empty() { - return Ok(JsValue::new(start as f64)); + return Ok(JsValue::new(start)); } - // TODO: Full UTF-16 support // 10. Let searchLen be the length of searchStr. let search_len = search_str.encode_utf16().count(); + // 11. For each non-negative integer i starting with start such that i ≤ len - searchLen, in descending order, do // a. Let candidate be the substring of S from i to i + searchLen. let substring_utf16: Vec = string.encode_utf16().take(start + search_len).collect(); @@ -1294,64 +1391,78 @@ impl String { rx.invoke(WellKnownSymbols::r#match(), &[JsValue::new(s)], context) } - /// Abstract method `StringPad`. + /// Abstract operation `StringPad ( O, maxLength, fillString, placement )`. /// /// Performs the actual string padding for padStart/End. - /// + /// + /// More information: + /// - [ECMAScript reference][spec] + /// + /// [spec]: https://tc39.es/ecma262/#sec-stringpad fn string_pad( object: &JsValue, max_length: &JsValue, fill_string: &JsValue, - at_start: bool, + placement: Placement, context: &mut Context, ) -> JsResult { + // 1. Let S be ? ToString(O). let string = object.to_string(context)?; + // 2. Let intMaxLength be ℝ(? ToLength(maxLength)). let int_max_length = max_length.to_length(context)?; + + // 3. Let stringLength be the length of S. let string_length = string.encode_utf16().count(); + // 4. If intMaxLength ≤ stringLength, return S. if int_max_length <= string_length { return Ok(string.into()); } + // 5.
If fillString is undefined, let filler be the String value consisting solely of the code unit 0x0020 (SPACE). let filler = if fill_string.is_undefined() { "\u{0020}".into() } else { + // 6. Else, let filler be ? ToString(fillString). fill_string.to_string(context)? }; - let filler_utf16: Vec = filler.encode_utf16().collect(); + // 7. If filler is the empty String, return S. if filler.is_empty() { return Ok(string.into()); } + // 8. Let fillLen be intMaxLength - stringLength. let fill_len = int_max_length - string_length; - let filler_len = filler_utf16.len(); - - let mut truncated_string_filler = StdString::new(); - let mut truncated_string_filler_len: usize = 0; - - while truncated_string_filler_len < fill_len { - if truncated_string_filler_len.wrapping_add(filler_len) <= fill_len { - truncated_string_filler.push_str(&filler); - truncated_string_filler_len += filler_len; + let filler_len = filler.encode_utf16().count(); + + // 9. Let truncatedStringFiller be the String value consisting of repeated + // concatenations of filler truncated to length fillLen. + let repetitions = { + let q = fill_len / filler_len; + let r = fill_len % filler_len; + if r == 0 { + q } else { - truncated_string_filler.push_str( - StdString::from_utf16_lossy( - &filler_utf16[..fill_len - truncated_string_filler_len], - ) - .as_str(), - ); - truncated_string_filler_len = fill_len; + q + 1 } - } - if at_start { - truncated_string_filler.push_str(&string); - Ok(truncated_string_filler.into()) + }; + + let truncated_string_filler = filler + .repeat(repetitions) + .encode_utf16() + .take(fill_len) + .collect::>(); + let truncated_string_filler = + std::string::String::from_utf16_lossy(truncated_string_filler.as_slice()); + + // 10. If placement is start, return the string-concatenation of truncatedStringFiller and S. 
+ if placement == Placement::Start { + Ok(format!("{truncated_string_filler}{string}").into()) } else { - let mut string = string.to_string(); - string.push_str(&truncated_string_filler); - Ok(string.into()) + // 11. Else, return the string-concatenation of S and truncatedStringFiller. + Ok(format!("{string}{truncated_string_filler}").into()) } } @@ -1372,12 +1483,14 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { + // 1. Let O be ? RequireObjectCoercible(this value). let this = this.require_object_coercible(context)?; - let max_length = args.get(0).cloned().unwrap_or_else(JsValue::undefined); - let fill_string = args.get(1).cloned().unwrap_or_else(JsValue::undefined); + let max_length = args.get_or_undefined(0); + let fill_string = args.get_or_undefined(1); - Self::string_pad(this, &max_length, &fill_string, false, context) + // 2. Return ? StringPad(O, maxLength, fillString, end). + Self::string_pad(this, max_length, fill_string, Placement::End, context) } /// `String.prototype.padStart( targetLength [, padString] )` @@ -1397,12 +1510,14 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { + // 1. Let O be ? RequireObjectCoercible(this value). let this = this.require_object_coercible(context)?; - let max_length = args.get(0).cloned().unwrap_or_else(JsValue::undefined); - let fill_string = args.get(1).cloned().unwrap_or_else(JsValue::undefined); + let max_length = args.get_or_undefined(0); + let fill_string = args.get_or_undefined(1); - Self::string_pad(this, &max_length, &fill_string, true, context) + // 2. Return ? StringPad(O, maxLength, fillString, start). + Self::string_pad(this, max_length, fill_string, Placement::Start, context) } /// String.prototype.trim() @@ -1487,10 +1602,17 @@ impl String { _: &[JsValue], context: &mut Context, ) -> JsResult { + // 1. Let O be ? RequireObjectCoercible(this value). let this = this.require_object_coercible(context)?; + + // 2. Let S be ? ToString(O). 
let string = this.to_string(context)?; - // The Rust String is mapped to uppercase using the builtin .to_lowercase(). - // There might be corner cases where it does not behave exactly like Javascript expects + + // 3. Let sText be ! StringToCodePoints(S). + // 4. Let lowerText be the result of toLowercase(sText), according to + // the Unicode Default Case Conversion algorithm. + // 5. Let L be ! CodePointsToString(lowerText). + // 6. Return L. Ok(JsValue::new(string.to_lowercase())) } @@ -1512,10 +1634,22 @@ impl String { _: &[JsValue], context: &mut Context, ) -> JsResult { + // This function behaves in exactly the same way as `String.prototype.toLowerCase`, except that the String is + // mapped using the toUppercase algorithm of the Unicode Default Case Conversion. + + // Comments below are an adaptation of the `String.prototype.toLowerCase` documentation. + + // 1. Let O be ? RequireObjectCoercible(this value). let this = this.require_object_coercible(context)?; + + // 2. Let S be ? ToString(O). let string = this.to_string(context)?; - // The Rust String is mapped to uppercase using the builtin .to_uppercase(). - // There might be corner cases where it does not behave exactly like Javascript expects + + // 3. Let sText be ! StringToCodePoints(S). + // 4. Let upperText be the result of toUppercase(sText), according to + // the Unicode Default Case Conversion algorithm. + // 5. Let L be ! CodePointsToString(upperText). + // 6. Return L. Ok(JsValue::new(string.to_uppercase())) } @@ -1534,29 +1668,37 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { + // 1. Let O be ? RequireObjectCoercible(this value). let this = this.require_object_coercible(context)?; + + // 2. Let S be ? ToString(O). let string = this.to_string(context)?; - let len = string.len(); - let int_start = args - .get(0) - .cloned() - .unwrap_or_else(JsValue::undefined) - .to_integer(context)?; + // 3. Let len be the length of S. 
+ let len = string.encode_utf16().count() as i64; + + // 4. Let intStart be ? ToIntegerOrInfinity(start). + let int_start = args.get_or_undefined(0).to_integer_or_infinity(context)?; - let int_end = match args.get(1).cloned().unwrap_or_else(JsValue::undefined) { - end if end.is_undefined() => len as f64, - end => end.to_integer(context)?, + // 5. If end is undefined, let intEnd be len; else let intEnd be ? ToIntegerOrInfinity(end). + let int_end = match args.get_or_undefined(1) { + &JsValue::Undefined => IntegerOrInfinity::Integer(len), + end => end.to_integer_or_infinity(context)?, }; - // Both start and end args replaced by 0 if they were negative - // or by the length of the String if they were greater - let final_start = int_start.max(0.0).min(len as f64); - let final_end = int_end.max(0.0).min(len as f64); + // 6. Let finalStart be the result of clamping intStart between 0 and len. + let final_start = int_start.clamp_finite(0, len) as usize; - let from = final_start.min(final_end) as usize; - let to = final_start.max(final_end) as usize; + // 7. Let finalEnd be the result of clamping intEnd between 0 and len. + let final_end = int_end.clamp_finite(0, len) as usize; + // 8. Let from be min(finalStart, finalEnd). + let from = min(final_start, final_end); + + // 9. Let to be max(finalStart, finalEnd). + let to = max(final_start, final_end); + + // 10. Return the substring of S from from to to. // Extract the part of the string contained between the from index and the to index // where from is guaranteed to be smaller or equal to to // TODO: Full UTF-16 support @@ -1582,40 +1724,54 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { + // 1. Let O be ? RequireObjectCoercible(this value). 
let this = this.require_object_coercible(context)?; - let string: Vec = this.to_string(context)?.encode_utf16().collect(); - let size = string.len(); - let int_start = match args - .get(0) - .cloned() - .unwrap_or_else(JsValue::undefined) - .to_integer(context)? - { - int_start if int_start.is_infinite() && int_start.is_sign_negative() => 0.0, - int_start if int_start < 0.0 => (int_start + size as f64).max(0.0), - int_start => int_start, - }; + // 2. Let S be ? ToString(O). + let string = this.to_string(context)?; + + // 3. Let size be the length of S. + let size = string.encode_utf16().count() as i64; - let int_length = match args.get(1).cloned().unwrap_or_else(JsValue::undefined) { - length if length.is_undefined() => size as f64, - length => length.to_integer(context)?, + // 4. Let intStart be ? ToIntegerOrInfinity(start). + let int_start = args.get_or_undefined(0).to_integer_or_infinity(context)?; + + // 7. If length is undefined, let intLength be size; otherwise let intLength be ? ToIntegerOrInfinity(length). + // Moved it before to ensure an error throws before returning the empty string on `match int_start` + let int_length = match args.get_or_undefined(1) { + &JsValue::Undefined => IntegerOrInfinity::Integer(size), + val => val.to_integer_or_infinity(context)?, }; - if int_start.is_infinite() || int_length <= 0.0 || int_length.is_infinite() { - return Ok("".into()); - } + let int_start = match int_start { + // 6. Else if intStart < 0, set intStart to max(size + intStart, 0). + IntegerOrInfinity::Integer(i) if i < 0 => max(size + i, 0), + IntegerOrInfinity::Integer(i) => i, + // 8. If intStart is +∞, ... return the empty String + IntegerOrInfinity::PositiveInfinity => return Ok("".into()), + // 5. If intStart is -∞, set intStart to 0. + IntegerOrInfinity::NegativeInfinity => 0, + } as usize; - let int_end = (int_start + int_length).min(size as f64) as usize; - let int_start = int_start as usize; + // 8. If ...
intLength ≤ 0, or intLength is +∞, return the empty String. + let int_length = match int_length { + IntegerOrInfinity::Integer(i) if i > 0 => i, + _ => return Ok("".into()), + } as usize; - if int_start >= int_end { - Ok("".into()) - } else { - let substring_utf16 = &string[int_start..int_end]; - let substring = StdString::from_utf16_lossy(substring_utf16); - Ok(substring.into()) - } + // 9. Let intEnd be min(intStart + intLength, size). + let int_end = min(int_start.saturating_add(int_length), size as usize); + + // 11. Return the substring of S from intStart to intEnd. + // 10. If intStart ≥ intEnd, return the empty String (`saturating_sub` makes `take` yield nothing then, instead of underflowing). + let substring_utf16: Vec = string + .encode_utf16() + .skip(int_start) + .take(int_end.saturating_sub(int_start)) + .collect(); + let substring = StdString::from_utf16_lossy(&substring_utf16); + + Ok(substring.into()) } /// `String.prototype.split ( separator, limit )` @@ -1782,11 +1938,11 @@ impl String { /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/valueOf pub(crate) fn value_of( this: &JsValue, - args: &[JsValue], + _: &[JsValue], context: &mut Context, ) -> JsResult { - // Use the to_string method because it is specified to do the same thing in this case - Self::to_string(this, args, context) + // 1. Return ? thisStringValue(this value). + Self::this_string_value(this, context).map(JsValue::from) } /// `String.prototype.matchAll( regexp )` @@ -1861,24 +2017,35 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { + // 1. Let O be ? RequireObjectCoercible(this value). let this = this.require_object_coercible(context)?; + + // 2. Let S be ? ToString(O). let s = this.to_string(context)?; + let form = args.get_or_undefined(0); let f_str; let f = if form.is_undefined() { + // 3. If form is undefined, let f be "NFC". "NFC" } else { + // 4. Else, let f be ? ToString(form). f_str = form.to_string(context)?; f_str.as_str() }; + // 6.
Let ns be the String value that is the result of normalizing S + // into the normalization form named by f as specified in + // https://unicode.org/reports/tr15/. + // 7. Return ns. match f { "NFC" => Ok(JsValue::new(s.nfc().collect::())), "NFD" => Ok(JsValue::new(s.nfd().collect::())), "NFKC" => Ok(JsValue::new(s.nfkc().collect::())), "NFKD" => Ok(JsValue::new(s.nfkd().collect::())), + // 5. If f is not one of "NFC", "NFD", "NFKC", or "NFKD", throw a RangeError exception. _ => context .throw_range_error("The normalization form should be one of NFC, NFD, NFKC, NFKD."), } @@ -1933,7 +2100,7 @@ impl String { } } -/// `22.1.3.17.1 GetSubstitution ( matched, str, position, captures, namedCaptures, replacement )` +/// Abstract operation `GetSubstitution ( matched, str, position, captures, namedCaptures, replacement )` /// /// More information: /// - [ECMAScript reference][spec] @@ -2153,3 +2320,32 @@ fn split_match(s_str: &str, q: usize, r_str: &str) -> Option { // 5. Return q + r. Some(q + r) } + +/// Abstract operation `IsRegExp( argument )` +/// +/// More information: +/// [ECMAScript reference][spec] +/// +/// [spec]: https://tc39.es/ecma262/#sec-isregexp +fn is_reg_exp(argument: &JsValue, context: &mut Context) -> JsResult { + // 1. If Type(argument) is not Object, return false. + let argument = match argument { + JsValue::Object(o) => o, + _ => return Ok(false), + }; + + is_reg_exp_object(argument, context) +} +fn is_reg_exp_object(argument: &JsObject, context: &mut Context) -> JsResult { + // 2. Let matcher be ? Get(argument, @@match). + let matcher = argument.get(WellKnownSymbols::r#match(), context)?; + + // 3. If matcher is not undefined, return ! ToBoolean(matcher). + if !matcher.is_undefined() { + return Ok(matcher.to_boolean()); + } + + // 4. If argument has a [[RegExpMatcher]] internal slot, return true. + // 5. Return false. 
+ Ok(argument.is_regexp()) +} diff --git a/boa_engine/src/builtins/string/string_iterator.rs b/boa_engine/src/builtins/string/string_iterator.rs index 48e2c8cc93..e798a00ba0 100644 --- a/boa_engine/src/builtins/string/string_iterator.rs +++ b/boa_engine/src/builtins/string/string_iterator.rs @@ -57,8 +57,7 @@ impl StringIterator { context, )); } - let (_, code_unit_count, _) = code_point_at(&native_string, i64::from(position)) - .expect("Invalid code point position"); + let (_, code_unit_count, _) = code_point_at(&native_string, position as usize); string_iterator.next_index += i32::from(code_unit_count); let result_string = crate::builtins::string::String::substring( &string_iterator.string, diff --git a/boa_engine/src/builtins/string/tests.rs b/boa_engine/src/builtins/string/tests.rs index c3118869f2..a8f3b2ea80 100644 --- a/boa_engine/src/builtins/string/tests.rs +++ b/boa_engine/src/builtins/string/tests.rs @@ -147,7 +147,8 @@ fn repeat_throws_when_count_is_negative() { } "# ), - "\"RangeError: repeat count cannot be a negative number\"" + "\"RangeError: repeat count must be a positive finite number \ + that doesn't overflow the maximum string length (2^32 - 1)\"" ); } @@ -166,7 +167,8 @@ fn repeat_throws_when_count_is_infinity() { } "# ), - "\"RangeError: repeat count cannot be infinity\"" + "\"RangeError: repeat count must be a positive finite number \ + that doesn't overflow the maximum string length (2^32 - 1)\"" ); } @@ -185,7 +187,8 @@ fn repeat_throws_when_count_overflows_max_length() { } "# ), - "\"RangeError: repeat count must not overflow maximum string length\"" + "\"RangeError: repeat count must be a positive finite number \ + that doesn't overflow the maximum string length (2^32 - 1)\"" ); } diff --git a/boa_engine/src/builtins/symbol/mod.rs b/boa_engine/src/builtins/symbol/mod.rs index 1089bc9182..4df82b7a6e 100644 --- a/boa_engine/src/builtins/symbol/mod.rs +++ b/boa_engine/src/builtins/symbol/mod.rs @@ -210,8 +210,11 @@ impl Symbol { _: 
&[JsValue], context: &mut Context, ) -> JsResult { + // 1. Let sym be ? thisSymbolValue(this value). let symbol = Self::this_symbol_value(this, context)?; - Ok(symbol.to_string().into()) + + // 2. Return SymbolDescriptiveString(sym). + Ok(symbol.descriptive_string().into()) } /// `Symbol.prototype.valueOf()` diff --git a/boa_engine/src/symbol.rs b/boa_engine/src/symbol.rs index b35e18a2bc..f869c6a0a6 100644 --- a/boa_engine/src/symbol.rs +++ b/boa_engine/src/symbol.rs @@ -289,6 +289,16 @@ impl JsSymbol { pub fn hash(&self) -> u64 { self.inner.hash } + + /// Abstract operation `SymbolDescriptiveString ( sym )` + /// + /// More info: + /// - [ECMAScript reference][spec] + /// + /// [spec]: https://tc39.es/ecma262/#sec-symboldescriptivestring + pub fn descriptive_string(&self) -> JsString { + self.to_string().into() + } } impl Finalize for JsSymbol {} diff --git a/boa_engine/src/tests.rs b/boa_engine/src/tests.rs index b93da4cc1c..f2d3b97fd6 100644 --- a/boa_engine/src/tests.rs +++ b/boa_engine/src/tests.rs @@ -1,5 +1,6 @@ use crate::{ - builtins::Number, check_output, exec, forward, forward_val, Context, JsValue, TestAction, + builtins::Number, check_output, exec, forward, forward_val, value::IntegerOrInfinity, Context, + JsValue, TestAction, }; #[test] @@ -942,40 +943,49 @@ fn to_index() { } #[test] -fn to_integer() { +fn to_integer_or_infinity() { let mut context = Context::default(); - assert!(Number::equal( - JsValue::nan().to_integer(&mut context).unwrap(), - 0.0 - )); - assert!(Number::equal( + assert_eq!( + JsValue::nan().to_integer_or_infinity(&mut context).unwrap(), + 0 + ); + assert_eq!( JsValue::new(f64::NEG_INFINITY) - .to_integer(&mut context) + .to_integer_or_infinity(&mut context) .unwrap(), - f64::NEG_INFINITY - )); - assert!(Number::equal( + IntegerOrInfinity::NegativeInfinity + ); + assert_eq!( JsValue::new(f64::INFINITY) - .to_integer(&mut context) + .to_integer_or_infinity(&mut context) .unwrap(), - f64::INFINITY - )); - assert!(Number::equal( - 
JsValue::new(0.0).to_integer(&mut context).unwrap(), - 0.0 - )); - let number = JsValue::new(-0.0).to_integer(&mut context).unwrap(); - assert!(!number.is_sign_negative()); - assert!(Number::equal(number, 0.0)); - assert!(Number::equal( - JsValue::new(20.9).to_integer(&mut context).unwrap(), - 20.0 - )); - assert!(Number::equal( - JsValue::new(-20.9).to_integer(&mut context).unwrap(), - -20.0 - )); + IntegerOrInfinity::PositiveInfinity + ); + assert_eq!( + JsValue::new(0.0) + .to_integer_or_infinity(&mut context) + .unwrap(), + 0 + ); + assert_eq!( + JsValue::new(-0.0) + .to_integer_or_infinity(&mut context) + .unwrap(), + 0 + ); + assert_eq!( + JsValue::new(20.9) + .to_integer_or_infinity(&mut context) + .unwrap(), + 20 + ); + assert_eq!( + JsValue::new(-20.9) + .to_integer_or_infinity(&mut context) + .unwrap(), + -20 + ); } #[test] diff --git a/boa_engine/src/value/integer.rs b/boa_engine/src/value/integer.rs new file mode 100644 index 0000000000..afc25ff091 --- /dev/null +++ b/boa_engine/src/value/integer.rs @@ -0,0 +1,69 @@ +use std::cmp::Ordering; + +/// Represents the result of the `ToIntegerOrInfinity` operation (variants are declared in ascending order so the derived `Ord` is the mathematical one). +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum IntegerOrInfinity { + NegativeInfinity, + Integer(i64), + PositiveInfinity, +} + +impl IntegerOrInfinity { + /// Clamps an `IntegerOrInfinity` between two `i64`, effectively converting + /// it to an i64. + pub fn clamp_finite(self, min: i64, max: i64) -> i64 { + assert!(min <= max); + + match self { + IntegerOrInfinity::Integer(i) => i.clamp(min, max), + IntegerOrInfinity::PositiveInfinity => max, + IntegerOrInfinity::NegativeInfinity => min, + } + } + + /// Gets the wrapped `i64` if the variant is an `Integer`.
+ pub fn as_integer(self) -> Option<i64> { + match self { + IntegerOrInfinity::Integer(i) => Some(i), + _ => None, + } + } +} + +impl PartialEq<i64> for IntegerOrInfinity { + fn eq(&self, other: &i64) -> bool { + match self { + IntegerOrInfinity::Integer(i) => i == other, + _ => false, + } + } +} + +impl PartialEq<IntegerOrInfinity> for i64 { + fn eq(&self, other: &IntegerOrInfinity) -> bool { + match other { + IntegerOrInfinity::Integer(i) => i == self, // not `i == other`: that re-enters this impl and never returns + _ => false, + } + } +} + +impl PartialOrd<i64> for IntegerOrInfinity { + fn partial_cmp(&self, other: &i64) -> Option<Ordering> { + match self { + IntegerOrInfinity::PositiveInfinity => Some(Ordering::Greater), + IntegerOrInfinity::Integer(i) => i.partial_cmp(other), + IntegerOrInfinity::NegativeInfinity => Some(Ordering::Less), + } + } +} + +impl PartialOrd<IntegerOrInfinity> for i64 { + fn partial_cmp(&self, other: &IntegerOrInfinity) -> Option<Ordering> { + match other { + IntegerOrInfinity::PositiveInfinity => Some(Ordering::Less), + IntegerOrInfinity::Integer(i) => self.partial_cmp(i), + IntegerOrInfinity::NegativeInfinity => Some(Ordering::Greater), + } + } +} diff --git a/boa_engine/src/value/mod.rs b/boa_engine/src/value/mod.rs index dac1538064..e3178f2fa7 100644 --- a/boa_engine/src/value/mod.rs +++ b/boa_engine/src/value/mod.rs @@ -32,6 +32,7 @@ mod conversions; pub(crate) mod display; mod equality; mod hash; +mod integer; mod operations; mod serde_json; mod r#type; @@ -40,6 +41,7 @@ pub use conversions::*; pub use display::ValueDisplay; pub use equality::*; pub use hash::*; +pub use integer::IntegerOrInfinity; pub use operations::*; pub use r#type::Type; @@ -76,14 +78,6 @@ pub enum JsValue { Symbol(JsSymbol), } -/// Represents the result of `ToIntegerOrInfinity` operation -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum IntegerOrInfinity { - Integer(i64), - PositiveInfinity, - NegativeInfinity, -} - impl JsValue { /// Create a new [`JsValue`]. #[inline] @@ -614,7 +608,7 @@ impl JsValue { } // 3.
Let int be the mathematical value whose sign is the sign of number and whose magnitude is floor(abs(ℝ(number))). - let int = number.floor() as i64; + let int = number.abs().floor().copysign(number) as i64; // 4. Let int8bit be int modulo 2^8. let int_8_bit = int % 2i64.pow(8); @@ -643,7 +637,7 @@ impl JsValue { } // 3. Let int be the mathematical value whose sign is the sign of number and whose magnitude is floor(abs(ℝ(number))). - let int = number.floor() as i64; + let int = number.abs().floor().copysign(number) as i64; // 4. Let int8bit be int modulo 2^8. let int_8_bit = int % 2i64.pow(8); @@ -715,7 +709,7 @@ impl JsValue { } // 3. Let int be the mathematical value whose sign is the sign of number and whose magnitude is floor(abs(ℝ(number))). - let int = number.floor() as i64; + let int = number.abs().floor().copysign(number) as i64; // 4. Let int16bit be int modulo 2^16. let int_16_bit = int % 2i64.pow(16); @@ -744,7 +738,7 @@ impl JsValue { } // 3. Let int be the mathematical value whose sign is the sign of number and whose magnitude is floor(abs(ℝ(number))). - let int = number.floor() as i64; + let int = number.abs().floor().copysign(number) as i64; // 4. Let int16bit be int modulo 2^16. let int_16_bit = int % 2i64.pow(16); @@ -796,21 +790,29 @@ impl JsValue { /// /// See: pub fn to_index(&self, context: &mut Context) -> JsResult { + // 1. If value is undefined, then if self.is_undefined() { + // a. Return 0. return Ok(0); } - let integer_index = self.to_integer(context)?; + // 2. Else, + // a. Let integer be ? ToIntegerOrInfinity(value). + let integer = self.to_integer_or_infinity(context)?; - if integer_index < 0.0 { - return context.throw_range_error("Integer index must be >= 0"); - } + // b. Let clamped be ! ToLength(𝔽(integer)). + let clamped = integer.clamp_finite(0, Number::MAX_SAFE_INTEGER as i64); - if integer_index > Number::MAX_SAFE_INTEGER { - return context.throw_range_error("Integer index must be less than 2**(53) - 1"); + // c. If !
SameValue(𝔽(integer), clamped) is false, throw a RangeError exception. + if integer != clamped { + return context.throw_range_error("Index must be between 0 and 2^53 - 1"); } - Ok(integer_index as usize) + // d. Assert: 0 ≤ integer ≤ 2^53 - 1. + debug_assert!(0 <= clamped && clamped <= Number::MAX_SAFE_INTEGER as i64); + + // e. Return integer. + Ok(clamped as usize) } /// Converts argument to an integer suitable for use as the length of an array-like object. @@ -818,37 +820,43 @@ impl JsValue { /// See: pub fn to_length(&self, context: &mut Context) -> JsResult { // 1. Let len be ? ToInteger(argument). - let len = self.to_integer(context)?; - // 2. If len ≤ +0, return +0. - if len < 0.0 { - return Ok(0); - } - // 3. Return min(len, 2^53 - 1). - Ok(len.min(Number::MAX_SAFE_INTEGER) as usize) + Ok(self + .to_integer_or_infinity(context)? + .clamp_finite(0, Number::MAX_SAFE_INTEGER as i64) as usize) } - /// Converts a value to an integral Number value. + /// Abstract operation `ToIntegerOrInfinity ( argument )` + /// + /// This method converts a `Value` to an integer representing its `Number` value with + /// fractional part truncated, or to +∞ or -∞ when that `Number` value is infinite. + /// + /// More information: + /// - [ECMAScript reference][spec] /// - /// See: - pub fn to_integer(&self, context: &mut Context) -> JsResult { + /// [spec]: https://tc39.es/ecma262/#sec-tointegerorinfinity + pub fn to_integer_or_infinity(&self, context: &mut Context) -> JsResult { // 1. Let number be ? ToNumber(argument). let number = self.to_number(context)?; - // 2. If number is +∞ or -∞, return number. - if !number.is_finite() { - // 3. If number is NaN, +0, or -0, return +0. - if number.is_nan() { - return Ok(0.0); - } - return Ok(number); - } + if number.is_nan() || number == 0.0 { + // 2. If number is NaN, +0𝔽, or -0𝔽, return 0. + Ok(IntegerOrInfinity::Integer(0)) + } else if number == f64::INFINITY { + // 3. If number is +∞𝔽, return +∞.
+ Ok(IntegerOrInfinity::PositiveInfinity) + } else if number == f64::NEG_INFINITY { + // 4. If number is -∞𝔽, return -∞. + Ok(IntegerOrInfinity::NegativeInfinity) + } else { + // 5. Let integer be floor(abs(ℝ(number))). + // 6. If number < +0𝔽, set integer to -integer. + let integer = number.abs().floor().copysign(number) as i64; - // 4. Let integer be the Number value that is the same sign as number and whose magnitude is floor(abs(number)). - // 5. If integer is -0, return +0. - // 6. Return integer. - Ok(number.trunc() + 0.0) // We add 0.0 to convert -0.0 to +0.0 + // 7. Return integer. + Ok(IntegerOrInfinity::Integer(integer)) + } } /// Converts a value to a double precision floating point. @@ -918,37 +926,6 @@ impl JsValue { .and_then(|obj| obj.to_property_descriptor(context)) } - /// Converts argument to an integer, +∞, or -∞. - /// - /// See: - pub fn to_integer_or_infinity(&self, context: &mut Context) -> JsResult { - // 1. Let number be ? ToNumber(argument). - let number = self.to_number(context)?; - - // 2. If number is NaN, +0𝔽, or -0𝔽, return 0. - if number.is_nan() || number == 0.0 || number == -0.0 { - Ok(IntegerOrInfinity::Integer(0)) - } else if number.is_infinite() && number.is_sign_positive() { - // 3. If number is +∞𝔽, return +∞. - Ok(IntegerOrInfinity::PositiveInfinity) - } else if number.is_infinite() && number.is_sign_negative() { - // 4. If number is -∞𝔽, return -∞. - Ok(IntegerOrInfinity::NegativeInfinity) - } else { - // 5. Let integer be floor(abs(ℝ(number))). - let integer = number.abs().floor(); - let integer = integer.min(Number::MAX_SAFE_INTEGER) as i64; - - // 6. If number < +0𝔽, set integer to -integer. - // 7. Return integer. - if number < 0.0 { - Ok(IntegerOrInfinity::Integer(-integer)) - } else { - Ok(IntegerOrInfinity::Integer(integer)) - } - } - } - /// `typeof` operator. Returns a string representing the type of the /// given ECMA Value. ///