Browse Source

Fix string.prototype methods and add static string methods (#1123)

<!---
Thank you for contributing to Boa! Please fill out the template below, and remove or add any
information as you feel necessary.
--->

This Pull Request fixes existing string prototype methods in #13 and adds static methods.

It changes the following:

- Fix bugs in existing string prototype methods and improve readability (e.g. rename variables to match the names in spec)
- Add static methods `String.raw`, `String.fromCharCode`, `String.fromCodePoint`
- Fix broken unit tests


Co-authored-by: RageKnify <RageKnify@gmail.com>
pull/1839/head
Jevan Chan 3 years ago
parent
commit
be26b10ea8
  1. 2
      boa/src/builtins/regexp/mod.rs
  2. 622
      boa/src/builtins/string/mod.rs
  3. 4
      boa/src/builtins/string/string_iterator.rs
  4. 16
      boa/src/builtins/string/tests.rs

2
boa/src/builtins/regexp/mod.rs

@ -1760,7 +1760,7 @@ fn advance_string_index(s: &JsString, index: usize, unicode: bool) -> usize {
// 5. Let cp be ! CodePointAt(S, index). // 5. Let cp be ! CodePointAt(S, index).
let (_, offset, _) = let (_, offset, _) =
crate::builtins::string::code_point_at(s, index as i32).expect("Failed to get code point"); crate::builtins::string::code_point_at(s, index as i64).expect("Failed to get code point");
index + offset as usize index + offset as usize
} }

622
boa/src/builtins/string/mod.rs

@ -15,7 +15,7 @@ mod tests;
use super::JsArgs; use super::JsArgs;
use crate::{ use crate::{
builtins::{string::string_iterator::StringIterator, Array, BuiltIn, RegExp, Symbol}, builtins::{string::string_iterator::StringIterator, Array, BuiltIn, Number, RegExp, Symbol},
context::StandardObjects, context::StandardObjects,
object::{ object::{
internal_methods::get_prototype_from_constructor, ConstructorBuilder, JsObject, ObjectData, internal_methods::get_prototype_from_constructor, ConstructorBuilder, JsObject, ObjectData,
@ -24,24 +24,21 @@ use crate::{
symbol::WellKnownSymbols, symbol::WellKnownSymbols,
BoaProfiler, Context, JsResult, JsString, JsValue, BoaProfiler, Context, JsResult, JsString, JsValue,
}; };
use std::{ use std::{char::from_u32, cmp::max, string::String as StdString};
char::{decode_utf16, from_u32},
cmp::{max, min},
string::String as StdString,
};
use unicode_normalization::UnicodeNormalization; use unicode_normalization::UnicodeNormalization;
pub(crate) fn code_point_at(string: &JsString, position: i32) -> Option<(u32, u8, bool)> { pub(crate) fn code_point_at(string: &JsString, position: i64) -> Option<(u32, u8, bool)> {
let size = string.encode_utf16().count() as i32; let size = string.encode_utf16().count();
if position < 0 || position >= size { if position < 0 || position >= size as i64 {
return None; return None;
} }
let mut encoded = string.encode_utf16(); let mut encoded = string.encode_utf16();
let first = encoded.nth(position as usize)?; let first = encoded.nth(position as usize)?;
if !is_leading_surrogate(first) && !is_trailing_surrogate(first) { if !is_leading_surrogate(first) && !is_trailing_surrogate(first) {
return Some((u32::from(first), 1, false)); return Some((u32::from(first), 1, false));
} }
if is_trailing_surrogate(first) || position + 1 == size { if is_trailing_surrogate(first) || position + 1 == size as i64 {
return Some((u32::from(first), 1, true)); return Some((u32::from(first), 1, true));
} }
let second = encoded.next()?; let second = encoded.next()?;
@ -108,6 +105,7 @@ impl BuiltIn for String {
.property("length", 0, attribute) .property("length", 0, attribute)
.static_method(Self::raw, "raw", 1) .static_method(Self::raw, "raw", 1)
.static_method(Self::from_char_code, "fromCharCode", 1) .static_method(Self::from_char_code, "fromCharCode", 1)
.static_method(Self::from_code_point, "fromCodePoint", 1)
.method(Self::char_at, "charAt", 1) .method(Self::char_at, "charAt", 1)
.method(Self::char_code_at, "charCodeAt", 1) .method(Self::char_code_at, "charCodeAt", 1)
.method(Self::code_point_at, "codePointAt", 1) .method(Self::code_point_at, "codePointAt", 1)
@ -230,6 +228,49 @@ impl String {
.ok_or_else(|| context.construct_type_error("'this' is not a string")) .ok_or_else(|| context.construct_type_error("'this' is not a string"))
} }
/// `String.fromCodePoint(num1[, ...[, numN]])`
///
/// Static method that builds a string out of the given sequence of code points.
///
/// Every argument is converted with `to_number`; an argument that is not an
/// integral number, or that lies outside `0..=0x10FFFF`, aborts the call with a
/// `RangeError` whose message is `invalid code point: <value>`.
///
/// More information:
///  - [ECMAScript reference][spec]
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma262/#sec-string.fromcodepoint
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/fromCodePoint
pub(crate) fn from_code_point(
    _: &JsValue,
    args: &[JsValue],
    context: &mut Context,
) -> JsResult<JsValue> {
    /// Upper bound (inclusive) accepted for a code point argument.
    const MAX_CODE_POINT: u32 = 0x0010_FFFF;

    // 1. Let result be the empty String.
    let mut code_points = StdString::new();

    // 2. For each element next of codePoints, do
    for next in args {
        // a. Let nextCP be ? ToNumber(next).
        let next_cp = next.to_number(context)?;

        // b. If ! IsIntegralNumber(nextCP) is false, throw a RangeError exception.
        // c. If ℝ(nextCP) < 0 or ℝ(nextCP) > 0x10FFFF, throw a RangeError exception.
        // Both failures report the same error, so they are checked together.
        let in_range = (0.0..=f64::from(MAX_CODE_POINT)).contains(&next_cp);
        if !(Number::is_float_integer(next_cp) && in_range) {
            let message = format!("invalid code point: {next_cp}");
            return Err(context.construct_range_error(message));
        }

        // TODO: Full UTF-16 support
        // d. Set result to the string-concatenation of result and ! UTF16EncodeCodePoint(ℝ(nextCP)).
        // A value that is not a valid `char` is stored as U+FFFD (replacement char).
        let decoded = char::from_u32(next_cp as u32).unwrap_or('\u{FFFD}');
        code_points.push(decoded);
    }

    // 3. Assert: If codePoints is empty, then result is the empty String.
    // 4. Return result.
    Ok(code_points.into())
}
/// `String.prototype.raw( template, ...substitutions )` /// `String.prototype.raw( template, ...substitutions )`
/// ///
/// More information: /// More information:
@ -364,17 +405,16 @@ impl String {
args: &[JsValue], args: &[JsValue],
context: &mut Context, context: &mut Context,
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
// First we get it the actual string a private field stored on the object only the context has access to. let this = this.require_object_coercible(context)?;
// Then we convert it into a Rust String by wrapping it in from_value let string = this.to_string(context)?;
let primitive_val = this.to_string(context)?; let position = args
let pos = args
.get(0) .get(0)
.cloned() .cloned()
.unwrap_or_else(JsValue::undefined) .unwrap_or_else(JsValue::undefined)
.to_integer(context)? as i32; .to_integer(context)?;
// Fast path returning empty string when pos is obviously out of range // Fast path returning empty string when pos is obviously out of range
if pos < 0 || pos >= primitive_val.len() as i32 { if position < 0.0 {
return Ok("".into()); return Ok("".into());
} }
@ -382,8 +422,10 @@ impl String {
// unicode code points // unicode code points
// Note that this is an O(N) operation (because UTF-8 is complex) while getting the number of // Note that this is an O(N) operation (because UTF-8 is complex) while getting the number of
// bytes is an O(1) operation. // bytes is an O(1) operation.
if let Some(utf16_val) = primitive_val.encode_utf16().nth(pos as usize) { if let Some(utf16_val) = string.encode_utf16().nth(position as usize) {
Ok(JsValue::new(from_u32(u32::from(utf16_val)).unwrap())) Ok(char::try_from(u32::from(utf16_val))
.unwrap_or('\u{FFFD}' /* replacement char */)
.into())
} else { } else {
Ok("".into()) Ok("".into())
} }
@ -445,19 +487,20 @@ impl String {
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
// First we get it the actual string a private field stored on the object only the context has access to. // First we get it the actual string a private field stored on the object only the context has access to.
// Then we convert it into a Rust String by wrapping it in from_value // Then we convert it into a Rust String by wrapping it in from_value
let primitive_val = this.to_string(context)?; let this = this.require_object_coercible(context)?;
let pos = args let string = this.to_string(context)?;
let position = args
.get(0) .get(0)
.cloned() .cloned()
.unwrap_or_else(JsValue::undefined) .unwrap_or_else(JsValue::undefined)
.to_integer(context)? as i32; .to_integer(context)?;
// Fast path returning undefined when pos is obviously out of range // Fast path returning undefined when pos is obviously out of range
if pos < 0 || pos >= primitive_val.len() as i32 { if position < 0.0 {
return Ok(JsValue::undefined()); return Ok(JsValue::undefined());
} }
if let Some((code_point, _, _)) = code_point_at(&primitive_val, pos) { if let Some((code_point, _, _)) = code_point_at(&string, position as i64) {
Ok(JsValue::new(code_point)) Ok(JsValue::new(code_point))
} else { } else {
Ok(JsValue::undefined()) Ok(JsValue::undefined())
@ -485,22 +528,23 @@ impl String {
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
// First we get it the actual string a private field stored on the object only the context has access to. // First we get it the actual string a private field stored on the object only the context has access to.
// Then we convert it into a Rust String by wrapping it in from_value // Then we convert it into a Rust String by wrapping it in from_value
let primitive_val = this.to_string(context)?; let this = this.require_object_coercible(context)?;
let pos = args let string = this.to_string(context)?;
let position = args
.get(0) .get(0)
.cloned() .cloned()
.unwrap_or_else(JsValue::undefined) .unwrap_or_else(JsValue::undefined)
.to_integer(context)? as i32; .to_integer(context)?;
// Fast path returning NaN when pos is obviously out of range // Fast path returning NaN when pos is obviously out of range
if pos < 0 || pos >= primitive_val.len() as i32 { if position < 0.0 || position >= string.len() as f64 {
return Ok(JsValue::nan()); return Ok(JsValue::nan());
} }
// Calling .len() on a string would give the wrong result, as they are bytes not the number of unicode code points // Calling .len() on a string would give the wrong result, as they are bytes not the number of unicode code points
// Note that this is an O(N) operation (because UTF-8 is complex) while getting the number of bytes is an O(1) operation. // Note that this is an O(N) operation (because UTF-8 is complex) while getting the number of bytes is an O(1) operation.
// If there is no element at that index, the result is NaN // If there is no element at that index, the result is NaN
if let Some(utf16_val) = primitive_val.encode_utf16().nth(pos as usize) { if let Some(utf16_val) = string.encode_utf16().nth(position as usize) {
Ok(JsValue::new(f64::from(utf16_val))) Ok(JsValue::new(f64::from(utf16_val)))
} else { } else {
Ok(JsValue::nan()) Ok(JsValue::nan())
@ -526,8 +570,8 @@ impl String {
args: &[JsValue], args: &[JsValue],
context: &mut Context, context: &mut Context,
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
let object = this.require_object_coercible(context)?; let this = this.require_object_coercible(context)?;
let mut string = object.to_string(context)?.to_string(); let mut string = this.to_string(context)?.to_string();
for arg in args { for arg in args {
string.push_str(&arg.to_string(context)?); string.push_str(&arg.to_string(context)?);
@ -552,8 +596,8 @@ impl String {
args: &[JsValue], args: &[JsValue],
context: &mut Context, context: &mut Context,
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
let object = this.require_object_coercible(context)?; let this = this.require_object_coercible(context)?;
let string = object.to_string(context)?; let string = this.to_string(context)?;
if let Some(arg) = args.get(0) { if let Some(arg) = args.get(0) {
let n = arg.to_integer(context)?; let n = arg.to_integer(context)?;
@ -590,44 +634,43 @@ impl String {
args: &[JsValue], args: &[JsValue],
context: &mut Context, context: &mut Context,
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
// First we get it the actual string a private field stored on the object only the context has access to. let this = this.require_object_coercible(context)?;
// Then we convert it into a Rust String by wrapping it in from_value let string = this.to_string(context)?;
let primitive_val = this.to_string(context)?;
// Calling .len() on a string would give the wrong result, as they are bytes not the number of unicode code points // Calling .len() on a string would give the wrong result, as they are bytes not the number of unicode code points
// Note that this is an O(N) operation (because UTF-8 is complex) while getting the number of bytes is an O(1) operation. // Note that this is an O(N) operation (because UTF-8 is complex) while getting the number of bytes is an O(1) operation.
let length = primitive_val.chars().count() as i32; let len = string.encode_utf16().count();
let from = match args
let start = args
.get(0) .get(0)
.cloned() .cloned()
.unwrap_or_else(JsValue::undefined) .unwrap_or_else(JsValue::undefined)
.to_integer(context)? as i32; .to_integer(context)?
let end = args {
int_start if int_start.is_infinite() && int_start.is_sign_negative() => 0.0,
int_start if int_start < 0.0 => (len as f64 + int_start).max(0.0),
int_start => int_start.min(len as f64),
} as usize;
let to = match args
.get(1) .get(1)
.cloned() .filter(|end| !end.is_undefined())
.unwrap_or_else(|| JsValue::new(length)) .map(|end| end.to_integer(context))
.to_integer(context)? as i32; .transpose()?
.unwrap_or(len as f64)
let from = if start < 0 { {
max(length.wrapping_add(start), 0) int_end if int_end.is_infinite() && int_end.is_sign_negative() => 0.0,
} else { int_end if int_end < 0.0 => (len as f64 + int_end).max(0.0),
min(start, length) int_end => int_end.min(len as f64),
}; } as usize;
let to = if end < 0 {
max(length.wrapping_add(end), 0) if from >= to {
Ok("".into())
} else { } else {
min(end, length) let span = to - from;
}; let substring_utf16: Vec<u16> = string.encode_utf16().skip(from).take(span).collect();
let substring_lossy = StdString::from_utf16_lossy(&substring_utf16);
let span = max(to.wrapping_sub(from), 0); Ok(substring_lossy.into())
}
let new_str: StdString = primitive_val
.chars()
.skip(from as usize)
.take(span as usize)
.collect();
Ok(JsValue::new(new_str))
} }
/// `String.prototype.startWith( searchString[, position] )` /// `String.prototype.startWith( searchString[, position] )`
@ -645,41 +688,40 @@ impl String {
args: &[JsValue], args: &[JsValue],
context: &mut Context, context: &mut Context,
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
// First we get it the actual string a private field stored on the object only the context has access to. let this = this.require_object_coercible(context)?;
// Then we convert it into a Rust String by wrapping it in from_value let string = this.to_string(context)?;
let primitive_val = this.to_string(context)?;
let arg = args.get_or_undefined(0); let search_string = args.get(0).cloned().unwrap_or_else(JsValue::undefined);
if Self::is_regexp_object(arg) { if Self::is_regexp_object(&search_string) {
context.throw_type_error( context.throw_type_error(
"First argument to String.prototype.startsWith must not be a regular expression", "First argument to String.prototype.startsWith must not be a regular expression",
)?; )?;
} }
let search_string = arg.to_string(context)?; let search_str = search_string.to_string(context)?;
let length = primitive_val.chars().count() as i32; let len = string.encode_utf16().count();
let search_length = search_string.chars().count() as i32; let search_length = search_str.encode_utf16().count();
// If less than 2 args specified, position is 'undefined', defaults to 0 // If less than 2 args specified, position is 'undefined', defaults to 0
let position = if let Some(integer) = args.get(1) { let pos = match args.get(1).cloned().unwrap_or_else(JsValue::undefined) {
integer.to_integer(context)? as i32 position if position.is_undefined() => 0.0,
} else { position => position.to_integer(context)?,
0
}; };
let start = min(max(position, 0), length); let start = pos.min(len as f64).max(0.0);
let end = start.wrapping_add(search_length); let end = start + search_length as f64;
if end > length { if end > len as f64 {
Ok(JsValue::new(false)) Ok(JsValue::new(false))
} else { } else {
// Only use the part of the string from "start" let substring_utf16 = string
let this_string: StdString = primitive_val.chars().skip(start as usize).collect(); .encode_utf16()
Ok(JsValue::new( .skip(start as usize)
this_string.starts_with(search_string.as_str()), .take(search_length);
)) let search_str_utf16 = search_str.encode_utf16();
Ok(JsValue::new(substring_utf16.eq(search_str_utf16)))
} }
} }
@ -698,40 +740,44 @@ impl String {
args: &[JsValue], args: &[JsValue],
context: &mut Context, context: &mut Context,
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
// First we get it the actual string a private field stored on the object only the context has access to. let this = this.require_object_coercible(context)?;
// Then we convert it into a Rust String by wrapping it in from_value let string = this.to_string(context)?;
let primitive_val = this.to_string(context)?;
let arg = args.get_or_undefined(0);
if Self::is_regexp_object(arg) { let search_str = match args.get(0).cloned().unwrap_or_else(JsValue::undefined) {
context.throw_type_error( search_string if Self::is_regexp_object(&search_string) => {
return context.throw_type_error(
"First argument to String.prototype.endsWith must not be a regular expression", "First argument to String.prototype.endsWith must not be a regular expression",
)?; );
} }
search_string => search_string.to_string(context)?,
};
let search_string = arg.to_string(context)?; let len = string.encode_utf16().count();
let pos = match args.get(1).cloned().unwrap_or_else(JsValue::undefined) {
end_position if end_position.is_undefined() => len as f64,
end_position => end_position.to_integer(context)?,
};
let length = primitive_val.chars().count() as i32; let end = pos.max(0.0).min(len as f64) as usize;
let search_length = search_string.chars().count() as i32;
// If less than 2 args specified, end_position is 'undefined', defaults to // If less than 2 args specified, end_position is 'undefined', defaults to
// length of this // length of this
let end_position = if let Some(integer) = args.get(1) { if search_str.is_empty() {
integer.to_integer(context)? as i32 return Ok(JsValue::new(true));
} else { }
length
};
let end = min(max(end_position, 0), length); let search_length = search_str.encode_utf16().count();
let start = end.wrapping_sub(search_length);
if start < 0 { if end < search_length {
Ok(JsValue::new(false)) Ok(JsValue::new(false))
} else { } else {
// Only use the part of the string up to "end" let start = end - search_length;
let this_string: StdString = primitive_val.chars().take(end as usize).collect();
Ok(JsValue::new(this_string.ends_with(search_string.as_str()))) let substring_utf16 = string.encode_utf16().skip(start).take(search_length);
let search_str_utf16 = search_str.encode_utf16();
Ok(JsValue::new(substring_utf16.eq(search_str_utf16)))
} }
} }
@ -750,36 +796,32 @@ impl String {
args: &[JsValue], args: &[JsValue],
context: &mut Context, context: &mut Context,
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
// First we get it the actual string a private field stored on the object only the context has access to. let this = this.require_object_coercible(context)?;
// Then we convert it into a Rust String by wrapping it in from_value let string = this.to_string(context)?;
let primitive_val = this.to_string(context)?;
let arg = args.get_or_undefined(0);
if Self::is_regexp_object(arg) { let search_str = match args.get(0).cloned().unwrap_or_else(JsValue::undefined) {
context.throw_type_error( search_string if Self::is_regexp_object(&search_string) => {
return context.throw_type_error(
"First argument to String.prototype.includes must not be a regular expression", "First argument to String.prototype.includes must not be a regular expression",
)?; );
} }
search_string => search_string.to_string(context)?,
};
let search_string = arg.to_string(context)?; let pos = args
.get(1)
let length = primitive_val.chars().count() as i32; .cloned()
.unwrap_or_else(JsValue::undefined)
// If less than 2 args specified, position is 'undefined', defaults to 0 .to_integer(context)?;
let start = pos.max(0.0) as usize;
let position = if let Some(integer) = args.get(1) { let substring_lossy = if start > 0 {
integer.to_integer(context)? as i32 let substring_utf16: Vec<u16> = string.encode_utf16().skip(start).collect();
StdString::from_utf16_lossy(&substring_utf16)
} else { } else {
0 string.to_string()
}; };
Ok(substring_lossy.contains(search_str.as_str()).into())
let start = min(max(position, 0), length);
// Take the string from "this" and use only the part of it after "start"
let this_string: StdString = primitive_val.chars().skip(start as usize).collect();
Ok(JsValue::new(this_string.contains(search_string.as_str())))
} }
fn is_regexp_object(value: &JsValue) -> bool { fn is_regexp_object(value: &JsValue) -> bool {
@ -1099,29 +1141,41 @@ impl String {
let this = this.require_object_coercible(context)?; let this = this.require_object_coercible(context)?;
let string = this.to_string(context)?; let string = this.to_string(context)?;
let search_string = args let search_str = args
.get(0) .get(0)
.cloned() .cloned()
.unwrap_or_else(JsValue::undefined) .unwrap_or_else(JsValue::undefined)
.to_string(context)?; .to_string(context)?;
let length = string.chars().count(); let pos = args
let start = args
.get(1) .get(1)
.map(|position| position.to_integer(context)) .cloned()
.transpose()? .unwrap_or_else(JsValue::undefined)
.map_or(0, |position| position.max(0.0).min(length as f64) as usize); .to_integer(context)?;
if search_string.is_empty() { let len = string.encode_utf16().count();
return Ok(start.min(length).into()); let start = pos.max(0.0);
if search_str.is_empty() {
return Ok(JsValue::new(start.min(len as f64)));
} }
if start < length { if start < len as f64 {
if let Some(position) = string.find(search_string.as_str()) { let start = start as usize;
return Ok(string[..position].chars().count().into());
let substring_lossy = if start > 0 {
let substring_utf16: Vec<u16> = string.encode_utf16().skip(start).collect();
StdString::from_utf16_lossy(&substring_utf16)
} else {
string.to_string()
};
if let Some(position) = substring_lossy.find(search_str.as_str()) {
return Ok(JsValue::new(
substring_lossy[..position].encode_utf16().count() + start,
));
} }
} }
Ok(JsValue::new(-1)) Ok(JsValue::new(-1))
} }
@ -1143,32 +1197,58 @@ impl String {
args: &[JsValue], args: &[JsValue],
context: &mut Context, context: &mut Context,
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
// 1. Let O be ? RequireObjectCoercible(this value).
let this = this.require_object_coercible(context)?; let this = this.require_object_coercible(context)?;
// 2. Let S be ? ToString(O).
let string = this.to_string(context)?; let string = this.to_string(context)?;
let search_string = args // 3. Let searchStr be ? ToString(searchString).
let search_str = args
.get(0) .get(0)
.cloned() .cloned()
.unwrap_or_else(JsValue::undefined) .unwrap_or_else(JsValue::undefined)
.to_string(context)?; .to_string(context)?;
let length = string.chars().count(); // 4. Let numPos be ? ToNumber(position).
let start = args // 5. Assert: If position is undefined, then numPos is NaN.
let num_pos = args
.get(1) .get(1)
.map(|position| position.to_integer(context)) .cloned()
.transpose()? .unwrap_or_else(JsValue::undefined)
.map_or(0, |position| position.max(0.0).min(length as f64) as usize); .to_number(context)?;
if search_string.is_empty() { // 6. If numPos is NaN, let pos be +∞; otherwise, let pos be ! ToIntegerOrInfinity(numPos).
return Ok(start.min(length).into()); let pos = if num_pos.is_nan() {
} f64::INFINITY
} else {
JsValue::new(num_pos).to_integer(context)?
};
if start < length { // 7. Let len be the length of S.
if let Some(position) = string.rfind(search_string.as_str()) { let len = string.encode_utf16().count();
return Ok(string[..position].chars().count().into()); // 8. Let start be the result of clamping pos between 0 and len.
} let start = pos.max(0.0).min(len as f64) as usize;
// 9. If searchStr is the empty String, return 𝔽(start).
if search_str.is_empty() {
return Ok(JsValue::new(start as f64));
}
// TODO: Full UTF-16 support
// 10. Let searchLen be the length of searchStr.
let search_len = search_str.encode_utf16().count();
// 11. For each non-negative integer i starting with start such that i ≤ len - searchLen, in descending order, do
// a. Let candidate be the substring of S from i to i + searchLen.
let substring_utf16: Vec<u16> = string.encode_utf16().take(start + search_len).collect();
let substring_lossy = StdString::from_utf16_lossy(&substring_utf16);
if let Some(position) = substring_lossy.rfind(search_str.as_str()) {
// b. If candidate is the same sequence of code units as searchStr, return 𝔽(i).
return Ok(JsValue::new(
substring_lossy[..position].encode_utf16().count(),
));
} }
// 12. Return -1𝔽.
Ok(JsValue::new(-1)) Ok(JsValue::new(-1))
} }
@ -1218,36 +1298,59 @@ impl String {
/// Performs the actual string padding for padStart/End. /// Performs the actual string padding for padStart/End.
/// <https://tc39.es/ecma262/#sec-stringpad/> /// <https://tc39.es/ecma262/#sec-stringpad/>
fn string_pad( fn string_pad(
primitive: JsString, object: &JsValue,
max_length: i32, max_length: &JsValue,
fill_string: Option<&JsString>, fill_string: &JsValue,
at_start: bool, at_start: bool,
) -> JsValue { context: &mut Context,
let primitive_length = primitive.len() as i32; ) -> JsResult<JsValue> {
let string = object.to_string(context)?;
let int_max_length = max_length.to_length(context)?;
let string_length = string.encode_utf16().count();
if max_length <= primitive_length { if int_max_length <= string_length {
return JsValue::new(primitive); return Ok(string.into());
} }
let filler = fill_string.map_or(" ", JsString::as_str); let filler = if fill_string.is_undefined() {
"\u{0020}".into()
} else {
fill_string.to_string(context)?
};
let filler_utf16: Vec<u16> = filler.encode_utf16().collect();
if filler.is_empty() { if filler.is_empty() {
return JsValue::new(primitive); return Ok(string.into());
} }
let fill_len = max_length.wrapping_sub(primitive_length); let fill_len = int_max_length - string_length;
let mut fill_str = StdString::new(); let filler_len = filler_utf16.len();
while fill_str.len() < fill_len as usize { let mut truncated_string_filler = StdString::new();
fill_str.push_str(filler); let mut truncated_string_filler_len: usize = 0;
}
// Cut to size max_length
let concat_fill_str: StdString = fill_str.chars().take(fill_len as usize).collect();
while truncated_string_filler_len < fill_len {
if truncated_string_filler_len.wrapping_add(filler_len) <= fill_len {
truncated_string_filler.push_str(&filler);
truncated_string_filler_len += filler_len;
} else {
truncated_string_filler.push_str(
StdString::from_utf16_lossy(
&filler_utf16[..fill_len - truncated_string_filler_len],
)
.as_str(),
);
truncated_string_filler_len = fill_len;
}
}
if at_start { if at_start {
JsValue::new(format!("{concat_fill_str}{primitive}")) truncated_string_filler.push_str(&string);
Ok(truncated_string_filler.into())
} else { } else {
JsValue::new(format!("{primitive}{concat_fill_str}")) let mut string = string.to_string();
string.push_str(&truncated_string_filler);
Ok(string.into())
} }
} }
@ -1268,23 +1371,12 @@ impl String {
args: &[JsValue], args: &[JsValue],
context: &mut Context, context: &mut Context,
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
let primitive = this.to_string(context)?; let this = this.require_object_coercible(context)?;
if args.is_empty() {
return Err(JsValue::new("padEnd requires maxLength argument"));
}
let max_length = args
.get(0)
.expect("failed to get argument for String method")
.to_integer(context)? as i32;
let fill_string = args.get(1).map(|arg| arg.to_string(context)).transpose()?; let max_length = args.get(0).cloned().unwrap_or_else(JsValue::undefined);
let fill_string = args.get(1).cloned().unwrap_or_else(JsValue::undefined);
Ok(Self::string_pad( Self::string_pad(this, &max_length, &fill_string, false, context)
primitive,
max_length,
fill_string.as_ref(),
false,
))
} }
/// `String.prototype.padStart( targetLength [, padString] )` /// `String.prototype.padStart( targetLength [, padString] )`
@ -1304,23 +1396,12 @@ impl String {
args: &[JsValue], args: &[JsValue],
context: &mut Context, context: &mut Context,
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
let primitive = this.to_string(context)?; let this = this.require_object_coercible(context)?;
if args.is_empty() {
return Err(JsValue::new("padStart requires maxLength argument"));
}
let max_length = args
.get(0)
.expect("failed to get argument for String method")
.to_integer(context)? as i32;
let fill_string = args.get(1).map(|arg| arg.to_string(context)).transpose()?; let max_length = args.get(0).cloned().unwrap_or_else(JsValue::undefined);
let fill_string = args.get(1).cloned().unwrap_or_else(JsValue::undefined);
Ok(Self::string_pad( Self::string_pad(this, &max_length, &fill_string, true, context)
primitive,
max_length,
fill_string.as_ref(),
true,
))
} }
/// String.prototype.trim() /// String.prototype.trim()
@ -1336,8 +1417,8 @@ impl String {
/// [spec]: https://tc39.es/ecma262/#sec-string.prototype.trim /// [spec]: https://tc39.es/ecma262/#sec-string.prototype.trim
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/trim /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/trim
pub(crate) fn trim(this: &JsValue, _: &[JsValue], context: &mut Context) -> JsResult<JsValue> { pub(crate) fn trim(this: &JsValue, _: &[JsValue], context: &mut Context) -> JsResult<JsValue> {
let this = this.require_object_coercible(context)?; let object = this.require_object_coercible(context)?;
let string = this.to_string(context)?; let string = object.to_string(context)?;
Ok(JsValue::new(string.trim_matches(is_trimmable_whitespace))) Ok(JsValue::new(string.trim_matches(is_trimmable_whitespace)))
} }
@ -1358,6 +1439,7 @@ impl String {
_: &[JsValue], _: &[JsValue],
context: &mut Context, context: &mut Context,
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
let this = this.require_object_coercible(context)?;
let string = this.to_string(context)?; let string = this.to_string(context)?;
Ok(JsValue::new( Ok(JsValue::new(
string.trim_start_matches(is_trimmable_whitespace), string.trim_start_matches(is_trimmable_whitespace),
@ -1404,12 +1486,11 @@ impl String {
_: &[JsValue], _: &[JsValue],
context: &mut Context, context: &mut Context,
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
// First we get it the actual string a private field stored on the object only the context has access to. let this = this.require_object_coercible(context)?;
// Then we convert it into a Rust String by wrapping it in from_value let string = this.to_string(context)?;
let this_str = this.to_string(context)?;
// The Rust String is mapped to uppercase using the builtin .to_lowercase(). // The Rust String is mapped to uppercase using the builtin .to_lowercase().
// There might be corner cases where it does not behave exactly like Javascript expects // There might be corner cases where it does not behave exactly like Javascript expects
Ok(JsValue::new(this_str.to_lowercase())) Ok(JsValue::new(string.to_lowercase()))
} }
/// `String.prototype.toUpperCase()` /// `String.prototype.toUpperCase()`
@ -1430,12 +1511,11 @@ impl String {
_: &[JsValue], _: &[JsValue],
context: &mut Context, context: &mut Context,
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
// First we get it the actual string a private field stored on the object only the context has access to. let this = this.require_object_coercible(context)?;
// Then we convert it into a Rust String by wrapping it in from_value let string = this.to_string(context)?;
let this_str = this.to_string(context)?;
// The Rust String is mapped to uppercase using the builtin .to_uppercase(). // The Rust String is mapped to uppercase using the builtin .to_uppercase().
// There might be corner cases where it does not behave exactly like Javascript expects // There might be corner cases where it does not behave exactly like Javascript expects
Ok(JsValue::new(this_str.to_uppercase())) Ok(JsValue::new(string.to_uppercase()))
} }
/// `String.prototype.substring( indexStart[, indexEnd] )` /// `String.prototype.substring( indexStart[, indexEnd] )`
@ -1453,39 +1533,36 @@ impl String {
args: &[JsValue], args: &[JsValue],
context: &mut Context, context: &mut Context,
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
// First we get it the actual string a private field stored on the object only the context has access to. let this = this.require_object_coercible(context)?;
// Then we convert it into a Rust String by wrapping it in from_value let string = this.to_string(context)?;
let primitive_val = this.to_string(context)?;
// If no args are specified, start is 'undefined', defaults to 0 let len = string.len();
let start = if let Some(integer) = args.get(0) { let int_start = args
integer.to_integer(context)? as i32 .get(0)
} else { .cloned()
0 .unwrap_or_else(JsValue::undefined)
}; .to_integer(context)?;
let length = primitive_val.encode_utf16().count() as i32;
// If less than 2 args specified, end is the length of the this object converted to a String let int_end = match args.get(1).cloned().unwrap_or_else(JsValue::undefined) {
let end = if let Some(integer) = args.get(1) { end if end.is_undefined() => len as f64,
integer.to_integer(context)? as i32 end => end.to_integer(context)?,
} else {
length
}; };
// Both start and end args replaced by 0 if they were negative // Both start and end args replaced by 0 if they were negative
// or by the length of the String if they were greater // or by the length of the String if they were greater
let final_start = min(max(start, 0), length); let final_start = int_start.max(0.0).min(len as f64);
let final_end = min(max(end, 0), length); let final_end = int_end.max(0.0).min(len as f64);
// Start and end are swapped if start is greater than end
let from = min(final_start, final_end) as usize; let from = final_start.min(final_end) as usize;
let to = max(final_start, final_end) as usize; let to = final_start.max(final_end) as usize;
// Extract the part of the string contained between the start index and the end index
// where start is guaranteed to be smaller or equals to end // Extract the part of the string contained between the from index and the to index
let extracted_string: Result<StdString, _> = decode_utf16( // where from is guaranteed to be smaller or equal to to
primitive_val // TODO: Full UTF-16 support
.encode_utf16() let substring_utf16: Vec<u16> = string.encode_utf16().skip(from).take(to - from).collect();
.skip(from) let substring = StdString::from_utf16_lossy(&substring_utf16);
.take(to.wrapping_sub(from)),
) Ok(substring.into())
.collect();
Ok(JsValue::new(extracted_string.expect("Invalid string")))
} }
/// `String.prototype.substr( start[, length] )` /// `String.prototype.substr( start[, length] )`
@ -1504,44 +1581,39 @@ impl String {
args: &[JsValue], args: &[JsValue],
context: &mut Context, context: &mut Context,
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
// First we get it the actual string a private field stored on the object only the context has access to. let this = this.require_object_coercible(context)?;
// Then we convert it into a Rust String by wrapping it in from_value let string: Vec<u16> = this.to_string(context)?.encode_utf16().collect();
let primitive_val = this.to_string(context)?; let size = string.len();
// If no args are specified, start is 'undefined', defaults to 0
let mut start = if let Some(integer) = args.get(0) { let int_start = match args
integer.to_integer(context)? as i32 .get(0)
} else { .cloned()
0 .unwrap_or_else(JsValue::undefined)
.to_integer(context)?
{
int_start if int_start.is_infinite() && int_start.is_sign_negative() => 0.0,
int_start if int_start < 0.0 => (int_start + size as f64).max(0.0),
int_start => int_start,
}; };
let length = primitive_val.chars().count() as i32;
// If less than 2 args specified, end is +infinity, the maximum number value. let int_length = match args.get(1).cloned().unwrap_or_else(JsValue::undefined) {
// Using i32::max_value() should be safe because the final length used is at most length if length.is_undefined() => size as f64,
// the number of code units from start to the end of the string, length => length.to_integer(context)?,
// which should always be smaller or equals to both +infinity and i32::max_value
let end = if let Some(integer) = args.get(1) {
integer.to_integer(context)? as i32
} else {
i32::MAX
}; };
// If start is negative it become the number of code units from the end of the string
if start < 0 {
start = max(length.wrapping_add(start), 0);
}
// length replaced by 0 if it was negative
// or by the number of code units from start to the end of the string if it was greater
let result_length = min(max(end, 0), length.wrapping_sub(start));
// If length is negative we return an empty string
// otherwise we extract the part of the string from start and is length code units long
if result_length <= 0 {
Ok(JsValue::new(""))
} else {
let extracted_string: StdString = primitive_val
.chars()
.skip(start as usize)
.take(result_length as usize)
.collect();
Ok(JsValue::new(extracted_string)) if int_start.is_infinite() || int_length <= 0.0 || int_length.is_infinite() {
return Ok("".into());
}
let int_end = (int_start + int_length).min(size as f64) as usize;
let int_start = int_start as usize;
if int_start >= int_end {
Ok("".into())
} else {
let substring_utf16 = &string[int_start..int_end];
let substring = StdString::from_utf16_lossy(substring_utf16);
Ok(substring.into())
} }
} }

4
boa/src/builtins/string/string_iterator.rs

@ -56,8 +56,8 @@ impl StringIterator {
context, context,
)); ));
} }
let (_, code_unit_count, _) = let (_, code_unit_count, _) = code_point_at(&native_string, i64::from(position))
code_point_at(&native_string, position).expect("Invalid code point position"); .expect("Invalid code point position");
string_iterator.next_index += i32::from(code_unit_count); string_iterator.next_index += i32::from(code_unit_count);
let result_string = crate::builtins::string::String::substring( let result_string = crate::builtins::string::String::substring(
&string_iterator.string, &string_iterator.string,

16
boa/src/builtins/string/tests.rs

@ -893,14 +893,14 @@ fn last_index_of_with_non_string_search_string_argument() {
fn last_index_of_with_from_index_argument() { fn last_index_of_with_from_index_argument() {
let mut context = Context::default(); let mut context = Context::default();
assert_eq!(forward(&mut context, "''.lastIndexOf('x', 2)"), "-1"); assert_eq!(forward(&mut context, "''.lastIndexOf('x', 2)"), "-1");
assert_eq!(forward(&mut context, "'x'.lastIndexOf('x', 2)"), "-1"); assert_eq!(forward(&mut context, "'x'.lastIndexOf('x', 2)"), "0");
assert_eq!(forward(&mut context, "'abcxx'.lastIndexOf('x', 2)"), "4"); assert_eq!(forward(&mut context, "'abcxx'.lastIndexOf('x', 2)"), "-1");
assert_eq!(forward(&mut context, "'x'.lastIndexOf('x', 2)"), "-1"); assert_eq!(forward(&mut context, "'x'.lastIndexOf('x', 2)"), "0");
assert_eq!(forward(&mut context, "'µµµxµµµ'.lastIndexOf('x', 2)"), "3"); assert_eq!(forward(&mut context, "'µµµxµµµ'.lastIndexOf('x', 2)"), "-1");
assert_eq!( assert_eq!(
forward(&mut context, "'µµµxµµµ'.lastIndexOf('x', 10000000)"), forward(&mut context, "'µµµxµµµ'.lastIndexOf('x', 10000000)"),
"-1" "3"
); );
} }
@ -945,13 +945,13 @@ fn last_index_non_integer_position_argument() {
); );
assert_eq!( assert_eq!(
forward(&mut context, "'abcx'.lastIndexOf('x', new String('1'))"), forward(&mut context, "'abcx'.lastIndexOf('x', new String('1'))"),
"3" "-1"
); );
assert_eq!( assert_eq!(
forward(&mut context, "'abcx'.lastIndexOf('x', new String('100'))"), forward(&mut context, "'abcx'.lastIndexOf('x', new String('100'))"),
"-1" "3"
); );
assert_eq!(forward(&mut context, "'abcx'.lastIndexOf('x', null)"), "3"); assert_eq!(forward(&mut context, "'abcx'.lastIndexOf('x', null)"), "-1");
} }
#[test] #[test]

Loading…
Cancel
Save