From e8bc79c26a3444e8afeb83e6eee4bd7fe827e5c2 Mon Sep 17 00:00:00 2001
From: Jevan Chan
Date: Wed, 6 Jan 2021 07:02:58 -0800
Subject: [PATCH] Implement String.prototype.split (#1026)

---
Review note: the `index` blob ids below are those of the original commit and
do not describe the revised post-image.

 boa/src/builtins/string/mod.rs   |  89 +++++++++++++++++-
 boa/src/builtins/string/tests.rs | 147 +++++++++++++++++++++++++++++++
 2 files changed, 235 insertions(+), 1 deletion(-)

diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs
index a9bdc22f29..6cf9c511ea 100644
--- a/boa/src/builtins/string/mod.rs
+++ b/boa/src/builtins/string/mod.rs
@@ -15,7 +15,7 @@ mod tests;
 
 use crate::property::DataDescriptor;
 use crate::{
-    builtins::{string::string_iterator::StringIterator, BuiltIn, RegExp},
+    builtins::{string::string_iterator::StringIterator, Array, BuiltIn, RegExp},
     object::{ConstructorBuilder, Object, ObjectData},
     property::Attribute,
     value::{RcString, Value},
@@ -105,6 +105,7 @@ impl BuiltIn for String {
             .method(Self::to_uppercase, "toUpperCase", 0)
             .method(Self::substring, "substring", 2)
             .method(Self::substr, "substr", 2)
+            .method(Self::split, "split", 2)
             .method(Self::value_of, "valueOf", 0)
             .method(Self::match_all, "matchAll", 1)
             .method(Self::replace, "replace", 2)
@@ -1165,6 +1166,92 @@ impl String {
         }
     }
 
+    /// String.prototype.split()
+    ///
+    /// The `split()` method divides a String into an ordered list of substrings, puts these substrings into an array, and returns the array.
+    ///
+    /// The division is done by searching for a pattern; where the pattern is provided as the first parameter in the method's call.
+    ///
+    /// If `separator` is an object with a callable `Symbol.split` property, the call is delegated
+    /// to that method, with `separator` as its `this` value and the receiver and `limit` as
+    /// arguments. Otherwise `separator` is converted to a string (only `undefined` means "no
+    /// separator") and `limit` goes through `ToUint32`; an `undefined` limit stands for 2^32 - 1.
+    ///
+    /// More information:
+    /// - [ECMAScript reference][spec]
+    /// - [MDN documentation][mdn]
+    ///
+    /// [spec]: https://tc39.es/ecma262/#sec-string.prototype.split
+    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/split
+    pub(crate) fn split(this: &Value, args: &[Value], context: &mut Context) -> Result<Value> {
+        // The receiver must be coercible to an object before anything else is looked at.
+        let this = this.require_object_coercible(context)?;
+
+        // Missing arguments behave exactly like `undefined`.
+        let undefined = Value::Undefined;
+        let separator = args.get(0).unwrap_or(&undefined);
+        let limit = args.get(1).unwrap_or(&undefined);
+
+        // A non-nullish object separator may take over the whole operation through its
+        // `Symbol.split` method. Primitive separators are not looked up here.
+        if !separator.is_null_or_undefined() {
+            if let Some(object) = separator.as_object() {
+                let key = context.well_known_symbols().split_symbol();
+                let splitter = match object.get_method(context, key) {
+                    Ok(splitter) => splitter,
+                    Err(_) => {
+                        let message = "separator[Symbol.split] is not a function";
+                        return Err(context.construct_type_error(message));
+                    }
+                };
+
+                if let Some(splitter) = splitter {
+                    // Per the specification: Call(splitter, separator, « O, limit »), so the
+                    // separator is `this` and the unconverted receiver is the first argument.
+                    return splitter.call(separator, &[this.clone(), limit.clone()], context);
+                }
+            }
+        }
+
+        // Stringify the receiver only after the `Symbol.split` lookup (specification order).
+        let string = this.to_string(context)?;
+
+        // An `undefined` limit means "no limit" (2^32 - 1); anything else goes through
+        // ToUint32, so e.g. `-1` wraps to 2^32 - 1 instead of emptying the result.
+        let limit = if limit.is_undefined() {
+            std::u32::MAX as usize
+        } else {
+            limit.to_u32(context)? as usize
+        };
+
+        // Only `undefined` means "no separator"; `null` is stringified to "null".
+        let separator = if separator.is_undefined() {
+            None
+        } else {
+            Some(separator.to_string(context)?)
+        };
+
+        let values: Vec<Value> = match separator {
+            None if limit == 0 => vec![],
+            None => vec![Value::from(string)],
+            // An empty separator splits the string into its UTF-16 code units.
+            Some(separator) if separator.is_empty() => string
+                .encode_utf16()
+                // TODO: Support keeping invalid code point in string
+                .map(|cp| Value::from(std::string::String::from_utf16_lossy(&[cp])))
+                .take(limit)
+                .collect(),
+            Some(separator) => string
+                .split(separator.as_str())
+                .map(Value::from)
+                .take(limit)
+                .collect(),
+        };
+
+        let new = Array::new_array(context)?;
+        Array::construct_array(&new, &values, context)
+    }
+
     /// String.prototype.valueOf()
     ///
     /// The `valueOf()` method returns the primitive value of a `String` object.
diff --git a/boa/src/builtins/string/tests.rs b/boa/src/builtins/string/tests.rs
index a4f9409e38..e94d0636af 100644
--- a/boa/src/builtins/string/tests.rs
+++ b/boa/src/builtins/string/tests.rs
@@ -563,6 +563,153 @@ fn trim_end() {
     assert_eq!(forward(&mut context, "' Hello '.trimEnd()"), "\" Hello\"");
 }
 
+#[test]
+fn split() {
+    let mut context = Context::new();
+    assert_eq!(
+        forward(&mut context, "'Hello'.split()"),
+        forward(&mut context, "['Hello']")
+    );
+    assert_eq!(
+        forward(&mut context, "'Hello'.split(null)"),
+        forward(&mut context, "['Hello']")
+    );
+    assert_eq!(
+        forward(&mut context, "'Hello'.split(undefined)"),
+        forward(&mut context, "['Hello']")
+    );
+    assert_eq!(
+        forward(&mut context, "'Hello'.split('')"),
+        forward(&mut context, "['H','e','l','l','o']")
+    );
+
+    assert_eq!(
+        forward(&mut context, "'x1x2'.split('x')"),
+        forward(&mut context, "['','1','2']")
+    );
+    assert_eq!(
+        forward(&mut context, "'x1x2x'.split('x')"),
+        forward(&mut context, "['','1','2','']")
+    );
+
+    assert_eq!(
+        forward(&mut context, "'x1x2x'.split('x', 0)"),
+        forward(&mut context, "[]")
+    );
+    assert_eq!(
+        forward(&mut context, "'x1x2x'.split('x', 2)"),
+        forward(&mut context, "['','1']")
+    );
+    assert_eq!(
+        forward(&mut context, "'x1x2x'.split('x', 10)"),
+        forward(&mut context, "['','1','2','']")
+    );
+    assert_eq!(
+        forward(&mut context, "'x1x2x'.split('x', undefined)"),
+        forward(&mut context, "['','1','2','']")
+    );
+    assert_eq!(
+        forward(&mut context, "'x1x2x'.split('x', -1)"),
+        forward(&mut context, "['','1','2','']")
+    );
+
+    assert_eq!(
+        forward(&mut context, "'x1x2x'.split(1)"),
+        forward(&mut context, "['x','x2x']")
+    );
+    assert_eq!(
+        forward(&mut context, "'anullb'.split(null)"),
+        forward(&mut context, "['a','b']")
+    );
+
+    assert_eq!(
+        forward(&mut context, "'Hello'.split(null, 0)"),
+        forward(&mut context, "[]")
+    );
+    assert_eq!(
+        forward(&mut context, "'Hello'.split(undefined, 0)"),
+        forward(&mut context, "[]")
+    );
+
+    assert_eq!(
+        forward(&mut context, "''.split()"),
+        forward(&mut context, "['']")
+    );
+    assert_eq!(
+        forward(&mut context, "''.split(undefined)"),
+        forward(&mut context, "['']")
+    );
+    assert_eq!(
+        forward(&mut context, "''.split('')"),
+        forward(&mut context, "[]")
+    );
+    assert_eq!(
+        forward(&mut context, "''.split('1')"),
+        forward(&mut context, "['']")
+    );
+
+    // TODO: Support keeping invalid code point in string
+    assert_eq!(
+        forward(&mut context, "'𝟘𝟙𝟚𝟛'.split('')"),
+        forward(&mut context, "['�','�','�','�','�','�','�','�']")
+    );
+}
+
+#[test]
+fn split_with_symbol_split_method() {
+    assert_eq!(
+        forward(
+            &mut Context::new(),
+            r#"
+            let sep = {};
+            sep[Symbol.split] = function(s, limit) { return s + limit.toString(); };
+            'hello'.split(sep, 10)
+            "#
+        ),
+        "\"hello10\""
+    );
+
+    assert_eq!(
+        forward(
+            &mut Context::new(),
+            r#"
+            let sep = {};
+            sep[Symbol.split] = function(s, limit) { return this === sep; };
+            'hello'.split(sep)
+            "#
+        ),
+        "true"
+    );
+
+    assert_eq!(
+        forward(
+            &mut Context::new(),
+            r#"
+            let sep = {};
+            sep[Symbol.split] = undefined;
+            'hello'.split(sep)
+            "#
+        ),
+        "[ \"hello\" ]"
+    );
+
+    assert_eq!(
+        forward(
+            &mut Context::new(),
+            r#"
+            try {
+                let sep = {};
+                sep[Symbol.split] = 10;
+                'hello'.split(sep, 10);
+            } catch(e) {
+                e.toString()
+            }
+            "#
+        ),
+        "\"TypeError: separator[Symbol.split] is not a function\""
+    );
+}
+
 #[test]
 fn index_of_with_no_arguments() {
     let mut context = Context::new();