Browse Source

Implement RegExp `v` flag (#3695)

pull/3700/head
raskad 9 months ago committed by GitHub
parent
commit
a7c2f5baa7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 4
      Cargo.lock
  2. 2
      Cargo.toml
  3. 124
      core/engine/src/builtins/regexp/mod.rs
  4. 12
      core/parser/src/lexer/regex.rs
  5. 1
      test262_config.toml

4
Cargo.lock generated

@ -3058,9 +3058,9 @@ dependencies = [
[[package]] [[package]]
name = "regress" name = "regress"
version = "0.8.0" version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f5f39ba4513916c1b2657b72af6ec671f091cd637992f58d0ede5cae4e5dea0" checksum = "d06f9a1f7cd8473611ba1a480cf35f9c5cffc2954336ba90a982fdb7e7d7f51e"
dependencies = [ dependencies = [
"hashbrown 0.14.3", "hashbrown 0.14.3",
"memchr", "memchr",

2
Cargo.toml

@ -61,7 +61,7 @@ once_cell = { version = "1.19.0", default-features = false }
phf = { version = "0.11.2", default-features = false } phf = { version = "0.11.2", default-features = false }
pollster = "0.3.0" pollster = "0.3.0"
regex = "1.10.3" regex = "1.10.3"
regress = { version="0.8.0", features = ["utf16"]} regress = { version="0.9.0", features = ["utf16"]}
rustc-hash = { version = "1.1.0", default-features = false } rustc-hash = { version = "1.1.0", default-features = false }
serde_json = "1.0.114" serde_json = "1.0.114"
serde = "1.0.197" serde = "1.0.197"

124
core/engine/src/builtins/regexp/mod.rs

@ -75,6 +75,9 @@ impl IntrinsicObject for RegExp {
let get_unicode = BuiltInBuilder::callable(realm, Self::get_unicode) let get_unicode = BuiltInBuilder::callable(realm, Self::get_unicode)
.name(js_string!("get unicode")) .name(js_string!("get unicode"))
.build(); .build();
let get_unicode_sets = BuiltInBuilder::callable(realm, Self::get_unicode_sets)
.name(js_string!("get unicodeSets"))
.build();
let get_sticky = BuiltInBuilder::callable(realm, Self::get_sticky) let get_sticky = BuiltInBuilder::callable(realm, Self::get_sticky)
.name(js_string!("get sticky")) .name(js_string!("get sticky"))
.build(); .build();
@ -136,6 +139,12 @@ impl IntrinsicObject for RegExp {
None, None,
flag_attributes, flag_attributes,
) )
.accessor(
js_string!("unicodeSets"),
Some(get_unicode_sets),
None,
flag_attributes,
)
.accessor( .accessor(
js_string!("sticky"), js_string!("sticky"),
Some(get_sticky), Some(get_sticky),
@ -427,6 +436,7 @@ impl RegExp {
b's' => regexp.flags.contains(RegExpFlags::DOT_ALL), b's' => regexp.flags.contains(RegExpFlags::DOT_ALL),
b'i' => regexp.flags.contains(RegExpFlags::IGNORE_CASE), b'i' => regexp.flags.contains(RegExpFlags::IGNORE_CASE),
b'u' => regexp.flags.contains(RegExpFlags::UNICODE), b'u' => regexp.flags.contains(RegExpFlags::UNICODE),
b'v' => regexp.flags.contains(RegExpFlags::UNICODE_SETS),
b'y' => regexp.flags.contains(RegExpFlags::STICKY), b'y' => regexp.flags.contains(RegExpFlags::STICKY),
_ => unreachable!(), _ => unreachable!(),
})); }));
@ -447,6 +457,7 @@ impl RegExp {
b's' => "dotAll", b's' => "dotAll",
b'i' => "ignoreCase", b'i' => "ignoreCase",
b'u' => "unicode", b'u' => "unicode",
b'v' => "unicodeSets",
b'y' => "sticky", b'y' => "sticky",
_ => unreachable!(), _ => unreachable!(),
}; };
@ -565,6 +576,22 @@ impl RegExp {
Self::regexp_has_flag(this, b'u', context) Self::regexp_has_flag(this, b'u', context)
} }
/// Accessor backing `get RegExp.prototype.unicodeSets`.
///
/// Forwards to [`Self::regexp_has_flag`], asking it about the `v` flag byte.
/// The argument list passed to the getter is ignored.
///
/// More information:
///  - [ECMAScript reference][spec]
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.unicodesets
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicodeSets
pub(crate) fn get_unicode_sets(
    this: &JsValue,
    _: &[JsValue],
    context: &mut Context,
) -> JsResult<JsValue> {
    // Byte identifying the `unicodeSets` flag in the shared flag lookup.
    const UNICODE_SETS_FLAG: u8 = b'v';

    Self::regexp_has_flag(this, UNICODE_SETS_FLAG, context)
}
/// `get RegExp.prototype.sticky` /// `get RegExp.prototype.sticky`
/// ///
/// This flag indicates that it matches only from the index indicated by the `lastIndex` property /// This flag indicates that it matches only from the index indicated by the `lastIndex` property
@ -601,58 +628,67 @@ impl RegExp {
context: &mut Context, context: &mut Context,
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
// 1. Let R be the this value. // 1. Let R be the this value.
// 2. If Type(R) is not Object, throw a TypeError exception. // 2. If R is not an Object, throw a TypeError exception.
if let Some(object) = this.as_object() { let Some(object) = this.as_object() else {
// 3. Let result be the empty String. return Err(JsNativeError::typ()
let mut result = String::new(); .with_message("RegExp.prototype.flags getter called on non-object")
.into());
};
// 4. Let hasIndices be ToBoolean(? Get(R, "hasIndices")). // 3. Let codeUnits be a new empty List.
// 5. If hasIndices is true, append the code unit 0x0064 (LATIN SMALL LETTER D) as the last code unit of result. let mut code_units = Vec::new();
if object.get(utf16!("hasIndices"), context)?.to_boolean() {
result.push('d');
}
// 6. Let global be ! ToBoolean(? Get(R, "global")). // 4. Let hasIndices be ToBoolean(? Get(R, "hasIndices")).
// 7. If global is true, append the code unit 0x0067 (LATIN SMALL LETTER G) as the last code unit of result. // 5. If hasIndices is true, append the code unit 0x0064 (LATIN SMALL LETTER D) to codeUnits.
if object.get(utf16!("global"), context)?.to_boolean() { if object.get(utf16!("hasIndices"), context)?.to_boolean() {
result.push('g'); code_units.extend_from_slice(utf16!("d"));
} }
// 8. Let ignoreCase be ! ToBoolean(? Get(R, "ignoreCase")).
// 9. If ignoreCase is true, append the code unit 0x0069 (LATIN SMALL LETTER I) as the last code unit of result.
if object.get(utf16!("ignoreCase"), context)?.to_boolean() {
result.push('i');
}
// 10. Let multiline be ! ToBoolean(? Get(R, "multiline")). // 6. Let global be ToBoolean(? Get(R, "global")).
// 11. If multiline is true, append the code unit 0x006D (LATIN SMALL LETTER M) as the last code unit of result. // 7. If global is true, append the code unit 0x0067 (LATIN SMALL LETTER G) to codeUnits.
if object.get(utf16!("multiline"), context)?.to_boolean() { if object.get(utf16!("global"), context)?.to_boolean() {
result.push('m'); code_units.extend_from_slice(utf16!("g"));
} }
// 12. Let dotAll be ! ToBoolean(? Get(R, "dotAll")). // 8. Let ignoreCase be ToBoolean(? Get(R, "ignoreCase")).
// 13. If dotAll is true, append the code unit 0x0073 (LATIN SMALL LETTER S) as the last code unit of result. // 9. If ignoreCase is true, append the code unit 0x0069 (LATIN SMALL LETTER I) to codeUnits.
if object.get(utf16!("dotAll"), context)?.to_boolean() { if object.get(utf16!("ignoreCase"), context)?.to_boolean() {
result.push('s'); code_units.extend_from_slice(utf16!("i"));
} }
// 14. Let unicode be ! ToBoolean(? Get(R, "unicode")).
// 15. If unicode is true, append the code unit 0x0075 (LATIN SMALL LETTER U) as the last code unit of result.
if object.get(utf16!("unicode"), context)?.to_boolean() {
result.push('u');
}
// 16. Let sticky be ! ToBoolean(? Get(R, "sticky")). // 10. Let multiline be ToBoolean(? Get(R, "multiline")).
// 17. If sticky is true, append the code unit 0x0079 (LATIN SMALL LETTER Y) as the last code unit of result. // 11. If multiline is true, append the code unit 0x006D (LATIN SMALL LETTER M) to codeUnits.
if object.get(utf16!("sticky"), context)?.to_boolean() { if object.get(utf16!("multiline"), context)?.to_boolean() {
result.push('y'); code_units.extend_from_slice(utf16!("m"));
} }
// 18. Return result. // 12. Let dotAll be ToBoolean(? Get(R, "dotAll")).
return Ok(js_string!(result).into()); // 13. If dotAll is true, append the code unit 0x0073 (LATIN SMALL LETTER S) to codeUnits.
if object.get(utf16!("dotAll"), context)?.to_boolean() {
code_units.extend_from_slice(utf16!("s"));
} }
Err(JsNativeError::typ() // 14. Let unicode be ToBoolean(? Get(R, "unicode")).
.with_message("RegExp.prototype.flags getter called on non-object") // 15. If unicode is true, append the code unit 0x0075 (LATIN SMALL LETTER U) to codeUnits.
.into()) if object.get(utf16!("unicode"), context)?.to_boolean() {
code_units.extend_from_slice(utf16!("u"));
}
// 16. Let unicodeSets be ToBoolean(? Get(R, "unicodeSets")).
// 17. If unicodeSets is true, append the code unit 0x0076 (LATIN SMALL LETTER V) to codeUnits.
if object.get(utf16!("unicodeSets"), context)?.to_boolean() {
code_units.extend_from_slice(utf16!("v"));
}
// 18. Let sticky be ToBoolean(? Get(R, "sticky")).
// 19. If sticky is true, append the code unit 0x0079 (LATIN SMALL LETTER Y) to codeUnits.
if object.get(utf16!("sticky"), context)?.to_boolean() {
code_units.extend_from_slice(utf16!("y"));
}
// 20. Return the String value whose code units are the elements of the List codeUnits.
// If codeUnits has no elements, the empty String is returned.
Ok(JsString::from(code_units).into())
} }
/// `get RegExp.prototype.source` /// `get RegExp.prototype.source`

12
core/parser/src/lexer/regex.rs

@ -169,6 +169,9 @@ bitflags! {
/// Whether the regular expression result exposes the start and end indices of /// Whether the regular expression result exposes the start and end indices of
/// captured substrings. /// captured substrings.
const HAS_INDICES = 0b0100_0000; const HAS_INDICES = 0b0100_0000;
/// Whether or not UnicodeSets features are enabled.
const UNICODE_SETS = 0b1000_0000;
} }
} }
@ -186,6 +189,7 @@ impl FromStr for RegExpFlags {
b'u' => Self::UNICODE, b'u' => Self::UNICODE,
b'y' => Self::STICKY, b'y' => Self::STICKY,
b'd' => Self::HAS_INDICES, b'd' => Self::HAS_INDICES,
b'v' => Self::UNICODE_SETS,
_ => return Err(format!("invalid regular expression flag {}", char::from(c))), _ => return Err(format!("invalid regular expression flag {}", char::from(c))),
}; };
@ -198,6 +202,10 @@ impl FromStr for RegExpFlags {
flags.insert(new_flag); flags.insert(new_flag);
} }
if flags.contains(Self::UNICODE) && flags.contains(Self::UNICODE_SETS) {
return Err("cannot use both 'u' and 'v' flags".into());
}
Ok(flags) Ok(flags)
} }
} }
@ -233,6 +241,9 @@ impl ToString for RegExpFlags {
if self.contains(Self::STICKY) { if self.contains(Self::STICKY) {
s.push('y'); s.push('y');
} }
if self.contains(Self::UNICODE_SETS) {
s.push('v');
}
s s
} }
} }
@ -244,6 +255,7 @@ impl From<RegExpFlags> for Flags {
multiline: value.contains(RegExpFlags::MULTILINE), multiline: value.contains(RegExpFlags::MULTILINE),
dot_all: value.contains(RegExpFlags::DOT_ALL), dot_all: value.contains(RegExpFlags::DOT_ALL),
unicode: value.contains(RegExpFlags::UNICODE), unicode: value.contains(RegExpFlags::UNICODE),
unicode_sets: value.contains(RegExpFlags::UNICODE_SETS),
..Self::default() ..Self::default()
} }
} }

1
test262_config.toml

@ -13,7 +13,6 @@ features = [
"Intl.DisplayNames", "Intl.DisplayNames",
"Intl.RelativeTimeFormat", "Intl.RelativeTimeFormat",
"Intl-enumeration", "Intl-enumeration",
"regexp-v-flag",
### Pending proposals ### Pending proposals

Loading…
Cancel
Save