mirror of https://github.com/boa-dev/boa.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
559 lines
21 KiB
559 lines
21 KiB
//! This module implements the global `RegExp` object. |
|
//! |
|
//! The `RegExp` object is used for matching text with a pattern.
|
//! |
|
//! More information: |
|
//! - [ECMAScript reference][spec] |
|
//! - [MDN documentation][mdn] |
|
//! |
|
//! [spec]: https://tc39.es/ecma262/#sec-regexp-constructor |
|
//! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp |
|
|
|
use crate::{ |
|
builtins::BuiltIn, |
|
gc::{empty_trace, Finalize, Trace}, |
|
object::{ConstructorBuilder, ObjectData}, |
|
property::{Attribute, DataDescriptor}, |
|
value::{RcString, Value}, |
|
BoaProfiler, Context, Result, |
|
}; |
|
use regress::Regex; |
|
|
|
#[cfg(test)] |
|
mod tests; |
|
|
|
/// The internal representation on a `RegExp` object. |
|
#[derive(Debug, Clone, Finalize)] |
|
pub struct RegExp { |
|
/// Regex matcher. |
|
matcher: Regex, |
|
|
|
/// Update last_index, set if global or sticky flags are set. |
|
use_last_index: bool, |
|
|
|
/// String of parsed flags. |
|
flags: Box<str>, |
|
|
|
/// Flag 's' - dot matches newline characters. |
|
dot_all: bool, |
|
|
|
/// Flag 'g' |
|
global: bool, |
|
|
|
/// Flag 'i' - ignore case. |
|
ignore_case: bool, |
|
|
|
/// Flag 'm' - '^' and '$' match beginning/end of line. |
|
multiline: bool, |
|
|
|
/// Flag 'y' |
|
sticky: bool, |
|
|
|
/// Flag 'u' - Unicode. |
|
unicode: bool, |
|
|
|
pub(crate) original_source: Box<str>, |
|
original_flags: Box<str>, |
|
} |
|
|
|
// Only safe while regress::Regex doesn't implement Trace itself. |
|
unsafe impl Trace for RegExp { |
|
empty_trace!(); |
|
} |
|
|
|
impl BuiltIn for RegExp { |
|
const NAME: &'static str = "RegExp"; |
|
|
|
fn attribute() -> Attribute { |
|
Attribute::WRITABLE | Attribute::NON_ENUMERABLE | Attribute::CONFIGURABLE |
|
} |
|
|
|
fn init(context: &mut Context) -> (&'static str, Value, Attribute) { |
|
let _timer = BoaProfiler::global().start_event(Self::NAME, "init"); |
|
|
|
let regexp_object = ConstructorBuilder::with_standard_object( |
|
context, |
|
Self::constructor, |
|
context.standard_objects().regexp_object().clone(), |
|
) |
|
.name(Self::NAME) |
|
.length(Self::LENGTH) |
|
.property("lastIndex", 0, Attribute::all()) |
|
.method(Self::test, "test", 1) |
|
.method(Self::exec, "exec", 1) |
|
.method(Self::to_string, "toString", 0) |
|
.build(); |
|
|
|
// TODO: add them RegExp accessor properties |
|
|
|
(Self::NAME, regexp_object.into(), Self::attribute()) |
|
} |
|
} |
|
|
|
impl RegExp { |
|
/// The name of the object. |
|
pub(crate) const NAME: &'static str = "RegExp"; |
|
|
|
/// The number of arguments this function object takes.
|
pub(crate) const LENGTH: usize = 2; |
|
|
|
/// Create a new `RegExp` |
|
pub(crate) fn constructor(this: &Value, args: &[Value], ctx: &mut Context) -> Result<Value> { |
|
let arg = args.get(0).ok_or_else(Value::undefined)?; |
|
|
|
let (regex_body, mut regex_flags) = match arg { |
|
Value::String(ref body) => { |
|
// first argument is a string -> use it as regex pattern |
|
( |
|
body.to_string().into_boxed_str(), |
|
String::new().into_boxed_str(), |
|
) |
|
} |
|
Value::Object(ref obj) => { |
|
let obj = obj.borrow(); |
|
if let Some(regex) = obj.as_regexp() { |
|
// first argument is another `RegExp` object, so copy its pattern and flags |
|
(regex.original_source.clone(), regex.original_flags.clone()) |
|
} else { |
|
( |
|
String::new().into_boxed_str(), |
|
String::new().into_boxed_str(), |
|
) |
|
} |
|
} |
|
_ => return Err(Value::undefined()), |
|
}; |
|
// if a second argument is given and it's a string, use it as flags |
|
if let Some(Value::String(flags)) = args.get(1) { |
|
regex_flags = flags.to_string().into_boxed_str(); |
|
} |
|
|
|
// parse flags |
|
let mut sorted_flags = String::new(); |
|
let mut dot_all = false; |
|
let mut global = false; |
|
let mut ignore_case = false; |
|
let mut multiline = false; |
|
let mut sticky = false; |
|
let mut unicode = false; |
|
if regex_flags.contains('g') { |
|
global = true; |
|
sorted_flags.push('g'); |
|
} |
|
if regex_flags.contains('i') { |
|
ignore_case = true; |
|
sorted_flags.push('i'); |
|
} |
|
if regex_flags.contains('m') { |
|
multiline = true; |
|
sorted_flags.push('m'); |
|
} |
|
if regex_flags.contains('s') { |
|
dot_all = true; |
|
sorted_flags.push('s'); |
|
} |
|
if regex_flags.contains('u') { |
|
unicode = true; |
|
sorted_flags.push('u'); |
|
} |
|
if regex_flags.contains('y') { |
|
sticky = true; |
|
sorted_flags.push('y'); |
|
} |
|
|
|
let matcher = match Regex::with_flags(®ex_body, sorted_flags.as_str()) { |
|
Err(error) => { |
|
return Err( |
|
ctx.construct_syntax_error(format!("failed to create matcher: {}", error.text)) |
|
); |
|
} |
|
Ok(val) => val, |
|
}; |
|
|
|
let regexp = RegExp { |
|
matcher, |
|
use_last_index: global || sticky, |
|
flags: sorted_flags.into_boxed_str(), |
|
dot_all, |
|
global, |
|
ignore_case, |
|
multiline, |
|
sticky, |
|
unicode, |
|
original_source: regex_body, |
|
original_flags: regex_flags, |
|
}; |
|
|
|
this.set_data(ObjectData::RegExp(Box::new(regexp))); |
|
|
|
Ok(this.clone()) |
|
} |
|
|
|
// /// `RegExp.prototype.dotAll` |
|
// /// |
|
// /// The `dotAll` property indicates whether or not the "`s`" flag is used with the regular expression. |
|
// /// |
|
// /// More information: |
|
// /// - [ECMAScript reference][spec] |
|
// /// - [MDN documentation][mdn] |
|
// /// |
|
// /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.dotAll |
|
// /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/dotAll |
|
// fn get_dot_all(this: &Value, _: &[Value], _: &mut Context) -> Result<Value> { |
|
// this.with_internal_state_ref(|regex: &RegExp| Ok(Value::from(regex.dot_all))) |
|
// } |
|
|
|
// /// `RegExp.prototype.flags` |
|
// /// |
|
// /// The `flags` property returns a string consisting of the [`flags`][flags] of the current regular expression object. |
|
// /// |
|
// /// More information: |
|
// /// - [ECMAScript reference][spec] |
|
// /// - [MDN documentation][mdn] |
|
// /// |
|
// /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.flags |
|
// /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/flags |
|
// /// [flags]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#Advanced_searching_with_flags_2 |
|
// fn get_flags(this: &Value, _: &[Value], _: &mut Context) -> Result<Value> { |
|
// this.with_internal_state_ref(|regex: &RegExp| Ok(Value::from(regex.flags.clone()))) |
|
// } |
|
|
|
// /// `RegExp.prototype.global` |
|
// /// |
|
// /// The `global` property indicates whether or not the "`g`" flag is used with the regular expression. |
|
// /// |
|
// /// More information: |
|
// /// - [ECMAScript reference][spec] |
|
// /// - [MDN documentation][mdn] |
|
// /// |
|
// /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.global |
|
// /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/global |
|
// fn get_global(this: &Value, _: &[Value], _: &mut Context) -> Result<Value> { |
|
// this.with_internal_state_ref(|regex: &RegExp| Ok(Value::from(regex.global))) |
|
// } |
|
|
|
// /// `RegExp.prototype.ignoreCase` |
|
// /// |
|
// /// The `ignoreCase` property indicates whether or not the "`i`" flag is used with the regular expression. |
|
// /// |
|
// /// More information: |
|
// /// - [ECMAScript reference][spec] |
|
// /// - [MDN documentation][mdn] |
|
// /// |
|
// /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.ignorecase |
|
// /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/ignoreCase |
|
// fn get_ignore_case(this: &Value, _: &[Value], _: &mut Context) -> Result<Value> { |
|
// this.with_internal_state_ref(|regex: &RegExp| Ok(Value::from(regex.ignore_case))) |
|
// } |
|
|
|
// /// `RegExp.prototype.multiline` |
|
// /// |
|
// /// The multiline property indicates whether or not the "m" flag is used with the regular expression. |
|
// /// |
|
// /// More information: |
|
// /// - [ECMAScript reference][spec] |
|
// /// - [MDN documentation][mdn] |
|
// /// |
|
// /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.multiline |
|
// /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/multiline |
|
// fn get_multiline(this: &Value, _: &[Value], _: &mut Context) -> Result<Value> { |
|
// this.with_internal_state_ref(|regex: &RegExp| Ok(Value::from(regex.multiline))) |
|
// } |
|
|
|
// /// `RegExp.prototype.source` |
|
// /// |
|
// /// The `source` property returns a `String` containing the source text of the regexp object, |
|
// /// and it doesn't contain the two forward slashes on both sides and any flags. |
|
// /// |
|
// /// More information: |
|
// /// - [ECMAScript reference][spec] |
|
// /// - [MDN documentation][mdn] |
|
// /// |
|
// /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.source |
|
// /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/source |
|
// fn get_source(this: &Value, _: &[Value], _: &mut Context) -> Result<Value> { |
|
// Ok(this.get_internal_slot("OriginalSource")) |
|
// } |
|
|
|
// /// `RegExp.prototype.sticky` |
|
// /// |
|
// /// The `sticky` property indicates whether or not the "`y`" flag is used with the regular expression.
|
// /// |
|
// /// More information: |
|
// /// - [ECMAScript reference][spec] |
|
// /// - [MDN documentation][mdn] |
|
// /// |
|
// /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.sticky |
|
// /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/sticky |
|
// fn get_sticky(this: &Value, _: &[Value], _: &mut Context) -> Result<Value> { |
|
// this.with_internal_state_ref(|regex: &RegExp| Ok(Value::from(regex.sticky))) |
|
// } |
|
|
|
// /// `RegExp.prototype.unicode` |
|
// /// |
|
// /// The unicode property indicates whether or not the "`u`" flag is used with a regular expression. |
|
// /// unicode is a read-only property of an individual regular expression instance. |
|
// /// |
|
// /// More information: |
|
// /// - [ECMAScript reference][spec] |
|
// /// - [MDN documentation][mdn] |
|
// /// |
|
// /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.unicode |
|
// /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicode |
|
// fn get_unicode(this: &Value, _: &[Value], _: &mut Context) -> Result<Value> { |
|
// this.with_internal_state_ref(|regex: &RegExp| Ok(Value::from(regex.unicode))) |
|
// } |
|
|
|
/// `RegExp.prototype.test( string )` |
|
/// |
|
/// The `test()` method executes a search for a match between a regular expression and a specified string. |
|
/// |
|
/// Returns `true` or `false`. |
|
/// |
|
/// More information: |
|
/// - [ECMAScript reference][spec] |
|
/// - [MDN documentation][mdn] |
|
/// |
|
/// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype.test |
|
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/test |
|
pub(crate) fn test(this: &Value, args: &[Value], context: &mut Context) -> Result<Value> { |
|
let mut last_index = this.get_field("lastIndex", context)?.to_index(context)?; |
|
let result = if let Some(object) = this.as_object() { |
|
// 3. Let string be ? ToString(S). |
|
let arg_str = args |
|
.get(0) |
|
.cloned() |
|
.unwrap_or_default() |
|
.to_string(context)?; |
|
|
|
// 4. Let match be ? RegExpExec(R, string). |
|
let object = object.borrow(); |
|
if let Some(regex) = object.as_regexp() { |
|
let result = |
|
if let Some(m) = regex.matcher.find_from(arg_str.as_str(), last_index).next() { |
|
if regex.use_last_index { |
|
last_index = m.end(); |
|
} |
|
true |
|
} else { |
|
if regex.use_last_index { |
|
last_index = 0; |
|
} |
|
false |
|
}; |
|
|
|
// 5. If match is not null, return true; else return false. |
|
Ok(Value::boolean(result)) |
|
} else { |
|
return context |
|
.throw_type_error("RegExp.prototype.exec method called on incompatible value"); |
|
} |
|
} else { |
|
// 2. If Type(R) is not Object, throw a TypeError exception. |
|
return context |
|
.throw_type_error("RegExp.prototype.exec method called on incompatible value"); |
|
}; |
|
this.set_field("lastIndex", Value::from(last_index), context)?; |
|
result |
|
} |
|
|
|
/// `RegExp.prototype.exec( string )` |
|
/// |
|
/// The exec() method executes a search for a match in a specified string. |
|
/// |
|
/// Returns a result array, or `null`. |
|
/// |
|
/// More information: |
|
/// - [ECMAScript reference][spec] |
|
/// - [MDN documentation][mdn] |
|
/// |
|
/// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype.exec |
|
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/exec |
|
pub(crate) fn exec(this: &Value, args: &[Value], context: &mut Context) -> Result<Value> { |
|
// 4. Return ? RegExpBuiltinExec(R, S). |
|
let mut last_index = this.get_field("lastIndex", context)?.to_index(context)?; |
|
let result = if let Some(object) = this.as_object() { |
|
let object = object.borrow(); |
|
if let Some(regex) = object.as_regexp() { |
|
// 3. Let S be ? ToString(string). |
|
let arg_str = args |
|
.get(0) |
|
.cloned() |
|
.unwrap_or_default() |
|
.to_string(context)?; |
|
|
|
let result = { |
|
if let Some(m) = regex.matcher.find_from(arg_str.as_str(), last_index).next() { |
|
if regex.use_last_index { |
|
last_index = m.end(); |
|
} |
|
let groups = m.captures.len() + 1; |
|
let mut result = Vec::with_capacity(groups); |
|
for i in 0..groups { |
|
if let Some(range) = m.group(i) { |
|
result.push(Value::from( |
|
arg_str.get(range).expect("Could not get slice"), |
|
)); |
|
} else { |
|
result.push(Value::undefined()); |
|
} |
|
} |
|
|
|
let result = Value::from(result); |
|
result.set_property( |
|
"index", |
|
DataDescriptor::new(m.start(), Attribute::all()), |
|
); |
|
result |
|
.set_property("input", DataDescriptor::new(arg_str, Attribute::all())); |
|
result |
|
} else { |
|
if regex.use_last_index { |
|
last_index = 0; |
|
} |
|
Value::null() |
|
} |
|
}; |
|
|
|
Ok(result) |
|
} else { |
|
// 2. Perform ? RequireInternalSlot(R, [[RegExpMatcher]]). |
|
context |
|
.throw_type_error("RegExp.prototype.exec method called on incompatible value") |
|
} |
|
} else { |
|
return context.throw_type_error("exec method called on incompatible value"); |
|
}; |
|
|
|
this.set_field("lastIndex", Value::from(last_index), context)?; |
|
result |
|
} |
|
|
|
/// `RegExp.prototype[ @@match ]( string )` |
|
/// |
|
/// This method retrieves the matches when matching a string against a regular expression. |
|
/// |
|
/// More information: |
|
/// - [ECMAScript reference][spec] |
|
/// - [MDN documentation][mdn] |
|
/// |
|
/// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype-@@match |
|
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/@@match |
|
pub(crate) fn r#match(this: &Value, arg: RcString, context: &mut Context) -> Result<Value> { |
|
let (matcher, flags) = if let Some(object) = this.as_object() { |
|
let object = object.borrow(); |
|
if let Some(regex) = object.as_regexp() { |
|
(regex.matcher.clone(), regex.flags.clone()) |
|
} else { |
|
return context |
|
.throw_type_error("RegExp.prototype.exec method called on incompatible value"); |
|
} |
|
} else { |
|
return context |
|
.throw_type_error("RegExp.prototype.match method called on incompatible value"); |
|
}; |
|
if flags.contains('g') { |
|
let mut matches = Vec::new(); |
|
for mat in matcher.find_iter(&arg) { |
|
matches.push(Value::from(&arg[mat.range()])); |
|
} |
|
if matches.is_empty() { |
|
return Ok(Value::null()); |
|
} |
|
Ok(Value::from(matches)) |
|
} else { |
|
Self::exec(this, &[Value::from(arg)], context) |
|
} |
|
} |
|
|
|
/// `RegExp.prototype.toString()` |
|
/// |
|
/// Return a string representing the regular expression. |
|
/// |
|
/// More information: |
|
/// - [ECMAScript reference][spec] |
|
/// - [MDN documentation][mdn] |
|
/// |
|
/// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype.tostring |
|
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/toString |
|
#[allow(clippy::wrong_self_convention)] |
|
pub(crate) fn to_string(this: &Value, _: &[Value], context: &mut Context) -> Result<Value> { |
|
let (body, flags) = if let Some(object) = this.as_object() { |
|
let object = object.borrow(); |
|
let regex = object.as_regexp().ok_or_else(|| { |
|
context.construct_type_error(format!( |
|
"Method RegExp.prototype.toString called on incompatible receiver {}", |
|
this.display() |
|
)) |
|
})?; |
|
(regex.original_source.clone(), regex.flags.clone()) |
|
} else { |
|
return context.throw_type_error(format!( |
|
"Method RegExp.prototype.toString called on incompatible receiver {}", |
|
this.display() |
|
)); |
|
}; |
|
Ok(Value::from(format!("/{}/{}", body, flags))) |
|
} |
|
|
|
/// `RegExp.prototype[ @@matchAll ]( string )` |
|
/// |
|
/// The `[@@matchAll]` method returns all matches of the regular expression against a string. |
|
/// |
|
/// More information: |
|
/// - [ECMAScript reference][spec] |
|
/// - [MDN documentation][mdn] |
|
/// |
|
/// [spec]: https://tc39.es/ecma262/#sec-regexp-prototype-matchall |
|
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/@@matchAll |
|
// TODO: it's returning an array, it should return an iterator |
|
pub(crate) fn match_all(this: &Value, arg_str: String, context: &mut Context) -> Result<Value> { |
|
let matches = if let Some(object) = this.as_object() { |
|
let object = object.borrow(); |
|
if let Some(regex) = object.as_regexp() { |
|
let mut matches = Vec::new(); |
|
|
|
for mat in regex.matcher.find_iter(&arg_str) { |
|
let match_vec: Vec<Value> = mat |
|
.groups() |
|
.map(|group| match group { |
|
Some(range) => Value::from(&arg_str[range]), |
|
None => Value::undefined(), |
|
}) |
|
.collect(); |
|
|
|
let match_val = Value::from(match_vec); |
|
|
|
match_val |
|
.set_property("index", DataDescriptor::new(mat.start(), Attribute::all())); |
|
match_val.set_property( |
|
"input", |
|
DataDescriptor::new(arg_str.clone(), Attribute::all()), |
|
); |
|
matches.push(match_val); |
|
|
|
if !regex.flags.contains('g') { |
|
break; |
|
} |
|
} |
|
|
|
matches |
|
} else { |
|
return context.throw_type_error( |
|
"RegExp.prototype.match_all method called on incompatible value", |
|
); |
|
} |
|
} else { |
|
return context.throw_type_error( |
|
"RegExp.prototype.match_all method called on incompatible value", |
|
); |
|
}; |
|
|
|
let length = matches.len(); |
|
let result = Value::from(matches); |
|
result.set_field("length", Value::from(length), context)?; |
|
result.set_data(ObjectData::Array); |
|
|
|
Ok(result) |
|
} |
|
}
|
|
|