Rust编写的JavaScript引擎,该项目是一个试验性质的项目。
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

559 lines
21 KiB

//! This module implements the global `RegExp` object.
//!
//! The `RegExp` object is used for matching text with a pattern.
//!
//! More information:
//! - [ECMAScript reference][spec]
//! - [MDN documentation][mdn]
//!
//! [spec]: https://tc39.es/ecma262/#sec-regexp-constructor
//! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp
use crate::{
builtins::BuiltIn,
gc::{empty_trace, Finalize, Trace},
object::{ConstructorBuilder, ObjectData},
property::{Attribute, DataDescriptor},
value::{RcString, Value},
BoaProfiler, Context, Result,
};
use regress::Regex;
#[cfg(test)]
mod tests;
/// The internal representation of a `RegExp` object.
///
/// An instance is built by `RegExp::constructor` and stored on the JavaScript object as
/// `ObjectData::RegExp`.
#[derive(Debug, Clone, Finalize)]
pub struct RegExp {
/// Regex matcher, compiled from the pattern source and the normalized flags.
matcher: Regex,
/// Update last_index, set if global or sticky flags are set.
use_last_index: bool,
/// String of parsed flags, containing only the recognized flags in the fixed order
/// `g`, `i`, `m`, `s`, `u`, `y`.
flags: Box<str>,
/// Flag 's' - dot matches newline characters.
dot_all: bool,
/// Flag 'g'
global: bool,
/// Flag 'i' - ignore case.
ignore_case: bool,
/// Flag 'm' - '^' and '$' match beginning/end of line.
multiline: bool,
/// Flag 'y'
sticky: bool,
/// Flag 'u' - Unicode.
unicode: bool,
/// Pattern text exactly as it was handed to the constructor (or copied from the `RegExp`
/// object passed as the pattern).
pub(crate) original_source: Box<str>,
/// Flags text exactly as it was handed to the constructor, before being normalized into
/// `flags`.
original_flags: Box<str>,
}
// SAFETY: Only safe while regress::Regex doesn't implement Trace itself.
// NOTE(review): `empty_trace!()` presumably expands to no-op tracing methods, which is only
// sound as long as no field of `RegExp` owns garbage-collected values; the fields declared on
// the struct are a `Regex`, plain `bool`s and `Box<str>`s. Revisit this impl if a traced field
// is ever added.
unsafe impl Trace for RegExp {
empty_trace!();
}
/// Registration of the global `RegExp` constructor.
impl BuiltIn for RegExp {
    /// Name under which the constructor is exposed.
    const NAME: &'static str = "RegExp";

    /// Attributes of the global binding: writable and configurable, but not enumerable.
    fn attribute() -> Attribute {
        Attribute::WRITABLE | Attribute::NON_ENUMERABLE | Attribute::CONFIGURABLE
    }

    /// Builds the `RegExp` constructor object and returns it together with its name and the
    /// attributes it should be registered with.
    ///
    /// The builder is given a `lastIndex` property (initially `0`) and the `test`, `exec` and
    /// `toString` methods.
    fn init(context: &mut Context) -> (&'static str, Value, Attribute) {
        // Keep the profiler guard alive for the whole initialization.
        let _timer = BoaProfiler::global().start_event(Self::NAME, "init");

        // Fetch the pre-allocated standard object first, then hand it to the builder.
        let standard_regexp = context.standard_objects().regexp_object().clone();
        let builder_result =
            ConstructorBuilder::with_standard_object(context, Self::constructor, standard_regexp)
                .name(Self::NAME)
                .length(Self::LENGTH)
                .property("lastIndex", 0, Attribute::all())
                .method(Self::test, "test", 1)
                .method(Self::exec, "exec", 1)
                .method(Self::to_string, "toString", 0)
                .build();

        // TODO: add the RegExp accessor properties (`flags`, `global`, `source`, ...).
        (Self::NAME, builder_result.into(), Self::attribute())
    }
}
impl RegExp {
/// The name of the object; `init` passes it to the constructor builder's `name`.
pub(crate) const NAME: &'static str = "RegExp";
/// The amount of arguments this function object takes (pattern and flags); `init` passes it
/// to the constructor builder's `length`.
pub(crate) const LENGTH: usize = 2;
/// Create a new `RegExp`
pub(crate) fn constructor(this: &Value, args: &[Value], ctx: &mut Context) -> Result<Value> {
let arg = args.get(0).ok_or_else(Value::undefined)?;
let (regex_body, mut regex_flags) = match arg {
Value::String(ref body) => {
// first argument is a string -> use it as regex pattern
(
body.to_string().into_boxed_str(),
String::new().into_boxed_str(),
)
}
Value::Object(ref obj) => {
let obj = obj.borrow();
if let Some(regex) = obj.as_regexp() {
// first argument is another `RegExp` object, so copy its pattern and flags
(regex.original_source.clone(), regex.original_flags.clone())
} else {
(
String::new().into_boxed_str(),
String::new().into_boxed_str(),
)
}
}
_ => return Err(Value::undefined()),
};
// if a second argument is given and it's a string, use it as flags
if let Some(Value::String(flags)) = args.get(1) {
regex_flags = flags.to_string().into_boxed_str();
}
// parse flags
let mut sorted_flags = String::new();
let mut dot_all = false;
let mut global = false;
let mut ignore_case = false;
let mut multiline = false;
let mut sticky = false;
let mut unicode = false;
if regex_flags.contains('g') {
global = true;
sorted_flags.push('g');
}
if regex_flags.contains('i') {
ignore_case = true;
sorted_flags.push('i');
}
if regex_flags.contains('m') {
multiline = true;
sorted_flags.push('m');
}
if regex_flags.contains('s') {
dot_all = true;
sorted_flags.push('s');
}
if regex_flags.contains('u') {
unicode = true;
sorted_flags.push('u');
}
if regex_flags.contains('y') {
sticky = true;
sorted_flags.push('y');
}
let matcher = match Regex::with_flags(&regex_body, sorted_flags.as_str()) {
Err(error) => {
return Err(
ctx.construct_syntax_error(format!("failed to create matcher: {}", error.text))
);
}
Ok(val) => val,
};
let regexp = RegExp {
matcher,
use_last_index: global || sticky,
flags: sorted_flags.into_boxed_str(),
dot_all,
global,
ignore_case,
multiline,
sticky,
unicode,
original_source: regex_body,
original_flags: regex_flags,
};
this.set_data(ObjectData::RegExp(Box::new(regexp)));
Ok(this.clone())
}
// /// `RegExp.prototype.dotAll`
// ///
// /// The `dotAll` property indicates whether or not the "`s`" flag is used with the regular expression.
// ///
// /// More information:
// /// - [ECMAScript reference][spec]
// /// - [MDN documentation][mdn]
// ///
// /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.dotAll
// /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/dotAll
// fn get_dot_all(this: &Value, _: &[Value], _: &mut Context) -> Result<Value> {
// this.with_internal_state_ref(|regex: &RegExp| Ok(Value::from(regex.dot_all)))
// }
// /// `RegExp.prototype.flags`
// ///
// /// The `flags` property returns a string consisting of the [`flags`][flags] of the current regular expression object.
// ///
// /// More information:
// /// - [ECMAScript reference][spec]
// /// - [MDN documentation][mdn]
// ///
// /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.flags
// /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/flags
// /// [flags]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#Advanced_searching_with_flags_2
// fn get_flags(this: &Value, _: &[Value], _: &mut Context) -> Result<Value> {
// this.with_internal_state_ref(|regex: &RegExp| Ok(Value::from(regex.flags.clone())))
// }
// /// `RegExp.prototype.global`
// ///
// /// The `global` property indicates whether or not the "`g`" flag is used with the regular expression.
// ///
// /// More information:
// /// - [ECMAScript reference][spec]
// /// - [MDN documentation][mdn]
// ///
// /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.global
// /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/global
// fn get_global(this: &Value, _: &[Value], _: &mut Context) -> Result<Value> {
// this.with_internal_state_ref(|regex: &RegExp| Ok(Value::from(regex.global)))
// }
// /// `RegExp.prototype.ignoreCase`
// ///
// /// The `ignoreCase` property indicates whether or not the "`i`" flag is used with the regular expression.
// ///
// /// More information:
// /// - [ECMAScript reference][spec]
// /// - [MDN documentation][mdn]
// ///
// /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.ignorecase
// /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/ignoreCase
// fn get_ignore_case(this: &Value, _: &[Value], _: &mut Context) -> Result<Value> {
// this.with_internal_state_ref(|regex: &RegExp| Ok(Value::from(regex.ignore_case)))
// }
// /// `RegExp.prototype.multiline`
// ///
// /// The multiline property indicates whether or not the "m" flag is used with the regular expression.
// ///
// /// More information:
// /// - [ECMAScript reference][spec]
// /// - [MDN documentation][mdn]
// ///
// /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.multiline
// /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/multiline
// fn get_multiline(this: &Value, _: &[Value], _: &mut Context) -> Result<Value> {
// this.with_internal_state_ref(|regex: &RegExp| Ok(Value::from(regex.multiline)))
// }
// /// `RegExp.prototype.source`
// ///
// /// The `source` property returns a `String` containing the source text of the regexp object,
// /// and it doesn't contain the two forward slashes on both sides and any flags.
// ///
// /// More information:
// /// - [ECMAScript reference][spec]
// /// - [MDN documentation][mdn]
// ///
// /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.source
// /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/source
// fn get_source(this: &Value, _: &[Value], _: &mut Context) -> Result<Value> {
// Ok(this.get_internal_slot("OriginalSource"))
// }
// /// `RegExp.prototype.sticky`
// ///
// /// The `sticky` property indicates whether or not the "`y`" flag is used with the regular expression.
// ///
// /// More information:
// /// - [ECMAScript reference][spec]
// /// - [MDN documentation][mdn]
// ///
// /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.sticky
// /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/sticky
// fn get_sticky(this: &Value, _: &[Value], _: &mut Context) -> Result<Value> {
// this.with_internal_state_ref(|regex: &RegExp| Ok(Value::from(regex.sticky)))
// }
// /// `RegExp.prototype.unicode`
// ///
// /// The unicode property indicates whether or not the "`u`" flag is used with a regular expression.
// /// unicode is a read-only property of an individual regular expression instance.
// ///
// /// More information:
// /// - [ECMAScript reference][spec]
// /// - [MDN documentation][mdn]
// ///
// /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.unicode
// /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicode
// fn get_unicode(this: &Value, _: &[Value], _: &mut Context) -> Result<Value> {
// this.with_internal_state_ref(|regex: &RegExp| Ok(Value::from(regex.unicode)))
// }
/// `RegExp.prototype.test( string )`
///
/// The `test()` method executes a search for a match between a regular expression and a specified string.
///
/// Returns `true` or `false`.
///
/// The search starts at `lastIndex` only for global or sticky expressions; all other
/// expressions always search from the beginning of the string. For global or sticky
/// expressions `lastIndex` is advanced to the end of the match, or reset to `0` when
/// nothing matched. Indices are byte offsets into the converted string.
///
/// # Errors
///
/// Returns a `TypeError` when `this` is not a `RegExp` object, and propagates errors from
/// reading/writing `lastIndex` and from converting the argument to a string.
///
/// More information:
/// - [ECMAScript reference][spec]
/// - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype.test
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/test
pub(crate) fn test(this: &Value, args: &[Value], context: &mut Context) -> Result<Value> {
    let mut last_index = this.get_field("lastIndex", context)?.to_index(context)?;
    let result = if let Some(object) = this.as_object() {
        // 3. Let string be ? ToString(S).
        let arg_str = args
            .get(0)
            .cloned()
            .unwrap_or_default()
            .to_string(context)?;

        // 4. Let match be ? RegExpExec(R, string).
        let object = object.borrow();
        if let Some(regex) = object.as_regexp() {
            // `lastIndex` only takes part in matching for global/sticky expressions.
            let start = if regex.use_last_index { last_index } else { 0 };
            let match_end = if start > arg_str.len() {
                // Starting past the end of the input can never match.
                None
            } else {
                regex
                    .matcher
                    .find_from(arg_str.as_str(), start)
                    .next()
                    // A sticky expression must match exactly at the start position.
                    .filter(|m| !regex.sticky || m.start() == start)
                    .map(|m| m.end())
            };
            if regex.use_last_index {
                last_index = match_end.unwrap_or(0);
            }

            // 5. If match is not null, return true; else return false.
            Value::boolean(match_end.is_some())
        } else {
            return context
                .throw_type_error("RegExp.prototype.test method called on incompatible value");
        }
    } else {
        // 2. If Type(R) is not Object, throw a TypeError exception.
        return context
            .throw_type_error("RegExp.prototype.test method called on incompatible value");
    };

    this.set_field("lastIndex", Value::from(last_index), context)?;
    Ok(result)
}
/// `RegExp.prototype.exec( string )`
///
/// The exec() method executes a search for a match in a specified string.
///
/// Returns a result array, or `null`.
///
/// More information:
/// - [ECMAScript reference][spec]
/// - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype.exec
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/exec
pub(crate) fn exec(this: &Value, args: &[Value], context: &mut Context) -> Result<Value> {
// 4. Return ? RegExpBuiltinExec(R, S).
let mut last_index = this.get_field("lastIndex", context)?.to_index(context)?;
let result = if let Some(object) = this.as_object() {
let object = object.borrow();
if let Some(regex) = object.as_regexp() {
// 3. Let S be ? ToString(string).
let arg_str = args
.get(0)
.cloned()
.unwrap_or_default()
.to_string(context)?;
let result = {
if let Some(m) = regex.matcher.find_from(arg_str.as_str(), last_index).next() {
if regex.use_last_index {
last_index = m.end();
}
let groups = m.captures.len() + 1;
let mut result = Vec::with_capacity(groups);
for i in 0..groups {
if let Some(range) = m.group(i) {
result.push(Value::from(
arg_str.get(range).expect("Could not get slice"),
));
} else {
result.push(Value::undefined());
}
}
let result = Value::from(result);
result.set_property(
"index",
DataDescriptor::new(m.start(), Attribute::all()),
);
result
.set_property("input", DataDescriptor::new(arg_str, Attribute::all()));
result
} else {
if regex.use_last_index {
last_index = 0;
}
Value::null()
}
};
Ok(result)
} else {
// 2. Perform ? RequireInternalSlot(R, [[RegExpMatcher]]).
context
.throw_type_error("RegExp.prototype.exec method called on incompatible value")
}
} else {
return context.throw_type_error("exec method called on incompatible value");
};
this.set_field("lastIndex", Value::from(last_index), context)?;
result
}
/// `RegExp.prototype[ @@match ]( string )`
///
/// This method retrieves the matches when matching a string against a regular expression.
///
/// For a global expression every matched substring of `arg` is collected into the result,
/// or `null` is returned when nothing matched. Otherwise the call is forwarded to
/// [`RegExp::exec`].
///
/// # Errors
///
/// Returns a `TypeError` when `this` is not a `RegExp` object, and propagates errors from
/// `exec`.
///
/// More information:
/// - [ECMAScript reference][spec]
/// - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype-@@match
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/@@match
pub(crate) fn r#match(this: &Value, arg: RcString, context: &mut Context) -> Result<Value> {
    // Copy what is needed out of the object so the borrow ends before `exec` may run.
    let (matcher, global) = if let Some(object) = this.as_object() {
        let object = object.borrow();
        if let Some(regex) = object.as_regexp() {
            (regex.matcher.clone(), regex.global)
        } else {
            return context
                .throw_type_error("RegExp.prototype.match method called on incompatible value");
        }
    } else {
        return context
            .throw_type_error("RegExp.prototype.match method called on incompatible value");
    };

    if global {
        let matches: Vec<Value> = matcher
            .find_iter(&arg)
            .map(|mat| Value::from(&arg[mat.range()]))
            .collect();
        if matches.is_empty() {
            return Ok(Value::null());
        }
        Ok(Value::from(matches))
    } else {
        Self::exec(this, &[Value::from(arg)], context)
    }
}
/// `RegExp.prototype.toString()`
///
/// Produces the textual form of the regular expression: the original pattern source wrapped
/// in slashes, followed by the normalized flags.
///
/// Fails with a `TypeError` when the receiver is not a `RegExp` object.
///
/// More information:
/// - [ECMAScript reference][spec]
/// - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype.tostring
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/toString
#[allow(clippy::wrong_self_convention)]
pub(crate) fn to_string(this: &Value, _: &[Value], context: &mut Context) -> Result<Value> {
    // Primitive receivers are rejected up front.
    let receiver = match this.as_object() {
        Some(receiver) => receiver,
        None => {
            return context.throw_type_error(format!(
                "Method RegExp.prototype.toString called on incompatible receiver {}",
                this.display()
            ));
        }
    };

    let rendered = {
        let data = receiver.borrow();
        match data.as_regexp() {
            Some(regex) => format!("/{}/{}", regex.original_source, regex.flags),
            None => {
                return Err(context.construct_type_error(format!(
                    "Method RegExp.prototype.toString called on incompatible receiver {}",
                    this.display()
                )));
            }
        }
    };

    Ok(Value::from(rendered))
}
/// `RegExp.prototype[ @@matchAll ]( string )`
///
/// The `[@@matchAll]` method returns all matches of the regular expression against a string.
///
/// More information:
/// - [ECMAScript reference][spec]
/// - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma262/#sec-regexp-prototype-matchall
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/@@matchAll
// TODO: it's returning an array, it should return an iterator
pub(crate) fn match_all(this: &Value, arg_str: String, context: &mut Context) -> Result<Value> {
let matches = if let Some(object) = this.as_object() {
let object = object.borrow();
if let Some(regex) = object.as_regexp() {
let mut matches = Vec::new();
for mat in regex.matcher.find_iter(&arg_str) {
let match_vec: Vec<Value> = mat
.groups()
.map(|group| match group {
Some(range) => Value::from(&arg_str[range]),
None => Value::undefined(),
})
.collect();
let match_val = Value::from(match_vec);
match_val
.set_property("index", DataDescriptor::new(mat.start(), Attribute::all()));
match_val.set_property(
"input",
DataDescriptor::new(arg_str.clone(), Attribute::all()),
);
matches.push(match_val);
if !regex.flags.contains('g') {
break;
}
}
matches
} else {
return context.throw_type_error(
"RegExp.prototype.match_all method called on incompatible value",
);
}
} else {
return context.throw_type_error(
"RegExp.prototype.match_all method called on incompatible value",
);
};
let length = matches.len();
let result = Value::from(matches);
result.set_field("length", Value::from(length), context)?;
result.set_data(ObjectData::Array);
Ok(result)
}
}