Browse Source

Implement `regress` (#774)

Co-authored-by: RageKnify <rageknify@gmail.com>
pull/813/head
neeldug 4 years ago committed by GitHub
parent
commit
327d71bb98
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 11
      Cargo.lock
  2. 2
      boa/Cargo.toml
  3. 139
      boa/src/builtins/regexp/mod.rs
  4. 42
      boa/src/builtins/string/mod.rs
  5. 8
      boa/src/builtins/string/tests.rs

11
Cargo.lock generated

@ -17,7 +17,7 @@ dependencies = [
"num-traits",
"once_cell",
"rand",
"regex",
"regress",
"rustc-hash",
"ryu-js",
"serde",
@ -971,6 +971,15 @@ version = "0.6.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8"
[[package]]
name = "regress"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d669f9db16c334d72d7f92d52874373eb0df8f642230401952a6901872fee4c5"
dependencies = [
"memchr",
]
[[package]]
name = "rust-argon2"
version = "0.8.2"

2
boa/Cargo.toml

@ -21,7 +21,7 @@ gc = { version = "0.3.6", features = ["derive"] }
serde_json = "1.0.58"
rand = "0.7.3"
num-traits = "0.2.12"
regex = "1.3.9"
regress = "0.1.4"
rustc-hash = "1.1.0"
num-bigint = { version = "0.3.0", features = ["serde"] }
num-integer = "0.1.43"

139
boa/src/builtins/regexp/mod.rs

@ -17,7 +17,7 @@ use crate::{
value::{RcString, Value},
BoaProfiler, Context, Result,
};
use regex::Regex;
use regress::{Flags, Regex};
#[cfg(test)]
mod tests;
@ -123,7 +123,6 @@ impl RegExp {
// parse flags
let mut sorted_flags = String::new();
let mut pattern = String::new();
let mut dot_all = false;
let mut global = false;
let mut ignore_case = false;
@ -137,34 +136,26 @@ impl RegExp {
if regex_flags.contains('i') {
ignore_case = true;
sorted_flags.push('i');
pattern.push('i');
}
if regex_flags.contains('m') {
multiline = true;
sorted_flags.push('m');
pattern.push('m');
}
if regex_flags.contains('s') {
dot_all = true;
sorted_flags.push('s');
pattern.push('s');
}
if regex_flags.contains('u') {
unicode = true;
sorted_flags.push('u');
//pattern.push('s'); // rust uses utf-8 anyway
}
if regex_flags.contains('y') {
sticky = true;
sorted_flags.push('y');
}
// the `regex` crate uses `(?{flags})` inside the pattern to enable flags
if !pattern.is_empty() {
pattern = format!("(?{})", pattern);
}
pattern.push_str(regex_body.as_str());
let matcher = Regex::new(pattern.as_str()).expect("failed to create matcher");
let matcher = Regex::newf(regex_body.as_str(), Flags::from(sorted_flags.as_str()))
.expect("failed to create matcher");
let regexp = RegExp {
matcher,
use_last_index: global || sticky,
@ -319,17 +310,18 @@ impl RegExp {
let mut last_index = this.get_field("lastIndex").to_index(ctx)?;
let result = if let Some(object) = this.as_object() {
let regex = object.as_regexp().unwrap();
let result = if let Some(m) = regex.matcher.find_at(arg_str.as_str(), last_index) {
if regex.use_last_index {
last_index = m.end();
}
true
} else {
if regex.use_last_index {
last_index = 0;
}
false
};
let result =
if let Some(m) = regex.matcher.find_from(arg_str.as_str(), last_index).next() {
if regex.use_last_index {
last_index = m.total().end;
}
true
} else {
if regex.use_last_index {
last_index = 0;
}
false
};
Ok(Value::boolean(result))
} else {
panic!("object is not a regexp")
@ -358,35 +350,36 @@ impl RegExp {
let mut last_index = this.get_field("lastIndex").to_index(ctx)?;
let result = if let Some(object) = this.as_object() {
let regex = object.as_regexp().unwrap();
let mut locations = regex.matcher.capture_locations();
let result = if let Some(m) =
regex
.matcher
.captures_read_at(&mut locations, arg_str.as_str(), last_index)
{
if regex.use_last_index {
last_index = m.end();
}
let mut result = Vec::with_capacity(locations.len());
for i in 0..locations.len() {
if let Some((start, end)) = locations.get(i) {
result.push(Value::from(
arg_str.get(start..end).expect("Could not get slice"),
));
} else {
result.push(Value::undefined());
let result = {
if let Some(m) = regex.matcher.find_from(arg_str.as_str(), last_index).next() {
if regex.use_last_index {
last_index = m.total().end;
}
let groups = m.captures.len() + 1;
let mut result = Vec::with_capacity(groups);
for i in 0..groups {
if let Some(range) = m.group(i) {
result.push(Value::from(
arg_str.get(range).expect("Could not get slice"),
));
} else {
result.push(Value::undefined());
}
}
}
let result = Value::from(result);
result.set_property("index", Property::default().value(Value::from(m.start())));
result.set_property("input", Property::default().value(Value::from(arg_str)));
result
} else {
if regex.use_last_index {
last_index = 0;
let result = Value::from(result);
result.set_property(
"index",
Property::default().value(Value::from(m.total().start)),
);
result.set_property("input", Property::default().value(Value::from(arg_str)));
result
} else {
if regex.use_last_index {
last_index = 0;
}
Value::null()
}
Value::null()
};
Ok(result)
} else {
@ -416,7 +409,7 @@ impl RegExp {
if flags.contains('g') {
let mut matches = Vec::new();
for mat in matcher.find_iter(&arg) {
matches.push(Value::from(mat.as_str()));
matches.push(Value::from(&arg[mat.total()]));
}
if matches.is_empty() {
return Ok(Value::null());
@ -467,29 +460,29 @@ impl RegExp {
let regex = object.as_regexp().unwrap();
let mut matches = Vec::new();
for m in regex.matcher.find_iter(&arg_str) {
if let Some(caps) = regex.matcher.captures(&m.as_str()) {
let match_vec = caps
.iter()
.map(|group| match group {
Some(g) => Value::from(g.as_str()),
None => Value::undefined(),
})
.collect::<Vec<Value>>();
let match_val = Value::from(match_vec);
match_val
.set_property("index", Property::default().value(Value::from(m.start())));
match_val.set_property(
"input",
Property::default().value(Value::from(arg_str.clone())),
);
matches.push(match_val);
if !regex.flags.contains('g') {
break;
}
for mat in regex.matcher.find_iter(&arg_str) {
let match_vec: Vec<Value> = mat
.groups()
.map(|group| match group {
Some(range) => Value::from(&arg_str[range]),
None => Value::undefined(),
})
.collect();
let match_val = Value::from(match_vec);
match_val.set_property(
"index",
Property::default().value(Value::from(mat.total().start)),
);
match_val.set_property(
"input",
Property::default().value(Value::from(arg_str.clone())),
);
matches.push(match_val);
if !regex.flags.contains('g') {
break;
}
}

42
boa/src/builtins/string/mod.rs

@ -20,7 +20,7 @@ use crate::{
value::{RcString, Value},
BoaProfiler, Context, Result,
};
use regex::Regex;
use regress::Regex;
use std::{
char::decode_utf16,
cmp::{max, min},
@ -554,8 +554,9 @@ impl String {
None => return Ok(Value::from(primitive_val)),
};
let caps = re
.captures(&primitive_val)
.expect("unable to get capture groups from text");
.find(&primitive_val)
.expect("unable to get capture groups from text")
.captures;
let replace_value = if args.len() > 1 {
// replace_object could be a string or function or not exist at all
@ -583,17 +584,16 @@ impl String {
}
(Some('&'), _) => {
// $&
let matched = caps.get(0).expect("cannot get matched value");
result.push_str(matched.as_str());
result.push_str(&primitive_val[mat.total()]);
}
(Some('`'), _) => {
// $`
let start_of_match = mat.start();
let start_of_match = mat.total().start;
result.push_str(&primitive_val[..start_of_match]);
}
(Some('\''), _) => {
// $'
let end_of_match = mat.end();
let end_of_match = mat.total().end;
result.push_str(&primitive_val[end_of_match..]);
}
(Some(second), Some(third))
@ -610,9 +610,9 @@ impl String {
result.push(ch);
}
} else {
let group = match caps.get(nn) {
Some(text) => text.as_str(),
None => "",
let group = match mat.group(nn) {
Some(range) => &primitive_val[range.clone()],
_ => "",
};
result.push_str(group);
chars.next(); // consume third
@ -625,9 +625,9 @@ impl String {
result.push(first);
result.push(second);
} else {
let group = match caps.get(n) {
Some(text) => text.as_str(),
None => "",
let group = match mat.group(n) {
Some(range) => &primitive_val[range.clone()],
_ => "",
};
result.push_str(group);
}
@ -654,16 +654,16 @@ impl String {
}
Value::Object(_) => {
// This will return the matched substring first, then captured parenthesized groups later
let mut results: Vec<Value> = caps
.iter()
.map(|capture| Value::from(capture.unwrap().as_str()))
let mut results: Vec<Value> = mat
.groups()
.map(|group| match group {
Some(range) => Value::from(&primitive_val[range]),
None => Value::undefined(),
})
.collect();
// Returns the starting byte offset of the match
let start = caps
.get(0)
.expect("Unable to get Byte offset from string for match")
.start();
let start = mat.total().start;
results.push(Value::from(start));
// Push the whole string being examined
results.push(Value::from(primitive_val.to_string()));
@ -679,7 +679,7 @@ impl String {
};
Ok(Value::from(primitive_val.replacen(
&mat.as_str(),
&primitive_val[mat.total()],
&replace_value,
1,
)))

8
boa/src/builtins/string/tests.rs

@ -10,7 +10,7 @@ fn length() {
const a = new String(' ');
const b = new String('\ud834\udf06');
const c = new String(' \b ');
cosnt d = new String('')
const d = new String('')
"#;
eprintln!("{}", forward(&mut engine, init));
let a = forward(&mut engine, "a.length");
@ -275,11 +275,12 @@ fn replace_with_function() {
let mut engine = Context::new();
let init = r#"
var a = "ecmascript is cool";
var p1, p2, p3;
var replacer = (match, cap1, cap2, cap3) => {
var p1, p2, p3, length;
var replacer = (match, cap1, cap2, cap3, len) => {
p1 = cap1;
p2 = cap2;
p3 = cap3;
length = len;
return "awesome!";
};
@ -294,6 +295,7 @@ fn replace_with_function() {
assert_eq!(forward(&mut engine, "p1"), "\"o\"");
assert_eq!(forward(&mut engine, "p2"), "\"o\"");
assert_eq!(forward(&mut engine, "p3"), "\"l\"");
assert_eq!(forward(&mut engine, "length"), "14");
}
#[test]

Loading…
Cancel
Save