Browse Source

Add support for regex literals (#94)

* Implement regex literal lexing

* Add parser support for regex literals

* Implement very basic RegExp object

* Fix escaping a backslash

* Store rust structs as internal state in objects

* Remove unnecessary regexp constant

* Implement RegExp.test()

* Implement properties on RegExp

* Implement RegExp.exec()

* Implement RegExp.toString()

* Rename RegularExpression to RegularExpressionLiteral
pull/97/head
Sophie Tauchert 5 years ago committed by Jason Williams
parent
commit
5e7df4f3cb
  1. 37
      Cargo.lock
  2. 3
      Cargo.toml
  3. 4
      src/lib/exec.rs
  4. 2
      src/lib/js/mod.rs
  5. 14
      src/lib/js/object.rs
  6. 64
      src/lib/js/object/internal_state.rs
  7. 392
      src/lib/js/regexp.rs
  8. 93
      src/lib/js/value.rs
  9. 3
      src/lib/syntax/ast/constant.rs
  10. 8
      src/lib/syntax/ast/token.rs
  11. 91
      src/lib/syntax/lexer.rs
  12. 7
      src/lib/syntax/parser.rs

37
Cargo.lock generated

@ -9,10 +9,19 @@ dependencies = [
"gc 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"gc_derive 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen 0.2.47 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "aho-corasick"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "arrayvec"
version = "0.4.10"
@ -463,6 +472,17 @@ dependencies = [
"redox_syscall 0.1.56 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)",
"thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex-automata"
version = "0.1.7"
@ -471,6 +491,11 @@ dependencies = [
"byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex-syntax"
version = "0.6.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "ryu"
version = "1.0.0"
@ -575,6 +600,14 @@ dependencies = [
"unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "thread_local"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "time"
version = "0.1.42"
@ -695,6 +728,7 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[metadata]
"checksum aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)" = "58fb5e95d83b38284460a5fda7d6470aa0b8844d283a0b614b8535e880800d2d"
"checksum arrayvec 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)" = "92c7fb76bc8826a8b33b4ee5bb07a247a81e76764ab4d55e8f73e3a4d8808c71"
"checksum atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652"
"checksum autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "0e49efa51329a5fd37e7c79db4621af617cd4e3e5bc224939808d076077077bf"
@ -750,7 +784,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
"checksum redox_syscall 0.1.56 (registry+https://github.com/rust-lang/crates.io-index)" = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84"
"checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76"
"checksum regex 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88c3d9193984285d544df4a30c23a4e62ead42edf70a4452ceb76dac1ce05c26"
"checksum regex-automata 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "3ed09217220c272b29ef237a974ad58515bde75f194e3ffa7e6d0bf0f3b01f86"
"checksum regex-syntax 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)" = "b143cceb2ca5e56d5671988ef8b15615733e7ee16cd348e064333b251b89343f"
"checksum ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c92464b447c0ee8c4fb3824ecc8383b81717b9f1e74ba2e72540aef7b9f82997"
"checksum same-file 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8f20c4be53a8a1ff4c1f1b2bd14570d2f634628709752f0702ecdd2b3f9a5267"
"checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27"
@ -764,6 +800,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum synstructure 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3a761d12e6d8dcb4dcf952a7a89b475e3a9d69e4a69307e01a470977642914bd"
"checksum termion 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6a8fb22f7cde82c8220e5aeacb3258ed7ce996142c77cba193f203515e26c330"
"checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
"checksum time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f"
"checksum tinytemplate 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4574b75faccaacddb9b284faecdf0b544b80b6b294f3d062d325c5726a209c20"
"checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526"

3
Cargo.toml

@ -19,6 +19,7 @@ gc_derive = "0.3.2"
serde_json = "1.0.40"
rand = "0.7.0"
chrono = "0.4.7"
regex = "^1.2"
# Optional Dependencies
wasm-bindgen = { version = "0.2.47", optional = true }
@ -33,4 +34,4 @@ path = "src/lib/lib.rs"
[[bench]]
name = "string"
harness = false
harness = false

4
src/lib/exec.rs

@ -5,7 +5,7 @@ use crate::{
function::{Function, RegularFunction},
json, math, object,
object::{ObjectKind, INSTANCE_PROTOTYPE, PROTOTYPE},
string,
regexp, string,
value::{from_value, to_value, ResultValue, Value, ValueData},
},
syntax::ast::{
@ -58,7 +58,6 @@ impl Executor for Interpreter {
// Do Const values need to be garbage collected? We no longer need them once we've generated Values
ExprDef::Const(Const::String(ref str)) => Ok(to_value(str.to_owned())),
ExprDef::Const(Const::Bool(val)) => Ok(to_value(val)),
ExprDef::Const(Const::RegExp(_, _, _)) => Ok(to_value(None::<()>)),
ExprDef::Block(ref es) => {
let mut obj = to_value(None::<()>);
for e in es.iter() {
@ -390,6 +389,7 @@ impl InterpreterBuilder {
array::init(&global);
function::init(&global);
json::init(&global);
regexp::init(&global);
string::init(&global);
Self { global }

2
src/lib/js/mod.rs

@ -12,6 +12,8 @@ pub mod json;
pub mod math;
/// The global `Object` object
pub mod object;
/// The global `RegExp` object
pub mod regexp;
/// The global `String` object
pub mod string;
/// Javascript values, utility methods and conversion between Javascript values and Rust values

14
src/lib/js/object.rs

@ -10,10 +10,14 @@ use gc::Gc;
use gc_derive::{Finalize, Trace};
use std::{borrow::Borrow, collections::HashMap, ops::Deref};
/// Static `prototype`, usually set on constructors as a key to point to their respective prototype object.
pub use internal_state::{InternalState, InternalStateCell};
mod internal_state;
/// Static `prototype`, usually set on constructors as a key to point to their respective prototype object.
pub static PROTOTYPE: &str = "prototype";
/// Static `__proto__`, usually set on Object instances as a key to point to their respective prototype object.
/// Static `__proto__`, usually set on Object instances as a key to point to their respective prototype object.
pub static INSTANCE_PROTOTYPE: &str = "__proto__";
/// `ObjectData` is the representation of an object in JavaScript
@ -27,6 +31,8 @@ pub struct Object {
pub properties: Box<HashMap<String, Property>>,
/// Symbol Properties
pub sym_properties: Box<HashMap<usize, Property>>,
/// Some rust object that stores internal state
pub state: Option<Box<InternalStateCell>>,
}
impl Object {
@ -37,6 +43,7 @@ impl Object {
internal_slots: Box::new(HashMap::new()),
properties: Box::new(HashMap::new()),
sym_properties: Box::new(HashMap::new()),
state: None,
}
}
@ -47,6 +54,7 @@ impl Object {
internal_slots: Box::new(HashMap::new()),
properties: Box::new(HashMap::new()),
sym_properties: Box::new(HashMap::new()),
state: None,
};
obj.internal_slots
@ -61,6 +69,7 @@ impl Object {
internal_slots: Box::new(HashMap::new()),
properties: Box::new(HashMap::new()),
sym_properties: Box::new(HashMap::new()),
state: None,
};
obj.internal_slots
@ -75,6 +84,7 @@ impl Object {
internal_slots: Box::new(HashMap::new()),
properties: Box::new(HashMap::new()),
sym_properties: Box::new(HashMap::new()),
state: None,
};
obj.internal_slots

64
src/lib/js/object/internal_state.rs

@ -0,0 +1,64 @@
//! Implementations for storing normal rust structs inside any object as internal state.
use std::{
any::Any,
fmt::{self, Debug},
ops::{Deref, DerefMut},
rc::Rc,
};
use gc::{unsafe_empty_trace, Finalize, Trace};
/// Wrapper around `Rc` to implement `Trace` and `Finalize`.
///
/// The wrapped value is type-erased to `dyn Any`; callers recover the concrete type through
/// the `downcast_ref` / `downcast_mut` helpers on this type.
#[derive(Clone)]
pub struct InternalStateCell {
/// The internal state.
state: Rc<dyn Any>,
}
// No custom finalization logic: the default (empty) `Finalize` implementation is used.
impl Finalize for InternalStateCell {}
// SAFETY / NOTE(review): the empty trace means the garbage collector never looks inside the
// stored state. That presumably is only sound while the stored value holds no `Gc` pointers;
// nothing in this file enforces that, so confirm for every type stored in a cell.
unsafe impl Trace for InternalStateCell {
unsafe_empty_trace!();
}
/// Gives shared access to the type-erased state held by the cell.
impl Deref for InternalStateCell {
    type Target = dyn Any;

    fn deref(&self) -> &Self::Target {
        // Look through the `Rc` to the erased value it owns.
        &*self.state
    }
}
/// Gives exclusive access to the type-erased state held by the cell.
///
/// # Panics
///
/// Panics when `Rc::get_mut` refuses access, i.e. when the underlying `Rc` is currently
/// shared with another handle (for example a clone of this cell).
impl DerefMut for InternalStateCell {
    fn deref_mut(&mut self) -> &mut Self::Target {
        match Rc::get_mut(&mut self.state) {
            Some(exclusive) => exclusive,
            None => panic!("failed to get mutable"),
        }
    }
}
/// The derived version would print 'InternalStateCell { state: ... }'; this custom
/// implementation forwards directly to the `Debug` implementation of the wrapped `Rc<dyn Any>`.
///
/// NOTE(review): because the state is stored as `dyn Any`, the output presumably comes from
/// `Any`'s own `Debug` impl rather than from the concrete state type — confirm whether the
/// concrete value is actually shown.
impl Debug for InternalStateCell {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
Debug::fmt(&self.state, f)
}
}
impl InternalStateCell {
/// Create new `InternalStateCell` from a value.
pub fn new<T: Any + InternalState>(value: T) -> Self {
Self {
state: Rc::new(value),
}
}
/// Get a reference to the stored value and cast it to `T`.
pub fn downcast_ref<T: Any + InternalState>(&self) -> Option<&T> {
self.deref().downcast_ref::<T>()
}
/// Get a mutable reference to the stored value and cast it to `T`.
pub fn downcast_mut<T: Any + InternalState>(&mut self) -> Option<&mut T> {
self.deref_mut().downcast_mut::<T>()
}
}
/// This trait must be implemented by all structs used for internal state.
///
/// It is a marker trait with no methods; its only requirement is that implementors are `Debug`.
pub trait InternalState: Debug {}

392
src/lib/js/regexp.rs

@ -0,0 +1,392 @@
use std::ops::Deref;
use gc::Gc;
use regex::Regex;
use crate::{
exec::Interpreter,
js::{
function::NativeFunctionData,
object::{InternalState, ObjectKind, PROTOTYPE},
property::Property,
value::{from_value, to_value, FromValue, ResultValue, Value, ValueData},
},
};
/// Rust-side state attached to every `RegExp` object created by `make_regexp`: the compiled
/// matcher plus the parsed flags.
#[derive(Debug)]
struct RegExp {
/// Regex matcher.
matcher: Regex,
/// Update last_index, set if global or sticky flags are set.
use_last_index: bool,
/// String of parsed flags.
flags: String,
/// Flag 's' - dot matches newline characters.
dot_all: bool,
/// Flag 'g'
global: bool,
/// Flag 'i' - ignore case.
ignore_case: bool,
/// Flag 'm' - '^' and '$' match beginning/end of line.
multiline: bool,
/// Flag 'y'
sticky: bool,
/// Flag 'u' - Unicode.
unicode: bool,
}
// Marker impl so a `RegExp` can be stored as an object's internal state.
impl InternalState for RegExp {}
/// Fetch the argument at position `idx` and convert it to `T`.
///
/// Returns an error value when the argument is missing or cannot be converted.
fn get_argument<T: FromValue>(args: &[Value], idx: usize) -> Result<T, Value> {
    if let Some(raw) = args.get(idx) {
        return from_value(raw.clone()).map_err(to_value);
    }
    Err(to_value(format!("expected argument at index {}", idx)))
}
/// Create a new `RegExp`
pub fn make_regexp(this: &Value, args: &[Value], _: &mut Interpreter) -> ResultValue {
if args.is_empty() {
return Err(Gc::new(ValueData::Undefined));
}
let mut regex_body = String::new();
let mut regex_flags = String::new();
#[allow(clippy::indexing_slicing)] // length has been checked
match args[0].deref() {
ValueData::String(ref body) => {
// first argument is a string -> use it as regex pattern
regex_body = body.into();
}
ValueData::Object(ref obj) => {
let slots = &*obj.borrow().internal_slots;
if slots.get("RegExpMatcher").is_some() {
// first argument is another `RegExp` object, so copy its pattern and flags
if let Some(body) = slots.get("OriginalSource") {
regex_body = from_value(body.clone()).unwrap();
}
if let Some(flags) = slots.get("OriginalFlags") {
regex_flags = from_value(flags.clone()).unwrap();
}
}
}
_ => return Err(Gc::new(ValueData::Undefined)),
}
// if a second argument is given and it's a string, use it as flags
match args.get(1) {
None => {}
Some(flags) => {
if let ValueData::String(flags) = flags.deref() {
regex_flags = flags.into();
}
}
}
// parse flags
let mut sorted_flags = String::new();
let mut pattern = String::new();
let mut dot_all = false;
let mut global = false;
let mut ignore_case = false;
let mut multiline = false;
let mut sticky = false;
let mut unicode = false;
if regex_flags.contains('g') {
global = true;
sorted_flags.push('g');
}
if regex_flags.contains('i') {
ignore_case = true;
sorted_flags.push('i');
pattern.push('i');
}
if regex_flags.contains('m') {
multiline = true;
sorted_flags.push('m');
pattern.push('m');
}
if regex_flags.contains('s') {
dot_all = true;
sorted_flags.push('s');
pattern.push('s');
}
if regex_flags.contains('u') {
unicode = true;
sorted_flags.push('u');
//pattern.push('s'); // rust uses utf-8 anyway
}
if regex_flags.contains('y') {
sticky = true;
sorted_flags.push('y');
}
// the `regex` crate uses '(?{flags})` inside the pattern to enable flags
if !pattern.is_empty() {
pattern = format!("(?{})", pattern);
}
pattern.push_str(regex_body.as_str());
let matcher = Regex::new(pattern.as_str()).expect("failed to create matcher");
let regexp = RegExp {
matcher,
use_last_index: global || sticky,
flags: sorted_flags,
dot_all,
global,
ignore_case,
multiline,
sticky,
unicode,
};
// This value is used by console.log and other routines to match Object type
// to its Javascript Identifier (global constructor method name)
this.set_kind(ObjectKind::Ordinary);
this.set_internal_slot("RegExpMatcher", Gc::new(ValueData::Undefined));
this.set_internal_slot("OriginalSource", to_value(regex_body));
this.set_internal_slot("OriginalFlags", to_value(regex_flags));
this.set_internal_state(regexp);
Ok(this.clone())
}
/// Getter for `dotAll`: whether the 's' flag was given.
fn get_dot_all(this: &Value, _: &[Value], _: &mut Interpreter) -> ResultValue {
    let dot_all = this.with_internal_state_ref(|regex: &RegExp| regex.dot_all);
    Ok(to_value(dot_all))
}

/// Getter for `flags`: the parsed flags as a string.
fn get_flags(this: &Value, _: &[Value], _: &mut Interpreter) -> ResultValue {
    let flags = this.with_internal_state_ref(|regex: &RegExp| regex.flags.clone());
    Ok(to_value(flags))
}

/// Getter for `global`: whether the 'g' flag was given.
fn get_global(this: &Value, _: &[Value], _: &mut Interpreter) -> ResultValue {
    let global = this.with_internal_state_ref(|regex: &RegExp| regex.global);
    Ok(to_value(global))
}

/// Getter for `ignoreCase`: whether the 'i' flag was given.
fn get_ignore_case(this: &Value, _: &[Value], _: &mut Interpreter) -> ResultValue {
    let ignore_case = this.with_internal_state_ref(|regex: &RegExp| regex.ignore_case);
    Ok(to_value(ignore_case))
}

/// Getter for `multiline`: whether the 'm' flag was given.
fn get_multiline(this: &Value, _: &[Value], _: &mut Interpreter) -> ResultValue {
    let multiline = this.with_internal_state_ref(|regex: &RegExp| regex.multiline);
    Ok(to_value(multiline))
}

/// Getter for `source`: the value stored in the `OriginalSource` internal slot.
fn get_source(this: &Value, _: &[Value], _: &mut Interpreter) -> ResultValue {
    let source = this.get_internal_slot("OriginalSource");
    Ok(source)
}

/// Getter for `sticky`: whether the 'y' flag was given.
fn get_sticky(this: &Value, _: &[Value], _: &mut Interpreter) -> ResultValue {
    let sticky = this.with_internal_state_ref(|regex: &RegExp| regex.sticky);
    Ok(to_value(sticky))
}

/// Getter for `unicode`: whether the 'u' flag was given.
fn get_unicode(this: &Value, _: &[Value], _: &mut Interpreter) -> ResultValue {
    let unicode = this.with_internal_state_ref(|regex: &RegExp| regex.unicode);
    Ok(to_value(unicode))
}
fn _make_prop(getter: NativeFunctionData) -> Property {
Property {
writable: false,
enumerable: false,
configurable: true,
value: Gc::new(ValueData::Undefined),
get: to_value(getter),
set: Gc::new(ValueData::Undefined),
}
}
/// Search for a match between this regex and a specified string
///
/// Returns a boolean value telling whether the first argument matches.
///
/// The search starts at `lastIndex` only when the regex has the global or sticky flag;
/// otherwise it starts at 0, so a stale `lastIndex` cannot influence a plain regex. A
/// `lastIndex` that is past the end of the input, or that does not fall on a character
/// boundary of the input, is treated as a failed match instead of being handed to the
/// matcher. For global/sticky regexes `lastIndex` is then updated: end of the match on
/// success, 0 on failure.
///
/// # Errors
///
/// Returns an error value when the first argument is missing or `lastIndex` cannot be
/// converted to an index.
pub fn test(this: &Value, args: &[Value], _: &mut Interpreter) -> ResultValue {
    let arg_str = get_argument::<String>(args, 0)?;
    let mut last_index = from_value::<usize>(this.get_field("lastIndex")).map_err(to_value)?;
    let result = this.with_internal_state_ref(|regex: &RegExp| {
        let start = if regex.use_last_index { last_index } else { 0 };
        // `is_char_boundary` is false for offsets past the end as well as for offsets inside
        // a multi-byte character; `lastIndex` is script-controlled, so both can happen.
        let found = if arg_str.is_char_boundary(start) {
            regex.matcher.find_at(arg_str.as_str(), start)
        } else {
            None
        };
        let matched = match found {
            Some(m) => {
                if regex.use_last_index {
                    last_index = m.end();
                }
                true
            }
            None => {
                if regex.use_last_index {
                    last_index = 0;
                }
                false
            }
        };
        Ok(Gc::new(ValueData::Boolean(matched)))
    });
    this.set_field_slice("lastIndex", to_value(last_index));
    result
}
/// Search for a match between this regex and a specified string
pub fn exec(this: &Value, args: &[Value], _: &mut Interpreter) -> ResultValue {
let arg_str = get_argument::<String>(args, 0)?;
let mut last_index = from_value::<usize>(this.get_field("lastIndex")).map_err(to_value)?;
let result = this.with_internal_state_ref(|regex: &RegExp| {
let mut locations = regex.matcher.capture_locations();
let result =
match regex
.matcher
.captures_read_at(&mut locations, arg_str.as_str(), last_index)
{
Some(m) => {
if regex.use_last_index {
last_index = m.end();
}
let mut result = Vec::with_capacity(locations.len());
for i in 0..locations.len() {
if let Some((start, end)) = locations.get(i) {
result.push(to_value(&arg_str[start..end]));
} else {
result.push(Gc::new(ValueData::Undefined));
}
}
let result = to_value(result);
result.set_prop_slice("index", Property::new(to_value(m.start())));
result.set_prop_slice("input", Property::new(to_value(arg_str)));
result
}
None => {
if regex.use_last_index {
last_index = 0;
}
Gc::new(ValueData::Null)
}
};
Ok(result)
});
this.set_field_slice("lastIndex", to_value(last_index));
result
}
/// Render the regular expression as `/source/flags`.
///
/// The source is read from the `OriginalSource` internal slot and the flags from the
/// `RegExp` internal state.
pub fn to_string(this: &Value, _: &[Value], _: &mut Interpreter) -> ResultValue {
    let source = this.get_internal_slot("OriginalSource");
    let body: String = from_value(source).map_err(to_value)?;
    let flags = this.with_internal_state_ref(|regex: &RegExp| regex.flags.clone());
    let rendered = format!("/{}/{}", body, flags);
    Ok(to_value(rendered))
}
/// Build the `RegExp` constructor together with its prototype object.
///
/// The prototype receives the `test`, `exec` and `toString` methods, a `lastIndex` field
/// initialised to 0, and one getter-backed property per flag plus `source`.
pub fn _create(global: &Value) -> Value {
    let constructor = to_value(make_regexp as NativeFunctionData);
    let prototype = ValueData::new_obj(Some(global));

    let methods = [
        ("test", test as NativeFunctionData),
        ("exec", exec as NativeFunctionData),
        ("toString", to_string as NativeFunctionData),
    ];
    for &(name, method) in methods.iter() {
        prototype.set_field_slice(name, to_value(method));
    }

    prototype.set_field_slice("lastIndex", to_value(0));

    let getters = [
        ("dotAll", get_dot_all as NativeFunctionData),
        ("flags", get_flags as NativeFunctionData),
        ("global", get_global as NativeFunctionData),
        ("ignoreCase", get_ignore_case as NativeFunctionData),
        ("multiline", get_multiline as NativeFunctionData),
        ("source", get_source as NativeFunctionData),
        ("sticky", get_sticky as NativeFunctionData),
        ("unicode", get_unicode as NativeFunctionData),
    ];
    for &(name, getter) in getters.iter() {
        prototype.set_prop_slice(name, _make_prop(getter));
    }

    constructor.set_field_slice(PROTOTYPE, prototype);
    constructor
}
/// Register the `RegExp` constructor as the `RegExp` field of the global object.
pub fn init(global: &Value) {
    let constructor = _create(global);
    global.set_field_slice("RegExp", constructor);
}
#[cfg(test)]
mod tests {
use crate::exec::Executor;
use crate::forward;
/// A regex built from a string, from a literal, and from a literal passed to the constructor
/// must all match the same input.
#[test]
fn test_constructors() {
let mut engine = Executor::new();
let init = r#"
let constructed = new RegExp("[0-9]+(\\.[0-9]+)?");
let literal = /[0-9]+(\.[0-9]+)?/;
let ctor_literal = new RegExp(/[0-9]+(\.[0-9]+)?/);
"#;
forward(&mut engine, init);
// every construction path should accept a simple decimal number
assert_eq!(forward(&mut engine, "constructed.test('1.0')"), "true");
assert_eq!(forward(&mut engine, "literal.test('1.0')"), "true");
assert_eq!(forward(&mut engine, "ctor_literal.test('1.0')"), "true");
}
// TODO: uncomment this test when property getters are supported
// #[test]
// fn test_flags() {
// let mut engine = Executor::new();
// let init = r#"
// var re_gi = /test/gi;
// var re_sm = /test/sm;
// "#;
//
// forward(&mut engine, init);
// assert_eq!(forward(&mut engine, "re_gi.global"), "true");
// assert_eq!(forward(&mut engine, "re_gi.ignoreCase"), "true");
// assert_eq!(forward(&mut engine, "re_gi.multiline"), "false");
// assert_eq!(forward(&mut engine, "re_gi.dotAll"), "false");
// assert_eq!(forward(&mut engine, "re_gi.unicode"), "false");
// assert_eq!(forward(&mut engine, "re_gi.sticky"), "false");
// assert_eq!(forward(&mut engine, "re_gi.flags"), "gi");
//
// assert_eq!(forward(&mut engine, "re_sm.global"), "false");
// assert_eq!(forward(&mut engine, "re_sm.ignoreCase"), "false");
// assert_eq!(forward(&mut engine, "re_sm.multiline"), "true");
// assert_eq!(forward(&mut engine, "re_sm.dotAll"), "true");
// assert_eq!(forward(&mut engine, "re_sm.unicode"), "false");
// assert_eq!(forward(&mut engine, "re_sm.sticky"), "false");
// assert_eq!(forward(&mut engine, "re_sm.flags"), "ms");
// }
/// With the global flag, `lastIndex` advances to the end of a match and resets to 0 once a
/// search fails.
#[test]
fn test_last_index() {
let mut engine = Executor::new();
let init = r#"
let regex = /[0-9]+(\.[0-9]+)?/g;
"#;
forward(&mut engine, init);
assert_eq!(forward(&mut engine, "regex.lastIndex"), "0");
// first search matches "1.0", which ends at offset 3
assert_eq!(forward(&mut engine, "regex.test('1.0foo')"), "true");
assert_eq!(forward(&mut engine, "regex.lastIndex"), "3");
// second search starts at offset 3, finds nothing, and resets lastIndex
assert_eq!(forward(&mut engine, "regex.test('1.0foo')"), "false");
assert_eq!(forward(&mut engine, "regex.lastIndex"), "0");
}
/// `exec` returns the whole match, each capture group, and the `index`/`input` properties.
#[test]
fn test_exec() {
let mut engine = Executor::new();
let init = r#"
var re = /quick\s(brown).+?(jumps)/ig;
var result = re.exec('The Quick Brown Fox Jumps Over The Lazy Dog');
"#;
forward(&mut engine, init);
// element 0 is the full match, elements 1.. are the capture groups
assert_eq!(forward(&mut engine, "result[0]"), "Quick Brown Fox Jumps");
assert_eq!(forward(&mut engine, "result[1]"), "Brown");
assert_eq!(forward(&mut engine, "result[2]"), "Jumps");
assert_eq!(forward(&mut engine, "result.index"), "4");
assert_eq!(
forward(&mut engine, "result.input"),
"The Quick Brown Fox Jumps Over The Lazy Dog"
);
}
/// `toString` renders `/source/flags` for constructed and literal regexes, keeping escapes.
#[test]
fn test_to_string() {
let mut engine = Executor::new();
assert_eq!(
forward(&mut engine, "(new RegExp('a+b+c')).toString()"),
"/a+b+c/"
);
assert_eq!(
forward(&mut engine, "(new RegExp('bar', 'g')).toString()"),
"/bar/g"
);
// the escaped backslash must survive both the JS string literal and the regex source
assert_eq!(
forward(&mut engine, "(new RegExp('\\\\n', 'g')).toString()"),
"/\\n/g"
);
assert_eq!(forward(&mut engine, "/\\n/g.toString()"), "/\\n/g");
}
}

93
src/lib/js/value.rs

@ -1,12 +1,13 @@
use crate::js::{
function::{Function, NativeFunction, NativeFunctionData},
object::{Object, ObjectKind, INSTANCE_PROTOTYPE, PROTOTYPE},
object::{InternalState, InternalStateCell, Object, ObjectKind, INSTANCE_PROTOTYPE, PROTOTYPE},
property::Property,
};
use gc::{Gc, GcCell};
use gc_derive::{Finalize, Trace};
use serde_json::{map::Map, Number as JSONNumber, Value as JSONValue};
use std::{
any::Any,
f64::NAN,
fmt::{self, Display},
ops::{Add, BitAnd, BitOr, BitXor, Deref, DerefMut, Div, Mul, Not, Rem, Shl, Shr, Sub},
@ -302,6 +303,75 @@ impl ValueData {
}
}
/// Report whether this value is an object that currently carries internal state.
/// Non-object values always yield `false`.
pub fn has_internal_state(&self) -> bool {
    match *self {
        ValueData::Object(ref obj) => obj.borrow().state.is_some(),
        _ => false,
    }
}
/// Return a clone of the object's internal state cell.
///
/// Yields `None` for non-object values and for objects without internal state.
pub fn get_internal_state(&self) -> Option<InternalStateCell> {
    match *self {
        ValueData::Object(ref obj) => {
            let object = obj.borrow();
            let cloned = object.state.as_ref().map(|cell| (**cell).clone());
            cloned
        }
        _ => None,
    }
}
/// Call `f` with a shared reference to the internal state, returning whatever `f` returns.
///
/// # Panics
///
/// Panics if this value is not an object, if the object has no internal state, or if the
/// internal state is not of the concrete type `S`.
pub fn with_internal_state_ref<S: Any + InternalState, R, F: FnOnce(&S) -> R>(
    &self,
    f: F,
) -> R {
    let obj = match *self {
        ValueData::Object(ref obj) => obj,
        _ => panic!("not an object"),
    };
    // Keep the borrow guard alive for as long as `f` uses the state.
    let object = obj.borrow();
    let cell = object.state.as_ref().expect("no state");
    let state = cell.downcast_ref().expect("wrong state type");
    f(state)
}
/// Call `f` with an exclusive reference to the internal state, returning whatever `f` returns.
///
/// # Panics
///
/// Panics if this value is not an object, if the object has no internal state, or if the
/// internal state is not of the concrete type `S`.
pub fn with_internal_state_mut<S: Any + InternalState, R, F: FnOnce(&mut S) -> R>(
    &self,
    f: F,
) -> R {
    let obj = match *self {
        ValueData::Object(ref obj) => obj,
        _ => panic!("not an object"),
    };
    // Keep the mutable borrow guard alive for as long as `f` uses the state.
    let mut object = obj.borrow_mut();
    let cell = object.state.as_mut().expect("no state");
    let state = cell.downcast_mut().expect("wrong state type");
    f(state)
}
/// Check to see if the Value has the field, mainly used by environment records
pub fn has_field(&self, field: &str) -> bool {
self.get_prop(field).is_some()
@ -386,6 +456,15 @@ impl ValueData {
self.set_prop(field.to_string(), prop)
}
/// Attach `state` to this object as its internal state, dropping any state stored before.
/// Does nothing when this value is not an object.
pub fn set_internal_state<T: Any + InternalState>(&self, state: T) {
    if let ValueData::Object(ref obj) = *self {
        let cell = Box::new(InternalStateCell::new(state));
        obj.borrow_mut().state = Some(cell);
    }
}
/// Convert from a JSON value to a JS value
pub fn from_json(json: JSONValue) -> Self {
match json {
@ -609,6 +688,18 @@ pub trait FromValue {
Self: Sized;
}
/// A `Value` converts to itself: the conversion clones the existing handle.
impl ToValue for Value {
fn to_value(&self) -> Value {
self.clone()
}
}
/// A `Value` converts from itself: the conversion hands the value back and never fails.
impl FromValue for Value {
fn from_value(value: Value) -> Result<Self, &'static str> {
Ok(value)
}
}
impl ToValue for String {
fn to_value(&self) -> Value {
Gc::new(ValueData::String(self.clone()))

3
src/lib/syntax/ast/constant.rs

@ -6,8 +6,6 @@ use std::fmt::{Display, Formatter, Result};
pub enum Const {
/// A UTF-8 string, such as `"Hello, world"`
String(String),
// A regular expression, such as `/where('s| is) [wW]ally/`
RegExp(String, bool, bool),
// A 64-bit floating-point number, such as `3.1415`
Num(f64),
// A 32-bit integer, such as `42`
@ -24,7 +22,6 @@ impl Display for Const {
fn fmt(&self, f: &mut Formatter) -> Result {
match *self {
Const::String(ref st) => write!(f, "\"{}\"", st),
Const::RegExp(ref reg, _, _) => write!(f, "~/{}/", reg),
Const::Num(num) => write!(f, "{}", num),
Const::Int(num) => write!(f, "{}", num),
Const::Bool(v) => write!(f, "{}", v),

8
src/lib/syntax/ast/token.rs

@ -58,8 +58,8 @@ pub enum TokenData {
Punctuator(Punctuator),
/// A string literal
StringLiteral(String),
/// A regular expression
RegularExpression(String),
/// A regular expression, consisting of body and flags
RegularExpressionLiteral(String, String),
/// A comment
Comment(String),
}
@ -75,7 +75,9 @@ impl Display for TokenData {
TokenData::NumericLiteral(ref num) => write!(f, "{}", num),
TokenData::Punctuator(ref punc) => write!(f, "{}", punc),
TokenData::StringLiteral(ref lit) => write!(f, "{}", lit),
TokenData::RegularExpression(ref reg) => write!(f, "{}", reg),
TokenData::RegularExpressionLiteral(ref body, ref flags) => {
write!(f, "/{}/{}", body, flags)
}
TokenData::Comment(ref comm) => write!(f, "/*{}*/", comm),
}
}

91
src/lib/syntax/lexer.rs

@ -114,7 +114,7 @@ impl<'a> Lexer<'a> {
///
/// # Arguments
///
/// * `buffer` - A string slice that holds the source code.
/// * `buffer` - A string slice that holds the source code.
/// The buffer needs to have a lifetime as long as the Lexer instance itself
///
/// # Example
@ -298,8 +298,8 @@ impl<'a> Lexer<'a> {
.unwrap()
}
}
'\'' | '"' => escape,
_ => panic!(
'\'' | '"' | '\\' => escape,
ch => panic!(
"{}:{}: Invalid escape `{}`",
self.line_number, self.column_number, ch
),
@ -446,12 +446,13 @@ impl<'a> Lexer<'a> {
// Comments
'/' => {
if let Some(ch) = self.preview_next() {
let token = match ch {
// Matched comment
match ch {
// line comment
'/' => {
let comment = self.read_line()?;
TokenData::Comment(comment)
self.push_token(TokenData::Comment(comment));
}
// block comment
'*' => {
let mut buf = String::new();
loop {
@ -466,14 +467,60 @@ impl<'a> Lexer<'a> {
next_ch => buf.push(next_ch),
}
}
TokenData::Comment(buf)
self.push_token(TokenData::Comment(buf));
}
'=' => TokenData::Punctuator(Punctuator::AssignDiv),
_ => TokenData::Punctuator(Punctuator::Div),
};
self.push_token(token)
// division, assigndiv or regex literal
_ => {
// if we fail to parse a regex literal, store a copy of the current
// buffer to restore later on
let original_buffer = self.buffer.clone();
// first, try to parse a regex literal
let mut body = String::new();
let mut regex = false;
loop {
match self.buffer.next() {
// end of body
Some('/') => {
regex = true;
break;
}
// newline/eof not allowed in regex literal
Some('\n') | Some('\r') | Some('\u{2028}')
| Some('\u{2029}') | None => break,
// escape sequence
Some('\\') => {
body.push('\\');
match self.next()? {
// newline not allowed in regex literal
'\n' | '\r' | '\u{2028}' | '\u{2029}' => break,
ch => body.push(ch),
}
}
Some(ch) => body.push(ch),
}
}
if regex {
// body was parsed, now look for flags
let flags = self.take_char_while(char::is_alphabetic)?;
self.push_token(TokenData::RegularExpressionLiteral(
body, flags,
));
} else {
// failed to parse regex, restore original buffer position and
// parse either div or assigndiv
self.buffer = original_buffer;
if self.next_is('=') {
self.push_token(TokenData::Punctuator(
Punctuator::AssignDiv,
));
} else {
self.push_token(TokenData::Punctuator(Punctuator::Div));
}
}
}
}
} else {
return Err(LexerError::new("Expecting Token /,*,="));
return Err(LexerError::new("Expecting Token /,*,= or regex"));
}
}
'*' => op!(self, Punctuator::AssignMul, Punctuator::Mul, {
@ -894,4 +941,24 @@ mod tests {
assert_eq!(lexer.tokens[0].data, TokenData::NumericLiteral(1.0));
assert_eq!(lexer.tokens[1].data, TokenData::Punctuator(Punctuator::Dot));
}
/// A regex literal without flags lexes to a single token with an empty flag string.
#[test]
fn test_regex_literal() {
let mut lexer = Lexer::new("/(?:)/");
lexer.lex().expect("failed to lex");
assert_eq!(
lexer.tokens[0].data,
TokenData::RegularExpressionLiteral("(?:)".to_string(), "".to_string())
);
}
/// Escaped slashes stay in the body (backslash included) and trailing letters become flags.
#[test]
fn test_regex_literal_flags() {
let mut lexer = Lexer::new(r"/\/[^\/]*\/*/gmi");
lexer.lex().expect("failed to lex");
assert_eq!(
lexer.tokens[0].data,
TokenData::RegularExpressionLiteral("\\/[^\\/]*\\/*".to_string(), "gmi".to_string())
);
}
}

7
src/lib/syntax/parser.rs

@ -359,6 +359,13 @@ impl Parser {
}
TokenData::Identifier(s) => mk!(self, ExprDef::Local(s)),
TokenData::Keyword(keyword) => self.parse_struct(keyword)?,
TokenData::RegularExpressionLiteral(body, flags) => Expr::new(ExprDef::Construct(
Box::new(Expr::new(ExprDef::Local("RegExp".to_string()))),
vec![
Expr::new(ExprDef::Const(Const::String(body))),
Expr::new(ExprDef::Const(Const::String(flags))),
],
)),
TokenData::Punctuator(Punctuator::OpenParen) => {
match self.get_token(self.pos)?.data {
TokenData::Punctuator(Punctuator::CloseParen)

Loading…
Cancel
Save