Browse Source

Rework RegExp struct to include bitflags field (#1837)

This Pull Request fixes/closes #1819.

It changes the following:

- Make the `RegExpFlags` bitflags in `boa/src/syntax/lexer/regex.rs` public (together with the `regex` lexer module) so that `boa/src/builtins/regexp/mod.rs` can import and reuse them
- Replace the individual boolean flag fields in the `RegExp` struct with a single `RegExpFlags` bitflags field
- Update match expressions to make use of the bitflags struct


Co-authored-by: Aäron Munsters <45006406+aaronmunsters@users.noreply.github.com>
pull/1838/head
Aäron Munsters 3 years ago
parent
commit
826adb2009
  1. 111
      boa/src/builtins/regexp/mod.rs
  2. 2
      boa/src/syntax/lexer/mod.rs
  3. 59
      boa/src/syntax/lexer/regex.rs

111
boa/src/builtins/regexp/mod.rs

@ -11,6 +11,8 @@
pub mod regexp_string_iterator; pub mod regexp_string_iterator;
use std::str::FromStr;
use crate::{ use crate::{
builtins::{array::Array, string, BuiltIn}, builtins::{array::Array, string, BuiltIn},
context::StandardObjects, context::StandardObjects,
@ -21,6 +23,7 @@ use crate::{
}, },
property::Attribute, property::Attribute,
symbol::WellKnownSymbols, symbol::WellKnownSymbols,
syntax::lexer::regex::RegExpFlags,
value::{IntegerOrInfinity, JsValue}, value::{IntegerOrInfinity, JsValue},
BoaProfiler, Context, JsResult, JsString, BoaProfiler, Context, JsResult, JsString,
}; };
@ -37,25 +40,7 @@ mod tests;
pub struct RegExp { pub struct RegExp {
/// Regex matcher. /// Regex matcher.
matcher: Regex, matcher: Regex,
flags: RegExpFlags,
/// Flag 's' - dot matches newline characters.
dot_all: bool,
/// Flag 'g'
global: bool,
/// Flag 'i' - ignore case.
ignore_case: bool,
/// Flag 'm' - '^' and '$' match beginning/end of line.
multiline: bool,
/// Flag 'y'
sticky: bool,
/// Flag 'u' - Unicode.
unicode: bool,
original_source: JsString, original_source: JsString,
original_flags: JsString, original_flags: JsString,
} }
@ -283,45 +268,10 @@ impl RegExp {
// 5. If F contains any code unit other than "g", "i", "m", "s", "u", or "y" // 5. If F contains any code unit other than "g", "i", "m", "s", "u", or "y"
// or if it contains the same code unit more than once, throw a SyntaxError exception. // or if it contains the same code unit more than once, throw a SyntaxError exception.
let mut global = false; let flags = match RegExpFlags::from_str(&f) {
let mut ignore_case = false; Err(msg) => return context.throw_syntax_error(msg),
let mut multiline = false; Ok(result) => result,
let mut dot_all = false; };
let mut unicode = false;
let mut sticky = false;
for c in f.chars() {
match c {
'g' if global => {
return context.throw_syntax_error("RegExp flags contains multiple 'g'")
}
'g' => global = true,
'i' if ignore_case => {
return context.throw_syntax_error("RegExp flags contains multiple 'i'")
}
'i' => ignore_case = true,
'm' if multiline => {
return context.throw_syntax_error("RegExp flags contains multiple 'm'")
}
'm' => multiline = true,
's' if dot_all => {
return context.throw_syntax_error("RegExp flags contains multiple 's'")
}
's' => dot_all = true,
'u' if unicode => {
return context.throw_syntax_error("RegExp flags contains multiple 'u'")
}
'u' => unicode = true,
'y' if sticky => {
return context.throw_syntax_error("RegExp flags contains multiple 'y'")
}
'y' => sticky = true,
c => {
return context.throw_syntax_error(format!(
"RegExp flags contains unknown code unit '{c}'",
))
}
}
}
// 12. Set obj.[[OriginalSource]] to P. // 12. Set obj.[[OriginalSource]] to P.
// 13. Set obj.[[OriginalFlags]] to F. // 13. Set obj.[[OriginalFlags]] to F.
@ -336,12 +286,7 @@ impl RegExp {
let regexp = Self { let regexp = Self {
matcher, matcher,
dot_all, flags,
global,
ignore_case,
multiline,
sticky,
unicode,
original_source: p, original_source: p,
original_flags: f, original_flags: f,
}; };
@ -387,16 +332,16 @@ impl RegExp {
} }
#[inline] #[inline]
fn regexp_has_flag(this: &JsValue, flag: char, context: &mut Context) -> JsResult<JsValue> { fn regexp_has_flag(this: &JsValue, flag: u8, context: &mut Context) -> JsResult<JsValue> {
if let Some(object) = this.as_object() { if let Some(object) = this.as_object() {
if let Some(regexp) = object.borrow().as_regexp() { if let Some(regexp) = object.borrow().as_regexp() {
return Ok(JsValue::new(match flag { return Ok(JsValue::new(match flag {
'g' => regexp.global, b'g' => regexp.flags.contains(RegExpFlags::GLOBAL),
'm' => regexp.multiline, b'm' => regexp.flags.contains(RegExpFlags::MULTILINE),
's' => regexp.dot_all, b's' => regexp.flags.contains(RegExpFlags::DOT_ALL),
'i' => regexp.ignore_case, b'i' => regexp.flags.contains(RegExpFlags::IGNORE_CASE),
'u' => regexp.unicode, b'u' => regexp.flags.contains(RegExpFlags::UNICODE),
'y' => regexp.sticky, b'y' => regexp.flags.contains(RegExpFlags::STICKY),
_ => unreachable!(), _ => unreachable!(),
})); }));
} }
@ -410,12 +355,12 @@ impl RegExp {
} }
let name = match flag { let name = match flag {
'g' => "global", b'g' => "global",
'm' => "multiline", b'm' => "multiline",
's' => "dotAll", b's' => "dotAll",
'i' => "ignoreCase", b'i' => "ignoreCase",
'u' => "unicode", b'u' => "unicode",
'y' => "sticky", b'y' => "sticky",
_ => unreachable!(), _ => unreachable!(),
}; };
@ -439,7 +384,7 @@ impl RegExp {
_: &[JsValue], _: &[JsValue],
context: &mut Context, context: &mut Context,
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
Self::regexp_has_flag(this, 'g', context) Self::regexp_has_flag(this, b'g', context)
} }
/// `get RegExp.prototype.ignoreCase` /// `get RegExp.prototype.ignoreCase`
@ -457,7 +402,7 @@ impl RegExp {
_: &[JsValue], _: &[JsValue],
context: &mut Context, context: &mut Context,
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
Self::regexp_has_flag(this, 'i', context) Self::regexp_has_flag(this, b'i', context)
} }
/// `get RegExp.prototype.multiline` /// `get RegExp.prototype.multiline`
@ -475,7 +420,7 @@ impl RegExp {
_: &[JsValue], _: &[JsValue],
context: &mut Context, context: &mut Context,
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
Self::regexp_has_flag(this, 'm', context) Self::regexp_has_flag(this, b'm', context)
} }
/// `get RegExp.prototype.dotAll` /// `get RegExp.prototype.dotAll`
@ -493,7 +438,7 @@ impl RegExp {
_: &[JsValue], _: &[JsValue],
context: &mut Context, context: &mut Context,
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
Self::regexp_has_flag(this, 's', context) Self::regexp_has_flag(this, b's', context)
} }
/// `get RegExp.prototype.unicode` /// `get RegExp.prototype.unicode`
@ -512,7 +457,7 @@ impl RegExp {
_: &[JsValue], _: &[JsValue],
context: &mut Context, context: &mut Context,
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
Self::regexp_has_flag(this, 'u', context) Self::regexp_has_flag(this, b'u', context)
} }
/// `get RegExp.prototype.sticky` /// `get RegExp.prototype.sticky`
@ -531,7 +476,7 @@ impl RegExp {
_: &[JsValue], _: &[JsValue],
context: &mut Context, context: &mut Context,
) -> JsResult<JsValue> { ) -> JsResult<JsValue> {
Self::regexp_has_flag(this, 'y', context) Self::regexp_has_flag(this, b'y', context)
} }
/// `get RegExp.prototype.flags` /// `get RegExp.prototype.flags`

2
boa/src/syntax/lexer/mod.rs

@ -20,7 +20,7 @@ pub mod error;
mod identifier; mod identifier;
mod number; mod number;
mod operator; mod operator;
mod regex; pub mod regex;
mod spread; mod spread;
mod string; mod string;
mod template; mod template;

59
boa/src/syntax/lexer/regex.rs

@ -12,7 +12,7 @@ use bitflags::bitflags;
use boa_interner::{Interner, Sym}; use boa_interner::{Interner, Sym};
use std::{ use std::{
io::{self, ErrorKind, Read}, io::{self, ErrorKind, Read},
str, str::{self, FromStr},
}; };
/// Regex literal lexing. /// Regex literal lexing.
@ -133,7 +133,7 @@ impl<R> Tokenizer<R> for RegexLiteral {
bitflags! { bitflags! {
/// Flags of a regular expression. /// Flags of a regular expression.
#[derive(Default)] #[derive(Default)]
struct RegExpFlags: u8 { pub struct RegExpFlags: u8 {
const GLOBAL = 0b0000_0001; const GLOBAL = 0b0000_0001;
const IGNORE_CASE = 0b0000_0010; const IGNORE_CASE = 0b0000_0010;
const MULTILINE = 0b0000_0100; const MULTILINE = 0b0000_0100;
@ -143,33 +143,40 @@ bitflags! {
} }
} }
fn parse_regex_flags(s: &str, start: Position, interner: &mut Interner) -> Result<Sym, Error> { impl FromStr for RegExpFlags {
let mut flags = RegExpFlags::default(); type Err = String;
for c in s.bytes() {
let new_flag = match c { fn from_str(s: &str) -> Result<Self, Self::Err> {
b'g' => RegExpFlags::GLOBAL, let mut flags = Self::default();
b'i' => RegExpFlags::IGNORE_CASE, for c in s.bytes() {
b'm' => RegExpFlags::MULTILINE, let new_flag = match c {
b's' => RegExpFlags::DOT_ALL, b'g' => Self::GLOBAL,
b'u' => RegExpFlags::UNICODE, b'i' => Self::IGNORE_CASE,
b'y' => RegExpFlags::STICKY, b'm' => Self::MULTILINE,
_ => { b's' => Self::DOT_ALL,
return Err(Error::syntax( b'u' => Self::UNICODE,
format!("invalid regular expression flag {}", char::from(c)), b'y' => Self::STICKY,
start, _ => return Err(format!("invalid regular expression flag {}", char::from(c))),
)) };
}
};
if flags.contains(new_flag) { if flags.contains(new_flag) {
return Err(Error::syntax( return Err(format!(
format!("repeated regular expression flag {}", char::from(c)), "repeated regular expression flag {}",
start, char::from(c)
)); ));
}
flags.insert(new_flag);
} }
flags.insert(new_flag);
Ok(flags)
}
}
fn parse_regex_flags(s: &str, start: Position, interner: &mut Interner) -> Result<Sym, Error> {
match RegExpFlags::from_str(s) {
Err(message) => Err(Error::Syntax(message.into(), start)),
Ok(flags) => Ok(interner.get_or_intern(flags.to_string())),
} }
Ok(interner.get_or_intern(flags.to_string()))
} }
impl ToString for RegExpFlags { impl ToString for RegExpFlags {

Loading…
Cancel
Save