Browse Source

Fix regular expression construction (#3338)

* Fix regular expression construction

The previous implementation regular expression (e.g. /abc/) used the global `RegExp`,
which caused errors when the `RegExp` was overwritten.

* Apply Review
pull/3339/head
Haled Odat 8 months ago committed by GitHub
parent
commit
4bdb6c6fe5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 8
      boa_ast/src/expression/mod.rs
  2. 94
      boa_ast/src/expression/regexp.rs
  3. 8
      boa_ast/src/visitor.rs
  4. 10
      boa_engine/src/builtins/regexp/tests.rs
  5. 12
      boa_engine/src/bytecompiler/expression/mod.rs
  6. 15
      boa_engine/src/vm/code_block.rs
  7. 5
      boa_engine/src/vm/flowgraph/mod.rs
  8. 11
      boa_engine/src/vm/opcode/mod.rs
  9. 54
      boa_engine/src/vm/opcode/push/literal.rs
  10. 13
      boa_parser/src/parser/expression/primary/mod.rs
  11. 26
      boa_parser/src/parser/expression/tests.rs

8
boa_ast/src/expression/mod.rs

@ -28,6 +28,7 @@ mod identifier;
mod new;
mod optional;
mod parenthesized;
mod regexp;
mod spread;
mod tagged_template;
mod r#yield;
@ -40,6 +41,7 @@ pub use optional::{Optional, OptionalOperation, OptionalOperationKind};
pub use parenthesized::Parenthesized;
pub use r#await::Await;
pub use r#yield::Yield;
pub use regexp::RegExpLiteral;
pub use spread::Spread;
pub use tagged_template::TaggedTemplate;
@ -74,6 +76,9 @@ pub enum Expression {
/// See [`Literal`].
Literal(Literal),
/// See [`RegExpLiteral`].
RegExpLiteral(RegExpLiteral),
/// See [`ArrayLiteral`].
ArrayLiteral(ArrayLiteral),
@ -210,6 +215,7 @@ impl Expression {
Self::Await(aw) => aw.to_interned_string(interner),
Self::Yield(yi) => yi.to_interned_string(interner),
Self::Parenthesized(expr) => expr.to_interned_string(interner),
Self::RegExpLiteral(regexp) => regexp.to_interned_string(interner),
Self::FormalParameterList(_) => unreachable!(),
}
}
@ -291,6 +297,7 @@ impl VisitWith for Expression {
match self {
Self::Identifier(id) => visitor.visit_identifier(id),
Self::Literal(lit) => visitor.visit_literal(lit),
Self::RegExpLiteral(regexp) => visitor.visit_reg_exp_literal(regexp),
Self::ArrayLiteral(arlit) => visitor.visit_array_literal(arlit),
Self::ObjectLiteral(olit) => visitor.visit_object_literal(olit),
Self::Spread(sp) => visitor.visit_spread(sp),
@ -333,6 +340,7 @@ impl VisitWith for Expression {
match self {
Self::Identifier(id) => visitor.visit_identifier_mut(id),
Self::Literal(lit) => visitor.visit_literal_mut(lit),
Self::RegExpLiteral(regexp) => visitor.visit_reg_exp_literal_mut(regexp),
Self::ArrayLiteral(arlit) => visitor.visit_array_literal_mut(arlit),
Self::ObjectLiteral(olit) => visitor.visit_object_literal_mut(olit),
Self::Spread(sp) => visitor.visit_spread_mut(sp),

94
boa_ast/src/expression/regexp.rs

@ -0,0 +1,94 @@
//! This module contains the ECMAScript representation regular expressions.
//!
//! More information:
//! - [ECMAScript reference][spec]
//! - [MDN documentation][mdn]
//!
//! [spec]: https://tc39.es/ecma262/#sec-literals-regular-expression-literals
//! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions
use std::ops::ControlFlow;
use boa_interner::{Interner, Sym, ToInternedString};
use crate::{
try_break,
visitor::{VisitWith, Visitor, VisitorMut},
};
use super::Expression;
/// Regular expressions in ECMAScript.
///
/// More information:
/// - [ECMAScript reference][spec]
/// - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma262/#sec-literals-regular-expression-literals
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RegExpLiteral {
pattern: Sym,
flags: Sym,
}
impl RegExpLiteral {
/// Create a new [`RegExp`].
#[inline]
#[must_use]
pub const fn new(pattern: Sym, flags: Sym) -> Self {
Self { pattern, flags }
}
/// Get the pattern part of the [`RegExp`].
#[inline]
#[must_use]
pub const fn pattern(&self) -> Sym {
self.pattern
}
/// Get the flags part of the [`RegExp`].
#[inline]
#[must_use]
pub const fn flags(&self) -> Sym {
self.flags
}
}
impl ToInternedString for RegExpLiteral {
#[inline]
fn to_interned_string(&self, interner: &Interner) -> String {
let pattern = interner.resolve_expect(self.pattern);
let flags = interner.resolve_expect(self.flags);
format!("/{pattern}/{flags}")
}
}
impl From<RegExpLiteral> for Expression {
#[inline]
fn from(value: RegExpLiteral) -> Self {
Self::RegExpLiteral(value)
}
}
impl VisitWith for RegExpLiteral {
#[inline]
fn visit_with<'a, V>(&'a self, visitor: &mut V) -> ControlFlow<V::BreakTy>
where
V: Visitor<'a>,
{
try_break!(visitor.visit_sym(&self.pattern));
visitor.visit_sym(&self.flags)
}
#[inline]
fn visit_with_mut<'a, V>(&'a mut self, visitor: &mut V) -> ControlFlow<V::BreakTy>
where
V: VisitorMut<'a>,
{
try_break!(visitor.visit_sym_mut(&mut self.pattern));
visitor.visit_sym_mut(&mut self.flags)
}
}

8
boa_ast/src/visitor.rs

@ -22,7 +22,8 @@ use crate::{
Binary, BinaryInPrivate, Conditional, Unary, Update,
},
Await, Call, Expression, Identifier, ImportCall, New, Optional, OptionalOperation,
OptionalOperationKind, Parenthesized, Spread, SuperCall, TaggedTemplate, Yield,
OptionalOperationKind, Parenthesized, RegExpLiteral, Spread, SuperCall, TaggedTemplate,
Yield,
},
function::{
ArrowFunction, AsyncArrowFunction, AsyncFunction, AsyncGenerator, Class, ClassElement,
@ -156,6 +157,7 @@ node_ref! {
Binding,
Pattern,
Literal,
RegExpLiteral,
ArrayLiteral,
ObjectLiteral,
Spread,
@ -258,6 +260,7 @@ pub trait Visitor<'ast>: Sized {
define_visit!(visit_binding, Binding);
define_visit!(visit_pattern, Pattern);
define_visit!(visit_literal, Literal);
define_visit!(visit_reg_exp_literal, RegExpLiteral);
define_visit!(visit_array_literal, ArrayLiteral);
define_visit!(visit_object_literal, ObjectLiteral);
define_visit!(visit_spread, Spread);
@ -357,6 +360,7 @@ pub trait Visitor<'ast>: Sized {
NodeRef::Binding(n) => self.visit_binding(n),
NodeRef::Pattern(n) => self.visit_pattern(n),
NodeRef::Literal(n) => self.visit_literal(n),
NodeRef::RegExpLiteral(n) => self.visit_reg_exp_literal(n),
NodeRef::ArrayLiteral(n) => self.visit_array_literal(n),
NodeRef::ObjectLiteral(n) => self.visit_object_literal(n),
NodeRef::Spread(n) => self.visit_spread(n),
@ -461,6 +465,7 @@ pub trait VisitorMut<'ast>: Sized {
define_visit_mut!(visit_binding_mut, Binding);
define_visit_mut!(visit_pattern_mut, Pattern);
define_visit_mut!(visit_literal_mut, Literal);
define_visit_mut!(visit_reg_exp_literal_mut, RegExpLiteral);
define_visit_mut!(visit_array_literal_mut, ArrayLiteral);
define_visit_mut!(visit_object_literal_mut, ObjectLiteral);
define_visit_mut!(visit_spread_mut, Spread);
@ -560,6 +565,7 @@ pub trait VisitorMut<'ast>: Sized {
NodeRefMut::Binding(n) => self.visit_binding_mut(n),
NodeRefMut::Pattern(n) => self.visit_pattern_mut(n),
NodeRefMut::Literal(n) => self.visit_literal_mut(n),
NodeRefMut::RegExpLiteral(n) => self.visit_reg_exp_literal_mut(n),
NodeRefMut::ArrayLiteral(n) => self.visit_array_literal_mut(n),
NodeRefMut::ObjectLiteral(n) => self.visit_object_literal_mut(n),
NodeRefMut::Spread(n) => self.visit_spread_mut(n),

10
boa_engine/src/builtins/regexp/tests.rs

@ -212,3 +212,13 @@ fn search() {
TestAction::assert_eq("/d/[Symbol.search](undefined)", 2),
]);
}
#[test]
fn regular_expression_construction_independant_of_global_reg_exp() {
let regex = "/abc/";
run_test_actions([
TestAction::run(regex),
TestAction::run("RegExp = null"),
TestAction::run(regex),
]);
}

12
boa_engine/src/bytecompiler/expression/mod.rs

@ -14,6 +14,7 @@ use boa_ast::{
access::{PropertyAccess, PropertyAccessField},
literal::{Literal as AstLiteral, TemplateElement, TemplateLiteral},
operator::Conditional,
Identifier,
},
Expression,
};
@ -79,6 +80,17 @@ impl ByteCompiler<'_, '_> {
pub(crate) fn compile_expr_impl(&mut self, expr: &Expression, use_expr: bool) {
match expr {
Expression::Literal(lit) => self.compile_literal(lit, use_expr),
Expression::RegExpLiteral(regexp) => {
let pattern_index = self.get_or_insert_name(Identifier::new(regexp.pattern()));
let flags_index = self.get_or_insert_name(Identifier::new(regexp.flags()));
self.emit(
Opcode::PushRegExp,
&[
Operand::Varying(pattern_index),
Operand::Varying(flags_index),
],
);
}
Expression::Unary(unary) => self.compile_unary(unary, use_expr),
Expression::Update(update) => self.compile_update(update, use_expr),
Expression::Binary(binary) => self.compile_binary(binary, use_expr),

15
boa_engine/src/vm/code_block.rs

@ -324,6 +324,18 @@ impl CodeBlock {
Instruction::PushDouble { value } => ryu_js::Buffer::new().format(*value).to_string(),
Instruction::PushLiteral { index }
| Instruction::ThrowNewTypeError { message: index } => index.value().to_string(),
Instruction::PushRegExp {
pattern_index: source_index,
flags_index: flag_index,
} => {
let pattern = self.names[source_index.value() as usize]
.clone()
.to_std_string_escaped();
let flags = self.names[flag_index.value() as usize]
.clone()
.to_std_string_escaped();
format!("/{pattern}/{flags}")
}
Instruction::Jump { address: value }
| Instruction::JumpIfTrue { address: value }
| Instruction::JumpIfFalse { address: value }
@ -631,8 +643,7 @@ impl CodeBlock {
| Instruction::Reserved53
| Instruction::Reserved54
| Instruction::Reserved55
| Instruction::Reserved56
| Instruction::Reserved57 => unreachable!("Reserved opcodes are unrechable"),
| Instruction::Reserved56 => unreachable!("Reserved opcodes are unrechable"),
}
}
}

5
boa_engine/src/vm/flowgraph/mod.rs

@ -76,7 +76,7 @@ impl CodeBlock {
graph.add_node(previous_pc, NodeShape::None, label.into(), Color::None);
graph.add_edge(previous_pc, pc, None, Color::None, EdgeStyle::Line);
}
Instruction::PushLiteral { .. } => {
Instruction::PushLiteral { .. } | Instruction::PushRegExp { .. } => {
graph.add_node(previous_pc, NodeShape::None, label.into(), Color::None);
graph.add_edge(previous_pc, pc, None, Color::None, EdgeStyle::Line);
}
@ -522,8 +522,7 @@ impl CodeBlock {
| Instruction::Reserved53
| Instruction::Reserved54
| Instruction::Reserved55
| Instruction::Reserved56
| Instruction::Reserved57 => unreachable!("Reserved opcodes are unrechable"),
| Instruction::Reserved56 => unreachable!("Reserved opcodes are unrechable"),
}
}

11
boa_engine/src/vm/opcode/mod.rs

@ -694,11 +694,18 @@ generate_opcodes! {
/// Like strings and bigints. The index operand is used to index into the `literals`
/// array to get the value.
///
/// Operands: index: `u32`
/// Operands: index: `VaryingOperand`
///
/// Stack: **=>** (`literals[index]`)
PushLiteral { index: VaryingOperand },
/// Push regexp value on the stack.
///
/// Operands: pattern_index: `VaryingOperand`, flags: `VaryingOperand`
///
/// Stack: **=>** regexp
PushRegExp { pattern_index: VaryingOperand, flags_index: VaryingOperand },
/// Push empty object `{}` value on the stack.
///
/// Operands:
@ -2176,8 +2183,6 @@ generate_opcodes! {
Reserved55 => Reserved,
/// Reserved [`Opcode`].
Reserved56 => Reserved,
/// Reserved [`Opcode`].
Reserved57 => Reserved,
}
/// Specific opcodes for bindings.

54
boa_engine/src/vm/opcode/push/literal.rs

@ -1,4 +1,5 @@
use crate::{
object::JsRegExp,
vm::{opcode::Operation, CompletionType},
Context, JsResult,
};
@ -24,8 +25,8 @@ impl Operation for PushLiteral {
const INSTRUCTION: &'static str = "INST - PushLiteral";
fn execute(context: &mut Context<'_>) -> JsResult<CompletionType> {
let index = context.vm.read::<u8>();
Self::operation(context, index as usize)
let index = context.vm.read::<u8>() as usize;
Self::operation(context, index)
}
fn execute_with_u16_operands(context: &mut Context<'_>) -> JsResult<CompletionType> {
@ -34,7 +35,52 @@ impl Operation for PushLiteral {
}
fn execute_with_u32_operands(context: &mut Context<'_>) -> JsResult<CompletionType> {
let index = context.vm.read::<u32>();
Self::operation(context, index as usize)
let index = context.vm.read::<u32>() as usize;
Self::operation(context, index)
}
}
/// `PushRegExp` implements the Opcode Operation for `Opcode::PushRegExp`
///
/// Operation:
/// - Push regexp value on the stack.
#[derive(Debug, Clone, Copy)]
pub(crate) struct PushRegExp;
impl PushRegExp {
fn operation(
context: &mut Context<'_>,
pattern_index: usize,
flags_index: usize,
) -> JsResult<CompletionType> {
let pattern = context.vm.frame().code_block.names[pattern_index].clone();
let flags = context.vm.frame().code_block.names[flags_index].clone();
let regexp = JsRegExp::new(pattern, flags, context)?;
context.vm.push(regexp);
Ok(CompletionType::Normal)
}
}
impl Operation for PushRegExp {
const NAME: &'static str = "PushRegExp";
const INSTRUCTION: &'static str = "INST - PushRegExp";
fn execute(context: &mut Context<'_>) -> JsResult<CompletionType> {
let pattern_index = context.vm.read::<u8>() as usize;
let flags_index = context.vm.read::<u8>() as usize;
Self::operation(context, pattern_index, flags_index)
}
fn execute_with_u16_operands(context: &mut Context<'_>) -> JsResult<CompletionType> {
let pattern_index = context.vm.read::<u16>() as usize;
let flags_index = context.vm.read::<u16>() as usize;
Self::operation(context, pattern_index, flags_index)
}
fn execute_with_u32_operands(context: &mut Context<'_>) -> JsResult<CompletionType> {
let pattern_index = context.vm.read::<u32>() as usize;
let flags_index = context.vm.read::<u32>() as usize;
Self::operation(context, pattern_index, flags_index)
}
}

13
boa_parser/src/parser/expression/primary/mod.rs

@ -41,13 +41,14 @@ use crate::{
},
Error,
};
use ast::expression::RegExpLiteral as AstRegExp;
use boa_ast::{
self as ast,
declaration::Variable,
expression::{
literal::Literal,
operator::{assign::AssignTarget, binary::BinaryOp},
Call, Identifier, New, Parenthesized,
Identifier, Parenthesized,
},
function::{FormalParameter, FormalParameterList},
operations::{contains, ContainsSymbol},
@ -239,10 +240,7 @@ where
Ok(node)
}
TokenKind::RegularExpressionLiteral(body, flags) => {
let node = ast::Expression::from(New::from(Call::new(
Identifier::new(Sym::REGEXP).into(),
vec![Literal::from(*body).into(), Literal::from(*flags).into()].into(),
)));
let node = AstRegExp::new(*body, *flags).into();
cursor.advance(interner);
Ok(node)
}
@ -252,10 +250,7 @@ where
let tok = cursor.lex_regex(position, interner)?;
if let TokenKind::RegularExpressionLiteral(body, flags) = *tok.kind() {
Ok(ast::Expression::from(New::from(Call::new(
Identifier::new(Sym::REGEXP).into(),
vec![Literal::from(body).into(), Literal::from(flags).into()].into(),
))))
Ok(AstRegExp::new(body, flags).into())
} else {
// A regex was expected and nothing else.
Err(Error::unexpected(

26
boa_parser/src/parser/expression/tests.rs

@ -8,7 +8,7 @@ use boa_ast::{
binary::{ArithmeticOp, BitwiseOp, LogicalOp, RelationalOp},
Assign, Binary,
},
Call, Identifier, New, Parenthesized,
Call, Identifier, Parenthesized, RegExpLiteral,
},
Declaration, Expression, Statement,
};
@ -99,14 +99,10 @@ fn check_numeric_operations() {
.get_or_intern_static("myRegex", utf16!("myRegex"))
.into(),
Some(
New::from(Call::new(
Identifier::new(Sym::REGEXP).into(),
vec![
Literal::from(interner.get_or_intern_static("=", utf16!("="))).into(),
Literal::from(Sym::EMPTY_STRING).into(),
]
.into(),
))
RegExpLiteral::new(
interner.get_or_intern_static("=", utf16!("=")),
Sym::EMPTY_STRING,
)
.into(),
),
)]
@ -122,14 +118,10 @@ fn check_numeric_operations() {
"fn(/=/);",
vec![Statement::Expression(Expression::from(Call::new(
Identifier::new(interner.get_or_intern_static("fn", utf16!("fn"))).into(),
vec![New::from(Call::new(
Identifier::new(Sym::REGEXP).into(),
vec![
Literal::from(interner.get_or_intern_static("=", utf16!("="))).into(),
Literal::from(Sym::EMPTY_STRING).into(),
]
.into(),
))
vec![RegExpLiteral::new(
interner.get_or_intern_static("=", utf16!("=")),
Sym::EMPTY_STRING,
)
.into()]
.into(),
)))

Loading…
Cancel
Save