Browse Source

Fix more Annex B tests (#2841)

This Pull Request fixes some additional Annex B tests.

It changes the following:

- Fixes bugs related to parsing HTML closing comments (`-->`).
- Implements `RegExp::compile` behind the `annex-b` feature.
- Ignores the `legacy-regexp` feature flag, since it's still stage 3.
pull/2852/head
José Julián Espina 2 years ago
parent
commit
40a5ae0a2f
  1. 14
      boa_engine/src/builtins/function/mod.rs
  2. 100
      boa_engine/src/builtins/regexp/mod.rs
  3. 2
      boa_engine/src/string/mod.rs
  4. 7
      boa_parser/src/lexer/mod.rs
  5. 14
      boa_parser/src/lexer/regex.rs
  6. 2
      boa_parser/src/parser/cursor/buffered_lexer/mod.rs
  7. 1
      test_ignore.toml

14
boa_engine/src/builtins/function/mod.rs

@ -40,7 +40,7 @@ use boa_parser::{Parser, Source};
use boa_profiler::Profiler; use boa_profiler::Profiler;
use thin_vec::ThinVec; use thin_vec::ThinVec;
use std::fmt; use std::{fmt, io::Read};
use super::{promise::PromiseCapability, BuiltInBuilder, BuiltInConstructor, IntrinsicObject}; use super::{promise::PromiseCapability, BuiltInBuilder, BuiltInConstructor, IntrinsicObject};
@ -642,12 +642,16 @@ impl BuiltInFunctionObject {
.into()); .into());
} }
let body_arg = body_arg.to_string(context)?; // 11. Let bodyString be the string-concatenation of 0x000A (LINE FEED), ? ToString(bodyArg), and 0x000A (LINE FEED).
let body_arg = body_arg.to_string(context)?.to_std_string_escaped();
let body = b"\n".chain(body_arg.as_bytes()).chain(b"\n".as_slice());
// TODO: make parser generic to u32 iterators // TODO: make parser generic to u32 iterators
let body = match Parser::new(Source::from_bytes(&body_arg.to_std_string_escaped())) let body = match Parser::new(Source::from_reader(body, None)).parse_function_body(
.parse_function_body(context.interner_mut(), generator, r#async) context.interner_mut(),
{ generator,
r#async,
) {
Ok(statement_list) => statement_list, Ok(statement_list) => statement_list,
Err(e) => { Err(e) => {
return Err(JsNativeError::syntax() return Err(JsNativeError::syntax()

100
boa_engine/src/builtins/regexp/mod.rs

@ -27,7 +27,7 @@ use crate::{
}; };
use boa_parser::lexer::regex::RegExpFlags; use boa_parser::lexer::regex::RegExpFlags;
use boa_profiler::Profiler; use boa_profiler::Profiler;
use regress::Regex; use regress::{Flags, Regex};
use std::str::FromStr; use std::str::FromStr;
use super::{BuiltInBuilder, BuiltInConstructor, IntrinsicObject}; use super::{BuiltInBuilder, BuiltInConstructor, IntrinsicObject};
@ -94,7 +94,7 @@ impl IntrinsicObject for RegExp {
.callable(Self::get_source) .callable(Self::get_source)
.name("get source") .name("get source")
.build(); .build();
BuiltInBuilder::from_standard_constructor::<Self>(realm) let regexp = BuiltInBuilder::from_standard_constructor::<Self>(realm)
.static_accessor( .static_accessor(
JsSymbol::species(), JsSymbol::species(),
Some(get_species), Some(get_species),
@ -137,8 +137,12 @@ impl IntrinsicObject for RegExp {
.accessor(utf16!("unicode"), Some(get_unicode), None, flag_attributes) .accessor(utf16!("unicode"), Some(get_unicode), None, flag_attributes)
.accessor(utf16!("sticky"), Some(get_sticky), None, flag_attributes) .accessor(utf16!("sticky"), Some(get_sticky), None, flag_attributes)
.accessor(utf16!("flags"), Some(get_flags), None, flag_attributes) .accessor(utf16!("flags"), Some(get_flags), None, flag_attributes)
.accessor(utf16!("source"), Some(get_source), None, flag_attributes) .accessor(utf16!("source"), Some(get_source), None, flag_attributes);
.build();
#[cfg(feature = "annex-b")]
let regexp = regexp.method(Self::compile, "compile", 2);
regexp.build();
} }
fn get(intrinsics: &Intrinsics) -> JsObject { fn get(intrinsics: &Intrinsics) -> JsObject {
@ -288,26 +292,29 @@ impl RegExp {
Ok(result) => result, Ok(result) => result,
}; };
// TODO: Correct UTF-16 handling in 6. - 8. // 10. If u is true, then
// a. Let patternText be StringToCodePoints(P).
// 9. Let parseResult be ParsePattern(patternText, u). // 11. Else,
// 10. If parseResult is a non-empty List of SyntaxError objects, throw a SyntaxError exception. // a. Let patternText be the result of interpreting each of P's 16-bit elements as a Unicode BMP code point. UTF-16 decoding is not applied to the elements.
// 11. Assert: parseResult is a Pattern Parse Node. // 12. Let parseResult be ParsePattern(patternText, u).
// 12. Set obj.[[OriginalSource]] to P. // 13. If parseResult is a non-empty List of SyntaxError objects, throw a SyntaxError exception.
// 13. Set obj.[[OriginalFlags]] to F. // 14. Assert: parseResult is a Pattern Parse Node.
// 14. NOTE: The definitions of DotAll, IgnoreCase, Multiline, and Unicode in 22.2.2.1 refer to this value of obj.[[OriginalFlags]]. // 15. Set obj.[[OriginalSource]] to P.
// 15. Set obj.[[RegExpMatcher]] to CompilePattern of parseResult. // 16. Set obj.[[OriginalFlags]] to F.
// TODO: add support for utf16 regex to remove this conversions. // 17. Let capturingGroupsCount be CountLeftCapturingParensWithin(parseResult).
let ps = p.to_std_string_escaped(); // 18. Let rer be the RegExp Record { [[IgnoreCase]]: i, [[Multiline]]: m, [[DotAll]]: s, [[Unicode]]: u, [[CapturingGroupsCount]]: capturingGroupsCount }.
let fs = f.to_std_string_escaped(); // 19. Set obj.[[RegExpRecord]] to rer.
let matcher = match Regex::with_flags(&ps, fs.as_ref()) { // 20. Set obj.[[RegExpMatcher]] to CompilePattern of parseResult with argument rer.
Err(error) => { let matcher =
return Err(JsNativeError::syntax() match Regex::from_unicode(p.code_points().map(CodePoint::as_u32), Flags::from(flags)) {
.with_message(format!("failed to create matcher: {}", error.text)) Err(error) => {
.into()); return Err(JsNativeError::syntax()
} .with_message(format!("failed to create matcher: {}", error.text))
Ok(val) => val, .into());
}; }
Ok(val) => val,
};
let regexp = Self { let regexp = Self {
matcher, matcher,
flags, flags,
@ -1659,6 +1666,51 @@ impl RegExp {
// 22. Return A. // 22. Return A.
Ok(a.into()) Ok(a.into())
} }
/// [`RegExp.prototype.compile ( pattern, flags )`][spec]
///
/// Legacy (Annex B) method that hands the `this` RegExp object, together with a
/// pattern and flags, to [`Self::initialize`].
///
/// - If `pattern` is itself a RegExp object, its original source and original flags
///   are reused, and `flags` must be `undefined`.
/// - Otherwise `pattern` and `flags` are forwarded to [`Self::initialize`] unchanged.
///
/// # Errors
///
/// Returns a `TypeError` if `this` is not a RegExp object, or if `pattern` is a RegExp
/// object and `flags` is not `undefined`. Any error produced by [`Self::initialize`]
/// is propagated as-is.
///
/// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype.compile
#[cfg(feature = "annex-b")]
fn compile(this: &JsValue, args: &[JsValue], context: &mut Context<'_>) -> JsResult<JsValue> {
    // 1. Let O be the this value.
    // 2. Perform ? RequireInternalSlot(O, [[RegExpMatcher]]).
    //
    // The guard rejects both primitives and objects that are not RegExps, so the
    // message must not claim that only non-objects are refused.
    let this = this
        .as_object()
        .filter(|o| o.borrow().is_regexp())
        .cloned()
        .ok_or_else(|| {
            JsNativeError::typ().with_message(
                "`RegExp.prototype.compile` cannot be called on a value that is not a RegExp object",
            )
        })?;

    let pattern = args.get_or_undefined(0);
    let flags = args.get_or_undefined(1);

    // 3. If pattern is an Object and pattern has a [[RegExpMatcher]] internal slot, then
    //
    // The borrow of `pattern` is confined to the closure so that it is released before
    // `Self::initialize` runs; `pattern` may be the very same object as `this`.
    let (pattern, flags) = if let Some((p, f)) = pattern.as_object().and_then(|o| {
        let o = o.borrow();
        o.as_regexp()
            .map(|rx| (rx.original_source.clone(), rx.original_flags.clone()))
    }) {
        // a. If flags is not undefined, throw a TypeError exception.
        if !flags.is_undefined() {
            return Err(JsNativeError::typ()
                .with_message(
                    "`RegExp.prototype.compile` cannot be \
                     called with both a RegExp initializer and new flags",
                )
                .into());
        }
        // b. Let P be pattern.[[OriginalSource]].
        // c. Let F be pattern.[[OriginalFlags]].
        (p.into(), f.into())
    } else {
        // 4. Else,
        //    a. Let P be pattern.
        //    b. Let F be flags.
        (pattern.clone(), flags.clone())
    };

    // 5. Return ? RegExpInitialize(O, P, F).
    Self::initialize(this, &pattern, &flags, context)
}
} }
/// `22.2.5.2.3 AdvanceStringIndex ( S, index, unicode )` /// `22.2.5.2.3 AdvanceStringIndex ( S, index, unicode )`

2
boa_engine/src/string/mod.rs

@ -355,7 +355,7 @@ impl JsString {
} }
/// Gets an iterator of all the Unicode codepoints of a [`JsString`]. /// Gets an iterator of all the Unicode codepoints of a [`JsString`].
pub fn code_points(&self) -> impl Iterator<Item = CodePoint> + '_ { pub fn code_points(&self) -> impl Iterator<Item = CodePoint> + Clone + '_ {
char::decode_utf16(self.iter().copied()).map(|res| match res { char::decode_utf16(self.iter().copied()).map(|res| match res {
Ok(c) => CodePoint::Unicode(c), Ok(c) => CodePoint::Unicode(c),
Err(e) => CodePoint::UnpairedSurrogate(e.unpaired_surrogate()), Err(e) => CodePoint::UnpairedSurrogate(e.unpaired_surrogate()),

7
boa_parser/src/lexer/mod.rs

@ -318,12 +318,7 @@ impl<R> Lexer<R> {
} }
}?; }?;
if token.kind() == &TokenKind::Comment { Ok(Some(token))
// Skip comment
self.next(interner)
} else {
Ok(Some(token))
}
} else { } else {
Err(Error::syntax( Err(Error::syntax(
format!( format!(

14
boa_parser/src/lexer/regex.rs

@ -5,7 +5,7 @@ use bitflags::bitflags;
use boa_ast::Position; use boa_ast::Position;
use boa_interner::{Interner, Sym}; use boa_interner::{Interner, Sym};
use boa_profiler::Profiler; use boa_profiler::Profiler;
use regress::Regex; use regress::{Flags, Regex};
use std::{ use std::{
io::{self, ErrorKind, Read}, io::{self, ErrorKind, Read},
str::{self, FromStr}, str::{self, FromStr},
@ -237,3 +237,15 @@ impl ToString for RegExpFlags {
s s
} }
} }
/// Converts the lexer's [`RegExpFlags`] bit set into the `Flags` struct imported
/// from `regress`.
///
/// Only the ignore-case, multiline, dot-all and unicode bits are carried over;
/// every other field of `Flags` keeps its `Default` value.
impl From<RegExpFlags> for Flags {
    fn from(value: RegExpFlags) -> Self {
        // Read each relevant bit once, then build the struct with field shorthand.
        let icase = value.contains(RegExpFlags::IGNORE_CASE);
        let multiline = value.contains(RegExpFlags::MULTILINE);
        let dot_all = value.contains(RegExpFlags::DOT_ALL);
        let unicode = value.contains(RegExpFlags::UNICODE);

        Self {
            icase,
            multiline,
            dot_all,
            unicode,
            ..Self::default()
        }
    }
}

2
boa_parser/src/parser/cursor/buffered_lexer/mod.rs

@ -152,8 +152,8 @@ where
} else { } else {
self.peeked[self.write_index] = self.lexer.next(interner)?; self.peeked[self.write_index] = self.lexer.next(interner)?;
} }
self.write_index = (self.write_index + 1) % PEEK_BUF_SIZE;
self.write_index = (self.write_index + 1) % PEEK_BUF_SIZE;
debug_assert_ne!( debug_assert_ne!(
self.read_index, self.write_index, self.read_index, self.write_index,
"we reached the read index with the write index" "we reached the read index with the write index"

1
test_ignore.toml

@ -15,6 +15,7 @@ features = [
"decorators", "decorators",
"array-grouping", "array-grouping",
"IsHTMLDDA", "IsHTMLDDA",
"legacy-regexp",
# Non-implemented Intl features # Non-implemented Intl features
"intl-normative-optional", "intl-normative-optional",

Loading…
Cancel
Save