
Lazy evaluation for cooked template string (#1103)

* Lazy evaluate cooked template string

  Fix octal escape in string literal
  Add tests
  Fix zero escape
  Fix zero escape lookahead
  Rename variables
  Rename helper functions
  Refactor match arms
  Fix escape line terminator sequence
  Fix single character escape
  Fix line terminator and escape followed by unicode char
  Add NonOctalDecimalEscapeSequence
  Fix comment
  Refactor
  Modify error message
  Add tests
  Rename tests
  Add test for error
  Add comments for unsafe bytes to str
  Update boa/src/syntax/lexer/string.rs
  Co-authored-by: tofpie <75836434+tofpie@users.noreply.github.com>
  Minor refactor
  Remove unsafe bytes to str
  Fix panic when reading invalid utf-8 chars
  Refactor string literal
  Support invalid utf-8 chars in string literal input
  Add cook function for template literal
  Fix line continuation bug
  Add methods for utf16 buffer trait
  Add trait comments
  Add error message for template literal
  Add and fix comments
  Hide unused exported function and modify tests
  Fix bug
  Lazy evaluate cooked template string
  Fix clippy
  Fix test262 stack overflow issue
  Fix invalid setting strict mode with template literal
  Remove unnecessary cache
  Remove

* Add comments

* Minor update
Jevan Chan (committed by GitHub), commit 6333daae3f
Changed files:

  1. boa/src/syntax/ast/node/template/mod.rs (15 lines changed)
  2. boa/src/syntax/lexer/template.rs (124 lines changed)
  3. boa/src/syntax/lexer/tests.rs (6 lines changed)
  4. boa/src/syntax/lexer/token.rs (42 lines changed)
  5. boa/src/syntax/parser/expression/left_hand_side/template.rs (12 lines changed)
  6. boa/src/syntax/parser/expression/primary/mod.rs (12 lines changed)
  7. boa/src/syntax/parser/expression/primary/template/mod.rs (16 lines changed)
  8. boa/src/syntax/parser/function/mod.rs (7 lines changed)
  9. boa/src/syntax/parser/mod.rs (7 lines changed)
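
Taken together, the commits above implement one idea: a template token no longer stores a pre-cooked string; it keeps only the raw source text plus its start position, and the cooked form is computed on demand when the parser needs it, so escape-sequence errors surface only where the specification requires them. A minimal standalone sketch of that idea, using an illustrative LazyTemplate type and a toy validity check rather than Boa's real TemplateString:

    // Illustrative sketch only; the real type is TemplateString in
    // boa/src/syntax/lexer/template.rs and cooks the full escape grammar.
    #[derive(Clone, Debug)]
    struct LazyTemplate {
        raw: String, // stored exactly as entered, escapes untouched
    }

    impl LazyTemplate {
        fn new(raw: impl Into<String>) -> Self {
            Self { raw: raw.into() }
        }

        // Cooking is deferred until a caller needs the cooked value; a bad
        // escape becomes an error here instead of failing the whole lexer pass.
        fn to_owned_cooked(&self) -> Result<String, String> {
            if self.raw.contains(r"\u{") && !self.raw.contains('}') {
                return Err("unterminated unicode escape".to_owned());
            }
            Ok(self.raw.clone()) // real cooking would interpret every escape
        }
    }

    fn main() {
        let ok = LazyTemplate::new("hello");
        let bad = LazyTemplate::new(r"hello \u{2764");
        assert!(ok.to_owned_cooked().is_ok());
        // Tagged templates turn a failed cook into None (later `undefined`);
        // plain templates report it as a syntax error instead.
        assert!(bad.to_owned_cooked().ok().is_none());
    }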

boa/src/syntax/ast/node/template/mod.rs (15 lines changed)

@@ -69,12 +69,17 @@ impl fmt::Display for TemplateLit {
 pub struct TaggedTemplate {
     tag: Box<Node>,
     raws: Vec<Box<str>>,
-    cookeds: Vec<Box<str>>,
+    cookeds: Vec<Option<Box<str>>>,
     exprs: Vec<Node>,
 }
 
 impl TaggedTemplate {
-    pub fn new(tag: Node, raws: Vec<Box<str>>, cookeds: Vec<Box<str>>, exprs: Vec<Node>) -> Self {
+    pub fn new(
+        tag: Node,
+        raws: Vec<Box<str>>,
+        cookeds: Vec<Option<Box<str>>>,
+        exprs: Vec<Node>,
+    ) -> Self {
         Self {
             tag: Box::new(tag),
             raws,
@@ -96,7 +101,11 @@ impl Executable for TaggedTemplate {
         }
 
         for (i, cooked) in self.cookeds.iter().enumerate() {
-            template_object.set_field(i, Value::from(cooked), context)?;
+            if let Some(cooked) = cooked {
+                template_object.set_field(i, Value::from(cooked), context)?;
+            } else {
+                template_object.set_field(i, Value::undefined(), context)?;
+            }
         }
 
         template_object.set_field("raw", raw_array, context)?;
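
With cookeds now holding Option<Box<str>>, an element whose escape sequences could not be cooked is stored as None and shows up as `undefined` on the tagged template object, while its raw text stays available. A standalone sketch of that mapping, with an illustrative Value enum standing in for Boa's:

    // Illustrative Value type; Boa's real Value::undefined() plays this role.
    #[derive(Debug, PartialEq)]
    enum Value {
        Undefined,
        String(String),
    }

    fn cooked_to_value(cooked: &Option<Box<str>>) -> Value {
        match cooked {
            Some(s) => Value::String(s.to_string()),
            None => Value::Undefined, // cook failed: expose `undefined`
        }
    }

    fn main() {
        let cookeds: Vec<Option<Box<str>>> = vec![Some("ok".into()), None];
        assert_eq!(cooked_to_value(&cookeds[0]), Value::String("ok".into()));
        assert_eq!(cooked_to_value(&cookeds[1]), Value::Undefined);
    }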

boa/src/syntax/lexer/template.rs (124 lines changed)

@@ -11,6 +11,80 @@ use crate::{
 };
 use std::io::{self, ErrorKind, Read};
 
+#[cfg(feature = "deser")]
+use serde::{Deserialize, Serialize};
+
+#[cfg_attr(feature = "deser", derive(Serialize, Deserialize))]
+#[derive(Clone, PartialEq, Debug)]
+pub struct TemplateString {
+    /// The start position of the template string. Used to make lexer error if `to_owned_cooked` failed.
+    start_pos: Position,
+    /// The template string of template literal with argument `raw` true.
+    raw: Box<str>,
+}
+
+impl TemplateString {
+    pub fn new<R>(raw: R, start_pos: Position) -> Self
+    where
+        R: Into<Box<str>>,
+    {
+        Self {
+            start_pos,
+            raw: raw.into(),
+        }
+    }
+
+    /// Converts the raw template string into a mutable string slice.
+    ///
+    /// More information:
+    ///  - [ECMAScript reference][spec]
+    ///
+    /// [spec]: https://tc39.es/ecma262/#sec-static-semantics-templatestrings
+    pub fn as_raw(&self) -> &str {
+        self.raw.as_ref()
+    }
+
+    /// Creats a new cooked template string. Returns a lexer error if it fails to cook the template string.
+    ///
+    /// More information:
+    ///  - [ECMAScript reference][spec]
+    ///
+    /// [spec]: https://tc39.es/ecma262/#sec-static-semantics-templatestrings
+    pub fn to_owned_cooked(&self) -> Result<Box<str>, Error> {
+        let mut cursor = Cursor::with_position(self.raw.as_bytes(), self.start_pos);
+        let mut buf: Vec<u16> = Vec::new();
+
+        loop {
+            let ch_start_pos = cursor.pos();
+            let ch = cursor.next_char()?;
+
+            match ch {
+                Some(0x005C /* \ */) => {
+                    let escape_value = StringLiteral::take_escape_sequence_or_line_continuation(
+                        &mut cursor,
+                        ch_start_pos,
+                        true,
+                        true,
+                    )?;
+
+                    if let Some(escape_value) = escape_value {
+                        buf.push_code_point(escape_value);
+                    }
+                }
+                Some(ch) => {
+                    // The caller guarantees that sequences '`' and '${' never appear
+                    // LineTerminatorSequence <CR> <LF> is consumed by `cursor.next_char()` and returns <LF>,
+                    // which matches the TV of <CR> <LF>
+                    buf.push_code_point(ch);
+                }
+                None => break,
+            }
+        }
+
+        Ok(buf.to_string_lossy().into())
+    }
+}
+
 /// Template literal lexing.
 ///
 /// Expects: Initial ` to already be consumed by cursor.
@@ -43,21 +117,19 @@ impl<R> Tokenizer<R> for TemplateLiteral {
            match ch {
                0x0060 /* ` */ => {
                    let raw = buf.to_string_lossy();
-                    // TODO: Cook the raw string only when needed (lazy evaluation)
-                    let cooked = Self::cook_template_string(&raw, start_pos, cursor.strict_mode())?;
+                    let template_string = TemplateString::new(raw, start_pos);
                    return Ok(Token::new(
-                        TokenKind::template_no_substitution(raw, cooked),
+                        TokenKind::template_no_substitution(template_string),
                        Span::new(start_pos, cursor.pos()),
                    ));
                }
                0x0024 /* $ */ if cursor.next_is(b'{')? => {
                    let raw = buf.to_string_lossy();
-                    // TODO: Cook the raw string only when needed (lazy evaluation)
-                    let cooked = Self::cook_template_string(&raw, start_pos, cursor.strict_mode())?;
+                    let template_string = TemplateString::new(raw, start_pos);
                    return Ok(Token::new(
-                        TokenKind::template_middle(raw, cooked),
+                        TokenKind::template_middle(template_string),
                        Span::new(start_pos, cursor.pos()),
                    ));
                }
@@ -82,43 +154,3 @@ impl<R> Tokenizer<R> for TemplateLiteral {
         }
     }
 }
-
-impl TemplateLiteral {
-    fn cook_template_string(
-        raw: &str,
-        start_pos: Position,
-        is_strict_mode: bool,
-    ) -> Result<String, Error> {
-        let mut cursor = Cursor::with_position(raw.as_bytes(), start_pos);
-        let mut buf: Vec<u16> = Vec::new();
-
-        loop {
-            let ch_start_pos = cursor.pos();
-            let ch = cursor.next_char()?;
-
-            match ch {
-                Some(0x005C /* \ */) => {
-                    if let Some(escape_value) =
-                        StringLiteral::take_escape_sequence_or_line_continuation(
-                            &mut cursor,
-                            ch_start_pos,
-                            is_strict_mode,
-                            true,
-                        )?
-                    {
-                        buf.push_code_point(escape_value);
-                    }
-                }
-                Some(ch) => {
-                    // The caller guarantees that sequences '`' and '${' never appear
-                    // LineTerminatorSequence <CR> <LF> is consumed by `cursor.next_char()` and returns <LF>,
-                    // which matches the TV of <CR> <LF>
-                    buf.push_code_point(ch);
-                }
-                None => break,
-            }
-        }
-
-        Ok(buf.to_string_lossy())
-    }
-}
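
The cooking loop added above walks the raw text again with a fresh cursor positioned at the template's original start, so a cook error can still point at the right source location. A simplified, standalone illustration of that control flow (only \n, an escaped backtick, and line continuations are handled; it is not Boa's code):

    // Toy cook(): escape handling trimmed down to show the loop structure.
    fn cook(raw: &str) -> Result<String, String> {
        let mut out = String::new();
        let mut chars = raw.chars();
        while let Some(ch) = chars.next() {
            if ch != '\\' {
                out.push(ch); // ordinary code point: copied through
                continue;
            }
            match chars.next() {
                Some('n') => out.push('\n'),   // single-character escape
                Some('`') => out.push('`'),    // escaped backtick
                Some('\n') => {}               // line continuation adds nothing
                Some(other) => return Err(format!("unsupported escape \\{}", other)),
                None => return Err("dangling backslash".to_owned()),
            }
        }
        Ok(out)
    }

    fn main() {
        assert_eq!(cook(r"a\nb").unwrap(), "a\nb");
        assert_eq!(cook("a\\\nb").unwrap(), "ab");
        assert!(cook(r"\q").is_err());
    }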

boa/src/syntax/lexer/tests.rs (6 lines changed)

@@ -6,6 +6,7 @@ use super::token::Numeric;
 use super::*;
 use super::{Error, Position};
 use crate::syntax::ast::Keyword;
+use crate::syntax::lexer::template::TemplateString;
 use std::str;
 
 fn span(start: (u32, u32), end: (u32, u32)) -> Span {
@@ -136,7 +137,10 @@ fn check_template_literal_simple() {
     assert_eq!(
         lexer.next().unwrap().unwrap().kind(),
-        &TokenKind::template_no_substitution("I'm a template literal", "I'm a template literal")
+        &TokenKind::template_no_substitution(TemplateString::new(
+            "I'm a template literal",
+            Position::new(1, 1)
+        ))
     );
 }

boa/src/syntax/lexer/token.rs (42 lines changed)

@@ -10,8 +10,8 @@ use super::regex::RegExpFlags;
 use crate::{
     builtins::BigInt,
     syntax::ast::{Keyword, Punctuator, Span},
+    syntax::lexer::template::TemplateString,
 };
 use std::fmt::{self, Debug, Display, Formatter};
 
 #[cfg(feature = "deser")]
@@ -126,20 +126,10 @@ pub enum TokenKind {
     StringLiteral(Box<str>),
 
     /// A part of a template literal without substitution.
-    TemplateNoSubstitution {
-        /// The string as it has been entered, without processing escape sequences.
-        raw: Box<str>,
-        /// The raw string with escape sequences processed.
-        cooked: Box<str>,
-    },
+    TemplateNoSubstitution(TemplateString),
 
     /// The part of a template literal between substitutions
-    TemplateMiddle {
-        /// The string as it has been entered, without processing escape sequences.
-        raw: Box<str>,
-        /// The raw string with escape sequences processed.
-        cooked: Box<str>,
-    },
+    TemplateMiddle(TemplateString),
 
     /// A regular expression, consisting of body and flags.
     RegularExpressionLiteral(Box<str>, RegExpFlags),
@@ -220,26 +210,12 @@ impl TokenKind {
         Self::StringLiteral(lit.into())
     }
 
-    pub fn template_middle<R, C>(raw: R, cooked: C) -> Self
-    where
-        R: Into<Box<str>>,
-        C: Into<Box<str>>,
-    {
-        Self::TemplateMiddle {
-            raw: raw.into(),
-            cooked: cooked.into(),
-        }
+    pub fn template_middle(template_string: TemplateString) -> Self {
+        Self::TemplateMiddle(template_string)
     }
 
-    pub fn template_no_substitution<R, C>(raw: R, cooked: C) -> Self
-    where
-        R: Into<Box<str>>,
-        C: Into<Box<str>>,
-    {
-        Self::TemplateNoSubstitution {
-            raw: raw.into(),
-            cooked: cooked.into(),
-        }
+    pub fn template_no_substitution(template_string: TemplateString) -> Self {
+        Self::TemplateNoSubstitution(template_string)
     }
 
     /// Creates a `RegularExpressionLiteral` token kind.
@@ -275,8 +251,8 @@ impl Display for TokenKind {
             Self::NumericLiteral(Numeric::BigInt(ref num)) => write!(f, "{}n", num),
             Self::Punctuator(ref punc) => write!(f, "{}", punc),
            Self::StringLiteral(ref lit) => write!(f, "{}", lit),
-            Self::TemplateNoSubstitution { ref cooked, .. } => write!(f, "{}", cooked),
-            Self::TemplateMiddle { ref cooked, .. } => write!(f, "{}", cooked),
+            Self::TemplateNoSubstitution(ref ts) => write!(f, "{}", ts.as_raw()),
+            Self::TemplateMiddle(ref ts) => write!(f, "{}", ts.as_raw()),
            Self::RegularExpressionLiteral(ref body, ref flags) => write!(f, "/{}/{}", body, flags),
            Self::LineTerminator => write!(f, "line terminator"),
            Self::Comment => write!(f, "comment"),
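
Note that Display now prints the raw text via as_raw(); a likely reason is that the cooked form comes from a fallible on-demand conversion, and fmt::Display has no way to surface that error, so the token is shown as it was entered. A standalone sketch of that choice with illustrative stand-in types:

    use std::fmt;

    // Illustrative stand-ins for TemplateString and the token kind variant.
    struct TemplateString {
        raw: Box<str>,
    }

    impl TemplateString {
        fn as_raw(&self) -> &str {
            &self.raw
        }
    }

    struct TemplateMiddle(TemplateString);

    impl fmt::Display for TemplateMiddle {
        // Printing never cooks: it just echoes the raw, escape-untouched text.
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            write!(f, "{}", self.0.as_raw())
        }
    }

    fn main() {
        let tok = TemplateMiddle(TemplateString { raw: r"a\tb".into() });
        assert_eq!(tok.to_string(), r"a\tb");
    }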

boa/src/syntax/parser/expression/left_hand_side/template.rs (12 lines changed)

@@ -59,9 +59,9 @@ where
        loop {
            match token.kind() {
-                TokenKind::TemplateMiddle { raw, cooked } => {
-                    raws.push(raw.clone());
-                    cookeds.push(cooked.clone());
+                TokenKind::TemplateMiddle(template_string) => {
+                    raws.push(template_string.as_raw().to_owned().into_boxed_str());
+                    cookeds.push(template_string.to_owned_cooked().ok());
                    exprs.push(
                        Expression::new(true, self.allow_yield, self.allow_await).parse(cursor)?,
                    );
@@ -70,9 +70,9 @@ where
                        "template literal",
                    )?;
                }
-                TokenKind::TemplateNoSubstitution { raw, cooked } => {
-                    raws.push(raw.clone());
-                    cookeds.push(cooked.clone());
+                TokenKind::TemplateNoSubstitution(template_string) => {
+                    raws.push(template_string.as_raw().to_owned().into_boxed_str());
+                    cookeds.push(template_string.to_owned_cooked().ok());
                    return Ok(Node::from(TaggedTemplate::new(
                        self.tag, raws, cookeds, exprs,
                    )));

boa/src/syntax/parser/expression/primary/mod.rs (12 lines changed)

@@ -107,8 +107,9 @@ where
            TokenKind::BooleanLiteral(boolean) => Ok(Const::from(*boolean).into()),
            TokenKind::NullLiteral => Ok(Const::Null.into()),
            TokenKind::Identifier(ident) => Ok(Identifier::from(ident.as_ref()).into()), // TODO: IdentifierReference
-            TokenKind::StringLiteral(s) | TokenKind::TemplateNoSubstitution { cooked: s, .. } => {
-                Ok(Const::from(s.as_ref()).into())
+            TokenKind::StringLiteral(s) => Ok(Const::from(s.as_ref()).into()),
+            TokenKind::TemplateNoSubstitution(template_string) => {
+                Ok(Const::from(template_string.to_owned_cooked().map_err(ParseError::lex)?).into())
            }
            TokenKind::NumericLiteral(Numeric::Integer(num)) => Ok(Const::from(*num).into()),
            TokenKind::NumericLiteral(Numeric::Rational(num)) => Ok(Const::from(*num).into()),
@@ -138,11 +139,14 @@ where
                    Err(ParseError::unexpected(tok, "regular expression literal"))
                }
            }
-            TokenKind::TemplateMiddle { cooked, .. } => TemplateLiteral::new(
+            TokenKind::TemplateMiddle(template_string) => TemplateLiteral::new(
                self.allow_yield,
                self.allow_await,
                tok.span().start(),
-                cooked.as_ref(),
+                template_string
+                    .to_owned_cooked()
+                    .map_err(ParseError::lex)?
+                    .as_ref(),
            )
            .parse(cursor)
            .map(Node::TemplateLit),
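
Two error policies fall out of the lazy cook, and the parser hunks above (together with the tagged-template parser in the previous file) encode them: tagged templates convert a failed cook into None with .ok(), while plain template literals propagate it as a parse error via map_err(ParseError::lex)?. A standalone sketch of the contrast, with illustrative error types rather than Boa's:

    // Illustrative error types; Boa uses its lexer Error and ParseError here.
    #[derive(Debug)]
    struct LexError(String);
    #[derive(Debug)]
    struct ParseError(String);

    // Stand-in for TemplateString::to_owned_cooked.
    fn to_owned_cooked(raw: &str) -> Result<String, LexError> {
        if raw.contains(r"\unicode") {
            Err(LexError("invalid escape sequence".to_owned()))
        } else {
            Ok(raw.to_owned())
        }
    }

    // Tagged template element: a cook failure becomes None (later `undefined`).
    fn tagged_cooked(raw: &str) -> Option<String> {
        to_owned_cooked(raw).ok()
    }

    // Plain template element: a cook failure is a syntax error.
    fn plain_cooked(raw: &str) -> Result<String, ParseError> {
        to_owned_cooked(raw).map_err(|e| ParseError(e.0))
    }

    fn main() {
        assert!(tagged_cooked(r"\unicode{2764}").is_none());
        assert!(plain_cooked(r"\unicode{2764}").is_err());
        assert_eq!(plain_cooked("fine").unwrap(), "fine");
    }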

boa/src/syntax/parser/expression/primary/template/mod.rs (16 lines changed)

@@ -74,10 +74,10 @@ where
        loop {
            match cursor.lex_template(self.start)?.kind() {
-                TokenKind::TemplateMiddle {
-                    cooked: template, ..
-                } => {
-                    elements.push(TemplateElement::String(template.to_owned()));
+                TokenKind::TemplateMiddle(template_string) => {
+                    let cooked = template_string.to_owned_cooked().map_err(ParseError::lex)?;
+                    elements.push(TemplateElement::String(cooked));
                    elements.push(TemplateElement::Expr(
                        Expression::new(true, self.allow_yield, self.allow_await).parse(cursor)?,
                    ));
@@ -86,10 +86,10 @@ where
                        "template literal",
                    )?;
                }
-                TokenKind::TemplateNoSubstitution {
-                    cooked: template, ..
-                } => {
-                    elements.push(TemplateElement::String(template.to_owned()));
+                TokenKind::TemplateNoSubstitution(template_string) => {
+                    let cooked = template_string.to_owned_cooked().map_err(ParseError::lex)?;
+                    elements.push(TemplateElement::String(cooked));
                    return Ok(TemplateLit::new(elements));
                }
                _ => {

boa/src/syntax/parser/function/mod.rs (7 lines changed)

@@ -269,11 +269,8 @@ where
            TokenKind::Punctuator(Punctuator::CloseBlock) => {
                return Ok(Vec::new().into());
            }
-            TokenKind::StringLiteral(string)
-            | TokenKind::TemplateNoSubstitution { cooked: string, .. } => {
-                if string == &"use strict".into() {
-                    cursor.set_strict_mode(true);
-                }
+            TokenKind::StringLiteral(string) if string.as_ref() == "use strict" => {
+                cursor.set_strict_mode(true);
            }
            _ => {}
        }

boa/src/syntax/parser/mod.rs (7 lines changed)

@@ -125,11 +125,8 @@ where
        match cursor.peek(0)? {
            Some(tok) => {
                match tok.kind() {
-                    TokenKind::StringLiteral(string)
-                    | TokenKind::TemplateNoSubstitution { cooked: string, .. } => {
-                        if string.as_ref() == "use strict" {
-                            cursor.set_strict_mode(true);
-                        }
+                    TokenKind::StringLiteral(string) if string.as_ref() == "use strict" => {
+                        cursor.set_strict_mode(true);
                    }
                    _ => {}
                }
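
The last two hunks implement the commit item "Fix invalid setting strict mode with template literal": a Directive Prologue consists of string literals only, so a `use strict` written as a template literal must not enable strict mode, and the TemplateNoSubstitution arm is dropped from both directive checks. A standalone sketch of the rule with an illustrative token type:

    // Illustrative token kinds; only a plain string literal can be a directive.
    enum TokenKind {
        StringLiteral(Box<str>),
        TemplateNoSubstitution(Box<str>), // raw text; cooked lazily elsewhere
    }

    fn enables_strict_mode(tok: &TokenKind) -> bool {
        matches!(tok, TokenKind::StringLiteral(s) if s.as_ref() == "use strict")
    }

    fn main() {
        assert!(enables_strict_mode(&TokenKind::StringLiteral(
            "use strict".into()
        )));
        // A template literal spelling of the directive is ignored.
        assert!(!enables_strict_mode(&TokenKind::TemplateNoSubstitution(
            "use strict".into()
        )));
    }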
