From 6333daae3fdc0949a3f48e16e3d88266f1c4fa50 Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Fri, 26 Mar 2021 06:45:32 -0700 Subject: [PATCH] Lazy evaluation for cooked template string (#1103) * Lazy evaluate cooked template string Fix octal escape in string literal Add tests Fix zero escape Fix zero escape lookahead Rename variables Rename helper functions Refactor match arms Fix escape line terminator sequence Fix single character escape Fix line terminator and escape followed by unicode char Add NonOctalDecimalEscapeSequence Fix comment Refactor Modify error message Add tests Rename tests Add test for error Add comments for unsafe bytes to str Update boa/src/syntax/lexer/string.rs Co-authored-by: tofpie <75836434+tofpie@users.noreply.github.com> Minor refactor Remove unsafe bytes to str Fix panic when reading invalid utf-8 chars Refactor string literal Support invalid utf-8 chars in string literal input Add cook function for template literal Fix line continuation bug Add methods for utf16 buffer trait Add trait comments Add error message for template literal Add and fix comments Hide unused exported function and modify tests Fix bug Lazy evaluate cooked template string Fix clippy Fix test262 stack overflow issue Fix invalid setting strict mode with template literal Remove unnecessary cache Remove * Add comments * Minor update --- boa/src/syntax/ast/node/template/mod.rs | 15 ++- boa/src/syntax/lexer/template.rs | 124 +++++++++++------- boa/src/syntax/lexer/tests.rs | 6 +- boa/src/syntax/lexer/token.rs | 42 ++---- .../expression/left_hand_side/template.rs | 12 +- .../syntax/parser/expression/primary/mod.rs | 12 +- .../parser/expression/primary/template/mod.rs | 16 +-- boa/src/syntax/parser/function/mod.rs | 7 +- boa/src/syntax/parser/mod.rs | 7 +- 9 files changed, 130 insertions(+), 111 deletions(-) diff --git a/boa/src/syntax/ast/node/template/mod.rs b/boa/src/syntax/ast/node/template/mod.rs index 2e6a747a84..5ab8f32169 100644 --- 
a/boa/src/syntax/ast/node/template/mod.rs +++ b/boa/src/syntax/ast/node/template/mod.rs @@ -69,12 +69,17 @@ impl fmt::Display for TemplateLit { pub struct TaggedTemplate { tag: Box, raws: Vec>, - cookeds: Vec>, + cookeds: Vec>>, exprs: Vec, } impl TaggedTemplate { - pub fn new(tag: Node, raws: Vec>, cookeds: Vec>, exprs: Vec) -> Self { + pub fn new( + tag: Node, + raws: Vec>, + cookeds: Vec>>, + exprs: Vec, + ) -> Self { Self { tag: Box::new(tag), raws, @@ -96,7 +101,11 @@ impl Executable for TaggedTemplate { } for (i, cooked) in self.cookeds.iter().enumerate() { - template_object.set_field(i, Value::from(cooked), context)?; + if let Some(cooked) = cooked { + template_object.set_field(i, Value::from(cooked), context)?; + } else { + template_object.set_field(i, Value::undefined(), context)?; + } } template_object.set_field("raw", raw_array, context)?; diff --git a/boa/src/syntax/lexer/template.rs b/boa/src/syntax/lexer/template.rs index 9636d1c849..76aedeaa7a 100644 --- a/boa/src/syntax/lexer/template.rs +++ b/boa/src/syntax/lexer/template.rs @@ -11,6 +11,80 @@ use crate::{ }; use std::io::{self, ErrorKind, Read}; +#[cfg(feature = "deser")] +use serde::{Deserialize, Serialize}; + +#[cfg_attr(feature = "deser", derive(Serialize, Deserialize))] +#[derive(Clone, PartialEq, Debug)] +pub struct TemplateString { + /// The start position of the template string. Used to create the lexer error if `to_owned_cooked` fails. + start_pos: Position, + /// The template string of template literal with argument `raw` true. + raw: Box, +} + +impl TemplateString { + pub fn new(raw: R, start_pos: Position) -> Self + where + R: Into>, + { + Self { + start_pos, + raw: raw.into(), + } + } + + /// Converts the raw template string into an immutable string slice. 
+ /// + /// More information: + /// - [ECMAScript reference][spec] + /// + /// [spec]: https://tc39.es/ecma262/#sec-static-semantics-templatestrings + pub fn as_raw(&self) -> &str { + self.raw.as_ref() + } + + /// Creates a new cooked template string. Returns a lexer error if it fails to cook the template string. + /// + /// More information: + /// - [ECMAScript reference][spec] + /// + /// [spec]: https://tc39.es/ecma262/#sec-static-semantics-templatestrings + pub fn to_owned_cooked(&self) -> Result, Error> { + let mut cursor = Cursor::with_position(self.raw.as_bytes(), self.start_pos); + let mut buf: Vec = Vec::new(); + + loop { + let ch_start_pos = cursor.pos(); + let ch = cursor.next_char()?; + + match ch { + Some(0x005C /* \ */) => { + let escape_value = StringLiteral::take_escape_sequence_or_line_continuation( + &mut cursor, + ch_start_pos, + true, + true, + )?; + + if let Some(escape_value) = escape_value { + buf.push_code_point(escape_value); + } + } + Some(ch) => { + // The caller guarantees that sequences '`' and '${' never appear + // LineTerminatorSequence <CR><LF> is consumed by `cursor.next_char()` and returns <LF>, + // which matches the TV of <CR><LF> + buf.push_code_point(ch); + } + None => break, + } + } + + Ok(buf.to_string_lossy().into()) + } +} + /// Template literal lexing. /// /// Expects: Initial ` to already be consumed by cursor. @@ -43,21 +117,19 @@ impl Tokenizer for TemplateLiteral { match ch { 0x0060 /* ` */ => { let raw = buf.to_string_lossy(); - // TODO: Cook the raw string only when needed (lazy evaluation) - let cooked = Self::cook_template_string(&raw, start_pos, cursor.strict_mode())?; + let template_string = TemplateString::new(raw, start_pos); return Ok(Token::new( - TokenKind::template_no_substitution(raw, cooked), + TokenKind::template_no_substitution(template_string), Span::new(start_pos, cursor.pos()), )); } 0x0024 /* $ */ if cursor.next_is(b'{')? 
=> { let raw = buf.to_string_lossy(); - // TODO: Cook the raw string only when needed (lazy evaluation) - let cooked = Self::cook_template_string(&raw, start_pos, cursor.strict_mode())?; + let template_string = TemplateString::new(raw, start_pos); return Ok(Token::new( - TokenKind::template_middle(raw, cooked), + TokenKind::template_middle(template_string), Span::new(start_pos, cursor.pos()), )); } @@ -82,43 +154,3 @@ impl Tokenizer for TemplateLiteral { } } } - -impl TemplateLiteral { - fn cook_template_string( - raw: &str, - start_pos: Position, - is_strict_mode: bool, - ) -> Result { - let mut cursor = Cursor::with_position(raw.as_bytes(), start_pos); - let mut buf: Vec = Vec::new(); - - loop { - let ch_start_pos = cursor.pos(); - let ch = cursor.next_char()?; - - match ch { - Some(0x005C /* \ */) => { - if let Some(escape_value) = - StringLiteral::take_escape_sequence_or_line_continuation( - &mut cursor, - ch_start_pos, - is_strict_mode, - true, - )? - { - buf.push_code_point(escape_value); - } - } - Some(ch) => { - // The caller guarantees that sequences '`' and '${' never appear - // LineTerminatorSequence is consumed by `cursor.next_char()` and returns , - // which matches the TV of - buf.push_code_point(ch); - } - None => break, - } - } - - Ok(buf.to_string_lossy()) - } -} diff --git a/boa/src/syntax/lexer/tests.rs b/boa/src/syntax/lexer/tests.rs index 2cafc5c900..eb6c4f71a2 100644 --- a/boa/src/syntax/lexer/tests.rs +++ b/boa/src/syntax/lexer/tests.rs @@ -6,6 +6,7 @@ use super::token::Numeric; use super::*; use super::{Error, Position}; use crate::syntax::ast::Keyword; +use crate::syntax::lexer::template::TemplateString; use std::str; fn span(start: (u32, u32), end: (u32, u32)) -> Span { @@ -136,7 +137,10 @@ fn check_template_literal_simple() { assert_eq!( lexer.next().unwrap().unwrap().kind(), - &TokenKind::template_no_substitution("I'm a template literal", "I'm a template literal") + &TokenKind::template_no_substitution(TemplateString::new( + "I'm a 
template literal", + Position::new(1, 1) + )) ); } diff --git a/boa/src/syntax/lexer/token.rs b/boa/src/syntax/lexer/token.rs index 82a4f2e984..e2042c2f33 100644 --- a/boa/src/syntax/lexer/token.rs +++ b/boa/src/syntax/lexer/token.rs @@ -10,8 +10,8 @@ use super::regex::RegExpFlags; use crate::{ builtins::BigInt, syntax::ast::{Keyword, Punctuator, Span}, + syntax::lexer::template::TemplateString, }; - use std::fmt::{self, Debug, Display, Formatter}; #[cfg(feature = "deser")] @@ -126,20 +126,10 @@ pub enum TokenKind { StringLiteral(Box), /// A part of a template literal without substitution. - TemplateNoSubstitution { - /// The string as it has been entered, without processing escape sequences. - raw: Box, - /// The raw string with escape sequences processed. - cooked: Box, - }, + TemplateNoSubstitution(TemplateString), /// The part of a template literal between substitutions - TemplateMiddle { - /// The string as it has been entered, without processing escape sequences. - raw: Box, - /// The raw string with escape sequences processed. - cooked: Box, - }, + TemplateMiddle(TemplateString), /// A regular expression, consisting of body and flags. RegularExpressionLiteral(Box, RegExpFlags), @@ -220,26 +210,12 @@ impl TokenKind { Self::StringLiteral(lit.into()) } - pub fn template_middle(raw: R, cooked: C) -> Self - where - R: Into>, - C: Into>, - { - Self::TemplateMiddle { - raw: raw.into(), - cooked: cooked.into(), - } + pub fn template_middle(template_string: TemplateString) -> Self { + Self::TemplateMiddle(template_string) } - pub fn template_no_substitution(raw: R, cooked: C) -> Self - where - R: Into>, - C: Into>, - { - Self::TemplateNoSubstitution { - raw: raw.into(), - cooked: cooked.into(), - } + pub fn template_no_substitution(template_string: TemplateString) -> Self { + Self::TemplateNoSubstitution(template_string) } /// Creates a `RegularExpressionLiteral` token kind. 
@@ -275,8 +251,8 @@ impl Display for TokenKind { Self::NumericLiteral(Numeric::BigInt(ref num)) => write!(f, "{}n", num), Self::Punctuator(ref punc) => write!(f, "{}", punc), Self::StringLiteral(ref lit) => write!(f, "{}", lit), - Self::TemplateNoSubstitution { ref cooked, .. } => write!(f, "{}", cooked), - Self::TemplateMiddle { ref cooked, .. } => write!(f, "{}", cooked), + Self::TemplateNoSubstitution(ref ts) => write!(f, "{}", ts.as_raw()), + Self::TemplateMiddle(ref ts) => write!(f, "{}", ts.as_raw()), Self::RegularExpressionLiteral(ref body, ref flags) => write!(f, "/{}/{}", body, flags), Self::LineTerminator => write!(f, "line terminator"), Self::Comment => write!(f, "comment"), diff --git a/boa/src/syntax/parser/expression/left_hand_side/template.rs b/boa/src/syntax/parser/expression/left_hand_side/template.rs index 0c5d78e374..a08194ec98 100644 --- a/boa/src/syntax/parser/expression/left_hand_side/template.rs +++ b/boa/src/syntax/parser/expression/left_hand_side/template.rs @@ -59,9 +59,9 @@ where loop { match token.kind() { - TokenKind::TemplateMiddle { raw, cooked } => { - raws.push(raw.clone()); - cookeds.push(cooked.clone()); + TokenKind::TemplateMiddle(template_string) => { + raws.push(template_string.as_raw().to_owned().into_boxed_str()); + cookeds.push(template_string.to_owned_cooked().ok()); exprs.push( Expression::new(true, self.allow_yield, self.allow_await).parse(cursor)?, ); @@ -70,9 +70,9 @@ where "template literal", )?; } - TokenKind::TemplateNoSubstitution { raw, cooked } => { - raws.push(raw.clone()); - cookeds.push(cooked.clone()); + TokenKind::TemplateNoSubstitution(template_string) => { + raws.push(template_string.as_raw().to_owned().into_boxed_str()); + cookeds.push(template_string.to_owned_cooked().ok()); return Ok(Node::from(TaggedTemplate::new( self.tag, raws, cookeds, exprs, ))); diff --git a/boa/src/syntax/parser/expression/primary/mod.rs b/boa/src/syntax/parser/expression/primary/mod.rs index 16183f002e..20ec3c8656 100644 --- 
a/boa/src/syntax/parser/expression/primary/mod.rs +++ b/boa/src/syntax/parser/expression/primary/mod.rs @@ -107,8 +107,9 @@ where TokenKind::BooleanLiteral(boolean) => Ok(Const::from(*boolean).into()), TokenKind::NullLiteral => Ok(Const::Null.into()), TokenKind::Identifier(ident) => Ok(Identifier::from(ident.as_ref()).into()), // TODO: IdentifierReference - TokenKind::StringLiteral(s) | TokenKind::TemplateNoSubstitution { cooked: s, .. } => { - Ok(Const::from(s.as_ref()).into()) + TokenKind::StringLiteral(s) => Ok(Const::from(s.as_ref()).into()), + TokenKind::TemplateNoSubstitution(template_string) => { + Ok(Const::from(template_string.to_owned_cooked().map_err(ParseError::lex)?).into()) } TokenKind::NumericLiteral(Numeric::Integer(num)) => Ok(Const::from(*num).into()), TokenKind::NumericLiteral(Numeric::Rational(num)) => Ok(Const::from(*num).into()), @@ -138,11 +139,14 @@ where Err(ParseError::unexpected(tok, "regular expression literal")) } } - TokenKind::TemplateMiddle { cooked, .. } => TemplateLiteral::new( + TokenKind::TemplateMiddle(template_string) => TemplateLiteral::new( self.allow_yield, self.allow_await, tok.span().start(), - cooked.as_ref(), + template_string + .to_owned_cooked() + .map_err(ParseError::lex)? + .as_ref(), ) .parse(cursor) .map(Node::TemplateLit), diff --git a/boa/src/syntax/parser/expression/primary/template/mod.rs b/boa/src/syntax/parser/expression/primary/template/mod.rs index ab03356cfb..cdfed7ea4f 100644 --- a/boa/src/syntax/parser/expression/primary/template/mod.rs +++ b/boa/src/syntax/parser/expression/primary/template/mod.rs @@ -74,10 +74,10 @@ where loop { match cursor.lex_template(self.start)?.kind() { - TokenKind::TemplateMiddle { - cooked: template, .. 
- } => { - elements.push(TemplateElement::String(template.to_owned())); + TokenKind::TemplateMiddle(template_string) => { + let cooked = template_string.to_owned_cooked().map_err(ParseError::lex)?; + + elements.push(TemplateElement::String(cooked)); elements.push(TemplateElement::Expr( Expression::new(true, self.allow_yield, self.allow_await).parse(cursor)?, )); @@ -86,10 +86,10 @@ where "template literal", )?; } - TokenKind::TemplateNoSubstitution { - cooked: template, .. - } => { - elements.push(TemplateElement::String(template.to_owned())); + TokenKind::TemplateNoSubstitution(template_string) => { + let cooked = template_string.to_owned_cooked().map_err(ParseError::lex)?; + + elements.push(TemplateElement::String(cooked)); return Ok(TemplateLit::new(elements)); } _ => { diff --git a/boa/src/syntax/parser/function/mod.rs b/boa/src/syntax/parser/function/mod.rs index 8340fe7964..ee3463cef3 100644 --- a/boa/src/syntax/parser/function/mod.rs +++ b/boa/src/syntax/parser/function/mod.rs @@ -269,11 +269,8 @@ where TokenKind::Punctuator(Punctuator::CloseBlock) => { return Ok(Vec::new().into()); } - TokenKind::StringLiteral(string) - | TokenKind::TemplateNoSubstitution { cooked: string, .. } => { - if string == &"use strict".into() { - cursor.set_strict_mode(true); - } + TokenKind::StringLiteral(string) if string.as_ref() == "use strict" => { + cursor.set_strict_mode(true); } _ => {} } diff --git a/boa/src/syntax/parser/mod.rs b/boa/src/syntax/parser/mod.rs index d33f6ea15d..f6b677a77c 100644 --- a/boa/src/syntax/parser/mod.rs +++ b/boa/src/syntax/parser/mod.rs @@ -125,11 +125,8 @@ where match cursor.peek(0)? { Some(tok) => { match tok.kind() { - TokenKind::StringLiteral(string) - | TokenKind::TemplateNoSubstitution { cooked: string, .. } => { - if string.as_ref() == "use strict" { - cursor.set_strict_mode(true); - } + TokenKind::StringLiteral(string) if string.as_ref() == "use strict" => { + cursor.set_strict_mode(true); } _ => {} }