From f62a77d3fa719a5a2ead7713668d04fb1d5b268f Mon Sep 17 00:00:00 2001 From: tofpie <75836434+tofpie@users.noreply.github.com> Date: Tue, 12 Jan 2021 20:42:40 +0100 Subject: [PATCH] Implement template literals and tagged templates (#997) * Implement template literals and tagged templates * Merge master into for-in * Implement suggestions from review * Implement suggestions from review Co-authored-by: tofpie --- boa/src/syntax/ast/node/mod.rs | 12 + boa/src/syntax/ast/node/template/mod.rs | 156 +++++++++++ boa/src/syntax/ast/node/template/tests.rs | 31 ++ boa/src/syntax/lexer/cursor.rs | 10 + boa/src/syntax/lexer/mod.rs | 7 + boa/src/syntax/lexer/string.rs | 264 ++++++++++-------- boa/src/syntax/lexer/template.rs | 80 ++++-- boa/src/syntax/lexer/tests.rs | 35 ++- boa/src/syntax/lexer/token.rs | 41 ++- .../parser/cursor/buffered_lexer/mod.rs | 6 + boa/src/syntax/parser/cursor/mod.rs | 5 + .../parser/expression/left_hand_side/call.rs | 13 +- .../expression/left_hand_side/member.rs | 14 +- .../parser/expression/left_hand_side/mod.rs | 1 + .../expression/left_hand_side/template.rs | 90 ++++++ .../syntax/parser/expression/primary/mod.rs | 18 +- .../parser/expression/primary/template/mod.rs | 104 +++++++ boa/src/syntax/parser/function/mod.rs | 3 +- boa/src/syntax/parser/mod.rs | 3 +- test_ignore.txt | 4 + 20 files changed, 749 insertions(+), 148 deletions(-) create mode 100644 boa/src/syntax/ast/node/template/mod.rs create mode 100644 boa/src/syntax/ast/node/template/tests.rs create mode 100644 boa/src/syntax/parser/expression/left_hand_side/template.rs create mode 100644 boa/src/syntax/parser/expression/primary/template/mod.rs diff --git a/boa/src/syntax/ast/node/mod.rs b/boa/src/syntax/ast/node/mod.rs index bd6992327e..06119dc9fd 100644 --- a/boa/src/syntax/ast/node/mod.rs +++ b/boa/src/syntax/ast/node/mod.rs @@ -17,6 +17,7 @@ pub mod return_smt; pub mod spread; pub mod statement_list; pub mod switch; +pub mod template; pub mod throw; pub mod try_node; @@ -41,6 +42,7 @@ 
pub use self::{ spread::Spread, statement_list::{RcStatementList, StatementList}, switch::{Case, Switch}, + template::{TaggedTemplate, TemplateLit}, throw::Throw, try_node::{Catch, Finally, Try}, }; @@ -160,6 +162,12 @@ pub enum Node { /// A spread (...x) statement. [More information](./spread/struct.Spread.html). Spread(Spread), + /// A tagged template. [More information](./template/struct.TaggedTemplate.html). + TaggedTemplate(TaggedTemplate), + + /// A template literal. [More information](./template/struct.TemplateLit.html). + TemplateLit(TemplateLit), + /// A throw statement. [More information](./throw/struct.Throw.html). Throw(Throw), @@ -257,6 +265,8 @@ impl Node { Self::BinOp(ref op) => Display::fmt(op, f), Self::UnaryOp(ref op) => Display::fmt(op, f), Self::Return(ref ret) => Display::fmt(ret, f), + Self::TaggedTemplate(ref template) => Display::fmt(template, f), + Self::TemplateLit(ref template) => Display::fmt(template, f), Self::Throw(ref throw) => Display::fmt(throw, f), Self::Assign(ref op) => Display::fmt(op, f), Self::LetDeclList(ref decl) => Display::fmt(decl, f), @@ -309,6 +319,8 @@ impl Executable for Node { Node::UnaryOp(ref op) => op.run(context), Node::New(ref call) => call.run(context), Node::Return(ref ret) => ret.run(context), + Node::TaggedTemplate(ref template) => template.run(context), + Node::TemplateLit(ref template) => template.run(context), Node::Throw(ref throw) => throw.run(context), Node::Assign(ref op) => op.run(context), Node::VarDeclList(ref decl) => decl.run(context), diff --git a/boa/src/syntax/ast/node/template/mod.rs b/boa/src/syntax/ast/node/template/mod.rs new file mode 100644 index 0000000000..89ad2dbbca --- /dev/null +++ b/boa/src/syntax/ast/node/template/mod.rs @@ -0,0 +1,156 @@ +//! Template literal node. 
+ +use super::Node; +use crate::{builtins::Array, exec::Executable, value::Type, BoaProfiler, Context, Result, Value}; +use gc::{Finalize, Trace}; + +#[cfg(feature = "deser")] +use serde::{Deserialize, Serialize}; +use std::fmt; + +#[cfg(test)] +mod tests; + +/// Template literals are string literals allowing embedded expressions. +/// +/// More information: +/// - [ECMAScript reference][spec] +/// - [MDN documentation][mdn] +/// +/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Template_literals +/// [spec]: https://tc39.es/ecma262/#sec-template-literals +#[cfg_attr(feature = "deser", derive(Serialize, Deserialize))] +#[derive(Clone, Debug, Trace, Finalize, PartialEq)] +pub struct TemplateLit { + elements: Vec, +} + +impl TemplateLit { + pub fn new(elements: Vec) -> Self { + TemplateLit { elements } + } +} + +impl Executable for TemplateLit { + fn run(&self, context: &mut Context) -> Result { + let _timer = BoaProfiler::global().start_event("TemplateLiteral", "exec"); + let mut result = String::new(); + + for element in self.elements.iter() { + match element { + TemplateElement::String(s) => { + result.push_str(s); + } + TemplateElement::Expr(node) => { + let value = node.run(context)?; + let s = value.to_string(context)?; + result.push_str(&s); + } + } + } + Ok(result.into()) + } +} + +impl fmt::Display for TemplateLit { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "`")?; + for elt in &self.elements { + match elt { + TemplateElement::String(s) => write!(f, "{}", s)?, + TemplateElement::Expr(n) => write!(f, "${{{}}}", n)?, + } + } + write!(f, "`") + } +} +#[cfg_attr(feature = "deser", derive(Serialize, Deserialize))] +#[derive(Clone, Debug, Trace, Finalize, PartialEq)] +pub struct TaggedTemplate { + tag: Box, + raws: Vec>, + cookeds: Vec>, + exprs: Vec, +} + +impl TaggedTemplate { + pub fn new(tag: Node, raws: Vec>, cookeds: Vec>, exprs: Vec) -> Self { + Self { + tag: Box::new(tag), + raws, + cookeds, + 
exprs, + } + } +} + +impl Executable for TaggedTemplate { + fn run(&self, context: &mut Context) -> Result { + let _timer = BoaProfiler::global().start_event("TaggedTemplate", "exec"); + + let template_object = Array::new_array(context)?; + let raw_array = Array::new_array(context)?; + + for (i, raw) in self.raws.iter().enumerate() { + raw_array.set_field(i, Value::from(raw), context)?; + } + + for (i, cooked) in self.cookeds.iter().enumerate() { + template_object.set_field(i, Value::from(cooked), context)?; + } + template_object.set_field("raw", raw_array, context)?; + + let (this, func) = match *self.tag { + Node::GetConstField(ref get_const_field) => { + let mut obj = get_const_field.obj().run(context)?; + if obj.get_type() != Type::Object { + obj = Value::Object(obj.to_object(context)?); + } + ( + obj.clone(), + obj.get_field(get_const_field.field(), context)?, + ) + } + Node::GetField(ref get_field) => { + let obj = get_field.obj().run(context)?; + let field = get_field.field().run(context)?; + ( + obj.clone(), + obj.get_field(field.to_property_key(context)?, context)?, + ) + } + _ => (context.global_object().clone(), self.tag.run(context)?), + }; + + let mut args = Vec::new(); + args.push(template_object); + for expr in self.exprs.iter() { + args.push(expr.run(context)?); + } + + context.call(&func, &this, &args) + } +} + +impl fmt::Display for TaggedTemplate { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}`", self.tag)?; + for (raw, expr) in self.raws.iter().zip(self.exprs.iter()) { + write!(f, "{}${{{}}}", raw, expr)?; + } + write!(f, "`") + } +} + +impl From for Node { + fn from(template: TaggedTemplate) -> Self { + Node::TaggedTemplate(template) + } +} + +#[cfg_attr(feature = "deser", derive(Serialize, Deserialize))] +#[derive(Clone, Debug, Trace, Finalize, PartialEq)] +pub enum TemplateElement { + String(Box), + Expr(Node), +} diff --git a/boa/src/syntax/ast/node/template/tests.rs b/boa/src/syntax/ast/node/template/tests.rs 
new file mode 100644 index 0000000000..9f9a299ee6 --- /dev/null +++ b/boa/src/syntax/ast/node/template/tests.rs @@ -0,0 +1,31 @@ +use crate::exec; + +#[test] +fn template_literal() { + let scenario = r#" + let a = 10; + `result: ${a} and ${a+10}`; + "#; + + assert_eq!(&exec(scenario), "\"result: 10 and 20\""); +} + +#[test] +fn tagged_template() { + let scenario = r#" + function tag(t, ...args) { + let a = [] + a = a.concat([t[0], t[1], t[2]]); + a = a.concat([t.raw[0], t.raw[1], t.raw[2]]); + a = a.concat([args[0], args[1]]); + return a + } + let a = 10; + tag`result: ${a} \x26 ${a+10}`; + "#; + + assert_eq!( + &exec(scenario), + r#"[ "result: ", " & ", "", "result: ", " \x26 ", "", 10, 20 ]"# + ); +} diff --git a/boa/src/syntax/lexer/cursor.rs b/boa/src/syntax/lexer/cursor.rs index 7446744603..d512bbd4ab 100644 --- a/boa/src/syntax/lexer/cursor.rs +++ b/boa/src/syntax/lexer/cursor.rs @@ -56,6 +56,16 @@ where } } + /// Creates a new Lexer cursor with an initial position. + #[inline] + pub(super) fn with_position(inner: R, pos: Position) -> Self { + Self { + iter: InnerIter::new(inner.bytes()), + pos, + strict_mode: false, + } + } + /// Peeks the next byte. #[inline] pub(super) fn peek(&mut self) -> Result, Error> { diff --git a/boa/src/syntax/lexer/mod.rs b/boa/src/syntax/lexer/mod.rs index cb7b6366d4..38d7412d8e 100644 --- a/boa/src/syntax/lexer/mod.rs +++ b/boa/src/syntax/lexer/mod.rs @@ -281,6 +281,13 @@ impl Lexer { )) } } + + pub(crate) fn lex_template(&mut self, start: Position) -> Result + where + R: Read, + { + TemplateLiteral.lex(&mut self.cursor, start) + } } /// ECMAScript goal symbols. diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs index ab07cc9552..6f20599811 100644 --- a/boa/src/syntax/lexer/string.rs +++ b/boa/src/syntax/lexer/string.rs @@ -44,9 +44,10 @@ impl StringLiteral { /// Terminator for the string. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum StringTerminator { +pub(crate) enum StringTerminator { SingleQuote, DoubleQuote, + End, } impl Tokenizer for StringLiteral { @@ -56,135 +57,172 @@ impl Tokenizer for StringLiteral { { let _timer = BoaProfiler::global().start_event("StringLiteral", "Lexing"); - let mut buf: Vec = Vec::new(); - loop { - let next_chr_start = cursor.pos(); - let next_chr = char::try_from(cursor.next_char()?.ok_or_else(|| { - Error::from(io::Error::new( - ErrorKind::UnexpectedEof, - "unterminated string literal", - )) - })?) - .unwrap(); + let (lit, span) = + unescape_string(cursor, start_pos, self.terminator, cursor.strict_mode())?; - match next_chr { - '\'' if self.terminator == StringTerminator::SingleQuote => { - break; - } - '"' if self.terminator == StringTerminator::DoubleQuote => { - break; - } - '\\' => { - let _timer = BoaProfiler::global() - .start_event("StringLiteral - escape sequence", "Lexing"); - - let escape = cursor.next_byte()?.ok_or_else(|| { - Error::from(io::Error::new( - ErrorKind::UnexpectedEof, - "unterminated escape sequence in string literal", - )) - })?; - - if escape != b'\n' { - match escape { - b'n' => buf.push('\n' as u16), - b'r' => buf.push('\r' as u16), - b't' => buf.push('\t' as u16), - b'b' => buf.push('\x08' as u16), - b'f' => buf.push('\x0c' as u16), - b'0' => buf.push('\0' as u16), - b'x' => { - let mut code_point_utf8_bytes = [0u8; 2]; + Ok(Token::new(TokenKind::string_literal(lit), span)) + } +} + +pub(super) fn unescape_string( + cursor: &mut Cursor, + start_pos: Position, + terminator: StringTerminator, + strict_mode: bool, +) -> Result<(String, Span), Error> +where + R: Read, +{ + let mut buf = Vec::new(); + loop { + let next_chr = cursor.next_char()?.map(char::try_from).transpose().unwrap(); + + match next_chr { + Some('\'') if terminator == StringTerminator::SingleQuote => { + break; + } + Some('"') if terminator == StringTerminator::DoubleQuote => { + break; + } + Some('\\') => { + let 
_timer = + BoaProfiler::global().start_event("StringLiteral - escape sequence", "Lexing"); + + let escape = cursor.peek()?.ok_or_else(|| { + Error::from(io::Error::new( + ErrorKind::UnexpectedEof, + "unterminated escape sequence in literal", + )) + })?; + + if escape <= 0x7f { + let _ = cursor.next_byte()?; + match escape { + b'\n' => (), + b'n' => buf.push('\n' as u16), + b'r' => buf.push('\r' as u16), + b't' => buf.push('\t' as u16), + b'b' => buf.push('\x08' as u16), + b'f' => buf.push('\x0c' as u16), + b'0' => buf.push('\0' as u16), + b'x' => { + let mut code_point_utf8_bytes = [0u8; 2]; + cursor.fill_bytes(&mut code_point_utf8_bytes)?; + let code_point_str = str::from_utf8(&code_point_utf8_bytes) + .expect("malformed Hexadecimal character escape sequence"); + let code_point = + u16::from_str_radix(&code_point_str, 16).map_err(|_| { + Error::syntax( + "invalid Hexadecimal escape sequence", + cursor.pos(), + ) + })?; + + buf.push(code_point); + } + b'u' => { + // Support \u{X..X} (Unicode Codepoint) + if cursor.next_is(b'{')? 
{ + // TODO: use bytes for a bit better performance (using stack) + let mut code_point_buf = Vec::with_capacity(6); + cursor.take_until(b'}', &mut code_point_buf)?; + + let code_point_str = + unsafe { str::from_utf8_unchecked(code_point_buf.as_slice()) }; + // We know this is a single unicode codepoint, convert to u32 + let code_point = + u32::from_str_radix(&code_point_str, 16).map_err(|_| { + Error::syntax( + "malformed Unicode character escape sequence", + cursor.pos(), + ) + })?; + + // UTF16Encoding of a numeric code point value + if code_point > 0x10_FFFF { + return Err(Error::syntax("Unicode codepoint must not be greater than 0x10FFFF in escape sequence", cursor.pos())); + } else if code_point <= 65535 { + buf.push(code_point as u16); + } else { + let cu1 = ((code_point - 65536) / 1024 + 0xD800) as u16; + let cu2 = ((code_point - 65536) % 1024 + 0xDC00) as u16; + buf.push(cu1); + buf.push(cu2); + } + } else { + // Collect each character after \u e.g \uD83D will give "D83D" + let mut code_point_utf8_bytes = [0u8; 4]; cursor.fill_bytes(&mut code_point_utf8_bytes)?; + + // Convert to u16 let code_point_str = str::from_utf8(&code_point_utf8_bytes) - .expect("malformed Hexadecimal character escape sequence"); + .expect("malformed Unicode character escape sequence"); let code_point = - u16::from_str_radix(&code_point_str, 16).map_err(|_| { + u16::from_str_radix(code_point_str, 16).map_err(|_| { Error::syntax( - "invalid Hexadecimal escape sequence", + "invalid Unicode escape sequence", cursor.pos(), ) })?; buf.push(code_point); } - b'u' => { - // Support \u{X..X} (Unicode Codepoint) - if cursor.next_is(b'{')? 
{ - // TODO: use bytes for a bit better performance (using stack) - let mut code_point_buf = Vec::with_capacity(6); - cursor.take_until(b'}', &mut code_point_buf)?; - - let code_point_str = unsafe { - str::from_utf8_unchecked(code_point_buf.as_slice()) - }; - // We know this is a single unicode codepoint, convert to u32 - let code_point = u32::from_str_radix(&code_point_str, 16) - .map_err(|_| { - Error::syntax( - "malformed Unicode character escape sequence", - cursor.pos(), - ) - })?; - - // UTF16Encoding of a numeric code point value - if code_point > 0x10_FFFF { - return Err(Error::syntax("Unicode codepoint must not be greater than 0x10FFFF in escape sequence", cursor.pos())); - } else if code_point <= 65535 { - buf.push(code_point as u16); - } else { - let cu1 = ((code_point - 65536) / 1024 + 0xD800) as u16; - let cu2 = ((code_point - 65536) % 1024 + 0xDC00) as u16; - buf.push(cu1); - buf.push(cu2); + } + n if char::is_digit(char::from(n), 8) => { + if strict_mode { + return Err(Error::syntax( + "octal escape sequences are deprecated", + cursor.pos(), + )); + } + let mut o = char::from(n).to_digit(8).unwrap(); + + match cursor.peek()? { + Some(c) if char::is_digit(char::from(c), 8) => { + let _ = cursor.next_byte()?; + o = o * 8 + char::from(c).to_digit(8).unwrap(); // accumulate the digit just peeked (`c`), not the first digit `n` + if n <= b'3' { + match cursor.peek()? 
{ + Some(c) if char::is_digit(char::from(c), 8) => { + let _ = cursor.next_byte()?; + o = o * 8 + char::from(c).to_digit(8).unwrap(); + } + _ => (), + } + } - } else { - // Collect each character after \u e.g \uD83D will give "D83D" - let mut code_point_utf8_bytes = [0u8; 4]; - cursor.fill_bytes(&mut code_point_utf8_bytes)?; - - // Convert to u16 - let code_point_str = str::from_utf8(&code_point_utf8_bytes) - .expect("malformed Unicode character escape sequence"); - let code_point = u16::from_str_radix(code_point_str, 16) - .map_err(|_| { - Error::syntax( - "invalid Unicode escape sequence", - cursor.pos(), - ) - })?; - - buf.push(code_point); } + _ => (), } - b'\'' | b'"' | b'\\' => buf.push(escape as u16), - _ => { - let details = format!( - "invalid escape sequence at line {}, column {}", - next_chr_start.line_number(), - next_chr_start.column_number(), - ); - return Err(Error::syntax(details, cursor.pos())); - } - }; - } + buf.push(o as u16); + } + _ => buf.push(escape as u16), + }; } - next_ch => { - if next_ch.len_utf16() == 1 { - buf.push(next_ch as u16); - } else { - let mut code_point_bytes_buf = [0u16; 2]; - let code_point_bytes = next_ch.encode_utf16(&mut code_point_bytes_buf); - - buf.extend(code_point_bytes.iter()); - } + } + Some(next_ch) => { + if next_ch.len_utf16() == 1 { + buf.push(next_ch as u16); + } else { + let mut code_point_bytes_buf = [0u16; 2]; + let code_point_bytes = next_ch.encode_utf16(&mut code_point_bytes_buf); + + buf.extend(code_point_bytes.iter()); } } + None if terminator != StringTerminator::End => { + return Err(Error::from(io::Error::new( + ErrorKind::UnexpectedEof, + "unterminated string literal", + ))); + } + None => { + break; + } } - - Ok(Token::new( - TokenKind::string_literal(String::from_utf16_lossy(buf.as_slice())), - Span::new(start_pos, cursor.pos()), - )) } + + Ok(( + String::from_utf16_lossy(buf.as_slice()), + Span::new(start_pos, cursor.pos()), + )) } diff --git a/boa/src/syntax/lexer/template.rs 
b/boa/src/syntax/lexer/template.rs index 28bccf5e1c..a34ba02523 100644 --- a/boa/src/syntax/lexer/template.rs +++ b/boa/src/syntax/lexer/template.rs @@ -3,13 +3,14 @@ use super::{Cursor, Error, Tokenizer}; use crate::{ profiler::BoaProfiler, + syntax::lexer::string::{unescape_string, StringTerminator}, syntax::{ ast::{Position, Span}, lexer::{Token, TokenKind}, }, }; +use std::convert::TryFrom; use std::io::{self, ErrorKind, Read}; -use std::str; /// Template literal lexing. /// @@ -33,28 +34,65 @@ impl Tokenizer for TemplateLiteral { let mut buf = Vec::new(); loop { - match cursor.next_byte()? { - None => { - return Err(Error::from(io::Error::new( - ErrorKind::UnexpectedEof, - "Unterminated template literal", - ))); + let next_chr = char::try_from(cursor.next_char()?.ok_or_else(|| { + Error::from(io::Error::new( + ErrorKind::UnexpectedEof, + "unterminated template literal", + )) + })?) + .unwrap(); + match next_chr { + '`' => { + let raw = String::from_utf16_lossy(buf.as_slice()); + let (cooked, _) = unescape_string( + &mut Cursor::with_position(raw.as_bytes(), start_pos), + start_pos, + StringTerminator::End, + true, + )?; + return Ok(Token::new( + TokenKind::template_no_substitution(raw, cooked), + Span::new(start_pos, cursor.pos()), + )); } - Some(b'`') => break, // Template literal finished. - Some(next_byte) => buf.push(next_byte), // TODO when there is an expression inside the literal - } - } + '$' if cursor.peek()? 
== Some(b'{') => { + let _ = cursor.next_byte()?; + let raw = String::from_utf16_lossy(buf.as_slice()); + let (cooked, _) = unescape_string( + &mut Cursor::with_position(raw.as_bytes(), start_pos), + start_pos, + StringTerminator::End, + true, + )?; + return Ok(Token::new( + TokenKind::template_middle(raw, cooked), + Span::new(start_pos, cursor.pos()), + )); + } + '\\' => { + let escape = cursor.peek()?.ok_or_else(|| { + Error::from(io::Error::new( + ErrorKind::UnexpectedEof, + "unterminated escape sequence in literal", + )) + })?; + buf.push('\\' as u16); + match escape { + b'`' | b'$' | b'\\' => buf.push(cursor.next_byte()?.unwrap() as u16), + _ => continue, + } + } + next_ch => { + if next_ch.len_utf16() == 1 { + buf.push(next_ch as u16); + } else { + let mut code_point_bytes_buf = [0u16; 2]; + let code_point_bytes = next_ch.encode_utf16(&mut code_point_bytes_buf); - if let Ok(s) = str::from_utf8(buf.as_slice()) { - Ok(Token::new( - TokenKind::template_literal(s), - Span::new(start_pos, cursor.pos()), - )) - } else { - Err(Error::from(io::Error::new( - ErrorKind::InvalidData, - "Invalid UTF-8 character in template literal", - ))) + buf.extend(code_point_bytes.iter()); + } + } + } } } } diff --git a/boa/src/syntax/lexer/tests.rs b/boa/src/syntax/lexer/tests.rs index 230ab1847f..f54b8f4b33 100644 --- a/boa/src/syntax/lexer/tests.rs +++ b/boa/src/syntax/lexer/tests.rs @@ -6,6 +6,7 @@ use super::token::Numeric; use super::*; use super::{Error, Position}; use crate::syntax::ast::Keyword; +use crate::syntax::lexer::string::{unescape_string, StringTerminator}; use std::str; fn span(start: (u32, u32), end: (u32, u32)) -> Span { @@ -136,7 +137,7 @@ fn check_template_literal_simple() { assert_eq!( lexer.next().unwrap().unwrap().kind(), - &TokenKind::template_literal("I'm a template literal") + &TokenKind::template_no_substitution("I'm a template literal", "I'm a template literal") ); } @@ -857,6 +858,38 @@ fn unicode_escape_with_braces() { } } +#[test] +fn 
unicode_escape_with_braces_() { + let s = r#"\u{20ac}\u{a0}\u{a0}"#.to_string(); + + let mut cursor = Cursor::new(s.as_bytes()); + + if let Ok((s, _)) = unescape_string( + &mut cursor, + Position::new(1, 1), + StringTerminator::End, + false, + ) { + assert_eq!(s, "\u{20ac}\u{a0}\u{a0}") + } else { + panic!(); + } +} + +#[test] +fn unescape_string_with_single_escape() { + let s = r#"\Б"#.to_string(); + let mut cursor = Cursor::new(s.as_bytes()); + let (s, _) = unescape_string( + &mut cursor, + Position::new(1, 1), + StringTerminator::End, + false, + ) + .unwrap(); + assert_eq!(s, "Б"); +} + mod carriage_return { use super::*; diff --git a/boa/src/syntax/lexer/token.rs b/boa/src/syntax/lexer/token.rs index 270e14ab91..82a4f2e984 100644 --- a/boa/src/syntax/lexer/token.rs +++ b/boa/src/syntax/lexer/token.rs @@ -125,7 +125,21 @@ pub enum TokenKind { /// A string literal. StringLiteral(Box), - TemplateLiteral(Box), + /// A part of a template literal without substitution. + TemplateNoSubstitution { + /// The string as it has been entered, without processing escape sequences. + raw: Box, + /// The raw string with escape sequences processed. + cooked: Box, + }, + + /// The part of a template literal between substitutions + TemplateMiddle { + /// The string as it has been entered, without processing escape sequences. + raw: Box, + /// The raw string with escape sequences processed. + cooked: Box, + }, /// A regular expression, consisting of body and flags. RegularExpressionLiteral(Box, RegExpFlags), @@ -206,12 +220,26 @@ impl TokenKind { Self::StringLiteral(lit.into()) } - /// Creates a `TemplateLiteral` token type. 
- pub fn template_literal(lit: S) -> Self + pub fn template_middle(raw: R, cooked: C) -> Self where - S: Into>, + R: Into>, + C: Into>, { - Self::TemplateLiteral(lit.into()) + Self::TemplateMiddle { + raw: raw.into(), + cooked: cooked.into(), + } + } + + pub fn template_no_substitution(raw: R, cooked: C) -> Self + where + R: Into>, + C: Into>, + { + Self::TemplateNoSubstitution { + raw: raw.into(), + cooked: cooked.into(), + } } /// Creates a `RegularExpressionLiteral` token kind. @@ -247,7 +275,8 @@ impl Display for TokenKind { Self::NumericLiteral(Numeric::BigInt(ref num)) => write!(f, "{}n", num), Self::Punctuator(ref punc) => write!(f, "{}", punc), Self::StringLiteral(ref lit) => write!(f, "{}", lit), - Self::TemplateLiteral(ref lit) => write!(f, "{}", lit), + Self::TemplateNoSubstitution { ref cooked, .. } => write!(f, "{}", cooked), + Self::TemplateMiddle { ref cooked, .. } => write!(f, "{}", cooked), Self::RegularExpressionLiteral(ref body, ref flags) => write!(f, "/{}/{}", body, flags), Self::LineTerminator => write!(f, "line terminator"), Self::Comment => write!(f, "comment"), diff --git a/boa/src/syntax/parser/cursor/buffered_lexer/mod.rs b/boa/src/syntax/parser/cursor/buffered_lexer/mod.rs index 976c9eda48..8c8995759c 100644 --- a/boa/src/syntax/parser/cursor/buffered_lexer/mod.rs +++ b/boa/src/syntax/parser/cursor/buffered_lexer/mod.rs @@ -84,6 +84,12 @@ where self.lexer.lex_slash_token(start).map_err(|e| e.into()) } + /// Lexes the next tokens as template middle or template tail assuming that the starting + /// '}' has already been consumed. 
+ pub(super) fn lex_template(&mut self, start: Position) -> Result { + self.lexer.lex_template(start).map_err(ParseError::from) + } + #[inline] pub(super) fn strict_mode(&self) -> bool { self.lexer.strict_mode() diff --git a/boa/src/syntax/parser/cursor/mod.rs b/boa/src/syntax/parser/cursor/mod.rs index 7db28eb998..61ddcb55a8 100644 --- a/boa/src/syntax/parser/cursor/mod.rs +++ b/boa/src/syntax/parser/cursor/mod.rs @@ -46,6 +46,11 @@ where self.buffered_lexer.lex_regex(start) } + #[inline] + pub(super) fn lex_template(&mut self, start: Position) -> Result { + self.buffered_lexer.lex_template(start) + } + #[inline] pub(super) fn next(&mut self) -> Result, ParseError> { self.buffered_lexer.next(true) diff --git a/boa/src/syntax/parser/expression/left_hand_side/call.rs b/boa/src/syntax/parser/expression/left_hand_side/call.rs index 573288b62b..238823a2e8 100644 --- a/boa/src/syntax/parser/expression/left_hand_side/call.rs +++ b/boa/src/syntax/parser/expression/left_hand_side/call.rs @@ -19,8 +19,8 @@ use crate::{ }, lexer::TokenKind, parser::{ - expression::Expression, AllowAwait, AllowYield, Cursor, ParseError, ParseResult, - TokenParser, + expression::{left_hand_side::template::TaggedTemplateLiteral, Expression}, + AllowAwait, AllowYield, Cursor, ParseError, ParseResult, TokenParser, }, }, BoaProfiler, @@ -112,6 +112,15 @@ where cursor.expect(Punctuator::CloseBracket, "call expression")?; lhs = GetField::new(lhs, idx).into(); } + TokenKind::TemplateNoSubstitution { .. } | TokenKind::TemplateMiddle { .. 
} => { + lhs = TaggedTemplateLiteral::new( + self.allow_yield, + self.allow_await, + tok.span().start(), + lhs, + ) + .parse(cursor)?; + } _ => break, } } diff --git a/boa/src/syntax/parser/expression/left_hand_side/member.rs b/boa/src/syntax/parser/expression/left_hand_side/member.rs index f7a865d0b0..14395d4cc9 100644 --- a/boa/src/syntax/parser/expression/left_hand_side/member.rs +++ b/boa/src/syntax/parser/expression/left_hand_side/member.rs @@ -17,7 +17,10 @@ use crate::{ }, lexer::TokenKind, parser::{ - expression::{primary::PrimaryExpression, Expression}, + expression::{ + left_hand_side::template::TaggedTemplateLiteral, primary::PrimaryExpression, + Expression, + }, AllowAwait, AllowYield, Cursor, ParseError, ParseResult, TokenParser, }, }, @@ -105,6 +108,15 @@ where cursor.expect(Punctuator::CloseBracket, "member expression")?; lhs = GetField::new(lhs, idx).into(); } + TokenKind::TemplateNoSubstitution { .. } | TokenKind::TemplateMiddle { .. } => { + lhs = TaggedTemplateLiteral::new( + self.allow_yield, + self.allow_await, + tok.span().start(), + lhs, + ) + .parse(cursor)?; + } _ => break, } } diff --git a/boa/src/syntax/parser/expression/left_hand_side/mod.rs b/boa/src/syntax/parser/expression/left_hand_side/mod.rs index a41c6df0c7..76f36e3f3d 100644 --- a/boa/src/syntax/parser/expression/left_hand_side/mod.rs +++ b/boa/src/syntax/parser/expression/left_hand_side/mod.rs @@ -10,6 +10,7 @@ mod arguments; mod call; mod member; +mod template; use self::{call::CallExpression, member::MemberExpression}; use crate::{ diff --git a/boa/src/syntax/parser/expression/left_hand_side/template.rs b/boa/src/syntax/parser/expression/left_hand_side/template.rs new file mode 100644 index 0000000000..0c5d78e374 --- /dev/null +++ b/boa/src/syntax/parser/expression/left_hand_side/template.rs @@ -0,0 +1,90 @@ +use crate::{ + profiler::BoaProfiler, + syntax::{ + ast::node::TaggedTemplate, + ast::{Node, Position, Punctuator}, + lexer::TokenKind, + parser::{ + cursor::Cursor, 
expression::Expression, AllowAwait, AllowYield, ParseError, + ParseResult, TokenParser, + }, + }, +}; +use std::io::Read; + +/// Parses a tagged template. +/// +/// More information: +/// - [ECMAScript specification][spec] +/// +/// [spec]: https://tc39.es/ecma262/#prod-TemplateLiteral +#[derive(Debug, Clone)] +pub(super) struct TaggedTemplateLiteral { + allow_yield: AllowYield, + allow_await: AllowAwait, + start: Position, + tag: Node, +} + +impl TaggedTemplateLiteral { + /// Creates a new `TaggedTemplateLiteral` parser. + pub(super) fn new(allow_yield: Y, allow_await: A, start: Position, tag: Node) -> Self + where + Y: Into, + A: Into, + { + Self { + allow_yield: allow_yield.into(), + allow_await: allow_await.into(), + start, + tag, + } + } +} + +impl TokenParser for TaggedTemplateLiteral +where + R: Read, +{ + type Output = Node; + + fn parse(self, cursor: &mut Cursor) -> ParseResult { + let _timer = BoaProfiler::global().start_event("TaggedTemplateLiteral", "Parsing"); + + let mut raws = Vec::new(); + let mut cookeds = Vec::new(); + let mut exprs = Vec::new(); + + let mut token = cursor.next()?.ok_or(ParseError::AbruptEnd)?; + + loop { + match token.kind() { + TokenKind::TemplateMiddle { raw, cooked } => { + raws.push(raw.clone()); + cookeds.push(cooked.clone()); + exprs.push( + Expression::new(true, self.allow_yield, self.allow_await).parse(cursor)?, + ); + cursor.expect( + TokenKind::Punctuator(Punctuator::CloseBlock), + "template literal", + )?; + } + TokenKind::TemplateNoSubstitution { raw, cooked } => { + raws.push(raw.clone()); + cookeds.push(cooked.clone()); + return Ok(Node::from(TaggedTemplate::new( + self.tag, raws, cookeds, exprs, + ))); + } + _ => { + return Err(ParseError::general( + "cannot parse tagged template literal", + self.start, + )) + } + } + token = cursor.lex_template(self.start)?; + } + } +} diff --git a/boa/src/syntax/parser/expression/primary/mod.rs b/boa/src/syntax/parser/expression/primary/mod.rs index 7f55346663..16183f002e 100644 
--- a/boa/src/syntax/parser/expression/primary/mod.rs +++ b/boa/src/syntax/parser/expression/primary/mod.rs @@ -11,6 +11,7 @@ mod array_initializer; mod async_function_expression; mod function_expression; mod object_initializer; +mod template; #[cfg(test)] mod tests; @@ -27,7 +28,10 @@ use crate::{ Const, Keyword, Punctuator, }, lexer::{token::Numeric, InputElement, TokenKind}, - parser::{AllowAwait, AllowYield, Cursor, ParseError, ParseResult, TokenParser}, + parser::{ + expression::primary::template::TemplateLiteral, AllowAwait, AllowYield, Cursor, + ParseError, ParseResult, TokenParser, + }, }, }; pub(in crate::syntax::parser) use object_initializer::Initializer; @@ -103,7 +107,9 @@ where TokenKind::BooleanLiteral(boolean) => Ok(Const::from(*boolean).into()), TokenKind::NullLiteral => Ok(Const::Null.into()), TokenKind::Identifier(ident) => Ok(Identifier::from(ident.as_ref()).into()), // TODO: IdentifierReference - TokenKind::StringLiteral(s) => Ok(Const::from(s.as_ref()).into()), + TokenKind::StringLiteral(s) | TokenKind::TemplateNoSubstitution { cooked: s, .. } => { + Ok(Const::from(s.as_ref()).into()) + } TokenKind::NumericLiteral(Numeric::Integer(num)) => Ok(Const::from(*num).into()), TokenKind::NumericLiteral(Numeric::Rational(num)) => Ok(Const::from(*num).into()), TokenKind::NumericLiteral(Numeric::BigInt(num)) => Ok(Const::from(num.clone()).into()), @@ -132,6 +138,14 @@ where Err(ParseError::unexpected(tok, "regular expression literal")) } } + TokenKind::TemplateMiddle { cooked, .. 
} => TemplateLiteral::new( + self.allow_yield, + self.allow_await, + tok.span().start(), + cooked.as_ref(), + ) + .parse(cursor) + .map(Node::TemplateLit), _ => Err(ParseError::unexpected(tok.clone(), "primary expression")), } } diff --git a/boa/src/syntax/parser/expression/primary/template/mod.rs b/boa/src/syntax/parser/expression/primary/template/mod.rs new file mode 100644 index 0000000000..ab03356cfb --- /dev/null +++ b/boa/src/syntax/parser/expression/primary/template/mod.rs @@ -0,0 +1,104 @@ +//! Template literal parsing. +//! +//! More information: +//! - [MDN documentation][mdn] +//! - [ECMAScript specification][spec] +//! +//! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Template_literals +//! [spec]: https://tc39.es/ecma262/#sec-template-literals + +use crate::{ + profiler::BoaProfiler, + syntax::{ + ast::node::template::{TemplateElement, TemplateLit}, + ast::Position, + ast::Punctuator, + lexer::TokenKind, + parser::cursor::Cursor, + parser::expression::Expression, + parser::{AllowAwait, AllowYield, ParseError, TokenParser}, + }, +}; +use std::io::Read; + +/// Parses a template literal. +/// +/// More information: +/// - [MDN documentation][mdn] +/// - [ECMAScript specification][spec] +/// +/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Template_literals +/// [spec]: https://tc39.es/ecma262/#prod-TemplateLiteral +#[derive(Debug, Clone)] +pub(super) struct TemplateLiteral { + allow_yield: AllowYield, + allow_await: AllowAwait, + start: Position, + first: String, +} + +impl TemplateLiteral { + /// Creates a new `TemplateLiteral` parser. 
+ pub(super) fn new(allow_yield: Y, allow_await: A, start: Position, first: &str) -> Self + where + Y: Into, + A: Into, + { + Self { + allow_yield: allow_yield.into(), + allow_await: allow_await.into(), + start, + first: first.to_owned(), + } + } +} + +impl TokenParser for TemplateLiteral +where + R: Read, +{ + type Output = TemplateLit; + + fn parse(self, cursor: &mut Cursor) -> Result { + let _timer = BoaProfiler::global().start_event("TemplateLiteral", "Parsing"); + + let mut elements = Vec::new(); + elements.push(TemplateElement::String(self.first.into_boxed_str())); + elements.push(TemplateElement::Expr( + Expression::new(true, self.allow_yield, self.allow_await).parse(cursor)?, + )); + cursor.expect( + TokenKind::Punctuator(Punctuator::CloseBlock), + "template literal", + )?; + + loop { + match cursor.lex_template(self.start)?.kind() { + TokenKind::TemplateMiddle { + cooked: template, .. + } => { + elements.push(TemplateElement::String(template.to_owned())); + elements.push(TemplateElement::Expr( + Expression::new(true, self.allow_yield, self.allow_await).parse(cursor)?, + )); + cursor.expect( + TokenKind::Punctuator(Punctuator::CloseBlock), + "template literal", + )?; + } + TokenKind::TemplateNoSubstitution { + cooked: template, .. + } => { + elements.push(TemplateElement::String(template.to_owned())); + return Ok(TemplateLit::new(elements)); + } + _ => { + return Err(ParseError::general( + "cannot parse template literal", + self.start, + )) + } + } + } + } +} diff --git a/boa/src/syntax/parser/function/mod.rs b/boa/src/syntax/parser/function/mod.rs index da94bd5edb..8340fe7964 100644 --- a/boa/src/syntax/parser/function/mod.rs +++ b/boa/src/syntax/parser/function/mod.rs @@ -269,7 +269,8 @@ where TokenKind::Punctuator(Punctuator::CloseBlock) => { return Ok(Vec::new().into()); } - TokenKind::StringLiteral(string) | TokenKind::TemplateLiteral(string) => { + TokenKind::StringLiteral(string) + | TokenKind::TemplateNoSubstitution { cooked: string, .. 
} => { if string == &"use strict".into() { cursor.set_strict_mode(true); } diff --git a/boa/src/syntax/parser/mod.rs b/boa/src/syntax/parser/mod.rs index 8156a0bf5c..d33f6ea15d 100644 --- a/boa/src/syntax/parser/mod.rs +++ b/boa/src/syntax/parser/mod.rs @@ -125,7 +125,8 @@ where match cursor.peek(0)? { Some(tok) => { match tok.kind() { - TokenKind::StringLiteral(string) | TokenKind::TemplateLiteral(string) => { + TokenKind::StringLiteral(string) + | TokenKind::TemplateNoSubstitution { cooked: string, .. } => { if string.as_ref() == "use strict" { cursor.set_strict_mode(true); } diff --git a/test_ignore.txt b/test_ignore.txt index 377c213575..5d5d195e4b 100644 --- a/test_ignore.txt +++ b/test_ignore.txt @@ -54,3 +54,7 @@ S15.1.3.4_A1.3_T1 // This one seems to terminate the process somehow: arg-length-near-integer-limit + +// These generate a stack overflow +tco-call +tco-member