Browse Source

Fix line terminators in template strings (#3641)

pull/3646/head
raskad 9 months ago committed by GitHub
parent
commit
6ddc2b47ff
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 11
      core/parser/src/lexer/cursor.rs
  2. 165
      core/parser/src/lexer/template.rs
  3. 2
      core/parser/src/lexer/tests.rs
  4. 2
      core/parser/src/lexer/token.rs
  5. 8
      core/parser/src/parser/expression/left_hand_side/template.rs
  6. 23
      core/parser/src/parser/expression/primary/mod.rs
  7. 20
      core/parser/src/parser/expression/primary/template/mod.rs

11
core/parser/src/lexer/cursor.rs

@ -68,17 +68,6 @@ impl<R: ReadChar> Cursor<R> {
} }
} }
/// Creates a new Lexer cursor with an initial position.
pub(super) fn with_position(inner: R, pos: Position) -> Self {
Self {
iter: inner,
pos,
strict: false,
module: false,
peeked: [None; 4],
}
}
/// Peeks the next n bytes, the maximum number of peeked bytes is 4 (n <= 4). /// Peeks the next n bytes, the maximum number of peeked bytes is 4 (n <= 4).
pub(super) fn peek_n(&mut self, n: u8) -> Result<&[Option<u32>; 4], Error> { pub(super) fn peek_n(&mut self, n: u8) -> Result<&[Option<u32>; 4], Error> {
let _timer = Profiler::global().start_event("cursor::peek_n()", "Lexing"); let _timer = Profiler::global().start_event("cursor::peek_n()", "Lexing");

165
core/parser/src/lexer/template.rs

@ -1,12 +1,8 @@
//! Boa's lexing for ECMAScript template literals. //! Boa's lexing for ECMAScript template literals.
use crate::source::ReadChar;
use crate::{ use crate::{
lexer::{ lexer::{string::UTF16CodeUnitsBuffer, Cursor, Error, Token, TokenKind, Tokenizer},
string::{StringLiteral, UTF16CodeUnitsBuffer}, source::ReadChar,
Cursor, Error, Token, TokenKind, Tokenizer,
},
source::UTF8Input,
}; };
use boa_ast::{Position, Span}; use boa_ast::{Position, Span};
use boa_interner::{Interner, Sym}; use boa_interner::{Interner, Sym};
@ -16,17 +12,30 @@ use std::io::{self, ErrorKind};
#[cfg_attr(feature = "deser", derive(serde::Serialize, serde::Deserialize))] #[cfg_attr(feature = "deser", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TemplateString { pub struct TemplateString {
/// The template string of template literal with argument `raw` true. /// The raw template string.
raw: Sym, raw: Sym,
/// The start position of the template string. Used to make lexer error if `to_owned_cooked`
/// failed. /// The cooked template string.
start_pos: Position, cooked: Option<Sym>,
} }
impl TemplateString { impl TemplateString {
/// Creates a new `TemplateString` with the given raw template and start position. /// Creates a new `TemplateString` with the given raw template and start position.
pub const fn new(raw: Sym, start_pos: Position) -> Self { pub fn new(raw: Sym, interner: &mut Interner) -> Self {
Self { raw, start_pos } Self {
raw: Self::as_raw(raw, interner),
cooked: Self::as_cooked(raw, interner),
}
}
/// Returns the raw template string.
pub fn raw(self) -> Sym {
self.raw
}
/// Returns the cooked template string if it exists.
pub fn cooked(self) -> Option<Sym> {
self.cooked
} }
/// Converts the raw template string into a mutable string slice. /// Converts the raw template string into a mutable string slice.
@ -35,8 +44,34 @@ impl TemplateString {
/// - [ECMAScript reference][spec] /// - [ECMAScript reference][spec]
/// ///
/// [spec]: https://tc39.es/ecma262/#sec-static-semantics-templatestrings /// [spec]: https://tc39.es/ecma262/#sec-static-semantics-templatestrings
pub const fn as_raw(self) -> Sym { fn as_raw(raw: Sym, interner: &mut Interner) -> Sym {
self.raw let string = interner.resolve_expect(raw).utf16();
let mut iter = string.iter().peekable();
let mut buf: Vec<u16> = Vec::new();
loop {
match iter.next() {
Some(0x5C /* \ */) => {
buf.push_code_point(0x5C);
match iter.next() {
Some(0x0D /* <CR> */) => {
buf.push_code_point(0x0A);
}
Some(ch) => {
buf.push_code_point(u32::from(*ch));
}
None => break,
}
}
Some(0x0D /* <CR> */) => {
buf.push_code_point(0x0A);
}
Some(ch) => {
buf.push_code_point(u32::from(*ch));
}
None => break,
}
}
interner.get_or_intern(buf.as_slice())
} }
/// Creates a new cooked template string. Returns a lexer error if it fails to cook the /// Creates a new cooked template string. Returns a lexer error if it fails to cook the
@ -46,39 +81,91 @@ impl TemplateString {
/// - [ECMAScript reference][spec] /// - [ECMAScript reference][spec]
/// ///
/// [spec]: https://tc39.es/ecma262/#sec-static-semantics-templatestrings /// [spec]: https://tc39.es/ecma262/#sec-static-semantics-templatestrings
pub fn to_owned_cooked(self, interner: &mut Interner) -> Result<Sym, Error> { fn as_cooked(raw: Sym, interner: &mut Interner) -> Option<Sym> {
let string = interner.resolve_expect(self.raw).to_string(); let string = interner.resolve_expect(raw).utf16();
let mut cursor = Cursor::with_position(UTF8Input::new(string.as_bytes()), self.start_pos); let mut iter = string.iter().peekable();
let mut buf: Vec<u16> = Vec::new(); let mut buf: Vec<u16> = Vec::new();
loop { loop {
let ch_start_pos = cursor.pos(); match iter.next() {
let ch = cursor.next_char()?; Some(0x5C /* \ */) => {
let escape_value = match iter.next() {
match ch { Some(0x62 /* b */) => 0x08 /* <BS> */,
Some(0x005C /* \ */) => { Some(0x74 /* t */) => 0x09 /* <HT> */,
let escape_value = StringLiteral::take_escape_sequence_or_line_continuation( Some(0x6E /* n */) => 0x0A /* <LF> */,
&mut cursor, Some(0x76 /* v */) => 0x0B /* <VT> */,
ch_start_pos, Some(0x66 /* f */) => 0x0C /* <FF> */,
true, Some(0x72 /* r */) => 0x0D /* <CR> */,
true, Some(0x22 /* " */) => 0x22 /* " */,
)?; Some(0x27 /* ' */) => 0x27 /* ' */,
Some(0x5C /* \ */) => 0x5C /* \ */,
if let (Some(escape_value), _) = escape_value { Some(0x30 /* 0 */) if iter
buf.push_code_point(escape_value); .peek()
} .filter(|ch| (0x30..=0x39 /* 0..=9 */).contains(**ch))
.is_none() => 0x00 /* NULL */,
// Hex Escape
Some(0x078 /* x */) => {
let mut s = String::with_capacity(2);
s.push(char::from_u32(u32::from(*iter.next()?))?);
s.push(char::from_u32(u32::from(*iter.next()?))?);
u16::from_str_radix(&s, 16).ok()?.into()
}
// Unicode Escape
Some(0x75 /* u */) => {
let next = *iter.next()?;
if next == 0x7B /* { */ {
let mut buffer = String::with_capacity(6);
loop {
let next = *iter.next()?;
if next == 0x7D /* } */ {
break;
}
buffer.push(char::from_u32(u32::from(next))?);
}
let cp = u32::from_str_radix(&buffer, 16).ok()?;
if cp > 0x10_FFFF {
return None;
}
cp
} else {
let mut s = String::with_capacity(4);
s.push(char::from_u32(u32::from(next))?);
s.push(char::from_u32(u32::from(*iter.next()?))?);
s.push(char::from_u32(u32::from(*iter.next()?))?);
s.push(char::from_u32(u32::from(*iter.next()?))?);
u16::from_str_radix(&s, 16).ok()?.into()
}
}
// NonOctalDecimalEscapeSequence
Some(0x38 /* 8 */ | 0x39 /* 9 */) => {
return None;
}
// LegacyOctalEscapeSequence
Some(ch) if (0x30..=0x37 /* '0'..='7' */).contains(ch) => {
return None;
}
// Line Terminator
Some(0x0A /* <LF> */ | 0x0D /* <CR> */ | 0x2028 /* <LS> */ | 0x2029 /* <PS> */) => {
continue;
}
Some(ch) => {
u32::from(*ch)
}
None => return None,
};
buf.push_code_point(escape_value);
}
Some(0x0D /* <CR> */) => {
buf.push_code_point(0x0A);
} }
Some(ch) => { Some(ch) => {
// The caller guarantees that sequences '`' and '${' never appear buf.push_code_point(u32::from(*ch));
// LineTerminatorSequence <CR> <LF> is consumed by `cursor.next_char()` and
// returns <LF>, which matches the TV of <CR> <LF>
buf.push_code_point(ch);
} }
None => break, None => break,
} }
} }
Ok(interner.get_or_intern(&buf[..])) Some(interner.get_or_intern(buf.as_slice()))
} }
} }
@ -120,7 +207,7 @@ impl<R> Tokenizer<R> for TemplateLiteral {
// ` // `
0x0060 => { 0x0060 => {
let raw_sym = interner.get_or_intern(&buf[..]); let raw_sym = interner.get_or_intern(&buf[..]);
let template_string = TemplateString::new(raw_sym, start_pos); let template_string = TemplateString::new(raw_sym, interner);
return Ok(Token::new( return Ok(Token::new(
TokenKind::template_no_substitution(template_string), TokenKind::template_no_substitution(template_string),
@ -130,7 +217,7 @@ impl<R> Tokenizer<R> for TemplateLiteral {
// $ // $
0x0024 if cursor.next_if(0x7B /* { */)? => { 0x0024 if cursor.next_if(0x7B /* { */)? => {
let raw_sym = interner.get_or_intern(&buf[..]); let raw_sym = interner.get_or_intern(&buf[..]);
let template_string = TemplateString::new(raw_sym, start_pos); let template_string = TemplateString::new(raw_sym, interner);
return Ok(Token::new( return Ok(Token::new(
TokenKind::template_middle(template_string), TokenKind::template_middle(template_string),

2
core/parser/src/lexer/tests.rs

@ -169,7 +169,7 @@ fn check_template_literal_simple() {
assert_eq!( assert_eq!(
lexer.next(interner).unwrap().unwrap().kind(), lexer.next(interner).unwrap().unwrap().kind(),
&TokenKind::template_no_substitution(TemplateString::new(sym, Position::new(1, 1))) &TokenKind::template_no_substitution(TemplateString::new(sym, interner))
); );
} }

2
core/parser/src/lexer/token.rs

@ -273,7 +273,7 @@ impl TokenKind {
Self::Punctuator(punc) => punc.to_string(), Self::Punctuator(punc) => punc.to_string(),
Self::StringLiteral((lit, _)) => interner.resolve_expect(lit).to_string(), Self::StringLiteral((lit, _)) => interner.resolve_expect(lit).to_string(),
Self::TemplateNoSubstitution(ts) | Self::TemplateMiddle(ts) => { Self::TemplateNoSubstitution(ts) | Self::TemplateMiddle(ts) => {
interner.resolve_expect(ts.as_raw()).to_string() interner.resolve_expect(ts.raw()).to_string()
} }
Self::RegularExpressionLiteral(body, flags) => { Self::RegularExpressionLiteral(body, flags) => {
format!( format!(

8
core/parser/src/parser/expression/left_hand_side/template.rs

@ -64,8 +64,8 @@ where
loop { loop {
match token.kind() { match token.kind() {
TokenKind::TemplateMiddle(template_string) => { TokenKind::TemplateMiddle(template_string) => {
raws.push(template_string.as_raw()); raws.push(template_string.raw());
cookeds.push(template_string.to_owned_cooked(interner).ok()); cookeds.push(template_string.cooked());
exprs.push( exprs.push(
Expression::new(None, true, self.allow_yield, self.allow_await) Expression::new(None, true, self.allow_yield, self.allow_await)
.parse(cursor, interner)?, .parse(cursor, interner)?,
@ -77,8 +77,8 @@ where
)?; )?;
} }
TokenKind::TemplateNoSubstitution(template_string) => { TokenKind::TemplateNoSubstitution(template_string) => {
raws.push(template_string.as_raw()); raws.push(template_string.raw());
cookeds.push(template_string.to_owned_cooked(interner).ok()); cookeds.push(template_string.cooked());
return Ok(TaggedTemplate::new( return Ok(TaggedTemplate::new(
self.tag, self.tag,
raws.into_boxed_slice(), raws.into_boxed_slice(),

23
core/parser/src/parser/expression/primary/mod.rs

@ -215,12 +215,13 @@ where
Ok(node) Ok(node)
} }
TokenKind::TemplateNoSubstitution(template_string) => { TokenKind::TemplateNoSubstitution(template_string) => {
let node = Literal::from( let Some(cooked) = template_string.cooked() else {
template_string return Err(Error::general(
.to_owned_cooked(interner) "invalid escape in template literal",
.map_err(Error::lex)?, tok.span().start(),
) ));
.into(); };
let node = Literal::from(cooked).into();
cursor.advance(interner); cursor.advance(interner);
Ok(node) Ok(node)
} }
@ -261,13 +262,17 @@ where
} }
} }
TokenKind::TemplateMiddle(template_string) => { TokenKind::TemplateMiddle(template_string) => {
let Some(cooked) = template_string.cooked() else {
return Err(Error::general(
"invalid escape in template literal",
tok.span().start(),
));
};
let parser = TemplateLiteral::new( let parser = TemplateLiteral::new(
self.allow_yield, self.allow_yield,
self.allow_await, self.allow_await,
tok.span().start(), tok.span().start(),
template_string cooked,
.to_owned_cooked(interner)
.map_err(Error::lex)?,
); );
cursor.advance(interner); cursor.advance(interner);
parser.parse(cursor, interner).map(Into::into) parser.parse(cursor, interner).map(Into::into)

20
core/parser/src/parser/expression/primary/template/mod.rs

@ -77,10 +77,12 @@ where
loop { loop {
match cursor.lex_template(self.start, interner)?.kind() { match cursor.lex_template(self.start, interner)?.kind() {
TokenKind::TemplateMiddle(template_string) => { TokenKind::TemplateMiddle(template_string) => {
let cooked = template_string let Some(cooked) = template_string.cooked() else {
.to_owned_cooked(interner) return Err(Error::general(
.map_err(Error::lex)?; "invalid escape in template literal",
self.start,
));
};
elements.push(TemplateElement::String(cooked)); elements.push(TemplateElement::String(cooked));
elements.push(TemplateElement::Expr( elements.push(TemplateElement::Expr(
Expression::new(None, true, self.allow_yield, self.allow_await) Expression::new(None, true, self.allow_yield, self.allow_await)
@ -93,10 +95,12 @@ where
)?; )?;
} }
TokenKind::TemplateNoSubstitution(template_string) => { TokenKind::TemplateNoSubstitution(template_string) => {
let cooked = template_string let Some(cooked) = template_string.cooked() else {
.to_owned_cooked(interner) return Err(Error::general(
.map_err(Error::lex)?; "invalid escape in template literal",
self.start,
));
};
elements.push(TemplateElement::String(cooked)); elements.push(TemplateElement::String(cooked));
return Ok(literal::TemplateLiteral::new(elements.into())); return Ok(literal::TemplateLiteral::new(elements.into()));
} }

Loading…
Cancel
Save