Browse Source

Refactor StringLiteral (#1084)

Fix octal escape in string literal


Add tests


Fix zero escape


Fix zero escape lookahead


Rename variables


Rename helper functions


Refactor match arms


Fix escape line terminator sequence


Fix single character escape


Fix line terminator and escape followed by unicode char


Fix broken tests


Add NonOctalDecimalEscapeSequence


Fix comment


Refactor


Modify error message


Add tests


Rename tests


Add test for error


Add comments for unsafe bytes to str


Update boa/src/syntax/lexer/string.rs

Co-authored-by: tofpie <75836434+tofpie@users.noreply.github.com>
Minor refactor


Remove unsafe bytes to str


Fix panic when reading invalid utf-8 chars


Refactor string literal


Support invalid utf-8 chars in string literal input


Add cook function for template literal


Fix line continuation bug


Add methods for utf16 buffer trait


Add trait comments


Add error message for template literal


Add and fix comments


Hide unused exported function and modify tests


Fix bug


Fix merge bug
pull/1102/head
Jevan Chan 4 years ago committed by GitHub
parent
commit
038acb4989
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 53
      boa/src/builtins/json/tests.rs
  2. 36
      boa/src/builtins/string/tests.rs
  3. 258
      boa/src/syntax/lexer/string.rs
  4. 94
      boa/src/syntax/lexer/template.rs
  5. 162
      boa/src/syntax/lexer/tests.rs

53
boa/src/builtins/json/tests.rs

@ -217,10 +217,10 @@ fn json_stringify_pretty_print() {
);
let expected = forward(
&mut context,
r#"'{
"a": "b",
"b": "c"
}'"#,
r#"'{\n'
+' "a": "b",\n'
+' "b": "c"\n'
+'}'"#,
);
assert_eq!(actual, expected);
}
@ -235,10 +235,10 @@ fn json_stringify_pretty_print_four_spaces() {
);
let expected = forward(
&mut context,
r#"'{
"a": "b",
"b": "c"
}'"#,
r#"'{\n'
+' "a": "b",\n'
+' "b": "c"\n'
+'}'"#,
);
assert_eq!(actual, expected);
}
@ -253,10 +253,10 @@ fn json_stringify_pretty_print_twenty_spaces() {
);
let expected = forward(
&mut context,
r#"'{
"a": "b",
"b": "c"
}'"#,
r#"'{\n'
+' "a": "b",\n'
+' "b": "c"\n'
+'}'"#,
);
assert_eq!(actual, expected);
}
@ -271,10 +271,10 @@ fn json_stringify_pretty_print_with_number_object() {
);
let expected = forward(
&mut context,
r#"'{
"a": "b",
"b": "c"
}'"#,
r#"'{\n'
+' "a": "b",\n'
+' "b": "c"\n'
+'}'"#,
);
assert_eq!(actual, expected);
}
@ -301,10 +301,10 @@ fn json_stringify_pretty_print_with_too_long_string() {
);
let expected = forward(
&mut context,
r#"'{
abcdefghij"a": "b",
abcdefghij"b": "c"
}'"#,
r#"'{\n'
+'abcdefghij"a": "b",\n'
+'abcdefghij"b": "c"\n'
+'}'"#,
);
assert_eq!(actual, expected);
}
@ -319,10 +319,10 @@ fn json_stringify_pretty_print_with_string_object() {
);
let expected = forward(
&mut context,
r#"'{
abcd"a": "b",
abcd"b": "c"
}'"#,
r#"'{\n'
+'abcd"a": "b",\n'
+'abcd"b": "c"\n'
+'}'"#,
);
assert_eq!(actual, expected);
}
@ -404,10 +404,7 @@ fn json_parse_object_with_reviver() {
fn json_parse_sets_prototypes() {
let mut context = Context::new();
let init = r#"
const jsonString = "{
\"ob\":{\"ject\":1},
\"arr\": [0,1]
}";
const jsonString = "{\"ob\":{\"ject\":1},\"arr\": [0,1]}";
const jsonObj = JSON.parse(jsonString);
"#;
eprintln!("{}", forward(&mut context, init));

36
boa/src/builtins/string/tests.rs

@ -533,34 +533,46 @@ fn test_match() {
#[test]
fn trim() {
let mut context = Context::new();
assert_eq!(forward(&mut context, "'Hello'.trim()"), "\"Hello\"");
assert_eq!(forward(&mut context, "' \nHello'.trim()"), "\"Hello\"");
assert_eq!(forward(&mut context, "'Hello \n\r'.trim()"), "\"Hello\"");
assert_eq!(forward(&mut context, "' Hello '.trim()"), "\"Hello\"");
assert_eq!(forward(&mut context, r#"'Hello'.trim()"#), "\"Hello\"");
assert_eq!(forward(&mut context, r#"' \nHello'.trim()"#), "\"Hello\"");
assert_eq!(forward(&mut context, r#"'Hello \n\r'.trim()"#), "\"Hello\"");
assert_eq!(forward(&mut context, r#"' Hello '.trim()"#), "\"Hello\"");
}
#[test]
fn trim_start() {
let mut context = Context::new();
assert_eq!(forward(&mut context, "'Hello'.trimStart()"), "\"Hello\"");
assert_eq!(forward(&mut context, "' \nHello'.trimStart()"), "\"Hello\"");
assert_eq!(forward(&mut context, r#"'Hello'.trimStart()"#), "\"Hello\"");
assert_eq!(
forward(&mut context, "'Hello \n'.trimStart()"),
forward(&mut context, r#"' \nHello'.trimStart()"#),
"\"Hello\""
);
assert_eq!(
forward(&mut context, r#"'Hello \n'.trimStart()"#),
"\"Hello \n\""
);
assert_eq!(forward(&mut context, "' Hello '.trimStart()"), "\"Hello \"");
assert_eq!(
forward(&mut context, r#"' Hello '.trimStart()"#),
"\"Hello \""
);
}
#[test]
fn trim_end() {
let mut context = Context::new();
assert_eq!(forward(&mut context, "'Hello'.trimEnd()"), "\"Hello\"");
assert_eq!(forward(&mut context, r#"'Hello'.trimEnd()"#), "\"Hello\"");
assert_eq!(
forward(&mut context, "' \nHello'.trimEnd()"),
forward(&mut context, r#"' \nHello'.trimEnd()"#),
"\" \nHello\""
);
assert_eq!(forward(&mut context, "'Hello \n'.trimEnd()"), "\"Hello\"");
assert_eq!(forward(&mut context, "' Hello '.trimEnd()"), "\" Hello\"");
assert_eq!(
forward(&mut context, r#"'Hello \n'.trimEnd()"#),
"\"Hello\""
);
assert_eq!(
forward(&mut context, r#"' Hello '.trimEnd()"#),
"\" Hello\""
);
}
#[test]

258
boa/src/syntax/lexer/string.rs

@ -8,7 +8,6 @@ use crate::{
lexer::{Token, TokenKind},
},
};
use core::convert::TryFrom;
use std::{
io::{self, ErrorKind, Read},
str,
@ -47,7 +46,34 @@ impl StringLiteral {
pub(crate) enum StringTerminator {
SingleQuote,
DoubleQuote,
End,
}
/// A buffer of UTF-16 code units that accepts whole code points and can be rendered as a `String`.
pub(crate) trait UTF16CodeUnitsBuffer {
    /// Appends `code_point` to the buffer, encoded as one or two UTF-16 code units.
    fn push_code_point(&mut self, code_point: u32);

    /// Decodes the buffered code units into a `String`, substituting the replacement
    /// character (U+FFFD) for any data that is not valid UTF-16.
    fn to_string_lossy(&self) -> String;
}

impl UTF16CodeUnitsBuffer for Vec<u16> {
    #[inline]
    fn push_code_point(&mut self, code_point: u32) {
        if code_point > 0xFFFF {
            // Does not fit in a single code unit: split the 0x10000-based offset into a
            // high surrogate (upper bits) and a low surrogate (lower 10 bits).
            let offset = code_point - 0x1_0000;
            let high = ((offset >> 10) + 0xD800) as u16;
            let low = ((offset & 0x3FF) + 0xDC00) as u16;
            self.extend_from_slice(&[high, low]);
        } else {
            // Fits in one code unit; stored as-is (this includes lone surrogate values).
            self.push(code_point as u16);
        }
    }

    #[inline]
    fn to_string_lossy(&self) -> String {
        String::from_utf16_lossy(self)
    }
}
impl<R> Tokenizer<R> for StringLiteral {
@ -72,18 +98,19 @@ impl StringLiteral {
///
/// [spec]: https://tc39.es/ecma262/#prod-LineTerminator
#[inline]
pub(super) fn is_line_terminator(ch: char) -> bool {
pub(super) fn is_line_terminator(ch: u32) -> bool {
matches!(
ch,
'\u{000A}' /* <LF> */ | '\u{000D}' /* <CR> */ | '\u{2028}' /* <LS> */ | '\u{2029}' /* <PS> */
0x000A /* <LF> */ | 0x000D /* <CR> */ | 0x2028 /* <LS> */ | 0x2029 /* <PS> */
)
}
pub(super) fn take_string_characters<R>(
#[inline]
fn take_string_characters<R>(
cursor: &mut Cursor<R>,
start_pos: Position,
terminator: StringTerminator,
strict_mode: bool,
is_strict_mode: bool,
) -> Result<(String, Span), Error>
where
R: Read,
@ -91,97 +118,25 @@ impl StringLiteral {
let mut buf = Vec::new();
loop {
let ch_start_pos = cursor.pos();
let ch = cursor.next_char()?.map(char::try_from).transpose().unwrap();
let ch = cursor.next_char()?;
match ch {
Some('\'') if terminator == StringTerminator::SingleQuote => {
break;
}
Some('"') if terminator == StringTerminator::DoubleQuote => {
break;
}
None if terminator == StringTerminator::End => {
break;
}
Some('\\') => {
Some(0x0027 /* ' */) if terminator == StringTerminator::SingleQuote => break,
Some(0x0022 /* " */) if terminator == StringTerminator::DoubleQuote => break,
Some(0x005C /* \ */) => {
let _timer = BoaProfiler::global()
.start_event("StringLiteral - escape sequence", "Lexing");
let escape_ch = cursor
.next_char()?
.and_then(|byte| char::try_from(byte).ok())
.ok_or_else(|| {
Error::from(io::Error::new(
ErrorKind::UnexpectedEof,
"unterminated escape sequence in literal",
))
})?;
match escape_ch {
'b' => buf.push(0x0008 /* <BS> */),
't' => buf.push(0x0009 /* <HT> */),
'n' => buf.push(0x000A /* <LF> */),
'v' => buf.push(0x000B /* <VT> */),
'f' => buf.push(0x000C /* <FF> */),
'r' => buf.push(0x000D /* <CR> */),
'"' => buf.push(0x0022 /* " */),
'\'' => buf.push(0x0027 /* ' */),
'\\' => buf.push(0x005C /* \ */),
'0' if cursor
.peek()?
.filter(|next_byte| (b'0'..=b'9').contains(next_byte))
.is_none() =>
{
buf.push(0x0000 /* NULL */)
}
'x' => {
Self::take_hex_escape_sequence(cursor, ch_start_pos, Some(&mut buf))?;
}
'u' => {
Self::take_unicode_escape_sequence(cursor, ch_start_pos, Some(&mut buf))?;
}
'8' | '9' => {
// Grammar: NonOctalDecimalEscapeSequence
if strict_mode {
return Err(Error::syntax(
"\\8 and \\9 are not allowed in strict mode",
ch_start_pos,
));
} else {
buf.push(escape_ch as u16);
}
}
_ if escape_ch.is_digit(8) => {
Self::take_legacy_octal_escape_sequence(
cursor,
ch_start_pos,
Some(&mut buf),
strict_mode,
escape_ch as u8,
)?;
}
_ if Self::is_line_terminator(escape_ch) => {
// Grammar: LineContinuation
// Grammar: \ LineTerminatorSequence
// LineContinuation is the empty String. Do nothing and continue lexing.
}
_ => {
if escape_ch.len_utf16() == 1 {
buf.push(escape_ch as u16);
} else {
buf.extend(escape_ch.encode_utf16(&mut [0u16; 2]).iter());
}
}
};
}
Some(ch) => {
if ch.len_utf16() == 1 {
buf.push(ch as u16);
} else {
buf.extend(ch.encode_utf16(&mut [0u16; 2]).iter());
if let Some(escape_value) = Self::take_escape_sequence_or_line_continuation(cursor, ch_start_pos, is_strict_mode, false)? {
buf.push_code_point(escape_value);
}
}
None => {
Some(0x2028) => buf.push(0x2028 /* <LS> */),
Some(0x2029) => buf.push(0x2029 /* <PS> */),
Some(ch) if !Self::is_line_terminator(ch) => {
buf.push_code_point(ch);
}
_ => {
return Err(Error::from(io::Error::new(
ErrorKind::UnexpectedEof,
"unterminated string literal",
@ -190,17 +145,99 @@ impl StringLiteral {
}
}
Ok((
String::from_utf16_lossy(buf.as_slice()),
Span::new(start_pos, cursor.pos()),
))
Ok((buf.to_string_lossy(), Span::new(start_pos, cursor.pos())))
}
#[inline]
pub(super) fn take_escape_sequence_or_line_continuation<R>(
cursor: &mut Cursor<R>,
start_pos: Position,
is_strict_mode: bool,
is_template_literal: bool,
) -> Result<Option<u32>, Error>
where
R: Read,
{
let escape_ch = cursor.next_char()?.ok_or_else(|| {
Error::from(io::Error::new(
ErrorKind::UnexpectedEof,
"unterminated escape sequence in literal",
))
})?;
let escape_value = match escape_ch {
0x0062 /* b */ => Some(0x0008 /* <BS> */),
0x0074 /* t */ => Some(0x0009 /* <HT> */),
0x006E /* n */ => Some(0x000A /* <LF> */),
0x0076 /* v */ => Some(0x000B /* <VT> */),
0x0066 /* f */ => Some(0x000C /* <FF> */),
0x0072 /* r */ => Some(0x000D /* <CR> */),
0x0022 /* " */ => Some(0x0022 /* " */),
0x0027 /* ' */ => Some(0x0027 /* ' */),
0x005C /* \ */ => Some(0x005C /* \ */),
0x0030 /* 0 */ if cursor
.peek()?
.filter(|next_byte| (b'0'..=b'9').contains(next_byte))
.is_none() =>
Some(0x0000 /* NULL */),
0x0078 /* x */ => {
Some(Self::take_hex_escape_sequence(cursor, start_pos)?)
}
0x0075 /* u */ => {
Some(Self::take_unicode_escape_sequence(cursor, start_pos)?)
}
0x0038 /* 8 */ | 0x0039 /* 9 */ => {
// Grammar: NonOctalDecimalEscapeSequence
if is_template_literal {
return Err(Error::syntax(
"\\8 and \\9 are not allowed in template literal",
start_pos,
));
} else if is_strict_mode {
return Err(Error::syntax(
"\\8 and \\9 are not allowed in strict mode",
start_pos,
));
} else {
Some(escape_ch)
}
}
_ if (0x0030..=0x0037 /* '0'..='7' */).contains(&escape_ch) => {
if is_template_literal {
return Err(Error::syntax(
"octal escape sequences are not allowed in template literal",
start_pos,
));
} else if is_strict_mode {
return Err(Error::syntax(
"octal escape sequences are not allowed in strict mode",
start_pos,
));
} else {
Some(Self::take_legacy_octal_escape_sequence(
cursor,
escape_ch as u8,
)?)
}
}
_ if Self::is_line_terminator(escape_ch) => {
// Grammar: LineContinuation
// Grammar: \ LineTerminatorSequence
// LineContinuation is the empty String.
None
}
_ => {
Some(escape_ch)
}
};
Ok(escape_value)
}
#[inline]
pub(super) fn take_unicode_escape_sequence<R>(
cursor: &mut Cursor<R>,
start_pos: Position,
code_units_buf: Option<&mut Vec<u16>>,
) -> Result<u32, Error>
where
R: Read,
@ -227,15 +264,6 @@ impl StringLiteral {
"Unicode codepoint must not be greater than 0x10FFFF in escape sequence",
start_pos,
));
} else if let Some(code_units_buf) = code_units_buf {
if code_point <= 65535 {
code_units_buf.push(code_point as u16);
} else {
let cu1 = ((code_point - 65536) / 1024 + 0xD800) as u16;
let cu2 = ((code_point - 65536) % 1024 + 0xDC00) as u16;
code_units_buf.push(cu1);
code_units_buf.push(cu2);
}
}
Ok(code_point)
@ -251,10 +279,6 @@ impl StringLiteral {
.and_then(|code_point_str| u16::from_str_radix(&code_point_str, 16).ok())
.ok_or_else(|| Error::syntax("invalid Unicode escape sequence", start_pos))?;
if let Some(code_units_buf) = code_units_buf {
code_units_buf.push(code_point);
}
Ok(code_point as u32)
}
}
@ -263,7 +287,6 @@ impl StringLiteral {
fn take_hex_escape_sequence<R>(
cursor: &mut Cursor<R>,
start_pos: Position,
code_units_buf: Option<&mut Vec<u16>>,
) -> Result<u32, Error>
where
R: Read,
@ -275,30 +298,17 @@ impl StringLiteral {
.and_then(|code_point_str| u16::from_str_radix(&code_point_str, 16).ok())
.ok_or_else(|| Error::syntax("invalid Hexadecimal escape sequence", start_pos))?;
if let Some(code_units_buf) = code_units_buf {
code_units_buf.push(code_point);
}
Ok(code_point as u32)
}
#[inline]
fn take_legacy_octal_escape_sequence<R>(
cursor: &mut Cursor<R>,
start_pos: Position,
code_units_buf: Option<&mut Vec<u16>>,
strict_mode: bool,
init_byte: u8,
) -> Result<u32, Error>
where
R: Read,
{
if strict_mode {
return Err(Error::syntax(
"octal escape sequences are not allowed in strict mode",
start_pos,
));
}
// Grammar: OctalDigit
let mut code_point = (init_byte - b'0') as u32;
@ -321,10 +331,6 @@ impl StringLiteral {
}
}
if let Some(code_units_buf) = code_units_buf {
code_units_buf.push(code_point as u16);
}
Ok(code_point)
}
}

94
boa/src/syntax/lexer/template.rs

@ -3,13 +3,12 @@
use super::{Cursor, Error, Tokenizer};
use crate::{
profiler::BoaProfiler,
syntax::lexer::string::{StringLiteral, StringTerminator},
syntax::lexer::string::{StringLiteral, UTF16CodeUnitsBuffer},
syntax::{
ast::{Position, Span},
lexer::{Token, TokenKind},
},
};
use std::convert::TryFrom;
use std::io::{self, ErrorKind, Read};
/// Template literal lexing.
@ -34,65 +33,92 @@ impl<R> Tokenizer<R> for TemplateLiteral {
let mut buf = Vec::new();
loop {
let next_chr = char::try_from(cursor.next_char()?.ok_or_else(|| {
let ch = cursor.next_char()?.ok_or_else(|| {
Error::from(io::Error::new(
ErrorKind::UnexpectedEof,
"unterminated template literal",
))
})?)
.unwrap();
match next_chr {
'`' => {
let raw = String::from_utf16_lossy(buf.as_slice());
let (cooked, _) = StringLiteral::take_string_characters(
&mut Cursor::with_position(raw.as_bytes(), start_pos),
start_pos,
StringTerminator::End,
true,
)?;
})?;
match ch {
0x0060 /* ` */ => {
let raw = buf.to_string_lossy();
// TODO: Cook the raw string only when needed (lazy evaluation)
let cooked = Self::cook_template_string(&raw, start_pos, cursor.strict_mode())?;
return Ok(Token::new(
TokenKind::template_no_substitution(raw, cooked),
Span::new(start_pos, cursor.pos()),
));
}
'$' if cursor.peek()? == Some(b'{') => {
let _ = cursor.next_byte()?;
let raw = String::from_utf16_lossy(buf.as_slice());
let (cooked, _) = StringLiteral::take_string_characters(
&mut Cursor::with_position(raw.as_bytes(), start_pos),
start_pos,
StringTerminator::End,
true,
)?;
0x0024 /* $ */ if cursor.next_is(b'{')? => {
let raw = buf.to_string_lossy();
// TODO: Cook the raw string only when needed (lazy evaluation)
let cooked = Self::cook_template_string(&raw, start_pos, cursor.strict_mode())?;
return Ok(Token::new(
TokenKind::template_middle(raw, cooked),
Span::new(start_pos, cursor.pos()),
));
}
'\\' => {
let escape = cursor.peek()?.ok_or_else(|| {
0x005C /* \ */ => {
let escape_ch = cursor.peek()?.ok_or_else(|| {
Error::from(io::Error::new(
ErrorKind::UnexpectedEof,
"unterminated escape sequence in literal",
))
})?;
buf.push('\\' as u16);
match escape {
buf.push(b'\\' as u16);
match escape_ch {
b'`' | b'$' | b'\\' => buf.push(cursor.next_byte()?.unwrap() as u16),
_ => continue,
}
}
next_ch => {
if next_ch.len_utf16() == 1 {
buf.push(next_ch as u16);
} else {
let mut code_point_bytes_buf = [0u16; 2];
let code_point_bytes = next_ch.encode_utf16(&mut code_point_bytes_buf);
ch => {
buf.push_code_point(ch);
}
}
}
}
}
buf.extend(code_point_bytes.iter());
impl TemplateLiteral {
    /// Computes the cooked value of a raw template string by resolving its escape sequences.
    ///
    /// `raw` is re-lexed through a fresh cursor positioned at `start_pos`. Every `\` is handed
    /// to `StringLiteral::take_escape_sequence_or_line_continuation` in template-literal mode,
    /// and every other code point is copied through unchanged.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while reading from the cursor or while lexing an
    /// escape sequence.
    fn cook_template_string(
        raw: &str,
        start_pos: Position,
        is_strict_mode: bool,
    ) -> Result<String, Error> {
        let mut raw_cursor = Cursor::with_position(raw.as_bytes(), start_pos);
        let mut cooked: Vec<u16> = Vec::new();

        loop {
            // Remember where this character starts so escape errors can point at it.
            let escape_pos = raw_cursor.pos();
            let code_point = match raw_cursor.next_char()? {
                Some(code_point) => code_point,
                // End of the raw text: everything has been cooked.
                None => return Ok(cooked.to_string_lossy()),
            };

            if code_point != 0x005C /* \ */ {
                // The caller guarantees that the sequences '`' and '${' never appear here.
                // Per the original author's note, `cursor.next_char()` consumes the
                // LineTerminatorSequence <CR> <LF> and returns <LF>, which matches the
                // TV of <CR> <LF>.
                cooked.push_code_point(code_point);
                continue;
            }

            let escape_value = StringLiteral::take_escape_sequence_or_line_continuation(
                &mut raw_cursor,
                escape_pos,
                is_strict_mode,
                true,
            )?;
            // `None` is a line continuation, which adds nothing to the cooked string.
            if let Some(value) = escape_value {
                cooked.push_code_point(value);
            }
        }
    }
}

162
boa/src/syntax/lexer/tests.rs

@ -6,7 +6,6 @@ use super::token::Numeric;
use super::*;
use super::{Error, Position};
use crate::syntax::ast::Keyword;
use crate::syntax::lexer::string::{StringLiteral, StringTerminator};
use std::str;
fn span(start: (u32, u32), end: (u32, u32)) -> Span {
@ -815,9 +814,9 @@ fn illegal_code_point_following_numeric_literal() {
#[test]
fn string_unicode() {
let str = r#"'中文';"#;
let s = r#"'中文';"#;
let mut lexer = Lexer::new(str.as_bytes());
let mut lexer = Lexer::new(s.as_bytes());
let expected = [
TokenKind::StringLiteral("中文".into()),
@ -859,74 +858,56 @@ fn string_unicode_escape_with_braces() {
}
#[test]
fn take_string_characters_unicode_escape_with_braces_2() {
let s = r#"\u{20ac}\u{a0}\u{a0}"#.to_string();
let mut cursor = Cursor::new(s.as_bytes());
if let Ok((s, _)) = StringLiteral::take_string_characters(
&mut cursor,
Position::new(1, 1),
StringTerminator::End,
false,
) {
assert_eq!(s, "\u{20ac}\u{a0}\u{a0}")
} else {
panic!();
}
fn string_unicode_escape_with_braces_2() {
let s = r#"'\u{20ac}\u{a0}\u{a0}'"#;
let mut lexer = Lexer::new(s.as_bytes());
let expected = [TokenKind::StringLiteral("\u{20ac}\u{a0}\u{a0}".into())];
expect_tokens(&mut lexer, &expected);
}
#[test]
fn take_string_characters_with_single_escape() {
let s = r#"\Б"#.to_string();
let mut cursor = Cursor::new(s.as_bytes());
let (s, _) = StringLiteral::take_string_characters(
&mut cursor,
Position::new(1, 1),
StringTerminator::End,
false,
)
.unwrap();
assert_eq!(s, "Б");
fn string_with_single_escape() {
let s = r#"'\Б'"#;
let mut lexer = Lexer::new(s.as_bytes());
let expected = [TokenKind::StringLiteral("Б".into())];
expect_tokens(&mut lexer, &expected);
}
#[test]
fn take_string_characters_legacy_octal_escape() {
fn string_legacy_octal_escape() {
let test_cases = [
(r#"\3"#, "\u{3}"),
(r#"\03"#, "\u{3}"),
(r#"\003"#, "\u{3}"),
(r#"\0003"#, "\u{0}3"),
(r#"\43"#, "#"),
(r#"\043"#, "#"),
(r#"\101"#, "A"),
(r#"'\3'"#, "\u{3}"),
(r#"'\03'"#, "\u{3}"),
(r#"'\003'"#, "\u{3}"),
(r#"'\0003'"#, "\u{0}3"),
(r#"'\43'"#, "#"),
(r#"'\043'"#, "#"),
(r#"'\101'"#, "A"),
];
for (s, expected) in test_cases.iter() {
let mut cursor = Cursor::new(s.as_bytes());
let (s, _) = StringLiteral::take_string_characters(
&mut cursor,
Position::new(1, 1),
StringTerminator::End,
false,
)
.unwrap();
let mut lexer = Lexer::new(s.as_bytes());
let expected_tokens = [TokenKind::StringLiteral((*expected).into())];
assert_eq!(s, *expected);
expect_tokens(&mut lexer, &expected_tokens);
}
for (s, _) in test_cases.iter() {
let mut cursor = Cursor::new(s.as_bytes());
if let Error::Syntax(_, pos) = StringLiteral::take_string_characters(
&mut cursor,
Position::new(1, 1),
StringTerminator::End,
true,
)
.expect_err("Octal-escape in strict mode not rejected as expected")
let mut lexer = Lexer::new(s.as_bytes());
lexer.set_strict_mode(true);
if let Error::Syntax(_, pos) = lexer
.next()
.expect_err("Octal-escape in strict mode not rejected as expected")
{
assert_eq!(pos, Position::new(1, 1));
assert_eq!(pos, Position::new(1, 2));
} else {
panic!("invalid error type");
}
@ -934,52 +915,39 @@ fn take_string_characters_legacy_octal_escape() {
}
#[test]
fn take_string_characters_zero_escape() {
let test_cases = [(r#"\0"#, "\u{0}"), (r#"\0A"#, "\u{0}A")];
fn string_zero_escape() {
let test_cases = [(r#"'\0'"#, "\u{0}"), (r#"'\0A'"#, "\u{0}A")];
for (s, expected) in test_cases.iter() {
let mut cursor = Cursor::new(s.as_bytes());
let (s, _) = StringLiteral::take_string_characters(
&mut cursor,
Position::new(1, 1),
StringTerminator::End,
false,
)
.unwrap();
let mut lexer = Lexer::new(s.as_bytes());
let expected_tokens = [TokenKind::StringLiteral((*expected).into())];
assert_eq!(s, *expected);
expect_tokens(&mut lexer, &expected_tokens);
}
}
#[test]
fn take_string_characters_non_octal_decimal_escape() {
let test_cases = [(r#"\8"#, "8"), (r#"\9"#, "9")];
fn string_non_octal_decimal_escape() {
let test_cases = [(r#"'\8'"#, "8"), (r#"'\9'"#, "9")];
for (s, expected) in test_cases.iter() {
let mut cursor = Cursor::new(s.as_bytes());
let (s, _) = StringLiteral::take_string_characters(
&mut cursor,
Position::new(1, 1),
StringTerminator::End,
false,
)
.unwrap();
let mut lexer = Lexer::new(s.as_bytes());
assert_eq!(s, *expected);
let expected_tokens = [TokenKind::StringLiteral((*expected).into())];
expect_tokens(&mut lexer, &expected_tokens);
}
for (s, _) in test_cases.iter() {
let mut cursor = Cursor::new(s.as_bytes());
if let Error::Syntax(_, pos) = StringLiteral::take_string_characters(
&mut cursor,
Position::new(1, 1),
StringTerminator::End,
true,
)
.expect_err("Non-octal-decimal-escape in strict mode not rejected as expected")
let mut lexer = Lexer::new(s.as_bytes());
lexer.set_strict_mode(true);
if let Error::Syntax(_, pos) = lexer
.next()
.expect_err("Non-octal-decimal-escape in strict mode not rejected as expected")
{
assert_eq!(pos, Position::new(1, 1));
assert_eq!(pos, Position::new(1, 2));
} else {
panic!("invalid error type");
}
@ -987,18 +955,14 @@ fn take_string_characters_non_octal_decimal_escape() {
}
#[test]
fn take_string_characters_line_continuation() {
let s = "hello \\\nworld";
let mut cursor = Cursor::new(s.as_bytes());
let (s, _) = StringLiteral::take_string_characters(
&mut cursor,
Position::new(1, 1),
StringTerminator::End,
false,
)
.unwrap();
fn string_line_continuation() {
let s = "'hello \\\nworld'";
let mut lexer = Lexer::new(s.as_bytes());
let expected_tokens = [TokenKind::StringLiteral("hello world".into())];
assert_eq!(s, "hello world");
expect_tokens(&mut lexer, &expected_tokens);
}
mod carriage_return {

Loading…
Cancel
Save