Refactor StringLiteral (#1084)

* Fix octal escape in string literal
* Add tests
* Fix zero escape
* Fix zero escape lookahead
* Rename variables
* Rename helper functions
* Refactor match arms
* Fix escape line terminator sequence
* Fix single character escape
* Fix line terminator and escape followed by unicode char
* Fix broken tests
* Add NonOctalDecimalEscapeSequence
* Fix comment
* Refactor
* Modify error message
* Add tests
* Rename tests
* Add test for error
* Add comments for unsafe bytes to str
* Update boa/src/syntax/lexer/string.rs
  Co-authored-by: tofpie <75836434+tofpie@users.noreply.github.com>
* Minor refactor
* Remove unsafe bytes to str
* Fix panic when reading invalid utf-8 chars
* Refactor string literal
* Support invalid utf-8 chars in string literal input
* Add cook function for template literal
* Fix line continuation bug
* Add methods for utf16 buffer trait
* Add trait comments
* Add error message for template literal
* Add and fix comments
* Hide unused exported function and modify tests
* Fix bug
* Fix merge bug
4 years ago · 038acb4989
5 changed files with 304 additions and 299 deletions
--- a/boa/src/builtins/json/tests.rs
+++ b/boa/src/builtins/json/tests.rs
@ -217,10 +217,10 @@ fn json_stringify_pretty_print() {
    );
    let expected = forward(
        &mut context,
-        r#"'{
-    "a": "b",
-    "b": "c"
-}'"#,
+        r#"'{\n'
+            +'    "a": "b",\n'
+            +'    "b": "c"\n'
+            +'}'"#,
    );
    assert_eq!(actual, expected);
 }
@ -235,10 +235,10 @@ fn json_stringify_pretty_print_four_spaces() {
    );
    let expected = forward(
        &mut context,
-        r#"'{
-    "a": "b",
-    "b": "c"
-}'"#,
+        r#"'{\n'
+            +'    "a": "b",\n'
+            +'    "b": "c"\n'
+            +'}'"#,
    );
    assert_eq!(actual, expected);
 }
@ -253,10 +253,10 @@ fn json_stringify_pretty_print_twenty_spaces() {
    );
    let expected = forward(
        &mut context,
-        r#"'{
-          "a": "b",
-          "b": "c"
-}'"#,
+        r#"'{\n'
+            +'          "a": "b",\n'
+            +'          "b": "c"\n'
+            +'}'"#,
    );
    assert_eq!(actual, expected);
 }
@ -271,10 +271,10 @@ fn json_stringify_pretty_print_with_number_object() {
    );
    let expected = forward(
        &mut context,
-        r#"'{
-          "a": "b",
-          "b": "c"
-}'"#,
+        r#"'{\n'
+        +'          "a": "b",\n'
+        +'          "b": "c"\n'
+        +'}'"#,
    );
    assert_eq!(actual, expected);
 }
@ -301,10 +301,10 @@ fn json_stringify_pretty_print_with_too_long_string() {
    );
    let expected = forward(
        &mut context,
-        r#"'{
-abcdefghij"a": "b",
-abcdefghij"b": "c"
-}'"#,
+        r#"'{\n'
+            +'abcdefghij"a": "b",\n'
+            +'abcdefghij"b": "c"\n'
+            +'}'"#,
    );
    assert_eq!(actual, expected);
 }
@ -319,10 +319,10 @@ fn json_stringify_pretty_print_with_string_object() {
    );
    let expected = forward(
        &mut context,
-        r#"'{
-abcd"a": "b",
-abcd"b": "c"
-}'"#,
+        r#"'{\n'
+            +'abcd"a": "b",\n'
+            +'abcd"b": "c"\n'
+            +'}'"#,
    );
    assert_eq!(actual, expected);
 }
@ -404,10 +404,7 @@ fn json_parse_object_with_reviver() {
 fn json_parse_sets_prototypes() {
    let mut context = Context::new();
    let init = r#"
-        const jsonString = "{
-            \"ob\":{\"ject\":1},
-            \"arr\": [0,1]
-        }";
+        const jsonString = "{\"ob\":{\"ject\":1},\"arr\": [0,1]}";
        const jsonObj = JSON.parse(jsonString);
    "#;
    eprintln!("{}", forward(&mut context, init));
--- a/boa/src/builtins/string/tests.rs
+++ b/boa/src/builtins/string/tests.rs
@ -533,34 +533,46 @@ fn test_match() {
 #[test]
 fn trim() {
    let mut context = Context::new();
-    assert_eq!(forward(&mut context, "'Hello'.trim()"), "\"Hello\"");
-    assert_eq!(forward(&mut context, "' \nHello'.trim()"), "\"Hello\"");
-    assert_eq!(forward(&mut context, "'Hello \n\r'.trim()"), "\"Hello\"");
-    assert_eq!(forward(&mut context, "' Hello '.trim()"), "\"Hello\"");
+    assert_eq!(forward(&mut context, r#"'Hello'.trim()"#), "\"Hello\"");
+    assert_eq!(forward(&mut context, r#"' \nHello'.trim()"#), "\"Hello\"");
+    assert_eq!(forward(&mut context, r#"'Hello \n\r'.trim()"#), "\"Hello\"");
+    assert_eq!(forward(&mut context, r#"' Hello '.trim()"#), "\"Hello\"");
 }

 #[test]
 fn trim_start() {
    let mut context = Context::new();
-    assert_eq!(forward(&mut context, "'Hello'.trimStart()"), "\"Hello\"");
-    assert_eq!(forward(&mut context, "' \nHello'.trimStart()"), "\"Hello\"");
+    assert_eq!(forward(&mut context, r#"'Hello'.trimStart()"#), "\"Hello\"");
    assert_eq!(
-        forward(&mut context, "'Hello \n'.trimStart()"),
+        forward(&mut context, r#"' \nHello'.trimStart()"#),
+        "\"Hello\""
+    );
+    assert_eq!(
+        forward(&mut context, r#"'Hello \n'.trimStart()"#),
        "\"Hello \n\""
    );
-    assert_eq!(forward(&mut context, "' Hello '.trimStart()"), "\"Hello \"");
+    assert_eq!(
+        forward(&mut context, r#"' Hello '.trimStart()"#),
+        "\"Hello \""
+    );
 }

 #[test]
 fn trim_end() {
    let mut context = Context::new();
-    assert_eq!(forward(&mut context, "'Hello'.trimEnd()"), "\"Hello\"");
+    assert_eq!(forward(&mut context, r#"'Hello'.trimEnd()"#), "\"Hello\"");
    assert_eq!(
-        forward(&mut context, "' \nHello'.trimEnd()"),
+        forward(&mut context, r#"' \nHello'.trimEnd()"#),
        "\" \nHello\""
    );
-    assert_eq!(forward(&mut context, "'Hello \n'.trimEnd()"), "\"Hello\"");
-    assert_eq!(forward(&mut context, "' Hello '.trimEnd()"), "\" Hello\"");
+    assert_eq!(
+        forward(&mut context, r#"'Hello \n'.trimEnd()"#),
+        "\"Hello\""
+    );
+    assert_eq!(
+        forward(&mut context, r#"' Hello '.trimEnd()"#),
+        "\" Hello\""
+    );
 }

 #[test]
--- a/boa/src/syntax/lexer/string.rs
+++ b/boa/src/syntax/lexer/string.rs
@ -8,7 +8,6 @@ use crate::{
        lexer::{Token, TokenKind},
    },
 };
-use core::convert::TryFrom;
 use std::{
    io::{self, ErrorKind, Read},
    str,
@ -47,7 +46,34 @@ impl StringLiteral {
 pub(crate) enum StringTerminator {
    SingleQuote,
    DoubleQuote,
-    End,
+}
+
+/// Extends a buffer type to store UTF-16 code units and convert to string.
+pub(crate) trait UTF16CodeUnitsBuffer {
+    /// Encodes the code point to UTF-16 code units and push to the buffer.
+    fn push_code_point(&mut self, code_point: u32);
+
+    /// Decodes the buffer into a String and replace the invalid data with the replacement character (U+FFFD).
+    fn to_string_lossy(&self) -> String;
+}
+
+impl UTF16CodeUnitsBuffer for Vec<u16> {
+    #[inline]
+    fn push_code_point(&mut self, code_point: u32) {
+        if code_point <= 65535 {
+            self.push(code_point as u16);
+        } else {
+            let cu1 = ((code_point - 65536) / 1024 + 0xD800) as u16;
+            let cu2 = ((code_point - 65536) % 1024 + 0xDC00) as u16;
+            self.push(cu1);
+            self.push(cu2);
+        }
+    }
+
+    #[inline]
+    fn to_string_lossy(&self) -> String {
+        String::from_utf16_lossy(self.as_slice())
+    }
 }

 impl<R> Tokenizer<R> for StringLiteral {
@ -72,18 +98,19 @@ impl StringLiteral {
    ///
    /// [spec]: https://tc39.es/ecma262/#prod-LineTerminator
    #[inline]
-    pub(super) fn is_line_terminator(ch: char) -> bool {
+    pub(super) fn is_line_terminator(ch: u32) -> bool {
        matches!(
            ch,
-            '\u{000A}' /* <LF> */ | '\u{000D}' /* <CR> */ | '\u{2028}' /* <LS> */ | '\u{2029}' /* <PS> */
+            0x000A /* <LF> */ | 0x000D /* <CR> */ | 0x2028 /* <LS> */ | 0x2029 /* <PS> */
        )
    }

-    pub(super) fn take_string_characters<R>(
+    #[inline]
+    fn take_string_characters<R>(
        cursor: &mut Cursor<R>,
        start_pos: Position,
        terminator: StringTerminator,
-        strict_mode: bool,
+        is_strict_mode: bool,
    ) -> Result<(String, Span), Error>
    where
        R: Read,
@ -91,97 +118,25 @@ impl StringLiteral {
        let mut buf = Vec::new();
        loop {
            let ch_start_pos = cursor.pos();
-            let ch = cursor.next_char()?.map(char::try_from).transpose().unwrap();
+            let ch = cursor.next_char()?;

            match ch {
-                Some('\'') if terminator == StringTerminator::SingleQuote => {
-                    break;
-                }
-                Some('"') if terminator == StringTerminator::DoubleQuote => {
-                    break;
-                }
-                None if terminator == StringTerminator::End => {
-                    break;
-                }
-                Some('\\') => {
+                Some(0x0027 /* ' */) if terminator == StringTerminator::SingleQuote => break,
+                Some(0x0022 /* " */) if terminator == StringTerminator::DoubleQuote => break,
+                Some(0x005C /* \ */) => {
                    let _timer = BoaProfiler::global()
                        .start_event("StringLiteral - escape sequence", "Lexing");

-                    let escape_ch = cursor
-                        .next_char()?
-                        .and_then(|byte| char::try_from(byte).ok())
-                        .ok_or_else(|| {
-                            Error::from(io::Error::new(
-                                ErrorKind::UnexpectedEof,
-                                "unterminated escape sequence in literal",
-                            ))
-                        })?;
-
-                    match escape_ch {
-                        'b' => buf.push(0x0008 /* <BS> */),
-                        't' => buf.push(0x0009 /* <HT> */),
-                        'n' => buf.push(0x000A /* <LF> */),
-                        'v' => buf.push(0x000B /* <VT> */),
-                        'f' => buf.push(0x000C /* <FF> */),
-                        'r' => buf.push(0x000D /* <CR> */),
-                        '"' => buf.push(0x0022 /* " */),
-                        '\'' => buf.push(0x0027 /* ' */),
-                        '\\' => buf.push(0x005C /* \ */),
-                        '0' if cursor
-                            .peek()?
-                            .filter(|next_byte| (b'0'..=b'9').contains(next_byte))
-                            .is_none() =>
-                        {
-                            buf.push(0x0000 /* NULL */)
-                        }
-                        'x' => {
-                            Self::take_hex_escape_sequence(cursor, ch_start_pos, Some(&mut buf))?;
-                        }
-                        'u' => {
-                            Self::take_unicode_escape_sequence(cursor, ch_start_pos, Some(&mut buf))?;
-                        }
-                        '8' | '9' => {
-                            // Grammar: NonOctalDecimalEscapeSequence
-                            if strict_mode {
-                                return Err(Error::syntax(
-                                    "\\8 and \\9 are not allowed in strict mode",
-                                    ch_start_pos,
-                                ));
-                            } else {
-                                buf.push(escape_ch as u16);
-                            }
-                        }
-                        _ if escape_ch.is_digit(8) => {
-                            Self::take_legacy_octal_escape_sequence(
-                                cursor,
-                                ch_start_pos,
-                                Some(&mut buf),
-                                strict_mode,
-                                escape_ch as u8,
-                            )?;
-                        }
-                        _ if Self::is_line_terminator(escape_ch) => {
-                            // Grammar: LineContinuation
-                            // Grammar: \ LineTerminatorSequence
-                            // LineContinuation is the empty String. Do nothing and continue lexing.
-                        }
-                        _ => {
-                            if escape_ch.len_utf16() == 1 {
-                                buf.push(escape_ch as u16);
-                            } else {
-                                buf.extend(escape_ch.encode_utf16(&mut [0u16; 2]).iter());
-                            }
-                        }
-                    };
-                }
-                Some(ch) => {
-                    if ch.len_utf16() == 1 {
-                        buf.push(ch as u16);
-                    } else {
-                        buf.extend(ch.encode_utf16(&mut [0u16; 2]).iter());
+                    if let Some(escape_value) = Self::take_escape_sequence_or_line_continuation(cursor, ch_start_pos, is_strict_mode, false)? {
+                        buf.push_code_point(escape_value);
                    }
                }
-                None => {
+                Some(0x2028) => buf.push(0x2028 /* <LS> */),
+                Some(0x2029) => buf.push(0x2029 /* <PS> */),
+                Some(ch) if !Self::is_line_terminator(ch) => {
+                    buf.push_code_point(ch);
+                }
+                _ => {
                    return Err(Error::from(io::Error::new(
                        ErrorKind::UnexpectedEof,
                        "unterminated string literal",
@ -190,17 +145,99 @@ impl StringLiteral {
            }
        }

-        Ok((
-            String::from_utf16_lossy(buf.as_slice()),
-            Span::new(start_pos, cursor.pos()),
-        ))
+        Ok((buf.to_string_lossy(), Span::new(start_pos, cursor.pos())))
+    }
+
+    #[inline]
+    pub(super) fn take_escape_sequence_or_line_continuation<R>(
+        cursor: &mut Cursor<R>,
+        start_pos: Position,
+        is_strict_mode: bool,
+        is_template_literal: bool,
+    ) -> Result<Option<u32>, Error>
+    where
+        R: Read,
+    {
+        let escape_ch = cursor.next_char()?.ok_or_else(|| {
+            Error::from(io::Error::new(
+                ErrorKind::UnexpectedEof,
+                "unterminated escape sequence in literal",
+            ))
+        })?;
+
+        let escape_value = match escape_ch {
+            0x0062 /* b */ => Some(0x0008 /* <BS> */),
+            0x0074 /* t */ => Some(0x0009 /* <HT> */),
+            0x006E /* n */ => Some(0x000A /* <LF> */),
+            0x0076 /* v */ => Some(0x000B /* <VT> */),
+            0x0066 /* f */ => Some(0x000C /* <FF> */),
+            0x0072 /* r */ => Some(0x000D /* <CR> */),
+            0x0022 /* " */ => Some(0x0022 /* " */),
+            0x0027 /* ' */ => Some(0x0027 /* ' */),
+            0x005C /* \ */ => Some(0x005C /* \ */),
+            0x0030 /* 0 */ if cursor
+                .peek()?
+                .filter(|next_byte| (b'0'..=b'9').contains(next_byte))
+                .is_none() =>
+                Some(0x0000 /* NULL */),
+            0x0078 /* x */ => {
+                Some(Self::take_hex_escape_sequence(cursor, start_pos)?)
+            }
+            0x0075 /* u */ => {
+                Some(Self::take_unicode_escape_sequence(cursor, start_pos)?)
+            }
+            0x0038 /* 8 */ | 0x0039 /* 9 */ => {
+                // Grammar: NonOctalDecimalEscapeSequence
+                if is_template_literal {
+                    return Err(Error::syntax(
+                        "\\8 and \\9 are not allowed in template literal",
+                        start_pos,
+                    ));
+                } else if is_strict_mode {
+                    return Err(Error::syntax(
+                        "\\8 and \\9 are not allowed in strict mode",
+                        start_pos,
+                    ));
+                } else {
+                    Some(escape_ch)
+                }
+            }
+            _ if (0x0030..=0x0037 /* '0'..='7' */).contains(&escape_ch) => {
+                if is_template_literal {
+                    return Err(Error::syntax(
+                        "octal escape sequences are not allowed in template literal",
+                        start_pos,
+                    ));
+                } else if is_strict_mode {
+                    return Err(Error::syntax(
+                        "octal escape sequences are not allowed in strict mode",
+                        start_pos,
+                    ));
+                } else {
+                    Some(Self::take_legacy_octal_escape_sequence(
+                        cursor,
+                        escape_ch as u8,
+                    )?)
+                }
+            }
+            _ if Self::is_line_terminator(escape_ch) => {
+                // Grammar: LineContinuation
+                // Grammar: \ LineTerminatorSequence
+                // LineContinuation is the empty String.
+                None
+            }
+            _ => {
+                Some(escape_ch)
+            }
+        };
+
+        Ok(escape_value)
    }

    #[inline]
    pub(super) fn take_unicode_escape_sequence<R>(
        cursor: &mut Cursor<R>,
        start_pos: Position,
-        code_units_buf: Option<&mut Vec<u16>>,
    ) -> Result<u32, Error>
    where
        R: Read,
@ -227,15 +264,6 @@ impl StringLiteral {
                    "Unicode codepoint must not be greater than 0x10FFFF in escape sequence",
                    start_pos,
                ));
-            } else if let Some(code_units_buf) = code_units_buf {
-                if code_point <= 65535 {
-                    code_units_buf.push(code_point as u16);
-                } else {
-                    let cu1 = ((code_point - 65536) / 1024 + 0xD800) as u16;
-                    let cu2 = ((code_point - 65536) % 1024 + 0xDC00) as u16;
-                    code_units_buf.push(cu1);
-                    code_units_buf.push(cu2);
-                }
            }

            Ok(code_point)
@ -251,10 +279,6 @@ impl StringLiteral {
                .and_then(|code_point_str| u16::from_str_radix(&code_point_str, 16).ok())
                .ok_or_else(|| Error::syntax("invalid Unicode escape sequence", start_pos))?;

-            if let Some(code_units_buf) = code_units_buf {
-                code_units_buf.push(code_point);
-            }
-
            Ok(code_point as u32)
        }
    }
@ -263,7 +287,6 @@ impl StringLiteral {
    fn take_hex_escape_sequence<R>(
        cursor: &mut Cursor<R>,
        start_pos: Position,
-        code_units_buf: Option<&mut Vec<u16>>,
    ) -> Result<u32, Error>
    where
        R: Read,
@ -275,30 +298,17 @@ impl StringLiteral {
            .and_then(|code_point_str| u16::from_str_radix(&code_point_str, 16).ok())
            .ok_or_else(|| Error::syntax("invalid Hexadecimal escape sequence", start_pos))?;

-        if let Some(code_units_buf) = code_units_buf {
-            code_units_buf.push(code_point);
-        }
-
        Ok(code_point as u32)
    }

    #[inline]
    fn take_legacy_octal_escape_sequence<R>(
        cursor: &mut Cursor<R>,
-        start_pos: Position,
-        code_units_buf: Option<&mut Vec<u16>>,
-        strict_mode: bool,
        init_byte: u8,
    ) -> Result<u32, Error>
    where
        R: Read,
    {
-        if strict_mode {
-            return Err(Error::syntax(
-                "octal escape sequences are not allowed in strict mode",
-                start_pos,
-            ));
-        }
        // Grammar: OctalDigit
        let mut code_point = (init_byte - b'0') as u32;

@ -321,10 +331,6 @@ impl StringLiteral {
            }
        }

-        if let Some(code_units_buf) = code_units_buf {
-            code_units_buf.push(code_point as u16);
-        }
-
        Ok(code_point)
    }
 }
--- a/boa/src/syntax/lexer/template.rs
+++ b/boa/src/syntax/lexer/template.rs
@ -3,13 +3,12 @@
 use super::{Cursor, Error, Tokenizer};
 use crate::{
    profiler::BoaProfiler,
-    syntax::lexer::string::{StringLiteral, StringTerminator},
+    syntax::lexer::string::{StringLiteral, UTF16CodeUnitsBuffer},
    syntax::{
        ast::{Position, Span},
        lexer::{Token, TokenKind},
    },
 };
-use std::convert::TryFrom;
 use std::io::{self, ErrorKind, Read};

 /// Template literal lexing.
@ -34,65 +33,92 @@ impl<R> Tokenizer<R> for TemplateLiteral {

        let mut buf = Vec::new();
        loop {
-            let next_chr = char::try_from(cursor.next_char()?.ok_or_else(|| {
+            let ch = cursor.next_char()?.ok_or_else(|| {
                Error::from(io::Error::new(
                    ErrorKind::UnexpectedEof,
                    "unterminated template literal",
                ))
-            })?)
-            .unwrap();
-            match next_chr {
-                '`' => {
-                    let raw = String::from_utf16_lossy(buf.as_slice());
-                    let (cooked, _) = StringLiteral::take_string_characters(
-                        &mut Cursor::with_position(raw.as_bytes(), start_pos),
-                        start_pos,
-                        StringTerminator::End,
-                        true,
-                    )?;
+            })?;
+
+            match ch {
+                0x0060 /* ` */ => {
+                    let raw = buf.to_string_lossy();
+                    // TODO: Cook the raw string only when needed (lazy evaluation)
+                    let cooked = Self::cook_template_string(&raw, start_pos, cursor.strict_mode())?;
+
                    return Ok(Token::new(
                        TokenKind::template_no_substitution(raw, cooked),
                        Span::new(start_pos, cursor.pos()),
                    ));
                }
-                '$' if cursor.peek()? == Some(b'{') => {
-                    let _ = cursor.next_byte()?;
-                    let raw = String::from_utf16_lossy(buf.as_slice());
-                    let (cooked, _) = StringLiteral::take_string_characters(
-                        &mut Cursor::with_position(raw.as_bytes(), start_pos),
-                        start_pos,
-                        StringTerminator::End,
-                        true,
-                    )?;
+                0x0024 /* $ */ if cursor.next_is(b'{')? => {
+                    let raw = buf.to_string_lossy();
+                    // TODO: Cook the raw string only when needed (lazy evaluation)
+                    let cooked = Self::cook_template_string(&raw, start_pos, cursor.strict_mode())?;
+
                    return Ok(Token::new(
                        TokenKind::template_middle(raw, cooked),
                        Span::new(start_pos, cursor.pos()),
                    ));
                }
-                '\\' => {
-                    let escape = cursor.peek()?.ok_or_else(|| {
+                0x005C /* \ */ => {
+                    let escape_ch = cursor.peek()?.ok_or_else(|| {
                        Error::from(io::Error::new(
                            ErrorKind::UnexpectedEof,
                            "unterminated escape sequence in literal",
                        ))
                    })?;
-                    buf.push('\\' as u16);
-                    match escape {
+
+                    buf.push(b'\\' as u16);
+                    match escape_ch {
                        b'`' | b'$' | b'\\' => buf.push(cursor.next_byte()?.unwrap() as u16),
                        _ => continue,
                    }
                }
-                next_ch => {
-                    if next_ch.len_utf16() == 1 {
-                        buf.push(next_ch as u16);
-                    } else {
-                        let mut code_point_bytes_buf = [0u16; 2];
-                        let code_point_bytes = next_ch.encode_utf16(&mut code_point_bytes_buf);
+                ch => {
+                    buf.push_code_point(ch);
+                }
+            }
+        }
+    }
+}

-                        buf.extend(code_point_bytes.iter());
+impl TemplateLiteral {
+    fn cook_template_string(
+        raw: &str,
+        start_pos: Position,
+        is_strict_mode: bool,
+    ) -> Result<String, Error> {
+        let mut cursor = Cursor::with_position(raw.as_bytes(), start_pos);
+        let mut buf: Vec<u16> = Vec::new();
+
+        loop {
+            let ch_start_pos = cursor.pos();
+            let ch = cursor.next_char()?;
+
+            match ch {
+                Some(0x005C /* \ */) => {
+                    if let Some(escape_value) =
+                        StringLiteral::take_escape_sequence_or_line_continuation(
+                            &mut cursor,
+                            ch_start_pos,
+                            is_strict_mode,
+                            true,
+                        )?
+                    {
+                        buf.push_code_point(escape_value);
                    }
                }
+                Some(ch) => {
+                    // The caller guarantees that sequences '`' and '${' never appear
+                    // LineTerminatorSequence <CR> <LF> is consumed by `cursor.next_char()` and returns <LF>,
+                    // which matches the TV of <CR> <LF>
+                    buf.push_code_point(ch);
+                }
+                None => break,
            }
        }
+
+        Ok(buf.to_string_lossy())
    }
 }
--- a/boa/src/syntax/lexer/tests.rs
+++ b/boa/src/syntax/lexer/tests.rs
@ -6,7 +6,6 @@ use super::token::Numeric;
 use super::*;
 use super::{Error, Position};
 use crate::syntax::ast::Keyword;
-use crate::syntax::lexer::string::{StringLiteral, StringTerminator};
 use std::str;

 fn span(start: (u32, u32), end: (u32, u32)) -> Span {
@ -815,9 +814,9 @@ fn illegal_code_point_following_numeric_literal() {

 #[test]
 fn string_unicode() {
-    let str = r#"'中文';"#;
+    let s = r#"'中文';"#;

-    let mut lexer = Lexer::new(str.as_bytes());
+    let mut lexer = Lexer::new(s.as_bytes());

    let expected = [
        TokenKind::StringLiteral("中文".into()),
@ -859,74 +858,56 @@ fn string_unicode_escape_with_braces() {
 }

 #[test]
-fn take_string_characters_unicode_escape_with_braces_2() {
-    let s = r#"\u{20ac}\u{a0}\u{a0}"#.to_string();
-
-    let mut cursor = Cursor::new(s.as_bytes());
-
-    if let Ok((s, _)) = StringLiteral::take_string_characters(
-        &mut cursor,
-        Position::new(1, 1),
-        StringTerminator::End,
-        false,
-    ) {
-        assert_eq!(s, "\u{20ac}\u{a0}\u{a0}")
-    } else {
-        panic!();
-    }
+fn string_unicode_escape_with_braces_2() {
+    let s = r#"'\u{20ac}\u{a0}\u{a0}'"#;
+
+    let mut lexer = Lexer::new(s.as_bytes());
+
+    let expected = [TokenKind::StringLiteral("\u{20ac}\u{a0}\u{a0}".into())];
+
+    expect_tokens(&mut lexer, &expected);
 }

 #[test]
-fn take_string_characters_with_single_escape() {
-    let s = r#"\Б"#.to_string();
-    let mut cursor = Cursor::new(s.as_bytes());
-    let (s, _) = StringLiteral::take_string_characters(
-        &mut cursor,
-        Position::new(1, 1),
-        StringTerminator::End,
-        false,
-    )
-    .unwrap();
-    assert_eq!(s, "Б");
+fn string_with_single_escape() {
+    let s = r#"'\Б'"#;
+
+    let mut lexer = Lexer::new(s.as_bytes());
+
+    let expected = [TokenKind::StringLiteral("Б".into())];
+
+    expect_tokens(&mut lexer, &expected);
 }

 #[test]
-fn take_string_characters_legacy_octal_escape() {
+fn string_legacy_octal_escape() {
    let test_cases = [
-        (r#"\3"#, "\u{3}"),
-        (r#"\03"#, "\u{3}"),
-        (r#"\003"#, "\u{3}"),
-        (r#"\0003"#, "\u{0}3"),
-        (r#"\43"#, "#"),
-        (r#"\043"#, "#"),
-        (r#"\101"#, "A"),
+        (r#"'\3'"#, "\u{3}"),
+        (r#"'\03'"#, "\u{3}"),
+        (r#"'\003'"#, "\u{3}"),
+        (r#"'\0003'"#, "\u{0}3"),
+        (r#"'\43'"#, "#"),
+        (r#"'\043'"#, "#"),
+        (r#"'\101'"#, "A"),
    ];

    for (s, expected) in test_cases.iter() {
-        let mut cursor = Cursor::new(s.as_bytes());
-        let (s, _) = StringLiteral::take_string_characters(
-            &mut cursor,
-            Position::new(1, 1),
-            StringTerminator::End,
-            false,
-        )
-        .unwrap();
+        let mut lexer = Lexer::new(s.as_bytes());
+
+        let expected_tokens = [TokenKind::StringLiteral((*expected).into())];

-        assert_eq!(s, *expected);
+        expect_tokens(&mut lexer, &expected_tokens);
    }

    for (s, _) in test_cases.iter() {
-        let mut cursor = Cursor::new(s.as_bytes());
-
-        if let Error::Syntax(_, pos) = StringLiteral::take_string_characters(
-            &mut cursor,
-            Position::new(1, 1),
-            StringTerminator::End,
-            true,
-        )
-        .expect_err("Octal-escape in strict mode not rejected as expected")
+        let mut lexer = Lexer::new(s.as_bytes());
+        lexer.set_strict_mode(true);
+
+        if let Error::Syntax(_, pos) = lexer
+            .next()
+            .expect_err("Octal-escape in strict mode not rejected as expected")
        {
-            assert_eq!(pos, Position::new(1, 1));
+            assert_eq!(pos, Position::new(1, 2));
        } else {
            panic!("invalid error type");
        }
@ -934,52 +915,39 @@ fn take_string_characters_legacy_octal_escape() {
 }

 #[test]
-fn take_string_characters_zero_escape() {
-    let test_cases = [(r#"\0"#, "\u{0}"), (r#"\0A"#, "\u{0}A")];
+fn string_zero_escape() {
+    let test_cases = [(r#"'\0'"#, "\u{0}"), (r#"'\0A'"#, "\u{0}A")];

    for (s, expected) in test_cases.iter() {
-        let mut cursor = Cursor::new(s.as_bytes());
-        let (s, _) = StringLiteral::take_string_characters(
-            &mut cursor,
-            Position::new(1, 1),
-            StringTerminator::End,
-            false,
-        )
-        .unwrap();
+        let mut lexer = Lexer::new(s.as_bytes());
+
+        let expected_tokens = [TokenKind::StringLiteral((*expected).into())];

-        assert_eq!(s, *expected);
+        expect_tokens(&mut lexer, &expected_tokens);
    }
 }

 #[test]
-fn take_string_characters_non_octal_decimal_escape() {
-    let test_cases = [(r#"\8"#, "8"), (r#"\9"#, "9")];
+fn string_non_octal_decimal_escape() {
+    let test_cases = [(r#"'\8'"#, "8"), (r#"'\9'"#, "9")];

    for (s, expected) in test_cases.iter() {
-        let mut cursor = Cursor::new(s.as_bytes());
-        let (s, _) = StringLiteral::take_string_characters(
-            &mut cursor,
-            Position::new(1, 1),
-            StringTerminator::End,
-            false,
-        )
-        .unwrap();
+        let mut lexer = Lexer::new(s.as_bytes());

-        assert_eq!(s, *expected);
+        let expected_tokens = [TokenKind::StringLiteral((*expected).into())];
+
+        expect_tokens(&mut lexer, &expected_tokens);
    }

    for (s, _) in test_cases.iter() {
-        let mut cursor = Cursor::new(s.as_bytes());
-
-        if let Error::Syntax(_, pos) = StringLiteral::take_string_characters(
-            &mut cursor,
-            Position::new(1, 1),
-            StringTerminator::End,
-            true,
-        )
-        .expect_err("Non-octal-decimal-escape in strict mode not rejected as expected")
+        let mut lexer = Lexer::new(s.as_bytes());
+        lexer.set_strict_mode(true);
+
+        if let Error::Syntax(_, pos) = lexer
+            .next()
+            .expect_err("Non-octal-decimal-escape in strict mode not rejected as expected")
        {
-            assert_eq!(pos, Position::new(1, 1));
+            assert_eq!(pos, Position::new(1, 2));
        } else {
            panic!("invalid error type");
        }
@ -987,18 +955,14 @@ fn take_string_characters_non_octal_decimal_escape() {
 }

 #[test]
-fn take_string_characters_line_continuation() {
-    let s = "hello \\\nworld";
-    let mut cursor = Cursor::new(s.as_bytes());
-    let (s, _) = StringLiteral::take_string_characters(
-        &mut cursor,
-        Position::new(1, 1),
-        StringTerminator::End,
-        false,
-    )
-    .unwrap();
+fn string_line_continuation() {
+    let s = "'hello \\\nworld'";
+
+    let mut lexer = Lexer::new(s.as_bytes());
+
+    let expected_tokens = [TokenKind::StringLiteral("hello world".into())];

-    assert_eq!(s, "hello world");
+    expect_tokens(&mut lexer, &expected_tokens);
 }

 mod carriage_return {