//! A lexical analyzer for JavaScript source code.
//!
//! This module contains the Boa lexer or tokenizer implementation.
//!
//! The Lexer splits its input source code into a sequence of input elements called tokens,
//! represented by the [Token](../ast/token/struct.Token.html) structure. It also removes
//! whitespace and comments and attaches them to the next token.
//!
//! This is tightly coupled with the parser due to the JavaScript goal-symbol requirements
//! as documented by the spec.
//!
//! More information:
//!  - [ECMAScript reference][spec]
//!
//! [spec]: https://tc39.es/ecma262/#sec-ecmascript-language-lexical-grammar
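//!
//! # Example
//!
//! A minimal usage sketch (not compiled as a doc-test; the import path and the
//! `Debug` output of `TokenKind` are assumptions):
//!
//! ```ignore
//! use boa::syntax::lexer::Lexer;
//!
//! // `Lexer::new` accepts any `Read` implementor, so a byte slice works.
//! let mut lexer = Lexer::new("let answer = 42;".as_bytes());
//!
//! // `next` yields `Ok(Some(token))` until the input is exhausted, then `Ok(None)`;
//! // lexing failures surface as `Err(_)`.
//! while let Ok(Some(token)) = lexer.next() {
//!     println!("{:?}", token.kind());
//! }
//! ```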

mod comment;
mod cursor;
pub mod error;
mod identifier;
mod number;
mod operator;
mod regex;
mod spread;
mod string;
mod template;
pub mod token;

#[cfg(test)]
mod tests;

use self::{
    comment::{MultiLineComment, SingleLineComment},
    cursor::Cursor,
    identifier::Identifier,
    number::NumberLiteral,
    operator::Operator,
    regex::RegexLiteral,
    spread::SpreadLiteral,
    string::StringLiteral,
    template::TemplateLiteral,
};
use crate::syntax::ast::{Punctuator, Span};
pub use crate::{profiler::BoaProfiler, syntax::ast::Position};
pub use error::Error;
use std::io::Read;
pub use token::{Token, TokenKind};
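
/// The shared interface of the sub-lexers: each implementor lexes the remainder of
/// a single kind of token once its leading character has been consumed.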
trait Tokenizer<R> {
    /// Lexes the next token.
    fn lex(&mut self, cursor: &mut Cursor<R>, start_pos: Position) -> Result<Token, Error>
    where
        R: Read;
}

/// Lexer or tokenizer for the Boa JavaScript Engine.
#[derive(Debug)]
pub struct Lexer<R> {
    cursor: Cursor<R>,
    goal_symbol: InputElement,
}

impl<R> Lexer<R> {
    /// Checks if a character is whitespace as per ECMAScript standards.
    ///
    /// The Rust `char::is_whitespace` function and the ECMAScript standard use different sets of
    /// characters as whitespace:
    /// * Rust uses `\p{White_Space}`,
    /// * The ECMAScript standard uses `\p{Space_Separator}` + `\u{0009}`, `\u{000B}`, `\u{000C}`, `\u{FEFF}`
    ///
    /// [More information](https://tc39.es/ecma262/#table-32)
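    ///
    /// For example, the two sets disagree on the byte-order mark: `'\u{FEFF}'` is
    /// whitespace per ECMAScript, but Rust's `'\u{FEFF}'.is_whitespace()` is `false`.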
    fn is_whitespace(ch: char) -> bool {
        match ch {
            '\u{0020}' | '\u{0009}' | '\u{000B}' | '\u{000C}' | '\u{00A0}' | '\u{FEFF}' |
            // Unicode Space_Separator category (minus \u{0020} and \u{00A0}, which are already listed above)
            '\u{1680}' | '\u{2000}'..='\u{200A}' | '\u{202F}' | '\u{205F}' | '\u{3000}' => true,
            _ => false,
        }
    }

    /// Sets the goal symbol for the lexer.
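    ///
    /// The parser drives this: it selects the goal before lexing positions where a
    /// '/' could start either a division operator or a regular expression literal.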
    #[inline]
    pub(crate) fn set_goal(&mut self, elm: InputElement) {
        self.goal_symbol = elm;
    }

    /// Gets the goal symbol the lexer is currently using.
    #[inline]
    pub(crate) fn get_goal(&self) -> InputElement {
        self.goal_symbol
    }

    /// Creates a new lexer.
    #[inline]
    pub fn new(reader: R) -> Self
    where
        R: Read,
    {
        Self {
            cursor: Cursor::new(reader),
            goal_symbol: Default::default(),
        }
    }

    // Handles lexing of a token starting with '/', with the '/' already consumed.
    // This could be a divide symbol or the start of a regex.
    //
    // A '/' can also begin a comment; once the comment cases below are ruled out,
    // the remaining possibilities depend on the current goal symbol.
    //
    // As per https://tc39.es/ecma262/#sec-ecmascript-language-lexical-grammar
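    //
    // For example: with goal symbol `InputElement::Div`, the '/' in `a / b` lexes as
    // `Punctuator::Div`, while with `InputElement::RegExp` the '/' in `let r = /abc/;`
    // starts a `RegexLiteral`.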
    pub(crate) fn lex_slash_token(&mut self, start: Position) -> Result<Token, Error>
    where
        R: Read,
    {
        let _timer = BoaProfiler::global().start_event("lex_slash_token", "Lexing");

        if let Some(c) = self.cursor.peek()? {
            match c {
                '/' => {
                    self.cursor.next_char()?.expect("/ token vanished"); // Consume the '/'
                    SingleLineComment.lex(&mut self.cursor, start)
                }
                '*' => {
                    self.cursor.next_char()?.expect("* token vanished"); // Consume the '*'
                    MultiLineComment.lex(&mut self.cursor, start)
                }
                ch => {
                    match self.get_goal() {
                        InputElement::Div | InputElement::TemplateTail => {
                            // Only the division punctuators are allowed here, not a regex.
                            if ch == '=' {
                                // Indicates this is an AssignDiv.
                                self.cursor.next_char()?.expect("= token vanished"); // Consume the '='
                                Ok(Token::new(
                                    Punctuator::AssignDiv.into(),
                                    Span::new(start, self.cursor.pos()),
                                ))
                            } else {
                                Ok(Token::new(
                                    Punctuator::Div.into(),
                                    Span::new(start, self.cursor.pos()),
                                ))
                            }
                        }
                        InputElement::RegExp | InputElement::RegExpOrTemplateTail => {
                            // Can be a regular expression.
                            RegexLiteral.lex(&mut self.cursor, start)
                        }
                    }
                }
            }
        } else {
            Err(Error::syntax(
                "Abrupt end: expecting token '/', '*', '=' or a regex",
                start,
            ))
        }
    }

    /// Retrieves the next token from the lexer.
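    ///
    /// Returns `Ok(None)` once the input has been fully consumed.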
    // We intentionally don't implement the Iterator trait, as `Result<Option<Token>>` is cleaner to handle.
    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> Result<Option<Token>, Error>
    where
        R: Read,
    {
        let _timer = BoaProfiler::global().start_event("next()", "Lexing");

        let (start, next_chr) = loop {
            let start = self.cursor.pos();
            if let Some(next_chr) = self.cursor.next_char()? {
                // Ignore whitespace
                if !Self::is_whitespace(next_chr) {
                    break (start, next_chr);
                }
            } else {
                return Ok(None);
            }
        };

        // TODO: setting strict mode on/off.
        let strict_mode = false;

        let token = match next_chr {
            '\r' | '\n' | '\u{2028}' | '\u{2029}' => Ok(Token::new(
                TokenKind::LineTerminator,
                Span::new(start, self.cursor.pos()),
            )),
            '"' | '\'' => StringLiteral::new(next_chr).lex(&mut self.cursor, start),
            '`' => TemplateLiteral.lex(&mut self.cursor, start),
            _ if next_chr.is_digit(10) => {
                NumberLiteral::new(next_chr, strict_mode).lex(&mut self.cursor, start)
            }
            _ if next_chr.is_alphabetic() || next_chr == '$' || next_chr == '_' => {
                Identifier::new(next_chr).lex(&mut self.cursor, start)
            }
            ';' => Ok(Token::new(
                Punctuator::Semicolon.into(),
                Span::new(start, self.cursor.pos()),
            )),
            ':' => Ok(Token::new(
                Punctuator::Colon.into(),
                Span::new(start, self.cursor.pos()),
            )),
            '.' => SpreadLiteral::new().lex(&mut self.cursor, start),
            '(' => Ok(Token::new(
                Punctuator::OpenParen.into(),
                Span::new(start, self.cursor.pos()),
            )),
            ')' => Ok(Token::new(
                Punctuator::CloseParen.into(),
                Span::new(start, self.cursor.pos()),
            )),
            ',' => Ok(Token::new(
                Punctuator::Comma.into(),
                Span::new(start, self.cursor.pos()),
            )),
            '{' => Ok(Token::new(
                Punctuator::OpenBlock.into(),
                Span::new(start, self.cursor.pos()),
            )),
            '}' => Ok(Token::new(
                Punctuator::CloseBlock.into(),
                Span::new(start, self.cursor.pos()),
            )),
            '[' => Ok(Token::new(
                Punctuator::OpenBracket.into(),
                Span::new(start, self.cursor.pos()),
            )),
            ']' => Ok(Token::new(
                Punctuator::CloseBracket.into(),
                Span::new(start, self.cursor.pos()),
            )),
            '?' => Ok(Token::new(
                Punctuator::Question.into(),
                Span::new(start, self.cursor.pos()),
            )),
            '/' => self.lex_slash_token(start),
            '=' | '*' | '+' | '-' | '%' | '|' | '&' | '^' | '<' | '>' | '!' | '~' => {
                Operator::new(next_chr).lex(&mut self.cursor, start)
            }
            _ => {
                let details = format!(
                    "unexpected '{}' at line {}, column {}",
                    next_chr,
                    start.line_number(),
                    start.column_number()
                );
                Err(Error::syntax(details, start))
            }
        }?;

        if token.kind() == &TokenKind::Comment {
            // Skip comment
            self.next()
        } else {
            Ok(Some(token))
        }
    }
}

/// ECMAScript goal symbols.
///
/// <https://tc39.es/ecma262/#sec-ecmascript-language-lexical-grammar>
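///
/// The variants correspond to the `InputElementDiv`, `InputElementRegExp`,
/// `InputElementRegExpOrTemplateTail` and `InputElementTemplateTail` goal symbols
/// of the lexical grammar.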
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum InputElement {
    Div,
    RegExp,
    RegExpOrTemplateTail,
    TemplateTail,
}

impl Default for InputElement {
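    // Lexing starts in the `RegExp` goal: at the beginning of the input a regular
    // expression literal may appear, while a division operator cannot.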
    fn default() -> Self {
        InputElement::RegExp
    }
}