Rust编写的JavaScript引擎,该项目是一个试验性质的项目。
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

147 lines
4.9 KiB

//! This module implements lexing for identifiers (foo, myvar, etc.) used in ECMAScript.
use crate::lexer::{
token::ContainsEscapeSequence, Cursor, Error, StringLiteral, Token, TokenKind, Tokenizer,
};
use boa_ast::{Keyword, Position, Span};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;
/// Identifier lexing.
///
/// More information:
/// - [ECMAScript reference][spec]
/// - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma262/#prod-Identifier
/// [mdn]: https://developer.mozilla.org/en-US/docs/Glossary/Identifier
#[derive(Debug, Clone, Copy)]
pub(super) struct Identifier {
/// First character of the identifier, supplied by the caller at construction.
///
/// It is either a valid identifier start or a `\` that introduces a `\u`
/// unicode escape; the lexer uses this value directly instead of reading it
/// from the cursor.
init: char,
}
impl Identifier {
/// Creates a new identifier/keyword lexer.
pub(super) const fn new(init: char) -> Self {
Self { init }
}
/// Checks if a character is `IdentifierStart` as per ECMAScript standards.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma262/#sec-names-and-keywords
pub(super) fn is_identifier_start(ch: u32) -> bool {
matches!(ch, 0x0024 /* $ */ | 0x005F /* _ */)
|| icu_properties::sets::id_start().contains32(ch)
}
/// Checks if a character is `IdentifierPart` as per ECMAScript standards.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma262/#sec-names-and-keywords
fn is_identifier_part(ch: u32) -> bool {
matches!(
ch,
0x0024 /* $ */ | 0x005F /* _ */ | 0x200C /* <ZWNJ> */ | 0x200D /* <ZWJ> */
) || icu_properties::sets::id_continue().contains32(ch)
}
}
impl<R> Tokenizer<R> for Identifier {
fn lex(
&mut self,
cursor: &mut Cursor<R>,
start_pos: Position,
interner: &mut Interner,
) -> Result<Token, Error>
where
R: Read,
{
let _timer = Profiler::global().start_event("Identifier", "Lexing");
let (identifier_name, contains_escaped_chars) =
Self::take_identifier_name(cursor, start_pos, self.init)?;
let token_kind = match identifier_name.parse() {
Ok(Keyword::True) => {
TokenKind::BooleanLiteral((true, ContainsEscapeSequence(contains_escaped_chars)))
}
Ok(Keyword::False) => {
TokenKind::BooleanLiteral((false, ContainsEscapeSequence(contains_escaped_chars)))
}
Ok(Keyword::Null) => {
TokenKind::NullLiteral(ContainsEscapeSequence(contains_escaped_chars))
}
Ok(keyword) => TokenKind::Keyword((keyword, contains_escaped_chars)),
_ => TokenKind::IdentifierName((
interner.get_or_intern(identifier_name.as_str()),
ContainsEscapeSequence(contains_escaped_chars),
)),
};
Ok(Token::new(token_kind, Span::new(start_pos, cursor.pos())))
}
}
impl Identifier {
    /// Collects the text of an identifier whose first character is `init`.
    ///
    /// `init` is used as given and is not read from `cursor`. When it is a `\`
    /// followed by `u`, the unicode escape is decoded and must yield a valid
    /// identifier start. The cursor is then advanced over every following
    /// identifier part, decoding `\u` escapes along the way, and stops at the
    /// first character that cannot continue the identifier.
    ///
    /// Returns the decoded identifier text together with a flag that is `true`
    /// when at least one unicode escape sequence was decoded.
    ///
    /// # Errors
    ///
    /// Returns a syntax error when an escape decodes to a code point that is
    /// not a valid identifier start/part, and propagates any error coming from
    /// the cursor or from decoding the escape sequence.
    pub(super) fn take_identifier_name<R>(
        cursor: &mut Cursor<R>,
        start_pos: Position,
        init: char,
    ) -> Result<(String, bool), Error>
    where
        R: Read,
    {
        let _timer = Profiler::global().start_event("Identifier::take_identifier_name", "Lexing");

        let mut escaped = false;
        let mut name = String::new();

        if init == '\\' && cursor.next_is(b'u')? {
            // The identifier starts with a `\u` escape: decode and validate it.
            let code_point = StringLiteral::take_unicode_escape_sequence(cursor, start_pos)?;
            if !Self::is_identifier_start(code_point) {
                return Err(Error::Syntax("invalid identifier start".into(), start_pos));
            }
            escaped = true;
            name.push(
                char::try_from(code_point)
                    .expect("all identifier starts must be convertible to strings"),
            );
        } else {
            // The caller guarantees that `init` is a valid identifier start
            name.push(init);
        }

        while let Some(peeked) = cursor.peek_char()? {
            // A `\` only starts an escape when it is directly followed by `u`.
            let starts_escape =
                peeked == 0x005C /* \ */ && cursor.peek_n(2)?.get(1) == Some(&0x75) /* u */;

            let code_point = if starts_escape {
                let pos = cursor.pos();
                // Skip the `\` and the `u` that were just peeked.
                let _backslash = cursor.next_byte();
                let _u = cursor.next_byte();
                let decoded = StringLiteral::take_unicode_escape_sequence(cursor, pos)?;
                if !Self::is_identifier_part(decoded) {
                    return Err(Error::Syntax("invalid identifier part".into(), pos));
                }
                escaped = true;
                decoded
            } else if Self::is_identifier_part(peeked) {
                cursor.next_char()?;
                peeked
            } else {
                // Anything else ends the identifier and is left in the cursor.
                break;
            };

            name.push(char::try_from(code_point).expect("checked character value"));
        }

        Ok((name, escaped))
    }
}