mirror of https://github.com/boa-dev/boa.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
151 lines
4.8 KiB
151 lines
4.8 KiB
4 years ago
|
//! This module implements lexing for identifiers (foo, myvar, etc.) used in the JavaScript programming language.
|
||
|
|
||
|
use super::{Cursor, Error, Tokenizer};
|
||
2 years ago
|
use crate::lexer::{StringLiteral, Token, TokenKind};
|
||
2 years ago
|
use boa_ast::{Keyword, Position, Span};
|
||
3 years ago
|
use boa_interner::Interner;
|
||
|
use boa_profiler::Profiler;
|
||
4 years ago
|
use boa_unicode::UnicodeProperties;
|
||
3 years ago
|
use std::io::Read;
|
||
4 years ago
|
|
||
4 years ago
|
/// Identifier lexing.
|
||
|
///
|
||
|
/// More information:
|
||
|
/// - [ECMAScript reference][spec]
|
||
|
/// - [MDN documentation][mdn]
|
||
|
///
|
||
|
/// [spec]: https://tc39.es/ecma262/#prod-Identifier
|
||
|
/// [mdn]: https://developer.mozilla.org/en-US/docs/Glossary/Identifier
|
||
|
#[derive(Debug, Clone, Copy)]
|
||
|
pub(super) struct Identifier {
|
||
|
init: char,
|
||
|
}
|
||
|
|
||
|
impl Identifier {
|
||
|
/// Creates a new identifier/keyword lexer.
|
||
|
pub(super) fn new(init: char) -> Self {
|
||
|
Self { init }
|
||
|
}
|
||
4 years ago
|
|
||
3 years ago
|
/// Checks if a character is `IdentifierStart` as per ECMAScript standards.
|
||
4 years ago
|
///
|
||
|
/// More information:
|
||
|
/// - [ECMAScript reference][spec]
|
||
|
///
|
||
|
/// [spec]: https://tc39.es/ecma262/#sec-names-and-keywords
|
||
|
pub(super) fn is_identifier_start(ch: u32) -> bool {
|
||
|
matches!(ch, 0x0024 /* $ */ | 0x005F /* _ */)
|
||
|
|| if let Ok(ch) = char::try_from(ch) {
|
||
|
ch.is_id_start()
|
||
|
} else {
|
||
|
false
|
||
|
}
|
||
|
}
|
||
|
|
||
3 years ago
|
/// Checks if a character is `IdentifierPart` as per ECMAScript standards.
|
||
4 years ago
|
///
|
||
|
/// More information:
|
||
|
/// - [ECMAScript reference][spec]
|
||
|
///
|
||
|
/// [spec]: https://tc39.es/ecma262/#sec-names-and-keywords
|
||
|
fn is_identifier_part(ch: u32) -> bool {
|
||
|
matches!(
|
||
|
ch,
|
||
|
0x0024 /* $ */ | 0x005F /* _ */ | 0x200C /* <ZWNJ> */ | 0x200D /* <ZWJ> */
|
||
|
) || if let Ok(ch) = char::try_from(ch) {
|
||
|
ch.is_id_continue()
|
||
|
} else {
|
||
|
false
|
||
|
}
|
||
|
}
|
||
4 years ago
|
}
|
||
|
|
||
|
impl<R> Tokenizer<R> for Identifier {
|
||
3 years ago
|
fn lex(
|
||
|
&mut self,
|
||
|
cursor: &mut Cursor<R>,
|
||
|
start_pos: Position,
|
||
|
interner: &mut Interner,
|
||
|
) -> Result<Token, Error>
|
||
4 years ago
|
where
|
||
|
R: Read,
|
||
|
{
|
||
3 years ago
|
let _timer = Profiler::global().start_event("Identifier", "Lexing");
|
||
4 years ago
|
|
||
4 years ago
|
let (identifier_name, contains_escaped_chars) =
|
||
|
Self::take_identifier_name(cursor, start_pos, self.init)?;
|
||
|
|
||
|
let token_kind = if let Ok(keyword) = identifier_name.parse() {
|
||
|
match keyword {
|
||
|
Keyword::True => TokenKind::BooleanLiteral(true),
|
||
|
Keyword::False => TokenKind::BooleanLiteral(false),
|
||
|
Keyword::Null => TokenKind::NullLiteral,
|
||
3 years ago
|
_ => TokenKind::Keyword((keyword, contains_escaped_chars)),
|
||
4 years ago
|
}
|
||
4 years ago
|
} else {
|
||
2 years ago
|
TokenKind::identifier(interner.get_or_intern(identifier_name.as_str()))
|
||
4 years ago
|
};
|
||
|
|
||
4 years ago
|
Ok(Token::new(token_kind, Span::new(start_pos, cursor.pos())))
|
||
|
}
|
||
|
}
|
||
|
|
||
|
impl Identifier {
|
||
|
#[inline]
|
||
3 years ago
|
pub(super) fn take_identifier_name<R>(
|
||
4 years ago
|
cursor: &mut Cursor<R>,
|
||
|
start_pos: Position,
|
||
|
init: char,
|
||
|
) -> Result<(String, bool), Error>
|
||
|
where
|
||
|
R: Read,
|
||
|
{
|
||
3 years ago
|
let _timer = Profiler::global().start_event("Identifier::take_identifier_name", "Lexing");
|
||
3 years ago
|
|
||
4 years ago
|
let mut contains_escaped_chars = false;
|
||
|
let mut identifier_name = if init == '\\' && cursor.next_is(b'u')? {
|
||
|
let ch = StringLiteral::take_unicode_escape_sequence(cursor, start_pos)?;
|
||
|
|
||
|
if Self::is_identifier_start(ch) {
|
||
|
contains_escaped_chars = true;
|
||
3 years ago
|
String::from(
|
||
|
char::try_from(ch)
|
||
|
.expect("all identifier starts must be convertible to strings"),
|
||
|
)
|
||
4 years ago
|
} else {
|
||
|
return Err(Error::Syntax("invalid identifier start".into(), start_pos));
|
||
|
}
|
||
|
} else {
|
||
|
// The caller guarantees that `init` is a valid identifier start
|
||
|
String::from(init)
|
||
|
};
|
||
|
|
||
|
loop {
|
||
|
let ch = match cursor.peek_char()? {
|
||
2 years ago
|
Some(0x005C /* \ */) if cursor.peek_n(2)?.get(1) == Some(&0x75) /* u */ => {
|
||
4 years ago
|
let pos = cursor.pos();
|
||
3 years ago
|
let _next = cursor.next_byte();
|
||
|
let _next = cursor.next_byte();
|
||
4 years ago
|
let ch = StringLiteral::take_unicode_escape_sequence(cursor, pos)?;
|
||
|
|
||
|
if Self::is_identifier_part(ch) {
|
||
|
contains_escaped_chars = true;
|
||
|
ch
|
||
|
} else {
|
||
|
return Err(Error::Syntax("invalid identifier part".into(), pos));
|
||
|
}
|
||
|
}
|
||
|
Some(ch) if Self::is_identifier_part(ch) => {
|
||
|
let _ = cursor.next_char()?;
|
||
|
ch
|
||
|
},
|
||
|
_ => break,
|
||
|
};
|
||
|
|
||
3 years ago
|
identifier_name.push(char::try_from(ch).expect("checked character value"));
|
||
4 years ago
|
}
|
||
|
|
||
|
Ok((identifier_name, contains_escaped_chars))
|
||
4 years ago
|
}
|
||
|
}
|