Browse Source

Add numeric separator lexing (#995)

* Add numeric separator handling

* Implement suggestions from PR review

Co-authored-by: tofpie <tofpie@users.noreply.github.com>
pull/1004/head
tofpie 4 years ago committed by GitHub
parent
commit
6f3641d593
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 8
      boa/src/syntax/lexer/mod.rs
  2. 91
      boa/src/syntax/lexer/number.rs
  3. 41
      boa/src/syntax/lexer/tests.rs

8
boa/src/syntax/lexer/mod.rs

@ -213,7 +213,13 @@ impl<R> Lexer<R> {
Punctuator::Colon.into(), Punctuator::Colon.into(),
Span::new(start, self.cursor.pos()), Span::new(start, self.cursor.pos()),
)), )),
'.' => SpreadLiteral::new().lex(&mut self.cursor, start), '.' => {
if self.cursor.peek()?.map(|c| c >= b'0' && c <= b'9') == Some(true) {
NumberLiteral::new(next_ch as u8).lex(&mut self.cursor, start)
} else {
SpreadLiteral::new().lex(&mut self.cursor, start)
}
}
'(' => Ok(Token::new( '(' => Ok(Token::new(
Punctuator::OpenParen.into(), Punctuator::OpenParen.into(),
Span::new(start, self.cursor.pos()), Span::new(start, self.cursor.pos()),

91
boa/src/syntax/lexer/number.rs

@ -110,11 +110,52 @@ where
} }
// Consume the decimal digits. // Consume the decimal digits.
cursor.take_while_ascii_pred(buf, &|ch| ch.is_digit(kind.base()))?; take_integer(buf, cursor, kind, true)?;
Ok(()) Ok(())
} }
fn take_integer<R>(
buf: &mut Vec<u8>,
cursor: &mut Cursor<R>,
kind: &NumericKind,
separator_allowed: bool,
) -> Result<(), Error>
where
R: Read,
{
let mut prev_is_underscore = false;
let mut pos = cursor.pos();
while cursor.next_is_ascii_pred(&|c| c.is_digit(kind.base()) || c == '_')? {
pos = cursor.pos();
match cursor.next_byte()? {
Some(c) if char::from(c).is_digit(kind.base()) => {
prev_is_underscore = false;
buf.push(c);
}
Some(b'_') if separator_allowed => {
if prev_is_underscore {
return Err(Error::syntax(
"only one underscore is allowed as numeric separator",
cursor.pos(),
));
}
prev_is_underscore = true;
}
Some(b'_') if !separator_allowed => {
return Err(Error::syntax("separator is not allowed", pos));
}
_ => (),
}
}
if prev_is_underscore {
return Err(Error::syntax(
"underscores are not allowed at the end of numeric literals",
pos,
));
}
Ok(())
}
/// Utility function for checking the NumericLiteral is not followed by an `IdentifierStart` or `DecimalDigit` character. /// Utility function for checking the NumericLiteral is not followed by an `IdentifierStart` or `DecimalDigit` character.
/// ///
/// More information: /// More information:
@ -149,6 +190,7 @@ impl<R> Tokenizer<R> for NumberLiteral {
let mut kind = NumericKind::Integer(10); let mut kind = NumericKind::Integer(10);
let c = cursor.peek(); let c = cursor.peek();
let mut legacy_octal = false;
if self.init == b'0' { if self.init == b'0' {
if let Some(ch) = c? { if let Some(ch) = c? {
@ -180,7 +222,7 @@ impl<R> Tokenizer<R> for NumberLiteral {
// Checks if the next char after '0o' is a digit of that base. if not return an error. // Checks if the next char after '0o' is a digit of that base. if not return an error.
if !cursor.next_is_ascii_pred(&|ch| ch.is_digit(8))? { if !cursor.next_is_ascii_pred(&|ch| ch.is_digit(8))? {
return Err(Error::syntax( return Err(Error::syntax(
"expected hexadecimal digit after number base prefix", "expected octal digit after number base prefix",
cursor.pos(), cursor.pos(),
)); ));
} }
@ -196,7 +238,7 @@ impl<R> Tokenizer<R> for NumberLiteral {
// Checks if the next char after '0b' is a digit of that base. if not return an error. // Checks if the next char after '0b' is a digit of that base. if not return an error.
if !cursor.next_is_ascii_pred(&|ch| ch.is_digit(2))? { if !cursor.next_is_ascii_pred(&|ch| ch.is_digit(2))? {
return Err(Error::syntax( return Err(Error::syntax(
"expected hexadecimal digit after number base prefix", "expected binary digit after number base prefix",
cursor.pos(), cursor.pos(),
)); ));
} }
@ -211,6 +253,7 @@ impl<R> Tokenizer<R> for NumberLiteral {
)); ));
} }
byte => { byte => {
legacy_octal = true;
let ch = char::from(byte); let ch = char::from(byte);
if ch.is_digit(8) { if ch.is_digit(8) {
// LegacyOctalIntegerLiteral // LegacyOctalIntegerLiteral
@ -237,8 +280,6 @@ impl<R> Tokenizer<R> for NumberLiteral {
"leading 0's are not allowed in strict mode", "leading 0's are not allowed in strict mode",
start_pos, start_pos,
)); ));
} else {
buf.push(cursor.next_byte()?.expect("Number digit vanished"));
} }
} // Else indicates that the symbol is a non-number. } // Else indicates that the symbol is a non-number.
} }
@ -253,34 +294,54 @@ impl<R> Tokenizer<R> for NumberLiteral {
} }
} }
// Consume digits until a non-digit character is encountered or all the characters are consumed. let next = if self.init == b'.' {
cursor.take_while_ascii_pred(&mut buf, &|c: char| c.is_digit(kind.base()))?; Some(b'.')
} else {
// Consume digits and separators until a non-digit non-separator
// character is encountered or all the characters are consumed.
take_integer(&mut buf, cursor, &kind, !legacy_octal)?;
cursor.peek()?
};
// The non-digit character could be: // The non-digit character could be:
// 'n' To indicate a BigIntLiteralSuffix. // 'n' To indicate a BigIntLiteralSuffix.
// '.' To indicate a decimal seperator. // '.' To indicate a decimal separator.
// 'e' | 'E' To indicate an ExponentPart. // 'e' | 'E' To indicate an ExponentPart.
match cursor.peek()? { match next {
Some(b'n') => { Some(b'n') => {
// DecimalBigIntegerLiteral // DecimalBigIntegerLiteral
// Lexing finished. // Lexing finished.
// Consume the n // Consume the n
if legacy_octal {
return Err(Error::syntax(
"'n' suffix not allowed in octal representation",
cursor.pos(),
));
}
cursor.next_byte()?.expect("n character vanished"); cursor.next_byte()?.expect("n character vanished");
kind = kind.to_bigint(); kind = kind.to_bigint();
} }
Some(b'.') => { Some(b'.') => {
if kind.base() == 10 { if kind.base() == 10 {
// Only base 10 numbers can have a decimal seperator. // Only base 10 numbers can have a decimal separator.
// Number literal lexing finished if a . is found for a number in a different base. // Number literal lexing finished if a . is found for a number in a different base.
if self.init != b'.' {
cursor.next_byte()?.expect(". token vanished"); cursor.next_byte()?.expect("'.' token vanished");
buf.push(b'.'); // Consume the . buf.push(b'.'); // Consume the .
}
kind = NumericKind::Rational; kind = NumericKind::Rational;
// Consume digits until a non-digit character is encountered or all the characters are consumed. if cursor.peek()? == Some(b'_') {
cursor.take_while_ascii_pred(&mut buf, &|c: char| c.is_digit(kind.base()))?; return Err(Error::syntax(
"numeric separator not allowed after '.'",
cursor.pos(),
));
}
// Consume digits and separators until a non-digit non-separator
// character is encountered or all the characters are consumed.
take_integer(&mut buf, cursor, &kind, true)?;
// The non-digit character at this point must be an 'e' or 'E' to indicate an Exponent Part. // The non-digit character at this point must be an 'e' or 'E' to indicate an Exponent Part.
// Another '.' or 'n' is not allowed. // Another '.' or 'n' is not allowed.

41
boa/src/syntax/lexer/tests.rs

@ -389,6 +389,44 @@ fn numbers() {
expect_tokens(&mut lexer, &expected); expect_tokens(&mut lexer, &expected);
} }
/// Lexes a series of numeric literals containing `_` separators and checks
/// that each one produces the same token as its separator-free spelling.
#[test]
fn numbers_with_separators() {
    let source =
        "1_0 2_0 0x3_4 056 7.8_9 4_2. 5_0e2 5_0e+2 5_0e-4 0b1_0 1_0.0_0e2 1.0E-0_1 -3_2";
    let mut lexer = Lexer::new(source.as_bytes());

    let expected = [
        // 1_0
        TokenKind::numeric_literal(10),
        // 2_0
        TokenKind::numeric_literal(20),
        // 0x3_4
        TokenKind::numeric_literal(52),
        // 056 (no separator; legacy octal form)
        TokenKind::numeric_literal(46),
        // 7.8_9
        TokenKind::numeric_literal(7.89),
        // 4_2.
        TokenKind::numeric_literal(42),
        // 5_0e2
        TokenKind::numeric_literal(5000),
        // 5_0e+2
        TokenKind::numeric_literal(5000),
        // 5_0e-4
        TokenKind::numeric_literal(0.005),
        // 0b1_0
        TokenKind::numeric_literal(2),
        // 1_0.0_0e2
        TokenKind::numeric_literal(1000),
        // 1.0E-0_1
        TokenKind::numeric_literal(0.1),
        // -3_2 is lexed as a `-` punctuator followed by the number 32
        TokenKind::Punctuator(Punctuator::Sub),
        TokenKind::numeric_literal(32),
    ];

    expect_tokens(&mut lexer, &expected);
}
/// Checks that numeric literals with misplaced `_` separators are rejected.
///
/// Each input is lexed on its own and the first token request must fail.
/// The inputs cover a separator right after a base prefix, at the end of the
/// literal, right after the decimal point, at the start of the exponent, and
/// doubled separators.
#[test]
fn numbers_with_bad_separators() {
    let numbers = [
        "0b_10", "0x_10", "10_", "1._10", "1e+_10", "1E_10", "10__00",
    ];

    for n in numbers.iter() {
        let mut lexer = Lexer::new(n.as_bytes());
        // Name the offending literal so a failure points at the exact input.
        assert!(
            lexer.next().is_err(),
            "expected a lexing error for numeric literal `{}`",
            n
        );
    }
}
#[test] #[test]
fn big_exp_numbers() { fn big_exp_numbers() {
let mut lexer = Lexer::new(&b"1.0e25 1.0e36 9.0e50"[..]); let mut lexer = Lexer::new(&b"1.0e25 1.0e36 9.0e50"[..]);
@ -418,8 +456,7 @@ fn implicit_octal_edge_case() {
let expected = [ let expected = [
TokenKind::numeric_literal(36), TokenKind::numeric_literal(36),
TokenKind::Punctuator(Punctuator::Dot), TokenKind::numeric_literal(0.5),
TokenKind::numeric_literal(5),
TokenKind::numeric_literal(94.5), TokenKind::numeric_literal(94.5),
]; ];

Loading…
Cancel
Save