Browse Source

Fix tokenizing Unicode escape sequence in string literal (#826)

pull/833/head
Jevan Chan 4 years ago committed by GitHub
parent
commit
01dbf8ba4e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 51
      boa/src/syntax/lexer/cursor.rs
  2. 2
      boa/src/syntax/lexer/tests.rs

51
boa/src/syntax/lexer/cursor.rs

@ -7,7 +7,6 @@ use std::io::{self, Bytes, Error, ErrorKind, Read};
#[derive(Debug)] #[derive(Debug)]
pub(super) struct Cursor<R> { pub(super) struct Cursor<R> {
iter: InnerIter<R>, iter: InnerIter<R>,
peeked: Option<Option<char>>,
pos: Position, pos: Position,
strict_mode: bool, strict_mode: bool,
} }
@ -53,7 +52,6 @@ where
pub(super) fn new(inner: R) -> Self { pub(super) fn new(inner: R) -> Self {
Self { Self {
iter: InnerIter::new(inner.bytes()), iter: InnerIter::new(inner.bytes()),
peeked: None,
pos: Position::new(1, 1), pos: Position::new(1, 1),
strict_mode: false, strict_mode: false,
} }
@ -64,14 +62,7 @@ where
pub(super) fn peek(&mut self) -> Result<Option<char>, Error> { pub(super) fn peek(&mut self) -> Result<Option<char>, Error> {
let _timer = BoaProfiler::global().start_event("cursor::peek()", "Lexing"); let _timer = BoaProfiler::global().start_event("cursor::peek()", "Lexing");
let iter = &mut self.iter; self.iter.peek_char()
if let Some(v) = self.peeked {
Ok(v)
} else {
let val = iter.next_char()?;
self.peeked = Some(val);
Ok(val)
}
} }
/// Compares the character passed in to the next character; if they match, true is returned and the buffer is incremented. /// Compares the character passed in to the next character; if they match, true is returned and the buffer is incremented.
@ -81,7 +72,7 @@ where
Ok(match self.peek()? { Ok(match self.peek()? {
Some(next) if next == peek => { Some(next) if next == peek => {
let _ = self.peeked.take(); let _ = self.iter.next_char();
true true
} }
_ => false, _ => false,
@ -164,17 +155,14 @@ where
pub(crate) fn next_char(&mut self) -> Result<Option<char>, Error> { pub(crate) fn next_char(&mut self) -> Result<Option<char>, Error> {
let _timer = BoaProfiler::global().start_event("cursor::next_char()", "Lexing"); let _timer = BoaProfiler::global().start_event("cursor::next_char()", "Lexing");
let chr = match self.peeked.take() { let chr = self.iter.next_char()?;
Some(v) => v,
None => self.iter.next_char()?,
};
match chr { match chr {
Some('\r') => { Some('\r') => {
// Try to take a newline if it's next, for Windows "\r\n" newlines // Try to take a newline if it's next, for Windows "\r\n" newlines
// Otherwise, treat as a Mac OS9 bare '\r' newline // Otherwise, treat as a Mac OS9 bare '\r' newline
if self.peek()? == Some('\n') { if self.peek()? == Some('\n') {
self.peeked.take(); let _ = self.iter.next_char();
} }
self.next_line(); self.next_line();
} }
@ -191,13 +179,17 @@ where
#[derive(Debug)] #[derive(Debug)]
struct InnerIter<R> { struct InnerIter<R> {
iter: Bytes<R>, iter: Bytes<R>,
peeked_char: Option<Option<char>>,
} }
impl<R> InnerIter<R> { impl<R> InnerIter<R> {
/// Creates a new inner iterator. /// Creates a new inner iterator.
#[inline] #[inline]
fn new(iter: Bytes<R>) -> Self { fn new(iter: Bytes<R>) -> Self {
Self { iter } Self {
iter,
peeked_char: None,
}
} }
} }
@ -222,8 +214,25 @@ where
Ok(()) Ok(())
} }
/// Peeks the next UTF-8 checked character.
#[inline]
pub(super) fn peek_char(&mut self) -> Result<Option<char>, Error> {
if let Some(v) = self.peeked_char {
Ok(v)
} else {
let chr = self.next_char()?;
self.peeked_char = Some(chr);
Ok(chr)
}
}
/// Retrieves the next UTF-8 checked character. /// Retrieves the next UTF-8 checked character.
fn next_char(&mut self) -> io::Result<Option<char>> { fn next_char(&mut self) -> io::Result<Option<char>> {
if let Some(v) = self.peeked_char {
let _ = self.peeked_char.take();
return Ok(v);
}
let first_byte = match self.iter.next().transpose()? { let first_byte = match self.iter.next().transpose()? {
Some(b) => b, Some(b) => b,
None => return Ok(None), None => return Ok(None),
@ -283,11 +292,9 @@ where
/// Retrieves the next ASCII checked character. /// Retrieves the next ASCII checked character.
#[inline] #[inline]
fn next_ascii(&mut self) -> io::Result<Option<u8>> { fn next_ascii(&mut self) -> io::Result<Option<u8>> {
let next_byte = self.iter.next().transpose()?; match self.next_char() {
Ok(Some(chr)) if chr.is_ascii() => Ok(Some(chr as u8)),
match next_byte { Ok(None) => Ok(None),
Some(next) if next <= 0x7F => Ok(Some(next)),
None => Ok(None),
_ => Err(io::Error::new( _ => Err(io::Error::new(
io::ErrorKind::InvalidData, io::ErrorKind::InvalidData,
"non-ASCII byte found", "non-ASCII byte found",

2
boa/src/syntax/lexer/tests.rs

@ -628,7 +628,7 @@ fn illegal_following_numeric_literal() {
#[test] #[test]
fn codepoint_with_no_braces() { fn codepoint_with_no_braces() {
let mut lexer = Lexer::new(&br#""test\uD83Dtest""#[..]); let mut lexer = Lexer::new(&br#""test\uD38Dtest""#[..]);
assert!(lexer.next().is_ok()); assert!(lexer.next().is_ok());
} }

Loading…
Cancel
Save