Browse Source

Strict Mode Lex/Parse (#717)

* Add strict mode to lex trait

* Lexer reserved keyword identifier strict mode lexing

* Prevent with statement in strict mode

* Add strict_mode flag to parse methods

* Enable function scope strict mode

* Script scope strict mode

* Delete statement strict mode

* Function decl in block in strict mode

* Fix merge

* Strict mode code tests

* Move strict_mode flag onto Lexer

* Move strict_mode lex flag to Lexer Cursor

* Isolated failing test parts

* Fixed bug in func decl in block in strict mode

* BindingIdentifier strict mode

* use to_string

* Remove dbg

* Add missing spec ref

* Missing spec ref for dup func params

* Remove clone

* .to_string -> .as_ref
pull/799/head
Paul Lancaster 4 years ago committed by GitHub
parent
commit
6bcfc7a237
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 121
      boa/src/exec/tests.rs
  2. 12
      boa/src/syntax/lexer/cursor.rs
  3. 32
      boa/src/syntax/lexer/identifier.rs
  4. 17
      boa/src/syntax/lexer/mod.rs
  5. 9
      boa/src/syntax/lexer/number.rs
  6. 10
      boa/src/syntax/parser/cursor/buffered_lexer/mod.rs
  7. 10
      boa/src/syntax/parser/cursor/mod.rs
  8. 2
      boa/src/syntax/parser/expression/mod.rs
  9. 16
      boa/src/syntax/parser/expression/unary.rs
  10. 21
      boa/src/syntax/parser/function/mod.rs
  11. 21
      boa/src/syntax/parser/mod.rs
  12. 13
      boa/src/syntax/parser/statement/block/mod.rs
  13. 67
      boa/src/syntax/parser/statement/mod.rs
  14. 2
      boa/src/syntax/parser/statement/switch/mod.rs

121
boa/src/exec/tests.rs

@ -1383,3 +1383,124 @@ fn test_identifier_op() {
let scenario = "break = 1";
assert_eq!(&exec(scenario), "\"SyntaxError\": \"expected token \'identifier\', got \'=\' in binding identifier at line 1, col 7\"");
}
#[test]
fn test_strict_mode_octal() {
    // Checks as per https://tc39.es/ecma262/#sec-literals-numeric-literals that 0 prefix
    // octal number literal syntax is a syntax error in strict mode.
    let scenario = r#"
'use strict';
var n = 023;
"#;

    let mut engine = Context::new();
    let string = forward(&mut engine, scenario);

    // Report the actual engine output in the failure message instead of
    // unconditionally printing it through a leftover `dbg!`.
    assert!(
        string.starts_with("Uncaught \"SyntaxError\": "),
        "expected a SyntaxError, got: {}",
        string
    );
}
#[test]
fn test_strict_mode_with() {
    // Checks as per https://tc39.es/ecma262/#sec-with-statement-static-semantics-early-errors
    // that a with statement is an error in strict mode code.
    let scenario = r#"
'use strict';
function f(x, o) {
with (o) {
console.log(x);
}
}
"#;

    let mut engine = Context::new();
    let string = forward(&mut engine, scenario);

    // Report the actual engine output in the failure message instead of
    // unconditionally printing it through a leftover `dbg!`.
    assert!(
        string.starts_with("Uncaught \"SyntaxError\": "),
        "expected a SyntaxError, got: {}",
        string
    );
}
#[test]
fn test_strict_mode_delete() {
    // Checks as per https://tc39.es/ecma262/#sec-delete-operator-static-semantics-early-errors
    // that delete on a variable name is an error in strict mode code.
    let scenario = r#"
'use strict';
let x = 10;
delete x;
"#;

    let mut engine = Context::new();
    let string = forward(&mut engine, scenario);

    // Report the actual engine output in the failure message instead of
    // unconditionally printing it through a leftover `dbg!`.
    assert!(
        string.starts_with("Uncaught \"SyntaxError\": "),
        "expected a SyntaxError, got: {}",
        string
    );
}
#[test]
fn test_strict_mode_reserved_name() {
    // Checks that usage of a reserved keyword for an identifier name is
    // an error in strict mode code as per https://tc39.es/ecma262/#sec-strict-mode-of-ecmascript.
    let test_cases = [
        "var implements = 10;",
        "var interface = 10;",
        "var package = 10;",
        "var private = 10;",
        "var protected = 10;",
        "var public = 10;",
        "var static = 10;",
        "var eval = 10;",
        "var arguments = 10;",
        "var let = 10;",
        "var yield = 10;",
    ];

    for case in &test_cases {
        // A fresh context per case so one declaration cannot affect the next.
        let mut engine = Context::new();
        let scenario = format!("'use strict'; \n {}", case);

        let string = forward(&mut engine, &scenario);

        // Name the offending case in the failure message: without it a failure
        // inside the loop does not say which declaration was accepted.
        assert!(
            string.starts_with("Uncaught \"SyntaxError\": "),
            "expected a SyntaxError for `{}`, got: {}",
            case,
            string
        );
    }
}
#[test]
fn test_strict_mode_func_decl_in_block() {
    // Checks that a function declaration in a block is an error in
    // strict mode code as per https://tc39.es/ecma262/#early-error.
    //
    // NOTE(review): the linked anchor is the generic definition of "early error",
    // not a specific rule. ES2015+ appears to allow block-level function
    // declarations in strict mode (block scoped) - confirm which rule/edition
    // this expectation is meant to follow.
    let scenario = r#"
'use strict';
let a = 4;
let b = 5;
if (a < b) { function f() {} }
"#;

    let mut engine = Context::new();
    let string = forward(&mut engine, scenario);

    // Report the actual engine output in the failure message instead of
    // unconditionally printing it through a leftover `dbg!`.
    assert!(
        string.starts_with("Uncaught \"SyntaxError\": "),
        "expected a SyntaxError, got: {}",
        string
    );
}
#[test]
fn test_strict_mode_dup_func_parameters() {
    // Checks that a function cannot contain duplicate parameter
    // names in strict mode code as per https://tc39.es/ecma262/#sec-function-definitions-static-semantics-early-errors.
    let scenario = r#"
'use strict';
function f(a, b, b) {}
"#;

    let mut engine = Context::new();
    let string = forward(&mut engine, scenario);

    // Report the actual engine output in the failure message instead of
    // unconditionally printing it through a leftover `dbg!`.
    assert!(
        string.starts_with("Uncaught \"SyntaxError\": "),
        "expected a SyntaxError, got: {}",
        string
    );
}

12
boa/src/syntax/lexer/cursor.rs

@ -9,6 +9,7 @@ pub(super) struct Cursor<R> {
iter: InnerIter<R>,
peeked: Option<Option<char>>,
pos: Position,
strict_mode: bool,
}
impl<R> Cursor<R> {
@ -38,6 +39,16 @@ impl<R> Cursor<R> {
let current_line = self.pos.line_number();
self.pos = Position::new(current_line, 1);
}
/// Returns the current value of this cursor's strict-mode flag.
#[inline]
pub(super) fn strict_mode(&self) -> bool {
self.strict_mode
}
/// Overwrites this cursor's strict-mode flag with `strict_mode`.
#[inline]
pub(super) fn set_strict_mode(&mut self, strict_mode: bool) {
    self.strict_mode = strict_mode;
}
}
impl<R> Cursor<R>
@ -51,6 +62,7 @@ where
iter: InnerIter::new(inner.bytes()),
peeked: None,
pos: Position::new(1, 1),
strict_mode: false,
}
}

32
boa/src/syntax/lexer/identifier.rs

@ -4,12 +4,26 @@ use super::{Cursor, Error, Tokenizer};
use crate::{
profiler::BoaProfiler,
syntax::{
ast::{Position, Span},
ast::{Keyword, Position, Span},
lexer::{Token, TokenKind},
},
};
use std::io::Read;
/// Names that the identifier lexer rejects with a syntax error when they are
/// lexed as a plain (non-keyword) identifier while the cursor is in strict mode.
///
/// More information:
///  - [ECMAScript reference](https://tc39.es/ecma262/#sec-keywords-and-reserved-words)
///
/// NOTE(review): `eval` and `arguments` are, per the spec, only disallowed as
/// binding/assignment targets in strict mode; rejecting every occurrence at lex
/// time looks stricter than that - confirm this is intended.
const STRICT_FORBIDDEN_IDENTIFIERS: [&str; 11] = [
"eval",
"arguments",
"implements",
"interface",
"let",
"package",
"private",
"protected",
"public",
"static",
"yield",
];
/// Identifier lexing.
///
/// More information:
@ -49,8 +63,24 @@ impl<R> Tokenizer<R> for Identifier {
"null" => TokenKind::NullLiteral,
slice => {
if let Ok(keyword) = slice.parse() {
if cursor.strict_mode() && keyword == Keyword::With {
return Err(Error::Syntax(
"using 'with' statement not allowed in strict mode".into(),
start_pos,
));
}
TokenKind::Keyword(keyword)
} else {
if cursor.strict_mode() && STRICT_FORBIDDEN_IDENTIFIERS.contains(&slice) {
return Err(Error::Syntax(
format!(
"using future reserved keyword '{}' not allowed in strict mode",
slice
)
.into(),
start_pos,
));
}
TokenKind::identifier(slice)
}
}

17
boa/src/syntax/lexer/mod.rs

@ -90,6 +90,16 @@ impl<R> Lexer<R> {
self.goal_symbol
}
/// Returns the strict-mode flag reported by the lexer's cursor.
#[inline]
pub(super) fn strict_mode(&self) -> bool {
self.cursor.strict_mode()
}
/// Forwards `strict_mode` to the lexer's cursor, which stores the flag.
#[inline]
pub(super) fn set_strict_mode(&mut self, strict_mode: bool) {
    self.cursor.set_strict_mode(strict_mode);
}
/// Creates a new lexer.
#[inline]
pub fn new(reader: R) -> Self
@ -180,9 +190,6 @@ impl<R> Lexer<R> {
}
};
// TODO, setting strict mode on/off.
let strict_mode = false;
let token = match next_chr {
'\r' | '\n' | '\u{2028}' | '\u{2029}' => Ok(Token::new(
TokenKind::LineTerminator,
@ -190,9 +197,7 @@ impl<R> Lexer<R> {
)),
'"' | '\'' => StringLiteral::new(next_chr).lex(&mut self.cursor, start),
'`' => TemplateLiteral.lex(&mut self.cursor, start),
_ if next_chr.is_digit(10) => {
NumberLiteral::new(next_chr, strict_mode).lex(&mut self.cursor, start)
}
_ if next_chr.is_digit(10) => NumberLiteral::new(next_chr).lex(&mut self.cursor, start),
_ if next_chr.is_alphabetic() || next_chr == '$' || next_chr == '_' => {
Identifier::new(next_chr).lex(&mut self.cursor, start)
}

9
boa/src/syntax/lexer/number.rs

@ -24,13 +24,12 @@ use std::{io::Read, str::FromStr};
#[derive(Debug, Clone, Copy)]
pub(super) struct NumberLiteral {
init: char,
strict_mode: bool,
}
impl NumberLiteral {
/// Creates a new string literal lexer.
pub(super) fn new(init: char, strict_mode: bool) -> Self {
Self { init, strict_mode }
pub(super) fn new(init: char) -> Self {
Self { init }
}
}
@ -187,7 +186,7 @@ impl<R> Tokenizer<R> for NumberLiteral {
ch => {
if ch.is_digit(8) {
// LegacyOctalIntegerLiteral
if self.strict_mode {
if cursor.strict_mode() {
// LegacyOctalIntegerLiteral is forbidden with strict mode true.
return Err(Error::syntax(
"implicit octal literals are not allowed in strict mode",
@ -205,7 +204,7 @@ impl<R> Tokenizer<R> for NumberLiteral {
// Indicates a numerical digit comes after then 0 but it isn't an octal digit
// so therefore this must be a number with an unneeded leading 0. This is
// forbidden in strict mode.
if self.strict_mode {
if cursor.strict_mode() {
return Err(Error::syntax(
"leading 0's are not allowed in strict mode",
start_pos,

10
boa/src/syntax/parser/cursor/buffered_lexer/mod.rs

@ -82,6 +82,16 @@ where
self.lexer.lex_slash_token(start).map_err(|e| e.into())
}
/// Returns the strict-mode flag reported by the wrapped lexer.
#[inline]
pub(super) fn strict_mode(&self) -> bool {
self.lexer.strict_mode()
}
/// Forwards `strict_mode` to the wrapped lexer.
#[inline]
pub(super) fn set_strict_mode(&mut self, strict_mode: bool) {
    self.lexer.set_strict_mode(strict_mode);
}
/// Fills the peeking buffer with the next token.
///
/// It will not fill two line terminators one after the other.

10
boa/src/syntax/parser/cursor/mod.rs

@ -56,6 +56,16 @@ where
self.buffered_lexer.peek(skip_n, true)
}
/// Returns the strict-mode flag reported by the buffered lexer.
#[inline]
pub(super) fn strict_mode(&self) -> bool {
self.buffered_lexer.strict_mode()
}
/// Forwards `strict_mode` to the buffered lexer.
#[inline]
pub(super) fn set_strict_mode(&mut self, strict_mode: bool) {
    self.buffered_lexer.set_strict_mode(strict_mode);
}
/// Returns an error if the next token is not of kind `kind`.
///
/// Note: it will consume the next token only if the next token is the expected type.

2
boa/src/syntax/parser/expression/mod.rs

@ -61,7 +61,7 @@ macro_rules! expression { ($name:ident, $lower:ident, [$( $op:path ),*], [$( $lo
{
type Output = Node;
fn parse(self, cursor: &mut Cursor<R>) -> ParseResult {
fn parse(self, cursor: &mut Cursor<R>)-> ParseResult {
let _timer = BoaProfiler::global().start_event($profile, "Parsing");
if $goal.is_some() {

16
boa/src/syntax/parser/expression/unary.rs

@ -15,7 +15,7 @@ use crate::{
op::UnaryOp,
Keyword, Punctuator,
},
lexer::TokenKind,
lexer::{Error as LexError, TokenKind},
parser::{
expression::update::UpdateExpression, AllowAwait, AllowYield, Cursor, ParseError,
ParseResult, TokenParser,
@ -62,10 +62,22 @@ where
let _timer = BoaProfiler::global().start_event("UnaryExpression", "Parsing");
let tok = cursor.peek(0)?.ok_or(ParseError::AbruptEnd)?;
let token_start = tok.span().start();
match tok.kind() {
TokenKind::Keyword(Keyword::Delete) => {
cursor.next()?.expect("Delete keyword vanished"); // Consume the token.
Ok(node::UnaryOp::new(UnaryOp::Delete, self.parse(cursor)?).into())
let val = self.parse(cursor)?;
if cursor.strict_mode() {
if let Node::Identifier(_) = val {
return Err(ParseError::lex(LexError::Syntax(
"Delete <variable> statements not allowed in strict mode".into(),
token_start,
)));
}
}
Ok(node::UnaryOp::new(UnaryOp::Delete, val).into())
}
TokenKind::Keyword(Keyword::Void) => {
cursor.next()?.expect("Void keyword vanished"); // Consume the token.

21
boa/src/syntax/parser/function/mod.rs

@ -259,12 +259,27 @@ where
fn parse(self, cursor: &mut Cursor<R>) -> Result<Self::Output, ParseError> {
let _timer = BoaProfiler::global().start_event("FunctionStatementList", "Parsing");
let global_strict_mode = cursor.strict_mode();
if let Some(tk) = cursor.peek(0)? {
if tk.kind() == &Punctuator::CloseBlock.into() {
return Ok(Vec::new().into());
match tk.kind() {
TokenKind::Punctuator(Punctuator::CloseBlock) => {
return Ok(Vec::new().into());
}
TokenKind::StringLiteral(string) | TokenKind::TemplateLiteral(string) => {
if string == &"use strict".into() {
cursor.set_strict_mode(true);
}
}
_ => {}
}
}
StatementList::new(self.allow_yield, self.allow_await, true, true).parse(cursor)
let stmlist =
StatementList::new(self.allow_yield, self.allow_await, true, true, true).parse(cursor);
// Reset strict mode back to the global scope.
cursor.set_strict_mode(global_strict_mode);
stmlist
}
}

21
boa/src/syntax/parser/mod.rs

@ -9,7 +9,7 @@ mod statement;
mod tests;
pub use self::error::{ParseError, ParseResult};
use crate::syntax::ast::node::StatementList;
use crate::syntax::{ast::node::StatementList, lexer::TokenKind};
use cursor::Cursor;
@ -121,10 +121,19 @@ where
type Output = StatementList;
fn parse(self, cursor: &mut Cursor<R>) -> Result<Self::Output, ParseError> {
if cursor.peek(0)?.is_some() {
ScriptBody.parse(cursor)
} else {
Ok(StatementList::from(Vec::new()))
match cursor.peek(0)? {
Some(tok) => {
match tok.kind() {
TokenKind::StringLiteral(string) | TokenKind::TemplateLiteral(string) => {
if string.as_ref() == "use strict" {
cursor.set_strict_mode(true);
}
}
_ => {}
}
ScriptBody.parse(cursor)
}
None => Ok(StatementList::from(Vec::new())),
}
}
}
@ -145,6 +154,6 @@ where
type Output = StatementList;
fn parse(self, cursor: &mut Cursor<R>) -> Result<Self::Output, ParseError> {
self::statement::StatementList::new(false, false, false, false).parse(cursor)
self::statement::StatementList::new(false, false, false, false, true).parse(cursor)
}
}

13
boa/src/syntax/parser/statement/block/mod.rs

@ -78,10 +78,15 @@ where
}
}
let statement_list =
StatementList::new(self.allow_yield, self.allow_await, self.allow_return, true)
.parse(cursor)
.map(node::Block::from)?;
let statement_list = StatementList::new(
self.allow_yield,
self.allow_await,
self.allow_return,
true,
true,
)
.parse(cursor)
.map(node::Block::from)?;
cursor.expect(Punctuator::CloseBlock, "block")?;
Ok(statement_list)

67
boa/src/syntax/parser/statement/mod.rs

@ -41,7 +41,7 @@ use super::{AllowAwait, AllowReturn, AllowYield, Cursor, ParseError, TokenParser
use crate::{
syntax::{
ast::{node, Keyword, Node, Punctuator},
lexer::{InputElement, TokenKind},
lexer::{Error as LexError, InputElement, TokenKind},
},
BoaProfiler,
};
@ -215,6 +215,7 @@ pub(super) struct StatementList {
allow_await: AllowAwait,
allow_return: AllowReturn,
break_when_closingbraces: bool,
in_block: bool,
}
impl StatementList {
@ -224,6 +225,7 @@ impl StatementList {
allow_await: A,
allow_return: R,
break_when_closingbraces: bool,
in_block: bool,
) -> Self
where
Y: Into<AllowYield>,
@ -235,6 +237,7 @@ impl StatementList {
allow_await: allow_await.into(),
allow_return: allow_return.into(),
break_when_closingbraces,
in_block,
}
}
@ -268,9 +271,13 @@ impl StatementList {
return Err(ParseError::AbruptEnd);
}
let item =
StatementListItem::new(self.allow_yield, self.allow_await, self.allow_return)
.parse(cursor)?;
let item = StatementListItem::new(
self.allow_yield,
self.allow_await,
self.allow_return,
self.in_block,
)
.parse(cursor)?;
items.push(item);
@ -313,9 +320,13 @@ where
_ => {}
}
let item =
StatementListItem::new(self.allow_yield, self.allow_await, self.allow_return)
.parse(cursor)?;
let item = StatementListItem::new(
self.allow_yield,
self.allow_await,
self.allow_return,
self.in_block,
)
.parse(cursor)?;
items.push(item);
// move the cursor forward for any consecutive semicolon.
@ -343,11 +354,12 @@ struct StatementListItem {
allow_yield: AllowYield,
allow_await: AllowAwait,
allow_return: AllowReturn,
in_block: bool,
}
impl StatementListItem {
/// Creates a new `StatementListItem` parser.
fn new<Y, A, R>(allow_yield: Y, allow_await: A, allow_return: R) -> Self
fn new<Y, A, R>(allow_yield: Y, allow_await: A, allow_return: R, in_block: bool) -> Self
where
Y: Into<AllowYield>,
A: Into<AllowAwait>,
@ -357,6 +369,7 @@ impl StatementListItem {
allow_yield: allow_yield.into(),
allow_await: allow_await.into(),
allow_return: allow_return.into(),
in_block,
}
}
}
@ -369,12 +382,20 @@ where
fn parse(self, cursor: &mut Cursor<R>) -> Result<Self::Output, ParseError> {
let _timer = BoaProfiler::global().start_event("StatementListItem", "Parsing");
let strict_mode = cursor.strict_mode();
let tok = cursor.peek(0)?.ok_or(ParseError::AbruptEnd)?;
match *tok.kind() {
TokenKind::Keyword(Keyword::Function)
| TokenKind::Keyword(Keyword::Const)
| TokenKind::Keyword(Keyword::Let) => {
TokenKind::Keyword(Keyword::Function) => {
if strict_mode && self.in_block {
return Err(ParseError::lex(LexError::Syntax(
"Function declaration in blocks not allowed in strict mode".into(),
tok.span().start(),
)));
}
Declaration::new(self.allow_yield, self.allow_await, true).parse(cursor)
}
TokenKind::Keyword(Keyword::Const) | TokenKind::Keyword(Keyword::Let) => {
Declaration::new(self.allow_yield, self.allow_await, true).parse(cursor)
}
_ => {
@ -426,16 +447,34 @@ where
{
type Output = Box<str>;
/// Strict mode parsing as per https://tc39.es/ecma262/#sec-identifiers-static-semantics-early-errors.
fn parse(self, cursor: &mut Cursor<R>) -> Result<Self::Output, ParseError> {
let _timer = BoaProfiler::global().start_event("BindingIdentifier", "Parsing");
// TODO: strict mode.
let next_token = cursor.next()?.ok_or(ParseError::AbruptEnd)?;
match next_token.kind() {
TokenKind::Identifier(ref s) => Ok(s.clone()),
TokenKind::Keyword(k @ Keyword::Yield) if !self.allow_yield.0 => Ok(k.as_str().into()),
TokenKind::Keyword(k @ Keyword::Await) if !self.allow_await.0 => Ok(k.as_str().into()),
TokenKind::Keyword(k @ Keyword::Yield) if !self.allow_yield.0 => {
if cursor.strict_mode() {
Err(ParseError::lex(LexError::Syntax(
"yield keyword in binding identifier not allowed in strict mode".into(),
next_token.span().start(),
)))
} else {
Ok(k.as_str().into())
}
}
TokenKind::Keyword(k @ Keyword::Await) if !self.allow_await.0 => {
if cursor.strict_mode() {
Err(ParseError::lex(LexError::Syntax(
"await keyword in binding identifier not allowed in strict mode".into(),
next_token.span().start(),
)))
} else {
Ok(k.as_str().into())
}
}
_ => Err(ParseError::expected(
vec![TokenKind::identifier("identifier")],
next_token,

2
boa/src/syntax/parser/statement/switch/mod.rs

@ -130,6 +130,7 @@ where
self.allow_await,
self.allow_return,
true,
false,
)
.parse_generalised(cursor, &CASE_BREAK_TOKENS)?;
@ -151,6 +152,7 @@ where
self.allow_await,
self.allow_return,
true,
false,
)
.parse_generalised(cursor, &CASE_BREAK_TOKENS)?;

Loading…
Cancel
Save