From 6bcfc7a2373890a2b37c7814cb4ceade91bf4e02 Mon Sep 17 00:00:00 2001
From: Paul Lancaster
Date: Mon, 5 Oct 2020 19:11:10 +0100
Subject: [PATCH] Strict Mode Lex/Parse (#717)

* Add strict mode to lex trait
* Lexer reserved keyword identifier strict mode lexing
* Prevent with statement in strict mode
* Add strict_mode flag to parse methods
* Enable function scope strict mode
* Script scope strict mode
* Delete statement strict mode
* Function decl in block in strict mode
* Fix merge
* Strict mode code tests
* Move strict_mode flag onto Lexer
* Move strict_mode lex flag to Lexer Cursor
* Isolated failing test parts
* Fixed bug in func decl in block in strict mode
* BindingIdentifier strict mode
* use to_string
* Remove dbg
* Add missing spec ref
* Missing spec ref for dup func params
* Remove clone
* .to_string -> .as_ref
---
 boa/src/exec/tests.rs                         | 121 ++++++++++++++++++
 boa/src/syntax/lexer/cursor.rs                |  12 ++
 boa/src/syntax/lexer/identifier.rs            |  32 ++++-
 boa/src/syntax/lexer/mod.rs                   |  17 ++-
 boa/src/syntax/lexer/number.rs                |   9 +-
 .../parser/cursor/buffered_lexer/mod.rs       |  10 ++
 boa/src/syntax/parser/cursor/mod.rs           |  10 ++
 boa/src/syntax/parser/expression/mod.rs       |   2 +-
 boa/src/syntax/parser/expression/unary.rs     |  16 ++-
 boa/src/syntax/parser/function/mod.rs         |  21 ++-
 boa/src/syntax/parser/mod.rs                  |  21 ++-
 boa/src/syntax/parser/statement/block/mod.rs  |  13 +-
 boa/src/syntax/parser/statement/mod.rs        |  67 ++++++++--
 boa/src/syntax/parser/statement/switch/mod.rs |   2 +
 14 files changed, 311 insertions(+), 42 deletions(-)

diff --git a/boa/src/exec/tests.rs b/boa/src/exec/tests.rs
index 7ffd08e366..f385cfeb42 100644
--- a/boa/src/exec/tests.rs
+++ b/boa/src/exec/tests.rs
@@ -1383,3 +1383,124 @@ fn test_identifier_op() {
     let scenario = "break = 1";
     assert_eq!(&exec(scenario), "\"SyntaxError\": \"expected token \'identifier\', got \'=\' in binding identifier at line 1, col 7\"");
 }
+
+#[test]
+fn test_strict_mode_octal() {
+    // Checks as per https://tc39.es/ecma262/#sec-literals-numeric-literals that a
+    // 0-prefixed (legacy) octal number literal is a syntax error in strict mode.
+
+    let scenario = r#"
+    'use strict';
+    var n = 023;
+    "#;
+
+    let mut engine = Context::new();
+
+    let string = dbg!(forward(&mut engine, scenario));
+
+    assert!(string.starts_with("Uncaught \"SyntaxError\": "));
+}
+
+#[test]
+fn test_strict_mode_with() {
+    // Checks as per https://tc39.es/ecma262/#sec-with-statement-static-semantics-early-errors
+    // that a with statement is an error in strict mode code.
+
+    let scenario = r#"
+    'use strict';
+    function f(x, o) {
+        with (o) {
+            console.log(x);
+        }
+    }
+    "#;
+
+    let mut engine = Context::new();
+
+    let string = dbg!(forward(&mut engine, scenario));
+
+    assert!(string.starts_with("Uncaught \"SyntaxError\": "));
+}
+
+#[test]
+fn test_strict_mode_delete() {
+    // Checks as per https://tc39.es/ecma262/#sec-delete-operator-static-semantics-early-errors
+    // that delete on a variable name is an error in strict mode code.
+
+    let scenario = r#"
+    'use strict';
+    let x = 10;
+    delete x;
+    "#;
+
+    let mut engine = Context::new();
+
+    let string = dbg!(forward(&mut engine, scenario));
+
+    assert!(string.starts_with("Uncaught \"SyntaxError\": "));
+}
+
+#[test]
+fn test_strict_mode_reserved_name() {
+    // Checks that usage of a reserved keyword for an identifier name is
+    // an error in strict mode code as per https://tc39.es/ecma262/#sec-strict-mode-of-ecmascript.
+
+    let test_cases = [
+        "var implements = 10;",
+        "var interface = 10;",
+        "var package = 10;",
+        "var private = 10;",
+        "var protected = 10;",
+        "var public = 10;",
+        "var static = 10;",
+        "var eval = 10;",
+        "var arguments = 10;",
+        "var let = 10;",
+        "var yield = 10;",
+    ];
+
+    for case in test_cases.iter() {
+        let mut engine = Context::new();
+        let scenario = format!("'use strict'; \n {}", case);
+
+        let string = dbg!(forward(&mut engine, &scenario));
+
+        assert!(string.starts_with("Uncaught \"SyntaxError\": "));
+    }
+}
+
+#[test]
+fn test_strict_mode_func_decl_in_block() {
+    // Checks that a function declaration in a block is an error in
+    // strict mode code as per https://tc39.es/ecma262/#early-error.
+
+    let scenario = r#"
+    'use strict';
+    let a = 4;
+    let b = 5;
+    if (a < b) { function f() {} }
+    "#;
+
+    let mut engine = Context::new();
+
+    let string = dbg!(forward(&mut engine, scenario));
+
+    assert!(string.starts_with("Uncaught \"SyntaxError\": "));
+}
+
+#[test]
+fn test_strict_mode_dup_func_parameters() {
+    // Checks that a function cannot contain duplicate parameter
+    // names in strict mode code as per https://tc39.es/ecma262/#sec-function-definitions-static-semantics-early-errors.
+
+    let scenario = r#"
+    'use strict';
+    function f(a, b, b) {}
+    "#;
+
+    let mut engine = Context::new();
+
+    let string = dbg!(forward(&mut engine, scenario));
+
+    assert!(string.starts_with("Uncaught \"SyntaxError\": "));
+}
diff --git a/boa/src/syntax/lexer/cursor.rs b/boa/src/syntax/lexer/cursor.rs
index 855595d952..e52614a4af 100644
--- a/boa/src/syntax/lexer/cursor.rs
+++ b/boa/src/syntax/lexer/cursor.rs
@@ -9,6 +9,7 @@ pub(super) struct Cursor<R> {
     iter: InnerIter<R>,
     peeked: Option<Option<char>>,
     pos: Position,
+    strict_mode: bool,
 }
 
 impl<R> Cursor<R> {
@@ -38,6 +39,16 @@ impl<R> Cursor<R> {
         let current_line = self.pos.line_number();
         self.pos = Position::new(current_line, 1);
     }
+
+    #[inline]
+    pub(super) fn strict_mode(&self) -> bool {
+        self.strict_mode
+    }
+
+    #[inline]
+    pub(super) fn set_strict_mode(&mut self, strict_mode: bool) {
+        self.strict_mode = strict_mode
+    }
 }
 
 impl<R> Cursor<R>
 where
@@ -51,6 +62,7 @@ where
             iter: InnerIter::new(inner.bytes()),
             peeked: None,
             pos: Position::new(1, 1),
+            strict_mode: false,
         }
     }
diff --git a/boa/src/syntax/lexer/identifier.rs b/boa/src/syntax/lexer/identifier.rs
index 15dfecbb7c..19dd6dc608 100644
--- a/boa/src/syntax/lexer/identifier.rs
+++ b/boa/src/syntax/lexer/identifier.rs
@@ -4,12 +4,26 @@ use super::{Cursor, Error, Tokenizer};
 use crate::{
     profiler::BoaProfiler,
     syntax::{
-        ast::{Position, Span},
+        ast::{Keyword, Position, Span},
         lexer::{Token, TokenKind},
     },
 };
 use std::io::Read;
 
+const STRICT_FORBIDDEN_IDENTIFIERS: [&str; 11] = [
+    "eval",
+    "arguments",
+    "implements",
+    "interface",
+    "let",
+    "package",
+    "private",
+    "protected",
+    "public",
+    "static",
+    "yield",
+];
+
 /// Identifier lexing.
 ///
 /// More information:
@@ -49,8 +63,24 @@ impl<R> Tokenizer<R> for Identifier {
             "null" => TokenKind::NullLiteral,
             slice => {
                 if let Ok(keyword) = slice.parse() {
+                    if cursor.strict_mode() && keyword == Keyword::With {
+                        return Err(Error::Syntax(
+                            "using 'with' statement not allowed in strict mode".into(),
+                            start_pos,
+                        ));
+                    }
                     TokenKind::Keyword(keyword)
                 } else {
+                    if cursor.strict_mode() && STRICT_FORBIDDEN_IDENTIFIERS.contains(&slice) {
+                        return Err(Error::Syntax(
+                            format!(
+                                "using future reserved keyword '{}' not allowed in strict mode",
+                                slice
+                            )
+                            .into(),
+                            start_pos,
+                        ));
+                    }
                     TokenKind::identifier(slice)
                 }
             }
diff --git a/boa/src/syntax/lexer/mod.rs b/boa/src/syntax/lexer/mod.rs
index 4fd8f75c3a..f5f356b496 100644
--- a/boa/src/syntax/lexer/mod.rs
+++ b/boa/src/syntax/lexer/mod.rs
@@ -90,6 +90,16 @@ impl<R> Lexer<R> {
         self.goal_symbol
     }
 
+    #[inline]
+    pub(super) fn strict_mode(&self) -> bool {
+        self.cursor.strict_mode()
+    }
+
+    #[inline]
+    pub(super) fn set_strict_mode(&mut self, strict_mode: bool) {
+        self.cursor.set_strict_mode(strict_mode)
+    }
+
     /// Creates a new lexer.
     #[inline]
     pub fn new(reader: R) -> Self
     where
@@ -180,9 +190,6 @@ impl<R> Lexer<R> {
             }
         };
 
-        // TODO, setting strict mode on/off.
-        let strict_mode = false;
-
         let token = match next_chr {
             '\r' | '\n' | '\u{2028}' | '\u{2029}' => Ok(Token::new(
                 TokenKind::LineTerminator,
                 Span::new(start, self.cursor.pos()),
             )),
             '"' | '\'' => StringLiteral::new(next_chr).lex(&mut self.cursor, start),
             '`' => TemplateLiteral.lex(&mut self.cursor, start),
-            _ if next_chr.is_digit(10) => {
-                NumberLiteral::new(next_chr, strict_mode).lex(&mut self.cursor, start)
-            }
+            _ if next_chr.is_digit(10) => NumberLiteral::new(next_chr).lex(&mut self.cursor, start),
             _ if next_chr.is_alphabetic() || next_chr == '$' || next_chr == '_' => {
                 Identifier::new(next_chr).lex(&mut self.cursor, start)
             }
diff --git a/boa/src/syntax/lexer/number.rs b/boa/src/syntax/lexer/number.rs
index 8391a3ee12..6e1c4581f8 100644
--- a/boa/src/syntax/lexer/number.rs
+++ b/boa/src/syntax/lexer/number.rs
@@ -24,13 +24,12 @@ use std::{io::Read, str::FromStr};
 #[derive(Debug, Clone, Copy)]
 pub(super) struct NumberLiteral {
     init: char,
-    strict_mode: bool,
 }
 
 impl NumberLiteral {
     /// Creates a new string literal lexer.
-    pub(super) fn new(init: char, strict_mode: bool) -> Self {
-        Self { init, strict_mode }
+    pub(super) fn new(init: char) -> Self {
+        Self { init }
     }
 }
 
@@ -187,7 +186,7 @@ impl<R> Tokenizer<R> for NumberLiteral {
             ch => {
                 if ch.is_digit(8) {
                     // LegacyOctalIntegerLiteral
-                    if self.strict_mode {
+                    if cursor.strict_mode() {
                         // LegacyOctalIntegerLiteral is forbidden with strict mode true.
                         return Err(Error::syntax(
                             "implicit octal literals are not allowed in strict mode",
                             start_pos,
@@ -205,7 +204,7 @@ impl<R> Tokenizer<R> for NumberLiteral {
                     // Indicates a numerical digit comes after then 0 but it isn't an octal digit
                    // so therefore this must be a number with an unneeded leading 0. This is
                    // forbidden in strict mode.
-                    if self.strict_mode {
+                    if cursor.strict_mode() {
                         return Err(Error::syntax(
                             "leading 0's are not allowed in strict mode",
                             start_pos,
diff --git a/boa/src/syntax/parser/cursor/buffered_lexer/mod.rs b/boa/src/syntax/parser/cursor/buffered_lexer/mod.rs
index bed01da9ef..3e96b95545 100644
--- a/boa/src/syntax/parser/cursor/buffered_lexer/mod.rs
+++ b/boa/src/syntax/parser/cursor/buffered_lexer/mod.rs
@@ -82,6 +82,16 @@ where
         self.lexer.lex_slash_token(start).map_err(|e| e.into())
     }
 
+    #[inline]
+    pub(super) fn strict_mode(&self) -> bool {
+        self.lexer.strict_mode()
+    }
+
+    #[inline]
+    pub(super) fn set_strict_mode(&mut self, strict_mode: bool) {
+        self.lexer.set_strict_mode(strict_mode)
+    }
+
     /// Fills the peeking buffer with the next token.
     ///
     /// It will not fill two line terminators one after the other.
diff --git a/boa/src/syntax/parser/cursor/mod.rs b/boa/src/syntax/parser/cursor/mod.rs
index 8375b0bcfd..b33851f7fd 100644
--- a/boa/src/syntax/parser/cursor/mod.rs
+++ b/boa/src/syntax/parser/cursor/mod.rs
@@ -56,6 +56,16 @@ where
         self.buffered_lexer.peek(skip_n, true)
     }
 
+    #[inline]
+    pub(super) fn strict_mode(&self) -> bool {
+        self.buffered_lexer.strict_mode()
+    }
+
+    #[inline]
+    pub(super) fn set_strict_mode(&mut self, strict_mode: bool) {
+        self.buffered_lexer.set_strict_mode(strict_mode)
+    }
+
     /// Returns an error if the next token is not of kind `kind`.
     ///
     /// Note: it will consume the next token only if the next token is the expected type.
diff --git a/boa/src/syntax/parser/expression/mod.rs b/boa/src/syntax/parser/expression/mod.rs
index cb88a3174f..fe9060854e 100644
--- a/boa/src/syntax/parser/expression/mod.rs
+++ b/boa/src/syntax/parser/expression/mod.rs
@@ -61,7 +61,7 @@ macro_rules! expression { ($name:ident, $lower:ident, [$( $op:path ),*], [$( $lo
     {
         type Output = Node;
 
-        fn parse(self, cursor: &mut Cursor<R>) -> ParseResult {
+        fn parse(self, cursor: &mut Cursor<R>)-> ParseResult {
             let _timer = BoaProfiler::global().start_event($profile, "Parsing");
 
             if $goal.is_some() {
diff --git a/boa/src/syntax/parser/expression/unary.rs b/boa/src/syntax/parser/expression/unary.rs
index 4966224b6c..6ed9f5378c 100644
--- a/boa/src/syntax/parser/expression/unary.rs
+++ b/boa/src/syntax/parser/expression/unary.rs
@@ -15,7 +15,7 @@ use crate::{
         op::UnaryOp,
         Keyword, Punctuator,
     },
-    lexer::TokenKind,
+    lexer::{Error as LexError, TokenKind},
     parser::{
         expression::update::UpdateExpression, AllowAwait, AllowYield, Cursor, ParseError,
         ParseResult, TokenParser,
     },
@@ -62,10 +62,22 @@ where
         let _timer = BoaProfiler::global().start_event("UnaryExpression", "Parsing");
 
         let tok = cursor.peek(0)?.ok_or(ParseError::AbruptEnd)?;
+        let token_start = tok.span().start();
         match tok.kind() {
             TokenKind::Keyword(Keyword::Delete) => {
                 cursor.next()?.expect("Delete keyword vanished"); // Consume the token.
-                Ok(node::UnaryOp::new(UnaryOp::Delete, self.parse(cursor)?).into())
+                let val = self.parse(cursor)?;
+
+                if cursor.strict_mode() {
+                    if let Node::Identifier(_) = val {
+                        return Err(ParseError::lex(LexError::Syntax(
+                            "Delete statements not allowed in strict mode".into(),
+                            token_start,
+                        )));
+                    }
+                }
+
+                Ok(node::UnaryOp::new(UnaryOp::Delete, val).into())
             }
             TokenKind::Keyword(Keyword::Void) => {
                 cursor.next()?.expect("Void keyword vanished"); // Consume the token.
diff --git a/boa/src/syntax/parser/function/mod.rs b/boa/src/syntax/parser/function/mod.rs
index 382bb5f9f8..37414bf06e 100644
--- a/boa/src/syntax/parser/function/mod.rs
+++ b/boa/src/syntax/parser/function/mod.rs
@@ -259,12 +259,27 @@ where
     fn parse(self, cursor: &mut Cursor<R>) -> Result<Self::Output, ParseError> {
         let _timer = BoaProfiler::global().start_event("FunctionStatementList", "Parsing");
+
+        let global_strict_mode = cursor.strict_mode();
 
         if let Some(tk) = cursor.peek(0)? {
-            if tk.kind() == &Punctuator::CloseBlock.into() {
-                return Ok(Vec::new().into());
+            match tk.kind() {
+                TokenKind::Punctuator(Punctuator::CloseBlock) => {
+                    return Ok(Vec::new().into());
+                }
+                TokenKind::StringLiteral(string) | TokenKind::TemplateLiteral(string) => {
+                    if string == &"use strict".into() {
+                        cursor.set_strict_mode(true);
+                    }
+                }
+                _ => {}
             }
         }
 
-        StatementList::new(self.allow_yield, self.allow_await, true, true).parse(cursor)
+        let stmlist =
+            StatementList::new(self.allow_yield, self.allow_await, true, true, true).parse(cursor);
+
+        // Reset strict mode back to the global scope.
+        cursor.set_strict_mode(global_strict_mode);
+        stmlist
     }
 }
diff --git a/boa/src/syntax/parser/mod.rs b/boa/src/syntax/parser/mod.rs
index b5199b2822..9a206ec6d9 100644
--- a/boa/src/syntax/parser/mod.rs
+++ b/boa/src/syntax/parser/mod.rs
@@ -9,7 +9,7 @@ mod statement;
 mod tests;
 
 pub use self::error::{ParseError, ParseResult};
-use crate::syntax::ast::node::StatementList;
+use crate::syntax::{ast::node::StatementList, lexer::TokenKind};
 
 use cursor::Cursor;
@@ -121,10 +121,19 @@ where
     type Output = StatementList;
 
     fn parse(self, cursor: &mut Cursor<R>) -> Result<Self::Output, ParseError> {
-        if cursor.peek(0)?.is_some() {
-            ScriptBody.parse(cursor)
-        } else {
-            Ok(StatementList::from(Vec::new()))
+        match cursor.peek(0)? {
+            Some(tok) => {
+                match tok.kind() {
+                    TokenKind::StringLiteral(string) | TokenKind::TemplateLiteral(string) => {
+                        if string.as_ref() == "use strict" {
+                            cursor.set_strict_mode(true);
+                        }
+                    }
+                    _ => {}
+                }
+                ScriptBody.parse(cursor)
+            }
+            None => Ok(StatementList::from(Vec::new())),
         }
     }
 }
@@ -145,6 +154,6 @@ where
     type Output = StatementList;
 
     fn parse(self, cursor: &mut Cursor<R>) -> Result<Self::Output, ParseError> {
-        self::statement::StatementList::new(false, false, false, false).parse(cursor)
+        self::statement::StatementList::new(false, false, false, false, true).parse(cursor)
     }
 }
diff --git a/boa/src/syntax/parser/statement/block/mod.rs b/boa/src/syntax/parser/statement/block/mod.rs
index 4ab0bf86a9..a703230c4a 100644
--- a/boa/src/syntax/parser/statement/block/mod.rs
+++ b/boa/src/syntax/parser/statement/block/mod.rs
@@ -78,10 +78,15 @@ where
             }
         }
 
-        let statement_list =
-            StatementList::new(self.allow_yield, self.allow_await, self.allow_return, true)
-                .parse(cursor)
-                .map(node::Block::from)?;
+        let statement_list = StatementList::new(
+            self.allow_yield,
+            self.allow_await,
+            self.allow_return,
+            true,
+            true,
+        )
+        .parse(cursor)
+        .map(node::Block::from)?;
 
         cursor.expect(Punctuator::CloseBlock, "block")?;
         Ok(statement_list)
diff --git a/boa/src/syntax/parser/statement/mod.rs b/boa/src/syntax/parser/statement/mod.rs
index 35b0d7c66f..1fab174adf 100644
--- a/boa/src/syntax/parser/statement/mod.rs
+++ b/boa/src/syntax/parser/statement/mod.rs
@@ -41,7 +41,7 @@ use super::{AllowAwait, AllowReturn, AllowYield, Cursor, ParseError, TokenParser
 use crate::{
     syntax::{
         ast::{node, Keyword, Node, Punctuator},
-        lexer::{InputElement, TokenKind},
+        lexer::{Error as LexError, InputElement, TokenKind},
     },
     BoaProfiler,
 };
@@ -215,6 +215,7 @@ pub(super) struct StatementList {
     allow_yield: AllowYield,
     allow_await: AllowAwait,
     allow_return: AllowReturn,
     break_when_closingbraces: bool,
+    in_block: bool,
 }
 
 impl StatementList {
@@ -224,6 +225,7 @@ impl StatementList {
         allow_await: A,
         allow_return: R,
         break_when_closingbraces: bool,
+        in_block: bool,
     ) -> Self
     where
         Y: Into<AllowYield>,
         A: Into<AllowAwait>,
@@ -235,6 +237,7 @@ impl StatementList {
             allow_await: allow_await.into(),
             allow_return: allow_return.into(),
             break_when_closingbraces,
+            in_block,
         }
     }
 }
@@ -268,9 +271,13 @@ impl StatementList {
             return Err(ParseError::AbruptEnd);
         }
 
-        let item =
-            StatementListItem::new(self.allow_yield, self.allow_await, self.allow_return)
-                .parse(cursor)?;
+        let item = StatementListItem::new(
+            self.allow_yield,
+            self.allow_await,
+            self.allow_return,
+            self.in_block,
+        )
+        .parse(cursor)?;
 
         items.push(item);
@@ -313,9 +320,13 @@ where
             _ => {}
         }
 
-        let item =
-            StatementListItem::new(self.allow_yield, self.allow_await, self.allow_return)
-                .parse(cursor)?;
+        let item = StatementListItem::new(
+            self.allow_yield,
+            self.allow_await,
+            self.allow_return,
+            self.in_block,
+        )
+        .parse(cursor)?;
 
         items.push(item);
 
         // move the cursor forward for any consecutive semicolon.
@@ -343,11 +354,12 @@ struct StatementListItem {
     allow_yield: AllowYield,
     allow_await: AllowAwait,
     allow_return: AllowReturn,
+    in_block: bool,
 }
 
 impl StatementListItem {
     /// Creates a new `StatementListItem` parser.
-    fn new<Y, A, R>(allow_yield: Y, allow_await: A, allow_return: R) -> Self
+    fn new<Y, A, R>(allow_yield: Y, allow_await: A, allow_return: R, in_block: bool) -> Self
     where
         Y: Into<AllowYield>,
         A: Into<AllowAwait>,
@@ -357,6 +369,7 @@ impl StatementListItem {
             allow_yield: allow_yield.into(),
             allow_await: allow_await.into(),
             allow_return: allow_return.into(),
+            in_block,
         }
     }
 }
@@ -369,12 +382,20 @@ where
     fn parse(self, cursor: &mut Cursor<R>) -> Result<Self::Output, ParseError> {
         let _timer = BoaProfiler::global().start_event("StatementListItem", "Parsing");
+        let strict_mode = cursor.strict_mode();
         let tok = cursor.peek(0)?.ok_or(ParseError::AbruptEnd)?;
 
         match *tok.kind() {
-            TokenKind::Keyword(Keyword::Function)
-            | TokenKind::Keyword(Keyword::Const)
-            | TokenKind::Keyword(Keyword::Let) => {
+            TokenKind::Keyword(Keyword::Function) => {
+                if strict_mode && self.in_block {
+                    return Err(ParseError::lex(LexError::Syntax(
+                        "Function declaration in blocks not allowed in strict mode".into(),
+                        tok.span().start(),
+                    )));
+                }
+                Declaration::new(self.allow_yield, self.allow_await, true).parse(cursor)
+            }
+            TokenKind::Keyword(Keyword::Const) | TokenKind::Keyword(Keyword::Let) => {
                 Declaration::new(self.allow_yield, self.allow_await, true).parse(cursor)
             }
             _ => {
@@ -426,16 +447,34 @@ where
 {
     type Output = Box<str>;
 
+    /// Strict mode parsing as per https://tc39.es/ecma262/#sec-identifiers-static-semantics-early-errors.
     fn parse(self, cursor: &mut Cursor<R>) -> Result<Self::Output, ParseError> {
         let _timer = BoaProfiler::global().start_event("BindingIdentifier", "Parsing");
 
-        // TODO: strict mode.
         let next_token = cursor.next()?.ok_or(ParseError::AbruptEnd)?;
 
         match next_token.kind() {
             TokenKind::Identifier(ref s) => Ok(s.clone()),
-            TokenKind::Keyword(k @ Keyword::Yield) if !self.allow_yield.0 => Ok(k.as_str().into()),
-            TokenKind::Keyword(k @ Keyword::Await) if !self.allow_await.0 => Ok(k.as_str().into()),
+            TokenKind::Keyword(k @ Keyword::Yield) if !self.allow_yield.0 => {
+                if cursor.strict_mode() {
+                    Err(ParseError::lex(LexError::Syntax(
+                        "yield keyword in binding identifier not allowed in strict mode".into(),
+                        next_token.span().start(),
+                    )))
+                } else {
+                    Ok(k.as_str().into())
+                }
+            }
+            TokenKind::Keyword(k @ Keyword::Await) if !self.allow_await.0 => {
+                if cursor.strict_mode() {
+                    Err(ParseError::lex(LexError::Syntax(
+                        "await keyword in binding identifier not allowed in strict mode".into(),
+                        next_token.span().start(),
+                    )))
+                } else {
+                    Ok(k.as_str().into())
+                }
+            }
             _ => Err(ParseError::expected(
                 vec![TokenKind::identifier("identifier")],
                 next_token,
diff --git a/boa/src/syntax/parser/statement/switch/mod.rs b/boa/src/syntax/parser/statement/switch/mod.rs
index f19a6104a3..e9369bcb12 100644
--- a/boa/src/syntax/parser/statement/switch/mod.rs
+++ b/boa/src/syntax/parser/statement/switch/mod.rs
@@ -130,6 +130,7 @@ where
                 self.allow_await,
                 self.allow_return,
                 true,
+                false,
             )
             .parse_generalised(cursor, &CASE_BREAK_TOKENS)?;
 
@@ -151,6 +152,7 @@ where
                 self.allow_await,
                 self.allow_return,
                 true,
+                false,
             )
             .parse_generalised(cursor, &CASE_BREAK_TOKENS)?;
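
A note on the function-scope behaviour introduced by the FunctionStatementList change above: the parser saves the enclosing strict-mode flag, enables strict mode when the body opens with a 'use strict' directive, and restores the saved flag once the body has been parsed. A minimal sketch of a test for that reset (not part of this patch; it reuses the Context/forward helpers from boa/src/exec/tests.rs, and the test name is hypothetical) could look like:

#[test]
fn test_strict_mode_reset_after_function_body() {
    // 'use strict' inside the function body should not leak out of it:
    // the legacy octal literal after the function is still valid non-strict code.
    let scenario = r#"
    function f() {
        'use strict';
    }
    var n = 023;
    "#;

    let mut engine = Context::new();

    let string = dbg!(forward(&mut engine, scenario));

    assert!(!string.starts_with("Uncaught \"SyntaxError\": "));
}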