From d76e8bf108e54aea56b33a26e2b560323b05a6a7 Mon Sep 17 00:00:00 2001 From: Theia Vogel Date: Mon, 5 Oct 2020 14:49:26 -0700 Subject: [PATCH] Fix cyclic JSON.stringify / primitive conversion stack overflows (#777) * Add tests for cyclic conversions * Add another test for a non-cyclic scenario * Refactor the existing RecursionLimiter type We can use the existing RecursionLimiter type used by GcObject's Debug impl for this purpose. We just need to refactor it to allow both liveness and visitation tracking, using a HashMap of ptrs to states instead of a HashSet of ptrs. * Fix `Value::to_json` to not overflow. Use the newly refactored RecursionLimiter to check for recursion, and limit it. Throw a TypeError as mandated by the spec. * Fix ordinary_to_primitive to not overflow Use the new RecursionLimiter type to prevent overflows from conversions in ordinary_to_primitive. The spec doesn't say what to do here, so we follow v8 / SpiderMonkey in returning a default value for the type hint -- either 0. or "". More details in the method documentation. * Move ordinary_to_primitive to GcObject * Move object_to_json to GcObject * Add test for console.log --- boa/src/context.rs | 47 +-------- boa/src/object/gcobject.rs | 205 +++++++++++++++++++++++++++++++------ boa/src/object/mod.rs | 2 +- boa/src/value/mod.rs | 33 +----- boa/src/value/tests.rs | 121 ++++++++++++++++++++++ 5 files changed, 298 insertions(+), 110 deletions(-) diff --git a/boa/src/context.rs b/boa/src/context.rs index d68faacc1e..6b03a5c54a 100644 --- a/boa/src/context.rs +++ b/boa/src/context.rs @@ -23,7 +23,7 @@ use crate::{ }, Parser, }, - value::{PreferredType, RcString, RcSymbol, Type, Value}, + value::{RcString, RcSymbol, Value}, BoaProfiler, Executable, Result, }; use std::result::Result as StdResult; @@ -498,51 +498,6 @@ impl Context { Err(()) } - /// Converts an object to a primitive. - /// - /// More information: - /// - [ECMAScript][spec] - /// - /// [spec]: https://tc39.es/ecma262/#sec-ordinarytoprimitive - pub(crate) fn ordinary_to_primitive( - &mut self, - o: &Value, - hint: PreferredType, - ) -> Result { - // 1. Assert: Type(O) is Object. - debug_assert!(o.get_type() == Type::Object); - // 2. Assert: Type(hint) is String and its value is either "string" or "number". - debug_assert!(hint == PreferredType::String || hint == PreferredType::Number); - - // 3. If hint is "string", then - // a. Let methodNames be « "toString", "valueOf" ». - // 4. Else, - // a. Let methodNames be « "valueOf", "toString" ». - let method_names = if hint == PreferredType::String { - ["toString", "valueOf"] - } else { - ["valueOf", "toString"] - }; - - // 5. For each name in methodNames in List order, do - for name in &method_names { - // a. Let method be ? Get(O, name). - let method: Value = o.get_field(*name); - // b. If IsCallable(method) is true, then - if method.is_function() { - // i. Let result be ? Call(method, O). - let result = self.call(&method, &o, &[])?; - // ii. If Type(result) is not Object, return result. - if !result.is_object() { - return Ok(result); - } - } - } - - // 6. Throw a TypeError exception. - self.throw_type_error("cannot convert object to primitive value") - } - /// https://tc39.es/ecma262/#sec-hasproperty pub(crate) fn has_property(&self, obj: &Value, key: &PropertyKey) -> bool { if let Some(obj) = obj.as_object() { diff --git a/boa/src/object/gcobject.rs b/boa/src/object/gcobject.rs index 546aed91a6..3776ff1237 100644 --- a/boa/src/object/gcobject.rs +++ b/boa/src/object/gcobject.rs @@ -11,12 +11,14 @@ use crate::{ function_environment_record::BindingStatus, lexical_environment::new_function_environment, }, syntax::ast::node::RcStatementList, + value::PreferredType, Context, Executable, Result, Value, }; use gc::{Finalize, Gc, GcCell, GcCellRef, GcCellRefMut, Trace}; +use serde_json::{map::Map, Value as JSONValue}; use std::{ cell::RefCell, - collections::HashSet, + collections::HashMap, error::Error, fmt::{self, Debug, Display}, result::Result as StdResult, @@ -267,6 +269,111 @@ impl GcObject { } } } + + /// Converts an object to a primitive. + /// + /// Diverges from the spec to prevent a stack overflow when the object is recursive. + /// For example, + /// ```javascript + /// let a = [1]; + /// a[1] = a; + /// console.log(a.toString()); // We print "1," + /// ``` + /// The spec doesn't mention what to do in this situation, but a naive implementation + /// would overflow the stack recursively calling `toString()`. We follow v8 and SpiderMonkey + /// instead by returning a default value for the given `hint` -- either `0.` or `""`. + /// Example in v8: https://repl.it/repls/IvoryCircularCertification#index.js + /// + /// More information: + /// - [ECMAScript][spec] + /// + /// [spec]: https://tc39.es/ecma262/#sec-ordinarytoprimitive + pub(crate) fn ordinary_to_primitive( + &self, + interpreter: &mut Context, + hint: PreferredType, + ) -> Result { + // 1. Assert: Type(O) is Object. + // Already is GcObject by type. + // 2. Assert: Type(hint) is String and its value is either "string" or "number". + debug_assert!(hint == PreferredType::String || hint == PreferredType::Number); + + // Diverge from the spec here to make sure we aren't going to overflow the stack by converting + // a recursive structure + // We can follow v8 & SpiderMonkey's lead and return a default value for the hint in this situation + // (see https://repl.it/repls/IvoryCircularCertification#index.js) + let recursion_limiter = RecursionLimiter::new(&self); + if recursion_limiter.live { + // we're in a recursive object, bail + return Ok(match hint { + PreferredType::Number => Value::from(0), + PreferredType::String => Value::from(""), + PreferredType::Default => unreachable!("checked type hint in step 2"), + }); + } + + // 3. If hint is "string", then + // a. Let methodNames be « "toString", "valueOf" ». + // 4. Else, + // a. Let methodNames be « "valueOf", "toString" ». + let method_names = if hint == PreferredType::String { + ["toString", "valueOf"] + } else { + ["valueOf", "toString"] + }; + + // 5. For each name in methodNames in List order, do + let this = Value::from(self.clone()); + for name in &method_names { + // a. Let method be ? Get(O, name). + let method: Value = this.get_field(*name); + // b. If IsCallable(method) is true, then + if method.is_function() { + // i. Let result be ? Call(method, O). + let result = interpreter.call(&method, &this, &[])?; + // ii. If Type(result) is not Object, return result. + if !result.is_object() { + return Ok(result); + } + } + } + + // 6. Throw a TypeError exception. + interpreter.throw_type_error("cannot convert object to primitive value") + } + + /// Converts an object to JSON, checking for reference cycles and throwing a TypeError if one is found + pub(crate) fn to_json(&self, interpreter: &mut Context) -> Result { + let rec_limiter = RecursionLimiter::new(self); + if rec_limiter.live { + Err(interpreter.construct_type_error("cyclic object value")) + } else if self.borrow().is_array() { + let mut keys: Vec = self.borrow().index_property_keys().cloned().collect(); + keys.sort(); + let mut arr: Vec = Vec::with_capacity(keys.len()); + let this = Value::from(self.clone()); + for key in keys { + let value = this.get_field(key); + if value.is_undefined() || value.is_function() || value.is_symbol() { + arr.push(JSONValue::Null); + } else { + arr.push(value.to_json(interpreter)?); + } + } + Ok(JSONValue::Array(arr)) + } else { + let mut new_obj = Map::new(); + let this = Value::from(self.clone()); + for k in self.borrow().keys() { + let key = k.clone(); + let value = this.get_field(k.to_string()); + if !value.is_undefined() && !value.is_function() && !value.is_symbol() { + new_obj.insert(key.to_string(), value.to_json(interpreter)?); + } + } + Ok(JSONValue::Object(new_obj)) + } + } } impl AsRef> for GcObject { @@ -302,45 +409,57 @@ impl Display for BorrowMutError { impl Error for BorrowMutError {} -/// Prevents infinite recursion during `Debug::fmt`. -#[derive(Debug)] -struct RecursionLimiter { - /// If this was the first `GcObject` in the tree. - free: bool, - /// If this is the first time a specific `GcObject` has been seen. - first: bool, +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +enum RecursionValueState { + /// This value is "live": there's an active RecursionLimiter that hasn't been dropped. + Live, + /// This value has been seen before, but the recursion limiter has been dropped. + /// For example: + /// ```javascript + /// let b = []; + /// JSON.stringify([ // Create a recursion limiter for the root here + /// b, // state for b's &GcObject here is None + /// b, // state for b's &GcObject here is Visited + /// ]); + /// ``` + Visited, } -impl Clone for RecursionLimiter { - fn clone(&self) -> Self { - Self { - // Cloning this value would result in a premature free. - free: false, - // Cloning this vlaue would result in a value being written multiple times. - first: false, - } - } +/// Prevents infinite recursion during `Debug::fmt`, `JSON.stringify`, and other conversions. +/// This uses a thread local, so is not safe to use where the object graph will be traversed by +/// multiple threads! +#[derive(Debug)] +pub struct RecursionLimiter { + /// If this was the first `GcObject` in the tree. + top_level: bool, + /// The ptr being kept in the HashSet, so we can delete it when we drop. + ptr: usize, + /// If this GcObject has been visited before in the graph, but not in the current branch. + pub visited: bool, + /// If this GcObject has been visited in the current branch of the graph. + pub live: bool, } impl Drop for RecursionLimiter { fn drop(&mut self) { - // Typically, calling hs.remove(ptr) for "first" objects would be the correct choice here. This would allow the - // same object to appear multiple times in the output (provided it does not appear under itself recursively). - // However, the JS object hierarchy involves quite a bit of repitition, and the sheer amount of data makes - // understanding the Debug output impossible; limiting the usefulness of it. - // - // Instead, the entire hashset is emptied at by the first GcObject involved. This means that objects will appear - // at most once, throughout the graph, hopefully making things a bit clearer. - if self.free { - Self::VISITED.with(|hs| hs.borrow_mut().clear()); + if self.top_level { + // When the top level of the graph is dropped, we can free the entire map for the next traversal. + Self::SEEN.with(|hm| hm.borrow_mut().clear()); + } else if !self.live { + // This was the first RL for this object to become live, so it's no longer live now that it's dropped. + Self::SEEN.with(|hm| { + hm.borrow_mut() + .insert(self.ptr, RecursionValueState::Visited) + }); } } } impl RecursionLimiter { thread_local! { - /// The list of pointers to `GcObject` that have been visited during the current `Debug::fmt` graph. - static VISITED: RefCell> = RefCell::new(HashSet::new()); + /// The map of pointers to `GcObject` that have been visited during the current `Debug::fmt` graph, + /// and the current state of their RecursionLimiter (dropped or live -- see `RecursionValueState`) + static SEEN: RefCell> = RefCell::new(HashMap::new()); } /// Determines if the specified `GcObject` has been visited, and returns a struct that will free it when dropped. @@ -348,15 +467,27 @@ impl RecursionLimiter { /// This is done by maintaining a thread-local hashset containing the pointers of `GcObject` values that have been /// visited. The first `GcObject` visited will clear the hashset, while any others will check if they are contained /// by the hashset. - fn new(o: &GcObject) -> Self { + pub fn new(o: &GcObject) -> Self { // We shouldn't have to worry too much about this being moved during Debug::fmt. let ptr = (o.as_ref() as *const _) as usize; - let (free, first) = Self::VISITED.with(|hs| { - let mut hs = hs.borrow_mut(); - (hs.is_empty(), hs.insert(ptr)) + let (top_level, visited, live) = Self::SEEN.with(|hm| { + let mut hm = hm.borrow_mut(); + let top_level = hm.is_empty(); + let old_state = hm.insert(ptr, RecursionValueState::Live); + + ( + top_level, + old_state == Some(RecursionValueState::Visited), + old_state == Some(RecursionValueState::Live), + ) }); - Self { free, first } + Self { + top_level, + ptr, + visited, + live, + } } } @@ -364,7 +495,13 @@ impl Debug for GcObject { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result { let limiter = RecursionLimiter::new(&self); - if limiter.first { + // Typically, using `!limiter.live` would be good enough here. + // However, the JS object hierarchy involves quite a bit of repitition, and the sheer amount of data makes + // understanding the Debug output impossible; limiting the usefulness of it. + // + // Instead, we check if the object has appeared before in the entire graph. This means that objects will appear + // at most once, hopefully making things a bit clearer. + if !limiter.visited && !limiter.live { f.debug_tuple("GcObject").field(&self.0).finish() } else { f.write_str("{ ... }") diff --git a/boa/src/object/mod.rs b/boa/src/object/mod.rs index e165422492..e6fa412a0c 100644 --- a/boa/src/object/mod.rs +++ b/boa/src/object/mod.rs @@ -25,7 +25,7 @@ mod gcobject; mod internal_methods; mod iter; -pub use gcobject::{GcObject, Ref, RefMut}; +pub use gcobject::{GcObject, RecursionLimiter, Ref, RefMut}; pub use iter::*; /// Static `prototype`, usually set on constructors as a key to point to their respective prototype object. diff --git a/boa/src/value/mod.rs b/boa/src/value/mod.rs index c89b8b187f..262eb99061 100644 --- a/boa/src/value/mod.rs +++ b/boa/src/value/mod.rs @@ -15,7 +15,7 @@ use crate::{ BoaProfiler, Context, Result, }; use gc::{Finalize, GcCellRef, GcCellRefMut, Trace}; -use serde_json::{map::Map, Number as JSONNumber, Value as JSONValue}; +use serde_json::{Number as JSONNumber, Value as JSONValue}; use std::{ collections::HashSet, convert::TryFrom, @@ -225,32 +225,7 @@ impl Value { match *self { Self::Null => Ok(JSONValue::Null), Self::Boolean(b) => Ok(JSONValue::Bool(b)), - Self::Object(ref obj) => { - if obj.borrow().is_array() { - let mut keys: Vec = obj.borrow().index_property_keys().cloned().collect(); - keys.sort(); - let mut arr: Vec = Vec::with_capacity(keys.len()); - for key in keys { - let value = self.get_field(key); - if value.is_undefined() || value.is_function() || value.is_symbol() { - arr.push(JSONValue::Null); - } else { - arr.push(value.to_json(interpreter)?); - } - } - Ok(JSONValue::Array(arr)) - } else { - let mut new_obj = Map::new(); - for k in obj.borrow().keys() { - let key = k.clone(); - let value = self.get_field(k.to_string()); - if !value.is_undefined() && !value.is_function() && !value.is_symbol() { - new_obj.insert(key.to_string(), value.to_json(interpreter)?); - } - } - Ok(JSONValue::Object(new_obj)) - } - } + Self::Object(ref obj) => obj.to_json(interpreter), Self::String(ref str) => Ok(JSONValue::String(str.to_string())), Self::Rational(num) => Ok(JSONNumber::from_f64(num) .map(JSONValue::Number) @@ -592,7 +567,7 @@ impl Value { pub fn to_primitive(&self, ctx: &mut Context, preferred_type: PreferredType) -> Result { // 1. Assert: input is an ECMAScript language value. (always a value not need to check) // 2. If Type(input) is Object, then - if let Value::Object(_) = self { + if let Value::Object(obj) = self { let mut hint = preferred_type; // Skip d, e we don't support Symbols yet @@ -603,7 +578,7 @@ impl Value { }; // g. Return ? OrdinaryToPrimitive(input, hint). - ctx.ordinary_to_primitive(self, hint) + obj.ordinary_to_primitive(ctx, hint) } else { // 3. Return input. Ok(self.clone()) diff --git a/boa/src/value/tests.rs b/boa/src/value/tests.rs index ce667c1ee1..a39fcd61a8 100644 --- a/boa/src/value/tests.rs +++ b/boa/src/value/tests.rs @@ -496,6 +496,127 @@ toString: { ); } +/// Test cyclic conversions that previously caused stack overflows +/// Relevant mitigations for these are in `GcObject::ordinary_to_primitive` and +/// `GcObject::to_json` +mod cyclic_conversions { + use super::*; + + #[test] + fn to_json_cyclic() { + let mut engine = Context::new(); + let src = r#" + let a = []; + a[0] = a; + JSON.stringify(a) + "#; + + assert_eq!( + forward(&mut engine, src), + r#"Uncaught "TypeError": "cyclic object value""#, + ); + } + + #[test] + fn to_json_noncyclic() { + let mut engine = Context::new(); + let src = r#" + let b = []; + let a = [b, b]; + JSON.stringify(a) + "#; + + let value = forward_val(&mut engine, src).unwrap(); + let result = value.as_string().unwrap(); + assert_eq!(result, "[[],[]]",); + } + + // These tests don't throw errors. Instead we mirror Chrome / Firefox behavior for these conversions + #[test] + fn to_string_cyclic() { + let mut engine = Context::new(); + let src = r#" + let a = []; + a[0] = a; + a.toString() + "#; + + let value = forward_val(&mut engine, src).unwrap(); + let result = value.as_string().unwrap(); + assert_eq!(result, ""); + } + + #[test] + fn to_number_cyclic() { + let mut engine = Context::new(); + let src = r#" + let a = []; + a[0] = a; + +a + "#; + + let value = forward_val(&mut engine, src).unwrap(); + let result = value.as_number().unwrap(); + assert_eq!(result, 0.); + } + + #[test] + fn to_boolean_cyclic() { + // this already worked before the mitigation, but we don't want to cause a regression + let mut engine = Context::new(); + let src = r#" + let a = []; + a[0] = a; + !!a + "#; + + let value = forward_val(&mut engine, src).unwrap(); + // There isn't an as_boolean function for some reason? + assert_eq!(value, Value::Boolean(true)); + } + + #[test] + fn to_bigint_cyclic() { + let mut engine = Context::new(); + let src = r#" + let a = []; + a[0] = a; + BigInt(a) + "#; + + let value = forward_val(&mut engine, src).unwrap(); + let result = value.as_bigint().unwrap().to_f64(); + assert_eq!(result, 0.); + } + + #[test] + fn to_u32_cyclic() { + let mut engine = Context::new(); + let src = r#" + let a = []; + a[0] = a; + a | 0 + "#; + + let value = forward_val(&mut engine, src).unwrap(); + let result = value.as_number().unwrap(); + assert_eq!(result, 0.); + } + + #[test] + fn console_log_cyclic() { + let mut engine = Context::new(); + let src = r#" + let a = [1]; + a[1] = a; + console.log(a); + "#; + + let _ = forward(&mut engine, src); + // Should not stack overflow + } +} + mod abstract_relational_comparison { use super::*; macro_rules! check_comparison {