Browse Source

Fix cyclic JSON.stringify / primitive conversion stack overflows (#777)

* Add tests for cyclic conversions

* Add another test for a non-cyclic scenario

* Refactor the existing RecursionLimiter type

We can use the existing RecursionLimiter type
used by GcObject's Debug impl for this purpose.
We just need to refactor it to allow both liveness
and visitation tracking, using a HashMap of ptrs
to states instead of a HashSet of ptrs.

* Fix `Value::to_json` to not overflow.

Use the newly refactored RecursionLimiter to
check for recursion, and limit it. Throw a TypeError
as mandated by the spec.

* Fix ordinary_to_primitive to not overflow

Use the new RecursionLimiter type to prevent
overflows from conversions in ordinary_to_primitive.
The spec doesn't say what to do here, so we follow
v8 / SpiderMonkey in returning a default value for
the type hint -- either 0. or "". More details in
the method documentation.

* Move ordinary_to_primitive to GcObject

* Move object_to_json to GcObject

* Add test for console.log
pull/805/head
Theia Vogel 4 years ago committed by GitHub
parent
commit
d76e8bf108
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 47
      boa/src/context.rs
  2. 205
      boa/src/object/gcobject.rs
  3. 2
      boa/src/object/mod.rs
  4. 33
      boa/src/value/mod.rs
  5. 121
      boa/src/value/tests.rs

47
boa/src/context.rs

@ -23,7 +23,7 @@ use crate::{
},
Parser,
},
value::{PreferredType, RcString, RcSymbol, Type, Value},
value::{RcString, RcSymbol, Value},
BoaProfiler, Executable, Result,
};
use std::result::Result as StdResult;
@ -498,51 +498,6 @@ impl Context {
Err(())
}
/// Converts an object to a primitive.
///
/// More information:
/// - [ECMAScript][spec]
///
/// [spec]: https://tc39.es/ecma262/#sec-ordinarytoprimitive
pub(crate) fn ordinary_to_primitive(
&mut self,
o: &Value,
hint: PreferredType,
) -> Result<Value> {
// 1. Assert: Type(O) is Object.
debug_assert!(o.get_type() == Type::Object);
// 2. Assert: Type(hint) is String and its value is either "string" or "number".
debug_assert!(hint == PreferredType::String || hint == PreferredType::Number);
// 3. If hint is "string", then
// a. Let methodNames be « "toString", "valueOf" ».
// 4. Else,
// a. Let methodNames be « "valueOf", "toString" ».
let method_names = if hint == PreferredType::String {
["toString", "valueOf"]
} else {
["valueOf", "toString"]
};
// 5. For each name in methodNames in List order, do
for name in &method_names {
// a. Let method be ? Get(O, name).
let method: Value = o.get_field(*name);
// b. If IsCallable(method) is true, then
if method.is_function() {
// i. Let result be ? Call(method, O).
let result = self.call(&method, &o, &[])?;
// ii. If Type(result) is not Object, return result.
if !result.is_object() {
return Ok(result);
}
}
}
// 6. Throw a TypeError exception.
self.throw_type_error("cannot convert object to primitive value")
}
/// https://tc39.es/ecma262/#sec-hasproperty
pub(crate) fn has_property(&self, obj: &Value, key: &PropertyKey) -> bool {
if let Some(obj) = obj.as_object() {

205
boa/src/object/gcobject.rs

@ -11,12 +11,14 @@ use crate::{
function_environment_record::BindingStatus, lexical_environment::new_function_environment,
},
syntax::ast::node::RcStatementList,
value::PreferredType,
Context, Executable, Result, Value,
};
use gc::{Finalize, Gc, GcCell, GcCellRef, GcCellRefMut, Trace};
use serde_json::{map::Map, Value as JSONValue};
use std::{
cell::RefCell,
collections::HashSet,
collections::HashMap,
error::Error,
fmt::{self, Debug, Display},
result::Result as StdResult,
@ -267,6 +269,111 @@ impl GcObject {
}
}
}
/// Converts an object to a primitive.
///
/// Diverges from the spec to prevent a stack overflow when the object is recursive.
/// For example,
/// ```javascript
/// let a = [1];
/// a[1] = a;
/// console.log(a.toString()); // We print "1,"
/// ```
/// The spec doesn't mention what to do in this situation, but a naive implementation
/// would overflow the stack recursively calling `toString()`. We follow v8 and SpiderMonkey
/// instead by returning a default value for the given `hint` -- either `0.` or `""`.
/// Example in v8: https://repl.it/repls/IvoryCircularCertification#index.js
///
/// More information:
/// - [ECMAScript][spec]
///
/// [spec]: https://tc39.es/ecma262/#sec-ordinarytoprimitive
pub(crate) fn ordinary_to_primitive(
&self,
interpreter: &mut Context,
hint: PreferredType,
) -> Result<Value> {
// 1. Assert: Type(O) is Object.
// Already is GcObject by type.
// 2. Assert: Type(hint) is String and its value is either "string" or "number".
debug_assert!(hint == PreferredType::String || hint == PreferredType::Number);
// Diverge from the spec here to make sure we aren't going to overflow the stack by converting
// a recursive structure
// We can follow v8 & SpiderMonkey's lead and return a default value for the hint in this situation
// (see https://repl.it/repls/IvoryCircularCertification#index.js)
let recursion_limiter = RecursionLimiter::new(&self);
if recursion_limiter.live {
// we're in a recursive object, bail
return Ok(match hint {
PreferredType::Number => Value::from(0),
PreferredType::String => Value::from(""),
PreferredType::Default => unreachable!("checked type hint in step 2"),
});
}
// 3. If hint is "string", then
// a. Let methodNames be « "toString", "valueOf" ».
// 4. Else,
// a. Let methodNames be « "valueOf", "toString" ».
let method_names = if hint == PreferredType::String {
["toString", "valueOf"]
} else {
["valueOf", "toString"]
};
// 5. For each name in methodNames in List order, do
let this = Value::from(self.clone());
for name in &method_names {
// a. Let method be ? Get(O, name).
let method: Value = this.get_field(*name);
// b. If IsCallable(method) is true, then
if method.is_function() {
// i. Let result be ? Call(method, O).
let result = interpreter.call(&method, &this, &[])?;
// ii. If Type(result) is not Object, return result.
if !result.is_object() {
return Ok(result);
}
}
}
// 6. Throw a TypeError exception.
interpreter.throw_type_error("cannot convert object to primitive value")
}
/// Converts an object to JSON, checking for reference cycles and throwing a TypeError if one is found
pub(crate) fn to_json(&self, interpreter: &mut Context) -> Result<JSONValue> {
let rec_limiter = RecursionLimiter::new(self);
if rec_limiter.live {
Err(interpreter.construct_type_error("cyclic object value"))
} else if self.borrow().is_array() {
let mut keys: Vec<u32> = self.borrow().index_property_keys().cloned().collect();
keys.sort();
let mut arr: Vec<JSONValue> = Vec::with_capacity(keys.len());
let this = Value::from(self.clone());
for key in keys {
let value = this.get_field(key);
if value.is_undefined() || value.is_function() || value.is_symbol() {
arr.push(JSONValue::Null);
} else {
arr.push(value.to_json(interpreter)?);
}
}
Ok(JSONValue::Array(arr))
} else {
let mut new_obj = Map::new();
let this = Value::from(self.clone());
for k in self.borrow().keys() {
let key = k.clone();
let value = this.get_field(k.to_string());
if !value.is_undefined() && !value.is_function() && !value.is_symbol() {
new_obj.insert(key.to_string(), value.to_json(interpreter)?);
}
}
Ok(JSONValue::Object(new_obj))
}
}
}
impl AsRef<GcCell<Object>> for GcObject {
@ -302,45 +409,57 @@ impl Display for BorrowMutError {
impl Error for BorrowMutError {}
/// Prevents infinite recursion during `Debug::fmt`.
#[derive(Debug)]
struct RecursionLimiter {
/// If this was the first `GcObject` in the tree.
free: bool,
/// If this is the first time a specific `GcObject` has been seen.
first: bool,
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum RecursionValueState {
/// This value is "live": there's an active RecursionLimiter that hasn't been dropped.
Live,
/// This value has been seen before, but the recursion limiter has been dropped.
/// For example:
/// ```javascript
/// let b = [];
/// JSON.stringify([ // Create a recursion limiter for the root here
/// b, // state for b's &GcObject here is None
/// b, // state for b's &GcObject here is Visited
/// ]);
/// ```
Visited,
}
impl Clone for RecursionLimiter {
fn clone(&self) -> Self {
Self {
// Cloning this value would result in a premature free.
free: false,
// Cloning this vlaue would result in a value being written multiple times.
first: false,
}
}
/// Prevents infinite recursion during `Debug::fmt`, `JSON.stringify`, and other conversions.
/// This uses a thread local, so is not safe to use where the object graph will be traversed by
/// multiple threads!
#[derive(Debug)]
pub struct RecursionLimiter {
/// If this was the first `GcObject` in the tree.
top_level: bool,
/// The ptr being kept in the HashSet, so we can delete it when we drop.
ptr: usize,
/// If this GcObject has been visited before in the graph, but not in the current branch.
pub visited: bool,
/// If this GcObject has been visited in the current branch of the graph.
pub live: bool,
}
impl Drop for RecursionLimiter {
fn drop(&mut self) {
// Typically, calling hs.remove(ptr) for "first" objects would be the correct choice here. This would allow the
// same object to appear multiple times in the output (provided it does not appear under itself recursively).
// However, the JS object hierarchy involves quite a bit of repitition, and the sheer amount of data makes
// understanding the Debug output impossible; limiting the usefulness of it.
//
// Instead, the entire hashset is emptied at by the first GcObject involved. This means that objects will appear
// at most once, throughout the graph, hopefully making things a bit clearer.
if self.free {
Self::VISITED.with(|hs| hs.borrow_mut().clear());
if self.top_level {
// When the top level of the graph is dropped, we can free the entire map for the next traversal.
Self::SEEN.with(|hm| hm.borrow_mut().clear());
} else if !self.live {
// This was the first RL for this object to become live, so it's no longer live now that it's dropped.
Self::SEEN.with(|hm| {
hm.borrow_mut()
.insert(self.ptr, RecursionValueState::Visited)
});
}
}
}
impl RecursionLimiter {
thread_local! {
/// The list of pointers to `GcObject` that have been visited during the current `Debug::fmt` graph.
static VISITED: RefCell<HashSet<usize>> = RefCell::new(HashSet::new());
/// The map of pointers to `GcObject` that have been visited during the current `Debug::fmt` graph,
/// and the current state of their RecursionLimiter (dropped or live -- see `RecursionValueState`)
static SEEN: RefCell<HashMap<usize, RecursionValueState>> = RefCell::new(HashMap::new());
}
/// Determines if the specified `GcObject` has been visited, and returns a struct that will free it when dropped.
@ -348,15 +467,27 @@ impl RecursionLimiter {
/// This is done by maintaining a thread-local hashset containing the pointers of `GcObject` values that have been
/// visited. The first `GcObject` visited will clear the hashset, while any others will check if they are contained
/// by the hashset.
fn new(o: &GcObject) -> Self {
pub fn new(o: &GcObject) -> Self {
// We shouldn't have to worry too much about this being moved during Debug::fmt.
let ptr = (o.as_ref() as *const _) as usize;
let (free, first) = Self::VISITED.with(|hs| {
let mut hs = hs.borrow_mut();
(hs.is_empty(), hs.insert(ptr))
let (top_level, visited, live) = Self::SEEN.with(|hm| {
let mut hm = hm.borrow_mut();
let top_level = hm.is_empty();
let old_state = hm.insert(ptr, RecursionValueState::Live);
(
top_level,
old_state == Some(RecursionValueState::Visited),
old_state == Some(RecursionValueState::Live),
)
});
Self { free, first }
Self {
top_level,
ptr,
visited,
live,
}
}
}
@ -364,7 +495,13 @@ impl Debug for GcObject {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result {
let limiter = RecursionLimiter::new(&self);
if limiter.first {
// Typically, using `!limiter.live` would be good enough here.
// However, the JS object hierarchy involves quite a bit of repitition, and the sheer amount of data makes
// understanding the Debug output impossible; limiting the usefulness of it.
//
// Instead, we check if the object has appeared before in the entire graph. This means that objects will appear
// at most once, hopefully making things a bit clearer.
if !limiter.visited && !limiter.live {
f.debug_tuple("GcObject").field(&self.0).finish()
} else {
f.write_str("{ ... }")

2
boa/src/object/mod.rs

@ -25,7 +25,7 @@ mod gcobject;
mod internal_methods;
mod iter;
pub use gcobject::{GcObject, Ref, RefMut};
pub use gcobject::{GcObject, RecursionLimiter, Ref, RefMut};
pub use iter::*;
/// Static `prototype`, usually set on constructors as a key to point to their respective prototype object.

33
boa/src/value/mod.rs

@ -15,7 +15,7 @@ use crate::{
BoaProfiler, Context, Result,
};
use gc::{Finalize, GcCellRef, GcCellRefMut, Trace};
use serde_json::{map::Map, Number as JSONNumber, Value as JSONValue};
use serde_json::{Number as JSONNumber, Value as JSONValue};
use std::{
collections::HashSet,
convert::TryFrom,
@ -225,32 +225,7 @@ impl Value {
match *self {
Self::Null => Ok(JSONValue::Null),
Self::Boolean(b) => Ok(JSONValue::Bool(b)),
Self::Object(ref obj) => {
if obj.borrow().is_array() {
let mut keys: Vec<u32> = obj.borrow().index_property_keys().cloned().collect();
keys.sort();
let mut arr: Vec<JSONValue> = Vec::with_capacity(keys.len());
for key in keys {
let value = self.get_field(key);
if value.is_undefined() || value.is_function() || value.is_symbol() {
arr.push(JSONValue::Null);
} else {
arr.push(value.to_json(interpreter)?);
}
}
Ok(JSONValue::Array(arr))
} else {
let mut new_obj = Map::new();
for k in obj.borrow().keys() {
let key = k.clone();
let value = self.get_field(k.to_string());
if !value.is_undefined() && !value.is_function() && !value.is_symbol() {
new_obj.insert(key.to_string(), value.to_json(interpreter)?);
}
}
Ok(JSONValue::Object(new_obj))
}
}
Self::Object(ref obj) => obj.to_json(interpreter),
Self::String(ref str) => Ok(JSONValue::String(str.to_string())),
Self::Rational(num) => Ok(JSONNumber::from_f64(num)
.map(JSONValue::Number)
@ -592,7 +567,7 @@ impl Value {
pub fn to_primitive(&self, ctx: &mut Context, preferred_type: PreferredType) -> Result<Value> {
// 1. Assert: input is an ECMAScript language value. (always a value not need to check)
// 2. If Type(input) is Object, then
if let Value::Object(_) = self {
if let Value::Object(obj) = self {
let mut hint = preferred_type;
// Skip d, e we don't support Symbols yet
@ -603,7 +578,7 @@ impl Value {
};
// g. Return ? OrdinaryToPrimitive(input, hint).
ctx.ordinary_to_primitive(self, hint)
obj.ordinary_to_primitive(ctx, hint)
} else {
// 3. Return input.
Ok(self.clone())

121
boa/src/value/tests.rs

@ -496,6 +496,127 @@ toString: {
);
}
/// Test cyclic conversions that previously caused stack overflows
/// Relevant mitigations for these are in `GcObject::ordinary_to_primitive` and
/// `GcObject::to_json`
mod cyclic_conversions {
use super::*;
#[test]
fn to_json_cyclic() {
let mut engine = Context::new();
let src = r#"
let a = [];
a[0] = a;
JSON.stringify(a)
"#;
assert_eq!(
forward(&mut engine, src),
r#"Uncaught "TypeError": "cyclic object value""#,
);
}
#[test]
fn to_json_noncyclic() {
let mut engine = Context::new();
let src = r#"
let b = [];
let a = [b, b];
JSON.stringify(a)
"#;
let value = forward_val(&mut engine, src).unwrap();
let result = value.as_string().unwrap();
assert_eq!(result, "[[],[]]",);
}
// These tests don't throw errors. Instead we mirror Chrome / Firefox behavior for these conversions
#[test]
fn to_string_cyclic() {
let mut engine = Context::new();
let src = r#"
let a = [];
a[0] = a;
a.toString()
"#;
let value = forward_val(&mut engine, src).unwrap();
let result = value.as_string().unwrap();
assert_eq!(result, "");
}
#[test]
fn to_number_cyclic() {
let mut engine = Context::new();
let src = r#"
let a = [];
a[0] = a;
+a
"#;
let value = forward_val(&mut engine, src).unwrap();
let result = value.as_number().unwrap();
assert_eq!(result, 0.);
}
#[test]
fn to_boolean_cyclic() {
// this already worked before the mitigation, but we don't want to cause a regression
let mut engine = Context::new();
let src = r#"
let a = [];
a[0] = a;
!!a
"#;
let value = forward_val(&mut engine, src).unwrap();
// There isn't an as_boolean function for some reason?
assert_eq!(value, Value::Boolean(true));
}
#[test]
fn to_bigint_cyclic() {
let mut engine = Context::new();
let src = r#"
let a = [];
a[0] = a;
BigInt(a)
"#;
let value = forward_val(&mut engine, src).unwrap();
let result = value.as_bigint().unwrap().to_f64();
assert_eq!(result, 0.);
}
#[test]
fn to_u32_cyclic() {
let mut engine = Context::new();
let src = r#"
let a = [];
a[0] = a;
a | 0
"#;
let value = forward_val(&mut engine, src).unwrap();
let result = value.as_number().unwrap();
assert_eq!(result, 0.);
}
#[test]
fn console_log_cyclic() {
let mut engine = Context::new();
let src = r#"
let a = [1];
a[1] = a;
console.log(a);
"#;
let _ = forward(&mut engine, src);
// Should not stack overflow
}
}
mod abstract_relational_comparison {
use super::*;
macro_rules! check_comparison {

Loading…
Cancel
Save