Browse Source

Fix parsing floats panics and bugs (#1110)

* Fix parsing float panic

* Fix float parsing functions

* Fix parseFloat bugs

* Fix parseInt and parseFloat bugs

* Fix clippy

* Add todo comment

* Update boa/src/builtins/string/mod.rs

Co-authored-by: tofpie <75836434+tofpie@users.noreply.github.com>

Co-authored-by: tofpie <75836434+tofpie@users.noreply.github.com>
pull/1116/head
Jevan Chan 4 years ago committed by GitHub
parent
commit
9b1264fc7e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 7
      Cargo.lock
  2. 1
      boa/Cargo.toml
  3. 51
      boa/src/builtins/number/mod.rs
  4. 50
      boa/src/builtins/string/mod.rs
  5. 4
      boa/src/syntax/lexer/number.rs
  6. 26
      boa/src/value/mod.rs

7
Cargo.lock generated

@ -8,6 +8,7 @@ dependencies = [
"boa_unicode",
"chrono",
"criterion",
"fast-float",
"float-cmp",
"gc",
"indexmap",
@ -361,6 +362,12 @@ version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
[[package]]
name = "fast-float"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c"
[[package]]
name = "float-cmp"
version = "0.8.0"

1
boa/Cargo.toml

@ -35,6 +35,7 @@ bitflags = "1.2.1"
indexmap = "1.6.1"
ryu-js = "0.2.1"
chrono = "0.4.19"
fast-float = "0.2.0"
# Optional Dependencies
measureme = { version = "9.0.0", optional = true }

51
boa/src/builtins/number/mod.rs

@ -14,6 +14,7 @@
//! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number
use super::function::make_builtin_fn;
use super::string::is_trimmable_whitespace;
use crate::{
builtins::BuiltIn,
object::{ConstructorBuilder, ObjectData, PROTOTYPE},
@ -673,7 +674,7 @@ impl Number {
let input_string = val.to_string(context)?;
// 2. Let S be ! TrimString(inputString, start).
let mut var_s = input_string.trim();
let mut var_s = input_string.trim_start_matches(is_trimmable_whitespace);
// 3. Let sign be 1.
// 4. If S is not empty and the first code unit of S is the code unit 0x002D (HYPHEN-MINUS),
@ -789,31 +790,37 @@ impl Number {
///
/// [spec]: https://tc39.es/ecma262/#sec-parsefloat-string
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseFloat
pub(crate) fn parse_float(_: &Value, args: &[Value], _ctx: &mut Context) -> Result<Value> {
pub(crate) fn parse_float(_: &Value, args: &[Value], context: &mut Context) -> Result<Value> {
if let Some(val) = args.get(0) {
match val {
Value::String(s) => {
if let Ok(i) = s.parse::<i32>() {
// Attempt to parse an integer first so that it can be stored as an integer
// to improve performance
Ok(Value::integer(i))
} else if let Ok(f) = s.parse::<f64>() {
Ok(Value::rational(f))
} else {
// String can't be parsed.
Ok(Value::from(f64::NAN))
}
}
Value::Integer(i) => Ok(Value::integer(*i)),
Value::Rational(f) => Ok(Value::rational(*f)),
_ => {
// Wrong argument type to parseFloat.
Ok(Value::from(f64::NAN))
}
let input_string = val.to_string(context)?;
let s = input_string.trim_start_matches(is_trimmable_whitespace);
let s_prefix_lower = s.chars().take(4).collect::<String>().to_ascii_lowercase();
// TODO: write our own lexer to match syntax StrDecimalLiteral
if s.starts_with("Infinity") || s.starts_with("+Infinity") {
Ok(Value::from(f64::INFINITY))
} else if s.starts_with("-Infinity") {
Ok(Value::from(f64::NEG_INFINITY))
} else if s_prefix_lower.starts_with("inf")
|| s_prefix_lower.starts_with("+inf")
|| s_prefix_lower.starts_with("-inf")
{
// Prevent fast_float from parsing "inf", "+inf" as Infinity and "-inf" as -Infinity
Ok(Value::nan())
} else {
Ok(fast_float::parse_partial::<f64, _>(s)
.map(|(f, len)| {
if len > 0 {
Value::rational(f)
} else {
Value::nan()
}
})
.unwrap_or_else(|_| Value::nan()))
}
} else {
// Not enough arguments to parseFloat.
Ok(Value::from(f64::NAN))
Ok(Value::nan())
}
}

50
boa/src/builtins/string/mod.rs

@ -52,6 +52,27 @@ pub(crate) fn code_point_at(string: RcString, position: i32) -> Option<(u32, u8,
Some((cp, 2, false))
}
/// Helper function to check if a `char` is trimmable.
#[inline]
pub(crate) fn is_trimmable_whitespace(c: char) -> bool {
// The rust implementation of `trim` does not regard the same characters whitespace as ecma standard does
//
// Rust uses \p{White_Space} by default, which also includes:
// `\u{0085}' (next line)
// And does not include:
// '\u{FEFF}' (zero width non-breaking space)
// Explicit whitespace: https://tc39.es/ecma262/#sec-white-space
matches!(
c,
'\u{0009}' | '\u{000B}' | '\u{000C}' | '\u{0020}' | '\u{00A0}' | '\u{FEFF}' |
// Unicode Space_Separator category
'\u{1680}' | '\u{2000}'
..='\u{200A}' | '\u{202F}' | '\u{205F}' | '\u{3000}' |
// Line terminators: https://tc39.es/ecma262/#sec-line-terminators
'\u{000A}' | '\u{000D}' | '\u{2028}' | '\u{2029}'
)
}
fn is_leading_surrogate(value: u16) -> bool {
(0xD800..=0xDBFF).contains(&value)
}
@ -968,27 +989,6 @@ impl String {
Self::string_pad(primitive, max_length, fill_string, true)
}
/// Helper function to check if a `char` is trimmable.
#[inline]
fn is_trimmable_whitespace(c: char) -> bool {
// The rust implementation of `trim` does not regard the same characters whitespace as ecma standard does
//
// Rust uses \p{White_Space} by default, which also includes:
// `\u{0085}' (next line)
// And does not include:
// '\u{FEFF}' (zero width non-breaking space)
// Explicit whitespace: https://tc39.es/ecma262/#sec-white-space
matches!(
c,
'\u{0009}' | '\u{000B}' | '\u{000C}' | '\u{0020}' | '\u{00A0}' | '\u{FEFF}' |
// Unicode Space_Seperator category
'\u{1680}' | '\u{2000}'
..='\u{200A}' | '\u{202F}' | '\u{205F}' | '\u{3000}' |
// Line terminators: https://tc39.es/ecma262/#sec-line-terminators
'\u{000A}' | '\u{000D}' | '\u{2028}' | '\u{2029}'
)
}
/// String.prototype.trim()
///
/// The `trim()` method removes whitespace from both ends of a string.
@ -1004,9 +1004,7 @@ impl String {
pub(crate) fn trim(this: &Value, _: &[Value], context: &mut Context) -> Result<Value> {
let this = this.require_object_coercible(context)?;
let string = this.to_string(context)?;
Ok(Value::from(
string.trim_matches(Self::is_trimmable_whitespace),
))
Ok(Value::from(string.trim_matches(is_trimmable_whitespace)))
}
/// `String.prototype.trimStart()`
@ -1024,7 +1022,7 @@ impl String {
pub(crate) fn trim_start(this: &Value, _: &[Value], context: &mut Context) -> Result<Value> {
let string = this.to_string(context)?;
Ok(Value::from(
string.trim_start_matches(Self::is_trimmable_whitespace),
string.trim_start_matches(is_trimmable_whitespace),
))
}
@ -1044,7 +1042,7 @@ impl String {
let this = this.require_object_coercible(context)?;
let string = this.to_string(context)?;
Ok(Value::from(
string.trim_end_matches(Self::is_trimmable_whitespace),
string.trim_end_matches(is_trimmable_whitespace),
))
}

4
boa/src/syntax/lexer/number.rs

@ -9,8 +9,8 @@ use crate::{
lexer::{token::Numeric, Token},
},
};
use std::io::Read;
use std::str;
use std::{io::Read, str::FromStr};
/// Number literal lexing.
///
@ -381,7 +381,7 @@ impl<R> Tokenizer<R> for NumberLiteral {
)
}
NumericKind::Rational /* base: 10 */ => {
let val = f64::from_str(num_str).expect("Failed to parse float after checks");
let val: f64 = fast_float::parse(num_str).expect("Failed to parse float after checks");
let int_val = val as i32;
// The truncated float should be identically to the non-truncated float for the conversion to be loss-less,

26
boa/src/value/mod.rs

@ -8,6 +8,7 @@ mod tests;
use crate::{
builtins::{
number::{f64_to_int32, f64_to_uint32},
string::is_trimmable_whitespace,
BigInt, Number,
},
object::{GcObject, Object, ObjectData},
@ -809,12 +810,29 @@ impl Value {
Value::Null => Ok(0.0),
Value::Undefined => Ok(f64::NAN),
Value::Boolean(b) => Ok(if b { 1.0 } else { 0.0 }),
// TODO: this is probably not 100% correct, see https://tc39.es/ecma262/#sec-tonumber-applied-to-the-string-type
Value::String(ref string) => {
if string.trim().is_empty() {
return Ok(0.0);
let string = string.trim_matches(is_trimmable_whitespace);
// TODO: write our own lexer to match syntax StrDecimalLiteral
match string {
"" => Ok(0.0),
"Infinity" | "+Infinity" => Ok(f64::INFINITY),
"-Infinity" => Ok(f64::NEG_INFINITY),
_ if matches!(
string
.chars()
.take(4)
.collect::<String>()
.to_ascii_lowercase()
.as_str(),
"inf" | "+inf" | "-inf" | "nan" | "+nan" | "-nan"
) =>
{
// Prevent fast_float from parsing "inf", "+inf" as Infinity and "-inf" as -Infinity
Ok(f64::NAN)
}
_ => Ok(fast_float::parse(string).unwrap_or(f64::NAN)),
}
Ok(string.parse().unwrap_or(f64::NAN))
}
Value::Rational(number) => Ok(number),
Value::Integer(integer) => Ok(f64::from(integer)),

Loading…
Cancel
Save