Fix parsing floats panics and bugs (#1110)

* Fix parsing float panic * Fix float parsing functions * Fix parseFloat bugs * Fix parseInt and parseFloat bugs * Fix clippy * Add todo comment * Update boa/src/builtins/string/mod.rs Co-authored-by: tofpie <75836434+tofpie@users.noreply.github.com> Co-authored-by: tofpie <75836434+tofpie@users.noreply.github.com>
4 years ago · 9b1264fc7e
6 changed files with 85 additions and 54 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -8,6 +8,7 @@ dependencies = [
 "boa_unicode",
 "chrono",
 "criterion",
+ "fast-float",
 "float-cmp",
 "gc",
 "indexmap",
@ -361,6 +362,12 @@ version = "1.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"

+[[package]]
+name = "fast-float"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c"
+
 [[package]]
 name = "float-cmp"
 version = "0.8.0"
--- a/boa/Cargo.toml
+++ b/boa/Cargo.toml
@ -35,6 +35,7 @@ bitflags = "1.2.1"
 indexmap = "1.6.1"
 ryu-js = "0.2.1"
 chrono = "0.4.19"
+fast-float = "0.2.0"

 # Optional Dependencies
 measureme = { version = "9.0.0", optional = true }
--- a/boa/src/builtins/number/mod.rs
+++ b/boa/src/builtins/number/mod.rs
@ -14,6 +14,7 @@
 //! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number

 use super::function::make_builtin_fn;
+use super::string::is_trimmable_whitespace;
 use crate::{
    builtins::BuiltIn,
    object::{ConstructorBuilder, ObjectData, PROTOTYPE},
@ -673,7 +674,7 @@ impl Number {
            let input_string = val.to_string(context)?;

            // 2. Let S be ! TrimString(inputString, start).
-            let mut var_s = input_string.trim();
+            let mut var_s = input_string.trim_start_matches(is_trimmable_whitespace);

            // 3. Let sign be 1.
            // 4. If S is not empty and the first code unit of S is the code unit 0x002D (HYPHEN-MINUS),
@ -789,31 +790,37 @@ impl Number {
    ///
    /// [spec]: https://tc39.es/ecma262/#sec-parsefloat-string
    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseFloat
-    pub(crate) fn parse_float(_: &Value, args: &[Value], _ctx: &mut Context) -> Result<Value> {
+    pub(crate) fn parse_float(_: &Value, args: &[Value], context: &mut Context) -> Result<Value> {
        if let Some(val) = args.get(0) {
-            match val {
-                Value::String(s) => {
-                    if let Ok(i) = s.parse::<i32>() {
-                        // Attempt to parse an integer first so that it can be stored as an integer
-                        // to improve performance
-                        Ok(Value::integer(i))
-                    } else if let Ok(f) = s.parse::<f64>() {
-                        Ok(Value::rational(f))
-                    } else {
-                        // String can't be parsed.
-                        Ok(Value::from(f64::NAN))
-                    }
-                }
-                Value::Integer(i) => Ok(Value::integer(*i)),
-                Value::Rational(f) => Ok(Value::rational(*f)),
-                _ => {
-                    // Wrong argument type to parseFloat.
-                    Ok(Value::from(f64::NAN))
-                }
+            let input_string = val.to_string(context)?;
+            let s = input_string.trim_start_matches(is_trimmable_whitespace);
+            let s_prefix_lower = s.chars().take(4).collect::<String>().to_ascii_lowercase();
+
+            // TODO: write our own lexer to match syntax StrDecimalLiteral
+            if s.starts_with("Infinity") || s.starts_with("+Infinity") {
+                Ok(Value::from(f64::INFINITY))
+            } else if s.starts_with("-Infinity") {
+                Ok(Value::from(f64::NEG_INFINITY))
+            } else if s_prefix_lower.starts_with("inf")
+                || s_prefix_lower.starts_with("+inf")
+                || s_prefix_lower.starts_with("-inf")
+            {
+                // Prevent fast_float from parsing "inf", "+inf" as Infinity and "-inf" as -Infinity
+                Ok(Value::nan())
+            } else {
+                Ok(fast_float::parse_partial::<f64, _>(s)
+                    .map(|(f, len)| {
+                        if len > 0 {
+                            Value::rational(f)
+                        } else {
+                            Value::nan()
+                        }
+                    })
+                    .unwrap_or_else(|_| Value::nan()))
            }
        } else {
            // Not enough arguments to parseFloat.
-            Ok(Value::from(f64::NAN))
+            Ok(Value::nan())
        }
    }

--- a/boa/src/builtins/string/mod.rs
+++ b/boa/src/builtins/string/mod.rs
@ -52,6 +52,27 @@ pub(crate) fn code_point_at(string: RcString, position: i32) -> Option<(u32, u8,
    Some((cp, 2, false))
 }

+/// Helper function to check if a `char` is trimmable.
+#[inline]
+pub(crate) fn is_trimmable_whitespace(c: char) -> bool {
+    // The rust implementation of `trim` does not regard the same characters whitespace as ecma standard does
+    //
+    // Rust uses \p{White_Space} by default, which also includes:
+    // `\u{0085}' (next line)
+    // And does not include:
+    // '\u{FEFF}' (zero width non-breaking space)
+    // Explicit whitespace: https://tc39.es/ecma262/#sec-white-space
+    matches!(
+        c,
+        '\u{0009}' | '\u{000B}' | '\u{000C}' | '\u{0020}' | '\u{00A0}' | '\u{FEFF}' |
+    // Unicode Space_Separator category
+    '\u{1680}' | '\u{2000}'
+            ..='\u{200A}' | '\u{202F}' | '\u{205F}' | '\u{3000}' |
+    // Line terminators: https://tc39.es/ecma262/#sec-line-terminators
+    '\u{000A}' | '\u{000D}' | '\u{2028}' | '\u{2029}'
+    )
+}
+
 fn is_leading_surrogate(value: u16) -> bool {
    (0xD800..=0xDBFF).contains(&value)
 }
@ -968,27 +989,6 @@ impl String {
        Self::string_pad(primitive, max_length, fill_string, true)
    }

-    /// Helper function to check if a `char` is trimmable.
-    #[inline]
-    fn is_trimmable_whitespace(c: char) -> bool {
-        // The rust implementation of `trim` does not regard the same characters whitespace as ecma standard does
-        //
-        // Rust uses \p{White_Space} by default, which also includes:
-        // `\u{0085}' (next line)
-        // And does not include:
-        // '\u{FEFF}' (zero width non-breaking space)
-        // Explicit whitespace: https://tc39.es/ecma262/#sec-white-space
-        matches!(
-            c,
-            '\u{0009}' | '\u{000B}' | '\u{000C}' | '\u{0020}' | '\u{00A0}' | '\u{FEFF}' |
-        // Unicode Space_Seperator category
-        '\u{1680}' | '\u{2000}'
-                ..='\u{200A}' | '\u{202F}' | '\u{205F}' | '\u{3000}' |
-        // Line terminators: https://tc39.es/ecma262/#sec-line-terminators
-        '\u{000A}' | '\u{000D}' | '\u{2028}' | '\u{2029}'
-        )
-    }
-
    /// String.prototype.trim()
    ///
    /// The `trim()` method removes whitespace from both ends of a string.
@ -1004,9 +1004,7 @@ impl String {
    pub(crate) fn trim(this: &Value, _: &[Value], context: &mut Context) -> Result<Value> {
        let this = this.require_object_coercible(context)?;
        let string = this.to_string(context)?;
-        Ok(Value::from(
-            string.trim_matches(Self::is_trimmable_whitespace),
-        ))
+        Ok(Value::from(string.trim_matches(is_trimmable_whitespace)))
    }

    /// `String.prototype.trimStart()`
@ -1024,7 +1022,7 @@ impl String {
    pub(crate) fn trim_start(this: &Value, _: &[Value], context: &mut Context) -> Result<Value> {
        let string = this.to_string(context)?;
        Ok(Value::from(
-            string.trim_start_matches(Self::is_trimmable_whitespace),
+            string.trim_start_matches(is_trimmable_whitespace),
        ))
    }

@ -1044,7 +1042,7 @@ impl String {
        let this = this.require_object_coercible(context)?;
        let string = this.to_string(context)?;
        Ok(Value::from(
-            string.trim_end_matches(Self::is_trimmable_whitespace),
+            string.trim_end_matches(is_trimmable_whitespace),
        ))
    }

--- a/boa/src/syntax/lexer/number.rs
+++ b/boa/src/syntax/lexer/number.rs
@ -9,8 +9,8 @@ use crate::{
        lexer::{token::Numeric, Token},
    },
 };
+use std::io::Read;
 use std::str;
-use std::{io::Read, str::FromStr};

 /// Number literal lexing.
 ///
@ -381,7 +381,7 @@ impl<R> Tokenizer<R> for NumberLiteral {
                    )
            }
            NumericKind::Rational /* base: 10 */ => {
-                let val = f64::from_str(num_str).expect("Failed to parse float after checks");
+                let val: f64 = fast_float::parse(num_str).expect("Failed to parse float after checks");
                let int_val = val as i32;

                // The truncated float should be identically to the non-truncated float for the conversion to be loss-less,
--- a/boa/src/value/mod.rs
+++ b/boa/src/value/mod.rs
@ -8,6 +8,7 @@ mod tests;
 use crate::{
    builtins::{
        number::{f64_to_int32, f64_to_uint32},
+        string::is_trimmable_whitespace,
        BigInt, Number,
    },
    object::{GcObject, Object, ObjectData},
@ -809,12 +810,29 @@ impl Value {
            Value::Null => Ok(0.0),
            Value::Undefined => Ok(f64::NAN),
            Value::Boolean(b) => Ok(if b { 1.0 } else { 0.0 }),
-            // TODO: this is probably not 100% correct, see https://tc39.es/ecma262/#sec-tonumber-applied-to-the-string-type
            Value::String(ref string) => {
-                if string.trim().is_empty() {
-                    return Ok(0.0);
+                let string = string.trim_matches(is_trimmable_whitespace);
+
+                // TODO: write our own lexer to match syntax StrDecimalLiteral
+                match string {
+                    "" => Ok(0.0),
+                    "Infinity" | "+Infinity" => Ok(f64::INFINITY),
+                    "-Infinity" => Ok(f64::NEG_INFINITY),
+                    _ if matches!(
+                        string
+                            .chars()
+                            .take(4)
+                            .collect::<String>()
+                            .to_ascii_lowercase()
+                            .as_str(),
+                        "inf" | "+inf" | "-inf" | "nan" | "+nan" | "-nan"
+                    ) =>
+                    {
+                        // Prevent fast_float from parsing "inf", "+inf" as Infinity and "-inf" as -Infinity
+                        Ok(f64::NAN)
+                    }
+                    _ => Ok(fast_float::parse(string).unwrap_or(f64::NAN)),
                }
-                Ok(string.parse().unwrap_or(f64::NAN))
            }
            Value::Rational(number) => Ok(number),
            Value::Integer(integer) => Ok(f64::from(integer)),