From 9b1264fc7e3cdbcf10d06244b9ad4c76ad05a7ab Mon Sep 17 00:00:00 2001
From: Jevan Chan <jevan.cnchan@gmail.com>
Date: Thu, 4 Feb 2021 06:12:08 -0800
Subject: [PATCH] Fix parsing floats panics and bugs (#1110)

* Fix parsing float panic

* Fix float parsing functions

* Fix parseFloat bugs

* Fix parseInt and parseFloat bugs

* Fix clippy

* Add todo comment

* Update boa/src/builtins/string/mod.rs

Co-authored-by: tofpie <75836434+tofpie@users.noreply.github.com>

Co-authored-by: tofpie <75836434+tofpie@users.noreply.github.com>
---
 Cargo.lock                     |  7 +++++
 boa/Cargo.toml                 |  1 +
 boa/src/builtins/number/mod.rs | 51 +++++++++++++++++++---------------
 boa/src/builtins/string/mod.rs | 50 ++++++++++++++++-----------------
 boa/src/syntax/lexer/number.rs |  4 +--
 boa/src/value/mod.rs           | 26 ++++++++++++++---
 6 files changed, 85 insertions(+), 54 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index a20e0a000b..cc4c126900 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -8,6 +8,7 @@ dependencies = [
  "boa_unicode",
  "chrono",
  "criterion",
+ "fast-float",
  "float-cmp",
  "gc",
  "indexmap",
@@ -361,6 +362,12 @@ version = "1.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
 
+[[package]]
+name = "fast-float"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c"
+
 [[package]]
 name = "float-cmp"
 version = "0.8.0"
diff --git a/boa/Cargo.toml b/boa/Cargo.toml
index 38c390447b..c8e8e16374 100644
--- a/boa/Cargo.toml
+++ b/boa/Cargo.toml
@@ -35,6 +35,7 @@ bitflags = "1.2.1"
 indexmap = "1.6.1"
 ryu-js = "0.2.1"
 chrono = "0.4.19"
+fast-float = "0.2.0"
 
 # Optional Dependencies
 measureme = { version = "9.0.0", optional = true }
diff --git a/boa/src/builtins/number/mod.rs b/boa/src/builtins/number/mod.rs
index df5bef5f72..e1753997bc 100644
--- a/boa/src/builtins/number/mod.rs
+++ b/boa/src/builtins/number/mod.rs
@@ -14,6 +14,7 @@
 //! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number
 
 use super::function::make_builtin_fn;
+use super::string::is_trimmable_whitespace;
 use crate::{
     builtins::BuiltIn,
     object::{ConstructorBuilder, ObjectData, PROTOTYPE},
@@ -673,7 +674,7 @@ impl Number {
             let input_string = val.to_string(context)?;
 
             // 2. Let S be ! TrimString(inputString, start).
-            let mut var_s = input_string.trim();
+            let mut var_s = input_string.trim_start_matches(is_trimmable_whitespace);
 
             // 3. Let sign be 1.
             // 4. If S is not empty and the first code unit of S is the code unit 0x002D (HYPHEN-MINUS),
@@ -789,31 +790,37 @@ impl Number {
     ///
     /// [spec]: https://tc39.es/ecma262/#sec-parsefloat-string
     /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseFloat
-    pub(crate) fn parse_float(_: &Value, args: &[Value], _ctx: &mut Context) -> Result<Value> {
+    pub(crate) fn parse_float(_: &Value, args: &[Value], context: &mut Context) -> Result<Value> {
         if let Some(val) = args.get(0) {
-            match val {
-                Value::String(s) => {
-                    if let Ok(i) = s.parse::<i32>() {
-                        // Attempt to parse an integer first so that it can be stored as an integer
-                        // to improve performance
-                        Ok(Value::integer(i))
-                    } else if let Ok(f) = s.parse::<f64>() {
-                        Ok(Value::rational(f))
-                    } else {
-                        // String can't be parsed.
-                        Ok(Value::from(f64::NAN))
-                    }
-                }
-                Value::Integer(i) => Ok(Value::integer(*i)),
-                Value::Rational(f) => Ok(Value::rational(*f)),
-                _ => {
-                    // Wrong argument type to parseFloat.
-                    Ok(Value::from(f64::NAN))
-                }
+            let input_string = val.to_string(context)?;
+            let s = input_string.trim_start_matches(is_trimmable_whitespace);
+            let s_prefix_lower = s.chars().take(4).collect::<String>().to_ascii_lowercase();
+
+            // TODO: write our own lexer to match syntax StrDecimalLiteral
+            if s.starts_with("Infinity") || s.starts_with("+Infinity") {
+                Ok(Value::from(f64::INFINITY))
+            } else if s.starts_with("-Infinity") {
+                Ok(Value::from(f64::NEG_INFINITY))
+            } else if s_prefix_lower.starts_with("inf")
+                || s_prefix_lower.starts_with("+inf")
+                || s_prefix_lower.starts_with("-inf")
+            {
+                // Prevent fast_float from parsing "inf", "+inf" as Infinity and "-inf" as -Infinity
+                Ok(Value::nan())
+            } else {
+                Ok(fast_float::parse_partial::<f64, _>(s)
+                    .map(|(f, len)| {
+                        if len > 0 {
+                            Value::rational(f)
+                        } else {
+                            Value::nan()
+                        }
+                    })
+                    .unwrap_or_else(|_| Value::nan()))
             }
         } else {
             // Not enough arguments to parseFloat.
-            Ok(Value::from(f64::NAN))
+            Ok(Value::nan())
         }
     }
 
diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs
index 9749b9516e..b7ee8b07b4 100644
--- a/boa/src/builtins/string/mod.rs
+++ b/boa/src/builtins/string/mod.rs
@@ -52,6 +52,27 @@ pub(crate) fn code_point_at(string: RcString, position: i32) -> Option<(u32, u8,
     Some((cp, 2, false))
 }
 
+/// Helper function to check if a `char` is trimmable.
+#[inline]
+pub(crate) fn is_trimmable_whitespace(c: char) -> bool {
+    // The rust implementation of `trim` does not regard the same characters whitespace as ecma standard does
+    //
+    // Rust uses \p{White_Space} by default, which also includes:
+    // `\u{0085}' (next line)
+    // And does not include:
+    // '\u{FEFF}' (zero width non-breaking space)
+    // Explicit whitespace: https://tc39.es/ecma262/#sec-white-space
+    matches!(
+        c,
+        '\u{0009}' | '\u{000B}' | '\u{000C}' | '\u{0020}' | '\u{00A0}' | '\u{FEFF}' |
+    // Unicode Space_Separator category
+    '\u{1680}' | '\u{2000}'
+            ..='\u{200A}' | '\u{202F}' | '\u{205F}' | '\u{3000}' |
+    // Line terminators: https://tc39.es/ecma262/#sec-line-terminators
+    '\u{000A}' | '\u{000D}' | '\u{2028}' | '\u{2029}'
+    )
+}
+
 fn is_leading_surrogate(value: u16) -> bool {
     (0xD800..=0xDBFF).contains(&value)
 }
@@ -968,27 +989,6 @@ impl String {
         Self::string_pad(primitive, max_length, fill_string, true)
     }
 
-    /// Helper function to check if a `char` is trimmable.
-    #[inline]
-    fn is_trimmable_whitespace(c: char) -> bool {
-        // The rust implementation of `trim` does not regard the same characters whitespace as ecma standard does
-        //
-        // Rust uses \p{White_Space} by default, which also includes:
-        // `\u{0085}' (next line)
-        // And does not include:
-        // '\u{FEFF}' (zero width non-breaking space)
-        // Explicit whitespace: https://tc39.es/ecma262/#sec-white-space
-        matches!(
-            c,
-            '\u{0009}' | '\u{000B}' | '\u{000C}' | '\u{0020}' | '\u{00A0}' | '\u{FEFF}' |
-        // Unicode Space_Seperator category
-        '\u{1680}' | '\u{2000}'
-                ..='\u{200A}' | '\u{202F}' | '\u{205F}' | '\u{3000}' |
-        // Line terminators: https://tc39.es/ecma262/#sec-line-terminators
-        '\u{000A}' | '\u{000D}' | '\u{2028}' | '\u{2029}'
-        )
-    }
-
     /// String.prototype.trim()
     ///
     /// The `trim()` method removes whitespace from both ends of a string.
@@ -1004,9 +1004,7 @@ impl String {
     pub(crate) fn trim(this: &Value, _: &[Value], context: &mut Context) -> Result<Value> {
         let this = this.require_object_coercible(context)?;
         let string = this.to_string(context)?;
-        Ok(Value::from(
-            string.trim_matches(Self::is_trimmable_whitespace),
-        ))
+        Ok(Value::from(string.trim_matches(is_trimmable_whitespace)))
     }
 
     /// `String.prototype.trimStart()`
@@ -1024,7 +1022,7 @@ impl String {
     pub(crate) fn trim_start(this: &Value, _: &[Value], context: &mut Context) -> Result<Value> {
         let string = this.to_string(context)?;
         Ok(Value::from(
-            string.trim_start_matches(Self::is_trimmable_whitespace),
+            string.trim_start_matches(is_trimmable_whitespace),
         ))
     }
 
@@ -1044,7 +1042,7 @@ impl String {
         let this = this.require_object_coercible(context)?;
         let string = this.to_string(context)?;
         Ok(Value::from(
-            string.trim_end_matches(Self::is_trimmable_whitespace),
+            string.trim_end_matches(is_trimmable_whitespace),
         ))
     }
 
diff --git a/boa/src/syntax/lexer/number.rs b/boa/src/syntax/lexer/number.rs
index 46ba2b65c0..e58fccfe20 100644
--- a/boa/src/syntax/lexer/number.rs
+++ b/boa/src/syntax/lexer/number.rs
@@ -9,8 +9,8 @@ use crate::{
         lexer::{token::Numeric, Token},
     },
 };
+use std::io::Read;
 use std::str;
-use std::{io::Read, str::FromStr};
 
 /// Number literal lexing.
 ///
@@ -381,7 +381,7 @@ impl<R> Tokenizer<R> for NumberLiteral {
                     )
             }
             NumericKind::Rational /* base: 10 */ => {
-                let val = f64::from_str(num_str).expect("Failed to parse float after checks");
+                let val: f64 = fast_float::parse(num_str).expect("Failed to parse float after checks");
                 let int_val = val as i32;
 
                 // The truncated float should be identically to the non-truncated float for the conversion to be loss-less,
diff --git a/boa/src/value/mod.rs b/boa/src/value/mod.rs
index 5e00b90674..44c91047ba 100644
--- a/boa/src/value/mod.rs
+++ b/boa/src/value/mod.rs
@@ -8,6 +8,7 @@ mod tests;
 use crate::{
     builtins::{
         number::{f64_to_int32, f64_to_uint32},
+        string::is_trimmable_whitespace,
         BigInt, Number,
     },
     object::{GcObject, Object, ObjectData},
@@ -809,12 +810,29 @@ impl Value {
             Value::Null => Ok(0.0),
             Value::Undefined => Ok(f64::NAN),
             Value::Boolean(b) => Ok(if b { 1.0 } else { 0.0 }),
-            // TODO: this is probably not 100% correct, see https://tc39.es/ecma262/#sec-tonumber-applied-to-the-string-type
             Value::String(ref string) => {
-                if string.trim().is_empty() {
-                    return Ok(0.0);
+                let string = string.trim_matches(is_trimmable_whitespace);
+
+                // TODO: write our own lexer to match syntax StrDecimalLiteral
+                match string {
+                    "" => Ok(0.0),
+                    "Infinity" | "+Infinity" => Ok(f64::INFINITY),
+                    "-Infinity" => Ok(f64::NEG_INFINITY),
+                    _ if matches!(
+                        string
+                            .chars()
+                            .take(4)
+                            .collect::<String>()
+                            .to_ascii_lowercase()
+                            .as_str(),
+                        "inf" | "+inf" | "-inf" | "nan" | "+nan" | "-nan"
+                    ) =>
+                    {
+                        // Prevent fast_float from parsing "inf", "+inf" as Infinity and "-inf" as -Infinity
+                        Ok(f64::NAN)
+                    }
+                    _ => Ok(fast_float::parse(string).unwrap_or(f64::NAN)),
                 }
-                Ok(string.parse().unwrap_or(f64::NAN))
             }
             Value::Rational(number) => Ok(number),
             Value::Integer(integer) => Ok(f64::from(integer)),