From 9b1264fc7e3cdbcf10d06244b9ad4c76ad05a7ab Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Thu, 4 Feb 2021 06:12:08 -0800 Subject: [PATCH] Fix parsing floats panics and bugs (#1110) * Fix parsing float panic * Fix float parsing functions * Fix parseFloat bugs * Fix parseInt and parseFloat bugs * Fix clippy * Add todo comment * Update boa/src/builtins/string/mod.rs Co-authored-by: tofpie <75836434+tofpie@users.noreply.github.com> Co-authored-by: tofpie <75836434+tofpie@users.noreply.github.com> --- Cargo.lock | 7 +++++ boa/Cargo.toml | 1 + boa/src/builtins/number/mod.rs | 51 +++++++++++++++++++--------------- boa/src/builtins/string/mod.rs | 50 ++++++++++++++++----------------- boa/src/syntax/lexer/number.rs | 4 +-- boa/src/value/mod.rs | 26 ++++++++++++++--- 6 files changed, 85 insertions(+), 54 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a20e0a000b..cc4c126900 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,7 @@ dependencies = [ "boa_unicode", "chrono", "criterion", + "fast-float", "float-cmp", "gc", "indexmap", @@ -361,6 +362,12 @@ version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" +[[package]] +name = "fast-float" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" + [[package]] name = "float-cmp" version = "0.8.0" diff --git a/boa/Cargo.toml b/boa/Cargo.toml index 38c390447b..c8e8e16374 100644 --- a/boa/Cargo.toml +++ b/boa/Cargo.toml @@ -35,6 +35,7 @@ bitflags = "1.2.1" indexmap = "1.6.1" ryu-js = "0.2.1" chrono = "0.4.19" +fast-float = "0.2.0" # Optional Dependencies measureme = { version = "9.0.0", optional = true } diff --git a/boa/src/builtins/number/mod.rs b/boa/src/builtins/number/mod.rs index df5bef5f72..e1753997bc 100644 --- a/boa/src/builtins/number/mod.rs +++ b/boa/src/builtins/number/mod.rs @@ -14,6 +14,7 @@ //! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number use super::function::make_builtin_fn; +use super::string::is_trimmable_whitespace; use crate::{ builtins::BuiltIn, object::{ConstructorBuilder, ObjectData, PROTOTYPE}, @@ -673,7 +674,7 @@ impl Number { let input_string = val.to_string(context)?; // 2. Let S be ! TrimString(inputString, start). - let mut var_s = input_string.trim(); + let mut var_s = input_string.trim_start_matches(is_trimmable_whitespace); // 3. Let sign be 1. // 4. If S is not empty and the first code unit of S is the code unit 0x002D (HYPHEN-MINUS), @@ -789,31 +790,37 @@ impl Number { /// /// [spec]: https://tc39.es/ecma262/#sec-parsefloat-string /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseFloat - pub(crate) fn parse_float(_: &Value, args: &[Value], _ctx: &mut Context) -> Result { + pub(crate) fn parse_float(_: &Value, args: &[Value], context: &mut Context) -> Result { if let Some(val) = args.get(0) { - match val { - Value::String(s) => { - if let Ok(i) = s.parse::() { - // Attempt to parse an integer first so that it can be stored as an integer - // to improve performance - Ok(Value::integer(i)) - } else if let Ok(f) = s.parse::() { - Ok(Value::rational(f)) - } else { - // String can't be parsed. - Ok(Value::from(f64::NAN)) - } - } - Value::Integer(i) => Ok(Value::integer(*i)), - Value::Rational(f) => Ok(Value::rational(*f)), - _ => { - // Wrong argument type to parseFloat. - Ok(Value::from(f64::NAN)) - } + let input_string = val.to_string(context)?; + let s = input_string.trim_start_matches(is_trimmable_whitespace); + let s_prefix_lower = s.chars().take(4).collect::().to_ascii_lowercase(); + + // TODO: write our own lexer to match syntax StrDecimalLiteral + if s.starts_with("Infinity") || s.starts_with("+Infinity") { + Ok(Value::from(f64::INFINITY)) + } else if s.starts_with("-Infinity") { + Ok(Value::from(f64::NEG_INFINITY)) + } else if s_prefix_lower.starts_with("inf") + || s_prefix_lower.starts_with("+inf") + || s_prefix_lower.starts_with("-inf") + { + // Prevent fast_float from parsing "inf", "+inf" as Infinity and "-inf" as -Infinity + Ok(Value::nan()) + } else { + Ok(fast_float::parse_partial::(s) + .map(|(f, len)| { + if len > 0 { + Value::rational(f) + } else { + Value::nan() + } + }) + .unwrap_or_else(|_| Value::nan())) } } else { // Not enough arguments to parseFloat. - Ok(Value::from(f64::NAN)) + Ok(Value::nan()) } } diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs index 9749b9516e..b7ee8b07b4 100644 --- a/boa/src/builtins/string/mod.rs +++ b/boa/src/builtins/string/mod.rs @@ -52,6 +52,27 @@ pub(crate) fn code_point_at(string: RcString, position: i32) -> Option<(u32, u8, Some((cp, 2, false)) } +/// Helper function to check if a `char` is trimmable. +#[inline] +pub(crate) fn is_trimmable_whitespace(c: char) -> bool { + // The rust implementation of `trim` does not regard the same characters whitespace as ecma standard does + // + // Rust uses \p{White_Space} by default, which also includes: + // `\u{0085}' (next line) + // And does not include: + // '\u{FEFF}' (zero width non-breaking space) + // Explicit whitespace: https://tc39.es/ecma262/#sec-white-space + matches!( + c, + '\u{0009}' | '\u{000B}' | '\u{000C}' | '\u{0020}' | '\u{00A0}' | '\u{FEFF}' | + // Unicode Space_Separator category + '\u{1680}' | '\u{2000}' + ..='\u{200A}' | '\u{202F}' | '\u{205F}' | '\u{3000}' | + // Line terminators: https://tc39.es/ecma262/#sec-line-terminators + '\u{000A}' | '\u{000D}' | '\u{2028}' | '\u{2029}' + ) +} + fn is_leading_surrogate(value: u16) -> bool { (0xD800..=0xDBFF).contains(&value) } @@ -968,27 +989,6 @@ impl String { Self::string_pad(primitive, max_length, fill_string, true) } - /// Helper function to check if a `char` is trimmable. - #[inline] - fn is_trimmable_whitespace(c: char) -> bool { - // The rust implementation of `trim` does not regard the same characters whitespace as ecma standard does - // - // Rust uses \p{White_Space} by default, which also includes: - // `\u{0085}' (next line) - // And does not include: - // '\u{FEFF}' (zero width non-breaking space) - // Explicit whitespace: https://tc39.es/ecma262/#sec-white-space - matches!( - c, - '\u{0009}' | '\u{000B}' | '\u{000C}' | '\u{0020}' | '\u{00A0}' | '\u{FEFF}' | - // Unicode Space_Seperator category - '\u{1680}' | '\u{2000}' - ..='\u{200A}' | '\u{202F}' | '\u{205F}' | '\u{3000}' | - // Line terminators: https://tc39.es/ecma262/#sec-line-terminators - '\u{000A}' | '\u{000D}' | '\u{2028}' | '\u{2029}' - ) - } - /// String.prototype.trim() /// /// The `trim()` method removes whitespace from both ends of a string. @@ -1004,9 +1004,7 @@ impl String { pub(crate) fn trim(this: &Value, _: &[Value], context: &mut Context) -> Result { let this = this.require_object_coercible(context)?; let string = this.to_string(context)?; - Ok(Value::from( - string.trim_matches(Self::is_trimmable_whitespace), - )) + Ok(Value::from(string.trim_matches(is_trimmable_whitespace))) } /// `String.prototype.trimStart()` @@ -1024,7 +1022,7 @@ impl String { pub(crate) fn trim_start(this: &Value, _: &[Value], context: &mut Context) -> Result { let string = this.to_string(context)?; Ok(Value::from( - string.trim_start_matches(Self::is_trimmable_whitespace), + string.trim_start_matches(is_trimmable_whitespace), )) } @@ -1044,7 +1042,7 @@ impl String { let this = this.require_object_coercible(context)?; let string = this.to_string(context)?; Ok(Value::from( - string.trim_end_matches(Self::is_trimmable_whitespace), + string.trim_end_matches(is_trimmable_whitespace), )) } diff --git a/boa/src/syntax/lexer/number.rs b/boa/src/syntax/lexer/number.rs index 46ba2b65c0..e58fccfe20 100644 --- a/boa/src/syntax/lexer/number.rs +++ b/boa/src/syntax/lexer/number.rs @@ -9,8 +9,8 @@ use crate::{ lexer::{token::Numeric, Token}, }, }; +use std::io::Read; use std::str; -use std::{io::Read, str::FromStr}; /// Number literal lexing. /// @@ -381,7 +381,7 @@ impl Tokenizer for NumberLiteral { ) } NumericKind::Rational /* base: 10 */ => { - let val = f64::from_str(num_str).expect("Failed to parse float after checks"); + let val: f64 = fast_float::parse(num_str).expect("Failed to parse float after checks"); let int_val = val as i32; // The truncated float should be identically to the non-truncated float for the conversion to be loss-less, diff --git a/boa/src/value/mod.rs b/boa/src/value/mod.rs index 5e00b90674..44c91047ba 100644 --- a/boa/src/value/mod.rs +++ b/boa/src/value/mod.rs @@ -8,6 +8,7 @@ mod tests; use crate::{ builtins::{ number::{f64_to_int32, f64_to_uint32}, + string::is_trimmable_whitespace, BigInt, Number, }, object::{GcObject, Object, ObjectData}, @@ -809,12 +810,29 @@ impl Value { Value::Null => Ok(0.0), Value::Undefined => Ok(f64::NAN), Value::Boolean(b) => Ok(if b { 1.0 } else { 0.0 }), - // TODO: this is probably not 100% correct, see https://tc39.es/ecma262/#sec-tonumber-applied-to-the-string-type Value::String(ref string) => { - if string.trim().is_empty() { - return Ok(0.0); + let string = string.trim_matches(is_trimmable_whitespace); + + // TODO: write our own lexer to match syntax StrDecimalLiteral + match string { + "" => Ok(0.0), + "Infinity" | "+Infinity" => Ok(f64::INFINITY), + "-Infinity" => Ok(f64::NEG_INFINITY), + _ if matches!( + string + .chars() + .take(4) + .collect::() + .to_ascii_lowercase() + .as_str(), + "inf" | "+inf" | "-inf" | "nan" | "+nan" | "-nan" + ) => + { + // Prevent fast_float from parsing "inf", "+inf" as Infinity and "-inf" as -Infinity + Ok(f64::NAN) + } + _ => Ok(fast_float::parse(string).unwrap_or(f64::NAN)), } - Ok(string.parse().unwrap_or(f64::NAN)) } Value::Rational(number) => Ok(number), Value::Integer(integer) => Ok(f64::from(integer)),