Merge pull request #11 from aldanor/feature/fix-decimal-trailing-0s

aldanor · web-flow · commit 8bdcda9c05b4 · 2021-01-11T23:37:28.000Z
Fix a bug with trailing zeros when parsing decimals
diff --git a/README.md b/README.md
@@ -92,15 +92,15 @@ C++ library, here are few brief notes:
 
 ## Benchmarks
 
-Below is the table of average timings in nanoseconds for parsing a single number 
+Below is the table of best timings in nanoseconds for parsing a single number 
 into a 64-bit float.
 
 |                  | `canada` | `mesh`   | `uniform` | `iidi` | `iei`  | `rec32` |
 | ---------------- | -------- | -------- | --------- | ------ | ------ | ------- |
-| fast-float       | 22.08    | 11.10    | 20.04     | 40.77  | 26.33  | 29.84   |
-| lexical          | 61.63    | 25.10    | 53.77     | 72.33  | 53.39  | 72.40   |
-| lexical/lossy    | 61.51    | 25.24    | 54.00     | 71.30  | 52.87  | 71.71   |
-| from_str         | 175.07   | 22.58    | 103.00    | 228.78 | 115.76 | 211.13  |
+| fast-float       | 21.58    | 10.70    | 19.36     | 40.50  | 26.07  | 29.13   |
+| lexical          | 65.90    | 23.28    | 54.75     | 75.80  | 52.18  | 75.36   |
+| lexical/lossy    | 65.90    | 23.28    | 54.75     | 75.80  | 52.18  | 75.36   |
+| from_str         | 174.43   | 22.30    | 99.93     | 227.76 | 111.31 | 204.46  |
 | fast_float (C++) | 22.78    | 10.99    | 20.05     | 41.12  | 27.51  | 30.85   |
 | abseil (C++)     | 42.66    | 32.88    | 46.01     | 50.83  | 46.33  | 49.95   |
 | netlib (C++)     | 57.53    | 24.86    | 64.72     | 56.63  | 36.20  | 67.29   |
@@ -109,8 +109,7 @@ into a 64-bit float.
 Parsers:
 
 - `fast-float` - this very crate
-- `lexical` – from `lexical_core` crate, v0.7
-- `lexical/lossy` - from `lexical_core` crate, v0.7 (lossy parser)
+- `lexical` – from `lexical_core` crate, v0.7 (non-lossy parser; the lossy parser performs the same)
 - `from_str` – Rust standard library, `FromStr` trait
 - `fast_float (C++)` – original C++ implementation of 'fast-float' method
 - `abseil (C++)` – Abseil C++ Common Libraries
diff --git a/extras/simple-bench/src/main.rs b/extras/simple-bench/src/main.rs
@@ -7,7 +7,7 @@ use std::str::FromStr;
 use std::time::Instant;
 
 use fastrand::Rng;
-use lexical::{FromLexical, FromLexicalLossy};
+use lexical::FromLexical;
 use structopt::StructOpt;
 
 use fast_float::FastFloat;
@@ -109,7 +109,6 @@ fn run_bench<T: FastFloat, F: Fn(&str) -> T>(
 enum Method {
     FastFloat,
     Lexical,
-    LexicalLossy,
     FromStr,
 }
 
@@ -126,12 +125,11 @@ impl Method {
         match self {
             Self::FastFloat => "fast-float",
             Self::Lexical => "lexical",
-            Self::LexicalLossy => "lexical/lossy",
             Self::FromStr => "from_str",
         }
     }
 
-    fn run_as<T: FastFloat + FromLexical + FromLexicalLossy + FromStr>(
+    fn run_as<T: FastFloat + FromLexical + FromStr>(
         &self,
         input: &Input,
         repeat: usize,
@@ -147,11 +145,6 @@ impl Method {
                     .unwrap_or_default()
                     .0
             }),
-            Self::LexicalLossy => run_bench(data, repeat, |s: &str| {
-                lexical_core::parse_partial_lossy::<T>(s.as_bytes())
-                    .unwrap_or_default()
-                    .0
-            }),
             Self::FromStr => run_bench(data, repeat, |s: &str| s.parse::<T>().unwrap_or_default()),
         };
 
@@ -172,12 +165,7 @@ impl Method {
     }
 
     pub fn all() -> &'static [Self] {
-        &[
-            Method::FastFloat,
-            Method::Lexical,
-            Method::LexicalLossy,
-            Method::FromStr,
-        ]
+        &[Method::FastFloat, Method::Lexical, Method::FromStr]
     }
 }
 
diff --git a/src/decimal.rs b/src/decimal.rs
@@ -190,6 +190,7 @@ impl Decimal {
 pub fn parse_decimal(mut s: &[u8]) -> Decimal {
     // can't fail since it follows a call to parse_number
     let mut d = Decimal::default();
+    let start = s;
     let c = s.get_first();
     d.negative = c == b'-';
     if c == b'-' || c == b'+' {
@@ -217,6 +218,24 @@ pub fn parse_decimal(mut s: &[u8]) -> Decimal {
         parse_digits(&mut s, |digit| d.try_add_digit(digit));
         d.decimal_point = s.len() as i32 - first.len() as i32;
     }
+    if d.num_digits != 0 {
+        // Strip trailing zeros (stepping over any '.') so they don't count as significant digits
+        let mut n_trailing_zeros = 0;
+        for &c in start[..(start.len() - s.len())].iter().rev() {
+            if c == b'0' {
+                n_trailing_zeros += 1;
+            } else if c != b'.' {
+                break;
+            }
+        }
+        d.decimal_point += n_trailing_zeros as i32;
+        d.num_digits -= n_trailing_zeros;
+        d.decimal_point += d.num_digits as i32;
+        if d.num_digits > Decimal::MAX_DIGITS {
+            d.truncated = true;
+            d.num_digits = Decimal::MAX_DIGITS;
+        }
+    }
     if s.check_first2(b'e', b'E') {
         s = s.advance(1);
         let mut neg_exp = false;
@@ -234,11 +253,6 @@ pub fn parse_decimal(mut s: &[u8]) -> Decimal {
         });
         d.decimal_point += if neg_exp { -exp_num } else { exp_num };
     }
-    d.decimal_point += d.num_digits as i32;
-    if d.num_digits > Decimal::MAX_DIGITS {
-        d.truncated = true;
-        d.num_digits = Decimal::MAX_DIGITS;
-    }
     for i in d.num_digits..Decimal::MAX_DIGITS_WITHOUT_OVERFLOW {
         d.digits[i] = 0;
     }
diff --git a/src/number.rs b/src/number.rs
@@ -68,30 +68,26 @@ fn try_parse_19digits(s: &mut AsciiStr<'_>, x: &mut u64) {
 }
 
 #[inline]
-fn try_parse_8digits_le(s: &mut AsciiStr<'_>, x: &mut u64) -> usize {
+fn try_parse_8digits_le(s: &mut AsciiStr<'_>, x: &mut u64) {
     // may cause overflows, to be handled later
-    let mut count = 0;
     if cfg!(target_endian = "little") {
         if let Some(v) = s.try_read_u64() {
             if is_8digits_le(v) {
                 *x = x
                     .wrapping_mul(1_0000_0000)
                     .wrapping_add(parse_8digits_le(v));
                 s.step_by(8);
-                count = 8;
                 if let Some(v) = s.try_read_u64() {
                     if is_8digits_le(v) {
                         *x = x
                             .wrapping_mul(1_0000_0000)
                             .wrapping_add(parse_8digits_le(v));
                         s.step_by(8);
-                        count = 16;
                     }
                 }
             }
         }
     }
-    count
 }
 
 #[inline]
@@ -124,18 +120,22 @@ fn parse_scientific(s: &mut AsciiStr<'_>) -> i64 {
 
 #[inline]
 pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> {
-    // assuming s.len() >= 1
+    debug_assert!(!s.is_empty());
+
     let mut s = AsciiStr::new(s);
     let start = s;
 
     // handle optional +/- sign
     let mut negative = false;
-    if s.first_either(b'-', b'+') {
-        negative = s.first_is(b'-');
+    if s.first() == b'-' {
+        negative = true;
         if s.step().is_empty() {
             return None;
         }
+    } else if s.first() == b'+' && s.step().is_empty() {
+        return None;
     }
+    debug_assert!(!s.is_empty());
 
     // parse initial digits before dot
     let mut mantissa = 0_u64;
@@ -171,8 +171,7 @@ pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> {
     let len = s.offset_from(&start) as _;
 
     // handle uncommon case with many digits
-    n_digits -= 19;
-    if n_digits <= 0 {
+    if n_digits <= 19 {
         return Some((
             Number {
                 exponent,
@@ -184,6 +183,7 @@ pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> {
         ));
     }
 
+    n_digits -= 19;
     let mut many_digits = false;
     let mut p = digits_start;
     while p.check_first_either(b'0', b'.') {
diff --git a/tests/test_basic.rs b/tests/test_basic.rs
@@ -21,13 +21,14 @@ macro_rules! check {
         check!($ty, $s, core::$ty::NEG_INFINITY)
     }};
     ($ty:ident, $s:expr, $e:expr) => {{
-        let s = $s.as_bytes();
+        let string = String::from($s);
+        let s = string.as_bytes();
         let expected: $ty = $e;
         let result = fast_float::parse::<$ty, _>(s).unwrap();
         assert_eq!(result, expected);
         let lex = lexical_core::parse::<$ty>(s).unwrap();
         assert_eq!(result, lex);
-        let std = <$ty>::from_str($s);
+        let std = <$ty>::from_str(string.as_str());
         if let Ok(std) = std {
             // stdlib can't parse all weird floats
             if std.is_finite() && result.is_finite() {
@@ -84,6 +85,14 @@ macro_rules! check_f64_neg_inf {
     };
 }
 
+fn append_zeros(s: impl AsRef<str>, n: usize) -> String {
+    let mut s = String::from(s.as_ref());
+    for _ in 0..n {
+        s.push('0');
+    }
+    s
+}
+
 #[test]
 fn test_f64_inf() {
     check_f64_inf!("INF");
@@ -200,6 +209,17 @@ fn test_f64_long() {
 
 #[test]
 fn test_f64_general() {
+    check_f64!("9007199254740993.0", hexf64("0x1.p+53"));
+    check_f64!(append_zeros("9007199254740993.0", 1000), hexf64("0x1.p+53"));
+    check_f64!("10000000000000000000", hexf64("0x1.158e460913dp+63"));
+    check_f64!(
+        "10000000000000000000000000000001000000000000",
+        hexf64("0x1.cb2d6f618c879p+142")
+    );
+    check_f64!(
+        "10000000000000000000000000000000000000000001",
+        hexf64("0x1.cb2d6f618c879p+142")
+    );
     check_f64!(1.1920928955078125e-07);
     check_f64!("-0", -0.0);
     check_f64!(
@@ -277,6 +297,23 @@ fn test_f32_inf() {
 
 #[test]
 fn test_f32_basic() {
+    let f1 = "\
+        1.175494140627517859246175898662808184331245864732796240031385942718174675986064\
+        7699724722770042717456817626953125";
+    check_f32!(f1, hexf32("0x1.2ced3p+0"));
+    check_f32!(format!("{}e-38", f1), hexf32("0x1.fffff8p-127"));
+    check_f32!(
+        format!("{}e-38", append_zeros(f1, 655)),
+        hexf32("0x1.fffff8p-127")
+    );
+    check_f32!(
+        format!("{}e-38", append_zeros(f1, 656)),
+        hexf32("0x1.fffff8p-127")
+    );
+    check_f32!(
+        format!("{}e-38", append_zeros(f1, 1000)),
+        hexf32("0x1.fffff8p-127")
+    );
     check_f32!(1.00000006e+09);
     check_f32!(1.4012984643e-45);
     check_f32!(1.1754942107e-38);
diff --git a/tests/test_exhaustive.rs b/tests/test_exhaustive.rs
@@ -1,6 +1,6 @@
 #[test]
 #[ignore]
-fn test_f32_exhaustive() {
+fn test_f32_exhaustive_ryu() {
     let mut buf = ryu::Buffer::new();
     for i in 0..0xFFFF_FFFF_u32 {
         let a: f32 = unsafe { core::mem::transmute(i) };
@@ -9,3 +9,15 @@ fn test_f32_exhaustive() {
         assert!(a == b || (a.is_nan() && b.is_nan()));
     }
 }
+
+#[test]
+#[ignore]
+fn test_f32_exhaustive_lexical() {
+    let mut buf = [0; 1024];
+    for i in 0..0xFFFF_FFFF_u32 {
+        let a: f32 = unsafe { core::mem::transmute(i) };
+        let s = lexical_core::write(a, &mut buf);
+        let b: f32 = fast_float::parse(s).unwrap();
+        assert!(a == b || (a.is_nan() && b.is_nan()));
+    }
+}