Skip to content

Commit 8bdcda9

Browse files
authored
Merge pull request #11 from aldanor/feature/fix-decimal-trailing-0s
Fix a bug with trailing zeros when parsing decimals
2 parents a48effb + 6872921 commit 8bdcda9

File tree

6 files changed

+90
-40
lines changed

6 files changed

+90
-40
lines changed

README.md

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -92,15 +92,15 @@ C++ library, here are few brief notes:
9292

9393
## Benchmarks
9494

95-
Below is the table of average timings in nanoseconds for parsing a single number
95+
Below is the table of best timings in nanoseconds for parsing a single number
9696
into a 64-bit float.
9797

9898
| | `canada` | `mesh` | `uniform` | `iidi` | `iei` | `rec32` |
9999
| ---------------- | -------- | -------- | --------- | ------ | ------ | ------- |
100-
| fast-float | 22.08 | 11.10 | 20.04 | 40.77 | 26.33 | 29.84 |
101-
| lexical | 61.63 | 25.10 | 53.77 | 72.33 | 53.39 | 72.40 |
102-
| lexical/lossy | 61.51 | 25.24 | 54.00 | 71.30 | 52.87 | 71.71 |
103-
| from_str | 175.07 | 22.58 | 103.00 | 228.78 | 115.76 | 211.13 |
100+
| fast-float | 21.58 | 10.70 | 19.36 | 40.50 | 26.07 | 29.13 |
101+
| lexical | 65.90 | 23.28 | 54.75 | 75.80 | 52.18 | 75.36 |
102+
| lexical/lossy | 65.90 | 23.28 | 54.75 | 75.80 | 52.18 | 75.36 |
103+
| from_str | 174.43 | 22.30 | 99.93 | 227.76 | 111.31 | 204.46 |
104104
| fast_float (C++) | 22.78 | 10.99 | 20.05 | 41.12 | 27.51 | 30.85 |
105105
| abseil (C++) | 42.66 | 32.88 | 46.01 | 50.83 | 46.33 | 49.95 |
106106
| netlib (C++) | 57.53 | 24.86 | 64.72 | 56.63 | 36.20 | 67.29 |
@@ -109,8 +109,7 @@ into a 64-bit float.
109109
Parsers:
110110

111111
- `fast-float` - this very crate
112-
- `lexical` – from `lexical_core` crate, v0.7
113-
- `lexical/lossy` - from `lexical_core` crate, v0.7 (lossy parser)
112+
- `lexical` – `lexical_core`, v0.7 (non-lossy; same performance as lossy)
114113
- `from_str` – Rust standard library, `FromStr` trait
115114
- `fast_float (C++)` – original C++ implementation of 'fast-float' method
116115
- `abseil (C++)` – Abseil C++ Common Libraries

extras/simple-bench/src/main.rs

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use std::str::FromStr;
77
use std::time::Instant;
88

99
use fastrand::Rng;
10-
use lexical::{FromLexical, FromLexicalLossy};
10+
use lexical::FromLexical;
1111
use structopt::StructOpt;
1212

1313
use fast_float::FastFloat;
@@ -109,7 +109,6 @@ fn run_bench<T: FastFloat, F: Fn(&str) -> T>(
109109
enum Method {
110110
FastFloat,
111111
Lexical,
112-
LexicalLossy,
113112
FromStr,
114113
}
115114

@@ -126,12 +125,11 @@ impl Method {
126125
match self {
127126
Self::FastFloat => "fast-float",
128127
Self::Lexical => "lexical",
129-
Self::LexicalLossy => "lexical/lossy",
130128
Self::FromStr => "from_str",
131129
}
132130
}
133131

134-
fn run_as<T: FastFloat + FromLexical + FromLexicalLossy + FromStr>(
132+
fn run_as<T: FastFloat + FromLexical + FromStr>(
135133
&self,
136134
input: &Input,
137135
repeat: usize,
@@ -147,11 +145,6 @@ impl Method {
147145
.unwrap_or_default()
148146
.0
149147
}),
150-
Self::LexicalLossy => run_bench(data, repeat, |s: &str| {
151-
lexical_core::parse_partial_lossy::<T>(s.as_bytes())
152-
.unwrap_or_default()
153-
.0
154-
}),
155148
Self::FromStr => run_bench(data, repeat, |s: &str| s.parse::<T>().unwrap_or_default()),
156149
};
157150

@@ -172,12 +165,7 @@ impl Method {
172165
}
173166

174167
pub fn all() -> &'static [Self] {
175-
&[
176-
Method::FastFloat,
177-
Method::Lexical,
178-
Method::LexicalLossy,
179-
Method::FromStr,
180-
]
168+
&[Method::FastFloat, Method::Lexical, Method::FromStr]
181169
}
182170
}
183171

src/decimal.rs

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ impl Decimal {
190190
pub fn parse_decimal(mut s: &[u8]) -> Decimal {
191191
// can't fail since it follows a call to parse_number
192192
let mut d = Decimal::default();
193+
let start = s;
193194
let c = s.get_first();
194195
d.negative = c == b'-';
195196
if c == b'-' || c == b'+' {
@@ -217,6 +218,24 @@ pub fn parse_decimal(mut s: &[u8]) -> Decimal {
217218
parse_digits(&mut s, |digit| d.try_add_digit(digit));
218219
d.decimal_point = s.len() as i32 - first.len() as i32;
219220
}
221+
if d.num_digits != 0 {
222+
// Ignore the trailing zeros if there are any
223+
let mut n_trailing_zeros = 0;
224+
for &c in start[..(start.len() - s.len())].iter().rev() {
225+
if c == b'0' {
226+
n_trailing_zeros += 1;
227+
} else if c != b'.' {
228+
break;
229+
}
230+
}
231+
d.decimal_point += n_trailing_zeros as i32;
232+
d.num_digits -= n_trailing_zeros;
233+
d.decimal_point += d.num_digits as i32;
234+
if d.num_digits > Decimal::MAX_DIGITS {
235+
d.truncated = true;
236+
d.num_digits = Decimal::MAX_DIGITS;
237+
}
238+
}
220239
if s.check_first2(b'e', b'E') {
221240
s = s.advance(1);
222241
let mut neg_exp = false;
@@ -234,11 +253,6 @@ pub fn parse_decimal(mut s: &[u8]) -> Decimal {
234253
});
235254
d.decimal_point += if neg_exp { -exp_num } else { exp_num };
236255
}
237-
d.decimal_point += d.num_digits as i32;
238-
if d.num_digits > Decimal::MAX_DIGITS {
239-
d.truncated = true;
240-
d.num_digits = Decimal::MAX_DIGITS;
241-
}
242256
for i in d.num_digits..Decimal::MAX_DIGITS_WITHOUT_OVERFLOW {
243257
d.digits[i] = 0;
244258
}

src/number.rs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -68,30 +68,26 @@ fn try_parse_19digits(s: &mut AsciiStr<'_>, x: &mut u64) {
6868
}
6969

7070
#[inline]
71-
fn try_parse_8digits_le(s: &mut AsciiStr<'_>, x: &mut u64) -> usize {
71+
fn try_parse_8digits_le(s: &mut AsciiStr<'_>, x: &mut u64) {
7272
// may cause overflows, to be handled later
73-
let mut count = 0;
7473
if cfg!(target_endian = "little") {
7574
if let Some(v) = s.try_read_u64() {
7675
if is_8digits_le(v) {
7776
*x = x
7877
.wrapping_mul(1_0000_0000)
7978
.wrapping_add(parse_8digits_le(v));
8079
s.step_by(8);
81-
count = 8;
8280
if let Some(v) = s.try_read_u64() {
8381
if is_8digits_le(v) {
8482
*x = x
8583
.wrapping_mul(1_0000_0000)
8684
.wrapping_add(parse_8digits_le(v));
8785
s.step_by(8);
88-
count = 16;
8986
}
9087
}
9188
}
9289
}
9390
}
94-
count
9591
}
9692

9793
#[inline]
@@ -124,18 +120,22 @@ fn parse_scientific(s: &mut AsciiStr<'_>) -> i64 {
124120

125121
#[inline]
126122
pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> {
127-
// assuming s.len() >= 1
123+
debug_assert!(!s.is_empty());
124+
128125
let mut s = AsciiStr::new(s);
129126
let start = s;
130127

131128
// handle optional +/- sign
132129
let mut negative = false;
133-
if s.first_either(b'-', b'+') {
134-
negative = s.first_is(b'-');
130+
if s.first() == b'-' {
131+
negative = true;
135132
if s.step().is_empty() {
136133
return None;
137134
}
135+
} else if s.first() == b'+' && s.step().is_empty() {
136+
return None;
138137
}
138+
debug_assert!(!s.is_empty());
139139

140140
// parse initial digits before dot
141141
let mut mantissa = 0_u64;
@@ -171,8 +171,7 @@ pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> {
171171
let len = s.offset_from(&start) as _;
172172

173173
// handle uncommon case with many digits
174-
n_digits -= 19;
175-
if n_digits <= 0 {
174+
if n_digits <= 19 {
176175
return Some((
177176
Number {
178177
exponent,
@@ -184,6 +183,7 @@ pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> {
184183
));
185184
}
186185

186+
n_digits -= 19;
187187
let mut many_digits = false;
188188
let mut p = digits_start;
189189
while p.check_first_either(b'0', b'.') {

tests/test_basic.rs

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,14 @@ macro_rules! check {
2121
check!($ty, $s, core::$ty::NEG_INFINITY)
2222
}};
2323
($ty:ident, $s:expr, $e:expr) => {{
24-
let s = $s.as_bytes();
24+
let string = String::from($s);
25+
let s = string.as_bytes();
2526
let expected: $ty = $e;
2627
let result = fast_float::parse::<$ty, _>(s).unwrap();
2728
assert_eq!(result, expected);
2829
let lex = lexical_core::parse::<$ty>(s).unwrap();
2930
assert_eq!(result, lex);
30-
let std = <$ty>::from_str($s);
31+
let std = <$ty>::from_str(string.as_str());
3132
if let Ok(std) = std {
3233
// stdlib can't parse all weird floats
3334
if std.is_finite() && result.is_finite() {
@@ -84,6 +85,14 @@ macro_rules! check_f64_neg_inf {
8485
};
8586
}
8687

88+
fn append_zeros(s: impl AsRef<str>, n: usize) -> String {
89+
let mut s = String::from(s.as_ref());
90+
for _ in 0..n {
91+
s.push('0');
92+
}
93+
s
94+
}
95+
8796
#[test]
8897
fn test_f64_inf() {
8998
check_f64_inf!("INF");
@@ -200,6 +209,17 @@ fn test_f64_long() {
200209

201210
#[test]
202211
fn test_f64_general() {
212+
check_f64!("9007199254740993.0", hexf64("0x1.p+53"));
213+
check_f64!(append_zeros("9007199254740993.0", 1000), hexf64("0x1.p+53"));
214+
check_f64!("10000000000000000000", hexf64("0x1.158e460913dp+63"));
215+
check_f64!(
216+
"10000000000000000000000000000001000000000000",
217+
hexf64("0x1.cb2d6f618c879p+142")
218+
);
219+
check_f64!(
220+
"10000000000000000000000000000000000000000001",
221+
hexf64("0x1.cb2d6f618c879p+142")
222+
);
203223
check_f64!(1.1920928955078125e-07);
204224
check_f64!("-0", -0.0);
205225
check_f64!(
@@ -277,6 +297,23 @@ fn test_f32_inf() {
277297

278298
#[test]
279299
fn test_f32_basic() {
300+
let f1 = "\
301+
1.175494140627517859246175898662808184331245864732796240031385942718174675986064\
302+
7699724722770042717456817626953125";
303+
check_f32!(f1, hexf32("0x1.2ced3p+0"));
304+
check_f32!(format!("{}e-38", f1), hexf32("0x1.fffff8p-127"));
305+
check_f32!(
306+
format!("{}e-38", append_zeros(f1, 655)),
307+
hexf32("0x1.fffff8p-127")
308+
);
309+
check_f32!(
310+
format!("{}e-38", append_zeros(f1, 656)),
311+
hexf32("0x1.fffff8p-127")
312+
);
313+
check_f32!(
314+
format!("{}e-38", append_zeros(f1, 1000)),
315+
hexf32("0x1.fffff8p-127")
316+
);
280317
check_f32!(1.00000006e+09);
281318
check_f32!(1.4012984643e-45);
282319
check_f32!(1.1754942107e-38);

tests/test_exhaustive.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#[test]
22
#[ignore]
3-
fn test_f32_exhaustive() {
3+
fn test_f32_exhaustive_ryu() {
44
let mut buf = ryu::Buffer::new();
55
for i in 0..0xFFFF_FFFF_u32 {
66
let a: f32 = unsafe { core::mem::transmute(i) };
@@ -9,3 +9,15 @@ fn test_f32_exhaustive() {
99
assert!(a == b || (a.is_nan() && b.is_nan()));
1010
}
1111
}
12+
13+
#[test]
14+
#[ignore]
15+
fn test_f32_exhaustive_lexical() {
16+
let mut buf = [0; 1024];
17+
for i in 0..0xFFFF_FFFF_u32 {
18+
let a: f32 = unsafe { core::mem::transmute(i) };
19+
let s = lexical_core::write(a, &mut buf);
20+
let b: f32 = fast_float::parse(s).unwrap();
21+
assert!(a == b || (a.is_nan() && b.is_nan()));
22+
}
23+
}

0 commit comments

Comments
 (0)