Skip to content

Commit c875f07

Browse files
committed
Fixes #1384, by ensuring specials are properly parsed.
1 parent 615e6a4 commit c875f07

File tree

3 files changed

+343
-64
lines changed

3 files changed

+343
-64
lines changed

src/number/complete.rs

Lines changed: 158 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
//! Parsers recognizing numbers, complete input version
22
3+
use core::{f32, f64};
34
use crate::branch::alt;
45
use crate::bytes::complete::tag;
56
use crate::character::complete::{char, digit1, sign};
@@ -1426,6 +1427,8 @@ where
14261427
)(input)
14271428
}
14281429

1430+
///
1431+
14291432
/// Recognizes a floating point number in text format and returns the integer, fraction and exponent parts of the input data
14301433
///
14311434
/// *Complete version*: Can parse until the end of input.
@@ -1442,7 +1445,6 @@ where
14421445
T: AsBytes,
14431446
{
14441447
let (i, sign) = sign(input.clone())?;
1445-
14461448
//let (i, zeroes) = take_while(|c: <T as InputTakeAtPosition>::Item| c.as_char() == '0')(i)?;
14471449
let (i, zeroes) = match i.as_bytes().iter().position(|c| *c != b'0' as u8) {
14481450
Some(index) => i.take_split(index),
@@ -1517,6 +1519,85 @@ where
15171519
Ok((i, (sign, integer, fraction, exp)))
15181520
}
15191521

1522+
// Expands to the body of a parser for finite floating point text.
//
// `$input` is the identifier holding the parser input and `$t` is the float
// type to produce. The text is split by `recognize_float_parts`, the parts are
// converted by `minimal_lexical::parse_float`, and the sign is applied last.
// A failure of `recognize_float_parts` is propagated with `?`.
macro_rules! float_finite {
  ($input:ident, $t:ty) => {{
    let (rest, (positive, int_part, frac_part, exp)) = recognize_float_parts($input)?;

    let magnitude: $t = minimal_lexical::parse_float(
      int_part.as_bytes().iter(),
      frac_part.as_bytes().iter(),
      exp,
    );

    // A `false` sign flag means the parsed magnitude has to be negated.
    Ok((rest, if positive { magnitude } else { -magnitude }))
  }};
}
1538+
1539+
// Expands to the body of a parser for the special float spellings `nan`,
// `infinity` and `inf` (compared with `case_insensitive_cmp`), after an
// initial call to `sign`.
//
// `$input` is the identifier holding the parser input and `$t` names the float
// type (`f32` or `f64`) whose `NAN` / `INFINITY` constants are returned.
// When nothing matches, the enclosing function returns an
// `ErrorKind::Float` error built from the untouched input.
macro_rules! float_nonfinite {
  ($input:ident, $t:ident) => {{
    let (rest, positive) = sign($input.clone())?;
    let bytes = rest.as_bytes();
    let len = bytes.len();

    // Each candidate is tried only when enough bytes remain for it;
    // "infinity" is tried before its own prefix "inf".
    let special = if len >= 3 && crate::number::case_insensitive_cmp(bytes, b"nan") {
      Some(($t::NAN, 3))
    } else if len >= 8 && crate::number::case_insensitive_cmp(bytes, b"infinity") {
      Some(($t::INFINITY, 8))
    } else if len >= 3 && crate::number::case_insensitive_cmp(bytes, b"inf") {
      Some(($t::INFINITY, 3))
    } else {
      None
    };

    match special {
      Some((magnitude, consumed)) => {
        // A `false` sign flag means the value has to be negated.
        let value = if positive { magnitude } else { -magnitude };
        Ok((rest.slice(consumed..), value))
      }
      None => return Err(Err::Error(E::from_error_kind($input, ErrorKind::Float))),
    }
  }};
}
1563+
1564+
/// Recognizes floating point number in text format and returns a f32.
1565+
///
1566+
/// *Complete version*: Can parse until the end of input. This only handles
1567+
/// finite (non-special floats).
1568+
/// ```
1569+
pub fn float_finite<T, E: ParseError<T>>(input: T) -> IResult<T, f32, E>
1570+
where
1571+
T: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>> + Slice<Range<usize>>,
1572+
T: Clone + Offset,
1573+
T: InputIter + InputLength + InputTake,
1574+
<T as InputIter>::Item: AsChar + Copy,
1575+
<T as InputIter>::IterElem: Clone,
1576+
T: InputTakeAtPosition,
1577+
<T as InputTakeAtPosition>::Item: AsChar,
1578+
T: AsBytes,
1579+
T: for<'a> Compare<&'a [u8]>,
1580+
{
1581+
float_finite!(input, f32)
1582+
}
1583+
1584+
/// Recognizes floating point number in text format and returns a f32.
1585+
/// This only handles non-finite (special) values.
1586+
pub fn float_nonfinite<T, E: ParseError<T>>(input: T) -> IResult<T, f32, E>
1587+
where
1588+
T: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>> + Slice<Range<usize>>,
1589+
T: Clone + Offset,
1590+
T: InputIter + InputLength + InputTake,
1591+
<T as InputIter>::Item: AsChar + Copy,
1592+
<T as InputIter>::IterElem: Clone,
1593+
T: InputTakeAtPosition,
1594+
<T as InputTakeAtPosition>::Item: AsChar,
1595+
T: AsBytes,
1596+
T: for<'a> Compare<&'a [u8]>,
1597+
{
1598+
float_nonfinite!(input, f32)
1599+
}
1600+
15201601
/// Recognizes floating point number in text format and returns a f32.
15211602
///
15221603
/// *Complete version*: Can parse until the end of input.
@@ -1546,30 +1627,56 @@ where
15461627
T: AsBytes,
15471628
T: for<'a> Compare<&'a [u8]>,
15481629
{
1549-
let (i, (sign, integer, fraction, exponent)) = recognize_float_parts(input)?;
1630+
let res = float_finite::<T, E>(input.clone());
1631+
res.or_else(|_| float_nonfinite::<T, E>(input))
1632+
}
15501633

1551-
let mut float: f32 = minimal_lexical::parse_float(
1552-
integer.as_bytes().iter(),
1553-
fraction.as_bytes().iter(),
1554-
exponent,
1555-
);
1556-
if !sign {
1557-
float = -float;
1558-
}
1634+
/// Recognizes floating point number in text format and returns a f64.
1635+
///
1636+
/// *Complete version*: Can parse until the end of input. This only handles
1637+
/// finite (non-special floats).
1638+
pub fn double_finite<T, E: ParseError<T>>(input: T) -> IResult<T, f64, E>
1639+
where
1640+
T: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>> + Slice<Range<usize>>,
1641+
T: Clone + Offset,
1642+
T: InputIter + InputLength + InputTake,
1643+
<T as InputIter>::Item: AsChar + Copy,
1644+
<T as InputIter>::IterElem: Clone,
1645+
T: InputTakeAtPosition,
1646+
<T as InputTakeAtPosition>::Item: AsChar,
1647+
T: AsBytes,
1648+
T: for<'a> Compare<&'a [u8]>,
1649+
{
1650+
float_finite!(input, f64)
1651+
}
15591652

1560-
Ok((i, float))
1653+
/// Recognizes floating point number in text format and returns a f64.
1654+
/// This only handles non-finite (special) values.
1655+
pub fn double_nonfinite<T, E: ParseError<T>>(input: T) -> IResult<T, f64, E>
1656+
where
1657+
T: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>> + Slice<Range<usize>>,
1658+
T: Clone + Offset,
1659+
T: InputIter + InputLength + InputTake,
1660+
<T as InputIter>::Item: AsChar + Copy,
1661+
<T as InputIter>::IterElem: Clone,
1662+
T: InputTakeAtPosition,
1663+
<T as InputTakeAtPosition>::Item: AsChar,
1664+
T: AsBytes,
1665+
T: for<'a> Compare<&'a [u8]>,
1666+
{
1667+
float_nonfinite!(input, f64)
15611668
}
15621669

1563-
/// Recognizes floating point number in text format and returns a f32.
1670+
/// Recognizes floating point number in text format and returns a f64.
15641671
///
15651672
/// *Complete version*: Can parse until the end of input.
15661673
/// ```rust
15671674
/// # use nom::{Err, error::ErrorKind, Needed};
15681675
/// # use nom::Needed::Size;
1569-
/// use nom::number::complete::float;
1676+
/// use nom::number::complete::double;
15701677
///
15711678
/// let parser = |s| {
1572-
/// float(s)
1679+
/// double(s)
15731680
/// };
15741681
///
15751682
/// assert_eq!(parser("11e-1"), Ok(("", 1.1)));
@@ -1589,18 +1696,8 @@ where
15891696
T: AsBytes,
15901697
T: for<'a> Compare<&'a [u8]>,
15911698
{
1592-
let (i, (sign, integer, fraction, exponent)) = recognize_float_parts(input)?;
1593-
1594-
let mut float: f64 = minimal_lexical::parse_float(
1595-
integer.as_bytes().iter(),
1596-
fraction.as_bytes().iter(),
1597-
exponent,
1598-
);
1599-
if !sign {
1600-
float = -float;
1601-
}
1602-
1603-
Ok((i, float))
1699+
let res = double_finite::<T, E>(input.clone());
1700+
res.or_else(|_| double_nonfinite::<T, E>(input))
16041701
}
16051702

16061703
#[cfg(test)]
@@ -1618,6 +1715,23 @@ mod tests {
16181715
};
16191716
);
16201717

1718+
// Compares two float parser results, treating NaN as equal to NaN.
//
// Plain `assert_eq!` cannot be used because NaN != NaN. When the expected
// result is `Ok` with a NaN value, the actual result must also be `Ok` with a
// NaN value AND the same remaining input; every other case falls back to
// `assert_eq!`. The expansion is wrapped in a block so that its temporaries
// do not leak into the calling test.
macro_rules! assert_float_eq {
  ($left: expr, $right: expr) => {{
    let left: $crate::IResult<_, _, (_, ErrorKind)> = $left;
    let right: $crate::IResult<_, _, (_, ErrorKind)> = $right;
    match &right {
      Ok((expected_rest, expected)) if expected.is_nan() => {
        let (rest, value) = left.unwrap();
        assert!(value.is_nan());
        // The amount of consumed input matters for NaN results as well.
        assert_eq!(&rest, expected_rest);
      }
      _ => assert_eq!(left, right),
    }
  }};
}
1734+
16211735
#[test]
16221736
fn i8_tests() {
16231737
assert_parse!(i8(&[0x00][..]), Ok((&b""[..], 0)));
@@ -1942,6 +2056,8 @@ mod tests {
19422056
"12.34",
19432057
"-1.234E-12",
19442058
"-1.234e-12",
2059+
"NaN",
2060+
"inf",
19452061
];
19462062

19472063
for test in test_cases.drain(..) {
@@ -1951,15 +2067,24 @@ mod tests {
19512067
println!("now parsing: {} -> {}", test, expected32);
19522068

19532069
let larger = format!("{}", test);
1954-
assert_parse!(recognize_float(&larger[..]), Ok(("", test)));
2070+
if expected32.is_finite() {
2071+
assert_parse!(recognize_float(&larger[..]), Ok(("", test)));
2072+
}
19552073

1956-
assert_parse!(float(larger.as_bytes()), Ok((&b""[..], expected32)));
1957-
assert_parse!(float(&larger[..]), Ok(("", expected32)));
2074+
assert_float_eq!(float(larger.as_bytes()), Ok((&b""[..], expected32)));
2075+
assert_float_eq!(float(&larger[..]), Ok(("", expected32)));
19582076

1959-
assert_parse!(double(larger.as_bytes()), Ok((&b""[..], expected64)));
1960-
assert_parse!(double(&larger[..]), Ok(("", expected64)));
2077+
assert_float_eq!(double(larger.as_bytes()), Ok((&b""[..], expected64)));
2078+
assert_float_eq!(double(&larger[..]), Ok(("", expected64)));
19612079
}
19622080

2081+
// b"infinity" and case-insensitive floats don't work until recent
2082+
// rustc versions, so just test they work here.
2083+
assert_float_eq!(float("nan".as_bytes()), Ok((&b""[..], f32::NAN)));
2084+
assert_float_eq!(float("infinity".as_bytes()), Ok((&b""[..], f32::INFINITY)));
2085+
assert_float_eq!(double("nan".as_bytes()), Ok((&b""[..], f64::NAN)));
2086+
assert_float_eq!(double("infinity".as_bytes()), Ok((&b""[..], f64::INFINITY)));
2087+
19632088
let remaining_exponent = "-1.234E-";
19642089
assert_parse!(
19652090
recognize_float(remaining_exponent),
@@ -2051,8 +2176,8 @@ mod tests {
20512176
}
20522177

20532178
fn parse_f64(i: &str) -> IResult<&str, f64, ()> {
2054-
match recognize_float(i) {
2055-
Err(e) => Err(e),
2179+
match recognize_float::<_, ()>(i) {
2180+
Err(_) => Err(Err::Error(())),
20562181
Ok((i, s)) => {
20572182
if s.is_empty() {
20582183
return Err(Err::Error(()));

src/number/mod.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,13 @@ pub enum Endianness {
1313
/// Will match the host's endianness
1414
Native,
1515
}
16+
17+
/// ASCII case-insensitive check that `x` starts with `y`.
///
/// Returns `true` only when `x` holds at least `y.len()` bytes and those
/// leading bytes equal `y` when ASCII letter case is ignored. Bytes of `x`
/// beyond `y.len()` are not examined, so callers can pass the whole remaining
/// input. An `x` shorter than `y` never matches.
#[inline]
fn case_insensitive_cmp(x: &[u8], y: &[u8]) -> bool {
  // `get` yields `None` when `x` is too short, instead of silently comparing
  // only the overlapping part.
  x.get(..y.len())
    .map_or(false, |prefix| prefix.eq_ignore_ascii_case(y))
}

0 commit comments

Comments
 (0)