Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-cast/src/parse.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! [`Parser`] implementations for converting strings to Arrow types
19
//!
20
//! Used by the CSV and JSON readers to convert strings to Arrow types
21
use arrow_array::timezone::Tz;
22
use arrow_array::types::*;
23
use arrow_array::ArrowNativeTypeOp;
24
use arrow_buffer::ArrowNativeType;
25
use arrow_schema::ArrowError;
26
use chrono::prelude::*;
27
use half::f16;
28
use std::str::FromStr;
29
30
/// Converts the leading `N` fractional-second digits of `digits` into nanoseconds.
///
/// Each byte has the offset `O` subtracted (wrapping) before it is used as a
/// decimal digit: pass `0` when the bytes are already numeric values and
/// `b'0'` when they are ASCII characters. The accumulated value is scaled by
/// `10^(9 - N)` so that, e.g., three digits are interpreted as milliseconds.
///
/// Panics if `digits` holds fewer than `N` bytes.
#[inline]
fn parse_nanos<const N: usize, const O: u8>(digits: &[u8]) -> u32 {
    let mut value = 0_u32;
    for &digit in &digits[..N] {
        value = value * 10 + u32::from(digit.wrapping_sub(O));
    }
    // Pad on the right up to nine decimal places
    value * 10_u32.pow((9 - N) as u32)
}
38
39
/// Helper for parsing RFC3339 timestamps
40
struct TimestampParser {
41
    /// The timestamp bytes to parse minus `b'0'`
42
    ///
43
    /// This makes interpretation as an integer inexpensive
44
    digits: [u8; 32],
45
    /// A mask containing a `1` bit where the corresponding byte is a valid ASCII digit
46
    mask: u32,
47
}
48
49
impl TimestampParser {
50
0
    fn new(bytes: &[u8]) -> Self {
51
0
        let mut digits = [0; 32];
52
0
        let mut mask = 0;
53
54
        // Treating all bytes the same way, helps LLVM vectorise this correctly
55
0
        for (idx, (o, i)) in digits.iter_mut().zip(bytes).enumerate() {
56
0
            *o = i.wrapping_sub(b'0');
57
0
            mask |= ((*o < 10) as u32) << idx
58
        }
59
60
0
        Self { digits, mask }
61
0
    }
62
63
    /// Returns true if the byte at `idx` in the original string equals `b`
64
0
    fn test(&self, idx: usize, b: u8) -> bool {
65
0
        self.digits[idx] == b.wrapping_sub(b'0')
66
0
    }
67
68
    /// Parses a date of the form `1997-01-31`
69
0
    fn date(&self) -> Option<NaiveDate> {
70
0
        if self.mask & 0b1111111111 != 0b1101101111 || !self.test(4, b'-') || !self.test(7, b'-') {
71
0
            return None;
72
0
        }
73
74
0
        let year = self.digits[0] as u16 * 1000
75
0
            + self.digits[1] as u16 * 100
76
0
            + self.digits[2] as u16 * 10
77
0
            + self.digits[3] as u16;
78
79
0
        let month = self.digits[5] * 10 + self.digits[6];
80
0
        let day = self.digits[8] * 10 + self.digits[9];
81
82
0
        NaiveDate::from_ymd_opt(year as _, month as _, day as _)
83
0
    }
84
85
    /// Parses a time of any of forms
86
    /// - `09:26:56`
87
    /// - `09:26:56.123`
88
    /// - `09:26:56.123456`
89
    /// - `09:26:56.123456789`
90
    /// - `092656`
91
    ///
92
    /// Returning the end byte offset
93
0
    fn time(&self) -> Option<(NaiveTime, usize)> {
94
        // Make a NaiveTime handling leap seconds
95
0
        let time = |hour, min, sec, nano| match sec {
96
            60 => {
97
0
                let nano = 1_000_000_000 + nano;
98
0
                NaiveTime::from_hms_nano_opt(hour as _, min as _, 59, nano)
99
            }
100
0
            _ => NaiveTime::from_hms_nano_opt(hour as _, min as _, sec as _, nano),
101
0
        };
102
103
0
        match (self.mask >> 11) & 0b11111111 {
104
            // 09:26:56
105
0
            0b11011011 if self.test(13, b':') && self.test(16, b':') => {
106
0
                let hour = self.digits[11] * 10 + self.digits[12];
107
0
                let minute = self.digits[14] * 10 + self.digits[15];
108
0
                let second = self.digits[17] * 10 + self.digits[18];
109
110
0
                match self.test(19, b'.') {
111
                    true => {
112
0
                        let digits = (self.mask >> 20).trailing_ones();
113
0
                        let nanos = match digits {
114
0
                            0 => return None,
115
0
                            1 => parse_nanos::<1, 0>(&self.digits[20..21]),
116
0
                            2 => parse_nanos::<2, 0>(&self.digits[20..22]),
117
0
                            3 => parse_nanos::<3, 0>(&self.digits[20..23]),
118
0
                            4 => parse_nanos::<4, 0>(&self.digits[20..24]),
119
0
                            5 => parse_nanos::<5, 0>(&self.digits[20..25]),
120
0
                            6 => parse_nanos::<6, 0>(&self.digits[20..26]),
121
0
                            7 => parse_nanos::<7, 0>(&self.digits[20..27]),
122
0
                            8 => parse_nanos::<8, 0>(&self.digits[20..28]),
123
0
                            _ => parse_nanos::<9, 0>(&self.digits[20..29]),
124
                        };
125
0
                        Some((time(hour, minute, second, nanos)?, 20 + digits as usize))
126
                    }
127
0
                    false => Some((time(hour, minute, second, 0)?, 19)),
128
                }
129
            }
130
            // 092656
131
            0b111111 => {
132
0
                let hour = self.digits[11] * 10 + self.digits[12];
133
0
                let minute = self.digits[13] * 10 + self.digits[14];
134
0
                let second = self.digits[15] * 10 + self.digits[16];
135
0
                let time = time(hour, minute, second, 0)?;
136
0
                Some((time, 17))
137
            }
138
0
            _ => None,
139
        }
140
0
    }
141
}
142
143
/// Accepts a string and parses it relative to the provided `timezone`
144
///
145
/// In addition to RFC3339 / ISO8601 standard timestamps, it also
146
/// accepts strings that use a space ` ` to separate the date and time
147
/// as well as strings that have no explicit timezone offset.
148
///
149
/// Examples of accepted inputs:
150
/// * `1997-01-31T09:26:56.123Z`        # RCF3339
151
/// * `1997-01-31T09:26:56.123-05:00`   # RCF3339
152
/// * `1997-01-31 09:26:56.123-05:00`   # close to RCF3339 but with a space rather than T
153
/// * `2023-01-01 04:05:06.789 -08`     # close to RCF3339, no fractional seconds or time separator
154
/// * `1997-01-31T09:26:56.123`         # close to RCF3339 but no timezone offset specified
155
/// * `1997-01-31 09:26:56.123`         # close to RCF3339 but uses a space and no timezone offset
156
/// * `1997-01-31 09:26:56`             # close to RCF3339, no fractional seconds
157
/// * `1997-01-31 092656`               # close to RCF3339, no fractional seconds
158
/// * `1997-01-31 092656+04:00`         # close to RCF3339, no fractional seconds or time separator
159
/// * `1997-01-31`                      # close to RCF3339, only date no time
160
///
161
/// [IANA timezones] are only supported if the `arrow-array/chrono-tz` feature is enabled
162
///
163
/// * `2023-01-01 040506 America/Los_Angeles`
164
///
165
/// If a timestamp is ambiguous, for example as a result of daylight-savings time, an error
166
/// will be returned
167
///
168
/// Some formats supported by PostgresSql <https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-DATETIME-TIME-TABLE>
169
/// are not supported, like
170
///
171
/// * "2023-01-01 04:05:06.789 +07:30:00",
172
/// * "2023-01-01 040506 +07:30:00",
173
/// * "2023-01-01 04:05:06.789 PST",
174
///
175
/// [IANA timezones]: https://www.iana.org/time-zones
176
0
pub fn string_to_datetime<T: TimeZone>(timezone: &T, s: &str) -> Result<DateTime<T>, ArrowError> {
177
0
    let err =
178
0
        |ctx: &str| ArrowError::ParseError(format!("Error parsing timestamp from '{s}': {ctx}"));
179
180
0
    let bytes = s.as_bytes();
181
0
    if bytes.len() < 10 {
182
0
        return Err(err("timestamp must contain at least 10 characters"));
183
0
    }
184
185
0
    let parser = TimestampParser::new(bytes);
186
0
    let date = parser.date().ok_or_else(|| err("error parsing date"))?;
187
0
    if bytes.len() == 10 {
188
0
        let datetime = date.and_time(NaiveTime::from_hms_opt(0, 0, 0).unwrap());
189
0
        return timezone
190
0
            .from_local_datetime(&datetime)
191
0
            .single()
192
0
            .ok_or_else(|| err("error computing timezone offset"));
193
0
    }
194
195
0
    if !parser.test(10, b'T') && !parser.test(10, b't') && !parser.test(10, b' ') {
196
0
        return Err(err("invalid timestamp separator"));
197
0
    }
198
199
0
    let (time, mut tz_offset) = parser.time().ok_or_else(|| err("error parsing time"))?;
200
0
    let datetime = date.and_time(time);
201
202
0
    if tz_offset == 32 {
203
        // Decimal overrun
204
0
        while tz_offset < bytes.len() && bytes[tz_offset].is_ascii_digit() {
205
0
            tz_offset += 1;
206
0
        }
207
0
    }
208
209
0
    if bytes.len() <= tz_offset {
210
0
        return timezone
211
0
            .from_local_datetime(&datetime)
212
0
            .single()
213
0
            .ok_or_else(|| err("error computing timezone offset"));
214
0
    }
215
216
0
    if (bytes[tz_offset] == b'z' || bytes[tz_offset] == b'Z') && tz_offset == bytes.len() - 1 {
217
0
        return Ok(timezone.from_utc_datetime(&datetime));
218
0
    }
219
220
    // Parse remainder of string as timezone
221
0
    let parsed_tz: Tz = s[tz_offset..].trim_start().parse()?;
222
0
    let parsed = parsed_tz
223
0
        .from_local_datetime(&datetime)
224
0
        .single()
225
0
        .ok_or_else(|| err("error computing timezone offset"))?;
226
227
0
    Ok(parsed.with_timezone(timezone))
228
0
}
229
230
/// Accepts a string in RFC3339 / ISO8601 standard format and some
231
/// variants and converts it to a nanosecond precision timestamp.
232
///
233
/// See [`string_to_datetime`] for the full set of supported formats
234
///
235
/// Implements the `to_timestamp` function to convert a string to a
236
/// timestamp, following the model of spark SQL’s to_`timestamp`.
237
///
238
/// Internally, this function uses the `chrono` library for the
239
/// datetime parsing
240
///
241
/// We hope to extend this function in the future with a second
242
/// parameter to specifying the format string.
243
///
244
/// ## Timestamp Precision
245
///
246
/// Function uses the maximum precision timestamps supported by
247
/// Arrow (nanoseconds stored as a 64-bit integer) timestamps. This
248
/// means the range of dates that timestamps can represent is ~1677 AD
249
/// to 2262 AM
250
///
251
/// ## Timezone / Offset Handling
252
///
253
/// Numerical values of timestamps are stored compared to offset UTC.
254
///
255
/// This function interprets string without an explicit time zone as timestamps
256
/// relative to UTC, see [`string_to_datetime`] for alternative semantics
257
///
258
/// In particular:
259
///
260
/// ```
261
/// # use arrow_cast::parse::string_to_timestamp_nanos;
262
/// // Note all three of these timestamps are parsed as the same value
263
/// let a = string_to_timestamp_nanos("1997-01-31 09:26:56.123Z").unwrap();
264
/// let b = string_to_timestamp_nanos("1997-01-31T09:26:56.123").unwrap();
265
/// let c = string_to_timestamp_nanos("1997-01-31T14:26:56.123+05:00").unwrap();
266
///
267
/// assert_eq!(a, b);
268
/// assert_eq!(b, c);
269
/// ```
270
///
271
#[inline]
272
0
pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
273
0
    to_timestamp_nanos(string_to_datetime(&Utc, s)?.naive_utc())
274
0
}
275
276
/// Fallible conversion of [`NaiveDateTime`] to `i64` nanoseconds
277
#[inline]
278
0
fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64, ArrowError> {
279
0
    dt.and_utc()
280
0
        .timestamp_nanos_opt()
281
0
        .ok_or_else(|| ArrowError::ParseError(ERR_NANOSECONDS_NOT_SUPPORTED.to_string()))
282
0
}
283
284
/// Accepts a string in ISO8601 standard format and some
285
/// variants and converts it to nanoseconds since midnight.
286
///
287
/// Examples of accepted inputs:
288
///
289
/// * `09:26:56.123 AM`
290
/// * `23:59:59`
291
/// * `6:00 pm`
292
///
293
/// Internally, this function uses the `chrono` library for the time parsing
294
///
295
/// ## Timezone / Offset Handling
296
///
297
/// This function does not support parsing strings with a timezone
298
/// or offset specified, as it considers only time since midnight.
299
0
pub fn string_to_time_nanoseconds(s: &str) -> Result<i64, ArrowError> {
300
0
    let nt = string_to_time(s)
301
0
        .ok_or_else(|| ArrowError::ParseError(format!("Failed to parse \'{s}\' as time")))?;
302
0
    Ok(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
303
0
}
304
305
0
fn string_to_time(s: &str) -> Option<NaiveTime> {
306
0
    let bytes = s.as_bytes();
307
0
    if bytes.len() < 4 {
308
0
        return None;
309
0
    }
310
311
0
    let (am, bytes) = match bytes.get(bytes.len() - 3..) {
312
0
        Some(b" AM" | b" am" | b" Am" | b" aM") => (Some(true), &bytes[..bytes.len() - 3]),
313
0
        Some(b" PM" | b" pm" | b" pM" | b" Pm") => (Some(false), &bytes[..bytes.len() - 3]),
314
0
        _ => (None, bytes),
315
    };
316
317
0
    if bytes.len() < 4 {
318
0
        return None;
319
0
    }
320
321
0
    let mut digits = [b'0'; 6];
322
323
    // Extract hour
324
0
    let bytes = match (bytes[1], bytes[2]) {
325
        (b':', _) => {
326
0
            digits[1] = bytes[0];
327
0
            &bytes[2..]
328
        }
329
        (_, b':') => {
330
0
            digits[0] = bytes[0];
331
0
            digits[1] = bytes[1];
332
0
            &bytes[3..]
333
        }
334
0
        _ => return None,
335
    };
336
337
0
    if bytes.len() < 2 {
338
0
        return None; // Minutes required
339
0
    }
340
341
    // Extract minutes
342
0
    digits[2] = bytes[0];
343
0
    digits[3] = bytes[1];
344
345
0
    let nanoseconds = match bytes.get(2) {
346
        Some(b':') => {
347
0
            if bytes.len() < 5 {
348
0
                return None;
349
0
            }
350
351
            // Extract seconds
352
0
            digits[4] = bytes[3];
353
0
            digits[5] = bytes[4];
354
355
            // Extract sub-seconds if any
356
0
            match bytes.get(5) {
357
                Some(b'.') => {
358
0
                    let decimal = &bytes[6..];
359
0
                    if decimal.iter().any(|x| !x.is_ascii_digit()) {
360
0
                        return None;
361
0
                    }
362
0
                    match decimal.len() {
363
0
                        0 => return None,
364
0
                        1 => parse_nanos::<1, b'0'>(decimal),
365
0
                        2 => parse_nanos::<2, b'0'>(decimal),
366
0
                        3 => parse_nanos::<3, b'0'>(decimal),
367
0
                        4 => parse_nanos::<4, b'0'>(decimal),
368
0
                        5 => parse_nanos::<5, b'0'>(decimal),
369
0
                        6 => parse_nanos::<6, b'0'>(decimal),
370
0
                        7 => parse_nanos::<7, b'0'>(decimal),
371
0
                        8 => parse_nanos::<8, b'0'>(decimal),
372
0
                        _ => parse_nanos::<9, b'0'>(decimal),
373
                    }
374
                }
375
0
                Some(_) => return None,
376
0
                None => 0,
377
            }
378
        }
379
0
        Some(_) => return None,
380
0
        None => 0,
381
    };
382
383
0
    digits.iter_mut().for_each(|x| *x = x.wrapping_sub(b'0'));
384
0
    if digits.iter().any(|x| *x > 9) {
385
0
        return None;
386
0
    }
387
388
0
    let hour = match (digits[0] * 10 + digits[1], am) {
389
0
        (12, Some(true)) => 0,               // 12:00 AM -> 00:00
390
0
        (h @ 1..=11, Some(true)) => h,       // 1:00 AM -> 01:00
391
0
        (12, Some(false)) => 12,             // 12:00 PM -> 12:00
392
0
        (h @ 1..=11, Some(false)) => h + 12, // 1:00 PM -> 13:00
393
0
        (_, Some(_)) => return None,
394
0
        (h, None) => h,
395
    };
396
397
    // Handle leap second
398
0
    let (second, nanoseconds) = match digits[4] * 10 + digits[5] {
399
0
        60 => (59, nanoseconds + 1_000_000_000),
400
0
        s => (s, nanoseconds),
401
    };
402
403
0
    NaiveTime::from_hms_nano_opt(
404
0
        hour as _,
405
0
        (digits[2] * 10 + digits[3]) as _,
406
0
        second as _,
407
0
        nanoseconds,
408
    )
409
0
}
410
411
/// Specialized parsing implementations to convert strings to Arrow types.
412
///
413
/// This is used by csv and json reader and can be used directly as well.
414
///
415
/// # Example
416
///
417
/// To parse a string to a [`Date32Type`]:
418
///
419
/// ```
420
/// use arrow_cast::parse::Parser;
421
/// use arrow_array::types::Date32Type;
422
/// let date = Date32Type::parse("2021-01-01").unwrap();
423
/// assert_eq!(date, 18628);
424
/// ```
425
///
426
/// To parse a string to a [`TimestampNanosecondType`]:
427
///
428
/// ```
429
/// use arrow_cast::parse::Parser;
430
/// use arrow_array::types::TimestampNanosecondType;
431
/// let ts = TimestampNanosecondType::parse("2021-01-01T00:00:00.123456789Z").unwrap();
432
/// assert_eq!(ts, 1609459200123456789);
433
/// ```
434
pub trait Parser: ArrowPrimitiveType {
435
    /// Parse a string to the native type
436
    fn parse(string: &str) -> Option<Self::Native>;
437
438
    /// Parse a string to the native type with a format string
439
    ///
440
    /// When not implemented, the format string is unused, and this method is equivalent to [parse](#tymethod.parse)
441
    fn parse_formatted(string: &str, _format: &str) -> Option<Self::Native> {
442
        Self::parse(string)
443
    }
444
}
445
446
impl Parser for Float16Type {
447
0
    fn parse(string: &str) -> Option<f16> {
448
0
        lexical_core::parse(string.as_bytes())
449
0
            .ok()
450
0
            .map(f16::from_f32)
451
0
    }
452
}
453
454
impl Parser for Float32Type {
455
0
    fn parse(string: &str) -> Option<f32> {
456
0
        lexical_core::parse(string.as_bytes()).ok()
457
0
    }
458
}
459
460
impl Parser for Float64Type {
461
0
    fn parse(string: &str) -> Option<f64> {
462
0
        lexical_core::parse(string.as_bytes()).ok()
463
0
    }
464
}
465
466
/// Implements [`Parser`] for an integer-backed Arrow type using base-10 parsing
macro_rules! parser_primitive {
    ($t:ty) => {
        impl Parser for $t {
            fn parse(string: &str) -> Option<Self::Native> {
                let bytes = string.as_bytes();
                // Reject empty input and anything not ending in a digit up front
                if !matches!(bytes.last(), Some(last) if last.is_ascii_digit()) {
                    return None;
                }
                let (value, consumed): (Option<Self::Native>, usize) =
                    atoi::FromRadix10SignedChecked::from_radix_10_signed_checked(bytes);
                // Accept the value only when the whole string was consumed
                value.filter(|_| consumed == string.len())
            }
        }
    };
}
483
parser_primitive!(UInt64Type);
484
parser_primitive!(UInt32Type);
485
parser_primitive!(UInt16Type);
486
parser_primitive!(UInt8Type);
487
parser_primitive!(Int64Type);
488
parser_primitive!(Int32Type);
489
parser_primitive!(Int16Type);
490
parser_primitive!(Int8Type);
491
parser_primitive!(DurationNanosecondType);
492
parser_primitive!(DurationMicrosecondType);
493
parser_primitive!(DurationMillisecondType);
494
parser_primitive!(DurationSecondType);
495
496
impl Parser for TimestampNanosecondType {
497
0
    fn parse(string: &str) -> Option<i64> {
498
0
        string_to_timestamp_nanos(string).ok()
499
0
    }
500
}
501
502
impl Parser for TimestampMicrosecondType {
503
0
    fn parse(string: &str) -> Option<i64> {
504
0
        let nanos = string_to_timestamp_nanos(string).ok();
505
0
        nanos.map(|x| x / 1000)
506
0
    }
507
}
508
509
impl Parser for TimestampMillisecondType {
510
0
    fn parse(string: &str) -> Option<i64> {
511
0
        let nanos = string_to_timestamp_nanos(string).ok();
512
0
        nanos.map(|x| x / 1_000_000)
513
0
    }
514
}
515
516
impl Parser for TimestampSecondType {
517
0
    fn parse(string: &str) -> Option<i64> {
518
0
        let nanos = string_to_timestamp_nanos(string).ok();
519
0
        nanos.map(|x| x / 1_000_000_000)
520
0
    }
521
}
522
523
impl Parser for Time64NanosecondType {
524
    // Will truncate any fractions of a nanosecond
525
0
    fn parse(string: &str) -> Option<Self::Native> {
526
0
        string_to_time_nanoseconds(string)
527
0
            .ok()
528
0
            .or_else(|| string.parse::<Self::Native>().ok())
529
0
    }
530
531
0
    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
532
0
        let nt = NaiveTime::parse_from_str(string, format).ok()?;
533
0
        Some(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
534
0
    }
535
}
536
537
impl Parser for Time64MicrosecondType {
538
    // Will truncate any fractions of a microsecond
539
0
    fn parse(string: &str) -> Option<Self::Native> {
540
0
        string_to_time_nanoseconds(string)
541
0
            .ok()
542
0
            .map(|nanos| nanos / 1_000)
543
0
            .or_else(|| string.parse::<Self::Native>().ok())
544
0
    }
545
546
0
    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
547
0
        let nt = NaiveTime::parse_from_str(string, format).ok()?;
548
0
        Some(nt.num_seconds_from_midnight() as i64 * 1_000_000 + nt.nanosecond() as i64 / 1_000)
549
0
    }
550
}
551
552
impl Parser for Time32MillisecondType {
553
    // Will truncate any fractions of a millisecond
554
0
    fn parse(string: &str) -> Option<Self::Native> {
555
0
        string_to_time_nanoseconds(string)
556
0
            .ok()
557
0
            .map(|nanos| (nanos / 1_000_000) as i32)
558
0
            .or_else(|| string.parse::<Self::Native>().ok())
559
0
    }
560
561
0
    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
562
0
        let nt = NaiveTime::parse_from_str(string, format).ok()?;
563
0
        Some(nt.num_seconds_from_midnight() as i32 * 1_000 + nt.nanosecond() as i32 / 1_000_000)
564
0
    }
565
}
566
567
impl Parser for Time32SecondType {
568
    // Will truncate any fractions of a second
569
0
    fn parse(string: &str) -> Option<Self::Native> {
570
0
        string_to_time_nanoseconds(string)
571
0
            .ok()
572
0
            .map(|nanos| (nanos / 1_000_000_000) as i32)
573
0
            .or_else(|| string.parse::<Self::Native>().ok())
574
0
    }
575
576
0
    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
577
0
        let nt = NaiveTime::parse_from_str(string, format).ok()?;
578
0
        Some(nt.num_seconds_from_midnight() as i32 + nt.nanosecond() as i32 / 1_000_000_000)
579
0
    }
580
}
581
582
/// Number of days between 0001-01-01 and 1970-01-01
583
const EPOCH_DAYS_FROM_CE: i32 = 719_163;
584
585
/// Error message if nanosecond conversion request beyond supported interval
586
const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804";
587
588
0
fn parse_date(string: &str) -> Option<NaiveDate> {
589
    // If the date has an extended (signed) year such as "+10999-12-31" or "-0012-05-06"
590
    //
591
    // According to [ISO 8601], years have:
592
    //  Four digits or more for the year. Years in the range 0000 to 9999 will be pre-padded by
593
    //  zero to ensure four digits. Years outside that range will have a prefixed positive or negative symbol.
594
    //
595
    // [ISO 8601]: https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/time/format/DateTimeFormatter.html#ISO_LOCAL_DATE
596
0
    if string.starts_with('+') || string.starts_with('-') {
597
        // Skip the sign and look for the hyphen that terminates the year digits.
598
        // According to ISO 8601 the unsigned part must be at least 4 digits.
599
0
        let rest = &string[1..];
600
0
        let hyphen = rest.find('-')?;
601
0
        if hyphen < 4 {
602
0
            return None;
603
0
        }
604
        // The year substring is the sign and the digits (but not the separator)
605
        // e.g. for "+10999-12-31", hyphen is 5 and s[..6] is "+10999"
606
0
        let year: i32 = string[..hyphen + 1].parse().ok()?;
607
        // The remainder should begin with a '-' which we strip off, leaving the month-day part.
608
0
        let remainder = string[hyphen + 1..].strip_prefix('-')?;
609
0
        let mut parts = remainder.splitn(2, '-');
610
0
        let month: u32 = parts.next()?.parse().ok()?;
611
0
        let day: u32 = parts.next()?.parse().ok()?;
612
0
        return NaiveDate::from_ymd_opt(year, month, day);
613
0
    }
614
615
0
    if string.len() > 10 {
616
        // Try to parse as datetime and return just the date part
617
0
        return string_to_datetime(&Utc, string)
618
0
            .map(|dt| dt.date_naive())
619
0
            .ok();
620
0
    };
621
0
    let mut digits = [0; 10];
622
0
    let mut mask = 0;
623
624
    // Treating all bytes the same way, helps LLVM vectorise this correctly
625
0
    for (idx, (o, i)) in digits.iter_mut().zip(string.bytes()).enumerate() {
626
0
        *o = i.wrapping_sub(b'0');
627
0
        mask |= ((*o < 10) as u16) << idx
628
    }
629
630
    const HYPHEN: u8 = b'-'.wrapping_sub(b'0');
631
632
    //  refer to https://www.rfc-editor.org/rfc/rfc3339#section-3
633
0
    if digits[4] != HYPHEN {
634
0
        let (year, month, day) = match (mask, string.len()) {
635
0
            (0b11111111, 8) => (
636
0
                digits[0] as u16 * 1000
637
0
                    + digits[1] as u16 * 100
638
0
                    + digits[2] as u16 * 10
639
0
                    + digits[3] as u16,
640
0
                digits[4] * 10 + digits[5],
641
0
                digits[6] * 10 + digits[7],
642
0
            ),
643
0
            _ => return None,
644
        };
645
0
        return NaiveDate::from_ymd_opt(year as _, month as _, day as _);
646
0
    }
647
648
0
    let (month, day) = match mask {
649
        0b1101101111 => {
650
0
            if digits[7] != HYPHEN {
651
0
                return None;
652
0
            }
653
0
            (digits[5] * 10 + digits[6], digits[8] * 10 + digits[9])
654
        }
655
        0b101101111 => {
656
0
            if digits[7] != HYPHEN {
657
0
                return None;
658
0
            }
659
0
            (digits[5] * 10 + digits[6], digits[8])
660
        }
661
        0b110101111 => {
662
0
            if digits[6] != HYPHEN {
663
0
                return None;
664
0
            }
665
0
            (digits[5], digits[7] * 10 + digits[8])
666
        }
667
        0b10101111 => {
668
0
            if digits[6] != HYPHEN {
669
0
                return None;
670
0
            }
671
0
            (digits[5], digits[7])
672
        }
673
0
        _ => return None,
674
    };
675
676
0
    let year =
677
0
        digits[0] as u16 * 1000 + digits[1] as u16 * 100 + digits[2] as u16 * 10 + digits[3] as u16;
678
679
0
    NaiveDate::from_ymd_opt(year as _, month as _, day as _)
680
0
}
681
682
impl Parser for Date32Type {
683
0
    fn parse(string: &str) -> Option<i32> {
684
0
        let date = parse_date(string)?;
685
0
        Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
686
0
    }
687
688
0
    fn parse_formatted(string: &str, format: &str) -> Option<i32> {
689
0
        let date = NaiveDate::parse_from_str(string, format).ok()?;
690
0
        Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
691
0
    }
692
}
693
694
impl Parser for Date64Type {
695
0
    fn parse(string: &str) -> Option<i64> {
696
0
        if string.len() <= 10 {
697
0
            let datetime = NaiveDateTime::new(parse_date(string)?, NaiveTime::default());
698
0
            Some(datetime.and_utc().timestamp_millis())
699
        } else {
700
0
            let date_time = string_to_datetime(&Utc, string).ok()?;
701
0
            Some(date_time.timestamp_millis())
702
        }
703
0
    }
704
705
0
    fn parse_formatted(string: &str, format: &str) -> Option<i64> {
706
        use chrono::format::Fixed;
707
        use chrono::format::StrftimeItems;
708
0
        let fmt = StrftimeItems::new(format);
709
0
        let has_zone = fmt.into_iter().any(|item| match item {
710
0
            chrono::format::Item::Fixed(fixed_item) => matches!(
711
0
                fixed_item,
712
                Fixed::RFC2822
713
                    | Fixed::RFC3339
714
                    | Fixed::TimezoneName
715
                    | Fixed::TimezoneOffsetColon
716
                    | Fixed::TimezoneOffsetColonZ
717
                    | Fixed::TimezoneOffset
718
                    | Fixed::TimezoneOffsetZ
719
            ),
720
0
            _ => false,
721
0
        });
722
0
        if has_zone {
723
0
            let date_time = chrono::DateTime::parse_from_str(string, format).ok()?;
724
0
            Some(date_time.timestamp_millis())
725
        } else {
726
0
            let date_time = NaiveDateTime::parse_from_str(string, format).ok()?;
727
0
            Some(date_time.and_utc().timestamp_millis())
728
        }
729
0
    }
730
}
731
732
fn parse_e_notation<T: DecimalType>(
733
    s: &str,
734
    mut digits: u16,
735
    mut fractionals: i16,
736
    mut result: T::Native,
737
    index: usize,
738
    precision: u16,
739
    scale: i16,
740
) -> Result<T::Native, ArrowError> {
741
    let mut exp: i16 = 0;
742
    let base = T::Native::usize_as(10);
743
744
    let mut exp_start: bool = false;
745
    // e has a plus sign
746
    let mut pos_shift_direction: bool = true;
747
748
    // skip to point or exponent index
749
    let mut bs;
750
    if fractionals > 0 {
751
        // it's a fraction, so the point index needs to be skipped, so +1
752
        bs = s.as_bytes().iter().skip(index + fractionals as usize + 1);
753
    } else {
754
        // it's actually an integer that is already written into the result, so let's skip on to e
755
        bs = s.as_bytes().iter().skip(index);
756
    }
757
758
    while let Some(b) = bs.next() {
759
        match b {
760
            b'0'..=b'9' => {
761
                result = result.mul_wrapping(base);
762
                result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
763
                if fractionals > 0 {
764
                    fractionals += 1;
765
                }
766
                digits += 1;
767
            }
768
            &b'e' | &b'E' => {
769
                exp_start = true;
770
            }
771
            _ => {
772
                return Err(ArrowError::ParseError(format!(
773
                    "can't parse the string value {s} to decimal"
774
                )));
775
            }
776
        };
777
778
        if exp_start {
779
            pos_shift_direction = match bs.next() {
780
                Some(&b'-') => false,
781
                Some(&b'+') => true,
782
                Some(b) => {
783
                    if !b.is_ascii_digit() {
784
                        return Err(ArrowError::ParseError(format!(
785
                            "can't parse the string value {s} to decimal"
786
                        )));
787
                    }
788
789
                    exp *= 10;
790
                    exp += (b - b'0') as i16;
791
792
                    true
793
                }
794
                None => {
795
                    return Err(ArrowError::ParseError(format!(
796
                        "can't parse the string value {s} to decimal"
797
                    )))
798
                }
799
            };
800
801
            for b in bs.by_ref() {
802
                if !b.is_ascii_digit() {
803
                    return Err(ArrowError::ParseError(format!(
804
                        "can't parse the string value {s} to decimal"
805
                    )));
806
                }
807
                exp *= 10;
808
                exp += (b - b'0') as i16;
809
            }
810
        }
811
    }
812
813
    if digits == 0 && fractionals == 0 && exp == 0 {
814
        return Err(ArrowError::ParseError(format!(
815
            "can't parse the string value {s} to decimal"
816
        )));
817
    }
818
819
    if !pos_shift_direction {
820
        // exponent has a large negative sign
821
        // 1.12345e-30 => 0.0{29}12345, scale = 5
822
        if exp - (digits as i16 + scale) > 0 {
823
            return Ok(T::Native::usize_as(0));
824
        }
825
        exp *= -1;
826
    }
827
828
    // point offset
829
    exp = fractionals - exp;
830
    // We have zeros on the left, we need to count them
831
    if !pos_shift_direction && exp > digits as i16 {
832
        digits = exp as u16;
833
    }
834
    // Number of numbers to be removed or added
835
    exp = scale - exp;
836
837
    if (digits as i16 + exp) as u16 > precision {
838
        return Err(ArrowError::ParseError(format!(
839
            "parse decimal overflow ({s})"
840
        )));
841
    }
842
843
    if exp < 0 {
844
        result = result.div_wrapping(base.pow_wrapping(-exp as _));
845
    } else {
846
        result = result.mul_wrapping(base.pow_wrapping(exp as _));
847
    }
848
849
    Ok(result)
850
}
851
852
/// Parse the string format decimal value to i128/i256 format and checking the precision and scale.
853
/// The result value can't be out of bounds.
854
pub fn parse_decimal<T: DecimalType>(
855
    s: &str,
856
    precision: u8,
857
    scale: i8,
858
) -> Result<T::Native, ArrowError> {
859
    let mut result = T::Native::usize_as(0);
860
    let mut fractionals: i8 = 0;
861
    let mut digits: u8 = 0;
862
    let base = T::Native::usize_as(10);
863
864
    let bs = s.as_bytes();
865
    let (signed, negative) = match bs.first() {
866
        Some(b'-') => (true, true),
867
        Some(b'+') => (true, false),
868
        _ => (false, false),
869
    };
870
871
    if bs.is_empty() || signed && bs.len() == 1 {
872
        return Err(ArrowError::ParseError(format!(
873
            "can't parse the string value {s} to decimal"
874
        )));
875
    }
876
877
    // Iterate over the raw input bytes, skipping the sign if any
878
    let mut bs = bs.iter().enumerate().skip(signed as usize);
879
880
    let mut is_e_notation = false;
881
882
    // Overflow checks are not required if 10^(precision - 1) <= T::MAX holds.
883
    // Thus, if we validate the precision correctly, we can skip overflow checks.
884
    while let Some((index, b)) = bs.next() {
885
        match b {
886
            b'0'..=b'9' => {
887
                if digits == 0 && *b == b'0' {
888
                    // Ignore leading zeros.
889
                    continue;
890
                }
891
                digits += 1;
892
                result = result.mul_wrapping(base);
893
                result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
894
            }
895
            b'.' => {
896
                let point_index = index;
897
898
                for (_, b) in bs.by_ref() {
899
                    if !b.is_ascii_digit() {
900
                        if *b == b'e' || *b == b'E' {
901
                            result = parse_e_notation::<T>(
902
                                s,
903
                                digits as u16,
904
                                fractionals as i16,
905
                                result,
906
                                point_index,
907
                                precision as u16,
908
                                scale as i16,
909
                            )?;
910
911
                            is_e_notation = true;
912
913
                            break;
914
                        }
915
                        return Err(ArrowError::ParseError(format!(
916
                            "can't parse the string value {s} to decimal"
917
                        )));
918
                    }
919
                    if fractionals == scale && scale != 0 {
920
                        // We have processed all the digits that we need. All that
921
                        // is left is to validate that the rest of the string contains
922
                        // valid digits.
923
                        continue;
924
                    }
925
                    fractionals += 1;
926
                    digits += 1;
927
                    result = result.mul_wrapping(base);
928
                    result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
929
                }
930
931
                if is_e_notation {
932
                    break;
933
                }
934
935
                // Fail on "."
936
                if digits == 0 {
937
                    return Err(ArrowError::ParseError(format!(
938
                        "can't parse the string value {s} to decimal"
939
                    )));
940
                }
941
            }
942
            b'e' | b'E' => {
943
                result = parse_e_notation::<T>(
944
                    s,
945
                    digits as u16,
946
                    fractionals as i16,
947
                    result,
948
                    index,
949
                    precision as u16,
950
                    scale as i16,
951
                )?;
952
953
                is_e_notation = true;
954
955
                break;
956
            }
957
            _ => {
958
                return Err(ArrowError::ParseError(format!(
959
                    "can't parse the string value {s} to decimal"
960
                )));
961
            }
962
        }
963
    }
964
965
    if !is_e_notation {
966
        if fractionals < scale {
967
            let exp = scale - fractionals;
968
            if exp as u8 + digits > precision {
969
                return Err(ArrowError::ParseError(format!(
970
                    "parse decimal overflow ({s})"
971
                )));
972
            }
973
            let mul = base.pow_wrapping(exp as _);
974
            result = result.mul_wrapping(mul);
975
        } else if digits > precision {
976
            return Err(ArrowError::ParseError(format!(
977
                "parse decimal overflow ({s})"
978
            )));
979
        }
980
    }
981
982
    Ok(if negative {
983
        result.neg_wrapping()
984
    } else {
985
        result
986
    })
987
}
988
989
/// Parse human-readable interval string to Arrow [IntervalYearMonthType]
990
0
pub fn parse_interval_year_month(
991
0
    value: &str,
992
0
) -> Result<<IntervalYearMonthType as ArrowPrimitiveType>::Native, ArrowError> {
993
0
    let config = IntervalParseConfig::new(IntervalUnit::Year);
994
0
    let interval = Interval::parse(value, &config)?;
995
996
0
    let months = interval.to_year_months().map_err(|_| {
997
0
        ArrowError::CastError(format!(
998
0
            "Cannot cast {value} to IntervalYearMonth. Only year and month fields are allowed."
999
0
        ))
1000
0
    })?;
1001
1002
0
    Ok(IntervalYearMonthType::make_value(0, months))
1003
0
}
1004
1005
/// Parse human-readable interval string to Arrow [IntervalDayTimeType]
1006
0
pub fn parse_interval_day_time(
1007
0
    value: &str,
1008
0
) -> Result<<IntervalDayTimeType as ArrowPrimitiveType>::Native, ArrowError> {
1009
0
    let config = IntervalParseConfig::new(IntervalUnit::Day);
1010
0
    let interval = Interval::parse(value, &config)?;
1011
1012
0
    let (days, millis) = interval.to_day_time().map_err(|_| ArrowError::CastError(format!(
1013
0
        "Cannot cast {value} to IntervalDayTime because the nanos part isn't multiple of milliseconds"
1014
0
    )))?;
1015
1016
0
    Ok(IntervalDayTimeType::make_value(days, millis))
1017
0
}
1018
1019
/// Parse human-readable interval string to Arrow [IntervalMonthDayNanoType]
1020
0
pub fn parse_interval_month_day_nano_config(
1021
0
    value: &str,
1022
0
    config: IntervalParseConfig,
1023
0
) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1024
0
    let interval = Interval::parse(value, &config)?;
1025
1026
0
    let (months, days, nanos) = interval.to_month_day_nanos();
1027
1028
0
    Ok(IntervalMonthDayNanoType::make_value(months, days, nanos))
1029
0
}
1030
1031
/// Parse human-readable interval string to Arrow [IntervalMonthDayNanoType]
1032
0
pub fn parse_interval_month_day_nano(
1033
0
    value: &str,
1034
0
) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1035
0
    parse_interval_month_day_nano_config(value, IntervalParseConfig::new(IntervalUnit::Month))
1036
0
}
1037
1038
/// Nanoseconds in one millisecond.
const NANOS_PER_MILLIS: i64 = 1_000_000;
/// Nanoseconds in one second.
const NANOS_PER_SECOND: i64 = 1_000 * NANOS_PER_MILLIS;
/// Nanoseconds in one minute.
const NANOS_PER_MINUTE: i64 = 60 * NANOS_PER_SECOND;
/// Nanoseconds in one hour.
const NANOS_PER_HOUR: i64 = 60 * NANOS_PER_MINUTE;
/// Nanoseconds in one day; only compiled for tests.
#[cfg(test)]
const NANOS_PER_DAY: i64 = 24 * NANOS_PER_HOUR;
1044
1045
/// Config to parse interval strings
1046
///
1047
/// Currently stores the `default_unit` to use if the string doesn't have one specified
1048
#[derive(Debug, Clone)]
1049
pub struct IntervalParseConfig {
1050
    /// The default unit to use if none is specified
1051
    /// e.g. `INTERVAL 1` represents `INTERVAL 1 SECOND` when default_unit = [IntervalUnit::Second]
1052
    default_unit: IntervalUnit,
1053
}
1054
1055
impl IntervalParseConfig {
1056
    /// Create a new [IntervalParseConfig] with the given default unit
1057
0
    pub fn new(default_unit: IntervalUnit) -> Self {
1058
0
        Self { default_unit }
1059
0
    }
1060
}
1061
1062
/// The units an interval component can be expressed in.
///
/// Every variant's discriminant is a distinct single bit, so a set of units
/// can be tracked by OR-ing the `u16` values together.
#[rustfmt::skip]
#[derive(Debug, Clone, Copy)]
#[repr(u16)]
pub enum IntervalUnit {
    /// A Century
    Century     = 0x001,
    /// A Decade
    Decade      = 0x002,
    /// A Year
    Year        = 0x004,
    /// A Month
    Month       = 0x008,
    /// A Week
    Week        = 0x010,
    /// A Day
    Day         = 0x020,
    /// An Hour
    Hour        = 0x040,
    /// A Minute
    Minute      = 0x080,
    /// A Second
    Second      = 0x100,
    /// A Millisecond
    Millisecond = 0x200,
    /// A Microsecond
    Microsecond = 0x400,
    /// A Nanosecond
    Nanosecond  = 0x800,
}
1093
1094
/// Logic for parsing interval unit strings
1095
///
1096
/// See <https://github.com/postgres/postgres/blob/2caa85f4aae689e6f6721d7363b4c66a2a6417d6/src/backend/utils/adt/datetime.c#L189>
1097
/// for a list of unit names supported by PostgreSQL which we try to match here.
1098
impl FromStr for IntervalUnit {
1099
    type Err = ArrowError;
1100
1101
0
    fn from_str(s: &str) -> Result<Self, ArrowError> {
1102
0
        match s.to_lowercase().as_str() {
1103
0
            "c" | "cent" | "cents" | "century" | "centuries" => Ok(Self::Century),
1104
0
            "dec" | "decs" | "decade" | "decades" => Ok(Self::Decade),
1105
0
            "y" | "yr" | "yrs" | "year" | "years" => Ok(Self::Year),
1106
0
            "mon" | "mons" | "month" | "months" => Ok(Self::Month),
1107
0
            "w" | "week" | "weeks" => Ok(Self::Week),
1108
0
            "d" | "day" | "days" => Ok(Self::Day),
1109
0
            "h" | "hr" | "hrs" | "hour" | "hours" => Ok(Self::Hour),
1110
0
            "m" | "min" | "mins" | "minute" | "minutes" => Ok(Self::Minute),
1111
0
            "s" | "sec" | "secs" | "second" | "seconds" => Ok(Self::Second),
1112
0
            "ms" | "msec" | "msecs" | "msecond" | "mseconds" | "millisecond" | "milliseconds" => {
1113
0
                Ok(Self::Millisecond)
1114
            }
1115
0
            "us" | "usec" | "usecs" | "usecond" | "useconds" | "microsecond" | "microseconds" => {
1116
0
                Ok(Self::Microsecond)
1117
            }
1118
0
            "nanosecond" | "nanoseconds" => Ok(Self::Nanosecond),
1119
0
            _ => Err(ArrowError::InvalidArgumentError(format!(
1120
0
                "Unknown interval type: {s}"
1121
0
            ))),
1122
        }
1123
0
    }
1124
}
1125
1126
impl IntervalUnit {
1127
0
    fn from_str_or_config(
1128
0
        s: Option<&str>,
1129
0
        config: &IntervalParseConfig,
1130
0
    ) -> Result<Self, ArrowError> {
1131
0
        match s {
1132
0
            Some(s) => s.parse(),
1133
0
            None => Ok(config.default_unit),
1134
        }
1135
0
    }
1136
}
1137
1138
/// An interval expressed as a `(months, days, nanoseconds)` tuple
pub type MonthDayNano = (i32, i32, i64);

/// Number of fractional decimal digits kept for an interval amount.
///
/// Chosen based on the number of decimal digits in 1 week in nanoseconds
const INTERVAL_PRECISION: u32 = 15;
1143
1144
/// A decimal interval amount split into its whole and fractional parts.
#[derive(Clone, Copy, Debug, PartialEq)]
struct IntervalAmount {
    /// The whole-number part of the amount
    integer: i64,
    /// The fractional part, stored as an integer scaled by 10^INTERVAL_PRECISION
    frac: i64,
}
1151
1152
#[cfg(test)]
impl IntervalAmount {
    /// Test-only constructor taking the already-scaled fractional part.
    fn new(integer: i64, frac: i64) -> Self {
        IntervalAmount { integer, frac }
    }
}
1158
1159
impl FromStr for IntervalAmount {
1160
    type Err = ArrowError;
1161
1162
0
    fn from_str(s: &str) -> Result<Self, Self::Err> {
1163
0
        match s.split_once('.') {
1164
0
            Some((integer, frac))
1165
0
                if frac.len() <= INTERVAL_PRECISION as usize
1166
0
                    && !frac.is_empty()
1167
0
                    && !frac.starts_with('-') =>
1168
            {
1169
                // integer will be "" for values like ".5"
1170
                // and "-" for values like "-.5"
1171
0
                let explicit_neg = integer.starts_with('-');
1172
0
                let integer = if integer.is_empty() || integer == "-" {
1173
0
                    Ok(0)
1174
                } else {
1175
0
                    integer.parse::<i64>().map_err(|_| {
1176
0
                        ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1177
0
                    })
1178
0
                }?;
1179
1180
0
                let frac_unscaled = frac.parse::<i64>().map_err(|_| {
1181
0
                    ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1182
0
                })?;
1183
1184
                // scale fractional part by interval precision
1185
0
                let frac = frac_unscaled * 10_i64.pow(INTERVAL_PRECISION - frac.len() as u32);
1186
1187
                // propagate the sign of the integer part to the fractional part
1188
0
                let frac = if integer < 0 || explicit_neg {
1189
0
                    -frac
1190
                } else {
1191
0
                    frac
1192
                };
1193
1194
0
                let result = Self { integer, frac };
1195
1196
0
                Ok(result)
1197
            }
1198
0
            Some((_, frac)) if frac.starts_with('-') => Err(ArrowError::ParseError(format!(
1199
0
                "Failed to parse {s} as interval amount"
1200
0
            ))),
1201
0
            Some((_, frac)) if frac.len() > INTERVAL_PRECISION as usize => {
1202
0
                Err(ArrowError::ParseError(format!(
1203
0
                    "{s} exceeds the precision available for interval amount"
1204
0
                )))
1205
            }
1206
            Some(_) | None => {
1207
0
                let integer = s.parse::<i64>().map_err(|_| {
1208
0
                    ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1209
0
                })?;
1210
1211
0
                let result = Self { integer, frac: 0 };
1212
0
                Ok(result)
1213
            }
1214
        }
1215
0
    }
1216
}
1217
1218
/// An interval accumulated as separate month, day and nanosecond counts.
///
/// The `Default` value has all three components set to zero.
#[derive(Debug, Default, PartialEq)]
struct Interval {
    /// Number of months
    months: i32,
    /// Number of days
    days: i32,
    /// Number of nanoseconds
    nanos: i64,
}
1224
1225
impl Interval {
1226
0
    fn new(months: i32, days: i32, nanos: i64) -> Self {
1227
0
        Self {
1228
0
            months,
1229
0
            days,
1230
0
            nanos,
1231
0
        }
1232
0
    }
1233
1234
0
    fn to_year_months(&self) -> Result<i32, ArrowError> {
1235
0
        match (self.months, self.days, self.nanos) {
1236
0
            (months, days, nanos) if days == 0 && nanos == 0 => Ok(months),
1237
0
            _ => Err(ArrowError::InvalidArgumentError(format!(
1238
0
                "Unable to represent interval with days and nanos as year-months: {self:?}"
1239
0
            ))),
1240
        }
1241
0
    }
1242
1243
0
    fn to_day_time(&self) -> Result<(i32, i32), ArrowError> {
1244
0
        let days = self.months.mul_checked(30)?.add_checked(self.days)?;
1245
1246
0
        match self.nanos {
1247
0
            nanos if nanos % NANOS_PER_MILLIS == 0 => {
1248
0
                let millis = (self.nanos / 1_000_000).try_into().map_err(|_| {
1249
0
                    ArrowError::InvalidArgumentError(format!(
1250
0
                        "Unable to represent {} nanos as milliseconds in a signed 32-bit integer",
1251
0
                        self.nanos
1252
0
                    ))
1253
0
                })?;
1254
1255
0
                Ok((days, millis))
1256
            }
1257
0
            nanos => Err(ArrowError::InvalidArgumentError(format!(
1258
0
                "Unable to represent {nanos} as milliseconds"
1259
0
            ))),
1260
        }
1261
0
    }
1262
1263
0
    fn to_month_day_nanos(&self) -> (i32, i32, i64) {
1264
0
        (self.months, self.days, self.nanos)
1265
0
    }
1266
1267
    /// Parse string value in traditional Postgres format such as
1268
    /// `1 year 2 months 3 days 4 hours 5 minutes 6 seconds`
1269
0
    fn parse(value: &str, config: &IntervalParseConfig) -> Result<Self, ArrowError> {
1270
0
        let components = parse_interval_components(value, config)?;
1271
1272
0
        components
1273
0
            .into_iter()
1274
0
            .try_fold(Self::default(), |result, (amount, unit)| {
1275
0
                result.add(amount, unit)
1276
0
            })
1277
0
    }
1278
1279
    /// Interval addition following Postgres behavior. Fractional units will be spilled into smaller units.
1280
    /// When the interval unit is larger than months, the result is rounded to total months and not spilled to days/nanos.
1281
    /// Fractional parts of weeks and days are represented using days and nanoseconds.
1282
    /// e.g. INTERVAL '0.5 MONTH' = 15 days, INTERVAL '1.5 MONTH' = 1 month 15 days
1283
    /// e.g. INTERVAL '0.5 DAY' = 12 hours, INTERVAL '1.5 DAY' = 1 day 12 hours
1284
    /// [Postgres reference](https://www.postgresql.org/docs/15/datatype-datetime.html#DATATYPE-INTERVAL-INPUT:~:text=Field%20values%20can,fractional%20on%20output.)
1285
0
    fn add(&self, amount: IntervalAmount, unit: IntervalUnit) -> Result<Self, ArrowError> {
1286
0
        let result = match unit {
1287
            IntervalUnit::Century => {
1288
0
                let months_int = amount.integer.mul_checked(100)?.mul_checked(12)?;
1289
0
                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 2);
1290
0
                let months = months_int
1291
0
                    .add_checked(month_frac)?
1292
0
                    .try_into()
1293
0
                    .map_err(|_| {
1294
0
                        ArrowError::ParseError(format!(
1295
0
                            "Unable to represent {} centuries as months in a signed 32-bit integer",
1296
0
                            &amount.integer
1297
0
                        ))
1298
0
                    })?;
1299
1300
0
                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1301
            }
1302
            IntervalUnit::Decade => {
1303
0
                let months_int = amount.integer.mul_checked(10)?.mul_checked(12)?;
1304
1305
0
                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 1);
1306
0
                let months = months_int
1307
0
                    .add_checked(month_frac)?
1308
0
                    .try_into()
1309
0
                    .map_err(|_| {
1310
0
                        ArrowError::ParseError(format!(
1311
0
                            "Unable to represent {} decades as months in a signed 32-bit integer",
1312
0
                            &amount.integer
1313
0
                        ))
1314
0
                    })?;
1315
1316
0
                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1317
            }
1318
            IntervalUnit::Year => {
1319
0
                let months_int = amount.integer.mul_checked(12)?;
1320
0
                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION);
1321
0
                let months = months_int
1322
0
                    .add_checked(month_frac)?
1323
0
                    .try_into()
1324
0
                    .map_err(|_| {
1325
0
                        ArrowError::ParseError(format!(
1326
0
                            "Unable to represent {} years as months in a signed 32-bit integer",
1327
0
                            &amount.integer
1328
0
                        ))
1329
0
                    })?;
1330
1331
0
                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1332
            }
1333
            IntervalUnit::Month => {
1334
0
                let months = amount.integer.try_into().map_err(|_| {
1335
0
                    ArrowError::ParseError(format!(
1336
0
                        "Unable to represent {} months in a signed 32-bit integer",
1337
0
                        &amount.integer
1338
0
                    ))
1339
0
                })?;
1340
1341
0
                let days = amount.frac * 3 / 10_i64.pow(INTERVAL_PRECISION - 1);
1342
0
                let days = days.try_into().map_err(|_| {
1343
0
                    ArrowError::ParseError(format!(
1344
0
                        "Unable to represent {} months as days in a signed 32-bit integer",
1345
0
                        amount.frac / 10_i64.pow(INTERVAL_PRECISION)
1346
0
                    ))
1347
0
                })?;
1348
1349
0
                Self::new(
1350
0
                    self.months.add_checked(months)?,
1351
0
                    self.days.add_checked(days)?,
1352
0
                    self.nanos,
1353
                )
1354
            }
1355
            IntervalUnit::Week => {
1356
0
                let days = amount.integer.mul_checked(7)?.try_into().map_err(|_| {
1357
0
                    ArrowError::ParseError(format!(
1358
0
                        "Unable to represent {} weeks as days in a signed 32-bit integer",
1359
0
                        &amount.integer
1360
0
                    ))
1361
0
                })?;
1362
1363
0
                let nanos = amount.frac * 7 * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1364
1365
0
                Self::new(
1366
0
                    self.months,
1367
0
                    self.days.add_checked(days)?,
1368
0
                    self.nanos.add_checked(nanos)?,
1369
                )
1370
            }
1371
            IntervalUnit::Day => {
1372
0
                let days = amount.integer.try_into().map_err(|_| {
1373
0
                    ArrowError::InvalidArgumentError(format!(
1374
0
                        "Unable to represent {} days in a signed 32-bit integer",
1375
0
                        amount.integer
1376
0
                    ))
1377
0
                })?;
1378
1379
0
                let nanos = amount.frac * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1380
1381
0
                Self::new(
1382
0
                    self.months,
1383
0
                    self.days.add_checked(days)?,
1384
0
                    self.nanos.add_checked(nanos)?,
1385
                )
1386
            }
1387
            IntervalUnit::Hour => {
1388
0
                let nanos_int = amount.integer.mul_checked(NANOS_PER_HOUR)?;
1389
0
                let nanos_frac = amount.frac * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1390
0
                let nanos = nanos_int.add_checked(nanos_frac)?;
1391
1392
0
                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1393
            }
1394
            IntervalUnit::Minute => {
1395
0
                let nanos_int = amount.integer.mul_checked(NANOS_PER_MINUTE)?;
1396
0
                let nanos_frac = amount.frac * 6 / 10_i64.pow(INTERVAL_PRECISION - 10);
1397
1398
0
                let nanos = nanos_int.add_checked(nanos_frac)?;
1399
1400
0
                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1401
            }
1402
            IntervalUnit::Second => {
1403
0
                let nanos_int = amount.integer.mul_checked(NANOS_PER_SECOND)?;
1404
0
                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 9);
1405
0
                let nanos = nanos_int.add_checked(nanos_frac)?;
1406
1407
0
                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1408
            }
1409
            IntervalUnit::Millisecond => {
1410
0
                let nanos_int = amount.integer.mul_checked(NANOS_PER_MILLIS)?;
1411
0
                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 6);
1412
0
                let nanos = nanos_int.add_checked(nanos_frac)?;
1413
1414
0
                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1415
            }
1416
            IntervalUnit::Microsecond => {
1417
0
                let nanos_int = amount.integer.mul_checked(1_000)?;
1418
0
                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 3);
1419
0
                let nanos = nanos_int.add_checked(nanos_frac)?;
1420
1421
0
                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1422
            }
1423
            IntervalUnit::Nanosecond => {
1424
0
                let nanos_int = amount.integer;
1425
0
                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION);
1426
0
                let nanos = nanos_int.add_checked(nanos_frac)?;
1427
1428
0
                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1429
            }
1430
        };
1431
1432
0
        Ok(result)
1433
0
    }
1434
}
1435
1436
/// parse the string into a vector of interval components i.e. (amount, unit) tuples
1437
0
fn parse_interval_components(
1438
0
    value: &str,
1439
0
    config: &IntervalParseConfig,
1440
0
) -> Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> {
1441
0
    let raw_pairs = split_interval_components(value);
1442
1443
    // parse amounts and units
1444
0
    let Ok(pairs): Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> = raw_pairs
1445
0
        .iter()
1446
0
        .map(|(a, u)| Ok((a.parse()?, IntervalUnit::from_str_or_config(*u, config)?)))
1447
0
        .collect()
1448
    else {
1449
0
        return Err(ArrowError::ParseError(format!(
1450
0
            "Invalid input syntax for type interval: {value:?}"
1451
0
        )));
1452
    };
1453
1454
    // collect parsed results
1455
0
    let (amounts, units): (Vec<_>, Vec<_>) = pairs.into_iter().unzip();
1456
1457
    // duplicate units?
1458
0
    let mut observed_interval_types = 0;
1459
0
    for (unit, (_, raw_unit)) in units.iter().zip(raw_pairs) {
1460
0
        if observed_interval_types & (*unit as u16) != 0 {
1461
0
            return Err(ArrowError::ParseError(format!(
1462
0
                "Invalid input syntax for type interval: {:?}. Repeated type '{}'",
1463
0
                value,
1464
0
                raw_unit.unwrap_or_default(),
1465
0
            )));
1466
0
        }
1467
1468
0
        observed_interval_types |= *unit as u16;
1469
    }
1470
1471
0
    let result = amounts.iter().copied().zip(units.iter().copied());
1472
1473
0
    Ok(result.collect::<Vec<_>>())
1474
0
}
1475
1476
/// Split an interval into a vec of amounts and units.
///
/// Pairs are separated by whitespace, but within a pair the amount and unit may or may not be
/// separated by whitespace. Leading, trailing and repeated whitespace is ignored, so
/// `" 1  day "` splits the same way as `"1 day"`.
///
/// A trailing amount with no unit is returned with `None` as its unit. For input that is empty
/// or entirely whitespace a single `("", None)` pair is returned, so that parsing the (empty)
/// amount downstream fails instead of silently producing an empty interval.
///
/// This should match the behavior of PostgreSQL's interval parser.
fn split_interval_components(value: &str) -> Vec<(&str, Option<&str>)> {
    let mut result = vec![];
    // `split_whitespace` never yields empty words, unlike `split(char::is_whitespace)`.
    let mut words = value.split_whitespace();
    while let Some(word) = words.next() {
        if let Some(split_word_at) = word.find(not_interval_amount) {
            // Amount and unit are glued together, e.g. "5days".
            let (amount, unit) = word.split_at(split_word_at);
            result.push((amount, Some(unit)));
        } else if let Some(unit) = words.next() {
            // The word is purely numeric, so the next word is its unit.
            result.push((word, Some(unit)));
        } else {
            // Purely numeric last word: no unit was given.
            result.push((word, None));
            break;
        }
    }

    if result.is_empty() {
        // Keep empty / blank input invalid: an empty amount cannot be parsed.
        result.push(("", None));
    }
    result
}

/// test if a character is NOT part of an interval numeric amount
///
/// Amount characters are ASCII digits, `.` and `-`.
fn not_interval_amount(c: char) -> bool {
    !(c.is_ascii_digit() || c == '.' || c == '-')
}
1502
1503
#[cfg(test)]
1504
mod tests {
1505
    use super::*;
1506
    use arrow_array::temporal_conversions::date32_to_datetime;
1507
    use arrow_buffer::i256;
1508
1509
    /// `parse_nanos` should scale the leading `N` digits up to a nanosecond count.
    #[test]
    fn test_parse_nanos() {
        // Digits already stored as raw values (offset 0)
        let raw_digits = [1, 2, 3, 4, 5];
        assert_eq!(parse_nanos::<3, 0>(&raw_digits[..3]), 123_000_000);
        assert_eq!(parse_nanos::<5, 0>(&raw_digits), 123_450_000);

        // ASCII digits need `b'0'` subtracted from every byte
        let ascii_digits: &[u8] = b"123456";
        assert_eq!(parse_nanos::<6, b'0'>(ascii_digits), 123_456_000);
    }
1515
1516
    /// Timestamps carrying an explicit offset (`T` separator) parse to the expected UTC nanos.
    #[test]
    fn string_to_timestamp_timezone() {
        // Explicit timezone
        let cases: &[(&str, i64)] = &[
            ("2020-09-08T13:42:29.190855+00:00", 1599572549190855000),
            ("2020-09-08T13:42:29.190855Z", 1599572549190855000),
            // no fractional part
            ("2020-09-08T13:42:29Z", 1599572549000000000),
            ("2020-09-08T13:42:29.190855-05:00", 1599590549190855000),
        ];

        for &(input, expected_nanos) in cases {
            assert_eq!(expected_nanos, parse_timestamp(input).unwrap());
        }
    }
1536
1537
    /// Same inputs as the `T`-separated variant, but with a space between date and time.
    #[test]
    fn string_to_timestamp_timezone_space() {
        // Ensure space rather than T between time and date is accepted
        let cases: &[(&str, i64)] = &[
            ("2020-09-08 13:42:29.190855+00:00", 1599572549190855000),
            ("2020-09-08 13:42:29.190855Z", 1599572549190855000),
            // no fractional part
            ("2020-09-08 13:42:29Z", 1599572549000000000),
            ("2020-09-08 13:42:29.190855-05:00", 1599590549190855000),
        ];

        for &(input, expected_nanos) in cases {
            assert_eq!(expected_nanos, parse_timestamp(input).unwrap());
        }
    }
1557
1558
    /// Timestamps without an offset should parse to the nanosecond value of the same
    /// wall-clock time interpreted as UTC.
    #[test]
    #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function: mktime
    fn string_to_timestamp_no_timezone() {
        // The expected values are built through chrono's UTC conversion, so the test is meant
        // to pass regardless of the local timezone of the machine running it (it remains
        // somewhat exposed to mistakes in how chrono is used).
        let day = NaiveDate::from_ymd_opt(2020, 9, 8).unwrap();
        let utc_nanos = |time: NaiveTime| {
            NaiveDateTime::new(day, time)
                .and_utc()
                .timestamp_nanos_opt()
                .unwrap()
        };

        // Fractional seconds, with both the `T` and the ' ' separator
        let with_fraction = utc_nanos(NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap());
        for input in ["2020-09-08T13:42:29.190855", "2020-09-08 13:42:29.190855"] {
            assert_eq!(with_fraction, parse_timestamp(input).unwrap());
        }

        // Whole seconds only, again with both separators
        let whole_seconds = utc_nanos(NaiveTime::from_hms_opt(13, 42, 29).unwrap());
        for input in ["2020-09-08T13:42:29", "2020-09-08 13:42:29"] {
            assert_eq!(whole_seconds, parse_timestamp(input).unwrap());
        }

        // A bare date resolves to midnight (2020-09-08 0:0:0) of that day
        let midnight = utc_nanos(NaiveTime::from_hms_opt(0, 0, 0).unwrap());
        assert_eq!(midnight, parse_timestamp("2020-09-08").unwrap())
    }
1613
1614
    /// Cross-checks `string_to_datetime` against chrono's RFC 3339 parser.
    #[test]
    fn string_to_timestamp_chrono() {
        let cases = [
            "2020-09-08T13:42:29Z",
            "1969-01-01T00:00:00.1Z",
            "2020-09-08T12:00:12.12345678+00:00",
            "2020-09-08T12:00:12+00:00",
            "2020-09-08T12:00:12.1+00:00",
            "2020-09-08T12:00:12.12+00:00",
            "2020-09-08T12:00:12.123+00:00",
            "2020-09-08T12:00:12.1234+00:00",
            "2020-09-08T12:00:12.12345+00:00",
            "2020-09-08T12:00:12.123456+00:00",
            "2020-09-08T12:00:12.1234567+00:00",
            "2020-09-08T12:00:12.12345678+00:00",
            "2020-09-08T12:00:12.123456789+00:00",
            "2020-09-08T12:00:12.12345678912z",
            "2020-09-08T12:00:12.123456789123Z",
            "2020-09-08T12:00:12.123456789123+02:00",
            "2020-09-08T12:00:12.12345678912345Z",
            "2020-09-08T12:00:12.1234567891234567+02:00",
            "2020-09-08T12:00:60Z",
            "2020-09-08T12:00:60.123Z",
            "2020-09-08T12:00:60.123456+02:00",
            "2020-09-08T12:00:60.1234567891234567+02:00",
            "2020-09-08T12:00:60.999999999+02:00",
            "2020-09-08t12:00:12.12345678+00:00",
            "2020-09-08t12:00:12+00:00",
            "2020-09-08t12:00:12Z",
        ];

        for input in cases {
            // chrono's result, normalized to UTC, is the reference value
            let expected = DateTime::parse_from_rfc3339(input)
                .unwrap()
                .with_timezone(&Utc);
            let actual = string_to_datetime(&Utc, input).unwrap();
            assert_eq!(expected, actual)
        }
    }
1653
1654
    /// Cross-checks offset-less timestamps against chrono's `NaiveDateTime` parser.
    #[test]
    fn string_to_timestamp_naive() {
        let inputs = [
            "2018-11-13T17:11:10.011375885995",
            "2030-12-04T17:11:10.123",
            "2030-12-04T17:11:10.1234",
            "2030-12-04T17:11:10.123456",
        ];
        for input in inputs {
            let expected = NaiveDateTime::parse_from_str(input, "%Y-%m-%dT%H:%M:%S%.f").unwrap();
            let actual = string_to_datetime(&Utc, input).unwrap().naive_utc();
            assert_eq!(expected, actual)
        }
    }
1668
1669
    /// Malformed timestamps must be rejected with an error naming the input and the reason.
    #[test]
    fn string_to_timestamp_invalid() {
        // Each entry pairs an invalid input with the reason expected in the error message
        let cases = [
            ("", "timestamp must contain at least 10 characters"),
            ("SS", "timestamp must contain at least 10 characters"),
            ("Wed, 18 Feb 2015 23:16:09 GMT", "error parsing date"),
            ("1997-01-31H09:26:56.123Z", "invalid timestamp separator"),
            ("1997-01-31  09:26:56.123Z", "error parsing time"),
            ("1997:01:31T09:26:56.123Z", "error parsing date"),
            ("1997:1:31T09:26:56.123Z", "error parsing date"),
            ("1997-01-32T09:26:56.123Z", "error parsing date"),
            ("1997-13-32T09:26:56.123Z", "error parsing date"),
            ("1997-02-29T09:26:56.123Z", "error parsing date"),
            ("2015-02-30T17:35:20-08:00", "error parsing date"),
            ("1997-01-10T9:26:56.123Z", "error parsing time"),
            ("2015-01-20T25:35:20-08:00", "error parsing time"),
            ("1997-01-10T09:61:56.123Z", "error parsing time"),
            ("1997-01-10T09:61:90.123Z", "error parsing time"),
            ("1997-01-10T12:00:6.123Z", "error parsing time"),
            ("1997-01-31T092656.123Z", "error parsing time"),
            ("1997-01-10T12:00:06.", "error parsing time"),
            ("1997-01-10T12:00:06. ", "error parsing time"),
        ];

        for (input, reason) in cases {
            let error = string_to_datetime(&Utc, input).unwrap_err();
            assert_eq!(
                error.to_string(),
                format!("Parser error: Error parsing timestamp from '{input}': {reason}")
            )
        }
    }
1700
1701
    // Parse a timestamp to timestamp int with a useful human readable error message
1702
    fn parse_timestamp(s: &str) -> Result<i64, ArrowError> {
1703
        let result = string_to_timestamp_nanos(s);
1704
        if let Err(e) = &result {
1705
            eprintln!("Error parsing timestamp '{s}': {e:?}");
1706
        }
1707
        result
1708
    }
1709
1710
    /// Offset-less strings are read as wall-clock time in the supplied timezone, whereas
    /// strings carrying an explicit `Z` are read as UTC.
    #[test]
    fn string_without_timezone_to_timestamp() {
        // A string without a timezone should always produce the same value, regardless of the
        // local or session timezone.
        let day = NaiveDate::from_ymd_opt(2020, 9, 8).unwrap();
        let cases = [
            (
                190855000,
                ["2020-09-08T13:42:29.190855", "2020-09-08 13:42:29.190855"],
            ),
            (0, ["2020-09-08T13:42:29", "2020-09-08 13:42:29"]),
        ];
        for (fraction_nanos, inputs) in cases {
            let time = NaiveTime::from_hms_nano_opt(13, 42, 29, fraction_nanos).unwrap();
            let expected = NaiveDateTime::new(day, time)
                .and_utc()
                .timestamp_nanos_opt()
                .unwrap();
            // Both the `T` and the ' ' separator must be accepted
            for input in inputs {
                assert_eq!(expected, parse_timestamp(input).unwrap());
            }
        }

        let plus_two = "+02:00".parse::<Tz>().unwrap();

        // No offset in the string: the value is local to `plus_two`
        let parsed = string_to_datetime(&plus_two, "2020-09-08 13:42:29").unwrap();
        assert_eq!(parsed.naive_utc().to_string(), "2020-09-08 11:42:29");
        assert_eq!(parsed.naive_local().to_string(), "2020-09-08 13:42:29");

        // Explicit `Z`: the value is UTC and gets shifted into `plus_two`
        let parsed = string_to_datetime(&plus_two, "2020-09-08 13:42:29Z").unwrap();
        assert_eq!(parsed.naive_utc().to_string(), "2020-09-08 13:42:29");
        assert_eq!(parsed.naive_local().to_string(), "2020-09-08 15:42:29");

        let reference =
            NaiveDateTime::parse_from_str("2020-09-08T13:42:29Z", "%Y-%m-%dT%H:%M:%SZ").unwrap();
        let plus_eight = "+08:00".parse::<Tz>().unwrap();

        // Parsed as offset from UTC
        let parsed = string_to_datetime(&plus_eight, "2020-09-08T13:42:29Z").unwrap();
        assert_eq!(reference, parsed.naive_utc());
        assert_ne!(reference, parsed.naive_local());

        // Parsed as offset from local
        let parsed = string_to_datetime(&plus_eight, "2020-09-08 13:42:29").unwrap();
        assert_eq!(reference, parsed.naive_local());
        assert_ne!(reference, parsed.naive_utc());
    }
1773
1774
    /// `Date32Type::parse` should agree with chrono on valid dates and reject malformed ones.
    #[test]
    fn parse_date32() {
        let valid = [
            "2020-09-08",
            "2020-9-8",
            "2020-09-8",
            "2020-9-08",
            "2020-12-1",
            "1690-2-5",
            "2020-09-08 01:02:03",
        ];
        for input in valid {
            let parsed = date32_to_datetime(Date32Type::parse(input).unwrap()).unwrap();
            // The reference date comes from chrono: date-only first, then date plus time
            let expected = NaiveDate::parse_from_str(input, "%Y-%m-%d")
                .or_else(|_| NaiveDate::parse_from_str(input, "%Y-%m-%d %H:%M:%S"))
                .unwrap();
            assert_eq!(parsed.date(), expected);
        }

        let invalid = [
            "",
            "80-01-01",
            "342",
            "Foo",
            "2020-09-08-03",
            "2020--04-03",
            "2020--",
            "2020-09-08 01",
            "2020-09-08 01:02",
            "2020-09-08 01-02-03",
            "2020-9-8 01:02:03",
            "2020-09-08 1:2:3",
        ];
        for input in invalid {
            assert_eq!(None, Date32Type::parse(input));
        }
    }
1811
1812
    /// Time-of-day strings parsed as nanoseconds since midnight.
    #[test]
    fn parse_time64_nanos() {
        let cases: &[(&str, i64)] = &[
            ("02:10:01.1234567899999999", 7_801_123_456_789),
            ("02:10:01.1234567", 7_801_123_456_700),
            ("2:10:01.1234567", 7_801_123_456_700),
            ("12:10:01.123456789 AM", 601_123_456_789),
            ("12:10:01.123456789 am", 601_123_456_789),
            ("2:10:01.12345678 PM", 51_001_123_456_780),
            ("2:10:01.12345678 pm", 51_001_123_456_780),
            ("02:10:01", 7_801_000_000_000),
            ("2:10:01", 7_801_000_000_000),
            ("12:10:01 AM", 601_000_000_000),
            ("12:10:01 am", 601_000_000_000),
            ("2:10:01 PM", 51_001_000_000_000),
            ("2:10:01 pm", 51_001_000_000_000),
            ("02:10", 7_800_000_000_000),
            ("2:10", 7_800_000_000_000),
            ("12:10 AM", 600_000_000_000),
            ("12:10 am", 600_000_000_000),
            ("2:10 PM", 51_000_000_000_000),
            ("2:10 pm", 51_000_000_000_000),
            // parse directly as nanoseconds
            ("1", 1),
            // leap second
            ("23:59:60", 86_400_000_000_000),
        ];
        for &(input, expected) in cases {
            assert_eq!(Time64NanosecondType::parse(input), Some(expected), "{input}");
        }

        // custom format
        assert_eq!(
            Time64NanosecondType::parse_formatted("02 - 10 - 01 - .1234567", "%H - %M - %S - %.f"),
            Some(7_801_123_456_700)
        );
    }
1903
1904
    /// Time-of-day strings parsed as microseconds since midnight.
    #[test]
    fn parse_time64_micros() {
        // expected formats
        let cases: &[(&str, i64)] = &[
            ("02:10:01.1234", 7_801_123_400),
            ("2:10:01.1234", 7_801_123_400),
            ("12:10:01.123456 AM", 601_123_456),
            ("12:10:01.123456 am", 601_123_456),
            ("2:10:01.12345 PM", 51_001_123_450),
            ("2:10:01.12345 pm", 51_001_123_450),
            ("02:10:01", 7_801_000_000),
            ("2:10:01", 7_801_000_000),
            ("12:10:01 AM", 601_000_000),
            ("12:10:01 am", 601_000_000),
            ("2:10:01 PM", 51_001_000_000),
            ("2:10:01 pm", 51_001_000_000),
            ("02:10", 7_800_000_000),
            ("2:10", 7_800_000_000),
            ("12:10 AM", 600_000_000),
            ("12:10 am", 600_000_000),
            ("2:10 PM", 51_000_000_000),
            ("2:10 pm", 51_000_000_000),
            // parse directly as microseconds
            ("1", 1),
            // leap second
            ("23:59:60", 86_400_000_000),
        ];
        for &(input, expected) in cases {
            assert_eq!(Time64MicrosecondType::parse(input), Some(expected), "{input}");
        }

        // custom format
        assert_eq!(
            Time64MicrosecondType::parse_formatted("02 - 10 - 01 - .1234", "%H - %M - %S - %.f"),
            Some(7_801_123_400)
        );
    }
1980
1981
    /// Time-of-day strings parsed as milliseconds since midnight.
    #[test]
    fn parse_time32_millis() {
        // expected formats
        let cases: &[(&str, i32)] = &[
            ("02:10:01.1", 7_801_100),
            ("2:10:01.1", 7_801_100),
            ("12:10:01.123 AM", 601_123),
            ("12:10:01.123 am", 601_123),
            ("2:10:01.12 PM", 51_001_120),
            ("2:10:01.12 pm", 51_001_120),
            ("02:10:01", 7_801_000),
            ("2:10:01", 7_801_000),
            ("12:10:01 AM", 601_000),
            ("12:10:01 am", 601_000),
            ("2:10:01 PM", 51_001_000),
            ("2:10:01 pm", 51_001_000),
            ("02:10", 7_800_000),
            ("2:10", 7_800_000),
            ("12:10 AM", 600_000),
            ("12:10 am", 600_000),
            ("2:10 PM", 51_000_000),
            ("2:10 pm", 51_000_000),
            // parse directly as milliseconds
            ("1", 1),
            // leap second
            ("23:59:60", 86_400_000),
        ];
        for &(input, expected) in cases {
            assert_eq!(Time32MillisecondType::parse(input), Some(expected), "{input}");
        }

        // custom format
        assert_eq!(
            Time32MillisecondType::parse_formatted("02 - 10 - 01 - .1", "%H - %M - %S - %.f"),
            Some(7_801_100)
        );
    }
2027
2028
    /// Time-of-day strings parsed as whole seconds since midnight.
    #[test]
    fn parse_time32_secs() {
        // expected formats
        let cases: &[(&str, i32)] = &[
            ("02:10:01.1", 7_801),
            ("02:10:01", 7_801),
            ("2:10:01", 7_801),
            ("12:10:01 AM", 601),
            ("12:10:01 am", 601),
            ("2:10:01 PM", 51_001),
            ("2:10:01 pm", 51_001),
            ("02:10", 7_800),
            ("2:10", 7_800),
            ("12:10 AM", 600),
            ("12:10 am", 600),
            ("2:10 PM", 51_000),
            ("2:10 pm", 51_000),
            // parse directly as seconds
            ("1", 1),
            // leap second
            ("23:59:60", 86400),
        ];
        for &(input, expected) in cases {
            assert_eq!(Time32SecondType::parse(input), Some(expected), "{input}");
        }

        // custom format
        assert_eq!(
            Time32SecondType::parse_formatted("02 - 10 - 01", "%H - %M - %S"),
            Some(7_801)
        );
    }
2057
2058
    /// Every input listed here must be rejected by `string_to_time`.
    #[test]
    fn test_string_to_time_invalid() {
        let rejected = [
            "25:00",
            "9:00:",
            "009:00",
            "09:0:00",
            "25:00:00",
            "13:00 AM",
            "13:00 PM",
            "12:00. AM",
            "09:0:00",
            "09:01:0",
            "09:01:1",
            "9:1:0",
            "09:01:0",
            "1:00.123",
            "1:00:00.123f",
            " 9:00:00",
            ":09:00",
            "T9:00:00",
            "AM",
        ];
        for input in rejected {
            assert_eq!(string_to_time(input), None, "{input}");
        }
    }
2085
2086
    /// `string_to_time` should accept and reject exactly what chrono does for each
    /// input / format pair below.
    #[test]
    fn test_string_to_time_chrono() {
        let cases = [
            ("1:00", "%H:%M"),
            ("12:00", "%H:%M"),
            ("13:00", "%H:%M"),
            ("24:00", "%H:%M"),
            ("1:00:00", "%H:%M:%S"),
            ("12:00:30", "%H:%M:%S"),
            ("13:00:59", "%H:%M:%S"),
            ("24:00:60", "%H:%M:%S"),
            ("09:00:00", "%H:%M:%S%.f"),
            ("0:00:30.123456", "%H:%M:%S%.f"),
            ("0:00 AM", "%I:%M %P"),
            ("1:00 AM", "%I:%M %P"),
            ("12:00 AM", "%I:%M %P"),
            ("13:00 AM", "%I:%M %P"),
            ("0:00 PM", "%I:%M %P"),
            ("1:00 PM", "%I:%M %P"),
            ("12:00 PM", "%I:%M %P"),
            ("13:00 PM", "%I:%M %P"),
            ("1:00 pM", "%I:%M %P"),
            ("1:00 Pm", "%I:%M %P"),
            ("1:00 aM", "%I:%M %P"),
            ("1:00 Am", "%I:%M %P"),
            ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
            ("1:00:30.123456789 PM", "%I:%M:%S%.f %P"),
            ("1:00:30.123456789123 PM", "%I:%M:%S%.f %P"),
            ("1:00:30.1234 PM", "%I:%M:%S%.f %P"),
            ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
            ("1:00:30.123456789123456789 PM", "%I:%M:%S%.f %P"),
            ("1:00:30.12F456 PM", "%I:%M:%S%.f %P"),
        ];
        for (input, chrono_format) in cases {
            // `None` on both sides means both parsers rejected the input
            let expected = NaiveTime::parse_from_str(input, chrono_format).ok();
            assert_eq!(expected, string_to_time(input), "{input}");
        }
    }
2125
2126
    /// Exercises `Interval::parse` with a default unit of months: fractional amounts,
    /// mixed signs, missing units, abbreviated units, amounts glued to their unit, and
    /// malformed input.
    #[test]
    fn test_parse_interval() {
        let config = IntervalParseConfig::new(IntervalUnit::Month);

        // Asserts that `input` parses to `Interval::new(months, days, nanos)`
        let expect_interval = |months: i32, days: i32, nanos: i64, input: &str| {
            assert_eq!(
                Interval::new(months, days, nanos),
                Interval::parse(input, &config).unwrap(),
                "{input}"
            );
        };
        // Asserts that `input` is rejected with exactly `message`
        let expect_error = |input: &str, message: &str| {
            assert_eq!(
                Interval::parse(input, &config).unwrap_err().to_string(),
                message
            );
        };

        expect_interval(1, 0, 0, "1 month");
        expect_interval(2, 0, 0, "2 month");
        expect_interval(-1, -18, -(NANOS_PER_DAY / 5), "-1.5 months -3.2 days");
        expect_interval(0, 15, 0, "0.5 months");
        expect_interval(0, 15, 0, ".5 months");
        expect_interval(0, -15, 0, "-0.5 months");
        expect_interval(0, -15, 0, "-.5 months");
        expect_interval(2, 10, 9 * NANOS_PER_HOUR, "2.1 months 7.25 days 3 hours");

        expect_error(
            "1 centurys 1 month",
            r#"Parser error: Invalid input syntax for type interval: "1 centurys 1 month""#,
        );

        expect_interval(37, 0, 0, "3 year 1 month");
        expect_interval(35, 0, 0, "3 year -1 month");
        expect_interval(-37, 0, 0, "-3 year -1 month");
        expect_interval(-35, 0, 0, "-3 year 1 month");
        expect_interval(0, 5, 0, "5 days");
        expect_interval(0, 7, 3 * NANOS_PER_HOUR, "7 days 3 hours");
        expect_interval(0, 7, 5 * NANOS_PER_MINUTE, "7 days 5 minutes");
        expect_interval(0, 7, -5 * NANOS_PER_MINUTE, "7 days -5 minutes");
        expect_interval(0, -7, 5 * NANOS_PER_HOUR, "-7 days 5 hours");
        expect_interval(
            0,
            -7,
            -5 * NANOS_PER_HOUR - 5 * NANOS_PER_MINUTE - 5 * NANOS_PER_SECOND,
            "-7 days -5 hours -5 minutes -5 seconds",
        );
        expect_interval(12, 0, 25 * NANOS_PER_MILLIS, "1 year 25 millisecond");
        expect_interval(
            12,
            1,
            (NANOS_PER_SECOND as f64 * 0.000000001_f64) as i64,
            "1 year 1 day 0.000000001 seconds",
        );
        expect_interval(
            12,
            1,
            NANOS_PER_MILLIS / 10,
            "1 year 1 day 0.1 milliseconds",
        );
        expect_interval(12, 1, 1000, "1 year 1 day 1 microsecond");
        expect_interval(12, 1, 1, "1 year 1 day 1 nanoseconds");
        expect_interval(1, 0, -NANOS_PER_SECOND, "1 month -1 second");

        // Sub-day part shared by the two "every unit negative" inputs below
        let negative_time_part = -NANOS_PER_HOUR
            - NANOS_PER_MINUTE
            - NANOS_PER_SECOND
            - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64;
        expect_interval(
            -13,
            -8,
            negative_time_part,
            "-1 year -1 month -1 week -1 day -1 hour -1 minute -1 second -1.11 millisecond",
        );

        // no units
        expect_interval(1, 0, 0, "1");
        expect_interval(42, 0, 0, "42");
        assert_eq!(
            Interval::new(0, 0, 42_000_000_000),
            Interval::parse("42", &IntervalParseConfig::new(IntervalUnit::Second)).unwrap()
        );

        // shorter units
        expect_interval(1, 0, 0, "1 mon");
        expect_interval(1, 0, 0, "1 mons");
        expect_interval(0, 0, 1_000_000, "1 ms");
        expect_interval(0, 0, 1_000, "1 us");

        // no space
        expect_interval(0, 0, 1_000, "1us");
        expect_interval(0, 0, NANOS_PER_SECOND, "1s");
        expect_interval(1, 2, 10_864_000_000_000, "1mon 2days 3hr 1min 4sec");
        expect_interval(
            -13,
            -8,
            negative_time_part,
            "-1year -1month -1week -1day -1 hour -1 minute -1 second -1.11millisecond",
        );

        expect_error(
            "1h s",
            r#"Parser error: Invalid input syntax for type interval: "1h s""#,
        );
        expect_error(
            "1XX",
            r#"Parser error: Invalid input syntax for type interval: "1XX""#,
        );
    }
2353
2354
    #[test]
2355
    // Checks that `Interval::parse` fails when the same unit is given more than
    // once, and pins the `Debug` rendering of the resulting error.
    fn test_duplicate_interval_type() {
2356
        let config = IntervalParseConfig::new(IntervalUnit::Month);
2357
2358
        // "second" appears twice, so parsing is expected to return an error
        let err = Interval::parse("1 month 1 second 1 second", &config)
2359
            .expect_err("parsing interval should have failed");
2360
        assert_eq!(
2361
            r#"ParseError("Invalid input syntax for type interval: \"1 month 1 second 1 second\". Repeated type 'second'")"#,
2362
            format!("{err:?}")
2363
        );
2364
2365
        // test with singular and plural forms
2366
        let err = Interval::parse("1 century 2 centuries", &config)
2367
            .expect_err("parsing interval should have failed");
2368
        // the expected message names the second spelling as written in the input ('centuries')
        assert_eq!(
2369
            r#"ParseError("Invalid input syntax for type interval: \"1 century 2 centuries\". Repeated type 'centuries'")"#,
2370
            format!("{err:?}")
2371
        );
2372
    }
2373
2374
    #[test]
2375
    // Exercises `IntervalAmount::from_str` on integer, fractional, negative and
    // malformed inputs, comparing against `IntervalAmount::new(integer, frac)`.
    fn test_interval_amount_parsing() {
2376
        // integer
2377
        let result = IntervalAmount::from_str("123").unwrap();
2378
        let expected = IntervalAmount::new(123, 0);
2379
2380
        assert_eq!(result, expected);
2381
2382
        // positive w/ fractional
2383
        let result = IntervalAmount::from_str("0.3").unwrap();
2384
        // ".3" is expected as 3 * 10^(INTERVAL_PRECISION - 1) in the fractional field
        let expected = IntervalAmount::new(0, 3 * 10_i64.pow(INTERVAL_PRECISION - 1));
2385
2386
        assert_eq!(result, expected);
2387
2388
        // negative w/ fractional
2389
        let result = IntervalAmount::from_str("-3.5").unwrap();
2390
        // both the integer and the fractional field are expected to carry the sign
        let expected = IntervalAmount::new(-3, -5 * 10_i64.pow(INTERVAL_PRECISION - 1));
2391
2392
        assert_eq!(result, expected);
2393
2394
        // invalid: missing fractional
2395
        let result = IntervalAmount::from_str("3.");
2396
        assert!(result.is_err());
2397
2398
        // invalid: sign in fractional
2399
        let result = IntervalAmount::from_str("3.-5");
2400
        assert!(result.is_err());
2401
    }
2402
2403
    #[test]
2404
    // Checks that a large day count with a fractional part parses to the exact
    // expected value: the whole days land in the `days` field and the 0.1 day is
    // expected as `NANOS_PER_DAY / 10` nanoseconds.
    fn test_interval_precision() {
2405
        let config = IntervalParseConfig::new(IntervalUnit::Month);
2406
2407
        let result = Interval::parse("100000.1 days", &config).unwrap();
2408
        let expected = Interval::new(0_i32, 100_000_i32, NANOS_PER_DAY / 10);
2409
2410
        assert_eq!(result, expected);
2411
    }
2412
2413
    #[test]
2414
    // Exercises `Interval::add(amount, unit)` for each unit from century down to
    // minute. Every case starts from a known `Interval::new(months, days, nanos)`
    // and compares the result with a hand-computed expectation.
    fn test_interval_addition() {
2415
        // add 4.1 centuries
2416
        let start = Interval::new(1, 2, 3);
2417
        // 4.1 centuries = 4920 months, added to the 1 starting month
        let expected = Interval::new(4921, 2, 3);
2418
2419
        let result = start
2420
            .add(
2421
                IntervalAmount::new(4, 10_i64.pow(INTERVAL_PRECISION - 1)),
2422
                IntervalUnit::Century,
2423
            )
2424
            .unwrap();
2425
2426
        assert_eq!(result, expected);
2427
2428
        // add 10.25 decades
2429
        let start = Interval::new(1, 2, 3);
2430
        // 10.25 decades = 1230 months, added to the 1 starting month
        let expected = Interval::new(1231, 2, 3);
2431
2432
        let result = start
2433
            .add(
2434
                IntervalAmount::new(10, 25 * 10_i64.pow(INTERVAL_PRECISION - 2)),
2435
                IntervalUnit::Decade,
2436
            )
2437
            .unwrap();
2438
2439
        assert_eq!(result, expected);
2440
2441
        // add 30.3 years (reminder: Postgres logic does not spill to days/nanos when interval is larger than a month)
2442
        let start = Interval::new(1, 2, 3);
2443
        // 30.3 years = 363.6 months; the expectation keeps only 363 whole months (1 + 363 = 364)
        let expected = Interval::new(364, 2, 3);
2444
2445
        let result = start
2446
            .add(
2447
                IntervalAmount::new(30, 3 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2448
                IntervalUnit::Year,
2449
            )
2450
            .unwrap();
2451
2452
        assert_eq!(result, expected);
2453
2454
        // add 1.5 months
2455
        let start = Interval::new(1, 2, 3);
2456
        // the half month is expected to show up as 15 extra days (2 + 15 = 17)
        let expected = Interval::new(2, 17, 3);
2457
2458
        let result = start
2459
            .add(
2460
                IntervalAmount::new(1, 5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2461
                IntervalUnit::Month,
2462
            )
2463
            .unwrap();
2464
2465
        assert_eq!(result, expected);
2466
2467
        // add -2 weeks
2468
        let start = Interval::new(1, 25, 3);
2469
        // -2 weeks = -14 days (25 - 14 = 11)
        let expected = Interval::new(1, 11, 3);
2470
2471
        let result = start
2472
            .add(IntervalAmount::new(-2, 0), IntervalUnit::Week)
2473
            .unwrap();
2474
2475
        assert_eq!(result, expected);
2476
2477
        // add 2.2 days
2478
        let start = Interval::new(12, 15, 3);
2479
        // 0.2 day = 17_280 seconds, expected in the nanosecond field
        let expected = Interval::new(12, 17, 3 + 17_280 * NANOS_PER_SECOND);
2480
2481
        let result = start
2482
            .add(
2483
                IntervalAmount::new(2, 2 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2484
                IntervalUnit::Day,
2485
            )
2486
            .unwrap();
2487
2488
        assert_eq!(result, expected);
2489
2490
        // add 12.5 hours
2491
        let start = Interval::new(1, 2, 3);
2492
        // 12.5 hours = 45_000 seconds
        let expected = Interval::new(1, 2, 3 + 45_000 * NANOS_PER_SECOND);
2493
2494
        let result = start
2495
            .add(
2496
                IntervalAmount::new(12, 5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2497
                IntervalUnit::Hour,
2498
            )
2499
            .unwrap();
2500
2501
        assert_eq!(result, expected);
2502
2503
        // add -1.5 minutes
2504
        let start = Interval::new(0, 0, -3);
2505
        // -1.5 minutes = -90 seconds = -90_000_000_000 ns
        let expected = Interval::new(0, 0, -90_000_000_000 - 3);
2506
2507
        let result = start
2508
            .add(
2509
                IntervalAmount::new(-1, -5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2510
                IntervalUnit::Minute,
2511
            )
2512
            .unwrap();
2513
2514
        assert_eq!(result, expected);
2515
    }
2516
2517
    #[test]
2518
    // Checks that `parse_timestamp` rejects a timestamp in the year 1677 and that
    // the error text ends with `ERR_NANOSECONDS_NOT_SUPPORTED`.
    fn string_to_timestamp_old() {
2519
        parse_timestamp("1677-06-14T07:29:01.256")
2520
            // the message check runs inside `map_err`, i.e. only on the error path
            .map_err(|e| assert!(e.to_string().ends_with(ERR_NANOSECONDS_NOT_SUPPORTED)))
2521
            // panics if parsing unexpectedly succeeded
            .unwrap_err();
2522
    }
2523
2524
    #[test]
2525
    fn test_parse_decimal_with_parameter() {
2526
        let tests = [
2527
            ("0", 0i128),
2528
            ("123.123", 123123i128),
2529
            ("123.1234", 123123i128),
2530
            ("123.1", 123100i128),
2531
            ("123", 123000i128),
2532
            ("-123.123", -123123i128),
2533
            ("-123.1234", -123123i128),
2534
            ("-123.1", -123100i128),
2535
            ("-123", -123000i128),
2536
            ("0.0000123", 0i128),
2537
            ("12.", 12000i128),
2538
            ("-12.", -12000i128),
2539
            ("00.1", 100i128),
2540
            ("-00.1", -100i128),
2541
            ("12345678912345678.1234", 12345678912345678123i128),
2542
            ("-12345678912345678.1234", -12345678912345678123i128),
2543
            ("99999999999999999.999", 99999999999999999999i128),
2544
            ("-99999999999999999.999", -99999999999999999999i128),
2545
            (".123", 123i128),
2546
            ("-.123", -123i128),
2547
            ("123.", 123000i128),
2548
            ("-123.", -123000i128),
2549
        ];
2550
        for (s, i) in tests {
2551
            let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2552
            assert_eq!(i, result_128.unwrap());
2553
            let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2554
            assert_eq!(i256::from_i128(i), result_256.unwrap());
2555
        }
2556
2557
        let e_notation_tests = [
2558
            ("1.23e3", "1230.0", 2),
2559
            ("5.6714e+2", "567.14", 4),
2560
            ("5.6714e-2", "0.056714", 4),
2561
            ("5.6714e-2", "0.056714", 3),
2562
            ("5.6741214125e2", "567.41214125", 4),
2563
            ("8.91E4", "89100.0", 2),
2564
            ("3.14E+5", "314000.0", 2),
2565
            ("2.718e0", "2.718", 2),
2566
            ("9.999999e-1", "0.9999999", 4),
2567
            ("1.23e+3", "1230", 2),
2568
            ("1.234559e+3", "1234.559", 2),
2569
            ("1.00E-10", "0.0000000001", 11),
2570
            ("1.23e-4", "0.000123", 2),
2571
            ("9.876e7", "98760000.0", 2),
2572
            ("5.432E+8", "543200000.0", 10),
2573
            ("1.234567e9", "1234567000.0", 2),
2574
            ("1.234567e2", "123.45670000", 2),
2575
            ("4749.3e-5", "0.047493", 10),
2576
            ("4749.3e+5", "474930000", 10),
2577
            ("4749.3e-5", "0.047493", 1),
2578
            ("4749.3e+5", "474930000", 1),
2579
            ("0E-8", "0", 10),
2580
            ("0E+6", "0", 10),
2581
            ("1E-8", "0.00000001", 10),
2582
            ("12E+6", "12000000", 10),
2583
            ("12E-6", "0.000012", 10),
2584
            ("0.1e-6", "0.0000001", 10),
2585
            ("0.1e+6", "100000", 10),
2586
            ("0.12e-6", "0.00000012", 10),
2587
            ("0.12e+6", "120000", 10),
2588
            ("000000000001e0", "000000000001", 3),
2589
            ("000001.1034567002e0", "000001.1034567002", 3),
2590
            ("1.234e16", "12340000000000000", 0),
2591
            ("123.4e16", "1234000000000000000", 0),
2592
        ];
2593
        for (e, d, scale) in e_notation_tests {
2594
            let result_128_e = parse_decimal::<Decimal128Type>(e, 20, scale);
2595
            let result_128_d = parse_decimal::<Decimal128Type>(d, 20, scale);
2596
            assert_eq!(result_128_e.unwrap(), result_128_d.unwrap());
2597
            let result_256_e = parse_decimal::<Decimal256Type>(e, 20, scale);
2598
            let result_256_d = parse_decimal::<Decimal256Type>(d, 20, scale);
2599
            assert_eq!(result_256_e.unwrap(), result_256_d.unwrap());
2600
        }
2601
        let can_not_parse_tests = [
2602
            "123,123",
2603
            ".",
2604
            "123.123.123",
2605
            "",
2606
            "+",
2607
            "-",
2608
            "e",
2609
            "1.3e+e3",
2610
            "5.6714ee-2",
2611
            "4.11ee-+4",
2612
            "4.11e++4",
2613
            "1.1e.12",
2614
            "1.23e+3.",
2615
            "1.23e+3.1",
2616
        ];
2617
        for s in can_not_parse_tests {
2618
            let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2619
            assert_eq!(
2620
                format!("Parser error: can't parse the string value {s} to decimal"),
2621
                result_128.unwrap_err().to_string()
2622
            );
2623
            let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2624
            assert_eq!(
2625
                format!("Parser error: can't parse the string value {s} to decimal"),
2626
                result_256.unwrap_err().to_string()
2627
            );
2628
        }
2629
        let overflow_parse_tests = [
2630
            ("12345678", 3),
2631
            ("1.2345678e7", 3),
2632
            ("12345678.9", 3),
2633
            ("1.23456789e+7", 3),
2634
            ("99999999.99", 3),
2635
            ("9.999999999e7", 3),
2636
            ("12345678908765.123456", 3),
2637
            ("123456789087651234.56e-4", 3),
2638
            ("1234560000000", 0),
2639
            ("1.23456e12", 0),
2640
        ];
2641
        for (s, scale) in overflow_parse_tests {
2642
            let result_128 = parse_decimal::<Decimal128Type>(s, 10, scale);
2643
            let expected_128 = "Parser error: parse decimal overflow";
2644
            let actual_128 = result_128.unwrap_err().to_string();
2645
2646
            assert!(
2647
                actual_128.contains(expected_128),
2648
                "actual: '{actual_128}', expected: '{expected_128}'"
2649
            );
2650
2651
            let result_256 = parse_decimal::<Decimal256Type>(s, 10, scale);
2652
            let expected_256 = "Parser error: parse decimal overflow";
2653
            let actual_256 = result_256.unwrap_err().to_string();
2654
2655
            assert!(
2656
                actual_256.contains(expected_256),
2657
                "actual: '{actual_256}', expected: '{expected_256}'"
2658
            );
2659
        }
2660
2661
        let edge_tests_128 = [
2662
            (
2663
                "99999999999999999999999999999999999999",
2664
                99999999999999999999999999999999999999i128,
2665
                0,
2666
            ),
2667
            (
2668
                "999999999999999999999999999999999999.99",
2669
                99999999999999999999999999999999999999i128,
2670
                2,
2671
            ),
2672
            (
2673
                "9999999999999999999999999.9999999999999",
2674
                99999999999999999999999999999999999999i128,
2675
                13,
2676
            ),
2677
            (
2678
                "9999999999999999999999999",
2679
                99999999999999999999999990000000000000i128,
2680
                13,
2681
            ),
2682
            (
2683
                "0.99999999999999999999999999999999999999",
2684
                99999999999999999999999999999999999999i128,
2685
                38,
2686
            ),
2687
            (
2688
                "0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001016744",
2689
                0i128,
2690
                15,
2691
            ),
2692
            (
2693
                "1.016744e-320",
2694
                0i128,
2695
                15,
2696
            ),
2697
            (
2698
                "-1e3",
2699
                -1000000000i128,
2700
                6,
2701
            ),
2702
            (
2703
                "+1e3",
2704
                1000000000i128,
2705
                6,
2706
            ),
2707
            (
2708
                "-1e31",
2709
                -10000000000000000000000000000000000000i128,
2710
                6,
2711
            ),
2712
        ];
2713
        for (s, i, scale) in edge_tests_128 {
2714
            let result_128 = parse_decimal::<Decimal128Type>(s, 38, scale);
2715
            assert_eq!(i, result_128.unwrap());
2716
        }
2717
        let edge_tests_256 = [
2718
            (
2719
                "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2720
                i256::from_string(
2721
                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2722
                )
2723
                .unwrap(),
2724
                0,
2725
            ),
2726
            (
2727
                "999999999999999999999999999999999999999999999999999999999999999999999999.9999",
2728
                i256::from_string(
2729
                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2730
                )
2731
                .unwrap(),
2732
                4,
2733
            ),
2734
            (
2735
                "99999999999999999999999999999999999999999999999999.99999999999999999999999999",
2736
                i256::from_string(
2737
                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2738
                )
2739
                .unwrap(),
2740
                26,
2741
            ),
2742
            (
2743
                "9.999999999999999999999999999999999999999999999999999999999999999999999999999e49",
2744
                i256::from_string(
2745
                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2746
                )
2747
                .unwrap(),
2748
                26,
2749
            ),
2750
            (
2751
                "99999999999999999999999999999999999999999999999999",
2752
                i256::from_string(
2753
                    "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2754
                )
2755
                .unwrap(),
2756
                26,
2757
            ),
2758
            (
2759
                "9.9999999999999999999999999999999999999999999999999e+49",
2760
                i256::from_string(
2761
                    "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2762
                )
2763
                .unwrap(),
2764
                26,
2765
            ),
2766
        ];
2767
        for (s, i, scale) in edge_tests_256 {
2768
            let result = parse_decimal::<Decimal256Type>(s, 76, scale);
2769
            assert_eq!(i, result.unwrap());
2770
        }
2771
    }
2772
2773
    #[test]
2774
    // Checks that `parse` returns `None` for the empty string on integer, float,
    // timestamp and date types, and for a lone "+" on the integer and float types.
    fn test_parse_empty() {
2775
        // empty input
        assert_eq!(Int32Type::parse(""), None);
2776
        assert_eq!(Int64Type::parse(""), None);
2777
        assert_eq!(UInt32Type::parse(""), None);
2778
        assert_eq!(UInt64Type::parse(""), None);
2779
        assert_eq!(Float32Type::parse(""), None);
2780
        assert_eq!(Float64Type::parse(""), None);
2781
        // a sign with no digits
        assert_eq!(Int32Type::parse("+"), None);
2782
        assert_eq!(Int64Type::parse("+"), None);
2783
        assert_eq!(UInt32Type::parse("+"), None);
2784
        assert_eq!(UInt64Type::parse("+"), None);
2785
        assert_eq!(Float32Type::parse("+"), None);
2786
        assert_eq!(Float64Type::parse("+"), None);
2787
        // temporal types, empty input only
        assert_eq!(TimestampNanosecondType::parse(""), None);
2788
        assert_eq!(Date32Type::parse(""), None);
2789
    }
2790
2791
    #[test]
2792
    // Checks that `parse_interval_month_day_nano_config` turns the unit-less input
    // "1" into exactly one second when the config is built with `IntervalUnit::Second`.
    fn test_parse_interval_month_day_nano_config() {
2793
        // NOTE(review): the config's unit appears to act as the default for inputs
        // that carry no unit -- confirm against `IntervalParseConfig`
        let interval = parse_interval_month_day_nano_config(
2794
            "1",
2795
            IntervalParseConfig::new(IntervalUnit::Second),
2796
        )
2797
        .unwrap();
2798
        assert_eq!(interval.months, 0);
2799
        assert_eq!(interval.days, 0);
2800
        assert_eq!(interval.nanoseconds, NANOS_PER_SECOND);
2801
    }
2802
}