/Users/andrewlamb/Software/arrow-rs/arrow-cast/src/parse.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! [`Parser`] implementations for converting strings to Arrow types |
19 | | //! |
20 | | //! Used by the CSV and JSON readers to convert strings to Arrow types |
21 | | use arrow_array::timezone::Tz; |
22 | | use arrow_array::types::*; |
23 | | use arrow_array::ArrowNativeTypeOp; |
24 | | use arrow_buffer::ArrowNativeType; |
25 | | use arrow_schema::ArrowError; |
26 | | use chrono::prelude::*; |
27 | | use half::f16; |
28 | | use std::str::FromStr; |
29 | | |
/// Converts the first `N` entries of `digits` into a nanosecond count.
///
/// The offset `O` is subtracted (wrapping) from every entry before it is used
/// as a decimal digit: pass `b'0'` for raw ASCII bytes, or `0` when the values
/// have already been normalised. The `N` digits are treated as the leading
/// digits of a nine-digit fraction, so the result is scaled by `10^(9 - N)`.
///
/// Panics if `digits` holds fewer than `N` entries.
#[inline]
fn parse_nanos<const N: usize, const O: u8>(digits: &[u8]) -> u32 {
    let mut value = 0_u32;
    for &digit in &digits[..N] {
        value = value * 10 + u32::from(digit.wrapping_sub(O));
    }
    value * 10_u32.pow((9 - N) as u32)
}
38 | | |
39 | | /// Helper for parsing RFC3339 timestamps |
40 | | struct TimestampParser { |
41 | | /// The timestamp bytes to parse minus `b'0'` |
42 | | /// |
43 | | /// This makes interpretation as an integer inexpensive |
44 | | digits: [u8; 32], |
45 | | /// A mask containing a `1` bit where the corresponding byte is a valid ASCII digit |
46 | | mask: u32, |
47 | | } |
48 | | |
49 | | impl TimestampParser { |
50 | 0 | fn new(bytes: &[u8]) -> Self { |
51 | 0 | let mut digits = [0; 32]; |
52 | 0 | let mut mask = 0; |
53 | | |
54 | | // Treating all bytes the same way, helps LLVM vectorise this correctly |
55 | 0 | for (idx, (o, i)) in digits.iter_mut().zip(bytes).enumerate() { |
56 | 0 | *o = i.wrapping_sub(b'0'); |
57 | 0 | mask |= ((*o < 10) as u32) << idx |
58 | | } |
59 | | |
60 | 0 | Self { digits, mask } |
61 | 0 | } |
62 | | |
63 | | /// Returns true if the byte at `idx` in the original string equals `b` |
64 | 0 | fn test(&self, idx: usize, b: u8) -> bool { |
65 | 0 | self.digits[idx] == b.wrapping_sub(b'0') |
66 | 0 | } |
67 | | |
68 | | /// Parses a date of the form `1997-01-31` |
69 | 0 | fn date(&self) -> Option<NaiveDate> { |
70 | 0 | if self.mask & 0b1111111111 != 0b1101101111 || !self.test(4, b'-') || !self.test(7, b'-') { |
71 | 0 | return None; |
72 | 0 | } |
73 | | |
74 | 0 | let year = self.digits[0] as u16 * 1000 |
75 | 0 | + self.digits[1] as u16 * 100 |
76 | 0 | + self.digits[2] as u16 * 10 |
77 | 0 | + self.digits[3] as u16; |
78 | | |
79 | 0 | let month = self.digits[5] * 10 + self.digits[6]; |
80 | 0 | let day = self.digits[8] * 10 + self.digits[9]; |
81 | | |
82 | 0 | NaiveDate::from_ymd_opt(year as _, month as _, day as _) |
83 | 0 | } |
84 | | |
85 | | /// Parses a time of any of forms |
86 | | /// - `09:26:56` |
87 | | /// - `09:26:56.123` |
88 | | /// - `09:26:56.123456` |
89 | | /// - `09:26:56.123456789` |
90 | | /// - `092656` |
91 | | /// |
92 | | /// Returning the end byte offset |
93 | 0 | fn time(&self) -> Option<(NaiveTime, usize)> { |
94 | | // Make a NaiveTime handling leap seconds |
95 | 0 | let time = |hour, min, sec, nano| match sec { |
96 | | 60 => { |
97 | 0 | let nano = 1_000_000_000 + nano; |
98 | 0 | NaiveTime::from_hms_nano_opt(hour as _, min as _, 59, nano) |
99 | | } |
100 | 0 | _ => NaiveTime::from_hms_nano_opt(hour as _, min as _, sec as _, nano), |
101 | 0 | }; |
102 | | |
103 | 0 | match (self.mask >> 11) & 0b11111111 { |
104 | | // 09:26:56 |
105 | 0 | 0b11011011 if self.test(13, b':') && self.test(16, b':') => { |
106 | 0 | let hour = self.digits[11] * 10 + self.digits[12]; |
107 | 0 | let minute = self.digits[14] * 10 + self.digits[15]; |
108 | 0 | let second = self.digits[17] * 10 + self.digits[18]; |
109 | | |
110 | 0 | match self.test(19, b'.') { |
111 | | true => { |
112 | 0 | let digits = (self.mask >> 20).trailing_ones(); |
113 | 0 | let nanos = match digits { |
114 | 0 | 0 => return None, |
115 | 0 | 1 => parse_nanos::<1, 0>(&self.digits[20..21]), |
116 | 0 | 2 => parse_nanos::<2, 0>(&self.digits[20..22]), |
117 | 0 | 3 => parse_nanos::<3, 0>(&self.digits[20..23]), |
118 | 0 | 4 => parse_nanos::<4, 0>(&self.digits[20..24]), |
119 | 0 | 5 => parse_nanos::<5, 0>(&self.digits[20..25]), |
120 | 0 | 6 => parse_nanos::<6, 0>(&self.digits[20..26]), |
121 | 0 | 7 => parse_nanos::<7, 0>(&self.digits[20..27]), |
122 | 0 | 8 => parse_nanos::<8, 0>(&self.digits[20..28]), |
123 | 0 | _ => parse_nanos::<9, 0>(&self.digits[20..29]), |
124 | | }; |
125 | 0 | Some((time(hour, minute, second, nanos)?, 20 + digits as usize)) |
126 | | } |
127 | 0 | false => Some((time(hour, minute, second, 0)?, 19)), |
128 | | } |
129 | | } |
130 | | // 092656 |
131 | | 0b111111 => { |
132 | 0 | let hour = self.digits[11] * 10 + self.digits[12]; |
133 | 0 | let minute = self.digits[13] * 10 + self.digits[14]; |
134 | 0 | let second = self.digits[15] * 10 + self.digits[16]; |
135 | 0 | let time = time(hour, minute, second, 0)?; |
136 | 0 | Some((time, 17)) |
137 | | } |
138 | 0 | _ => None, |
139 | | } |
140 | 0 | } |
141 | | } |
142 | | |
143 | | /// Accepts a string and parses it relative to the provided `timezone` |
144 | | /// |
145 | | /// In addition to RFC3339 / ISO8601 standard timestamps, it also |
146 | | /// accepts strings that use a space ` ` to separate the date and time |
147 | | /// as well as strings that have no explicit timezone offset. |
148 | | /// |
149 | | /// Examples of accepted inputs: |
150 | | /// * `1997-01-31T09:26:56.123Z` # RCF3339 |
151 | | /// * `1997-01-31T09:26:56.123-05:00` # RCF3339 |
152 | | /// * `1997-01-31 09:26:56.123-05:00` # close to RCF3339 but with a space rather than T |
153 | | /// * `2023-01-01 04:05:06.789 -08` # close to RCF3339, no fractional seconds or time separator |
154 | | /// * `1997-01-31T09:26:56.123` # close to RCF3339 but no timezone offset specified |
155 | | /// * `1997-01-31 09:26:56.123` # close to RCF3339 but uses a space and no timezone offset |
156 | | /// * `1997-01-31 09:26:56` # close to RCF3339, no fractional seconds |
157 | | /// * `1997-01-31 092656` # close to RCF3339, no fractional seconds |
158 | | /// * `1997-01-31 092656+04:00` # close to RCF3339, no fractional seconds or time separator |
159 | | /// * `1997-01-31` # close to RCF3339, only date no time |
160 | | /// |
161 | | /// [IANA timezones] are only supported if the `arrow-array/chrono-tz` feature is enabled |
162 | | /// |
163 | | /// * `2023-01-01 040506 America/Los_Angeles` |
164 | | /// |
165 | | /// If a timestamp is ambiguous, for example as a result of daylight-savings time, an error |
166 | | /// will be returned |
167 | | /// |
168 | | /// Some formats supported by PostgresSql <https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-DATETIME-TIME-TABLE> |
169 | | /// are not supported, like |
170 | | /// |
171 | | /// * "2023-01-01 04:05:06.789 +07:30:00", |
172 | | /// * "2023-01-01 040506 +07:30:00", |
173 | | /// * "2023-01-01 04:05:06.789 PST", |
174 | | /// |
175 | | /// [IANA timezones]: https://www.iana.org/time-zones |
176 | 0 | pub fn string_to_datetime<T: TimeZone>(timezone: &T, s: &str) -> Result<DateTime<T>, ArrowError> { |
177 | 0 | let err = |
178 | 0 | |ctx: &str| ArrowError::ParseError(format!("Error parsing timestamp from '{s}': {ctx}")); |
179 | | |
180 | 0 | let bytes = s.as_bytes(); |
181 | 0 | if bytes.len() < 10 { |
182 | 0 | return Err(err("timestamp must contain at least 10 characters")); |
183 | 0 | } |
184 | | |
185 | 0 | let parser = TimestampParser::new(bytes); |
186 | 0 | let date = parser.date().ok_or_else(|| err("error parsing date"))?; |
187 | 0 | if bytes.len() == 10 { |
188 | 0 | let datetime = date.and_time(NaiveTime::from_hms_opt(0, 0, 0).unwrap()); |
189 | 0 | return timezone |
190 | 0 | .from_local_datetime(&datetime) |
191 | 0 | .single() |
192 | 0 | .ok_or_else(|| err("error computing timezone offset")); |
193 | 0 | } |
194 | | |
195 | 0 | if !parser.test(10, b'T') && !parser.test(10, b't') && !parser.test(10, b' ') { |
196 | 0 | return Err(err("invalid timestamp separator")); |
197 | 0 | } |
198 | | |
199 | 0 | let (time, mut tz_offset) = parser.time().ok_or_else(|| err("error parsing time"))?; |
200 | 0 | let datetime = date.and_time(time); |
201 | | |
202 | 0 | if tz_offset == 32 { |
203 | | // Decimal overrun |
204 | 0 | while tz_offset < bytes.len() && bytes[tz_offset].is_ascii_digit() { |
205 | 0 | tz_offset += 1; |
206 | 0 | } |
207 | 0 | } |
208 | | |
209 | 0 | if bytes.len() <= tz_offset { |
210 | 0 | return timezone |
211 | 0 | .from_local_datetime(&datetime) |
212 | 0 | .single() |
213 | 0 | .ok_or_else(|| err("error computing timezone offset")); |
214 | 0 | } |
215 | | |
216 | 0 | if (bytes[tz_offset] == b'z' || bytes[tz_offset] == b'Z') && tz_offset == bytes.len() - 1 { |
217 | 0 | return Ok(timezone.from_utc_datetime(&datetime)); |
218 | 0 | } |
219 | | |
220 | | // Parse remainder of string as timezone |
221 | 0 | let parsed_tz: Tz = s[tz_offset..].trim_start().parse()?; |
222 | 0 | let parsed = parsed_tz |
223 | 0 | .from_local_datetime(&datetime) |
224 | 0 | .single() |
225 | 0 | .ok_or_else(|| err("error computing timezone offset"))?; |
226 | | |
227 | 0 | Ok(parsed.with_timezone(timezone)) |
228 | 0 | } |
229 | | |
230 | | /// Accepts a string in RFC3339 / ISO8601 standard format and some |
231 | | /// variants and converts it to a nanosecond precision timestamp. |
232 | | /// |
233 | | /// See [`string_to_datetime`] for the full set of supported formats |
234 | | /// |
235 | | /// Implements the `to_timestamp` function to convert a string to a |
236 | | /// timestamp, following the model of spark SQL’s to_`timestamp`. |
237 | | /// |
238 | | /// Internally, this function uses the `chrono` library for the |
239 | | /// datetime parsing |
240 | | /// |
241 | | /// We hope to extend this function in the future with a second |
242 | | /// parameter to specifying the format string. |
243 | | /// |
244 | | /// ## Timestamp Precision |
245 | | /// |
246 | | /// Function uses the maximum precision timestamps supported by |
247 | | /// Arrow (nanoseconds stored as a 64-bit integer) timestamps. This |
248 | | /// means the range of dates that timestamps can represent is ~1677 AD |
249 | | /// to 2262 AM |
250 | | /// |
251 | | /// ## Timezone / Offset Handling |
252 | | /// |
253 | | /// Numerical values of timestamps are stored compared to offset UTC. |
254 | | /// |
255 | | /// This function interprets string without an explicit time zone as timestamps |
256 | | /// relative to UTC, see [`string_to_datetime`] for alternative semantics |
257 | | /// |
258 | | /// In particular: |
259 | | /// |
260 | | /// ``` |
261 | | /// # use arrow_cast::parse::string_to_timestamp_nanos; |
262 | | /// // Note all three of these timestamps are parsed as the same value |
263 | | /// let a = string_to_timestamp_nanos("1997-01-31 09:26:56.123Z").unwrap(); |
264 | | /// let b = string_to_timestamp_nanos("1997-01-31T09:26:56.123").unwrap(); |
265 | | /// let c = string_to_timestamp_nanos("1997-01-31T14:26:56.123+05:00").unwrap(); |
266 | | /// |
267 | | /// assert_eq!(a, b); |
268 | | /// assert_eq!(b, c); |
269 | | /// ``` |
270 | | /// |
271 | | #[inline] |
272 | 0 | pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> { |
273 | 0 | to_timestamp_nanos(string_to_datetime(&Utc, s)?.naive_utc()) |
274 | 0 | } |
275 | | |
276 | | /// Fallible conversion of [`NaiveDateTime`] to `i64` nanoseconds |
277 | | #[inline] |
278 | 0 | fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64, ArrowError> { |
279 | 0 | dt.and_utc() |
280 | 0 | .timestamp_nanos_opt() |
281 | 0 | .ok_or_else(|| ArrowError::ParseError(ERR_NANOSECONDS_NOT_SUPPORTED.to_string())) |
282 | 0 | } |
283 | | |
284 | | /// Accepts a string in ISO8601 standard format and some |
285 | | /// variants and converts it to nanoseconds since midnight. |
286 | | /// |
287 | | /// Examples of accepted inputs: |
288 | | /// |
289 | | /// * `09:26:56.123 AM` |
290 | | /// * `23:59:59` |
291 | | /// * `6:00 pm` |
292 | | /// |
293 | | /// Internally, this function uses the `chrono` library for the time parsing |
294 | | /// |
295 | | /// ## Timezone / Offset Handling |
296 | | /// |
297 | | /// This function does not support parsing strings with a timezone |
298 | | /// or offset specified, as it considers only time since midnight. |
299 | 0 | pub fn string_to_time_nanoseconds(s: &str) -> Result<i64, ArrowError> { |
300 | 0 | let nt = string_to_time(s) |
301 | 0 | .ok_or_else(|| ArrowError::ParseError(format!("Failed to parse \'{s}\' as time")))?; |
302 | 0 | Ok(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64) |
303 | 0 | } |
304 | | |
305 | 0 | fn string_to_time(s: &str) -> Option<NaiveTime> { |
306 | 0 | let bytes = s.as_bytes(); |
307 | 0 | if bytes.len() < 4 { |
308 | 0 | return None; |
309 | 0 | } |
310 | | |
311 | 0 | let (am, bytes) = match bytes.get(bytes.len() - 3..) { |
312 | 0 | Some(b" AM" | b" am" | b" Am" | b" aM") => (Some(true), &bytes[..bytes.len() - 3]), |
313 | 0 | Some(b" PM" | b" pm" | b" pM" | b" Pm") => (Some(false), &bytes[..bytes.len() - 3]), |
314 | 0 | _ => (None, bytes), |
315 | | }; |
316 | | |
317 | 0 | if bytes.len() < 4 { |
318 | 0 | return None; |
319 | 0 | } |
320 | | |
321 | 0 | let mut digits = [b'0'; 6]; |
322 | | |
323 | | // Extract hour |
324 | 0 | let bytes = match (bytes[1], bytes[2]) { |
325 | | (b':', _) => { |
326 | 0 | digits[1] = bytes[0]; |
327 | 0 | &bytes[2..] |
328 | | } |
329 | | (_, b':') => { |
330 | 0 | digits[0] = bytes[0]; |
331 | 0 | digits[1] = bytes[1]; |
332 | 0 | &bytes[3..] |
333 | | } |
334 | 0 | _ => return None, |
335 | | }; |
336 | | |
337 | 0 | if bytes.len() < 2 { |
338 | 0 | return None; // Minutes required |
339 | 0 | } |
340 | | |
341 | | // Extract minutes |
342 | 0 | digits[2] = bytes[0]; |
343 | 0 | digits[3] = bytes[1]; |
344 | | |
345 | 0 | let nanoseconds = match bytes.get(2) { |
346 | | Some(b':') => { |
347 | 0 | if bytes.len() < 5 { |
348 | 0 | return None; |
349 | 0 | } |
350 | | |
351 | | // Extract seconds |
352 | 0 | digits[4] = bytes[3]; |
353 | 0 | digits[5] = bytes[4]; |
354 | | |
355 | | // Extract sub-seconds if any |
356 | 0 | match bytes.get(5) { |
357 | | Some(b'.') => { |
358 | 0 | let decimal = &bytes[6..]; |
359 | 0 | if decimal.iter().any(|x| !x.is_ascii_digit()) { |
360 | 0 | return None; |
361 | 0 | } |
362 | 0 | match decimal.len() { |
363 | 0 | 0 => return None, |
364 | 0 | 1 => parse_nanos::<1, b'0'>(decimal), |
365 | 0 | 2 => parse_nanos::<2, b'0'>(decimal), |
366 | 0 | 3 => parse_nanos::<3, b'0'>(decimal), |
367 | 0 | 4 => parse_nanos::<4, b'0'>(decimal), |
368 | 0 | 5 => parse_nanos::<5, b'0'>(decimal), |
369 | 0 | 6 => parse_nanos::<6, b'0'>(decimal), |
370 | 0 | 7 => parse_nanos::<7, b'0'>(decimal), |
371 | 0 | 8 => parse_nanos::<8, b'0'>(decimal), |
372 | 0 | _ => parse_nanos::<9, b'0'>(decimal), |
373 | | } |
374 | | } |
375 | 0 | Some(_) => return None, |
376 | 0 | None => 0, |
377 | | } |
378 | | } |
379 | 0 | Some(_) => return None, |
380 | 0 | None => 0, |
381 | | }; |
382 | | |
383 | 0 | digits.iter_mut().for_each(|x| *x = x.wrapping_sub(b'0')); |
384 | 0 | if digits.iter().any(|x| *x > 9) { |
385 | 0 | return None; |
386 | 0 | } |
387 | | |
388 | 0 | let hour = match (digits[0] * 10 + digits[1], am) { |
389 | 0 | (12, Some(true)) => 0, // 12:00 AM -> 00:00 |
390 | 0 | (h @ 1..=11, Some(true)) => h, // 1:00 AM -> 01:00 |
391 | 0 | (12, Some(false)) => 12, // 12:00 PM -> 12:00 |
392 | 0 | (h @ 1..=11, Some(false)) => h + 12, // 1:00 PM -> 13:00 |
393 | 0 | (_, Some(_)) => return None, |
394 | 0 | (h, None) => h, |
395 | | }; |
396 | | |
397 | | // Handle leap second |
398 | 0 | let (second, nanoseconds) = match digits[4] * 10 + digits[5] { |
399 | 0 | 60 => (59, nanoseconds + 1_000_000_000), |
400 | 0 | s => (s, nanoseconds), |
401 | | }; |
402 | | |
403 | 0 | NaiveTime::from_hms_nano_opt( |
404 | 0 | hour as _, |
405 | 0 | (digits[2] * 10 + digits[3]) as _, |
406 | 0 | second as _, |
407 | 0 | nanoseconds, |
408 | | ) |
409 | 0 | } |
410 | | |
411 | | /// Specialized parsing implementations to convert strings to Arrow types. |
412 | | /// |
413 | | /// This is used by csv and json reader and can be used directly as well. |
414 | | /// |
415 | | /// # Example |
416 | | /// |
417 | | /// To parse a string to a [`Date32Type`]: |
418 | | /// |
419 | | /// ``` |
420 | | /// use arrow_cast::parse::Parser; |
421 | | /// use arrow_array::types::Date32Type; |
422 | | /// let date = Date32Type::parse("2021-01-01").unwrap(); |
423 | | /// assert_eq!(date, 18628); |
424 | | /// ``` |
425 | | /// |
426 | | /// To parse a string to a [`TimestampNanosecondType`]: |
427 | | /// |
428 | | /// ``` |
429 | | /// use arrow_cast::parse::Parser; |
430 | | /// use arrow_array::types::TimestampNanosecondType; |
431 | | /// let ts = TimestampNanosecondType::parse("2021-01-01T00:00:00.123456789Z").unwrap(); |
432 | | /// assert_eq!(ts, 1609459200123456789); |
433 | | /// ``` |
434 | | pub trait Parser: ArrowPrimitiveType { |
435 | | /// Parse a string to the native type |
436 | | fn parse(string: &str) -> Option<Self::Native>; |
437 | | |
438 | | /// Parse a string to the native type with a format string |
439 | | /// |
440 | | /// When not implemented, the format string is unused, and this method is equivalent to [parse](#tymethod.parse) |
441 | | fn parse_formatted(string: &str, _format: &str) -> Option<Self::Native> { |
442 | | Self::parse(string) |
443 | | } |
444 | | } |
445 | | |
446 | | impl Parser for Float16Type { |
447 | 0 | fn parse(string: &str) -> Option<f16> { |
448 | 0 | lexical_core::parse(string.as_bytes()) |
449 | 0 | .ok() |
450 | 0 | .map(f16::from_f32) |
451 | 0 | } |
452 | | } |
453 | | |
454 | | impl Parser for Float32Type { |
455 | 0 | fn parse(string: &str) -> Option<f32> { |
456 | 0 | lexical_core::parse(string.as_bytes()).ok() |
457 | 0 | } |
458 | | } |
459 | | |
460 | | impl Parser for Float64Type { |
461 | 0 | fn parse(string: &str) -> Option<f64> { |
462 | 0 | lexical_core::parse(string.as_bytes()).ok() |
463 | 0 | } |
464 | | } |
465 | | |
466 | | macro_rules! parser_primitive { |
467 | | ($t:ty) => { |
468 | | impl Parser for $t { |
469 | 0 | fn parse(string: &str) -> Option<Self::Native> { |
470 | 0 | if !string.as_bytes().last().is_some_and(|x| x.is_ascii_digit()) { |
471 | 0 | return None; |
472 | 0 | } |
473 | 0 | match atoi::FromRadix10SignedChecked::from_radix_10_signed_checked( |
474 | 0 | string.as_bytes(), |
475 | | ) { |
476 | 0 | (Some(n), x) if x == string.len() => Some(n), |
477 | 0 | _ => None, |
478 | | } |
479 | 0 | } |
480 | | } |
481 | | }; |
482 | | } |
483 | | parser_primitive!(UInt64Type); |
484 | | parser_primitive!(UInt32Type); |
485 | | parser_primitive!(UInt16Type); |
486 | | parser_primitive!(UInt8Type); |
487 | | parser_primitive!(Int64Type); |
488 | | parser_primitive!(Int32Type); |
489 | | parser_primitive!(Int16Type); |
490 | | parser_primitive!(Int8Type); |
491 | | parser_primitive!(DurationNanosecondType); |
492 | | parser_primitive!(DurationMicrosecondType); |
493 | | parser_primitive!(DurationMillisecondType); |
494 | | parser_primitive!(DurationSecondType); |
495 | | |
496 | | impl Parser for TimestampNanosecondType { |
497 | 0 | fn parse(string: &str) -> Option<i64> { |
498 | 0 | string_to_timestamp_nanos(string).ok() |
499 | 0 | } |
500 | | } |
501 | | |
502 | | impl Parser for TimestampMicrosecondType { |
503 | 0 | fn parse(string: &str) -> Option<i64> { |
504 | 0 | let nanos = string_to_timestamp_nanos(string).ok(); |
505 | 0 | nanos.map(|x| x / 1000) |
506 | 0 | } |
507 | | } |
508 | | |
509 | | impl Parser for TimestampMillisecondType { |
510 | 0 | fn parse(string: &str) -> Option<i64> { |
511 | 0 | let nanos = string_to_timestamp_nanos(string).ok(); |
512 | 0 | nanos.map(|x| x / 1_000_000) |
513 | 0 | } |
514 | | } |
515 | | |
516 | | impl Parser for TimestampSecondType { |
517 | 0 | fn parse(string: &str) -> Option<i64> { |
518 | 0 | let nanos = string_to_timestamp_nanos(string).ok(); |
519 | 0 | nanos.map(|x| x / 1_000_000_000) |
520 | 0 | } |
521 | | } |
522 | | |
523 | | impl Parser for Time64NanosecondType { |
524 | | // Will truncate any fractions of a nanosecond |
525 | 0 | fn parse(string: &str) -> Option<Self::Native> { |
526 | 0 | string_to_time_nanoseconds(string) |
527 | 0 | .ok() |
528 | 0 | .or_else(|| string.parse::<Self::Native>().ok()) |
529 | 0 | } |
530 | | |
531 | 0 | fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> { |
532 | 0 | let nt = NaiveTime::parse_from_str(string, format).ok()?; |
533 | 0 | Some(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64) |
534 | 0 | } |
535 | | } |
536 | | |
537 | | impl Parser for Time64MicrosecondType { |
538 | | // Will truncate any fractions of a microsecond |
539 | 0 | fn parse(string: &str) -> Option<Self::Native> { |
540 | 0 | string_to_time_nanoseconds(string) |
541 | 0 | .ok() |
542 | 0 | .map(|nanos| nanos / 1_000) |
543 | 0 | .or_else(|| string.parse::<Self::Native>().ok()) |
544 | 0 | } |
545 | | |
546 | 0 | fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> { |
547 | 0 | let nt = NaiveTime::parse_from_str(string, format).ok()?; |
548 | 0 | Some(nt.num_seconds_from_midnight() as i64 * 1_000_000 + nt.nanosecond() as i64 / 1_000) |
549 | 0 | } |
550 | | } |
551 | | |
552 | | impl Parser for Time32MillisecondType { |
553 | | // Will truncate any fractions of a millisecond |
554 | 0 | fn parse(string: &str) -> Option<Self::Native> { |
555 | 0 | string_to_time_nanoseconds(string) |
556 | 0 | .ok() |
557 | 0 | .map(|nanos| (nanos / 1_000_000) as i32) |
558 | 0 | .or_else(|| string.parse::<Self::Native>().ok()) |
559 | 0 | } |
560 | | |
561 | 0 | fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> { |
562 | 0 | let nt = NaiveTime::parse_from_str(string, format).ok()?; |
563 | 0 | Some(nt.num_seconds_from_midnight() as i32 * 1_000 + nt.nanosecond() as i32 / 1_000_000) |
564 | 0 | } |
565 | | } |
566 | | |
567 | | impl Parser for Time32SecondType { |
568 | | // Will truncate any fractions of a second |
569 | 0 | fn parse(string: &str) -> Option<Self::Native> { |
570 | 0 | string_to_time_nanoseconds(string) |
571 | 0 | .ok() |
572 | 0 | .map(|nanos| (nanos / 1_000_000_000) as i32) |
573 | 0 | .or_else(|| string.parse::<Self::Native>().ok()) |
574 | 0 | } |
575 | | |
576 | 0 | fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> { |
577 | 0 | let nt = NaiveTime::parse_from_str(string, format).ok()?; |
578 | 0 | Some(nt.num_seconds_from_midnight() as i32 + nt.nanosecond() as i32 / 1_000_000_000) |
579 | 0 | } |
580 | | } |
581 | | |
/// Number of days between 0001-01-01 and 1970-01-01
///
/// Subtracted from chrono's `num_days_from_ce()` to obtain a day count
/// relative to the Unix epoch
const EPOCH_DAYS_FROM_CE: i32 = 719_163;

/// Error message returned when a datetime cannot be represented as an `i64`
/// count of nanoseconds since the Unix epoch
const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804";
587 | | |
588 | 0 | fn parse_date(string: &str) -> Option<NaiveDate> { |
589 | | // If the date has an extended (signed) year such as "+10999-12-31" or "-0012-05-06" |
590 | | // |
591 | | // According to [ISO 8601], years have: |
592 | | // Four digits or more for the year. Years in the range 0000 to 9999 will be pre-padded by |
593 | | // zero to ensure four digits. Years outside that range will have a prefixed positive or negative symbol. |
594 | | // |
595 | | // [ISO 8601]: https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/time/format/DateTimeFormatter.html#ISO_LOCAL_DATE |
596 | 0 | if string.starts_with('+') || string.starts_with('-') { |
597 | | // Skip the sign and look for the hyphen that terminates the year digits. |
598 | | // According to ISO 8601 the unsigned part must be at least 4 digits. |
599 | 0 | let rest = &string[1..]; |
600 | 0 | let hyphen = rest.find('-')?; |
601 | 0 | if hyphen < 4 { |
602 | 0 | return None; |
603 | 0 | } |
604 | | // The year substring is the sign and the digits (but not the separator) |
605 | | // e.g. for "+10999-12-31", hyphen is 5 and s[..6] is "+10999" |
606 | 0 | let year: i32 = string[..hyphen + 1].parse().ok()?; |
607 | | // The remainder should begin with a '-' which we strip off, leaving the month-day part. |
608 | 0 | let remainder = string[hyphen + 1..].strip_prefix('-')?; |
609 | 0 | let mut parts = remainder.splitn(2, '-'); |
610 | 0 | let month: u32 = parts.next()?.parse().ok()?; |
611 | 0 | let day: u32 = parts.next()?.parse().ok()?; |
612 | 0 | return NaiveDate::from_ymd_opt(year, month, day); |
613 | 0 | } |
614 | | |
615 | 0 | if string.len() > 10 { |
616 | | // Try to parse as datetime and return just the date part |
617 | 0 | return string_to_datetime(&Utc, string) |
618 | 0 | .map(|dt| dt.date_naive()) |
619 | 0 | .ok(); |
620 | 0 | }; |
621 | 0 | let mut digits = [0; 10]; |
622 | 0 | let mut mask = 0; |
623 | | |
624 | | // Treating all bytes the same way, helps LLVM vectorise this correctly |
625 | 0 | for (idx, (o, i)) in digits.iter_mut().zip(string.bytes()).enumerate() { |
626 | 0 | *o = i.wrapping_sub(b'0'); |
627 | 0 | mask |= ((*o < 10) as u16) << idx |
628 | | } |
629 | | |
630 | | const HYPHEN: u8 = b'-'.wrapping_sub(b'0'); |
631 | | |
632 | | // refer to https://www.rfc-editor.org/rfc/rfc3339#section-3 |
633 | 0 | if digits[4] != HYPHEN { |
634 | 0 | let (year, month, day) = match (mask, string.len()) { |
635 | 0 | (0b11111111, 8) => ( |
636 | 0 | digits[0] as u16 * 1000 |
637 | 0 | + digits[1] as u16 * 100 |
638 | 0 | + digits[2] as u16 * 10 |
639 | 0 | + digits[3] as u16, |
640 | 0 | digits[4] * 10 + digits[5], |
641 | 0 | digits[6] * 10 + digits[7], |
642 | 0 | ), |
643 | 0 | _ => return None, |
644 | | }; |
645 | 0 | return NaiveDate::from_ymd_opt(year as _, month as _, day as _); |
646 | 0 | } |
647 | | |
648 | 0 | let (month, day) = match mask { |
649 | | 0b1101101111 => { |
650 | 0 | if digits[7] != HYPHEN { |
651 | 0 | return None; |
652 | 0 | } |
653 | 0 | (digits[5] * 10 + digits[6], digits[8] * 10 + digits[9]) |
654 | | } |
655 | | 0b101101111 => { |
656 | 0 | if digits[7] != HYPHEN { |
657 | 0 | return None; |
658 | 0 | } |
659 | 0 | (digits[5] * 10 + digits[6], digits[8]) |
660 | | } |
661 | | 0b110101111 => { |
662 | 0 | if digits[6] != HYPHEN { |
663 | 0 | return None; |
664 | 0 | } |
665 | 0 | (digits[5], digits[7] * 10 + digits[8]) |
666 | | } |
667 | | 0b10101111 => { |
668 | 0 | if digits[6] != HYPHEN { |
669 | 0 | return None; |
670 | 0 | } |
671 | 0 | (digits[5], digits[7]) |
672 | | } |
673 | 0 | _ => return None, |
674 | | }; |
675 | | |
676 | 0 | let year = |
677 | 0 | digits[0] as u16 * 1000 + digits[1] as u16 * 100 + digits[2] as u16 * 10 + digits[3] as u16; |
678 | | |
679 | 0 | NaiveDate::from_ymd_opt(year as _, month as _, day as _) |
680 | 0 | } |
681 | | |
682 | | impl Parser for Date32Type { |
683 | 0 | fn parse(string: &str) -> Option<i32> { |
684 | 0 | let date = parse_date(string)?; |
685 | 0 | Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE) |
686 | 0 | } |
687 | | |
688 | 0 | fn parse_formatted(string: &str, format: &str) -> Option<i32> { |
689 | 0 | let date = NaiveDate::parse_from_str(string, format).ok()?; |
690 | 0 | Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE) |
691 | 0 | } |
692 | | } |
693 | | |
694 | | impl Parser for Date64Type { |
695 | 0 | fn parse(string: &str) -> Option<i64> { |
696 | 0 | if string.len() <= 10 { |
697 | 0 | let datetime = NaiveDateTime::new(parse_date(string)?, NaiveTime::default()); |
698 | 0 | Some(datetime.and_utc().timestamp_millis()) |
699 | | } else { |
700 | 0 | let date_time = string_to_datetime(&Utc, string).ok()?; |
701 | 0 | Some(date_time.timestamp_millis()) |
702 | | } |
703 | 0 | } |
704 | | |
705 | 0 | fn parse_formatted(string: &str, format: &str) -> Option<i64> { |
706 | | use chrono::format::Fixed; |
707 | | use chrono::format::StrftimeItems; |
708 | 0 | let fmt = StrftimeItems::new(format); |
709 | 0 | let has_zone = fmt.into_iter().any(|item| match item { |
710 | 0 | chrono::format::Item::Fixed(fixed_item) => matches!( |
711 | 0 | fixed_item, |
712 | | Fixed::RFC2822 |
713 | | | Fixed::RFC3339 |
714 | | | Fixed::TimezoneName |
715 | | | Fixed::TimezoneOffsetColon |
716 | | | Fixed::TimezoneOffsetColonZ |
717 | | | Fixed::TimezoneOffset |
718 | | | Fixed::TimezoneOffsetZ |
719 | | ), |
720 | 0 | _ => false, |
721 | 0 | }); |
722 | 0 | if has_zone { |
723 | 0 | let date_time = chrono::DateTime::parse_from_str(string, format).ok()?; |
724 | 0 | Some(date_time.timestamp_millis()) |
725 | | } else { |
726 | 0 | let date_time = NaiveDateTime::parse_from_str(string, format).ok()?; |
727 | 0 | Some(date_time.and_utc().timestamp_millis()) |
728 | | } |
729 | 0 | } |
730 | | } |
731 | | |
732 | | fn parse_e_notation<T: DecimalType>( |
733 | | s: &str, |
734 | | mut digits: u16, |
735 | | mut fractionals: i16, |
736 | | mut result: T::Native, |
737 | | index: usize, |
738 | | precision: u16, |
739 | | scale: i16, |
740 | | ) -> Result<T::Native, ArrowError> { |
741 | | let mut exp: i16 = 0; |
742 | | let base = T::Native::usize_as(10); |
743 | | |
744 | | let mut exp_start: bool = false; |
745 | | // e has a plus sign |
746 | | let mut pos_shift_direction: bool = true; |
747 | | |
748 | | // skip to point or exponent index |
749 | | let mut bs; |
750 | | if fractionals > 0 { |
751 | | // it's a fraction, so the point index needs to be skipped, so +1 |
752 | | bs = s.as_bytes().iter().skip(index + fractionals as usize + 1); |
753 | | } else { |
754 | | // it's actually an integer that is already written into the result, so let's skip on to e |
755 | | bs = s.as_bytes().iter().skip(index); |
756 | | } |
757 | | |
758 | | while let Some(b) = bs.next() { |
759 | | match b { |
760 | | b'0'..=b'9' => { |
761 | | result = result.mul_wrapping(base); |
762 | | result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize)); |
763 | | if fractionals > 0 { |
764 | | fractionals += 1; |
765 | | } |
766 | | digits += 1; |
767 | | } |
768 | | &b'e' | &b'E' => { |
769 | | exp_start = true; |
770 | | } |
771 | | _ => { |
772 | | return Err(ArrowError::ParseError(format!( |
773 | | "can't parse the string value {s} to decimal" |
774 | | ))); |
775 | | } |
776 | | }; |
777 | | |
778 | | if exp_start { |
779 | | pos_shift_direction = match bs.next() { |
780 | | Some(&b'-') => false, |
781 | | Some(&b'+') => true, |
782 | | Some(b) => { |
783 | | if !b.is_ascii_digit() { |
784 | | return Err(ArrowError::ParseError(format!( |
785 | | "can't parse the string value {s} to decimal" |
786 | | ))); |
787 | | } |
788 | | |
789 | | exp *= 10; |
790 | | exp += (b - b'0') as i16; |
791 | | |
792 | | true |
793 | | } |
794 | | None => { |
795 | | return Err(ArrowError::ParseError(format!( |
796 | | "can't parse the string value {s} to decimal" |
797 | | ))) |
798 | | } |
799 | | }; |
800 | | |
801 | | for b in bs.by_ref() { |
802 | | if !b.is_ascii_digit() { |
803 | | return Err(ArrowError::ParseError(format!( |
804 | | "can't parse the string value {s} to decimal" |
805 | | ))); |
806 | | } |
807 | | exp *= 10; |
808 | | exp += (b - b'0') as i16; |
809 | | } |
810 | | } |
811 | | } |
812 | | |
813 | | if digits == 0 && fractionals == 0 && exp == 0 { |
814 | | return Err(ArrowError::ParseError(format!( |
815 | | "can't parse the string value {s} to decimal" |
816 | | ))); |
817 | | } |
818 | | |
819 | | if !pos_shift_direction { |
820 | | // exponent has a large negative sign |
821 | | // 1.12345e-30 => 0.0{29}12345, scale = 5 |
822 | | if exp - (digits as i16 + scale) > 0 { |
823 | | return Ok(T::Native::usize_as(0)); |
824 | | } |
825 | | exp *= -1; |
826 | | } |
827 | | |
828 | | // point offset |
829 | | exp = fractionals - exp; |
830 | | // We have zeros on the left, we need to count them |
831 | | if !pos_shift_direction && exp > digits as i16 { |
832 | | digits = exp as u16; |
833 | | } |
834 | | // Number of numbers to be removed or added |
835 | | exp = scale - exp; |
836 | | |
837 | | if (digits as i16 + exp) as u16 > precision { |
838 | | return Err(ArrowError::ParseError(format!( |
839 | | "parse decimal overflow ({s})" |
840 | | ))); |
841 | | } |
842 | | |
843 | | if exp < 0 { |
844 | | result = result.div_wrapping(base.pow_wrapping(-exp as _)); |
845 | | } else { |
846 | | result = result.mul_wrapping(base.pow_wrapping(exp as _)); |
847 | | } |
848 | | |
849 | | Ok(result) |
850 | | } |
851 | | |
852 | | /// Parse the string format decimal value to i128/i256 format and checking the precision and scale. |
853 | | /// The result value can't be out of bounds. |
854 | | pub fn parse_decimal<T: DecimalType>( |
855 | | s: &str, |
856 | | precision: u8, |
857 | | scale: i8, |
858 | | ) -> Result<T::Native, ArrowError> { |
859 | | let mut result = T::Native::usize_as(0); |
860 | | let mut fractionals: i8 = 0; |
861 | | let mut digits: u8 = 0; |
862 | | let base = T::Native::usize_as(10); |
863 | | |
864 | | let bs = s.as_bytes(); |
865 | | let (signed, negative) = match bs.first() { |
866 | | Some(b'-') => (true, true), |
867 | | Some(b'+') => (true, false), |
868 | | _ => (false, false), |
869 | | }; |
870 | | |
871 | | if bs.is_empty() || signed && bs.len() == 1 { |
872 | | return Err(ArrowError::ParseError(format!( |
873 | | "can't parse the string value {s} to decimal" |
874 | | ))); |
875 | | } |
876 | | |
877 | | // Iterate over the raw input bytes, skipping the sign if any |
878 | | let mut bs = bs.iter().enumerate().skip(signed as usize); |
879 | | |
880 | | let mut is_e_notation = false; |
881 | | |
882 | | // Overflow checks are not required if 10^(precision - 1) <= T::MAX holds. |
883 | | // Thus, if we validate the precision correctly, we can skip overflow checks. |
884 | | while let Some((index, b)) = bs.next() { |
885 | | match b { |
886 | | b'0'..=b'9' => { |
887 | | if digits == 0 && *b == b'0' { |
888 | | // Ignore leading zeros. |
889 | | continue; |
890 | | } |
891 | | digits += 1; |
892 | | result = result.mul_wrapping(base); |
893 | | result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize)); |
894 | | } |
895 | | b'.' => { |
896 | | let point_index = index; |
897 | | |
898 | | for (_, b) in bs.by_ref() { |
899 | | if !b.is_ascii_digit() { |
900 | | if *b == b'e' || *b == b'E' { |
901 | | result = parse_e_notation::<T>( |
902 | | s, |
903 | | digits as u16, |
904 | | fractionals as i16, |
905 | | result, |
906 | | point_index, |
907 | | precision as u16, |
908 | | scale as i16, |
909 | | )?; |
910 | | |
911 | | is_e_notation = true; |
912 | | |
913 | | break; |
914 | | } |
915 | | return Err(ArrowError::ParseError(format!( |
916 | | "can't parse the string value {s} to decimal" |
917 | | ))); |
918 | | } |
919 | | if fractionals == scale && scale != 0 { |
920 | | // We have processed all the digits that we need. All that |
921 | | // is left is to validate that the rest of the string contains |
922 | | // valid digits. |
923 | | continue; |
924 | | } |
925 | | fractionals += 1; |
926 | | digits += 1; |
927 | | result = result.mul_wrapping(base); |
928 | | result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize)); |
929 | | } |
930 | | |
931 | | if is_e_notation { |
932 | | break; |
933 | | } |
934 | | |
935 | | // Fail on "." |
936 | | if digits == 0 { |
937 | | return Err(ArrowError::ParseError(format!( |
938 | | "can't parse the string value {s} to decimal" |
939 | | ))); |
940 | | } |
941 | | } |
942 | | b'e' | b'E' => { |
943 | | result = parse_e_notation::<T>( |
944 | | s, |
945 | | digits as u16, |
946 | | fractionals as i16, |
947 | | result, |
948 | | index, |
949 | | precision as u16, |
950 | | scale as i16, |
951 | | )?; |
952 | | |
953 | | is_e_notation = true; |
954 | | |
955 | | break; |
956 | | } |
957 | | _ => { |
958 | | return Err(ArrowError::ParseError(format!( |
959 | | "can't parse the string value {s} to decimal" |
960 | | ))); |
961 | | } |
962 | | } |
963 | | } |
964 | | |
965 | | if !is_e_notation { |
966 | | if fractionals < scale { |
967 | | let exp = scale - fractionals; |
968 | | if exp as u8 + digits > precision { |
969 | | return Err(ArrowError::ParseError(format!( |
970 | | "parse decimal overflow ({s})" |
971 | | ))); |
972 | | } |
973 | | let mul = base.pow_wrapping(exp as _); |
974 | | result = result.mul_wrapping(mul); |
975 | | } else if digits > precision { |
976 | | return Err(ArrowError::ParseError(format!( |
977 | | "parse decimal overflow ({s})" |
978 | | ))); |
979 | | } |
980 | | } |
981 | | |
982 | | Ok(if negative { |
983 | | result.neg_wrapping() |
984 | | } else { |
985 | | result |
986 | | }) |
987 | | } |
988 | | |
989 | | /// Parse human-readable interval string to Arrow [IntervalYearMonthType] |
990 | 0 | pub fn parse_interval_year_month( |
991 | 0 | value: &str, |
992 | 0 | ) -> Result<<IntervalYearMonthType as ArrowPrimitiveType>::Native, ArrowError> { |
993 | 0 | let config = IntervalParseConfig::new(IntervalUnit::Year); |
994 | 0 | let interval = Interval::parse(value, &config)?; |
995 | | |
996 | 0 | let months = interval.to_year_months().map_err(|_| { |
997 | 0 | ArrowError::CastError(format!( |
998 | 0 | "Cannot cast {value} to IntervalYearMonth. Only year and month fields are allowed." |
999 | 0 | )) |
1000 | 0 | })?; |
1001 | | |
1002 | 0 | Ok(IntervalYearMonthType::make_value(0, months)) |
1003 | 0 | } |
1004 | | |
1005 | | /// Parse human-readable interval string to Arrow [IntervalDayTimeType] |
1006 | 0 | pub fn parse_interval_day_time( |
1007 | 0 | value: &str, |
1008 | 0 | ) -> Result<<IntervalDayTimeType as ArrowPrimitiveType>::Native, ArrowError> { |
1009 | 0 | let config = IntervalParseConfig::new(IntervalUnit::Day); |
1010 | 0 | let interval = Interval::parse(value, &config)?; |
1011 | | |
1012 | 0 | let (days, millis) = interval.to_day_time().map_err(|_| ArrowError::CastError(format!( |
1013 | 0 | "Cannot cast {value} to IntervalDayTime because the nanos part isn't multiple of milliseconds" |
1014 | 0 | )))?; |
1015 | | |
1016 | 0 | Ok(IntervalDayTimeType::make_value(days, millis)) |
1017 | 0 | } |
1018 | | |
1019 | | /// Parse human-readable interval string to Arrow [IntervalMonthDayNanoType] |
1020 | 0 | pub fn parse_interval_month_day_nano_config( |
1021 | 0 | value: &str, |
1022 | 0 | config: IntervalParseConfig, |
1023 | 0 | ) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> { |
1024 | 0 | let interval = Interval::parse(value, &config)?; |
1025 | | |
1026 | 0 | let (months, days, nanos) = interval.to_month_day_nanos(); |
1027 | | |
1028 | 0 | Ok(IntervalMonthDayNanoType::make_value(months, days, nanos)) |
1029 | 0 | } |
1030 | | |
1031 | | /// Parse human-readable interval string to Arrow [IntervalMonthDayNanoType] |
1032 | 0 | pub fn parse_interval_month_day_nano( |
1033 | 0 | value: &str, |
1034 | 0 | ) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> { |
1035 | 0 | parse_interval_month_day_nano_config(value, IntervalParseConfig::new(IntervalUnit::Month)) |
1036 | 0 | } |
1037 | | |
/// Nanoseconds in one millisecond
const NANOS_PER_MILLIS: i64 = 1_000_000;
/// Nanoseconds in one second
const NANOS_PER_SECOND: i64 = NANOS_PER_MILLIS * 1_000;
/// Nanoseconds in one minute
const NANOS_PER_MINUTE: i64 = NANOS_PER_SECOND * 60;
/// Nanoseconds in one hour
const NANOS_PER_HOUR: i64 = NANOS_PER_MINUTE * 60;
/// Nanoseconds in one day (only compiled for tests)
#[cfg(test)]
const NANOS_PER_DAY: i64 = NANOS_PER_HOUR * 24;
1044 | | |
1045 | | /// Config to parse interval strings |
1046 | | /// |
1047 | | /// Currently stores the `default_unit` to use if the string doesn't have one specified |
1048 | | #[derive(Debug, Clone)] |
1049 | | pub struct IntervalParseConfig { |
1050 | | /// The default unit to use if none is specified |
1051 | | /// e.g. `INTERVAL 1` represents `INTERVAL 1 SECOND` when default_unit = [IntervalUnit::Second] |
1052 | | default_unit: IntervalUnit, |
1053 | | } |
1054 | | |
1055 | | impl IntervalParseConfig { |
1056 | | /// Create a new [IntervalParseConfig] with the given default unit |
1057 | 0 | pub fn new(default_unit: IntervalUnit) -> Self { |
1058 | 0 | Self { default_unit } |
1059 | 0 | } |
1060 | | } |
1061 | | |
/// The units an interval amount can be expressed in.
///
/// Every variant occupies its own bit, so a set of units can be tracked
/// in a single `u16` bitfield.
#[rustfmt::skip]
#[derive(Clone, Copy, Debug)]
#[repr(u16)]
pub enum IntervalUnit {
    /// A Century
    Century     = 1 << 0,
    /// A Decade
    Decade      = 1 << 1,
    /// A Year
    Year        = 1 << 2,
    /// A Month
    Month       = 1 << 3,
    /// A Week
    Week        = 1 << 4,
    /// A Day
    Day         = 1 << 5,
    /// An Hour
    Hour        = 1 << 6,
    /// A Minute
    Minute      = 1 << 7,
    /// A Second
    Second      = 1 << 8,
    /// A Millisecond
    Millisecond = 1 << 9,
    /// A Microsecond
    Microsecond = 1 << 10,
    /// A Nanosecond
    Nanosecond  = 1 << 11,
}
1093 | | |
1094 | | /// Logic for parsing interval unit strings |
1095 | | /// |
1096 | | /// See <https://github.com/postgres/postgres/blob/2caa85f4aae689e6f6721d7363b4c66a2a6417d6/src/backend/utils/adt/datetime.c#L189> |
1097 | | /// for a list of unit names supported by PostgreSQL which we try to match here. |
1098 | | impl FromStr for IntervalUnit { |
1099 | | type Err = ArrowError; |
1100 | | |
1101 | 0 | fn from_str(s: &str) -> Result<Self, ArrowError> { |
1102 | 0 | match s.to_lowercase().as_str() { |
1103 | 0 | "c" | "cent" | "cents" | "century" | "centuries" => Ok(Self::Century), |
1104 | 0 | "dec" | "decs" | "decade" | "decades" => Ok(Self::Decade), |
1105 | 0 | "y" | "yr" | "yrs" | "year" | "years" => Ok(Self::Year), |
1106 | 0 | "mon" | "mons" | "month" | "months" => Ok(Self::Month), |
1107 | 0 | "w" | "week" | "weeks" => Ok(Self::Week), |
1108 | 0 | "d" | "day" | "days" => Ok(Self::Day), |
1109 | 0 | "h" | "hr" | "hrs" | "hour" | "hours" => Ok(Self::Hour), |
1110 | 0 | "m" | "min" | "mins" | "minute" | "minutes" => Ok(Self::Minute), |
1111 | 0 | "s" | "sec" | "secs" | "second" | "seconds" => Ok(Self::Second), |
1112 | 0 | "ms" | "msec" | "msecs" | "msecond" | "mseconds" | "millisecond" | "milliseconds" => { |
1113 | 0 | Ok(Self::Millisecond) |
1114 | | } |
1115 | 0 | "us" | "usec" | "usecs" | "usecond" | "useconds" | "microsecond" | "microseconds" => { |
1116 | 0 | Ok(Self::Microsecond) |
1117 | | } |
1118 | 0 | "nanosecond" | "nanoseconds" => Ok(Self::Nanosecond), |
1119 | 0 | _ => Err(ArrowError::InvalidArgumentError(format!( |
1120 | 0 | "Unknown interval type: {s}" |
1121 | 0 | ))), |
1122 | | } |
1123 | 0 | } |
1124 | | } |
1125 | | |
1126 | | impl IntervalUnit { |
1127 | 0 | fn from_str_or_config( |
1128 | 0 | s: Option<&str>, |
1129 | 0 | config: &IntervalParseConfig, |
1130 | 0 | ) -> Result<Self, ArrowError> { |
1131 | 0 | match s { |
1132 | 0 | Some(s) => s.parse(), |
1133 | 0 | None => Ok(config.default_unit), |
1134 | | } |
1135 | 0 | } |
1136 | | } |
1137 | | |
/// The `(months, days, nanoseconds)` triple describing an interval
pub type MonthDayNano = (i32, i32, i64);

/// Number of fractional decimal digits kept for an interval amount.
///
/// Picked from the count of decimal digits in one week expressed in nanoseconds.
const INTERVAL_PRECISION: u32 = 15;
1143 | | |
/// A numeric interval amount split into integer and fractional parts
#[derive(Debug, Clone, Copy, PartialEq)]
struct IntervalAmount {
    /// Whole-number part of the amount
    integer: i64,
    /// Fractional part, stored as the fraction multiplied by 10^INTERVAL_PRECISION
    frac: i64,
}

#[cfg(test)]
impl IntervalAmount {
    /// Test-only constructor taking the two parts as given
    fn new(integer: i64, frac: i64) -> Self {
        IntervalAmount { integer, frac }
    }
}
1158 | | |
1159 | | impl FromStr for IntervalAmount { |
1160 | | type Err = ArrowError; |
1161 | | |
1162 | 0 | fn from_str(s: &str) -> Result<Self, Self::Err> { |
1163 | 0 | match s.split_once('.') { |
1164 | 0 | Some((integer, frac)) |
1165 | 0 | if frac.len() <= INTERVAL_PRECISION as usize |
1166 | 0 | && !frac.is_empty() |
1167 | 0 | && !frac.starts_with('-') => |
1168 | | { |
1169 | | // integer will be "" for values like ".5" |
1170 | | // and "-" for values like "-.5" |
1171 | 0 | let explicit_neg = integer.starts_with('-'); |
1172 | 0 | let integer = if integer.is_empty() || integer == "-" { |
1173 | 0 | Ok(0) |
1174 | | } else { |
1175 | 0 | integer.parse::<i64>().map_err(|_| { |
1176 | 0 | ArrowError::ParseError(format!("Failed to parse {s} as interval amount")) |
1177 | 0 | }) |
1178 | 0 | }?; |
1179 | | |
1180 | 0 | let frac_unscaled = frac.parse::<i64>().map_err(|_| { |
1181 | 0 | ArrowError::ParseError(format!("Failed to parse {s} as interval amount")) |
1182 | 0 | })?; |
1183 | | |
1184 | | // scale fractional part by interval precision |
1185 | 0 | let frac = frac_unscaled * 10_i64.pow(INTERVAL_PRECISION - frac.len() as u32); |
1186 | | |
1187 | | // propagate the sign of the integer part to the fractional part |
1188 | 0 | let frac = if integer < 0 || explicit_neg { |
1189 | 0 | -frac |
1190 | | } else { |
1191 | 0 | frac |
1192 | | }; |
1193 | | |
1194 | 0 | let result = Self { integer, frac }; |
1195 | | |
1196 | 0 | Ok(result) |
1197 | | } |
1198 | 0 | Some((_, frac)) if frac.starts_with('-') => Err(ArrowError::ParseError(format!( |
1199 | 0 | "Failed to parse {s} as interval amount" |
1200 | 0 | ))), |
1201 | 0 | Some((_, frac)) if frac.len() > INTERVAL_PRECISION as usize => { |
1202 | 0 | Err(ArrowError::ParseError(format!( |
1203 | 0 | "{s} exceeds the precision available for interval amount" |
1204 | 0 | ))) |
1205 | | } |
1206 | | Some(_) | None => { |
1207 | 0 | let integer = s.parse::<i64>().map_err(|_| { |
1208 | 0 | ArrowError::ParseError(format!("Failed to parse {s} as interval amount")) |
1209 | 0 | })?; |
1210 | | |
1211 | 0 | let result = Self { integer, frac: 0 }; |
1212 | 0 | Ok(result) |
1213 | | } |
1214 | | } |
1215 | 0 | } |
1216 | | } |
1217 | | |
/// An interval accumulated as separate month, day and nanosecond totals
#[derive(Default, Debug, PartialEq)]
struct Interval {
    /// Number of months
    months: i32,
    /// Number of days
    days: i32,
    /// Number of nanoseconds
    nanos: i64,
}
1224 | | |
1225 | | impl Interval { |
1226 | 0 | fn new(months: i32, days: i32, nanos: i64) -> Self { |
1227 | 0 | Self { |
1228 | 0 | months, |
1229 | 0 | days, |
1230 | 0 | nanos, |
1231 | 0 | } |
1232 | 0 | } |
1233 | | |
1234 | 0 | fn to_year_months(&self) -> Result<i32, ArrowError> { |
1235 | 0 | match (self.months, self.days, self.nanos) { |
1236 | 0 | (months, days, nanos) if days == 0 && nanos == 0 => Ok(months), |
1237 | 0 | _ => Err(ArrowError::InvalidArgumentError(format!( |
1238 | 0 | "Unable to represent interval with days and nanos as year-months: {self:?}" |
1239 | 0 | ))), |
1240 | | } |
1241 | 0 | } |
1242 | | |
1243 | 0 | fn to_day_time(&self) -> Result<(i32, i32), ArrowError> { |
1244 | 0 | let days = self.months.mul_checked(30)?.add_checked(self.days)?; |
1245 | | |
1246 | 0 | match self.nanos { |
1247 | 0 | nanos if nanos % NANOS_PER_MILLIS == 0 => { |
1248 | 0 | let millis = (self.nanos / 1_000_000).try_into().map_err(|_| { |
1249 | 0 | ArrowError::InvalidArgumentError(format!( |
1250 | 0 | "Unable to represent {} nanos as milliseconds in a signed 32-bit integer", |
1251 | 0 | self.nanos |
1252 | 0 | )) |
1253 | 0 | })?; |
1254 | | |
1255 | 0 | Ok((days, millis)) |
1256 | | } |
1257 | 0 | nanos => Err(ArrowError::InvalidArgumentError(format!( |
1258 | 0 | "Unable to represent {nanos} as milliseconds" |
1259 | 0 | ))), |
1260 | | } |
1261 | 0 | } |
1262 | | |
1263 | 0 | fn to_month_day_nanos(&self) -> (i32, i32, i64) { |
1264 | 0 | (self.months, self.days, self.nanos) |
1265 | 0 | } |
1266 | | |
1267 | | /// Parse string value in traditional Postgres format such as |
1268 | | /// `1 year 2 months 3 days 4 hours 5 minutes 6 seconds` |
1269 | 0 | fn parse(value: &str, config: &IntervalParseConfig) -> Result<Self, ArrowError> { |
1270 | 0 | let components = parse_interval_components(value, config)?; |
1271 | | |
1272 | 0 | components |
1273 | 0 | .into_iter() |
1274 | 0 | .try_fold(Self::default(), |result, (amount, unit)| { |
1275 | 0 | result.add(amount, unit) |
1276 | 0 | }) |
1277 | 0 | } |
1278 | | |
1279 | | /// Interval addition following Postgres behavior. Fractional units will be spilled into smaller units. |
1280 | | /// When the interval unit is larger than months, the result is rounded to total months and not spilled to days/nanos. |
1281 | | /// Fractional parts of weeks and days are represented using days and nanoseconds. |
1282 | | /// e.g. INTERVAL '0.5 MONTH' = 15 days, INTERVAL '1.5 MONTH' = 1 month 15 days |
1283 | | /// e.g. INTERVAL '0.5 DAY' = 12 hours, INTERVAL '1.5 DAY' = 1 day 12 hours |
1284 | | /// [Postgres reference](https://www.postgresql.org/docs/15/datatype-datetime.html#DATATYPE-INTERVAL-INPUT:~:text=Field%20values%20can,fractional%20on%20output.) |
1285 | 0 | fn add(&self, amount: IntervalAmount, unit: IntervalUnit) -> Result<Self, ArrowError> { |
1286 | 0 | let result = match unit { |
1287 | | IntervalUnit::Century => { |
1288 | 0 | let months_int = amount.integer.mul_checked(100)?.mul_checked(12)?; |
1289 | 0 | let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 2); |
1290 | 0 | let months = months_int |
1291 | 0 | .add_checked(month_frac)? |
1292 | 0 | .try_into() |
1293 | 0 | .map_err(|_| { |
1294 | 0 | ArrowError::ParseError(format!( |
1295 | 0 | "Unable to represent {} centuries as months in a signed 32-bit integer", |
1296 | 0 | &amount.integer |
1297 | 0 | )) |
1298 | 0 | })?; |
1299 | | |
1300 | 0 | Self::new(self.months.add_checked(months)?, self.days, self.nanos) |
1301 | | } |
1302 | | IntervalUnit::Decade => { |
1303 | 0 | let months_int = amount.integer.mul_checked(10)?.mul_checked(12)?; |
1304 | | |
1305 | 0 | let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 1); |
1306 | 0 | let months = months_int |
1307 | 0 | .add_checked(month_frac)? |
1308 | 0 | .try_into() |
1309 | 0 | .map_err(|_| { |
1310 | 0 | ArrowError::ParseError(format!( |
1311 | 0 | "Unable to represent {} decades as months in a signed 32-bit integer", |
1312 | 0 | &amount.integer |
1313 | 0 | )) |
1314 | 0 | })?; |
1315 | | |
1316 | 0 | Self::new(self.months.add_checked(months)?, self.days, self.nanos) |
1317 | | } |
1318 | | IntervalUnit::Year => { |
1319 | 0 | let months_int = amount.integer.mul_checked(12)?; |
1320 | 0 | let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION); |
1321 | 0 | let months = months_int |
1322 | 0 | .add_checked(month_frac)? |
1323 | 0 | .try_into() |
1324 | 0 | .map_err(|_| { |
1325 | 0 | ArrowError::ParseError(format!( |
1326 | 0 | "Unable to represent {} years as months in a signed 32-bit integer", |
1327 | 0 | &amount.integer |
1328 | 0 | )) |
1329 | 0 | })?; |
1330 | | |
1331 | 0 | Self::new(self.months.add_checked(months)?, self.days, self.nanos) |
1332 | | } |
1333 | | IntervalUnit::Month => { |
1334 | 0 | let months = amount.integer.try_into().map_err(|_| { |
1335 | 0 | ArrowError::ParseError(format!( |
1336 | 0 | "Unable to represent {} months in a signed 32-bit integer", |
1337 | 0 | &amount.integer |
1338 | 0 | )) |
1339 | 0 | })?; |
1340 | | |
1341 | 0 | let days = amount.frac * 3 / 10_i64.pow(INTERVAL_PRECISION - 1); |
1342 | 0 | let days = days.try_into().map_err(|_| { |
1343 | 0 | ArrowError::ParseError(format!( |
1344 | 0 | "Unable to represent {} months as days in a signed 32-bit integer", |
1345 | 0 | amount.frac / 10_i64.pow(INTERVAL_PRECISION) |
1346 | 0 | )) |
1347 | 0 | })?; |
1348 | | |
1349 | 0 | Self::new( |
1350 | 0 | self.months.add_checked(months)?, |
1351 | 0 | self.days.add_checked(days)?, |
1352 | 0 | self.nanos, |
1353 | | ) |
1354 | | } |
1355 | | IntervalUnit::Week => { |
1356 | 0 | let days = amount.integer.mul_checked(7)?.try_into().map_err(|_| { |
1357 | 0 | ArrowError::ParseError(format!( |
1358 | 0 | "Unable to represent {} weeks as days in a signed 32-bit integer", |
1359 | 0 | &amount.integer |
1360 | 0 | )) |
1361 | 0 | })?; |
1362 | | |
1363 | 0 | let nanos = amount.frac * 7 * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11); |
1364 | | |
1365 | 0 | Self::new( |
1366 | 0 | self.months, |
1367 | 0 | self.days.add_checked(days)?, |
1368 | 0 | self.nanos.add_checked(nanos)?, |
1369 | | ) |
1370 | | } |
1371 | | IntervalUnit::Day => { |
1372 | 0 | let days = amount.integer.try_into().map_err(|_| { |
1373 | 0 | ArrowError::InvalidArgumentError(format!( |
1374 | 0 | "Unable to represent {} days in a signed 32-bit integer", |
1375 | 0 | amount.integer |
1376 | 0 | )) |
1377 | 0 | })?; |
1378 | | |
1379 | 0 | let nanos = amount.frac * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11); |
1380 | | |
1381 | 0 | Self::new( |
1382 | 0 | self.months, |
1383 | 0 | self.days.add_checked(days)?, |
1384 | 0 | self.nanos.add_checked(nanos)?, |
1385 | | ) |
1386 | | } |
1387 | | IntervalUnit::Hour => { |
1388 | 0 | let nanos_int = amount.integer.mul_checked(NANOS_PER_HOUR)?; |
1389 | 0 | let nanos_frac = amount.frac * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11); |
1390 | 0 | let nanos = nanos_int.add_checked(nanos_frac)?; |
1391 | | |
1392 | 0 | Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?) |
1393 | | } |
1394 | | IntervalUnit::Minute => { |
1395 | 0 | let nanos_int = amount.integer.mul_checked(NANOS_PER_MINUTE)?; |
1396 | 0 | let nanos_frac = amount.frac * 6 / 10_i64.pow(INTERVAL_PRECISION - 10); |
1397 | | |
1398 | 0 | let nanos = nanos_int.add_checked(nanos_frac)?; |
1399 | | |
1400 | 0 | Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?) |
1401 | | } |
1402 | | IntervalUnit::Second => { |
1403 | 0 | let nanos_int = amount.integer.mul_checked(NANOS_PER_SECOND)?; |
1404 | 0 | let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 9); |
1405 | 0 | let nanos = nanos_int.add_checked(nanos_frac)?; |
1406 | | |
1407 | 0 | Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?) |
1408 | | } |
1409 | | IntervalUnit::Millisecond => { |
1410 | 0 | let nanos_int = amount.integer.mul_checked(NANOS_PER_MILLIS)?; |
1411 | 0 | let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 6); |
1412 | 0 | let nanos = nanos_int.add_checked(nanos_frac)?; |
1413 | | |
1414 | 0 | Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?) |
1415 | | } |
1416 | | IntervalUnit::Microsecond => { |
1417 | 0 | let nanos_int = amount.integer.mul_checked(1_000)?; |
1418 | 0 | let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 3); |
1419 | 0 | let nanos = nanos_int.add_checked(nanos_frac)?; |
1420 | | |
1421 | 0 | Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?) |
1422 | | } |
1423 | | IntervalUnit::Nanosecond => { |
1424 | 0 | let nanos_int = amount.integer; |
1425 | 0 | let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION); |
1426 | 0 | let nanos = nanos_int.add_checked(nanos_frac)?; |
1427 | | |
1428 | 0 | Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?) |
1429 | | } |
1430 | | }; |
1431 | | |
1432 | 0 | Ok(result) |
1433 | 0 | } |
1434 | | } |
1435 | | |
1436 | | /// parse the string into a vector of interval components i.e. (amount, unit) tuples |
1437 | 0 | fn parse_interval_components( |
1438 | 0 | value: &str, |
1439 | 0 | config: &IntervalParseConfig, |
1440 | 0 | ) -> Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> { |
1441 | 0 | let raw_pairs = split_interval_components(value); |
1442 | | |
1443 | | // parse amounts and units |
1444 | 0 | let Ok(pairs): Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> = raw_pairs |
1445 | 0 | .iter() |
1446 | 0 | .map(|(a, u)| Ok((a.parse()?, IntervalUnit::from_str_or_config(*u, config)?))) |
1447 | 0 | .collect() |
1448 | | else { |
1449 | 0 | return Err(ArrowError::ParseError(format!( |
1450 | 0 | "Invalid input syntax for type interval: {value:?}" |
1451 | 0 | ))); |
1452 | | }; |
1453 | | |
1454 | | // collect parsed results |
1455 | 0 | let (amounts, units): (Vec<_>, Vec<_>) = pairs.into_iter().unzip(); |
1456 | | |
1457 | | // duplicate units? |
1458 | 0 | let mut observed_interval_types = 0; |
1459 | 0 | for (unit, (_, raw_unit)) in units.iter().zip(raw_pairs) { |
1460 | 0 | if observed_interval_types & (*unit as u16) != 0 { |
1461 | 0 | return Err(ArrowError::ParseError(format!( |
1462 | 0 | "Invalid input syntax for type interval: {:?}. Repeated type '{}'", |
1463 | 0 | value, |
1464 | 0 | raw_unit.unwrap_or_default(), |
1465 | 0 | ))); |
1466 | 0 | } |
1467 | | |
1468 | 0 | observed_interval_types |= *unit as u16; |
1469 | | } |
1470 | | |
1471 | 0 | let result = amounts.iter().copied().zip(units.iter().copied()); |
1472 | | |
1473 | 0 | Ok(result.collect::<Vec<_>>()) |
1474 | 0 | } |
1475 | | |
1476 | | /// Split an interval into a vec of amounts and units. |
1477 | | /// |
1478 | | /// Pairs are separated by spaces, but within a pair the amount and unit may or may not be separated by a space. |
1479 | | /// |
1480 | | /// This should match the behavior of PostgreSQL's interval parser. |
1481 | 0 | fn split_interval_components(value: &str) -> Vec<(&str, Option<&str>)> { |
1482 | 0 | let mut result = vec![]; |
1483 | 0 | let mut words = value.split(char::is_whitespace); |
1484 | 0 | while let Some(word) = words.next() { |
1485 | 0 | if let Some(split_word_at) = word.find(not_interval_amount) { |
1486 | 0 | let (amount, unit) = word.split_at(split_word_at); |
1487 | 0 | result.push((amount, Some(unit))); |
1488 | 0 | } else if let Some(unit) = words.next() { |
1489 | 0 | result.push((word, Some(unit))); |
1490 | 0 | } else { |
1491 | 0 | result.push((word, None)); |
1492 | 0 | break; |
1493 | | } |
1494 | | } |
1495 | 0 | result |
1496 | 0 | } |
1497 | | |
/// Returns `true` when `c` cannot appear in the numeric amount of an interval,
/// i.e. it is neither an ASCII digit nor `.` nor `-`
fn not_interval_amount(c: char) -> bool {
    !matches!(c, '0'..='9' | '.' | '-')
}
1502 | | |
1503 | | #[cfg(test)] |
1504 | | mod tests { |
1505 | | use super::*; |
1506 | | use arrow_array::temporal_conversions::date32_to_datetime; |
1507 | | use arrow_buffer::i256; |
1508 | | |
1509 | | #[test] |
1510 | | fn test_parse_nanos() { |
1511 | | assert_eq!(parse_nanos::<3, 0>(&[1, 2, 3]), 123_000_000); |
1512 | | assert_eq!(parse_nanos::<5, 0>(&[1, 2, 3, 4, 5]), 123_450_000); |
1513 | | assert_eq!(parse_nanos::<6, b'0'>(b"123456"), 123_456_000); |
1514 | | } |
1515 | | |
1516 | | #[test] |
1517 | | fn string_to_timestamp_timezone() { |
1518 | | // Explicit timezone |
1519 | | assert_eq!( |
1520 | | 1599572549190855000, |
1521 | | parse_timestamp("2020-09-08T13:42:29.190855+00:00").unwrap() |
1522 | | ); |
1523 | | assert_eq!( |
1524 | | 1599572549190855000, |
1525 | | parse_timestamp("2020-09-08T13:42:29.190855Z").unwrap() |
1526 | | ); |
1527 | | assert_eq!( |
1528 | | 1599572549000000000, |
1529 | | parse_timestamp("2020-09-08T13:42:29Z").unwrap() |
1530 | | ); // no fractional part |
1531 | | assert_eq!( |
1532 | | 1599590549190855000, |
1533 | | parse_timestamp("2020-09-08T13:42:29.190855-05:00").unwrap() |
1534 | | ); |
1535 | | } |
1536 | | |
1537 | | #[test] |
1538 | | fn string_to_timestamp_timezone_space() { |
1539 | | // Ensure space rather than T between time and date is accepted |
1540 | | assert_eq!( |
1541 | | 1599572549190855000, |
1542 | | parse_timestamp("2020-09-08 13:42:29.190855+00:00").unwrap() |
1543 | | ); |
1544 | | assert_eq!( |
1545 | | 1599572549190855000, |
1546 | | parse_timestamp("2020-09-08 13:42:29.190855Z").unwrap() |
1547 | | ); |
1548 | | assert_eq!( |
1549 | | 1599572549000000000, |
1550 | | parse_timestamp("2020-09-08 13:42:29Z").unwrap() |
1551 | | ); // no fractional part |
1552 | | assert_eq!( |
1553 | | 1599590549190855000, |
1554 | | parse_timestamp("2020-09-08 13:42:29.190855-05:00").unwrap() |
1555 | | ); |
1556 | | } |
1557 | | |
1558 | | #[test] |
1559 | | #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function: mktime |
1560 | | fn string_to_timestamp_no_timezone() { |
1561 | | // This test is designed to succeed in regardless of the local |
1562 | | // timezone the test machine is running. Thus it is still |
1563 | | // somewhat susceptible to bugs in the use of chrono |
1564 | | let naive_datetime = NaiveDateTime::new( |
1565 | | NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(), |
1566 | | NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(), |
1567 | | ); |
1568 | | |
1569 | | // Ensure both T and ' ' variants work |
1570 | | assert_eq!( |
1571 | | naive_datetime.and_utc().timestamp_nanos_opt().unwrap(), |
1572 | | parse_timestamp("2020-09-08T13:42:29.190855").unwrap() |
1573 | | ); |
1574 | | |
1575 | | assert_eq!( |
1576 | | naive_datetime.and_utc().timestamp_nanos_opt().unwrap(), |
1577 | | parse_timestamp("2020-09-08 13:42:29.190855").unwrap() |
1578 | | ); |
1579 | | |
1580 | | // Also ensure that parsing timestamps with no fractional |
1581 | | // second part works as well |
1582 | | let datetime_whole_secs = NaiveDateTime::new( |
1583 | | NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(), |
1584 | | NaiveTime::from_hms_opt(13, 42, 29).unwrap(), |
1585 | | ) |
1586 | | .and_utc(); |
1587 | | |
1588 | | // Ensure both T and ' ' variants work |
1589 | | assert_eq!( |
1590 | | datetime_whole_secs.timestamp_nanos_opt().unwrap(), |
1591 | | parse_timestamp("2020-09-08T13:42:29").unwrap() |
1592 | | ); |
1593 | | |
1594 | | assert_eq!( |
1595 | | datetime_whole_secs.timestamp_nanos_opt().unwrap(), |
1596 | | parse_timestamp("2020-09-08 13:42:29").unwrap() |
1597 | | ); |
1598 | | |
1599 | | // ensure without time work |
1600 | | // no time, should be the nano second at |
1601 | | // 2020-09-08 0:0:0 |
1602 | | let datetime_no_time = NaiveDateTime::new( |
1603 | | NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(), |
1604 | | NaiveTime::from_hms_opt(0, 0, 0).unwrap(), |
1605 | | ) |
1606 | | .and_utc(); |
1607 | | |
1608 | | assert_eq!( |
1609 | | datetime_no_time.timestamp_nanos_opt().unwrap(), |
1610 | | parse_timestamp("2020-09-08").unwrap() |
1611 | | ) |
1612 | | } |
1613 | | |
/// Cross-checks `string_to_datetime` against chrono's RFC 3339 parser.
///
/// Every input carries an explicit offset (`Z`, `z` or `+hh:mm`). The list
/// mixes fractional seconds of one to sixteen digits, a seconds field of
/// `60`, and a lowercase `t` date/time separator.
#[test]
fn string_to_timestamp_chrono() {
    let rfc3339_inputs = [
        "2020-09-08T13:42:29Z",
        "1969-01-01T00:00:00.1Z",
        "2020-09-08T12:00:12.12345678+00:00",
        "2020-09-08T12:00:12+00:00",
        "2020-09-08T12:00:12.1+00:00",
        "2020-09-08T12:00:12.12+00:00",
        "2020-09-08T12:00:12.123+00:00",
        "2020-09-08T12:00:12.1234+00:00",
        "2020-09-08T12:00:12.12345+00:00",
        "2020-09-08T12:00:12.123456+00:00",
        "2020-09-08T12:00:12.1234567+00:00",
        "2020-09-08T12:00:12.12345678+00:00",
        "2020-09-08T12:00:12.123456789+00:00",
        "2020-09-08T12:00:12.12345678912z",
        "2020-09-08T12:00:12.123456789123Z",
        "2020-09-08T12:00:12.123456789123+02:00",
        "2020-09-08T12:00:12.12345678912345Z",
        "2020-09-08T12:00:12.1234567891234567+02:00",
        "2020-09-08T12:00:60Z",
        "2020-09-08T12:00:60.123Z",
        "2020-09-08T12:00:60.123456+02:00",
        "2020-09-08T12:00:60.1234567891234567+02:00",
        "2020-09-08T12:00:60.999999999+02:00",
        "2020-09-08t12:00:12.12345678+00:00",
        "2020-09-08t12:00:12+00:00",
        "2020-09-08t12:00:12Z",
    ];

    for input in rfc3339_inputs {
        // Reference value: chrono's own parse, converted to UTC.
        let reference = DateTime::parse_from_rfc3339(input)
            .unwrap()
            .with_timezone(&Utc);

        let parsed = string_to_datetime(&Utc, input).unwrap();
        assert_eq!(reference, parsed);
    }
}
1653 | | |
/// Offset-less timestamps parsed with a UTC timezone must match chrono's
/// naive parse of the same text.
#[test]
fn string_to_timestamp_naive() {
    // chrono format used to build the reference value for every input.
    let reference_format = "%Y-%m-%dT%H:%M:%S%.f";
    let inputs = [
        "2018-11-13T17:11:10.011375885995",
        "2030-12-04T17:11:10.123",
        "2030-12-04T17:11:10.1234",
        "2030-12-04T17:11:10.123456",
    ];

    inputs.iter().for_each(|&input| {
        let reference = NaiveDateTime::parse_from_str(input, reference_format).unwrap();
        let parsed = string_to_datetime(&Utc, input).unwrap();
        assert_eq!(reference, parsed.naive_utc());
    });
}
1668 | | |
/// Malformed timestamps must be rejected with the exact error text.
///
/// Each entry pairs an input with the context expected at the end of the
/// `Parser error: Error parsing timestamp from '…': …` message.
#[test]
fn string_to_timestamp_invalid() {
    let rejected = [
        ("", "timestamp must contain at least 10 characters"),
        ("SS", "timestamp must contain at least 10 characters"),
        ("Wed, 18 Feb 2015 23:16:09 GMT", "error parsing date"),
        ("1997-01-31H09:26:56.123Z", "invalid timestamp separator"),
        ("1997-01-31 09:26:56.123Z", "error parsing time"),
        ("1997:01:31T09:26:56.123Z", "error parsing date"),
        ("1997:1:31T09:26:56.123Z", "error parsing date"),
        ("1997-01-32T09:26:56.123Z", "error parsing date"),
        ("1997-13-32T09:26:56.123Z", "error parsing date"),
        ("1997-02-29T09:26:56.123Z", "error parsing date"),
        ("2015-02-30T17:35:20-08:00", "error parsing date"),
        ("1997-01-10T9:26:56.123Z", "error parsing time"),
        ("2015-01-20T25:35:20-08:00", "error parsing time"),
        ("1997-01-10T09:61:56.123Z", "error parsing time"),
        ("1997-01-10T09:61:90.123Z", "error parsing time"),
        ("1997-01-10T12:00:6.123Z", "error parsing time"),
        ("1997-01-31T092656.123Z", "error parsing time"),
        ("1997-01-10T12:00:06.", "error parsing time"),
        ("1997-01-10T12:00:06. ", "error parsing time"),
    ];

    rejected.iter().for_each(|&(s, ctx)| {
        let expected = format!("Parser error: Error parsing timestamp from '{s}': {ctx}");
        let actual = string_to_datetime(&Utc, s).unwrap_err().to_string();
        assert_eq!(actual, expected);
    });
}
1700 | | |
1701 | | // Parse a timestamp to timestamp int with a useful human readable error message |
1702 | | fn parse_timestamp(s: &str) -> Result<i64, ArrowError> { |
1703 | | let result = string_to_timestamp_nanos(s); |
1704 | | if let Err(e) = &result { |
1705 | | eprintln!("Error parsing timestamp '{s}': {e:?}"); |
1706 | | } |
1707 | | result |
1708 | | } |
1709 | | |
/// A string without a timezone should always produce the same output,
/// regardless of the local or session timezone.
///
/// Also checks how `string_to_datetime` applies a supplied timezone: text
/// without an offset is read as local time in that zone, while a trailing `Z`
/// pins the text to UTC.
#[test]
fn string_without_timezone_to_timestamp() {
    let day = NaiveDate::from_ymd_opt(2020, 9, 8).unwrap();

    // Fractional seconds; both the `T` and the ' ' separator must work.
    let with_fraction = NaiveDateTime::new(
        day,
        NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
    );
    for input in ["2020-09-08T13:42:29.190855", "2020-09-08 13:42:29.190855"] {
        assert_eq!(
            with_fraction.and_utc().timestamp_nanos_opt().unwrap(),
            parse_timestamp(input).unwrap()
        );
    }

    // Whole seconds; again both separator variants.
    let whole_seconds =
        NaiveDateTime::new(day, NaiveTime::from_hms_nano_opt(13, 42, 29, 0).unwrap());
    for input in ["2020-09-08T13:42:29", "2020-09-08 13:42:29"] {
        assert_eq!(
            whole_seconds.and_utc().timestamp_nanos_opt().unwrap(),
            parse_timestamp(input).unwrap()
        );
    }

    let tz = "+02:00".parse::<Tz>().unwrap();

    // No offset in the text: the wall-clock time belongs to `tz`.
    let date = string_to_datetime(&tz, "2020-09-08 13:42:29").unwrap();
    assert_eq!(date.naive_utc().to_string(), "2020-09-08 11:42:29");
    assert_eq!(date.naive_local().to_string(), "2020-09-08 13:42:29");

    // Explicit `Z`: the wall-clock time is UTC and is shifted into `tz`.
    let date = string_to_datetime(&tz, "2020-09-08 13:42:29Z").unwrap();
    assert_eq!(date.naive_utc().to_string(), "2020-09-08 13:42:29");
    assert_eq!(date.naive_local().to_string(), "2020-09-08 15:42:29");

    let reference =
        NaiveDateTime::parse_from_str("2020-09-08T13:42:29Z", "%Y-%m-%dT%H:%M:%SZ").unwrap();
    let local = "+08:00".parse::<Tz>().unwrap();

    // Parsed as offset from UTC
    let date = string_to_datetime(&local, "2020-09-08T13:42:29Z").unwrap();
    assert_eq!(reference, date.naive_utc());
    assert_ne!(reference, date.naive_local());

    // Parsed as offset from local
    let date = string_to_datetime(&local, "2020-09-08 13:42:29").unwrap();
    assert_eq!(reference, date.naive_local());
    assert_ne!(reference, date.naive_utc());
}
1773 | | |
/// `Date32Type::parse` must accept the listed date spellings and return
/// `None` for everything in the rejection list.
///
/// Accepted values are compared against chrono's `NaiveDate` parse of the
/// same text, first as a plain date and then as date plus `HH:MM:SS` time.
#[test]
fn parse_date32() {
    let accepted = [
        "2020-09-08",
        "2020-9-8",
        "2020-09-8",
        "2020-9-08",
        "2020-12-1",
        "1690-2-5",
        "2020-09-08 01:02:03",
    ];
    for input in accepted {
        let days = Date32Type::parse(input).unwrap();
        let parsed = date32_to_datetime(days).unwrap();
        let expected = NaiveDate::parse_from_str(input, "%Y-%m-%d")
            .or_else(|_| NaiveDate::parse_from_str(input, "%Y-%m-%d %H:%M:%S"))
            .unwrap();
        assert_eq!(parsed.date(), expected);
    }

    let rejected = [
        "",
        "80-01-01",
        "342",
        "Foo",
        "2020-09-08-03",
        "2020--04-03",
        "2020--",
        "2020-09-08 01",
        "2020-09-08 01:02",
        "2020-09-08 01-02-03",
        "2020-9-8 01:02:03",
        "2020-09-08 1:2:3",
    ];
    rejected.iter().for_each(|&input| {
        assert_eq!(Date32Type::parse(input), None);
    });
}
1811 | | |
/// Table-driven check of `Time64NanosecondType::parse`.
///
/// Each row is an input string and the expected number of nanoseconds since
/// midnight. The table covers 24-hour and AM/PM forms with and without
/// seconds and fractions, a bare integer, and a `:60` seconds field; the
/// custom-format entry point is checked separately at the end.
#[test]
fn parse_time64_nanos() {
    let cases = [
        // digits beyond the ninth fractional place are dropped
        ("02:10:01.1234567899999999", 7_801_123_456_789),
        ("02:10:01.1234567", 7_801_123_456_700),
        ("2:10:01.1234567", 7_801_123_456_700),
        ("12:10:01.123456789 AM", 601_123_456_789),
        ("12:10:01.123456789 am", 601_123_456_789),
        ("2:10:01.12345678 PM", 51_001_123_456_780),
        ("2:10:01.12345678 pm", 51_001_123_456_780),
        ("02:10:01", 7_801_000_000_000),
        ("2:10:01", 7_801_000_000_000),
        ("12:10:01 AM", 601_000_000_000),
        ("12:10:01 am", 601_000_000_000),
        ("2:10:01 PM", 51_001_000_000_000),
        ("2:10:01 pm", 51_001_000_000_000),
        ("02:10", 7_800_000_000_000),
        ("2:10", 7_800_000_000_000),
        ("12:10 AM", 600_000_000_000),
        ("12:10 am", 600_000_000_000),
        ("2:10 PM", 51_000_000_000_000),
        ("2:10 pm", 51_000_000_000_000),
        // parse directly as nanoseconds
        ("1", 1),
        // leap second
        ("23:59:60", 86_400_000_000_000),
    ];
    for (input, nanos) in cases {
        assert_eq!(Time64NanosecondType::parse(input), Some(nanos));
    }

    // custom format
    let formatted =
        Time64NanosecondType::parse_formatted("02 - 10 - 01 - .1234567", "%H - %M - %S - %.f");
    assert_eq!(formatted, Some(7_801_123_456_700));
}
1903 | | |
/// Table-driven check of `Time64MicrosecondType::parse`.
///
/// Each row is an input string and the expected number of microseconds since
/// midnight; the custom-format entry point is checked separately at the end.
#[test]
fn parse_time64_micros() {
    let cases = [
        // expected formats
        ("02:10:01.1234", 7_801_123_400),
        ("2:10:01.1234", 7_801_123_400),
        ("12:10:01.123456 AM", 601_123_456),
        ("12:10:01.123456 am", 601_123_456),
        ("2:10:01.12345 PM", 51_001_123_450),
        ("2:10:01.12345 pm", 51_001_123_450),
        ("02:10:01", 7_801_000_000),
        ("2:10:01", 7_801_000_000),
        ("12:10:01 AM", 601_000_000),
        ("12:10:01 am", 601_000_000),
        ("2:10:01 PM", 51_001_000_000),
        ("2:10:01 pm", 51_001_000_000),
        ("02:10", 7_800_000_000),
        ("2:10", 7_800_000_000),
        ("12:10 AM", 600_000_000),
        ("12:10 am", 600_000_000),
        ("2:10 PM", 51_000_000_000),
        ("2:10 pm", 51_000_000_000),
        // parse directly as microseconds
        ("1", 1),
        // leap second
        ("23:59:60", 86_400_000_000),
    ];
    for (input, micros) in cases {
        assert_eq!(Time64MicrosecondType::parse(input), Some(micros));
    }

    // custom format
    let formatted =
        Time64MicrosecondType::parse_formatted("02 - 10 - 01 - .1234", "%H - %M - %S - %.f");
    assert_eq!(formatted, Some(7_801_123_400));
}
1980 | | |
/// Table-driven check of `Time32MillisecondType::parse`.
///
/// Each row is an input string and the expected number of milliseconds since
/// midnight; the custom-format entry point is checked separately at the end.
#[test]
fn parse_time32_millis() {
    let cases = [
        // expected formats
        ("02:10:01.1", 7_801_100),
        ("2:10:01.1", 7_801_100),
        ("12:10:01.123 AM", 601_123),
        ("12:10:01.123 am", 601_123),
        ("2:10:01.12 PM", 51_001_120),
        ("2:10:01.12 pm", 51_001_120),
        ("02:10:01", 7_801_000),
        ("2:10:01", 7_801_000),
        ("12:10:01 AM", 601_000),
        ("12:10:01 am", 601_000),
        ("2:10:01 PM", 51_001_000),
        ("2:10:01 pm", 51_001_000),
        ("02:10", 7_800_000),
        ("2:10", 7_800_000),
        ("12:10 AM", 600_000),
        ("12:10 am", 600_000),
        ("2:10 PM", 51_000_000),
        ("2:10 pm", 51_000_000),
        // parse directly as milliseconds
        ("1", 1),
        // leap second
        ("23:59:60", 86_400_000),
    ];
    for (input, millis) in cases {
        assert_eq!(Time32MillisecondType::parse(input), Some(millis));
    }

    // custom format
    let formatted =
        Time32MillisecondType::parse_formatted("02 - 10 - 01 - .1", "%H - %M - %S - %.f");
    assert_eq!(formatted, Some(7_801_100));
}
2027 | | |
/// Table-driven check of `Time32SecondType::parse`.
///
/// Each row is an input string and the expected number of seconds since
/// midnight; the custom-format entry point is checked separately at the end.
#[test]
fn parse_time32_secs() {
    let cases = [
        // expected formats (the fraction in the first input does not change the result)
        ("02:10:01.1", 7_801),
        ("02:10:01", 7_801),
        ("2:10:01", 7_801),
        ("12:10:01 AM", 601),
        ("12:10:01 am", 601),
        ("2:10:01 PM", 51_001),
        ("2:10:01 pm", 51_001),
        ("02:10", 7_800),
        ("2:10", 7_800),
        ("12:10 AM", 600),
        ("12:10 am", 600),
        ("2:10 PM", 51_000),
        ("2:10 pm", 51_000),
        // parse directly as seconds
        ("1", 1),
        // leap second
        ("23:59:60", 86400),
    ];
    for (input, seconds) in cases {
        assert_eq!(Time32SecondType::parse(input), Some(seconds));
    }

    // custom format
    let formatted = Time32SecondType::parse_formatted("02 - 10 - 01", "%H - %M - %S");
    assert_eq!(formatted, Some(7_801));
}
2057 | | |
/// Every listed string must be rejected by `string_to_time` (it returns
/// `None`); the offending input is reported if one is accepted.
#[test]
fn test_string_to_time_invalid() {
    let rejected = [
        "25:00",
        "9:00:",
        "009:00",
        "09:0:00",
        "25:00:00",
        "13:00 AM",
        "13:00 PM",
        "12:00. AM",
        "09:0:00",
        "09:01:0",
        "09:01:1",
        "9:1:0",
        "09:01:0",
        "1:00.123",
        "1:00:00.123f",
        " 9:00:00",
        ":09:00",
        "T9:00:00",
        "AM",
    ];
    rejected.iter().for_each(|&case| {
        assert!(string_to_time(case).is_none(), "{case}");
    });
}
2085 | | |
/// `string_to_time` must agree with chrono's `NaiveTime::parse_from_str`.
///
/// Each row is an input and the chrono format used for the reference parse.
/// Where chrono rejects the input the reference is `None`, so
/// `string_to_time` has to reject it as well.
#[test]
fn test_string_to_time_chrono() {
    let cases = [
        ("1:00", "%H:%M"),
        ("12:00", "%H:%M"),
        ("13:00", "%H:%M"),
        ("24:00", "%H:%M"),
        ("1:00:00", "%H:%M:%S"),
        ("12:00:30", "%H:%M:%S"),
        ("13:00:59", "%H:%M:%S"),
        ("24:00:60", "%H:%M:%S"),
        ("09:00:00", "%H:%M:%S%.f"),
        ("0:00:30.123456", "%H:%M:%S%.f"),
        ("0:00 AM", "%I:%M %P"),
        ("1:00 AM", "%I:%M %P"),
        ("12:00 AM", "%I:%M %P"),
        ("13:00 AM", "%I:%M %P"),
        ("0:00 PM", "%I:%M %P"),
        ("1:00 PM", "%I:%M %P"),
        ("12:00 PM", "%I:%M %P"),
        ("13:00 PM", "%I:%M %P"),
        ("1:00 pM", "%I:%M %P"),
        ("1:00 Pm", "%I:%M %P"),
        ("1:00 aM", "%I:%M %P"),
        ("1:00 Am", "%I:%M %P"),
        ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
        ("1:00:30.123456789 PM", "%I:%M:%S%.f %P"),
        ("1:00:30.123456789123 PM", "%I:%M:%S%.f %P"),
        ("1:00:30.1234 PM", "%I:%M:%S%.f %P"),
        ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
        ("1:00:30.123456789123456789 PM", "%I:%M:%S%.f %P"),
        ("1:00:30.12F456 PM", "%I:%M:%S%.f %P"),
    ];
    cases.iter().for_each(|&(s, pattern)| {
        assert_eq!(
            NaiveTime::parse_from_str(s, pattern).ok(),
            string_to_time(s),
            "{s}"
        );
    });
}
2125 | | |
/// Exercises `Interval::parse` with a `Month` default-unit config.
///
/// Covers fractional amounts spilling into smaller fields, mixed signs,
/// sub-second units, unit-less numbers, abbreviated units, inputs without a
/// space between number and unit, and the error text for malformed input.
#[test]
fn test_parse_interval() {
    let config = IntervalParseConfig::new(IntervalUnit::Month);

    // Local shorthands so each assertion fits on one line.
    let parse = |text: &str| Interval::parse(text, &config).unwrap();
    let parse_error = |text: &str| Interval::parse(text, &config).unwrap_err().to_string();

    assert_eq!(Interval::new(1, 0, 0), parse("1 month"));
    assert_eq!(Interval::new(2, 0, 0), parse("2 month"));

    // fractional months and days spill into the smaller fields
    assert_eq!(
        Interval::new(-1, -18, -(NANOS_PER_DAY / 5)),
        parse("-1.5 months -3.2 days"),
    );
    assert_eq!(Interval::new(0, 15, 0), parse("0.5 months"));
    assert_eq!(Interval::new(0, 15, 0), parse(".5 months"));
    assert_eq!(Interval::new(0, -15, 0), parse("-0.5 months"));
    assert_eq!(Interval::new(0, -15, 0), parse("-.5 months"));
    assert_eq!(
        Interval::new(2, 10, 9 * NANOS_PER_HOUR),
        parse("2.1 months 7.25 days 3 hours"),
    );

    assert_eq!(
        parse_error("1 centurys 1 month"),
        r#"Parser error: Invalid input syntax for type interval: "1 centurys 1 month""#
    );

    // years combine with months, signs are per component
    assert_eq!(Interval::new(37, 0, 0), parse("3 year 1 month"));
    assert_eq!(Interval::new(35, 0, 0), parse("3 year -1 month"));
    assert_eq!(Interval::new(-37, 0, 0), parse("-3 year -1 month"));
    assert_eq!(Interval::new(-35, 0, 0), parse("-3 year 1 month"));

    assert_eq!(Interval::new(0, 5, 0), parse("5 days"));
    assert_eq!(
        Interval::new(0, 7, 3 * NANOS_PER_HOUR),
        parse("7 days 3 hours"),
    );
    assert_eq!(
        Interval::new(0, 7, 5 * NANOS_PER_MINUTE),
        parse("7 days 5 minutes"),
    );
    assert_eq!(
        Interval::new(0, 7, -5 * NANOS_PER_MINUTE),
        parse("7 days -5 minutes"),
    );
    assert_eq!(
        Interval::new(0, -7, 5 * NANOS_PER_HOUR),
        parse("-7 days 5 hours"),
    );
    assert_eq!(
        Interval::new(
            0,
            -7,
            -5 * NANOS_PER_HOUR - 5 * NANOS_PER_MINUTE - 5 * NANOS_PER_SECOND
        ),
        parse("-7 days -5 hours -5 minutes -5 seconds"),
    );

    // sub-second units
    assert_eq!(
        Interval::new(12, 0, 25 * NANOS_PER_MILLIS),
        parse("1 year 25 millisecond"),
    );
    assert_eq!(
        Interval::new(
            12,
            1,
            (NANOS_PER_SECOND as f64 * 0.000000001_f64) as i64
        ),
        parse("1 year 1 day 0.000000001 seconds"),
    );
    assert_eq!(
        Interval::new(12, 1, NANOS_PER_MILLIS / 10),
        parse("1 year 1 day 0.1 milliseconds"),
    );
    assert_eq!(
        Interval::new(12, 1, 1000),
        parse("1 year 1 day 1 microsecond"),
    );
    assert_eq!(
        Interval::new(12, 1, 1),
        parse("1 year 1 day 1 nanoseconds"),
    );
    assert_eq!(
        Interval::new(1, 0, -NANOS_PER_SECOND),
        parse("1 month -1 second"),
    );

    // Nanosecond total expected when hour, minute, second and 1.11 ms are all negative.
    let negative_time_nanos = -NANOS_PER_HOUR
        - NANOS_PER_MINUTE
        - NANOS_PER_SECOND
        - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64;

    assert_eq!(
        Interval::new(-13, -8, negative_time_nanos),
        parse("-1 year -1 month -1 week -1 day -1 hour -1 minute -1 second -1.11 millisecond"),
    );

    // no units
    assert_eq!(Interval::new(1, 0, 0), parse("1"));
    assert_eq!(Interval::new(42, 0, 0), parse("42"));
    assert_eq!(
        Interval::new(0, 0, 42_000_000_000),
        Interval::parse("42", &IntervalParseConfig::new(IntervalUnit::Second)).unwrap()
    );

    // shorter units
    assert_eq!(Interval::new(1, 0, 0), parse("1 mon"));
    assert_eq!(Interval::new(1, 0, 0), parse("1 mons"));
    assert_eq!(Interval::new(0, 0, 1_000_000), parse("1 ms"));
    assert_eq!(Interval::new(0, 0, 1_000), parse("1 us"));

    // no space
    assert_eq!(Interval::new(0, 0, 1_000), parse("1us"));
    assert_eq!(Interval::new(0, 0, NANOS_PER_SECOND), parse("1s"));
    assert_eq!(
        Interval::new(1, 2, 10_864_000_000_000),
        parse("1mon 2days 3hr 1min 4sec")
    );

    assert_eq!(
        Interval::new(-13, -8, negative_time_nanos),
        parse("-1year -1month -1week -1day -1 hour -1 minute -1 second -1.11millisecond"),
    );

    assert_eq!(
        parse_error("1h s"),
        r#"Parser error: Invalid input syntax for type interval: "1h s""#
    );

    assert_eq!(
        parse_error("1XX"),
        r#"Parser error: Invalid input syntax for type interval: "1XX""#
    );
}
2353 | | |
/// Naming the same interval unit twice must fail, and the error must name
/// the repeated unit as it was written in the input.
#[test]
fn test_duplicate_interval_type() {
    let config = IntervalParseConfig::new(IntervalUnit::Month);

    let cases = [
        (
            "1 month 1 second 1 second",
            r#"ParseError("Invalid input syntax for type interval: \"1 month 1 second 1 second\". Repeated type 'second'")"#,
        ),
        // test with singular and plural forms
        (
            "1 century 2 centuries",
            r#"ParseError("Invalid input syntax for type interval: \"1 century 2 centuries\". Repeated type 'centuries'")"#,
        ),
    ];

    for (input, expected_debug) in cases {
        let err =
            Interval::parse(input, &config).expect_err("parsing interval should have failed");
        assert_eq!(expected_debug, format!("{err:?}"));
    }
}
2373 | | |
/// `IntervalAmount::from_str` splits a decimal string into an integer part
/// and a scaled fractional part, and rejects malformed fractions.
#[test]
fn test_interval_amount_parsing() {
    // Fractional value this test expects for one tenth.
    let tenth = 10_i64.pow(INTERVAL_PRECISION - 1);

    // integer
    assert_eq!(
        IntervalAmount::from_str("123").unwrap(),
        IntervalAmount::new(123, 0)
    );

    // positive w/ fractional
    assert_eq!(
        IntervalAmount::from_str("0.3").unwrap(),
        IntervalAmount::new(0, 3 * tenth)
    );

    // negative w/ fractional: the sign is carried by both parts
    assert_eq!(
        IntervalAmount::from_str("-3.5").unwrap(),
        IntervalAmount::new(-3, -5 * tenth)
    );

    // invalid: missing fractional, then sign in fractional
    for malformed in ["3.", "3.-5"] {
        assert!(IntervalAmount::from_str(malformed).is_err());
    }
}
2402 | | |
/// A large day count with a fractional part must keep the fraction exact:
/// `100000.1 days` is 100 000 days plus one tenth of a day in nanoseconds.
#[test]
fn test_interval_precision() {
    let config = IntervalParseConfig::new(IntervalUnit::Month);

    assert_eq!(
        Interval::parse("100000.1 days", &config).unwrap(),
        Interval::new(0, 100_000, NANOS_PER_DAY / 10)
    );
}
2412 | | |
/// Table-driven check of `Interval::add`.
///
/// Each row is `(start, amount, unit, expected)`: adding `amount` of `unit`
/// to `start` must produce `expected`.
#[test]
fn test_interval_addition() {
    // Fractional values for one tenth and one hundredth of a unit.
    let tenth = 10_i64.pow(INTERVAL_PRECISION - 1);
    let hundredth = 10_i64.pow(INTERVAL_PRECISION - 2);

    let cases = [
        // add 4.1 centuries
        (
            Interval::new(1, 2, 3),
            IntervalAmount::new(4, tenth),
            IntervalUnit::Century,
            Interval::new(4921, 2, 3),
        ),
        // add 10.25 decades
        (
            Interval::new(1, 2, 3),
            IntervalAmount::new(10, 25 * hundredth),
            IntervalUnit::Decade,
            Interval::new(1231, 2, 3),
        ),
        // add 30.3 years (reminder: Postgres logic does not spill to days/nanos
        // when interval is larger than a month)
        (
            Interval::new(1, 2, 3),
            IntervalAmount::new(30, 3 * tenth),
            IntervalUnit::Year,
            Interval::new(364, 2, 3),
        ),
        // add 1.5 months
        (
            Interval::new(1, 2, 3),
            IntervalAmount::new(1, 5 * tenth),
            IntervalUnit::Month,
            Interval::new(2, 17, 3),
        ),
        // add -2 weeks
        (
            Interval::new(1, 25, 3),
            IntervalAmount::new(-2, 0),
            IntervalUnit::Week,
            Interval::new(1, 11, 3),
        ),
        // add 2.2 days
        (
            Interval::new(12, 15, 3),
            IntervalAmount::new(2, 2 * tenth),
            IntervalUnit::Day,
            Interval::new(12, 17, 3 + 17_280 * NANOS_PER_SECOND),
        ),
        // add 12.5 hours
        (
            Interval::new(1, 2, 3),
            IntervalAmount::new(12, 5 * tenth),
            IntervalUnit::Hour,
            Interval::new(1, 2, 3 + 45_000 * NANOS_PER_SECOND),
        ),
        // add -1.5 minutes
        (
            Interval::new(0, 0, -3),
            IntervalAmount::new(-1, -5 * tenth),
            IntervalUnit::Minute,
            Interval::new(0, 0, -90_000_000_000 - 3),
        ),
    ];

    for (start, amount, unit, expected) in cases {
        let result = start.add(amount, unit).unwrap();
        assert_eq!(result, expected);
    }
}
2516 | | |
/// Parsing `1677-06-14T07:29:01.256` to nanoseconds must fail, with a message
/// ending in `ERR_NANOSECONDS_NOT_SUPPORTED`.
#[test]
fn string_to_timestamp_old() {
    let err = parse_timestamp("1677-06-14T07:29:01.256").unwrap_err();
    assert!(err.to_string().ends_with(ERR_NANOSECONDS_NOT_SUPPORTED));
}
2523 | | |
2524 | | #[test] |
2525 | | fn test_parse_decimal_with_parameter() { |
2526 | | let tests = [ |
2527 | | ("0", 0i128), |
2528 | | ("123.123", 123123i128), |
2529 | | ("123.1234", 123123i128), |
2530 | | ("123.1", 123100i128), |
2531 | | ("123", 123000i128), |
2532 | | ("-123.123", -123123i128), |
2533 | | ("-123.1234", -123123i128), |
2534 | | ("-123.1", -123100i128), |
2535 | | ("-123", -123000i128), |
2536 | | ("0.0000123", 0i128), |
2537 | | ("12.", 12000i128), |
2538 | | ("-12.", -12000i128), |
2539 | | ("00.1", 100i128), |
2540 | | ("-00.1", -100i128), |
2541 | | ("12345678912345678.1234", 12345678912345678123i128), |
2542 | | ("-12345678912345678.1234", -12345678912345678123i128), |
2543 | | ("99999999999999999.999", 99999999999999999999i128), |
2544 | | ("-99999999999999999.999", -99999999999999999999i128), |
2545 | | (".123", 123i128), |
2546 | | ("-.123", -123i128), |
2547 | | ("123.", 123000i128), |
2548 | | ("-123.", -123000i128), |
2549 | | ]; |
2550 | | for (s, i) in tests { |
2551 | | let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3); |
2552 | | assert_eq!(i, result_128.unwrap()); |
2553 | | let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3); |
2554 | | assert_eq!(i256::from_i128(i), result_256.unwrap()); |
2555 | | } |
2556 | | |
2557 | | let e_notation_tests = [ |
2558 | | ("1.23e3", "1230.0", 2), |
2559 | | ("5.6714e+2", "567.14", 4), |
2560 | | ("5.6714e-2", "0.056714", 4), |
2561 | | ("5.6714e-2", "0.056714", 3), |
2562 | | ("5.6741214125e2", "567.41214125", 4), |
2563 | | ("8.91E4", "89100.0", 2), |
2564 | | ("3.14E+5", "314000.0", 2), |
2565 | | ("2.718e0", "2.718", 2), |
2566 | | ("9.999999e-1", "0.9999999", 4), |
2567 | | ("1.23e+3", "1230", 2), |
2568 | | ("1.234559e+3", "1234.559", 2), |
2569 | | ("1.00E-10", "0.0000000001", 11), |
2570 | | ("1.23e-4", "0.000123", 2), |
2571 | | ("9.876e7", "98760000.0", 2), |
2572 | | ("5.432E+8", "543200000.0", 10), |
2573 | | ("1.234567e9", "1234567000.0", 2), |
2574 | | ("1.234567e2", "123.45670000", 2), |
2575 | | ("4749.3e-5", "0.047493", 10), |
2576 | | ("4749.3e+5", "474930000", 10), |
2577 | | ("4749.3e-5", "0.047493", 1), |
2578 | | ("4749.3e+5", "474930000", 1), |
2579 | | ("0E-8", "0", 10), |
2580 | | ("0E+6", "0", 10), |
2581 | | ("1E-8", "0.00000001", 10), |
2582 | | ("12E+6", "12000000", 10), |
2583 | | ("12E-6", "0.000012", 10), |
2584 | | ("0.1e-6", "0.0000001", 10), |
2585 | | ("0.1e+6", "100000", 10), |
2586 | | ("0.12e-6", "0.00000012", 10), |
2587 | | ("0.12e+6", "120000", 10), |
2588 | | ("000000000001e0", "000000000001", 3), |
2589 | | ("000001.1034567002e0", "000001.1034567002", 3), |
2590 | | ("1.234e16", "12340000000000000", 0), |
2591 | | ("123.4e16", "1234000000000000000", 0), |
2592 | | ]; |
2593 | | for (e, d, scale) in e_notation_tests { |
2594 | | let result_128_e = parse_decimal::<Decimal128Type>(e, 20, scale); |
2595 | | let result_128_d = parse_decimal::<Decimal128Type>(d, 20, scale); |
2596 | | assert_eq!(result_128_e.unwrap(), result_128_d.unwrap()); |
2597 | | let result_256_e = parse_decimal::<Decimal256Type>(e, 20, scale); |
2598 | | let result_256_d = parse_decimal::<Decimal256Type>(d, 20, scale); |
2599 | | assert_eq!(result_256_e.unwrap(), result_256_d.unwrap()); |
2600 | | } |
2601 | | let can_not_parse_tests = [ |
2602 | | "123,123", |
2603 | | ".", |
2604 | | "123.123.123", |
2605 | | "", |
2606 | | "+", |
2607 | | "-", |
2608 | | "e", |
2609 | | "1.3e+e3", |
2610 | | "5.6714ee-2", |
2611 | | "4.11ee-+4", |
2612 | | "4.11e++4", |
2613 | | "1.1e.12", |
2614 | | "1.23e+3.", |
2615 | | "1.23e+3.1", |
2616 | | ]; |
2617 | | for s in can_not_parse_tests { |
2618 | | let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3); |
2619 | | assert_eq!( |
2620 | | format!("Parser error: can't parse the string value {s} to decimal"), |
2621 | | result_128.unwrap_err().to_string() |
2622 | | ); |
2623 | | let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3); |
2624 | | assert_eq!( |
2625 | | format!("Parser error: can't parse the string value {s} to decimal"), |
2626 | | result_256.unwrap_err().to_string() |
2627 | | ); |
2628 | | } |
2629 | | let overflow_parse_tests = [ |
2630 | | ("12345678", 3), |
2631 | | ("1.2345678e7", 3), |
2632 | | ("12345678.9", 3), |
2633 | | ("1.23456789e+7", 3), |
2634 | | ("99999999.99", 3), |
2635 | | ("9.999999999e7", 3), |
2636 | | ("12345678908765.123456", 3), |
2637 | | ("123456789087651234.56e-4", 3), |
2638 | | ("1234560000000", 0), |
2639 | | ("1.23456e12", 0), |
2640 | | ]; |
2641 | | for (s, scale) in overflow_parse_tests { |
2642 | | let result_128 = parse_decimal::<Decimal128Type>(s, 10, scale); |
2643 | | let expected_128 = "Parser error: parse decimal overflow"; |
2644 | | let actual_128 = result_128.unwrap_err().to_string(); |
2645 | | |
2646 | | assert!( |
2647 | | actual_128.contains(expected_128), |
2648 | | "actual: '{actual_128}', expected: '{expected_128}'" |
2649 | | ); |
2650 | | |
2651 | | let result_256 = parse_decimal::<Decimal256Type>(s, 10, scale); |
2652 | | let expected_256 = "Parser error: parse decimal overflow"; |
2653 | | let actual_256 = result_256.unwrap_err().to_string(); |
2654 | | |
2655 | | assert!( |
2656 | | actual_256.contains(expected_256), |
2657 | | "actual: '{actual_256}', expected: '{expected_256}'" |
2658 | | ); |
2659 | | } |
2660 | | |
2661 | | let edge_tests_128 = [ |
2662 | | ( |
2663 | | "99999999999999999999999999999999999999", |
2664 | | 99999999999999999999999999999999999999i128, |
2665 | | 0, |
2666 | | ), |
2667 | | ( |
2668 | | "999999999999999999999999999999999999.99", |
2669 | | 99999999999999999999999999999999999999i128, |
2670 | | 2, |
2671 | | ), |
2672 | | ( |
2673 | | "9999999999999999999999999.9999999999999", |
2674 | | 99999999999999999999999999999999999999i128, |
2675 | | 13, |
2676 | | ), |
2677 | | ( |
2678 | | "9999999999999999999999999", |
2679 | | 99999999999999999999999990000000000000i128, |
2680 | | 13, |
2681 | | ), |
2682 | | ( |
2683 | | "0.99999999999999999999999999999999999999", |
2684 | | 99999999999999999999999999999999999999i128, |
2685 | | 38, |
2686 | | ), |
2687 | | ( |
2688 | | "0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001016744", |
2689 | | 0i128, |
2690 | | 15, |
2691 | | ), |
2692 | | ( |
2693 | | "1.016744e-320", |
2694 | | 0i128, |
2695 | | 15, |
2696 | | ), |
2697 | | ( |
2698 | | "-1e3", |
2699 | | -1000000000i128, |
2700 | | 6, |
2701 | | ), |
2702 | | ( |
2703 | | "+1e3", |
2704 | | 1000000000i128, |
2705 | | 6, |
2706 | | ), |
2707 | | ( |
2708 | | "-1e31", |
2709 | | -10000000000000000000000000000000000000i128, |
2710 | | 6, |
2711 | | ), |
2712 | | ]; |
2713 | | for (s, i, scale) in edge_tests_128 { |
2714 | | let result_128 = parse_decimal::<Decimal128Type>(s, 38, scale); |
2715 | | assert_eq!(i, result_128.unwrap()); |
2716 | | } |
2717 | | let edge_tests_256 = [ |
2718 | | ( |
2719 | | "9999999999999999999999999999999999999999999999999999999999999999999999999999", |
2720 | | i256::from_string( |
2721 | | "9999999999999999999999999999999999999999999999999999999999999999999999999999", |
2722 | | ) |
2723 | | .unwrap(), |
2724 | | 0, |
2725 | | ), |
2726 | | ( |
2727 | | "999999999999999999999999999999999999999999999999999999999999999999999999.9999", |
2728 | | i256::from_string( |
2729 | | "9999999999999999999999999999999999999999999999999999999999999999999999999999", |
2730 | | ) |
2731 | | .unwrap(), |
2732 | | 4, |
2733 | | ), |
2734 | | ( |
2735 | | "99999999999999999999999999999999999999999999999999.99999999999999999999999999", |
2736 | | i256::from_string( |
2737 | | "9999999999999999999999999999999999999999999999999999999999999999999999999999", |
2738 | | ) |
2739 | | .unwrap(), |
2740 | | 26, |
2741 | | ), |
2742 | | ( |
2743 | | "9.999999999999999999999999999999999999999999999999999999999999999999999999999e49", |
2744 | | i256::from_string( |
2745 | | "9999999999999999999999999999999999999999999999999999999999999999999999999999", |
2746 | | ) |
2747 | | .unwrap(), |
2748 | | 26, |
2749 | | ), |
2750 | | ( |
2751 | | "99999999999999999999999999999999999999999999999999", |
2752 | | i256::from_string( |
2753 | | "9999999999999999999999999999999999999999999999999900000000000000000000000000", |
2754 | | ) |
2755 | | .unwrap(), |
2756 | | 26, |
2757 | | ), |
2758 | | ( |
2759 | | "9.9999999999999999999999999999999999999999999999999e+49", |
2760 | | i256::from_string( |
2761 | | "9999999999999999999999999999999999999999999999999900000000000000000000000000", |
2762 | | ) |
2763 | | .unwrap(), |
2764 | | 26, |
2765 | | ), |
2766 | | ]; |
2767 | | for (s, i, scale) in edge_tests_256 { |
2768 | | let result = parse_decimal::<Decimal256Type>(s, 76, scale); |
2769 | | assert_eq!(i, result.unwrap()); |
2770 | | } |
2771 | | } |
2772 | | |
/// Degenerate inputs must yield `None` rather than a value.
///
/// The integer and float parsers are checked with both the empty string and
/// a lone `+`; the timestamp and date parsers are checked with the empty
/// string only.
#[test]
fn test_parse_empty() {
    for input in ["", "+"] {
        assert_eq!(Int32Type::parse(input), None);
        assert_eq!(Int64Type::parse(input), None);
        assert_eq!(UInt32Type::parse(input), None);
        assert_eq!(UInt64Type::parse(input), None);
        assert_eq!(Float32Type::parse(input), None);
        assert_eq!(Float64Type::parse(input), None);
    }

    assert_eq!(TimestampNanosecondType::parse(""), None);
    assert_eq!(Date32Type::parse(""), None);
}
2790 | | |
/// Parsing the bare string `"1"` with a config built from
/// `IntervalUnit::Second` must produce zero months, zero days and exactly
/// `NANOS_PER_SECOND` nanoseconds.
///
/// NOTE(review): presumably the unit passed to `IntervalParseConfig::new` is
/// the default applied to unit-less values; confirm against its definition.
#[test]
fn test_parse_interval_month_day_nano_config() {
    let config = IntervalParseConfig::new(IntervalUnit::Second);
    let parsed = parse_interval_month_day_nano_config("1", config).unwrap();

    assert_eq!(
        (parsed.months, parsed.days, parsed.nanoseconds),
        (0, 0, NANOS_PER_SECOND)
    );
}
2802 | | } |