/Users/andrewlamb/Software/arrow-rs/arrow-array/src/temporal_conversions.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! Conversion methods for dates and times. |
19 | | |
20 | | use crate::timezone::Tz; |
21 | | use crate::ArrowPrimitiveType; |
22 | | use arrow_schema::{DataType, TimeUnit}; |
23 | | use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Timelike, Utc}; |
24 | | |
/// Number of seconds in a day (24 hours of 60 minutes of 60 seconds)
pub const SECONDS_IN_DAY: i64 = 24 * 60 * 60;
/// Number of milliseconds in a second
pub const MILLISECONDS: i64 = 1_000;
/// Number of microseconds in a second
pub const MICROSECONDS: i64 = 1_000 * MILLISECONDS;
/// Number of nanoseconds in a second
pub const NANOSECONDS: i64 = 1_000 * MICROSECONDS;

/// Number of milliseconds in a day
pub const MILLISECONDS_IN_DAY: i64 = MILLISECONDS * SECONDS_IN_DAY;
/// Number of microseconds in a day
pub const MICROSECONDS_IN_DAY: i64 = MICROSECONDS * SECONDS_IN_DAY;
/// Number of nanoseconds in a day
pub const NANOSECONDS_IN_DAY: i64 = NANOSECONDS * SECONDS_IN_DAY;
40 | | |
/// Number of days between December 31, 1 BCE (defined here as day 0) and
/// January 1, 1970. The value mirrors a constant from the chrono crate.
///
/// The terms of the expression are, in order:
/// - `4 * 146_097`: four full leap year cycles, reaching December 31, 1600
/// - `1`: one day, reaching January 1, 1601
/// - `369 * 365`: the 369 years until January 1, 1970
/// - `369 / 4`: the years among those that are leap years by the
///   every-fourth-year rule (92)
/// - `3`: subtracted because 1700, 1800 and 1900 were not leap years
///
/// The total is 719163.
pub const UNIX_EPOCH_DAY: i64 = 4 * 146_097 + 1 + 369 * 365 + 369 / 4 - 3;
52 | | |
53 | | /// converts a `i32` representing a `date32` to [`NaiveDateTime`] |
54 | | #[inline] |
55 | 0 | pub fn date32_to_datetime(v: i32) -> Option<NaiveDateTime> { |
56 | 0 | Some(DateTime::from_timestamp(v as i64 * SECONDS_IN_DAY, 0)?.naive_utc()) |
57 | 0 | } |
58 | | |
59 | | /// converts a `i64` representing a `date64` to [`NaiveDateTime`] |
60 | | #[inline] |
61 | 0 | pub fn date64_to_datetime(v: i64) -> Option<NaiveDateTime> { |
62 | 0 | let (sec, milli_sec) = split_second(v, MILLISECONDS); |
63 | | |
64 | 0 | let datetime = DateTime::from_timestamp( |
65 | | // extract seconds from milliseconds |
66 | 0 | sec, |
67 | | // discard extracted seconds and convert milliseconds to nanoseconds |
68 | 0 | milli_sec * MICROSECONDS as u32, |
69 | 0 | )?; |
70 | 0 | Some(datetime.naive_utc()) |
71 | 0 | } |
72 | | |
73 | | /// converts a `i32` representing a `time32(s)` to [`NaiveDateTime`] |
74 | | #[inline] |
75 | 0 | pub fn time32s_to_time(v: i32) -> Option<NaiveTime> { |
76 | 0 | NaiveTime::from_num_seconds_from_midnight_opt(v as u32, 0) |
77 | 0 | } |
78 | | |
79 | | /// converts a `i32` representing a `time32(ms)` to [`NaiveDateTime`] |
80 | | #[inline] |
81 | 0 | pub fn time32ms_to_time(v: i32) -> Option<NaiveTime> { |
82 | 0 | let v = v as i64; |
83 | 0 | NaiveTime::from_num_seconds_from_midnight_opt( |
84 | | // extract seconds from milliseconds |
85 | 0 | (v / MILLISECONDS) as u32, |
86 | | // discard extracted seconds and convert milliseconds to |
87 | | // nanoseconds |
88 | 0 | (v % MILLISECONDS * MICROSECONDS) as u32, |
89 | | ) |
90 | 0 | } |
91 | | |
92 | | /// converts a `i64` representing a `time64(us)` to [`NaiveDateTime`] |
93 | | #[inline] |
94 | 0 | pub fn time64us_to_time(v: i64) -> Option<NaiveTime> { |
95 | 0 | NaiveTime::from_num_seconds_from_midnight_opt( |
96 | | // extract seconds from microseconds |
97 | 0 | (v / MICROSECONDS) as u32, |
98 | | // discard extracted seconds and convert microseconds to |
99 | | // nanoseconds |
100 | 0 | (v % MICROSECONDS * MILLISECONDS) as u32, |
101 | | ) |
102 | 0 | } |
103 | | |
104 | | /// converts a `i64` representing a `time64(ns)` to [`NaiveDateTime`] |
105 | | #[inline] |
106 | 0 | pub fn time64ns_to_time(v: i64) -> Option<NaiveTime> { |
107 | 0 | NaiveTime::from_num_seconds_from_midnight_opt( |
108 | | // extract seconds from nanoseconds |
109 | 0 | (v / NANOSECONDS) as u32, |
110 | | // discard extracted seconds |
111 | 0 | (v % NANOSECONDS) as u32, |
112 | | ) |
113 | 0 | } |
114 | | |
115 | | /// converts [`NaiveTime`] to a `i32` representing a `time32(s)` |
116 | | #[inline] |
117 | 0 | pub fn time_to_time32s(v: NaiveTime) -> i32 { |
118 | 0 | v.num_seconds_from_midnight() as i32 |
119 | 0 | } |
120 | | |
121 | | /// converts [`NaiveTime`] to a `i32` representing a `time32(ms)` |
122 | | #[inline] |
123 | 0 | pub fn time_to_time32ms(v: NaiveTime) -> i32 { |
124 | 0 | (v.num_seconds_from_midnight() as i64 * MILLISECONDS |
125 | 0 | + v.nanosecond() as i64 * MILLISECONDS / NANOSECONDS) as i32 |
126 | 0 | } |
127 | | |
128 | | /// converts [`NaiveTime`] to a `i64` representing a `time64(us)` |
129 | | #[inline] |
130 | 0 | pub fn time_to_time64us(v: NaiveTime) -> i64 { |
131 | 0 | v.num_seconds_from_midnight() as i64 * MICROSECONDS |
132 | 0 | + v.nanosecond() as i64 * MICROSECONDS / NANOSECONDS |
133 | 0 | } |
134 | | |
135 | | /// converts [`NaiveTime`] to a `i64` representing a `time64(ns)` |
136 | | #[inline] |
137 | 0 | pub fn time_to_time64ns(v: NaiveTime) -> i64 { |
138 | 0 | v.num_seconds_from_midnight() as i64 * NANOSECONDS + v.nanosecond() as i64 |
139 | 0 | } |
140 | | |
141 | | /// converts a `i64` representing a `timestamp(s)` to [`NaiveDateTime`] |
142 | | #[inline] |
143 | 0 | pub fn timestamp_s_to_datetime(v: i64) -> Option<NaiveDateTime> { |
144 | 0 | Some(DateTime::from_timestamp(v, 0)?.naive_utc()) |
145 | 0 | } |
146 | | |
147 | | /// Similar to timestamp_s_to_datetime but only compute `date` |
148 | | #[inline] |
149 | | pub fn timestamp_s_to_date(secs: i64) -> Option<NaiveDateTime> { |
150 | | let days = secs.div_euclid(86_400) + UNIX_EPOCH_DAY; |
151 | | if days < i32::MIN as i64 || days > i32::MAX as i64 { |
152 | | return None; |
153 | | } |
154 | | let date = NaiveDate::from_num_days_from_ce_opt(days as i32)?; |
155 | | Some(date.and_time(NaiveTime::default()).and_utc().naive_utc()) |
156 | | } |
157 | | |
158 | | /// Similar to timestamp_s_to_datetime but only compute `time` |
159 | | #[inline] |
160 | | pub fn timestamp_s_to_time(secs: i64) -> Option<NaiveDateTime> { |
161 | | let secs = secs.rem_euclid(86_400); |
162 | | let time = NaiveTime::from_num_seconds_from_midnight_opt(secs as u32, 0)?; |
163 | | Some( |
164 | | DateTime::<Utc>::from_naive_utc_and_offset( |
165 | | NaiveDateTime::new(NaiveDate::default(), time), |
166 | | Utc, |
167 | | ) |
168 | | .naive_utc(), |
169 | | ) |
170 | | } |
171 | | |
172 | | /// converts a `i64` representing a `timestamp(ms)` to [`NaiveDateTime`] |
173 | | #[inline] |
174 | 0 | pub fn timestamp_ms_to_datetime(v: i64) -> Option<NaiveDateTime> { |
175 | 0 | let (sec, milli_sec) = split_second(v, MILLISECONDS); |
176 | | |
177 | 0 | let datetime = DateTime::from_timestamp( |
178 | | // extract seconds from milliseconds |
179 | 0 | sec, |
180 | | // discard extracted seconds and convert milliseconds to nanoseconds |
181 | 0 | milli_sec * MICROSECONDS as u32, |
182 | 0 | )?; |
183 | 0 | Some(datetime.naive_utc()) |
184 | 0 | } |
185 | | |
186 | | /// converts a `i64` representing a `timestamp(us)` to [`NaiveDateTime`] |
187 | | #[inline] |
188 | 0 | pub fn timestamp_us_to_datetime(v: i64) -> Option<NaiveDateTime> { |
189 | 0 | let (sec, micro_sec) = split_second(v, MICROSECONDS); |
190 | | |
191 | 0 | let datetime = DateTime::from_timestamp( |
192 | | // extract seconds from microseconds |
193 | 0 | sec, |
194 | | // discard extracted seconds and convert microseconds to nanoseconds |
195 | 0 | micro_sec * MILLISECONDS as u32, |
196 | 0 | )?; |
197 | 0 | Some(datetime.naive_utc()) |
198 | 0 | } |
199 | | |
200 | | /// converts a `i64` representing a `timestamp(ns)` to [`NaiveDateTime`] |
201 | | #[inline] |
202 | 0 | pub fn timestamp_ns_to_datetime(v: i64) -> Option<NaiveDateTime> { |
203 | 0 | let (sec, nano_sec) = split_second(v, NANOSECONDS); |
204 | | |
205 | 0 | let datetime = DateTime::from_timestamp( |
206 | | // extract seconds from nanoseconds |
207 | 0 | sec, // discard extracted seconds |
208 | 0 | nano_sec, |
209 | 0 | )?; |
210 | 0 | Some(datetime.naive_utc()) |
211 | 0 | } |
212 | | |
/// Splits `v`, a count of sub-second units, into whole seconds and the
/// remaining units, where `base` is the number of units per second.
///
/// Euclidean division is used, so the remainder is never negative: for a
/// negative `v` the second count is rounded toward negative infinity.
#[inline]
pub(crate) fn split_second(v: i64, base: i64) -> (i64, u32) {
    let whole = v.div_euclid(base);
    let fraction = v.rem_euclid(base) as u32;
    (whole, fraction)
}
217 | | |
218 | | /// converts a `i64` representing a `duration(s)` to [`Duration`] |
219 | | #[inline] |
220 | | #[deprecated(since = "55.2.0", note = "Use `try_duration_s_to_duration` instead")] |
221 | | pub fn duration_s_to_duration(v: i64) -> Duration { |
222 | | Duration::try_seconds(v).unwrap() |
223 | | } |
224 | | |
225 | | /// converts a `i64` representing a `duration(s)` to [`Option<Duration>`] |
226 | | #[inline] |
227 | 0 | pub fn try_duration_s_to_duration(v: i64) -> Option<Duration> { |
228 | 0 | Duration::try_seconds(v) |
229 | 0 | } |
230 | | |
231 | | /// converts a `i64` representing a `duration(ms)` to [`Duration`] |
232 | | #[inline] |
233 | | #[deprecated(since = "55.2.0", note = "Use `try_duration_ms_to_duration` instead")] |
234 | | pub fn duration_ms_to_duration(v: i64) -> Duration { |
235 | | Duration::try_seconds(v).unwrap() |
236 | | } |
237 | | |
238 | | /// converts a `i64` representing a `duration(ms)` to [`Option<Duration>`] |
239 | | #[inline] |
240 | 0 | pub fn try_duration_ms_to_duration(v: i64) -> Option<Duration> { |
241 | 0 | Duration::try_milliseconds(v) |
242 | 0 | } |
243 | | |
244 | | /// converts a `i64` representing a `duration(us)` to [`Duration`] |
245 | | #[inline] |
246 | 0 | pub fn duration_us_to_duration(v: i64) -> Duration { |
247 | 0 | Duration::microseconds(v) |
248 | 0 | } |
249 | | |
250 | | /// converts a `i64` representing a `duration(ns)` to [`Duration`] |
251 | | #[inline] |
252 | 0 | pub fn duration_ns_to_duration(v: i64) -> Duration { |
253 | 0 | Duration::nanoseconds(v) |
254 | 0 | } |
255 | | |
256 | | /// Converts an [`ArrowPrimitiveType`] to [`NaiveDateTime`] |
257 | 0 | pub fn as_datetime<T: ArrowPrimitiveType>(v: i64) -> Option<NaiveDateTime> { |
258 | 0 | match T::DATA_TYPE { |
259 | 0 | DataType::Date32 => date32_to_datetime(v as i32), |
260 | 0 | DataType::Date64 => date64_to_datetime(v), |
261 | 0 | DataType::Time32(_) | DataType::Time64(_) => None, |
262 | 0 | DataType::Timestamp(unit, _) => match unit { |
263 | 0 | TimeUnit::Second => timestamp_s_to_datetime(v), |
264 | 0 | TimeUnit::Millisecond => timestamp_ms_to_datetime(v), |
265 | 0 | TimeUnit::Microsecond => timestamp_us_to_datetime(v), |
266 | 0 | TimeUnit::Nanosecond => timestamp_ns_to_datetime(v), |
267 | | }, |
268 | | // interval is not yet fully documented [ARROW-3097] |
269 | 0 | DataType::Interval(_) => None, |
270 | 0 | _ => None, |
271 | | } |
272 | 0 | } |
273 | | |
274 | | /// Converts an [`ArrowPrimitiveType`] to [`DateTime<Tz>`] |
275 | 0 | pub fn as_datetime_with_timezone<T: ArrowPrimitiveType>(v: i64, tz: Tz) -> Option<DateTime<Tz>> { |
276 | 0 | let naive = as_datetime::<T>(v)?; |
277 | 0 | Some(Utc.from_utc_datetime(&naive).with_timezone(&tz)) |
278 | 0 | } |
279 | | |
280 | | /// Converts an [`ArrowPrimitiveType`] to [`NaiveDate`] |
281 | 0 | pub fn as_date<T: ArrowPrimitiveType>(v: i64) -> Option<NaiveDate> { |
282 | 0 | as_datetime::<T>(v).map(|datetime| datetime.date()) |
283 | 0 | } |
284 | | |
285 | | /// Converts an [`ArrowPrimitiveType`] to [`NaiveTime`] |
286 | 0 | pub fn as_time<T: ArrowPrimitiveType>(v: i64) -> Option<NaiveTime> { |
287 | 0 | match T::DATA_TYPE { |
288 | 0 | DataType::Time32(unit) => { |
289 | | // safe to immediately cast to u32 as `self.value(i)` is positive i32 |
290 | 0 | let v = v as u32; |
291 | 0 | match unit { |
292 | 0 | TimeUnit::Second => time32s_to_time(v as i32), |
293 | 0 | TimeUnit::Millisecond => time32ms_to_time(v as i32), |
294 | 0 | _ => None, |
295 | | } |
296 | | } |
297 | 0 | DataType::Time64(unit) => match unit { |
298 | 0 | TimeUnit::Microsecond => time64us_to_time(v), |
299 | 0 | TimeUnit::Nanosecond => time64ns_to_time(v), |
300 | 0 | _ => None, |
301 | | }, |
302 | 0 | DataType::Timestamp(_, _) => as_datetime::<T>(v).map(|datetime| datetime.time()), |
303 | 0 | DataType::Date32 | DataType::Date64 => NaiveTime::from_hms_opt(0, 0, 0), |
304 | 0 | DataType::Interval(_) => None, |
305 | 0 | _ => None, |
306 | | } |
307 | 0 | } |
308 | | |
309 | | /// Converts an [`ArrowPrimitiveType`] to [`Duration`] |
310 | | pub fn as_duration<T: ArrowPrimitiveType>(v: i64) -> Option<Duration> { |
311 | | match T::DATA_TYPE { |
312 | | DataType::Duration(unit) => match unit { |
313 | | TimeUnit::Second => try_duration_s_to_duration(v), |
314 | | TimeUnit::Millisecond => try_duration_ms_to_duration(v), |
315 | | TimeUnit::Microsecond => Some(duration_us_to_duration(v)), |
316 | | TimeUnit::Nanosecond => Some(duration_ns_to_duration(v)), |
317 | | }, |
318 | | _ => None, |
319 | | } |
320 | | } |
321 | | |
#[cfg(test)]
mod tests {
    use super::{
        date64_to_datetime, split_second, timestamp_ms_to_datetime, timestamp_ns_to_datetime,
        timestamp_s_to_date, timestamp_s_to_datetime, timestamp_s_to_time,
        timestamp_us_to_datetime, NANOSECONDS,
    };
    use chrono::{DateTime, NaiveDateTime};

    /// Builds the expected naive UTC datetime from whole seconds and a
    /// nanosecond part.
    fn naive_utc(secs: i64, nanos: u32) -> Option<NaiveDateTime> {
        DateTime::from_timestamp(secs, nanos).map(|x| x.naive_utc())
    }

    /// The date-only and time-only helpers must agree with the full
    /// conversion.
    #[test]
    fn test_timestamp_func() {
        let timestamp = 1234;
        let full = timestamp_s_to_datetime(timestamp).unwrap();

        let date_only = timestamp_s_to_date(timestamp).unwrap();
        assert_eq!(date_only.date(), full.date());

        let time_only = timestamp_s_to_time(timestamp).unwrap();
        assert_eq!(time_only.time(), full.time());
    }

    /// Negative nanosecond timestamps round toward the earlier second.
    #[test]
    fn negative_input_timestamp_ns_to_datetime() {
        assert_eq!(timestamp_ns_to_datetime(-1), naive_utc(-1, 999_999_999));
        assert_eq!(
            timestamp_ns_to_datetime(-1_000_000_001),
            naive_utc(-2, 999_999_999)
        );
    }

    /// Negative microsecond timestamps round toward the earlier second.
    #[test]
    fn negative_input_timestamp_us_to_datetime() {
        assert_eq!(timestamp_us_to_datetime(-1), naive_utc(-1, 999_999_000));
        assert_eq!(
            timestamp_us_to_datetime(-1_000_001),
            naive_utc(-2, 999_999_000)
        );
    }

    /// Negative millisecond timestamps round toward the earlier second.
    #[test]
    fn negative_input_timestamp_ms_to_datetime() {
        assert_eq!(timestamp_ms_to_datetime(-1), naive_utc(-1, 999_000_000));
        assert_eq!(timestamp_ms_to_datetime(-1_001), naive_utc(-2, 999_000_000));
    }

    /// Negative `date64` values round toward the earlier second.
    #[test]
    fn negative_input_date64_to_datetime() {
        assert_eq!(date64_to_datetime(-1), naive_utc(-1, 999_000_000));
        assert_eq!(date64_to_datetime(-1_001), naive_utc(-2, 999_000_000));
    }

    /// `split_second` yields a non-negative remainder for every input.
    #[test]
    fn test_split_seconds() {
        // (input in nanoseconds, expected seconds, expected nanoseconds)
        let cases: [(i64, i64, u32); 4] = [
            (100, 0, 100),
            (123_000_000_456, 123, 456),
            (-1, -1, 999_999_999),
            (-123_000_000_001, -124, 999_999_999),
        ];

        for (input, expected_sec, expected_nano_sec) in cases {
            let (sec, nano_sec) = split_second(input, NANOSECONDS);
            assert_eq!(sec, expected_sec);
            assert_eq!(nano_sec, expected_nano_sec);
        }
    }
}