/Users/andrewlamb/Software/arrow-rs/arrow-array/src/timezone.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! Timezone for timestamp arrays |
19 | | |
20 | | use arrow_schema::ArrowError; |
21 | | use chrono::FixedOffset; |
22 | | pub use private::{Tz, TzOffset}; |
23 | | |
24 | | /// Parses a fixed offset of the form "+09:00", "-09" or "+0930" |
25 | 0 | fn parse_fixed_offset(tz: &str) -> Option<FixedOffset> { |
26 | 0 | let bytes = tz.as_bytes(); |
27 | | |
28 | 0 | let mut values = match bytes.len() { |
29 | | // [+-]XX:XX |
30 | 0 | 6 if bytes[3] == b':' => [bytes[1], bytes[2], bytes[4], bytes[5]], |
31 | | // [+-]XXXX |
32 | 0 | 5 => [bytes[1], bytes[2], bytes[3], bytes[4]], |
33 | | // [+-]XX |
34 | 0 | 3 => [bytes[1], bytes[2], b'0', b'0'], |
35 | 0 | _ => return None, |
36 | | }; |
37 | 0 | values.iter_mut().for_each(|x| *x = x.wrapping_sub(b'0')); |
38 | 0 | if values.iter().any(|x| *x > 9) { |
39 | 0 | return None; |
40 | 0 | } |
41 | 0 | let secs = |
42 | 0 | (values[0] * 10 + values[1]) as i32 * 60 * 60 + (values[2] * 10 + values[3]) as i32 * 60; |
43 | | |
44 | 0 | match bytes[0] { |
45 | 0 | b'+' => FixedOffset::east_opt(secs), |
46 | 0 | b'-' => FixedOffset::west_opt(secs), |
47 | 0 | _ => None, |
48 | | } |
49 | 0 | } |
50 | | |
#[cfg(feature = "chrono-tz")]
mod private {
    use super::*;
    use chrono::offset::TimeZone;
    use chrono::{LocalResult, NaiveDate, NaiveDateTime, Offset};
    use std::fmt::Display;
    use std::str::FromStr;

    /// An [`Offset`] for [`Tz`]
    ///
    /// Stores the resolved [`FixedOffset`] together with the [`Tz`] it was
    /// computed from, which is what [`TimeZone::from_offset`] hands back.
    #[derive(Debug, Copy, Clone)]
    pub struct TzOffset {
        tz: Tz,
        offset: FixedOffset,
    }

    impl std::fmt::Display for TzOffset {
        /// Formats as the underlying [`FixedOffset`]
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            self.offset.fmt(f)
        }
    }

    impl Offset for TzOffset {
        fn fix(&self) -> FixedOffset {
            self.offset
        }
    }

    /// An Arrow [`TimeZone`]
    ///
    /// Parsed via [`FromStr`] from either a fixed offset string (see
    /// `parse_fixed_offset`) or, failing that, any string that
    /// [`chrono_tz::Tz`] can parse.
    #[derive(Debug, Copy, Clone)]
    pub struct Tz(TzInner);

    /// The two kinds of timezone a [`Tz`] can wrap
    #[derive(Debug, Copy, Clone)]
    enum TzInner {
        Timezone(chrono_tz::Tz),
        Offset(FixedOffset),
    }

    impl FromStr for Tz {
        type Err = ArrowError;

        /// Tries the fixed offset layouts first and falls back to
        /// [`chrono_tz::Tz`] parsing.
        ///
        /// # Errors
        ///
        /// Returns [`ArrowError::ParseError`] if neither parser accepts `tz`.
        fn from_str(tz: &str) -> Result<Self, Self::Err> {
            match parse_fixed_offset(tz) {
                Some(offset) => Ok(Self(TzInner::Offset(offset))),
                None => Ok(Self(TzInner::Timezone(tz.parse().map_err(|e| {
                    ArrowError::ParseError(format!("Invalid timezone \"{tz}\": {e}"))
                })?))),
            }
        }
    }

    impl Display for Tz {
        /// Formats as whichever inner timezone this [`Tz`] wraps
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            match self.0 {
                TzInner::Timezone(tz) => tz.fmt(f),
                TzInner::Offset(offset) => offset.fmt(f),
            }
        }
    }

    /// Evaluates block `$b` with `$tz` bound to the inner timezone of `$s`,
    /// regardless of which [`TzInner`] variant it holds.
    macro_rules! tz {
        ($s:ident, $tz:ident, $b:block) => {
            match $s.0 {
                TzInner::Timezone($tz) => $b,
                TzInner::Offset($tz) => $b,
            }
        };
    }

    // Every method delegates to the wrapped timezone and re-wraps the result
    // as a `TzOffset` that remembers `self`.
    impl TimeZone for Tz {
        type Offset = TzOffset;

        fn from_offset(offset: &Self::Offset) -> Self {
            offset.tz
        }

        fn offset_from_local_date(&self, local: &NaiveDate) -> LocalResult<Self::Offset> {
            tz!(self, tz, {
                tz.offset_from_local_date(local).map(|x| TzOffset {
                    tz: *self,
                    offset: x.fix(),
                })
            })
        }

        fn offset_from_local_datetime(&self, local: &NaiveDateTime) -> LocalResult<Self::Offset> {
            tz!(self, tz, {
                tz.offset_from_local_datetime(local).map(|x| TzOffset {
                    tz: *self,
                    offset: x.fix(),
                })
            })
        }

        fn offset_from_utc_date(&self, utc: &NaiveDate) -> Self::Offset {
            tz!(self, tz, {
                TzOffset {
                    tz: *self,
                    offset: tz.offset_from_utc_date(utc).fix(),
                }
            })
        }

        fn offset_from_utc_datetime(&self, utc: &NaiveDateTime) -> Self::Offset {
            tz!(self, tz, {
                TzOffset {
                    tz: *self,
                    offset: tz.offset_from_utc_datetime(utc).fix(),
                }
            })
        }
    }

    #[cfg(test)]
    mod tests {
        use super::*;
        use chrono::{Timelike, Utc};

        #[test]
        fn test_with_timezone() {
            let vals = [
                Utc.timestamp_millis_opt(37800000).unwrap(),
                Utc.timestamp_millis_opt(86339000).unwrap(),
            ];

            assert_eq!(10, vals[0].hour());
            assert_eq!(23, vals[1].hour());

            let tz: Tz = "America/Los_Angeles".parse().unwrap();

            assert_eq!(2, vals[0].with_timezone(&tz).hour());
            assert_eq!(15, vals[1].with_timezone(&tz).hour());
        }

        #[test]
        fn test_using_chrono_tz_and_utc_naive_date_time() {
            let sydney_tz = "Australia/Sydney".to_string();
            let tz: Tz = sydney_tz.parse().unwrap();
            let sydney_offset_without_dst = FixedOffset::east_opt(10 * 60 * 60).unwrap();
            let sydney_offset_with_dst = FixedOffset::east_opt(11 * 60 * 60).unwrap();
            // Daylight savings ends
            // When local daylight time was about to reach
            // Sunday, 4 April 2021, 3:00:00 am clocks were turned backward 1 hour to
            // Sunday, 4 April 2021, 2:00:00 am local standard time instead.

            // Daylight savings starts
            // When local standard time was about to reach
            // Sunday, 3 October 2021, 2:00:00 am clocks were turned forward 1 hour to
            // Sunday, 3 October 2021, 3:00:00 am local daylight time instead.

            // Sydney 2021-04-04T02:30:00+11:00 is 2021-04-03T15:30:00Z
            let utc_just_before_sydney_dst_ends = NaiveDate::from_ymd_opt(2021, 4, 3)
                .unwrap()
                .and_hms_nano_opt(15, 30, 0, 0)
                .unwrap();
            assert_eq!(
                tz.offset_from_utc_datetime(&utc_just_before_sydney_dst_ends)
                    .fix(),
                sydney_offset_with_dst
            );
            // Sydney 2021-04-04T02:30:00+10:00 is 2021-04-03T16:30:00Z
            let utc_just_after_sydney_dst_ends = NaiveDate::from_ymd_opt(2021, 4, 3)
                .unwrap()
                .and_hms_nano_opt(16, 30, 0, 0)
                .unwrap();
            assert_eq!(
                tz.offset_from_utc_datetime(&utc_just_after_sydney_dst_ends)
                    .fix(),
                sydney_offset_without_dst
            );
            // Sydney 2021-10-03T01:30:00+10:00 is 2021-10-02T15:30:00Z
            let utc_just_before_sydney_dst_starts = NaiveDate::from_ymd_opt(2021, 10, 2)
                .unwrap()
                .and_hms_nano_opt(15, 30, 0, 0)
                .unwrap();
            assert_eq!(
                tz.offset_from_utc_datetime(&utc_just_before_sydney_dst_starts)
                    .fix(),
                sydney_offset_without_dst
            );
            // Sydney 2021-10-03T03:30:00+11:00 is 2021-10-02T16:30:00Z
            let utc_just_after_sydney_dst_starts = NaiveDate::from_ymd_opt(2021, 10, 2)
                .unwrap()
                .and_hms_nano_opt(16, 30, 0, 0)
                .unwrap();
            assert_eq!(
                tz.offset_from_utc_datetime(&utc_just_after_sydney_dst_starts)
                    .fix(),
                sydney_offset_with_dst
            );
        }

        #[test]
        fn test_timezone_display() {
            let test_cases = ["UTC", "America/Los_Angeles", "-08:00", "+05:30"];
            for &case in &test_cases {
                let tz: Tz = case.parse().unwrap();
                assert_eq!(tz.to_string(), case);
            }
        }
    }
}
252 | | |
253 | | #[cfg(not(feature = "chrono-tz"))] |
254 | | mod private { |
255 | | use super::*; |
256 | | use chrono::offset::TimeZone; |
257 | | use chrono::{LocalResult, NaiveDate, NaiveDateTime, Offset}; |
258 | | use std::str::FromStr; |
259 | | |
260 | | /// An [`Offset`] for [`Tz`] |
261 | | #[derive(Debug, Copy, Clone)] |
262 | | pub struct TzOffset(FixedOffset); |
263 | | |
264 | | impl std::fmt::Display for TzOffset { |
265 | 0 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
266 | 0 | self.0.fmt(f) |
267 | 0 | } |
268 | | } |
269 | | |
270 | | impl Offset for TzOffset { |
271 | 0 | fn fix(&self) -> FixedOffset { |
272 | 0 | self.0 |
273 | 0 | } |
274 | | } |
275 | | |
276 | | /// An Arrow [`TimeZone`] |
277 | | #[derive(Debug, Copy, Clone)] |
278 | | pub struct Tz(FixedOffset); |
279 | | |
280 | | impl FromStr for Tz { |
281 | | type Err = ArrowError; |
282 | | |
283 | 0 | fn from_str(tz: &str) -> Result<Self, Self::Err> { |
284 | 0 | let offset = parse_fixed_offset(tz).ok_or_else(|| { |
285 | 0 | ArrowError::ParseError(format!( |
286 | 0 | "Invalid timezone \"{tz}\": only offset based timezones supported without chrono-tz feature" |
287 | 0 | )) |
288 | 0 | })?; |
289 | 0 | Ok(Self(offset)) |
290 | 0 | } |
291 | | } |
292 | | |
293 | | impl TimeZone for Tz { |
294 | | type Offset = TzOffset; |
295 | | |
296 | 0 | fn from_offset(offset: &Self::Offset) -> Self { |
297 | 0 | Self(offset.0) |
298 | 0 | } |
299 | | |
300 | 0 | fn offset_from_local_date(&self, local: &NaiveDate) -> LocalResult<Self::Offset> { |
301 | 0 | self.0.offset_from_local_date(local).map(TzOffset) |
302 | 0 | } |
303 | | |
304 | 0 | fn offset_from_local_datetime(&self, local: &NaiveDateTime) -> LocalResult<Self::Offset> { |
305 | 0 | self.0.offset_from_local_datetime(local).map(TzOffset) |
306 | 0 | } |
307 | | |
308 | 0 | fn offset_from_utc_date(&self, utc: &NaiveDate) -> Self::Offset { |
309 | 0 | TzOffset(self.0.offset_from_utc_date(utc).fix()) |
310 | 0 | } |
311 | | |
312 | 0 | fn offset_from_utc_datetime(&self, utc: &NaiveDateTime) -> Self::Offset { |
313 | 0 | TzOffset(self.0.offset_from_utc_datetime(utc).fix()) |
314 | 0 | } |
315 | | } |
316 | | } |
317 | | |
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::{NaiveDate, Offset, TimeZone};

    /// Each accepted offset layout must resolve to the expected number of
    /// seconds east of UTC, and a malformed offset must be reported as an
    /// invalid timezone.
    #[test]
    fn test_with_offset() {
        let date = NaiveDate::from_ymd_opt(2000, 1, 1).unwrap();

        // (input, expected `local_minus_utc` in seconds)
        let cases = [
            ("-00:00", 0),
            ("+00:00", 0),
            ("-10:00", -10 * 60 * 60),
            ("+09:00", 9 * 60 * 60),
            ("+09", 9 * 60 * 60),
            ("+0900", 9 * 60 * 60),
        ];
        for &(text, expected_secs) in &cases {
            let tz = text.parse::<Tz>().unwrap();
            let actual_secs = tz.offset_from_utc_date(&date).fix().local_minus_utc();
            assert_eq!(actual_secs, expected_secs);
        }

        // The hour field must be two digits wide
        let message = "+9:00".parse::<Tz>().unwrap_err().to_string();
        assert!(message.contains("Invalid timezone"), "{}", message);
    }
}