/Users/andrewlamb/Software/arrow-rs/arrow-cast/src/display.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! Functions for printing array values as human-readable strings. |
19 | | //! |
20 | | //! This is often used for debugging or logging purposes. |
21 | | //! |
22 | | //! See the [`pretty`] module for additional functions for |
23 | | //! record batch pretty printing. |
24 | | //! |
25 | | //! [`pretty`]: crate::pretty |
26 | | use std::fmt::{Display, Formatter, Write}; |
27 | | use std::ops::Range; |
28 | | |
29 | | use arrow_array::cast::*; |
30 | | use arrow_array::temporal_conversions::*; |
31 | | use arrow_array::timezone::Tz; |
32 | | use arrow_array::types::*; |
33 | | use arrow_array::*; |
34 | | use arrow_buffer::ArrowNativeType; |
35 | | use arrow_schema::*; |
36 | | use chrono::{NaiveDate, NaiveDateTime, SecondsFormat, TimeZone, Utc}; |
37 | | use lexical_core::FormattedSize; |
38 | | |
/// Optional format string for a temporal type; `None` selects the default rendering
type TimeFormat<'a> = Option<&'a str>;
40 | | |
/// Selects how duration values are rendered as text
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum DurationFormat {
    /// ISO 8601 notation, e.g. `P198DT72932.972880S`
    ISO8601,
    /// Human readable notation, e.g. `198 days 16 hours 34 mins 15.407810000 secs`
    Pretty,
}
50 | | |
51 | | /// Options for formatting arrays |
52 | | /// |
53 | | /// By default nulls are formatted as `""` and temporal types formatted |
54 | | /// according to RFC3339 |
55 | | /// |
56 | | #[derive(Debug, Clone, PartialEq, Eq, Hash)] |
57 | | pub struct FormatOptions<'a> { |
58 | | /// If set to `true` any formatting errors will be written to the output |
59 | | /// instead of being converted into a [`std::fmt::Error`] |
60 | | safe: bool, |
61 | | /// Format string for nulls |
62 | | null: &'a str, |
63 | | /// Date format for date arrays |
64 | | date_format: TimeFormat<'a>, |
65 | | /// Format for DateTime arrays |
66 | | datetime_format: TimeFormat<'a>, |
67 | | /// Timestamp format for timestamp arrays |
68 | | timestamp_format: TimeFormat<'a>, |
69 | | /// Timestamp format for timestamp with timezone arrays |
70 | | timestamp_tz_format: TimeFormat<'a>, |
71 | | /// Time format for time arrays |
72 | | time_format: TimeFormat<'a>, |
73 | | /// Duration format |
74 | | duration_format: DurationFormat, |
75 | | /// Show types in visual representation batches |
76 | | types_info: bool, |
77 | | } |
78 | | |
79 | | impl Default for FormatOptions<'_> { |
80 | 0 | fn default() -> Self { |
81 | 0 | Self::new() |
82 | 0 | } |
83 | | } |
84 | | |
85 | | impl<'a> FormatOptions<'a> { |
86 | | /// Creates a new set of format options |
87 | 0 | pub const fn new() -> Self { |
88 | 0 | Self { |
89 | 0 | safe: true, |
90 | 0 | null: "", |
91 | 0 | date_format: None, |
92 | 0 | datetime_format: None, |
93 | 0 | timestamp_format: None, |
94 | 0 | timestamp_tz_format: None, |
95 | 0 | time_format: None, |
96 | 0 | duration_format: DurationFormat::ISO8601, |
97 | 0 | types_info: false, |
98 | 0 | } |
99 | 0 | } |
100 | | |
101 | | /// If set to `true` any formatting errors will be written to the output |
102 | | /// instead of being converted into a [`std::fmt::Error`] |
103 | 0 | pub const fn with_display_error(mut self, safe: bool) -> Self { |
104 | 0 | self.safe = safe; |
105 | 0 | self |
106 | 0 | } |
107 | | |
108 | | /// Overrides the string used to represent a null |
109 | | /// |
110 | | /// Defaults to `""` |
111 | 0 | pub const fn with_null(self, null: &'a str) -> Self { |
112 | 0 | Self { null, ..self } |
113 | 0 | } |
114 | | |
115 | | /// Overrides the format used for [`DataType::Date32`] columns |
116 | 0 | pub const fn with_date_format(self, date_format: Option<&'a str>) -> Self { |
117 | 0 | Self { |
118 | 0 | date_format, |
119 | 0 | ..self |
120 | 0 | } |
121 | 0 | } |
122 | | |
123 | | /// Overrides the format used for [`DataType::Date64`] columns |
124 | 0 | pub const fn with_datetime_format(self, datetime_format: Option<&'a str>) -> Self { |
125 | 0 | Self { |
126 | 0 | datetime_format, |
127 | 0 | ..self |
128 | 0 | } |
129 | 0 | } |
130 | | |
131 | | /// Overrides the format used for [`DataType::Timestamp`] columns without a timezone |
132 | 0 | pub const fn with_timestamp_format(self, timestamp_format: Option<&'a str>) -> Self { |
133 | 0 | Self { |
134 | 0 | timestamp_format, |
135 | 0 | ..self |
136 | 0 | } |
137 | 0 | } |
138 | | |
139 | | /// Overrides the format used for [`DataType::Timestamp`] columns with a timezone |
140 | 0 | pub const fn with_timestamp_tz_format(self, timestamp_tz_format: Option<&'a str>) -> Self { |
141 | 0 | Self { |
142 | 0 | timestamp_tz_format, |
143 | 0 | ..self |
144 | 0 | } |
145 | 0 | } |
146 | | |
147 | | /// Overrides the format used for [`DataType::Time32`] and [`DataType::Time64`] columns |
148 | 0 | pub const fn with_time_format(self, time_format: Option<&'a str>) -> Self { |
149 | 0 | Self { |
150 | 0 | time_format, |
151 | 0 | ..self |
152 | 0 | } |
153 | 0 | } |
154 | | |
155 | | /// Overrides the format used for duration columns |
156 | | /// |
157 | | /// Defaults to [`DurationFormat::ISO8601`] |
158 | 0 | pub const fn with_duration_format(self, duration_format: DurationFormat) -> Self { |
159 | 0 | Self { |
160 | 0 | duration_format, |
161 | 0 | ..self |
162 | 0 | } |
163 | 0 | } |
164 | | |
165 | | /// Overrides if types should be shown |
166 | | /// |
167 | | /// Defaults to [`false`] |
168 | 0 | pub const fn with_types_info(self, types_info: bool) -> Self { |
169 | 0 | Self { types_info, ..self } |
170 | 0 | } |
171 | | |
172 | | /// Returns true if type info should be included in visual representation of batches |
173 | 0 | pub const fn types_info(&self) -> bool { |
174 | 0 | self.types_info |
175 | 0 | } |
176 | | } |
177 | | |
178 | | /// Implements [`Display`] for a specific array value |
179 | | pub struct ValueFormatter<'a> { |
180 | | idx: usize, |
181 | | formatter: &'a ArrayFormatter<'a>, |
182 | | } |
183 | | |
184 | | impl ValueFormatter<'_> { |
185 | | /// Writes this value to the provided [`Write`] |
186 | | /// |
187 | | /// Note: this ignores [`FormatOptions::with_display_error`] and |
188 | | /// will return an error on formatting issue |
189 | 0 | pub fn write(&self, s: &mut dyn Write) -> Result<(), ArrowError> { |
190 | 0 | match self.formatter.format.write(self.idx, s) { |
191 | 0 | Ok(_) => Ok(()), |
192 | 0 | Err(FormatError::Arrow(e)) => Err(e), |
193 | 0 | Err(FormatError::Format(_)) => Err(ArrowError::CastError("Format error".to_string())), |
194 | | } |
195 | 0 | } |
196 | | |
197 | | /// Fallibly converts this to a string |
198 | 0 | pub fn try_to_string(&self) -> Result<String, ArrowError> { |
199 | 0 | let mut s = String::new(); |
200 | 0 | self.write(&mut s)?; |
201 | 0 | Ok(s) |
202 | 0 | } |
203 | | } |
204 | | |
205 | | impl Display for ValueFormatter<'_> { |
206 | 0 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { |
207 | 0 | match self.formatter.format.write(self.idx, f) { |
208 | 0 | Ok(()) => Ok(()), |
209 | 0 | Err(FormatError::Arrow(e)) if self.formatter.safe => { |
210 | 0 | write!(f, "ERROR: {e}") |
211 | | } |
212 | 0 | Err(_) => Err(std::fmt::Error), |
213 | | } |
214 | 0 | } |
215 | | } |
216 | | |
217 | | /// A string formatter for an [`Array`] |
218 | | /// |
219 | | /// This can be used with [`std::write`] to write type-erased `dyn Array` |
220 | | /// |
221 | | /// ``` |
222 | | /// # use std::fmt::{Display, Formatter, Write}; |
223 | | /// # use arrow_array::{Array, ArrayRef, Int32Array}; |
224 | | /// # use arrow_cast::display::{ArrayFormatter, FormatOptions}; |
225 | | /// # use arrow_schema::ArrowError; |
226 | | /// struct MyContainer { |
227 | | /// values: ArrayRef, |
228 | | /// } |
229 | | /// |
230 | | /// impl Display for MyContainer { |
231 | | /// fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { |
232 | | /// let options = FormatOptions::default(); |
233 | | /// let formatter = ArrayFormatter::try_new(self.values.as_ref(), &options) |
234 | | /// .map_err(|_| std::fmt::Error)?; |
235 | | /// |
236 | | /// let mut iter = 0..self.values.len(); |
237 | | /// if let Some(idx) = iter.next() { |
238 | | /// write!(f, "{}", formatter.value(idx))?; |
239 | | /// } |
240 | | /// for idx in iter { |
241 | | /// write!(f, ", {}", formatter.value(idx))?; |
242 | | /// } |
243 | | /// Ok(()) |
244 | | /// } |
245 | | /// } |
246 | | /// ``` |
247 | | /// |
248 | | /// [`ValueFormatter::write`] can also be used to get a semantic error, instead of the |
249 | | /// opaque [`std::fmt::Error`] |
250 | | /// |
251 | | /// ``` |
252 | | /// # use std::fmt::Write; |
253 | | /// # use arrow_array::Array; |
254 | | /// # use arrow_cast::display::{ArrayFormatter, FormatOptions}; |
255 | | /// # use arrow_schema::ArrowError; |
256 | | /// fn format_array( |
257 | | /// f: &mut dyn Write, |
258 | | /// array: &dyn Array, |
259 | | /// options: &FormatOptions, |
260 | | /// ) -> Result<(), ArrowError> { |
261 | | /// let formatter = ArrayFormatter::try_new(array, options)?; |
262 | | /// for i in 0..array.len() { |
263 | | /// formatter.value(i).write(f)? |
264 | | /// } |
265 | | /// Ok(()) |
266 | | /// } |
267 | | /// ``` |
268 | | /// |
269 | | pub struct ArrayFormatter<'a> { |
270 | | format: Box<dyn DisplayIndex + 'a>, |
271 | | safe: bool, |
272 | | } |
273 | | |
274 | | impl<'a> ArrayFormatter<'a> { |
275 | | /// Returns an [`ArrayFormatter`] that can be used to format `array` |
276 | | /// |
277 | | /// This returns an error if an array of the given data type cannot be formatted |
278 | 0 | pub fn try_new(array: &'a dyn Array, options: &FormatOptions<'a>) -> Result<Self, ArrowError> { |
279 | | Ok(Self { |
280 | 0 | format: make_formatter(array, options)?, |
281 | 0 | safe: options.safe, |
282 | | }) |
283 | 0 | } |
284 | | |
285 | | /// Returns a [`ValueFormatter`] that implements [`Display`] for |
286 | | /// the value of the array at `idx` |
287 | 0 | pub fn value(&self, idx: usize) -> ValueFormatter<'_> { |
288 | 0 | ValueFormatter { |
289 | 0 | formatter: self, |
290 | 0 | idx, |
291 | 0 | } |
292 | 0 | } |
293 | | } |
294 | | |
/// Builds the type-erased [`DisplayIndex`] for `array` by dispatching on its data type
///
/// Returns [`ArrowError::NotYetImplemented`] for data types without a matching arm,
/// and propagates any error from `array_format`.
fn make_formatter<'a>(
    array: &'a dyn Array,
    options: &FormatOptions<'a>,
) -> Result<Box<dyn DisplayIndex + 'a>, ArrowError> {
    downcast_primitive_array! {
        // Primitive arrays are downcast by the macro itself
        array => array_format(array, options),
        DataType::Null => array_format(as_null_array(array), options),
        DataType::Boolean => array_format(as_boolean_array(array), options),
        DataType::Utf8 => array_format(array.as_string::<i32>(), options),
        DataType::LargeUtf8 => array_format(array.as_string::<i64>(), options),
        DataType::Utf8View => array_format(array.as_string_view(), options),
        DataType::Binary => array_format(array.as_binary::<i32>(), options),
        DataType::BinaryView => array_format(array.as_binary_view(), options),
        DataType::LargeBinary => array_format(array.as_binary::<i64>(), options),
        DataType::FixedSizeBinary(_) => {
            // The data type was just matched, so the downcast is expected to succeed
            let a = array.as_any().downcast_ref::<FixedSizeBinaryArray>().unwrap();
            array_format(a, options)
        }
        DataType::Dictionary(_, _) => downcast_dictionary_array! {
            array => array_format(array, options),
            _ => unreachable!()
        }
        DataType::List(_) => array_format(as_generic_list_array::<i32>(array), options),
        DataType::LargeList(_) => array_format(as_generic_list_array::<i64>(array), options),
        DataType::FixedSizeList(_, _) => {
            // The data type was just matched, so the downcast is expected to succeed
            let a = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
            array_format(a, options)
        }
        DataType::Struct(_) => array_format(as_struct_array(array), options),
        DataType::Map(_, _) => array_format(as_map_array(array), options),
        DataType::Union(_, _) => array_format(as_union_array(array), options),
        DataType::RunEndEncoded(_, _) => downcast_run_array! {
            array => array_format(array, options),
            _ => unreachable!()
        },
        // Anything else is reported as unsupported rather than panicking
        d => Err(ArrowError::NotYetImplemented(format!("formatting {d} is not yet supported"))),
    }
}
333 | | |
334 | | /// Either an [`ArrowError`] or [`std::fmt::Error`] |
335 | | enum FormatError { |
336 | | Format(std::fmt::Error), |
337 | | Arrow(ArrowError), |
338 | | } |
339 | | |
340 | | type FormatResult = Result<(), FormatError>; |
341 | | |
342 | | impl From<std::fmt::Error> for FormatError { |
343 | 0 | fn from(value: std::fmt::Error) -> Self { |
344 | 0 | Self::Format(value) |
345 | 0 | } |
346 | | } |
347 | | |
348 | | impl From<ArrowError> for FormatError { |
349 | 0 | fn from(value: ArrowError) -> Self { |
350 | 0 | Self::Arrow(value) |
351 | 0 | } |
352 | | } |
353 | | |
354 | | /// [`Display`] but accepting an index |
355 | | trait DisplayIndex { |
356 | | fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult; |
357 | | } |
358 | | |
359 | | /// [`DisplayIndex`] with additional state |
360 | | trait DisplayIndexState<'a> { |
361 | | type State; |
362 | | |
363 | | fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError>; |
364 | | |
365 | | fn write(&self, state: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult; |
366 | | } |
367 | | |
368 | | impl<'a, T: DisplayIndex> DisplayIndexState<'a> for T { |
369 | | type State = (); |
370 | | |
371 | 0 | fn prepare(&self, _options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> { |
372 | 0 | Ok(()) |
373 | 0 | } |
374 | | |
375 | 0 | fn write(&self, _: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult { |
376 | 0 | DisplayIndex::write(self, idx, f) |
377 | 0 | } |
378 | | } |
379 | | |
380 | | struct ArrayFormat<'a, F: DisplayIndexState<'a>> { |
381 | | state: F::State, |
382 | | array: F, |
383 | | null: &'a str, |
384 | | } |
385 | | |
386 | 0 | fn array_format<'a, F>( |
387 | 0 | array: F, |
388 | 0 | options: &FormatOptions<'a>, |
389 | 0 | ) -> Result<Box<dyn DisplayIndex + 'a>, ArrowError> |
390 | 0 | where |
391 | 0 | F: DisplayIndexState<'a> + Array + 'a, |
392 | | { |
393 | 0 | let state = array.prepare(options)?; |
394 | 0 | Ok(Box::new(ArrayFormat { |
395 | 0 | state, |
396 | 0 | array, |
397 | 0 | null: options.null, |
398 | 0 | })) |
399 | 0 | } |
400 | | |
401 | | impl<'a, F: DisplayIndexState<'a> + Array> DisplayIndex for ArrayFormat<'a, F> { |
402 | 0 | fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { |
403 | 0 | if self.array.is_null(idx) { |
404 | 0 | if !self.null.is_empty() { |
405 | 0 | f.write_str(self.null)? |
406 | 0 | } |
407 | 0 | return Ok(()); |
408 | 0 | } |
409 | 0 | DisplayIndexState::write(&self.array, &self.state, idx, f) |
410 | 0 | } |
411 | | } |
412 | | |
413 | | impl DisplayIndex for &BooleanArray { |
414 | 0 | fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { |
415 | 0 | write!(f, "{}", self.value(idx))?; |
416 | 0 | Ok(()) |
417 | 0 | } |
418 | | } |
419 | | |
420 | | impl<'a> DisplayIndexState<'a> for &'a NullArray { |
421 | | type State = &'a str; |
422 | | |
423 | 0 | fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> { |
424 | 0 | Ok(options.null) |
425 | 0 | } |
426 | | |
427 | 0 | fn write(&self, state: &Self::State, _idx: usize, f: &mut dyn Write) -> FormatResult { |
428 | 0 | f.write_str(state)?; |
429 | 0 | Ok(()) |
430 | 0 | } |
431 | | } |
432 | | |
/// Implements [`DisplayIndex`] for `&PrimitiveArray<$t>` by writing the
/// native value through `lexical_core` into a stack buffer
macro_rules! primitive_display {
    ($($t:ty),+) => {
        $(impl<'a> DisplayIndex for &'a PrimitiveArray<$t>
        {
            fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
                let mut scratch = [0u8; <$t as ArrowPrimitiveType>::Native::FORMATTED_SIZE];
                let bytes = lexical_core::write(self.value(idx), &mut scratch);
                // SAFETY: relies on lexical_core producing valid UTF-8
                let text = unsafe { std::str::from_utf8_unchecked(bytes) };
                f.write_str(text).map_err(FormatError::from)
            }
        })+
    };
}
449 | | |
/// Implements [`DisplayIndex`] for `&PrimitiveArray<$t>` by formatting the
/// floating point value with a `ryu::Buffer`
macro_rules! primitive_display_float {
    ($($t:ty),+) => {
        $(impl<'a> DisplayIndex for &'a PrimitiveArray<$t>
        {
            fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
                let mut scratch = ryu::Buffer::new();
                let text = scratch.format(self.value(idx));
                f.write_str(text).map_err(FormatError::from)
            }
        })+
    };
}
463 | | |
// Integer arrays go through `primitive_display`, Float32/Float64 through `primitive_display_float`
primitive_display!(Int8Type, Int16Type, Int32Type, Int64Type);
primitive_display!(UInt8Type, UInt16Type, UInt32Type, UInt64Type);
primitive_display_float!(Float32Type, Float64Type);
467 | | |
468 | | impl DisplayIndex for &PrimitiveArray<Float16Type> { |
469 | 0 | fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { |
470 | 0 | write!(f, "{}", self.value(idx))?; |
471 | 0 | Ok(()) |
472 | 0 | } |
473 | | } |
474 | | |
475 | | macro_rules! decimal_display { |
476 | | ($($t:ty),+) => { |
477 | | $(impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> { |
478 | | type State = (u8, i8); |
479 | | |
480 | 0 | fn prepare(&self, _options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> { |
481 | 0 | Ok((self.precision(), self.scale())) |
482 | 0 | } |
483 | | |
484 | 0 | fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult { |
485 | 0 | write!(f, "{}", <$t>::format_decimal(self.values()[idx], s.0, s.1))?; |
486 | 0 | Ok(()) |
487 | 0 | } |
488 | | })+ |
489 | | }; |
490 | | } |
491 | | |
492 | | decimal_display!(Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type); |
493 | | |
494 | 0 | fn write_timestamp( |
495 | 0 | f: &mut dyn Write, |
496 | 0 | naive: NaiveDateTime, |
497 | 0 | timezone: Option<Tz>, |
498 | 0 | format: Option<&str>, |
499 | 0 | ) -> FormatResult { |
500 | 0 | match timezone { |
501 | 0 | Some(tz) => { |
502 | 0 | let date = Utc.from_utc_datetime(&naive).with_timezone(&tz); |
503 | 0 | match format { |
504 | 0 | Some(s) => write!(f, "{}", date.format(s))?, |
505 | 0 | None => write!(f, "{}", date.to_rfc3339_opts(SecondsFormat::AutoSi, true))?, |
506 | | } |
507 | | } |
508 | 0 | None => match format { |
509 | 0 | Some(s) => write!(f, "{}", naive.format(s))?, |
510 | 0 | None => write!(f, "{naive:?}")?, |
511 | | }, |
512 | | } |
513 | 0 | Ok(()) |
514 | 0 | } |
515 | | |
516 | | macro_rules! timestamp_display { |
517 | | ($($t:ty),+) => { |
518 | | $(impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> { |
519 | | type State = (Option<Tz>, TimeFormat<'a>); |
520 | | |
521 | 0 | fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> { |
522 | 0 | match self.data_type() { |
523 | 0 | DataType::Timestamp(_, Some(tz)) => Ok((Some(tz.parse()?), options.timestamp_tz_format)), |
524 | 0 | DataType::Timestamp(_, None) => Ok((None, options.timestamp_format)), |
525 | 0 | _ => unreachable!(), |
526 | | } |
527 | 0 | } |
528 | | |
529 | 0 | fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult { |
530 | 0 | let value = self.value(idx); |
531 | 0 | let naive = as_datetime::<$t>(value).ok_or_else(|| { |
532 | 0 | ArrowError::CastError(format!( |
533 | 0 | "Failed to convert {} to datetime for {}", |
534 | 0 | value, |
535 | 0 | self.data_type() |
536 | 0 | )) |
537 | 0 | })?; |
538 | | |
539 | 0 | write_timestamp(f, naive, s.0, s.1.clone()) |
540 | 0 | } |
541 | | })+ |
542 | | }; |
543 | | } |
544 | | |
545 | | timestamp_display!( |
546 | | TimestampSecondType, |
547 | | TimestampMillisecondType, |
548 | | TimestampMicrosecondType, |
549 | | TimestampNanosecondType |
550 | | ); |
551 | | |
/// Implements [`DisplayIndexState`] for a date/time `&PrimitiveArray<$t>`
///
/// Arguments
/// * `$convert` - function converting the native value into an optional chrono value
/// * `$format` - name of the [`FormatOptions`] field holding the format string
/// * `$t` - [`ArrowPrimitiveType`] of the array
macro_rules! temporal_display {
    ($convert:ident, $format:ident, $t:ty) => {
        impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
            type State = TimeFormat<'a>;

            fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
                Ok(options.$format)
            }

            fn write(&self, fmt: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
                let raw = self.value(idx);
                let naive = match $convert(raw as _) {
                    Some(converted) => converted,
                    None => {
                        let err = ArrowError::CastError(format!(
                            "Failed to convert {} to temporal for {}",
                            raw,
                            self.data_type()
                        ));
                        return Err(err.into());
                    }
                };

                // Without a format string the `Debug` representation is used
                if let Some(pattern) = fmt {
                    write!(f, "{}", naive.format(pattern))?;
                } else {
                    write!(f, "{naive:?}")?;
                }
                Ok(())
            }
        }
    };
}
580 | | |
581 | | #[inline] |
582 | 0 | fn date32_to_date(value: i32) -> Option<NaiveDate> { |
583 | 0 | Some(date32_to_datetime(value)?.date()) |
584 | 0 | } |
585 | | |
// Each invocation pairs a conversion function with the `FormatOptions` field that formats it
temporal_display!(date32_to_date, date_format, Date32Type);
temporal_display!(date64_to_datetime, datetime_format, Date64Type);
temporal_display!(time32s_to_time, time_format, Time32SecondType);
temporal_display!(time32ms_to_time, time_format, Time32MillisecondType);
temporal_display!(time64us_to_time, time_format, Time64MicrosecondType);
temporal_display!(time64ns_to_time, time_format, Time64NanosecondType);
592 | | |
/// Derive [`DisplayIndexState`] for `PrimitiveArray<$t>`
///
/// Arguments
/// * `$convert` - function to convert the value to a `Duration`
/// * `$t` - [`ArrowPrimitiveType`] of the array
/// * `$scale` - scale of the duration (passed to `duration_fmt`)
macro_rules! duration_display {
    ($convert:ident, $t:ty, $scale:tt) => {
        impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
            type State = DurationFormat;

            fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
                let chosen = options.duration_format;
                Ok(chosen)
            }

            fn write(&self, fmt: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
                let raw = self.value(idx);
                let outcome = match fmt {
                    DurationFormat::ISO8601 => write!(f, "{}", $convert(raw)),
                    DurationFormat::Pretty => duration_fmt!(f, raw, $scale),
                };
                outcome.map_err(FormatError::from)
            }
        }
    };
}
619 | | |
/// Like [`duration_display`], except that `$convert` returns an `Option`;
/// a value that cannot be converted is written as `<invalid>`
macro_rules! duration_option_display {
    ($convert:ident, $t:ty, $scale:tt) => {
        impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
            type State = DurationFormat;

            fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
                let chosen = options.duration_format;
                Ok(chosen)
            }

            fn write(&self, fmt: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
                let raw = self.value(idx);
                match ($convert(raw), fmt) {
                    (None, _) => write!(f, "<invalid>")?,
                    (Some(td), DurationFormat::ISO8601) => write!(f, "{}", td)?,
                    // The pretty form is computed from the raw value, not the converted one
                    (Some(_), DurationFormat::Pretty) => duration_fmt!(f, raw, $scale)?,
                }
                Ok(())
            }
        }
    };
}
647 | | |
/// Writes a duration as `<d> days <h> hours <m> mins <s> secs`
///
/// Arguments
/// * `$f` - identifier of the writer
/// * `$v` - duration value, evaluated once
/// * scale - number of sub-second decimal digits carried by `$v`; `0` means
///   whole seconds and prints no fractional part
///
/// Every component keeps the sign produced by truncating division. For a
/// negative fractional part the seconds field is written as `-<secs>.<frac>`
/// using absolute values.
macro_rules! duration_fmt {
    ($f:ident, $v:expr, 0) => {{
        let total_secs = $v;
        let total_mins = total_secs / 60;
        let total_hours = total_mins / 60;

        let days = total_hours / 24;
        let hours = total_hours % 24;
        let mins = total_mins % 60;
        let secs = total_secs % 60;
        write!($f, "{} days {} hours {} mins {} secs", days, hours, mins, secs)
    }};
    ($f:ident, $v:expr, $scale:tt) => {{
        let raw = $v;
        let unit = 10_i64.pow($scale);
        let total_secs = raw / unit;
        let total_mins = total_secs / 60;
        let total_hours = total_mins / 60;

        let days = total_hours / 24;
        let hours = total_hours % 24;
        let mins = total_mins % 60;
        let secs = total_secs % 60;
        let subsec = raw % unit;
        if subsec.is_negative() {
            write!(
                $f,
                concat!("{} days {} hours {} mins -{}.{:0", $scale, "} secs"),
                days,
                hours,
                mins,
                secs.abs(),
                subsec.abs()
            )
        } else {
            write!(
                $f,
                concat!("{} days {} hours {} mins {}.{:0", $scale, "} secs"),
                days, hours, mins, secs, subsec
            )
        }
    }};
}
693 | | |
// The last argument is the number of sub-second digits carried by the type (0 = whole seconds).
// Second and millisecond durations use fallible conversions, hence the `option` variant.
duration_option_display!(try_duration_s_to_duration, DurationSecondType, 0);
duration_option_display!(try_duration_ms_to_duration, DurationMillisecondType, 3);
duration_display!(duration_us_to_duration, DurationMicrosecondType, 6);
duration_display!(duration_ns_to_duration, DurationNanosecondType, 9);
698 | | |
699 | | impl DisplayIndex for &PrimitiveArray<IntervalYearMonthType> { |
700 | 0 | fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { |
701 | 0 | let interval = self.value(idx) as f64; |
702 | 0 | let years = (interval / 12_f64).floor(); |
703 | 0 | let month = interval - (years * 12_f64); |
704 | | |
705 | 0 | write!(f, "{years} years {month} mons",)?; |
706 | 0 | Ok(()) |
707 | 0 | } |
708 | | } |
709 | | |
710 | | impl DisplayIndex for &PrimitiveArray<IntervalDayTimeType> { |
711 | 0 | fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { |
712 | 0 | let value = self.value(idx); |
713 | 0 | let mut prefix = ""; |
714 | | |
715 | 0 | if value.days != 0 { |
716 | 0 | write!(f, "{prefix}{} days", value.days)?; |
717 | 0 | prefix = " "; |
718 | 0 | } |
719 | | |
720 | 0 | if value.milliseconds != 0 { |
721 | 0 | let millis_fmt = MillisecondsFormatter { |
722 | 0 | milliseconds: value.milliseconds, |
723 | 0 | prefix, |
724 | 0 | }; |
725 | | |
726 | 0 | f.write_fmt(format_args!("{millis_fmt}"))?; |
727 | 0 | } |
728 | | |
729 | 0 | Ok(()) |
730 | 0 | } |
731 | | } |
732 | | |
733 | | impl DisplayIndex for &PrimitiveArray<IntervalMonthDayNanoType> { |
734 | 0 | fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { |
735 | 0 | let value = self.value(idx); |
736 | 0 | let mut prefix = ""; |
737 | | |
738 | 0 | if value.months != 0 { |
739 | 0 | write!(f, "{prefix}{} mons", value.months)?; |
740 | 0 | prefix = " "; |
741 | 0 | } |
742 | | |
743 | 0 | if value.days != 0 { |
744 | 0 | write!(f, "{prefix}{} days", value.days)?; |
745 | 0 | prefix = " "; |
746 | 0 | } |
747 | | |
748 | 0 | if value.nanoseconds != 0 { |
749 | 0 | let nano_fmt = NanosecondsFormatter { |
750 | 0 | nanoseconds: value.nanoseconds, |
751 | 0 | prefix, |
752 | 0 | }; |
753 | 0 | f.write_fmt(format_args!("{nano_fmt}"))?; |
754 | 0 | } |
755 | | |
756 | 0 | Ok(()) |
757 | 0 | } |
758 | | } |
759 | | |
/// Renders a nanosecond count as `<h> hours <m> mins <s>.<nanos> secs`,
/// omitting every component that is zero
struct NanosecondsFormatter<'a> {
    /// Total number of nanoseconds to render
    nanoseconds: i64,
    /// Text written before the first emitted component
    prefix: &'a str,
}

impl Display for NanosecondsFormatter<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        const NANOS_PER_SEC: i64 = 1_000_000_000;

        let total_secs = self.nanoseconds / NANOS_PER_SEC;
        let subsec = self.nanoseconds % NANOS_PER_SEC;
        let total_mins = total_secs / 60;

        let hours = total_mins / 60;
        let mins = total_mins % 60;
        let secs = total_secs % 60;

        // After the first component, later ones are separated by a space
        let mut sep = self.prefix;

        if hours != 0 {
            write!(f, "{sep}{hours} hours")?;
            sep = " ";
        }

        if mins != 0 {
            write!(f, "{sep}{mins} mins")?;
            sep = " ";
        }

        if secs == 0 && subsec == 0 {
            return Ok(());
        }

        // The sign is written once, in front of the absolute values
        let sign = if secs < 0 || subsec < 0 { "-" } else { "" };
        write!(f, "{sep}{sign}{}.{:09} secs", secs.abs(), subsec.abs())
    }
}
802 | | |
/// Renders a millisecond count as `<h> hours <m> mins <s>.<millis> secs`,
/// omitting every component that is zero
struct MillisecondsFormatter<'a> {
    /// Total number of milliseconds to render
    milliseconds: i32,
    /// Text written before the first emitted component
    prefix: &'a str,
}

impl Display for MillisecondsFormatter<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        const MILLIS_PER_SEC: i32 = 1_000;

        let total_secs = self.milliseconds / MILLIS_PER_SEC;
        let subsec = self.milliseconds % MILLIS_PER_SEC;
        let total_mins = total_secs / 60;

        let hours = total_mins / 60;
        let mins = total_mins % 60;
        let secs = total_secs % 60;

        // After the first component, later ones are separated by a space
        let mut sep = self.prefix;

        if hours != 0 {
            write!(f, "{sep}{hours} hours")?;
            sep = " ";
        }

        if mins != 0 {
            write!(f, "{sep}{mins} mins")?;
            sep = " ";
        }

        if secs == 0 && subsec == 0 {
            return Ok(());
        }

        // The sign is written once, in front of the absolute values
        let sign = if secs < 0 || subsec < 0 { "-" } else { "" };
        write!(f, "{sep}{sign}{}.{:03} secs", secs.abs(), subsec.abs())
    }
}
850 | | |
851 | | impl<O: OffsetSizeTrait> DisplayIndex for &GenericStringArray<O> { |
852 | 0 | fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { |
853 | 0 | write!(f, "{}", self.value(idx))?; |
854 | 0 | Ok(()) |
855 | 0 | } |
856 | | } |
857 | | |
858 | | impl DisplayIndex for &StringViewArray { |
859 | 0 | fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { |
860 | 0 | write!(f, "{}", self.value(idx))?; |
861 | 0 | Ok(()) |
862 | 0 | } |
863 | | } |
864 | | |
865 | | impl<O: OffsetSizeTrait> DisplayIndex for &GenericBinaryArray<O> { |
866 | 0 | fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { |
867 | 0 | let v = self.value(idx); |
868 | 0 | for byte in v { |
869 | 0 | write!(f, "{byte:02x}")?; |
870 | | } |
871 | 0 | Ok(()) |
872 | 0 | } |
873 | | } |
874 | | |
875 | | impl DisplayIndex for &BinaryViewArray { |
876 | 0 | fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { |
877 | 0 | let v = self.value(idx); |
878 | 0 | for byte in v { |
879 | 0 | write!(f, "{byte:02x}")?; |
880 | | } |
881 | 0 | Ok(()) |
882 | 0 | } |
883 | | } |
884 | | |
885 | | impl DisplayIndex for &FixedSizeBinaryArray { |
886 | 0 | fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { |
887 | 0 | let v = self.value(idx); |
888 | 0 | for byte in v { |
889 | 0 | write!(f, "{byte:02x}")?; |
890 | | } |
891 | 0 | Ok(()) |
892 | 0 | } |
893 | | } |
894 | | |
895 | | impl<'a, K: ArrowDictionaryKeyType> DisplayIndexState<'a> for &'a DictionaryArray<K> { |
896 | | type State = Box<dyn DisplayIndex + 'a>; |
897 | | |
898 | 0 | fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> { |
899 | 0 | make_formatter(self.values().as_ref(), options) |
900 | 0 | } |
901 | | |
902 | 0 | fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult { |
903 | 0 | let value_idx = self.keys().values()[idx].as_usize(); |
904 | 0 | s.as_ref().write(value_idx, f) |
905 | 0 | } |
906 | | } |
907 | | |
908 | | impl<'a, K: RunEndIndexType> DisplayIndexState<'a> for &'a RunArray<K> { |
909 | | type State = Box<dyn DisplayIndex + 'a>; |
910 | | |
911 | 0 | fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> { |
912 | 0 | make_formatter(self.values().as_ref(), options) |
913 | 0 | } |
914 | | |
915 | 0 | fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult { |
916 | 0 | let value_idx = self.get_physical_index(idx); |
917 | 0 | s.as_ref().write(value_idx, f) |
918 | 0 | } |
919 | | } |
920 | | |
921 | 0 | fn write_list( |
922 | 0 | f: &mut dyn Write, |
923 | 0 | mut range: Range<usize>, |
924 | 0 | values: &dyn DisplayIndex, |
925 | 0 | ) -> FormatResult { |
926 | 0 | f.write_char('[')?; |
927 | 0 | if let Some(idx) = range.next() { |
928 | 0 | values.write(idx, f)?; |
929 | 0 | } |
930 | 0 | for idx in range { |
931 | 0 | write!(f, ", ")?; |
932 | 0 | values.write(idx, f)?; |
933 | | } |
934 | 0 | f.write_char(']')?; |
935 | 0 | Ok(()) |
936 | 0 | } |
937 | | |
938 | | impl<'a, O: OffsetSizeTrait> DisplayIndexState<'a> for &'a GenericListArray<O> { |
939 | | type State = Box<dyn DisplayIndex + 'a>; |
940 | | |
941 | 0 | fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> { |
942 | 0 | make_formatter(self.values().as_ref(), options) |
943 | 0 | } |
944 | | |
945 | 0 | fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult { |
946 | 0 | let offsets = self.value_offsets(); |
947 | 0 | let end = offsets[idx + 1].as_usize(); |
948 | 0 | let start = offsets[idx].as_usize(); |
949 | 0 | write_list(f, start..end, s.as_ref()) |
950 | 0 | } |
951 | | } |
952 | | |
953 | | impl<'a> DisplayIndexState<'a> for &'a FixedSizeListArray { |
954 | | type State = (usize, Box<dyn DisplayIndex + 'a>); |
955 | | |
956 | 0 | fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> { |
957 | 0 | let values = make_formatter(self.values().as_ref(), options)?; |
958 | 0 | let length = self.value_length(); |
959 | 0 | Ok((length as usize, values)) |
960 | 0 | } |
961 | | |
962 | 0 | fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult { |
963 | 0 | let start = idx * s.0; |
964 | 0 | let end = start + s.0; |
965 | 0 | write_list(f, start..end, s.1.as_ref()) |
966 | 0 | } |
967 | | } |
968 | | |
969 | | /// Pairs a boxed [`DisplayIndex`] with its field name |
970 | | type FieldDisplay<'a> = (&'a str, Box<dyn DisplayIndex + 'a>); |
971 | | |
972 | | impl<'a> DisplayIndexState<'a> for &'a StructArray { |
973 | | type State = Vec<FieldDisplay<'a>>; |
974 | | |
975 | 0 | fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> { |
976 | 0 | let fields = match (*self).data_type() { |
977 | 0 | DataType::Struct(f) => f, |
978 | 0 | _ => unreachable!(), |
979 | | }; |
980 | | |
981 | 0 | self.columns() |
982 | 0 | .iter() |
983 | 0 | .zip(fields) |
984 | 0 | .map(|(a, f)| { |
985 | 0 | let format = make_formatter(a.as_ref(), options)?; |
986 | 0 | Ok((f.name().as_str(), format)) |
987 | 0 | }) |
988 | 0 | .collect() |
989 | 0 | } |
990 | | |
991 | 0 | fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult { |
992 | 0 | let mut iter = s.iter(); |
993 | 0 | f.write_char('{')?; |
994 | 0 | if let Some((name, display)) = iter.next() { |
995 | 0 | write!(f, "{name}: ")?; |
996 | 0 | display.as_ref().write(idx, f)?; |
997 | 0 | } |
998 | 0 | for (name, display) in iter { |
999 | 0 | write!(f, ", {name}: ")?; |
1000 | 0 | display.as_ref().write(idx, f)?; |
1001 | | } |
1002 | 0 | f.write_char('}')?; |
1003 | 0 | Ok(()) |
1004 | 0 | } |
1005 | | } |
1006 | | |
1007 | | impl<'a> DisplayIndexState<'a> for &'a MapArray { |
1008 | | type State = (Box<dyn DisplayIndex + 'a>, Box<dyn DisplayIndex + 'a>); |
1009 | | |
1010 | 0 | fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> { |
1011 | 0 | let keys = make_formatter(self.keys().as_ref(), options)?; |
1012 | 0 | let values = make_formatter(self.values().as_ref(), options)?; |
1013 | 0 | Ok((keys, values)) |
1014 | 0 | } |
1015 | | |
1016 | 0 | fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult { |
1017 | 0 | let offsets = self.value_offsets(); |
1018 | 0 | let end = offsets[idx + 1].as_usize(); |
1019 | 0 | let start = offsets[idx].as_usize(); |
1020 | 0 | let mut iter = start..end; |
1021 | | |
1022 | 0 | f.write_char('{')?; |
1023 | 0 | if let Some(idx) = iter.next() { |
1024 | 0 | s.0.write(idx, f)?; |
1025 | 0 | write!(f, ": ")?; |
1026 | 0 | s.1.write(idx, f)?; |
1027 | 0 | } |
1028 | | |
1029 | 0 | for idx in iter { |
1030 | 0 | write!(f, ", ")?; |
1031 | 0 | s.0.write(idx, f)?; |
1032 | 0 | write!(f, ": ")?; |
1033 | 0 | s.1.write(idx, f)?; |
1034 | | } |
1035 | | |
1036 | 0 | f.write_char('}')?; |
1037 | 0 | Ok(()) |
1038 | 0 | } |
1039 | | } |
1040 | | |
1041 | | impl<'a> DisplayIndexState<'a> for &'a UnionArray { |
1042 | | type State = ( |
1043 | | Vec<Option<(&'a str, Box<dyn DisplayIndex + 'a>)>>, |
1044 | | UnionMode, |
1045 | | ); |
1046 | | |
1047 | 0 | fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> { |
1048 | 0 | let (fields, mode) = match (*self).data_type() { |
1049 | 0 | DataType::Union(fields, mode) => (fields, mode), |
1050 | 0 | _ => unreachable!(), |
1051 | | }; |
1052 | | |
1053 | 0 | let max_id = fields.iter().map(|(id, _)| id).max().unwrap_or_default() as usize; |
1054 | 0 | let mut out: Vec<Option<FieldDisplay>> = (0..max_id + 1).map(|_| None).collect(); |
1055 | 0 | for (i, field) in fields.iter() { |
1056 | 0 | let formatter = make_formatter(self.child(i).as_ref(), options)?; |
1057 | 0 | out[i as usize] = Some((field.name().as_str(), formatter)) |
1058 | | } |
1059 | 0 | Ok((out, *mode)) |
1060 | 0 | } |
1061 | | |
1062 | 0 | fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult { |
1063 | 0 | let id = self.type_id(idx); |
1064 | 0 | let idx = match s.1 { |
1065 | 0 | UnionMode::Dense => self.value_offset(idx), |
1066 | 0 | UnionMode::Sparse => idx, |
1067 | | }; |
1068 | 0 | let (name, field) = s.0[id as usize].as_ref().unwrap(); |
1069 | | |
1070 | 0 | write!(f, "{{{name}=")?; |
1071 | 0 | field.write(idx, f)?; |
1072 | 0 | f.write_char('}')?; |
1073 | 0 | Ok(()) |
1074 | 0 | } |
1075 | | } |
1076 | | |
1077 | | /// Get the value at the given row in an array as a String. |
1078 | | /// |
1079 | | /// Note this function is quite inefficient and is unlikely to be |
1080 | | /// suitable for converting large arrays or record batches. |
1081 | | /// |
1082 | | /// Please see [`ArrayFormatter`] for a more performant interface |
1083 | 0 | pub fn array_value_to_string(column: &dyn Array, row: usize) -> Result<String, ArrowError> { |
1084 | 0 | let options = FormatOptions::default().with_display_error(true); |
1085 | 0 | let formatter = ArrayFormatter::try_new(column, &options)?; |
1086 | 0 | Ok(formatter.value(row).to_string()) |
1087 | 0 | } |
1088 | | |
1089 | | /// Converts numeric type to a `String` |
1090 | | pub fn lexical_to_string<N: lexical_core::ToLexical>(n: N) -> String { |
1091 | | let mut buf = Vec::<u8>::with_capacity(N::FORMATTED_SIZE_DECIMAL); |
1092 | | unsafe { |
1093 | | // JUSTIFICATION |
1094 | | // Benefit |
1095 | | // Allows using the faster serializer lexical core and convert to string |
1096 | | // Soundness |
1097 | | // Length of buf is set as written length afterwards. lexical_core |
1098 | | // creates a valid string, so doesn't need to be checked. |
1099 | | let slice = std::slice::from_raw_parts_mut(buf.as_mut_ptr(), buf.capacity()); |
1100 | | let len = lexical_core::write(n, slice).len(); |
1101 | | buf.set_len(len); |
1102 | | String::from_utf8_unchecked(buf) |
1103 | | } |
1104 | | } |
1105 | | |
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_array::builder::StringRunBuilder;

    /// Test to verify options can be constant. See #4580
    const TEST_CONST_OPTIONS: FormatOptions<'static> = FormatOptions::new()
        .with_date_format(Some("foo"))
        .with_timestamp_format(Some("404"));

    /// 45 days, 14 hours, 2 minutes and 34 seconds, expressed in seconds.
    const LONG_DURATION_SECS: i64 = 45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34;

    #[test]
    fn test_const_options() {
        let date_format = TEST_CONST_OPTIONS.date_format;
        assert_eq!(date_format, Some("foo"));
    }

    #[test]
    fn test_map_array_to_string() {
        let keys = ["a", "b", "c", "d", "e", "f", "g", "h"];
        let values_data = UInt32Array::from((0u32..8).map(|i| i * 10).collect::<Vec<_>>());

        // Offsets describing the nested entries:
        // [[a, b, c], [d, e, f], [g, h]]
        let entry_offsets: [u32; 4] = [0, 3, 6, 8];

        let map_array =
            MapArray::new_from_strings(keys.iter().copied(), &values_data, &entry_offsets)
                .unwrap();
        let second_row = array_value_to_string(&map_array, 1).unwrap();
        assert_eq!("{d: 30, e: 40, f: 50}", second_row);
    }

    /// Formats every row of `array` with the given options.
    fn format_array(array: &dyn Array, fmt: &FormatOptions) -> Vec<String> {
        let formatter = ArrayFormatter::try_new(array, fmt).unwrap();
        let mut rendered = Vec::with_capacity(array.len());
        for row in 0..array.len() {
            rendered.push(formatter.value(row).to_string());
        }
        rendered
    }

    /// Checks the leading rows of `array` against `(iso, pretty)` expectations,
    /// where `iso` is the default duration rendering and `pretty` is the
    /// [`DurationFormat::Pretty`] rendering.
    fn assert_duration_formats(array: &dyn Array, expected: &[(&str, &str)]) {
        let iso_fmt = FormatOptions::new();
        let pretty_fmt = FormatOptions::new().with_duration_format(DurationFormat::Pretty);
        let iso = format_array(array, &iso_fmt);
        let pretty = format_array(array, &pretty_fmt);

        for (row, (want_iso, want_pretty)) in expected.iter().enumerate() {
            assert_eq!(iso[row], *want_iso);
            assert_eq!(pretty[row], *want_pretty);
        }
    }

    #[test]
    fn test_array_value_to_string_duration() {
        let nanos = DurationNanosecondArray::from(vec![
            1,
            -1,
            1000,
            -1000,
            LONG_DURATION_SECS * 1_000_000_000 + 123456789,
            -LONG_DURATION_SECS * 1_000_000_000 - 123456789,
        ]);
        assert_duration_formats(
            &nanos,
            &[
                (
                    "PT0.000000001S",
                    "0 days 0 hours 0 mins 0.000000001 secs",
                ),
                (
                    "-PT0.000000001S",
                    "0 days 0 hours 0 mins -0.000000001 secs",
                ),
                ("PT0.000001S", "0 days 0 hours 0 mins 0.000001000 secs"),
                ("-PT0.000001S", "0 days 0 hours 0 mins -0.000001000 secs"),
                (
                    "PT3938554.123456789S",
                    "45 days 14 hours 2 mins 34.123456789 secs",
                ),
                (
                    "-PT3938554.123456789S",
                    "-45 days -14 hours -2 mins -34.123456789 secs",
                ),
            ],
        );

        let micros = DurationMicrosecondArray::from(vec![
            1,
            -1,
            1000,
            -1000,
            LONG_DURATION_SECS * 1_000_000 + 123456,
            -LONG_DURATION_SECS * 1_000_000 - 123456,
        ]);
        assert_duration_formats(
            &micros,
            &[
                ("PT0.000001S", "0 days 0 hours 0 mins 0.000001 secs"),
                ("-PT0.000001S", "0 days 0 hours 0 mins -0.000001 secs"),
                ("PT0.001S", "0 days 0 hours 0 mins 0.001000 secs"),
                ("-PT0.001S", "0 days 0 hours 0 mins -0.001000 secs"),
                (
                    "PT3938554.123456S",
                    "45 days 14 hours 2 mins 34.123456 secs",
                ),
                (
                    "-PT3938554.123456S",
                    "-45 days -14 hours -2 mins -34.123456 secs",
                ),
            ],
        );

        let millis = DurationMillisecondArray::from(vec![
            1,
            -1,
            1000,
            -1000,
            LONG_DURATION_SECS * 1_000 + 123,
            -LONG_DURATION_SECS * 1_000 - 123,
        ]);
        assert_duration_formats(
            &millis,
            &[
                ("PT0.001S", "0 days 0 hours 0 mins 0.001 secs"),
                ("-PT0.001S", "0 days 0 hours 0 mins -0.001 secs"),
                ("PT1S", "0 days 0 hours 0 mins 1.000 secs"),
                ("-PT1S", "0 days 0 hours 0 mins -1.000 secs"),
                ("PT3938554.123S", "45 days 14 hours 2 mins 34.123 secs"),
                (
                    "-PT3938554.123S",
                    "-45 days -14 hours -2 mins -34.123 secs",
                ),
            ],
        );

        let seconds = DurationSecondArray::from(vec![
            1,
            -1,
            1000,
            -1000,
            LONG_DURATION_SECS,
            -LONG_DURATION_SECS,
        ]);
        assert_duration_formats(
            &seconds,
            &[
                ("PT1S", "0 days 0 hours 0 mins 1 secs"),
                ("-PT1S", "0 days 0 hours 0 mins -1 secs"),
                ("PT1000S", "0 days 0 hours 16 mins 40 secs"),
                ("-PT1000S", "0 days 0 hours -16 mins -40 secs"),
                ("PT3938554S", "45 days 14 hours 2 mins 34 secs"),
                ("-PT3938554S", "-45 days -14 hours -2 mins -34 secs"),
            ],
        );
    }

    #[test]
    fn test_null() {
        let array = NullArray::new(2);
        let options = FormatOptions::new().with_null("NULL");
        let formatted = format_array(&array, &options);
        assert_eq!(formatted, vec!["NULL".to_string(); 2]);
    }

    #[test]
    fn test_string_run_arry_to_string() {
        let mut builder = StringRunBuilder::<Int32Type>::new();
        for value in ["input_value", "input_value", "input_value", "input_value1"] {
            builder.append_value(value);
        }

        let run_array = builder.finish();
        assert_eq!("input_value", array_value_to_string(&run_array, 1).unwrap());
        assert_eq!(
            "input_value1",
            array_value_to_string(&run_array, 3).unwrap()
        );
    }
}