/Users/andrewlamb/Software/arrow-rs/arrow-array/src/array/primitive_array.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use crate::array::print_long_array; |
19 | | use crate::builder::{BooleanBufferBuilder, BufferBuilder, PrimitiveBuilder}; |
20 | | use crate::iterator::PrimitiveIter; |
21 | | use crate::temporal_conversions::{ |
22 | | as_date, as_datetime, as_datetime_with_timezone, as_duration, as_time, |
23 | | }; |
24 | | use crate::timezone::Tz; |
25 | | use crate::trusted_len::trusted_len_unzip; |
26 | | use crate::types::*; |
27 | | use crate::{Array, ArrayAccessor, ArrayRef, Scalar}; |
28 | | use arrow_buffer::{i256, ArrowNativeType, Buffer, NullBuffer, ScalarBuffer}; |
29 | | use arrow_data::bit_iterator::try_for_each_valid_idx; |
30 | | use arrow_data::{ArrayData, ArrayDataBuilder}; |
31 | | use arrow_schema::{ArrowError, DataType}; |
32 | | use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime}; |
33 | | use half::f16; |
34 | | use std::any::Any; |
35 | | use std::sync::Arc; |
36 | | |
37 | | /// A [`PrimitiveArray`] of `i8` |
38 | | /// |
39 | | /// # Examples |
40 | | /// |
41 | | /// Construction |
42 | | /// |
43 | | /// ``` |
44 | | /// # use arrow_array::Int8Array; |
45 | | /// // Create from Vec<Option<i8>> |
46 | | /// let arr = Int8Array::from(vec![Some(1), None, Some(2)]); |
47 | | /// // Create from Vec<i8> |
48 | | /// let arr = Int8Array::from(vec![1, 2, 3]); |
49 | | /// // Create iter/collect |
50 | | /// let arr: Int8Array = std::iter::repeat(42).take(10).collect(); |
51 | | /// ``` |
52 | | /// |
53 | | /// See [`PrimitiveArray`] for more information and examples |
54 | | pub type Int8Array = PrimitiveArray<Int8Type>; |
55 | | |
56 | | /// A [`PrimitiveArray`] of `i16` |
57 | | /// |
58 | | /// # Examples |
59 | | /// |
60 | | /// Construction |
61 | | /// |
62 | | /// ``` |
63 | | /// # use arrow_array::Int16Array; |
64 | | /// // Create from Vec<Option<i16>> |
65 | | /// let arr = Int16Array::from(vec![Some(1), None, Some(2)]); |
66 | | /// // Create from Vec<i16> |
67 | | /// let arr = Int16Array::from(vec![1, 2, 3]); |
68 | | /// // Create iter/collect |
69 | | /// let arr: Int16Array = std::iter::repeat(42).take(10).collect(); |
70 | | /// ``` |
71 | | /// |
72 | | /// See [`PrimitiveArray`] for more information and examples |
73 | | pub type Int16Array = PrimitiveArray<Int16Type>; |
74 | | |
75 | | /// A [`PrimitiveArray`] of `i32` |
76 | | /// |
77 | | /// # Examples |
78 | | /// |
79 | | /// Construction |
80 | | /// |
81 | | /// ``` |
82 | | /// # use arrow_array::Int32Array; |
83 | | /// // Create from Vec<Option<i32>> |
84 | | /// let arr = Int32Array::from(vec![Some(1), None, Some(2)]); |
85 | | /// // Create from Vec<i32> |
86 | | /// let arr = Int32Array::from(vec![1, 2, 3]); |
87 | | /// // Create iter/collect |
88 | | /// let arr: Int32Array = std::iter::repeat(42).take(10).collect(); |
89 | | /// ``` |
90 | | /// |
91 | | /// See [`PrimitiveArray`] for more information and examples |
92 | | pub type Int32Array = PrimitiveArray<Int32Type>; |
93 | | |
94 | | /// A [`PrimitiveArray`] of `i64` |
95 | | /// |
96 | | /// # Examples |
97 | | /// |
98 | | /// Construction |
99 | | /// |
100 | | /// ``` |
101 | | /// # use arrow_array::Int64Array; |
102 | | /// // Create from Vec<Option<i64>> |
103 | | /// let arr = Int64Array::from(vec![Some(1), None, Some(2)]); |
104 | | /// // Create from Vec<i64> |
105 | | /// let arr = Int64Array::from(vec![1, 2, 3]); |
106 | | /// // Create iter/collect |
107 | | /// let arr: Int64Array = std::iter::repeat(42).take(10).collect(); |
108 | | /// ``` |
109 | | /// |
110 | | /// See [`PrimitiveArray`] for more information and examples |
111 | | pub type Int64Array = PrimitiveArray<Int64Type>; |
112 | | |
113 | | /// A [`PrimitiveArray`] of `u8` |
114 | | /// |
115 | | /// # Examples |
116 | | /// |
117 | | /// Construction |
118 | | /// |
119 | | /// ``` |
120 | | /// # use arrow_array::UInt8Array; |
121 | | /// // Create from Vec<Option<u8>> |
122 | | /// let arr = UInt8Array::from(vec![Some(1), None, Some(2)]); |
123 | | /// // Create from Vec<u8> |
124 | | /// let arr = UInt8Array::from(vec![1, 2, 3]); |
125 | | /// // Create iter/collect |
126 | | /// let arr: UInt8Array = std::iter::repeat(42).take(10).collect(); |
127 | | /// ``` |
128 | | /// |
129 | | /// See [`PrimitiveArray`] for more information and examples |
130 | | pub type UInt8Array = PrimitiveArray<UInt8Type>; |
131 | | |
132 | | /// A [`PrimitiveArray`] of `u16` |
133 | | /// |
134 | | /// # Examples |
135 | | /// |
136 | | /// Construction |
137 | | /// |
138 | | /// ``` |
139 | | /// # use arrow_array::UInt16Array; |
140 | | /// // Create from Vec<Option<u16>> |
141 | | /// let arr = UInt16Array::from(vec![Some(1), None, Some(2)]); |
142 | | /// // Create from Vec<u16> |
143 | | /// let arr = UInt16Array::from(vec![1, 2, 3]); |
144 | | /// // Create iter/collect |
145 | | /// let arr: UInt16Array = std::iter::repeat(42).take(10).collect(); |
146 | | /// ``` |
147 | | /// |
148 | | /// See [`PrimitiveArray`] for more information and examples |
149 | | pub type UInt16Array = PrimitiveArray<UInt16Type>; |
150 | | |
151 | | /// A [`PrimitiveArray`] of `u32` |
152 | | /// |
153 | | /// # Examples |
154 | | /// |
155 | | /// Construction |
156 | | /// |
157 | | /// ``` |
158 | | /// # use arrow_array::UInt32Array; |
159 | | /// // Create from Vec<Option<u32>> |
160 | | /// let arr = UInt32Array::from(vec![Some(1), None, Some(2)]); |
161 | | /// // Create from Vec<u32> |
162 | | /// let arr = UInt32Array::from(vec![1, 2, 3]); |
163 | | /// // Create iter/collect |
164 | | /// let arr: UInt32Array = std::iter::repeat(42).take(10).collect(); |
165 | | /// ``` |
166 | | /// |
167 | | /// See [`PrimitiveArray`] for more information and examples |
168 | | pub type UInt32Array = PrimitiveArray<UInt32Type>; |
169 | | |
170 | | /// A [`PrimitiveArray`] of `u64` |
171 | | /// |
172 | | /// # Examples |
173 | | /// |
174 | | /// Construction |
175 | | /// |
176 | | /// ``` |
177 | | /// # use arrow_array::UInt64Array; |
178 | | /// // Create from Vec<Option<u64>> |
179 | | /// let arr = UInt64Array::from(vec![Some(1), None, Some(2)]); |
180 | | /// // Create from Vec<u64> |
181 | | /// let arr = UInt64Array::from(vec![1, 2, 3]); |
182 | | /// // Create iter/collect |
183 | | /// let arr: UInt64Array = std::iter::repeat(42).take(10).collect(); |
184 | | /// ``` |
185 | | /// |
186 | | /// See [`PrimitiveArray`] for more information and examples |
187 | | pub type UInt64Array = PrimitiveArray<UInt64Type>; |
188 | | |
189 | | /// A [`PrimitiveArray`] of `f16` |
190 | | /// |
191 | | /// # Examples |
192 | | /// |
193 | | /// Construction |
194 | | /// |
195 | | /// ``` |
196 | | /// # use arrow_array::Float16Array; |
197 | | /// use half::f16; |
198 | | /// // Create from Vec<Option<f16>> |
199 | | /// let arr = Float16Array::from(vec![Some(f16::from_f64(1.0)), Some(f16::from_f64(2.0))]); |
200 | | /// // Create from Vec<f16> |
201 | | /// let arr = Float16Array::from(vec![f16::from_f64(1.0), f16::from_f64(2.0), f16::from_f64(3.0)]); |
202 | | /// // Create iter/collect |
203 | | /// let arr: Float16Array = std::iter::repeat(f16::from_f64(1.0)).take(10).collect(); |
204 | | /// ``` |
205 | | /// |
206 | | /// # Example: Using `collect` |
207 | | /// ``` |
208 | | /// # use arrow_array::Float16Array; |
209 | | /// use half::f16; |
210 | | /// let arr : Float16Array = [Some(f16::from_f64(1.0)), Some(f16::from_f64(2.0))].into_iter().collect(); |
211 | | /// ``` |
212 | | /// |
213 | | /// See [`PrimitiveArray`] for more information and examples |
214 | | pub type Float16Array = PrimitiveArray<Float16Type>; |
215 | | |
216 | | /// A [`PrimitiveArray`] of `f32` |
217 | | /// |
218 | | /// # Examples |
219 | | /// |
220 | | /// Construction |
221 | | /// |
222 | | /// ``` |
223 | | /// # use arrow_array::Float32Array; |
224 | | /// // Create from Vec<Option<f32>> |
225 | | /// let arr = Float32Array::from(vec![Some(1.0), None, Some(2.0)]); |
226 | | /// // Create from Vec<f32> |
227 | | /// let arr = Float32Array::from(vec![1.0, 2.0, 3.0]); |
228 | | /// // Create iter/collect |
229 | | /// let arr: Float32Array = std::iter::repeat(42.0).take(10).collect(); |
230 | | /// ``` |
231 | | /// |
232 | | /// See [`PrimitiveArray`] for more information and examples |
233 | | pub type Float32Array = PrimitiveArray<Float32Type>; |
234 | | |
235 | | /// A [`PrimitiveArray`] of `f64` |
236 | | /// |
237 | | /// # Examples |
238 | | /// |
239 | | /// Construction |
240 | | /// |
241 | | /// ``` |
242 | | /// # use arrow_array::Float64Array; |
243 | | /// // Create from Vec<Option<f64>> |
244 | | /// let arr = Float64Array::from(vec![Some(1.0), None, Some(2.0)]); |
245 | | /// // Create from Vec<f64> |
246 | | /// let arr = Float64Array::from(vec![1.0, 2.0, 3.0]); |
247 | | /// // Create iter/collect |
248 | | /// let arr: Float64Array = std::iter::repeat(42.0).take(10).collect(); |
249 | | /// ``` |
250 | | /// |
251 | | /// See [`PrimitiveArray`] for more information and examples |
252 | | pub type Float64Array = PrimitiveArray<Float64Type>; |
253 | | |
254 | | /// A [`PrimitiveArray`] of seconds since UNIX epoch stored as `i64` |
255 | | /// |
256 | | /// This type is similar to the [`chrono::DateTime`] type and can hold |
257 | | /// values such as `1970-05-09 14:25:11 +01:00` |
258 | | /// |
259 | | /// See also [`Timestamp`](arrow_schema::DataType::Timestamp). |
260 | | /// |
261 | | /// # Example: UTC timestamps post epoch |
262 | | /// ``` |
263 | | /// # use arrow_array::TimestampSecondArray; |
264 | | /// use arrow_array::timezone::Tz; |
265 | | /// // Corresponds to single element array with entry 1970-05-09T14:25:11+0:00 |
266 | | /// let arr = TimestampSecondArray::from(vec![11111111]); |
267 | | /// // OR |
268 | | /// let arr = TimestampSecondArray::from(vec![Some(11111111)]); |
269 | | /// let utc_tz: Tz = "+00:00".parse().unwrap(); |
270 | | /// |
271 | | /// assert_eq!(arr.value_as_datetime_with_tz(0, utc_tz).map(|v| v.to_string()).unwrap(), "1970-05-09 14:25:11 +00:00") |
272 | | /// ``` |
273 | | /// |
274 | | /// # Example: UTC timestamps pre epoch |
275 | | /// ``` |
276 | | /// # use arrow_array::TimestampSecondArray; |
277 | | /// use arrow_array::timezone::Tz; |
278 | | /// // Corresponds to single element array with entry 1969-08-25T09:34:49+0:00 |
279 | | /// let arr = TimestampSecondArray::from(vec![-11111111]); |
280 | | /// // OR |
281 | | /// let arr = TimestampSecondArray::from(vec![Some(-11111111)]); |
282 | | /// let utc_tz: Tz = "+00:00".parse().unwrap(); |
283 | | /// |
284 | | /// assert_eq!(arr.value_as_datetime_with_tz(0, utc_tz).map(|v| v.to_string()).unwrap(), "1969-08-25 09:34:49 +00:00") |
285 | | /// ``` |
286 | | /// |
287 | | /// # Example: With timezone specified |
288 | | /// ``` |
289 | | /// # use arrow_array::TimestampSecondArray; |
290 | | /// use arrow_array::timezone::Tz; |
291 | | /// // Corresponds to single element array with entry 1970-05-10T00:25:11+10:00 |
292 | | /// let arr = TimestampSecondArray::from(vec![11111111]).with_timezone("+10:00".to_string()); |
293 | | /// // OR |
294 | | /// let arr = TimestampSecondArray::from(vec![Some(11111111)]).with_timezone("+10:00".to_string()); |
295 | | /// let sydney_tz: Tz = "+10:00".parse().unwrap(); |
296 | | /// |
297 | | /// assert_eq!(arr.value_as_datetime_with_tz(0, sydney_tz).map(|v| v.to_string()).unwrap(), "1970-05-10 00:25:11 +10:00") |
298 | | /// ``` |
299 | | /// |
300 | | /// See [`PrimitiveArray`] for more information and examples |
301 | | pub type TimestampSecondArray = PrimitiveArray<TimestampSecondType>; |
302 | | |
303 | | /// A [`PrimitiveArray`] of milliseconds since UNIX epoch stored as `i64` |
304 | | /// |
305 | | /// See examples for [`TimestampSecondArray`] |
306 | | pub type TimestampMillisecondArray = PrimitiveArray<TimestampMillisecondType>; |
307 | | |
308 | | /// A [`PrimitiveArray`] of microseconds since UNIX epoch stored as `i64` |
309 | | /// |
310 | | /// See examples for [`TimestampSecondArray`] |
311 | | pub type TimestampMicrosecondArray = PrimitiveArray<TimestampMicrosecondType>; |
312 | | |
313 | | /// A [`PrimitiveArray`] of nanoseconds since UNIX epoch stored as `i64` |
314 | | /// |
315 | | /// See examples for [`TimestampSecondArray`] |
316 | | pub type TimestampNanosecondArray = PrimitiveArray<TimestampNanosecondType>; |
317 | | |
318 | | /// A [`PrimitiveArray`] of days since UNIX epoch stored as `i32` |
319 | | /// |
320 | | /// This type is similar to the [`chrono::NaiveDate`] type and can hold |
321 | | /// values such as `2018-11-13` |
322 | | pub type Date32Array = PrimitiveArray<Date32Type>; |
323 | | |
324 | | /// A [`PrimitiveArray`] of milliseconds since UNIX epoch stored as `i64` |
325 | | /// |
326 | | /// This type is similar to the [`chrono::NaiveDate`] type and can hold |
327 | | /// values such as `2018-11-13` |
328 | | pub type Date64Array = PrimitiveArray<Date64Type>; |
329 | | |
330 | | /// A [`PrimitiveArray`] of seconds since midnight stored as `i32` |
331 | | /// |
332 | | /// This type is similar to the [`chrono::NaiveTime`] type and can |
333 | | /// hold values such as `00:02:00` |
334 | | pub type Time32SecondArray = PrimitiveArray<Time32SecondType>; |
335 | | |
336 | | /// A [`PrimitiveArray`] of milliseconds since midnight stored as `i32` |
337 | | /// |
338 | | /// This type is similar to the [`chrono::NaiveTime`] type and can |
339 | | /// hold values such as `00:02:00.123` |
340 | | pub type Time32MillisecondArray = PrimitiveArray<Time32MillisecondType>; |
341 | | |
342 | | /// A [`PrimitiveArray`] of microseconds since midnight stored as `i64` |
343 | | /// |
344 | | /// This type is similar to the [`chrono::NaiveTime`] type and can |
345 | | /// hold values such as `00:02:00.123456` |
346 | | pub type Time64MicrosecondArray = PrimitiveArray<Time64MicrosecondType>; |
347 | | |
348 | | /// A [`PrimitiveArray`] of nanoseconds since midnight stored as `i64` |
349 | | /// |
350 | | /// This type is similar to the [`chrono::NaiveTime`] type and can |
351 | | /// hold values such as `00:02:00.123456789` |
352 | | pub type Time64NanosecondArray = PrimitiveArray<Time64NanosecondType>; |
353 | | |
354 | | /// A [`PrimitiveArray`] of “calendar” intervals in whole months |
355 | | /// |
356 | | /// See [`IntervalYearMonthType`] for details on representation and caveats. |
357 | | /// |
358 | | /// # Example |
359 | | /// ``` |
360 | | /// # use arrow_array::IntervalYearMonthArray; |
361 | | /// let array = IntervalYearMonthArray::from(vec![ |
362 | | /// 2, // 2 months |
363 | | /// 25, // 2 years and 1 month |
364 | | /// -1 // -1 months |
365 | | /// ]); |
366 | | /// ``` |
367 | | pub type IntervalYearMonthArray = PrimitiveArray<IntervalYearMonthType>; |
368 | | |
369 | | /// A [`PrimitiveArray`] of “calendar” intervals in days and milliseconds |
370 | | /// |
371 | | /// See [`IntervalDayTime`] for details on representation and caveats. |
372 | | /// |
373 | | /// # Example |
374 | | /// ``` |
375 | | /// # use arrow_array::IntervalDayTimeArray; |
376 | | /// use arrow_array::types::IntervalDayTime; |
377 | | /// let array = IntervalDayTimeArray::from(vec![ |
378 | | /// IntervalDayTime::new(1, 1000), // 1 day, 1000 milliseconds |
379 | | /// IntervalDayTime::new(33, 0), // 33 days, 0 milliseconds |
380 | | /// IntervalDayTime::new(0, 12 * 60 * 60 * 1000), // 0 days, 12 hours |
381 | | /// ]); |
382 | | /// ``` |
383 | | pub type IntervalDayTimeArray = PrimitiveArray<IntervalDayTimeType>; |
384 | | |
385 | | /// A [`PrimitiveArray`] of “calendar” intervals in months, days, and nanoseconds. |
386 | | /// |
387 | | /// See [`IntervalMonthDayNano`] for details on representation and caveats. |
388 | | /// |
389 | | /// # Example |
390 | | /// ``` |
391 | | /// # use arrow_array::IntervalMonthDayNanoArray; |
392 | | /// use arrow_array::types::IntervalMonthDayNano; |
393 | | /// let array = IntervalMonthDayNanoArray::from(vec![ |
394 | | /// IntervalMonthDayNano::new(1, 2, 1000), // 1 month, 2 days, 1000 nanoseconds |
395 | | /// IntervalMonthDayNano::new(12, 1, 0), // 12 months, 1 day, 0 nanoseconds |
396 | | /// IntervalMonthDayNano::new(0, 0, 12 * 1000 * 1000), // 0 months, 0 days, 12 milliseconds |
397 | | /// ]); |
398 | | /// ``` |
399 | | pub type IntervalMonthDayNanoArray = PrimitiveArray<IntervalMonthDayNanoType>; |
400 | | |
401 | | /// A [`PrimitiveArray`] of elapsed durations in seconds |
402 | | pub type DurationSecondArray = PrimitiveArray<DurationSecondType>; |
403 | | |
404 | | /// A [`PrimitiveArray`] of elapsed durations in milliseconds |
405 | | pub type DurationMillisecondArray = PrimitiveArray<DurationMillisecondType>; |
406 | | |
407 | | /// A [`PrimitiveArray`] of elapsed durations in microseconds |
408 | | pub type DurationMicrosecondArray = PrimitiveArray<DurationMicrosecondType>; |
409 | | |
410 | | /// A [`PrimitiveArray`] of elapsed durations in nanoseconds |
411 | | pub type DurationNanosecondArray = PrimitiveArray<DurationNanosecondType>; |
412 | | |
413 | | /// A [`PrimitiveArray`] of 32-bit fixed point decimals |
414 | | /// |
415 | | /// # Examples |
416 | | /// |
417 | | /// Construction |
418 | | /// |
419 | | /// ``` |
420 | | /// # use arrow_array::Decimal32Array; |
421 | | /// // Create from Vec<Option<i32>> |
422 | | /// let arr = Decimal32Array::from(vec![Some(1), None, Some(2)]); |
423 | | /// // Create from Vec<i32> |
424 | | /// let arr = Decimal32Array::from(vec![1, 2, 3]); |
425 | | /// // Create iter/collect |
426 | | /// let arr: Decimal32Array = std::iter::repeat(42).take(10).collect(); |
427 | | /// ``` |
428 | | /// |
429 | | /// See [`PrimitiveArray`] for more information and examples |
430 | | pub type Decimal32Array = PrimitiveArray<Decimal32Type>; |
431 | | |
432 | | /// A [`PrimitiveArray`] of 64-bit fixed point decimals |
433 | | /// |
434 | | /// # Examples |
435 | | /// |
436 | | /// Construction |
437 | | /// |
438 | | /// ``` |
439 | | /// # use arrow_array::Decimal64Array; |
440 | | /// // Create from Vec<Option<i64>> |
441 | | /// let arr = Decimal64Array::from(vec![Some(1), None, Some(2)]); |
442 | | /// // Create from Vec<i64> |
443 | | /// let arr = Decimal64Array::from(vec![1, 2, 3]); |
444 | | /// // Create iter/collect |
445 | | /// let arr: Decimal64Array = std::iter::repeat(42).take(10).collect(); |
446 | | /// ``` |
447 | | /// |
448 | | /// See [`PrimitiveArray`] for more information and examples |
449 | | pub type Decimal64Array = PrimitiveArray<Decimal64Type>; |
450 | | |
451 | | /// A [`PrimitiveArray`] of 128-bit fixed point decimals |
452 | | /// |
453 | | /// # Examples |
454 | | /// |
455 | | /// Construction |
456 | | /// |
457 | | /// ``` |
458 | | /// # use arrow_array::Decimal128Array; |
459 | | /// // Create from Vec<Option<i128>> |
460 | | /// let arr = Decimal128Array::from(vec![Some(1), None, Some(2)]); |
461 | | /// // Create from Vec<i128> |
462 | | /// let arr = Decimal128Array::from(vec![1, 2, 3]); |
463 | | /// // Create iter/collect |
464 | | /// let arr: Decimal128Array = std::iter::repeat(42).take(10).collect(); |
465 | | /// ``` |
466 | | /// |
467 | | /// See [`PrimitiveArray`] for more information and examples |
468 | | pub type Decimal128Array = PrimitiveArray<Decimal128Type>; |
469 | | |
470 | | /// A [`PrimitiveArray`] of 256-bit fixed point decimals |
471 | | /// |
472 | | /// # Examples |
473 | | /// |
474 | | /// Construction |
475 | | /// |
476 | | /// ``` |
477 | | /// # use arrow_array::Decimal256Array; |
478 | | /// use arrow_buffer::i256; |
479 | | /// // Create from Vec<Option<i256>> |
480 | | /// let arr = Decimal256Array::from(vec![Some(i256::from(1)), None, Some(i256::from(2))]); |
481 | | /// // Create from Vec<i256> |
482 | | /// let arr = Decimal256Array::from(vec![i256::from(1), i256::from(2), i256::from(3)]); |
483 | | /// // Create iter/collect |
484 | | /// let arr: Decimal256Array = std::iter::repeat(i256::from(42)).take(10).collect(); |
485 | | /// ``` |
486 | | /// |
487 | | /// See [`PrimitiveArray`] for more information and examples |
488 | | pub type Decimal256Array = PrimitiveArray<Decimal256Type>; |
489 | | |
490 | | pub use crate::types::ArrowPrimitiveType; |
491 | | |
492 | | /// An array of primitive values, of type [`ArrowPrimitiveType`] |
493 | | /// |
494 | | /// # Example: From a Vec |
495 | | /// |
496 | | /// ``` |
497 | | /// # use arrow_array::{Array, PrimitiveArray, types::Int32Type}; |
498 | | /// let arr: PrimitiveArray<Int32Type> = vec![1, 2, 3, 4].into(); |
499 | | /// assert_eq!(4, arr.len()); |
500 | | /// assert_eq!(0, arr.null_count()); |
501 | | /// assert_eq!(arr.values(), &[1, 2, 3, 4]) |
502 | | /// ``` |
503 | | /// |
504 | | /// # Example: From an optional Vec |
505 | | /// |
506 | | /// ``` |
507 | | /// # use arrow_array::{Array, PrimitiveArray, types::Int32Type}; |
508 | | /// let arr: PrimitiveArray<Int32Type> = vec![Some(1), None, Some(3), None].into(); |
509 | | /// assert_eq!(4, arr.len()); |
510 | | /// assert_eq!(2, arr.null_count()); |
511 | | /// // Note: values for null indexes are arbitrary |
512 | | /// assert_eq!(arr.values(), &[1, 0, 3, 0]) |
513 | | /// ``` |
514 | | /// |
515 | | /// # Example: From an iterator of values |
516 | | /// |
517 | | /// ``` |
518 | | /// # use arrow_array::{Array, PrimitiveArray, types::Int32Type}; |
519 | | /// let arr: PrimitiveArray<Int32Type> = (0..10).map(|x| x + 1).collect(); |
520 | | /// assert_eq!(10, arr.len()); |
521 | | /// assert_eq!(0, arr.null_count()); |
522 | | /// for i in 0..10i32 { |
523 | | /// assert_eq!(i + 1, arr.value(i as usize)); |
524 | | /// } |
525 | | /// ``` |
526 | | /// |
527 | | /// # Example: From an iterator of option |
528 | | /// |
529 | | /// ``` |
530 | | /// # use arrow_array::{Array, PrimitiveArray, types::Int32Type}; |
531 | | /// let arr: PrimitiveArray<Int32Type> = (0..10).map(|x| (x % 2 == 0).then_some(x)).collect(); |
532 | | /// assert_eq!(10, arr.len()); |
533 | | /// assert_eq!(5, arr.null_count()); |
534 | | /// // Note: values for null indexes are arbitrary |
535 | | /// assert_eq!(arr.values(), &[0, 0, 2, 0, 4, 0, 6, 0, 8, 0]) |
536 | | /// ``` |
537 | | /// |
538 | | /// # Example: Using Builder |
539 | | /// |
540 | | /// ``` |
541 | | /// # use arrow_array::Array; |
542 | | /// # use arrow_array::builder::PrimitiveBuilder; |
543 | | /// # use arrow_array::types::Int32Type; |
544 | | /// let mut builder = PrimitiveBuilder::<Int32Type>::new(); |
545 | | /// builder.append_value(1); |
546 | | /// builder.append_null(); |
547 | | /// builder.append_value(2); |
548 | | /// let array = builder.finish(); |
549 | | /// // Note: values for null indexes are arbitrary |
550 | | /// assert_eq!(array.values(), &[1, 0, 2]); |
551 | | /// assert!(array.is_null(1)); |
552 | | /// ``` |
553 | | /// |
554 | | /// # Example: Get a `PrimitiveArray` from an [`ArrayRef`] |
555 | | /// ``` |
556 | | /// # use std::sync::Arc; |
557 | | /// # use arrow_array::{Array, cast::AsArray, ArrayRef, Float32Array, PrimitiveArray}; |
558 | | /// # use arrow_array::types::{Float32Type}; |
559 | | /// # use arrow_schema::DataType; |
560 | | /// # let array: ArrayRef = Arc::new(Float32Array::from(vec![1.2, 2.3])); |
561 | | /// // will panic if the array is not a Float32Array |
562 | | /// assert_eq!(&DataType::Float32, array.data_type()); |
563 | | /// let f32_array: Float32Array = array.as_primitive().clone(); |
564 | | /// assert_eq!(f32_array, Float32Array::from(vec![1.2, 2.3])); |
565 | | /// ``` |
566 | | pub struct PrimitiveArray<T: ArrowPrimitiveType> { |
567 | | data_type: DataType, |
568 | | /// Values data |
569 | | values: ScalarBuffer<T::Native>, |
570 | | nulls: Option<NullBuffer>, |
571 | | } |
572 | | |
573 | | impl<T: ArrowPrimitiveType> Clone for PrimitiveArray<T> { |
574 | 679 | fn clone(&self) -> Self { |
575 | 679 | Self { |
576 | 679 | data_type: self.data_type.clone(), |
577 | 679 | values: self.values.clone(), |
578 | 679 | nulls: self.nulls.clone(), |
579 | 679 | } |
580 | 679 | } |
581 | | } |
582 | | |
583 | | impl<T: ArrowPrimitiveType> PrimitiveArray<T> { |
584 | | /// Create a new [`PrimitiveArray`] from the provided values and nulls |
585 | | /// |
586 | | /// # Panics |
587 | | /// |
588 | | /// Panics if [`Self::try_new`] returns an error |
589 | | /// |
590 | | /// # Example |
591 | | /// |
592 | | /// Creating a [`PrimitiveArray`] directly from a [`ScalarBuffer`] and [`NullBuffer`] using |
593 | | /// this constructor is the most performant approach, avoiding any additional allocations |
594 | | /// |
595 | | /// ``` |
596 | | /// # use arrow_array::Int32Array; |
597 | | /// # use arrow_array::types::Int32Type; |
598 | | /// # use arrow_buffer::NullBuffer; |
599 | | /// // [1, 2, 3, 4] |
600 | | /// let array = Int32Array::new(vec![1, 2, 3, 4].into(), None); |
601 | | /// // [1, null, 3, 4] |
602 | | /// let nulls = NullBuffer::from(vec![true, false, true, true]); |
603 | | /// let array = Int32Array::new(vec![1, 2, 3, 4].into(), Some(nulls)); |
604 | | /// ``` |
605 | 556 | pub fn new(values: ScalarBuffer<T::Native>, nulls: Option<NullBuffer>) -> Self { |
606 | 556 | Self::try_new(values, nulls).unwrap() |
607 | 556 | } |
608 | | |
609 | | /// Create a new [`PrimitiveArray`] of the given length where all values are null |
610 | 0 | pub fn new_null(length: usize) -> Self { |
611 | 0 | Self { |
612 | 0 | data_type: T::DATA_TYPE, |
613 | 0 | values: vec![T::Native::usize_as(0); length].into(), |
614 | 0 | nulls: Some(NullBuffer::new_null(length)), |
615 | 0 | } |
616 | 0 | } |
617 | | |
618 | | /// Create a new [`PrimitiveArray`] from the provided values and nulls |
619 | | /// |
620 | | /// # Errors |
621 | | /// |
622 | | /// Errors if: |
623 | | /// - `values.len() != nulls.len()` |
624 | 559 | pub fn try_new( |
625 | 559 | values: ScalarBuffer<T::Native>, |
626 | 559 | nulls: Option<NullBuffer>, |
627 | 559 | ) -> Result<Self, ArrowError> { |
628 | 559 | if let Some(n) = nulls.as_ref() { |
629 | 39 | if n.len() != values.len() { |
630 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
631 | 0 | "Incorrect length of null buffer for PrimitiveArray, expected {} got {}", |
632 | 0 | values.len(), |
633 | 0 | n.len(), |
634 | 0 | ))); |
635 | 39 | } |
636 | 520 | } |
637 | | |
638 | 559 | Ok(Self { |
639 | 559 | data_type: T::DATA_TYPE, |
640 | 559 | values, |
641 | 559 | nulls, |
642 | 559 | }) |
643 | 559 | } |
644 | | |
645 | | /// Create a new [`Scalar`] from `value` |
646 | | pub fn new_scalar(value: T::Native) -> Scalar<Self> { |
647 | | Scalar::new(Self { |
648 | | data_type: T::DATA_TYPE, |
649 | | values: vec![value].into(), |
650 | | nulls: None, |
651 | | }) |
652 | | } |
653 | | |
654 | | /// Deconstruct this array into its constituent parts |
655 | 51 | pub fn into_parts(self) -> (DataType, ScalarBuffer<T::Native>, Option<NullBuffer>) { |
656 | 51 | (self.data_type, self.values, self.nulls) |
657 | 51 | } |
658 | | |
659 | | /// Overrides the [`DataType`] of this [`PrimitiveArray`] |
660 | | /// |
661 | | /// Prefer using [`Self::with_timezone`] or [`Self::with_precision_and_scale`] where |
662 | | /// the primitive type is suitably constrained, as these cannot panic |
663 | | /// |
664 | | /// # Panics |
665 | | /// |
666 | | /// Panics if `data_type` is not compatible with this array, see [`Self::is_compatible`] |
667 | 0 | pub fn with_data_type(self, data_type: DataType) -> Self { |
668 | 0 | Self::assert_compatible(&data_type); |
669 | 0 | Self { data_type, ..self } |
670 | 0 | } |
671 | | |
672 | | /// Asserts that `data_type` is compatible with `Self` |
673 | 178 | fn assert_compatible(data_type: &DataType) { |
674 | 178 | assert!( |
675 | 178 | Self::is_compatible(data_type), |
676 | 0 | "PrimitiveArray expected data type {} got {}", |
677 | 0 | T::DATA_TYPE, |
678 | | data_type |
679 | | ); |
680 | 178 | } |
681 | | |
682 | | /// Returns the length of this array. |
683 | | #[inline] |
684 | 644 | pub fn len(&self) -> usize { |
685 | 644 | self.values.len() |
686 | 644 | } |
687 | | |
688 | | /// Returns whether this array is empty. |
689 | 1 | pub fn is_empty(&self) -> bool { |
690 | 1 | self.values.is_empty() |
691 | 1 | } |
692 | | |
693 | | /// Returns the values of this array |
694 | | #[inline] |
695 | 219 | pub fn values(&self) -> &ScalarBuffer<T::Native> { |
696 | 219 | &self.values |
697 | 219 | } |
698 | | |
699 | | /// Returns a new primitive array builder |
700 | | pub fn builder(capacity: usize) -> PrimitiveBuilder<T> { |
701 | | PrimitiveBuilder::<T>::with_capacity(capacity) |
702 | | } |
703 | | |
704 | | /// Returns if this [`PrimitiveArray`] is compatible with the provided [`DataType`] |
705 | | /// |
706 | | /// This is equivalent to `data_type == T::DATA_TYPE`, except that it ignores timestamp |
707 | | /// timezones and decimal precision and scale |
708 | 235 | pub fn is_compatible(data_type: &DataType) -> bool { |
709 | 235 | match T::DATA_TYPE { |
710 | 10 | DataType::Timestamp(t1, _) => { |
711 | 10 | matches!(data_type, DataType::Timestamp(t2, _) if &t1 == t2) |
712 | | } |
713 | 0 | DataType::Decimal32(_, _) => matches!(data_type, DataType::Decimal32(_, _)), |
714 | 0 | DataType::Decimal64(_, _) => matches!(data_type, DataType::Decimal64(_, _)), |
715 | 63 | DataType::Decimal128(_, _) => matches!(data_type, DataType::Decimal128(_, _)), |
716 | 1 | DataType::Decimal256(_, _) => matches!(data_type, DataType::Decimal256(_, _)), |
717 | 161 | _ => T::DATA_TYPE.eq(data_type), |
718 | | } |
719 | 235 | } |
720 | | |
721 | | /// Returns the primitive value at index `i`. |
722 | | /// |
723 | | /// Note: This method does not check for nulls and the value is arbitrary |
724 | | /// if [`is_null`](Self::is_null) returns true for the index. |
725 | | /// |
726 | | /// # Safety |
727 | | /// |
728 | | /// The caller must ensure that index `i` is less than the array's `len()` |
729 | | #[inline] |
730 | 404 | pub unsafe fn value_unchecked(&self, i: usize) -> T::Native { |
731 | 404 | *self.values.get_unchecked(i) |
732 | 404 | } |
733 | | |
734 | | /// Returns the primitive value at index `i`. |
735 | | /// |
736 | | /// Note: This method does not check for nulls and the value is arbitrary |
737 | | /// if [`is_null`](Self::is_null) returns true for the index. |
738 | | /// |
739 | | /// # Panics |
740 | | /// Panics if index `i` is out of bounds |
741 | | #[inline] |
742 | 404 | pub fn value(&self, i: usize) -> T::Native { |
743 | 404 | assert!( |
744 | 404 | i < self.len(), |
745 | 0 | "Trying to access an element at index {} from a PrimitiveArray of length {}", |
746 | | i, |
747 | 0 | self.len() |
748 | | ); |
749 | 404 | unsafe { self.value_unchecked(i) } |
750 | 404 | } |
751 | | |
752 | | /// Creates a PrimitiveArray based on an iterator of values without nulls |
753 | 119 | pub fn from_iter_values<I: IntoIterator<Item = T::Native>>(iter: I) -> Self { |
754 | 119 | let val_buf: Buffer = iter.into_iter().collect(); |
755 | 119 | let len = val_buf.len() / std::mem::size_of::<T::Native>(); |
756 | 119 | Self { |
757 | 119 | data_type: T::DATA_TYPE, |
758 | 119 | values: ScalarBuffer::new(val_buf, 0, len), |
759 | 119 | nulls: None, |
760 | 119 | } |
761 | 119 | } |
762 | | |
763 | | /// Creates a PrimitiveArray based on an iterator of values with provided nulls |
764 | | pub fn from_iter_values_with_nulls<I: IntoIterator<Item = T::Native>>( |
765 | | iter: I, |
766 | | nulls: Option<NullBuffer>, |
767 | | ) -> Self { |
768 | | let val_buf: Buffer = iter.into_iter().collect(); |
769 | | let len = val_buf.len() / std::mem::size_of::<T::Native>(); |
770 | | Self { |
771 | | data_type: T::DATA_TYPE, |
772 | | values: ScalarBuffer::new(val_buf, 0, len), |
773 | | nulls, |
774 | | } |
775 | | } |
776 | | |
777 | | /// Creates a PrimitiveArray based on a constant value with `count` elements |
778 | | pub fn from_value(value: T::Native, count: usize) -> Self { |
779 | | let val_buf: Vec<_> = vec![value; count]; |
780 | | Self::new(val_buf.into(), None) |
781 | | } |
782 | | |
783 | | /// Returns an iterator that returns the values of `array.value(i)` for an iterator with each element `i` |
784 | | pub fn take_iter<'a>( |
785 | | &'a self, |
786 | | indexes: impl Iterator<Item = Option<usize>> + 'a, |
787 | | ) -> impl Iterator<Item = Option<T::Native>> + 'a { |
788 | | indexes.map(|opt_index| opt_index.map(|index| self.value(index))) |
789 | | } |
790 | | |
791 | | /// Returns an iterator that returns the values of `array.value(i)` for an iterator with each element `i` |
792 | | /// # Safety |
793 | | /// |
794 | | /// caller must ensure that the offsets in the iterator are less than the array len() |
795 | | pub unsafe fn take_iter_unchecked<'a>( |
796 | | &'a self, |
797 | | indexes: impl Iterator<Item = Option<usize>> + 'a, |
798 | | ) -> impl Iterator<Item = Option<T::Native>> + 'a { |
799 | | indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index))) |
800 | | } |
801 | | |
802 | | /// Returns a zero-copy slice of this array with the indicated offset and length. |
803 | 294 | pub fn slice(&self, offset: usize, length: usize) -> Self { |
804 | | Self { |
805 | 294 | data_type: self.data_type.clone(), |
806 | 294 | values: self.values.slice(offset, length), |
807 | 294 | nulls: self.nulls.as_ref().map(|n| n12 .slice12 (offset12 , length12 )), |
808 | | } |
809 | 294 | } |
810 | | |
811 | | /// Reinterprets this array's contents as a different data type without copying |
812 | | /// |
813 | | /// This can be used to efficiently convert between primitive arrays with the |
814 | | /// same underlying representation |
815 | | /// |
816 | | /// Note: this will not modify the underlying values, and therefore may change |
817 | | /// the semantic values of the array, e.g. 100 milliseconds in a [`TimestampNanosecondArray`] |
818 | | /// will become 100 seconds in a [`TimestampSecondArray`]. |
819 | | /// |
820 | | /// For casts that preserve the semantic value, check out the |
821 | | /// [compute kernels](https://docs.rs/arrow/latest/arrow/compute/kernels/cast/index.html). |
822 | | /// |
823 | | /// ``` |
824 | | /// # use arrow_array::{Int64Array, TimestampNanosecondArray}; |
825 | | /// let a = Int64Array::from_iter_values([1, 2, 3, 4]); |
826 | | /// let b: TimestampNanosecondArray = a.reinterpret_cast(); |
827 | | /// ``` |
828 | 0 | pub fn reinterpret_cast<K>(&self) -> PrimitiveArray<K> |
829 | 0 | where |
830 | 0 | K: ArrowPrimitiveType<Native = T::Native>, |
831 | | { |
832 | 0 | let d = self.to_data().into_builder().data_type(K::DATA_TYPE); |
833 | | |
834 | | // SAFETY: |
835 | | // Native type is the same |
836 | 0 | PrimitiveArray::from(unsafe { d.build_unchecked() }) |
837 | 0 | } |
838 | | |
839 | | /// Applies a unary infallible function to a primitive array, producing a |
840 | | /// new array of potentially different type. |
841 | | /// |
842 | | /// This is the fastest way to perform an operation on a primitive array |
843 | | /// when the benefits of a vectorized operation outweigh the cost of |
844 | | /// branching nulls and non-nulls. |
845 | | /// |
846 | | /// See also |
847 | | /// * [`Self::unary_mut`] for in place modification. |
848 | | /// * [`Self::try_unary`] for fallible operations. |
849 | | /// * [`arrow::compute::binary`] for binary operations |
850 | | /// |
851 | | /// [`arrow::compute::binary`]: https://docs.rs/arrow/latest/arrow/compute/fn.binary.html |
852 | | /// # Null Handling |
853 | | /// |
854 | | /// Applies the function for all values, including those on null slots. This |
855 | | /// will often allow the compiler to generate faster vectorized code, but |
856 | | /// requires that the operation must be infallible (not error/panic) for any |
857 | | /// value of the corresponding type or this function may panic. |
858 | | /// |
859 | | /// # Example |
860 | | /// ```rust |
861 | | /// # use arrow_array::{Int32Array, Float32Array, types::Int32Type}; |
862 | | /// # fn main() { |
863 | | /// let array = Int32Array::from(vec![Some(5), Some(7), None]); |
864 | | /// // Create a new array with the value of applying sqrt |
865 | | /// let c = array.unary(|x| f32::sqrt(x as f32)); |
866 | | /// assert_eq!(c, Float32Array::from(vec![Some(2.236068), Some(2.6457512), None])); |
867 | | /// # } |
868 | | /// ``` |
869 | 0 | pub fn unary<F, O>(&self, op: F) -> PrimitiveArray<O> |
870 | 0 | where |
871 | 0 | O: ArrowPrimitiveType, |
872 | 0 | F: Fn(T::Native) -> O::Native, |
873 | | { |
874 | 0 | let nulls = self.nulls().cloned(); |
875 | 0 | let values = self.values().into_iter().map(|v| op(*v)); |
876 | 0 | let buffer: Vec<_> = values.collect(); |
877 | 0 | PrimitiveArray::new(buffer.into(), nulls) |
878 | 0 | } |
879 | | |
880 | | /// Applies a unary and infallible function to the array in place if possible. |
881 | | /// |
882 | | /// # Buffer Reuse |
883 | | /// |
884 | | /// If the underlying buffers are not shared with other arrays, mutates the |
885 | | /// underlying buffer in place, without allocating. |
886 | | /// |
887 | | /// If the underlying buffer is shared, returns Err(self) |
888 | | /// |
889 | | /// # Null Handling |
890 | | /// |
891 | | /// See [`Self::unary`] for more information on null handling. |
892 | | /// |
893 | | /// # Example |
894 | | /// |
895 | | /// ```rust |
896 | | /// # use arrow_array::{Int32Array, types::Int32Type}; |
897 | | /// let array = Int32Array::from(vec![Some(5), Some(7), None]); |
898 | | /// // Apply x*2+1 to the data in place, no allocations |
899 | | /// let c = array.unary_mut(|x| x * 2 + 1).unwrap(); |
900 | | /// assert_eq!(c, Int32Array::from(vec![Some(11), Some(15), None])); |
901 | | /// ``` |
902 | | /// |
903 | | /// # Example: modify [`ArrayRef`] in place, if not shared |
904 | | /// |
905 | | /// It is also possible to modify an [`ArrayRef`] if there are no other |
906 | | /// references to the underlying buffer. |
907 | | /// |
908 | | /// ```rust |
909 | | /// # use std::sync::Arc; |
910 | | /// # use arrow_array::{Array, cast::AsArray, ArrayRef, Int32Array, PrimitiveArray, types::Int32Type}; |
911 | | /// # let array: ArrayRef = Arc::new(Int32Array::from(vec![Some(5), Some(7), None])); |
912 | | /// // Convert to Int32Array (panic's if array.data_type is not Int32) |
913 | | /// let a = array.as_primitive::<Int32Type>().clone(); |
914 | | /// // Try to apply x*2+1 to the data in place, fails because array is still shared |
915 | | /// a.unary_mut(|x| x * 2 + 1).unwrap_err(); |
916 | | /// // Try again, this time dropping the last remaining reference |
917 | | /// let a = array.as_primitive::<Int32Type>().clone(); |
918 | | /// drop(array); |
919 | | /// // Now we can apply the operation in place |
920 | | /// let c = a.unary_mut(|x| x * 2 + 1).unwrap(); |
921 | | /// assert_eq!(c, Int32Array::from(vec![Some(11), Some(15), None])); |
922 | | /// ``` |
923 | | pub fn unary_mut<F>(self, op: F) -> Result<PrimitiveArray<T>, PrimitiveArray<T>> |
924 | | where |
925 | | F: Fn(T::Native) -> T::Native, |
926 | | { |
927 | | let mut builder = self.into_builder()?; |
928 | | builder |
929 | | .values_slice_mut() |
930 | | .iter_mut() |
931 | | .for_each(|v| *v = op(*v)); |
932 | | Ok(builder.finish()) |
933 | | } |
934 | | |
935 | | /// Applies a unary fallible function to all valid values in a primitive |
936 | | /// array, producing a new array of potentially different type. |
937 | | /// |
938 | | /// Applies `op` to only rows that are valid, which is often significantly |
939 | | /// slower than [`Self::unary`], which should be preferred if `op` is |
940 | | /// fallible. |
941 | | /// |
942 | | /// Note: LLVM is currently unable to effectively vectorize fallible operations |
943 | 0 | pub fn try_unary<F, O, E>(&self, op: F) -> Result<PrimitiveArray<O>, E> |
944 | 0 | where |
945 | 0 | O: ArrowPrimitiveType, |
946 | 0 | F: Fn(T::Native) -> Result<O::Native, E>, |
947 | | { |
948 | 0 | let len = self.len(); |
949 | | |
950 | 0 | let nulls = self.nulls().cloned(); |
951 | 0 | let mut buffer = BufferBuilder::<O::Native>::new(len); |
952 | 0 | buffer.append_n_zeroed(len); |
953 | 0 | let slice = buffer.as_slice_mut(); |
954 | | |
955 | 0 | let f = |idx| { |
956 | 0 | unsafe { *slice.get_unchecked_mut(idx) = op(self.value_unchecked(idx))? }; |
957 | 0 | Ok::<_, E>(()) |
958 | 0 | }; |
959 | | |
960 | 0 | match &nulls { |
961 | 0 | Some(nulls) => nulls.try_for_each_valid_idx(f)?, |
962 | 0 | None => (0..len).try_for_each(f)?, |
963 | | } |
964 | | |
965 | 0 | let values = buffer.finish().into(); |
966 | 0 | Ok(PrimitiveArray::new(values, nulls)) |
967 | 0 | } |
968 | | |
969 | | /// Applies a unary fallible function to all valid values in a mutable |
970 | | /// primitive array. |
971 | | /// |
972 | | /// # Null Handling |
973 | | /// |
974 | | /// See [`Self::try_unary`] for more information on null handling. |
975 | | /// |
976 | | /// # Buffer Reuse |
977 | | /// |
978 | | /// See [`Self::unary_mut`] for more information on buffer reuse. |
979 | | /// |
980 | | /// This returns an `Err` when the input array is shared buffer with other |
981 | | /// array. In the case, returned `Err` wraps input array. If the function |
982 | | /// encounters an error during applying on values. In the case, this returns an `Err` within |
983 | | /// an `Ok` which wraps the actual error. |
984 | | /// |
985 | | /// Note: LLVM is currently unable to effectively vectorize fallible operations |
986 | | pub fn try_unary_mut<F, E>( |
987 | | self, |
988 | | op: F, |
989 | | ) -> Result<Result<PrimitiveArray<T>, E>, PrimitiveArray<T>> |
990 | | where |
991 | | F: Fn(T::Native) -> Result<T::Native, E>, |
992 | | { |
993 | | let len = self.len(); |
994 | | let null_count = self.null_count(); |
995 | | let mut builder = self.into_builder()?; |
996 | | |
997 | | let (slice, null_buffer) = builder.slices_mut(); |
998 | | |
999 | | let r = try_for_each_valid_idx(len, 0, null_count, null_buffer.as_deref(), |idx| { |
1000 | | unsafe { *slice.get_unchecked_mut(idx) = op(*slice.get_unchecked(idx))? }; |
1001 | | Ok::<_, E>(()) |
1002 | | }); |
1003 | | |
1004 | | if let Err(err) = r { |
1005 | | return Ok(Err(err)); |
1006 | | } |
1007 | | |
1008 | | Ok(Ok(builder.finish())) |
1009 | | } |
1010 | | |
1011 | | /// Applies a unary and nullable function to all valid values in a primitive array |
1012 | | /// |
1013 | | /// Applies `op` to only rows that are valid, which is often significantly |
1014 | | /// slower than [`Self::unary`], which should be preferred if `op` is |
1015 | | /// fallible. |
1016 | | /// |
1017 | | /// Note: LLVM is currently unable to effectively vectorize fallible operations |
1018 | 0 | pub fn unary_opt<F, O>(&self, op: F) -> PrimitiveArray<O> |
1019 | 0 | where |
1020 | 0 | O: ArrowPrimitiveType, |
1021 | 0 | F: Fn(T::Native) -> Option<O::Native>, |
1022 | | { |
1023 | 0 | let len = self.len(); |
1024 | 0 | let (nulls, null_count, offset) = match self.nulls() { |
1025 | 0 | Some(n) => (Some(n.validity()), n.null_count(), n.offset()), |
1026 | 0 | None => (None, 0, 0), |
1027 | | }; |
1028 | | |
1029 | 0 | let mut null_builder = BooleanBufferBuilder::new(len); |
1030 | 0 | match nulls { |
1031 | 0 | Some(b) => null_builder.append_packed_range(offset..offset + len, b), |
1032 | 0 | None => null_builder.append_n(len, true), |
1033 | | } |
1034 | | |
1035 | 0 | let mut buffer = BufferBuilder::<O::Native>::new(len); |
1036 | 0 | buffer.append_n_zeroed(len); |
1037 | 0 | let slice = buffer.as_slice_mut(); |
1038 | | |
1039 | 0 | let mut out_null_count = null_count; |
1040 | | |
1041 | 0 | let _ = try_for_each_valid_idx(len, offset, null_count, nulls, |idx| { |
1042 | 0 | match op(unsafe { self.value_unchecked(idx) }) { |
1043 | 0 | Some(v) => unsafe { *slice.get_unchecked_mut(idx) = v }, |
1044 | 0 | None => { |
1045 | 0 | out_null_count += 1; |
1046 | 0 | null_builder.set_bit(idx, false); |
1047 | 0 | } |
1048 | | } |
1049 | 0 | Ok::<_, ()>(()) |
1050 | 0 | }); |
1051 | | |
1052 | 0 | let nulls = null_builder.finish(); |
1053 | 0 | let values = buffer.finish().into(); |
1054 | 0 | let nulls = unsafe { NullBuffer::new_unchecked(nulls, out_null_count) }; |
1055 | 0 | PrimitiveArray::new(values, Some(nulls)) |
1056 | 0 | } |
1057 | | |
1058 | | /// Applies a unary infallible function to each value in an array, producing a |
1059 | | /// new primitive array. |
1060 | | /// |
1061 | | /// # Null Handling |
1062 | | /// |
1063 | | /// See [`Self::unary`] for more information on null handling. |
1064 | | /// |
1065 | | /// # Example: create an [`Int16Array`] from an [`ArrayAccessor`] with item type `&[u8]` |
1066 | | /// ``` |
1067 | | /// use arrow_array::{Array, FixedSizeBinaryArray, Int16Array}; |
1068 | | /// let input_arg = vec![ vec![1, 0], vec![2, 0], vec![3, 0] ]; |
1069 | | /// let arr = FixedSizeBinaryArray::try_from_iter(input_arg.into_iter()).unwrap(); |
1070 | | /// let c = Int16Array::from_unary(&arr, |x| i16::from_le_bytes(x[..2].try_into().unwrap())); |
1071 | | /// assert_eq!(c, Int16Array::from(vec![Some(1i16), Some(2i16), Some(3i16)])); |
1072 | | /// ``` |
1073 | | pub fn from_unary<U: ArrayAccessor, F>(left: U, mut op: F) -> Self |
1074 | | where |
1075 | | F: FnMut(U::Item) -> T::Native, |
1076 | | { |
1077 | | let nulls = left.logical_nulls(); |
1078 | | let buffer: Vec<_> = (0..left.len()) |
1079 | | // SAFETY: i in range 0..left.len() |
1080 | | .map(|i| op(unsafe { left.value_unchecked(i) })) |
1081 | | .collect(); |
1082 | | PrimitiveArray::new(buffer.into(), nulls) |
1083 | | } |
1084 | | |
1085 | | /// Returns a `PrimitiveBuilder` for this array, suitable for mutating values |
1086 | | /// in place. |
1087 | | /// |
1088 | | /// # Buffer Reuse |
1089 | | /// |
1090 | | /// If the underlying data buffer has no other outstanding references, the |
1091 | | /// buffer is used without copying. |
1092 | | /// |
1093 | | /// If the underlying data buffer does have outstanding references, returns |
1094 | | /// `Err(self)` |
1095 | | pub fn into_builder(self) -> Result<PrimitiveBuilder<T>, Self> { |
1096 | | let len = self.len(); |
1097 | | let data = self.into_data(); |
1098 | | let null_bit_buffer = data.nulls().map(|b| b.inner().sliced()); |
1099 | | |
1100 | | let element_len = std::mem::size_of::<T::Native>(); |
1101 | | let buffer = |
1102 | | data.buffers()[0].slice_with_length(data.offset() * element_len, len * element_len); |
1103 | | |
1104 | | drop(data); |
1105 | | |
1106 | | let try_mutable_null_buffer = match null_bit_buffer { |
1107 | | None => Ok(None), |
1108 | | Some(null_buffer) => { |
1109 | | // Null buffer exists, tries to make it mutable |
1110 | | null_buffer.into_mutable().map(Some) |
1111 | | } |
1112 | | }; |
1113 | | |
1114 | | let try_mutable_buffers = match try_mutable_null_buffer { |
1115 | | Ok(mutable_null_buffer) => { |
1116 | | // Got mutable null buffer, tries to get mutable value buffer |
1117 | | let try_mutable_buffer = buffer.into_mutable(); |
1118 | | |
1119 | | // try_mutable_buffer.map(...).map_err(...) doesn't work as the compiler complains |
1120 | | // mutable_null_buffer is moved into map closure. |
1121 | | match try_mutable_buffer { |
1122 | | Ok(mutable_buffer) => Ok(PrimitiveBuilder::<T>::new_from_buffer( |
1123 | | mutable_buffer, |
1124 | | mutable_null_buffer, |
1125 | | )), |
1126 | | Err(buffer) => Err((buffer, mutable_null_buffer.map(|b| b.into()))), |
1127 | | } |
1128 | | } |
1129 | | Err(mutable_null_buffer) => { |
1130 | | // Unable to get mutable null buffer |
1131 | | Err((buffer, Some(mutable_null_buffer))) |
1132 | | } |
1133 | | }; |
1134 | | |
1135 | | match try_mutable_buffers { |
1136 | | Ok(builder) => Ok(builder), |
1137 | | Err((buffer, null_bit_buffer)) => { |
1138 | | let builder = ArrayData::builder(T::DATA_TYPE) |
1139 | | .len(len) |
1140 | | .add_buffer(buffer) |
1141 | | .null_bit_buffer(null_bit_buffer); |
1142 | | |
1143 | | let array_data = unsafe { builder.build_unchecked() }; |
1144 | | let array = PrimitiveArray::<T>::from(array_data); |
1145 | | |
1146 | | Err(array) |
1147 | | } |
1148 | | } |
1149 | | } |
1150 | | } |
1151 | | |
1152 | | impl<T: ArrowPrimitiveType> From<PrimitiveArray<T>> for ArrayData { |
1153 | 680 | fn from(array: PrimitiveArray<T>) -> Self { |
1154 | 680 | let builder = ArrayDataBuilder::new(array.data_type) |
1155 | 680 | .len(array.values.len()) |
1156 | 680 | .nulls(array.nulls) |
1157 | 680 | .buffers(vec![array.values.into_inner()]); |
1158 | | |
1159 | 680 | unsafe { builder.build_unchecked() } |
1160 | 680 | } |
1161 | | } |
1162 | | |
1163 | | impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> { |
1164 | 266 | fn as_any(&self) -> &dyn Any { |
1165 | 266 | self |
1166 | 266 | } |
1167 | | |
1168 | 667 | fn to_data(&self) -> ArrayData { |
1169 | 667 | self.clone().into() |
1170 | 667 | } |
1171 | | |
1172 | 13 | fn into_data(self) -> ArrayData { |
1173 | 13 | self.into() |
1174 | 13 | } |
1175 | | |
1176 | 1.71k | fn data_type(&self) -> &DataType { |
1177 | 1.71k | &self.data_type |
1178 | 1.71k | } |
1179 | | |
1180 | 288 | fn slice(&self, offset: usize, length: usize) -> ArrayRef { |
1181 | 288 | Arc::new(self.slice(offset, length)) |
1182 | 288 | } |
1183 | | |
1184 | 1.75k | fn len(&self) -> usize { |
1185 | 1.75k | self.values.len() |
1186 | 1.75k | } |
1187 | | |
1188 | 0 | fn is_empty(&self) -> bool { |
1189 | 0 | self.values.is_empty() |
1190 | 0 | } |
1191 | | |
1192 | 0 | fn shrink_to_fit(&mut self) { |
1193 | 0 | self.values.shrink_to_fit(); |
1194 | 0 | if let Some(nulls) = &mut self.nulls { |
1195 | 0 | nulls.shrink_to_fit(); |
1196 | 0 | } |
1197 | 0 | } |
1198 | | |
1199 | 0 | fn offset(&self) -> usize { |
1200 | 0 | 0 |
1201 | 0 | } |
1202 | | |
1203 | 320 | fn nulls(&self) -> Option<&NullBuffer> { |
1204 | 320 | self.nulls.as_ref() |
1205 | 320 | } |
1206 | | |
1207 | 3 | fn logical_null_count(&self) -> usize { |
1208 | 3 | self.null_count() |
1209 | 3 | } |
1210 | | |
1211 | 0 | fn get_buffer_memory_size(&self) -> usize { |
1212 | 0 | let mut size = self.values.inner().capacity(); |
1213 | 0 | if let Some(n) = self.nulls.as_ref() { |
1214 | 0 | size += n.buffer().capacity(); |
1215 | 0 | } |
1216 | 0 | size |
1217 | 0 | } |
1218 | | |
1219 | 0 | fn get_array_memory_size(&self) -> usize { |
1220 | 0 | std::mem::size_of::<Self>() + self.get_buffer_memory_size() |
1221 | 0 | } |
1222 | | } |
1223 | | |
1224 | | impl<T: ArrowPrimitiveType> ArrayAccessor for &PrimitiveArray<T> { |
1225 | | type Item = T::Native; |
1226 | | |
1227 | 0 | fn value(&self, index: usize) -> Self::Item { |
1228 | 0 | PrimitiveArray::value(self, index) |
1229 | 0 | } |
1230 | | |
1231 | | #[inline] |
1232 | 0 | unsafe fn value_unchecked(&self, index: usize) -> Self::Item { |
1233 | 0 | PrimitiveArray::value_unchecked(self, index) |
1234 | 0 | } |
1235 | | } |
1236 | | |
1237 | | impl<T: ArrowTemporalType> PrimitiveArray<T> |
1238 | | where |
1239 | | i64: From<T::Native>, |
1240 | | { |
1241 | | /// Returns value as a chrono `NaiveDateTime`, handling time resolution |
1242 | | /// |
1243 | | /// If a data type cannot be converted to `NaiveDateTime`, a `None` is returned. |
1244 | | /// A valid value is expected, thus the user should first check for validity. |
1245 | | /// |
1246 | | /// See notes on [`PrimitiveArray::value`] regarding nulls and panics |
1247 | | pub fn value_as_datetime(&self, i: usize) -> Option<NaiveDateTime> { |
1248 | | as_datetime::<T>(i64::from(self.value(i))) |
1249 | | } |
1250 | | |
1251 | | /// Returns value as a chrono `NaiveDateTime`, handling time resolution with the provided tz |
1252 | | /// |
1253 | | /// functionally it is same as `value_as_datetime`, however it adds |
1254 | | /// the passed tz to the to-be-returned NaiveDateTime |
1255 | | /// |
1256 | | /// See notes on [`PrimitiveArray::value`] regarding nulls and panics |
1257 | | pub fn value_as_datetime_with_tz(&self, i: usize, tz: Tz) -> Option<DateTime<Tz>> { |
1258 | | as_datetime_with_timezone::<T>(i64::from(self.value(i)), tz) |
1259 | | } |
1260 | | |
1261 | | /// Returns value as a chrono `NaiveDate` by using `Self::datetime()` |
1262 | | /// |
1263 | | /// If a data type cannot be converted to `NaiveDate`, a `None` is returned |
1264 | | /// |
1265 | | /// See notes on [`PrimitiveArray::value`] regarding nulls and panics |
1266 | | pub fn value_as_date(&self, i: usize) -> Option<NaiveDate> { |
1267 | | self.value_as_datetime(i).map(|datetime| datetime.date()) |
1268 | | } |
1269 | | |
1270 | | /// Returns a value as a chrono `NaiveTime` |
1271 | | /// |
1272 | | /// `Date32` and `Date64` return UTC midnight as they do not have time resolution |
1273 | | /// |
1274 | | /// See notes on [`PrimitiveArray::value`] regarding nulls and panics |
1275 | | pub fn value_as_time(&self, i: usize) -> Option<NaiveTime> { |
1276 | | as_time::<T>(i64::from(self.value(i))) |
1277 | | } |
1278 | | |
1279 | | /// Returns a value as a chrono `Duration` |
1280 | | /// |
1281 | | /// If a data type cannot be converted to `Duration`, a `None` is returned |
1282 | | /// |
1283 | | /// See notes on [`PrimitiveArray::value`] regarding nulls and panics |
1284 | | pub fn value_as_duration(&self, i: usize) -> Option<Duration> { |
1285 | | as_duration::<T>(i64::from(self.value(i))) |
1286 | | } |
1287 | | } |
1288 | | |
1289 | | impl<T: ArrowPrimitiveType> std::fmt::Debug for PrimitiveArray<T> { |
1290 | 0 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { |
1291 | 0 | let data_type = self.data_type(); |
1292 | | |
1293 | 0 | write!(f, "PrimitiveArray<{data_type:?}>\n[\n")?; |
1294 | 0 | print_long_array(self, f, |array, index, f| match data_type { |
1295 | | DataType::Date32 | DataType::Date64 => { |
1296 | 0 | let v = self.value(index).to_i64().unwrap(); |
1297 | 0 | match as_date::<T>(v) { |
1298 | 0 | Some(date) => write!(f, "{date:?}"), |
1299 | | None => { |
1300 | 0 | write!( |
1301 | 0 | f, |
1302 | 0 | "Cast error: Failed to convert {v} to temporal for {data_type:?}" |
1303 | | ) |
1304 | | } |
1305 | | } |
1306 | | } |
1307 | | DataType::Time32(_) | DataType::Time64(_) => { |
1308 | 0 | let v = self.value(index).to_i64().unwrap(); |
1309 | 0 | match as_time::<T>(v) { |
1310 | 0 | Some(time) => write!(f, "{time:?}"), |
1311 | | None => { |
1312 | 0 | write!( |
1313 | 0 | f, |
1314 | 0 | "Cast error: Failed to convert {v} to temporal for {data_type:?}" |
1315 | | ) |
1316 | | } |
1317 | | } |
1318 | | } |
1319 | 0 | DataType::Timestamp(_, tz_string_opt) => { |
1320 | 0 | let v = self.value(index).to_i64().unwrap(); |
1321 | 0 | match tz_string_opt { |
1322 | | // for Timestamp with TimeZone |
1323 | 0 | Some(tz_string) => { |
1324 | 0 | match tz_string.parse::<Tz>() { |
1325 | | // if the time zone is valid, construct a DateTime<Tz> and format it as rfc3339 |
1326 | 0 | Ok(tz) => match as_datetime_with_timezone::<T>(v, tz) { |
1327 | 0 | Some(datetime) => write!(f, "{}", datetime.to_rfc3339()), |
1328 | 0 | None => write!(f, "null"), |
1329 | | }, |
1330 | | // if the time zone is invalid, shows NaiveDateTime with an error message |
1331 | 0 | Err(_) => match as_datetime::<T>(v) { |
1332 | 0 | Some(datetime) => { |
1333 | 0 | write!(f, "{datetime:?} (Unknown Time Zone '{tz_string}')") |
1334 | | } |
1335 | 0 | None => write!(f, "null"), |
1336 | | }, |
1337 | | } |
1338 | | } |
1339 | | // for Timestamp without TimeZone |
1340 | 0 | None => match as_datetime::<T>(v) { |
1341 | 0 | Some(datetime) => write!(f, "{datetime:?}"), |
1342 | 0 | None => write!(f, "null"), |
1343 | | }, |
1344 | | } |
1345 | | } |
1346 | 0 | _ => std::fmt::Debug::fmt(&array.value(index), f), |
1347 | 0 | })?; |
1348 | 0 | write!(f, "]") |
1349 | 0 | } |
1350 | | } |
1351 | | |
1352 | | impl<'a, T: ArrowPrimitiveType> IntoIterator for &'a PrimitiveArray<T> { |
1353 | | type Item = Option<<T as ArrowPrimitiveType>::Native>; |
1354 | | type IntoIter = PrimitiveIter<'a, T>; |
1355 | | |
1356 | 0 | fn into_iter(self) -> Self::IntoIter { |
1357 | 0 | PrimitiveIter::<'a, T>::new(self) |
1358 | 0 | } |
1359 | | } |
1360 | | |
1361 | | impl<'a, T: ArrowPrimitiveType> PrimitiveArray<T> { |
1362 | | /// constructs a new iterator |
1363 | 0 | pub fn iter(&'a self) -> PrimitiveIter<'a, T> { |
1364 | 0 | PrimitiveIter::<'a, T>::new(self) |
1365 | 0 | } |
1366 | | } |
1367 | | |
1368 | | /// An optional primitive value |
1369 | | /// |
1370 | | /// This struct is used as an adapter when creating `PrimitiveArray` from an iterator. |
1371 | | /// `FromIterator` for `PrimitiveArray` takes an iterator where the elements can be `into` |
1372 | | /// this struct. So once implementing `From` or `Into` trait for a type, an iterator of |
1373 | | /// the type can be collected to `PrimitiveArray`. |
1374 | | #[derive(Debug)] |
1375 | | pub struct NativeAdapter<T: ArrowPrimitiveType> { |
1376 | | /// Corresponding Rust native type if available |
1377 | | pub native: Option<T::Native>, |
1378 | | } |
1379 | | |
1380 | | macro_rules! def_from_for_primitive { |
1381 | | ( $ty:ident, $tt:tt) => { |
1382 | | impl From<$tt> for NativeAdapter<$ty> { |
1383 | 0 | fn from(value: $tt) -> Self { |
1384 | 0 | NativeAdapter { |
1385 | 0 | native: Some(value), |
1386 | 0 | } |
1387 | 0 | } |
1388 | | } |
1389 | | }; |
1390 | | } |
1391 | | |
1392 | | def_from_for_primitive!(Int8Type, i8); |
1393 | | def_from_for_primitive!(Int16Type, i16); |
1394 | | def_from_for_primitive!(Int32Type, i32); |
1395 | | def_from_for_primitive!(Int64Type, i64); |
1396 | | def_from_for_primitive!(UInt8Type, u8); |
1397 | | def_from_for_primitive!(UInt16Type, u16); |
1398 | | def_from_for_primitive!(UInt32Type, u32); |
1399 | | def_from_for_primitive!(UInt64Type, u64); |
1400 | | def_from_for_primitive!(Float16Type, f16); |
1401 | | def_from_for_primitive!(Float32Type, f32); |
1402 | | def_from_for_primitive!(Float64Type, f64); |
1403 | | def_from_for_primitive!(Decimal32Type, i32); |
1404 | | def_from_for_primitive!(Decimal64Type, i64); |
1405 | | def_from_for_primitive!(Decimal128Type, i128); |
1406 | | def_from_for_primitive!(Decimal256Type, i256); |
1407 | | |
1408 | | impl<T: ArrowPrimitiveType> From<Option<<T as ArrowPrimitiveType>::Native>> for NativeAdapter<T> { |
1409 | 4 | fn from(value: Option<<T as ArrowPrimitiveType>::Native>) -> Self { |
1410 | 4 | NativeAdapter { native: value } |
1411 | 4 | } |
1412 | | } |
1413 | | |
1414 | | impl<T: ArrowPrimitiveType> From<&Option<<T as ArrowPrimitiveType>::Native>> for NativeAdapter<T> { |
1415 | 78 | fn from(value: &Option<<T as ArrowPrimitiveType>::Native>) -> Self { |
1416 | 78 | NativeAdapter { native: *value } |
1417 | 78 | } |
1418 | | } |
1419 | | |
1420 | | impl<T: ArrowPrimitiveType, Ptr: Into<NativeAdapter<T>>> FromIterator<Ptr> for PrimitiveArray<T> { |
1421 | 15 | fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self { |
1422 | 15 | let iter = iter.into_iter(); |
1423 | 15 | let (lower, _) = iter.size_hint(); |
1424 | | |
1425 | 15 | let mut null_builder = BooleanBufferBuilder::new(lower); |
1426 | | |
1427 | 15 | let buffer: Buffer = iter |
1428 | 82 | .map15 (|item| { |
1429 | 82 | if let Some(a74 ) = item.into().native { |
1430 | 74 | null_builder.append(true); |
1431 | 74 | a |
1432 | | } else { |
1433 | 8 | null_builder.append(false); |
1434 | | // this ensures that null items on the buffer are not arbitrary. |
1435 | | // This is important because fallible operations can use null values (e.g. a vectorized "add") |
1436 | | // which may panic (e.g. overflow if the number on the slots happen to be very large). |
1437 | 8 | T::Native::default() |
1438 | | } |
1439 | 82 | }) |
1440 | 15 | .collect(); |
1441 | | |
1442 | 15 | let len = null_builder.len(); |
1443 | | |
1444 | 15 | let data = unsafe { |
1445 | 15 | ArrayData::new_unchecked( |
1446 | 15 | T::DATA_TYPE, |
1447 | 15 | len, |
1448 | 15 | None, |
1449 | 15 | Some(null_builder.into()), |
1450 | | 0, |
1451 | 15 | vec![buffer], |
1452 | 15 | vec![], |
1453 | | ) |
1454 | | }; |
1455 | 15 | PrimitiveArray::from(data) |
1456 | 15 | } |
1457 | | } |
1458 | | |
1459 | | impl<T: ArrowPrimitiveType> PrimitiveArray<T> { |
1460 | | /// Creates a [`PrimitiveArray`] from an iterator of trusted length. |
1461 | | /// # Safety |
1462 | | /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html). |
1463 | | /// I.e. that `size_hint().1` correctly reports its length. |
1464 | | #[inline] |
1465 | 0 | pub unsafe fn from_trusted_len_iter<I, P>(iter: I) -> Self |
1466 | 0 | where |
1467 | 0 | P: std::borrow::Borrow<Option<<T as ArrowPrimitiveType>::Native>>, |
1468 | 0 | I: IntoIterator<Item = P>, |
1469 | | { |
1470 | 0 | let iterator = iter.into_iter(); |
1471 | 0 | let (_, upper) = iterator.size_hint(); |
1472 | 0 | let len = upper.expect("trusted_len_unzip requires an upper limit"); |
1473 | | |
1474 | 0 | let (null, buffer) = trusted_len_unzip(iterator); |
1475 | | |
1476 | 0 | let data = |
1477 | 0 | ArrayData::new_unchecked(T::DATA_TYPE, len, None, Some(null), 0, vec![buffer], vec![]); |
1478 | 0 | PrimitiveArray::from(data) |
1479 | 0 | } |
1480 | | } |
1481 | | |
1482 | | // TODO: the macro is needed here because we'd get "conflicting implementations" error |
1483 | | // otherwise with both `From<Vec<T::Native>>` and `From<Vec<Option<T::Native>>>`. |
1484 | | // We should revisit this in future. |
1485 | | macro_rules! def_numeric_from_vec { |
1486 | | ( $ty:ident ) => { |
1487 | | impl From<Vec<<$ty as ArrowPrimitiveType>::Native>> for PrimitiveArray<$ty> { |
1488 | 34 | fn from(data: Vec<<$ty as ArrowPrimitiveType>::Native>) -> Self { |
1489 | 34 | let array_data = ArrayData::builder($ty::DATA_TYPE) |
1490 | 34 | .len(data.len()) |
1491 | 34 | .add_buffer(Buffer::from_vec(data)); |
1492 | 34 | let array_data = unsafe { array_data.build_unchecked() }; |
1493 | 34 | PrimitiveArray::from(array_data) |
1494 | 34 | } |
1495 | | } |
1496 | | |
1497 | | // Constructs a primitive array from a vector. Should only be used for testing. |
1498 | | impl From<Vec<Option<<$ty as ArrowPrimitiveType>::Native>>> for PrimitiveArray<$ty> { |
1499 | 14 | fn from(data: Vec<Option<<$ty as ArrowPrimitiveType>::Native>>) -> Self { |
1500 | 14 | PrimitiveArray::from_iter(data.iter()) |
1501 | 14 | } |
1502 | | } |
1503 | | }; |
1504 | | } |
1505 | | |
1506 | | def_numeric_from_vec!(Int8Type); |
1507 | | def_numeric_from_vec!(Int16Type); |
1508 | | def_numeric_from_vec!(Int32Type); |
1509 | | def_numeric_from_vec!(Int64Type); |
1510 | | def_numeric_from_vec!(UInt8Type); |
1511 | | def_numeric_from_vec!(UInt16Type); |
1512 | | def_numeric_from_vec!(UInt32Type); |
1513 | | def_numeric_from_vec!(UInt64Type); |
1514 | | def_numeric_from_vec!(Float16Type); |
1515 | | def_numeric_from_vec!(Float32Type); |
1516 | | def_numeric_from_vec!(Float64Type); |
1517 | | def_numeric_from_vec!(Decimal32Type); |
1518 | | def_numeric_from_vec!(Decimal64Type); |
1519 | | def_numeric_from_vec!(Decimal128Type); |
1520 | | def_numeric_from_vec!(Decimal256Type); |
1521 | | |
1522 | | def_numeric_from_vec!(Date32Type); |
1523 | | def_numeric_from_vec!(Date64Type); |
1524 | | def_numeric_from_vec!(Time32SecondType); |
1525 | | def_numeric_from_vec!(Time32MillisecondType); |
1526 | | def_numeric_from_vec!(Time64MicrosecondType); |
1527 | | def_numeric_from_vec!(Time64NanosecondType); |
1528 | | def_numeric_from_vec!(IntervalYearMonthType); |
1529 | | def_numeric_from_vec!(IntervalDayTimeType); |
1530 | | def_numeric_from_vec!(IntervalMonthDayNanoType); |
1531 | | def_numeric_from_vec!(DurationSecondType); |
1532 | | def_numeric_from_vec!(DurationMillisecondType); |
1533 | | def_numeric_from_vec!(DurationMicrosecondType); |
1534 | | def_numeric_from_vec!(DurationNanosecondType); |
1535 | | def_numeric_from_vec!(TimestampSecondType); |
1536 | | def_numeric_from_vec!(TimestampMillisecondType); |
1537 | | def_numeric_from_vec!(TimestampMicrosecondType); |
1538 | | def_numeric_from_vec!(TimestampNanosecondType); |
1539 | | |
1540 | | impl<T: ArrowTimestampType> PrimitiveArray<T> { |
1541 | | /// Returns the timezone of this array if any |
1542 | 0 | pub fn timezone(&self) -> Option<&str> { |
1543 | 0 | match self.data_type() { |
1544 | 0 | DataType::Timestamp(_, tz) => tz.as_deref(), |
1545 | 0 | _ => unreachable!(), |
1546 | | } |
1547 | 0 | } |
1548 | | |
1549 | | /// Construct a timestamp array with new timezone |
1550 | 18 | pub fn with_timezone(self, timezone: impl Into<Arc<str>>) -> Self { |
1551 | 18 | self.with_timezone_opt(Some(timezone.into())) |
1552 | 18 | } |
1553 | | |
1554 | | /// Construct a timestamp array with UTC |
1555 | | pub fn with_timezone_utc(self) -> Self { |
1556 | | self.with_timezone("+00:00") |
1557 | | } |
1558 | | |
1559 | | /// Construct a timestamp array with an optional timezone |
1560 | 66 | pub fn with_timezone_opt<S: Into<Arc<str>>>(self, timezone: Option<S>) -> Self { |
1561 | 66 | Self { |
1562 | 66 | data_type: DataType::Timestamp(T::UNIT, timezone.map(Into::into)), |
1563 | 66 | ..self |
1564 | 66 | } |
1565 | 66 | } |
1566 | | } |
1567 | | |
1568 | | /// Constructs a `PrimitiveArray` from an array data reference. |
1569 | | impl<T: ArrowPrimitiveType> From<ArrayData> for PrimitiveArray<T> { |
1570 | 178 | fn from(data: ArrayData) -> Self { |
1571 | 178 | Self::assert_compatible(data.data_type()); |
1572 | 178 | assert_eq!( |
1573 | 178 | data.buffers().len(), |
1574 | | 1, |
1575 | 0 | "PrimitiveArray data should contain a single buffer only (values buffer)" |
1576 | | ); |
1577 | | |
1578 | 178 | let values = ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len()); |
1579 | 178 | Self { |
1580 | 178 | data_type: data.data_type().clone(), |
1581 | 178 | values, |
1582 | 178 | nulls: data.nulls().cloned(), |
1583 | 178 | } |
1584 | 178 | } |
1585 | | } |
1586 | | |
1587 | | impl<T: DecimalType + ArrowPrimitiveType> PrimitiveArray<T> { |
1588 | | /// Returns a Decimal array with the same data as self, with the |
1589 | | /// specified precision and scale. |
1590 | | /// |
1591 | | /// See [`validate_decimal_precision_and_scale`] |
1592 | 52 | pub fn with_precision_and_scale(self, precision: u8, scale: i8) -> Result<Self, ArrowError> { |
1593 | 52 | validate_decimal_precision_and_scale::<T>(precision, scale)?0 ; |
1594 | 52 | Ok(Self { |
1595 | 52 | data_type: T::TYPE_CONSTRUCTOR(precision, scale), |
1596 | 52 | ..self |
1597 | 52 | }) |
1598 | 52 | } |
1599 | | |
1600 | | /// Validates values in this array can be properly interpreted |
1601 | | /// with the specified precision. |
1602 | | pub fn validate_decimal_precision(&self, precision: u8) -> Result<(), ArrowError> { |
1603 | | (0..self.len()).try_for_each(|idx| { |
1604 | | if self.is_valid(idx) { |
1605 | | let decimal = unsafe { self.value_unchecked(idx) }; |
1606 | | T::validate_decimal_precision(decimal, precision) |
1607 | | } else { |
1608 | | Ok(()) |
1609 | | } |
1610 | | }) |
1611 | | } |
1612 | | |
1613 | | /// Validates the Decimal Array, if the value of slot is overflow for the specified precision, and |
1614 | | /// will be casted to Null |
1615 | | pub fn null_if_overflow_precision(&self, precision: u8) -> Self { |
1616 | | self.unary_opt::<_, T>(|v| T::is_valid_decimal_precision(v, precision).then_some(v)) |
1617 | | } |
1618 | | |
1619 | | /// Returns [`Self::value`] formatted as a string |
1620 | 8 | pub fn value_as_string(&self, row: usize) -> String { |
1621 | 8 | T::format_decimal(self.value(row), self.precision(), self.scale()) |
1622 | 8 | } |
1623 | | |
1624 | | /// Returns the decimal precision of this array |
1625 | 8 | pub fn precision(&self) -> u8 { |
1626 | 8 | match T::BYTE_LENGTH { |
1627 | | 4 => { |
1628 | 0 | if let DataType::Decimal32(p, _) = self.data_type() { |
1629 | 0 | *p |
1630 | | } else { |
1631 | 0 | unreachable!( |
1632 | | "Decimal32Array datatype is not DataType::Decimal32 but {}", |
1633 | 0 | self.data_type() |
1634 | | ) |
1635 | | } |
1636 | | } |
1637 | | 8 => { |
1638 | 0 | if let DataType::Decimal64(p, _) = self.data_type() { |
1639 | 0 | *p |
1640 | | } else { |
1641 | 0 | unreachable!( |
1642 | | "Decimal64Array datatype is not DataType::Decimal64 but {}", |
1643 | 0 | self.data_type() |
1644 | | ) |
1645 | | } |
1646 | | } |
1647 | | 16 => { |
1648 | 6 | if let DataType::Decimal128(p, _) = self.data_type() { |
1649 | 6 | *p |
1650 | | } else { |
1651 | 0 | unreachable!( |
1652 | | "Decimal128Array datatype is not DataType::Decimal128 but {}", |
1653 | 0 | self.data_type() |
1654 | | ) |
1655 | | } |
1656 | | } |
1657 | | 32 => { |
1658 | 2 | if let DataType::Decimal256(p, _) = self.data_type() { |
1659 | 2 | *p |
1660 | | } else { |
1661 | 0 | unreachable!( |
1662 | | "Decimal256Array datatype is not DataType::Decimal256 but {}", |
1663 | 0 | self.data_type() |
1664 | | ) |
1665 | | } |
1666 | | } |
1667 | 0 | other => unreachable!("Unsupported byte length for decimal array {}", other), |
1668 | | } |
1669 | 8 | } |
1670 | | |
1671 | | /// Returns the decimal scale of this array |
1672 | 8 | pub fn scale(&self) -> i8 { |
1673 | 8 | match T::BYTE_LENGTH { |
1674 | | 4 => { |
1675 | 0 | if let DataType::Decimal32(_, s) = self.data_type() { |
1676 | 0 | *s |
1677 | | } else { |
1678 | 0 | unreachable!( |
1679 | | "Decimal32Array datatype is not DataType::Decimal32 but {}", |
1680 | 0 | self.data_type() |
1681 | | ) |
1682 | | } |
1683 | | } |
1684 | | 8 => { |
1685 | 0 | if let DataType::Decimal64(_, s) = self.data_type() { |
1686 | 0 | *s |
1687 | | } else { |
1688 | 0 | unreachable!( |
1689 | | "Decimal64Array datatype is not DataType::Decimal64 but {}", |
1690 | 0 | self.data_type() |
1691 | | ) |
1692 | | } |
1693 | | } |
1694 | | 16 => { |
1695 | 6 | if let DataType::Decimal128(_, s) = self.data_type() { |
1696 | 6 | *s |
1697 | | } else { |
1698 | 0 | unreachable!( |
1699 | | "Decimal128Array datatype is not DataType::Decimal128 but {}", |
1700 | 0 | self.data_type() |
1701 | | ) |
1702 | | } |
1703 | | } |
1704 | | 32 => { |
1705 | 2 | if let DataType::Decimal256(_, s) = self.data_type() { |
1706 | 2 | *s |
1707 | | } else { |
1708 | 0 | unreachable!( |
1709 | | "Decimal256Array datatype is not DataType::Decimal256 but {}", |
1710 | 0 | self.data_type() |
1711 | | ) |
1712 | | } |
1713 | | } |
1714 | 0 | other => unreachable!("Unsupported byte length for decimal array {}", other), |
1715 | | } |
1716 | 8 | } |
1717 | | } |
1718 | | |
1719 | | #[cfg(test)] |
1720 | | mod tests { |
1721 | | use super::*; |
1722 | | use crate::builder::{ |
1723 | | Decimal128Builder, Decimal256Builder, Decimal32Builder, Decimal64Builder, |
1724 | | }; |
1725 | | use crate::cast::downcast_array; |
1726 | | use crate::BooleanArray; |
1727 | | use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano}; |
1728 | | use arrow_schema::TimeUnit; |
1729 | | |
#[test]
fn test_primitive_array_from_vec() {
    // An array built from a `Vec` must expose the same bytes as a buffer
    // built from the same values.
    let expected_buffer = Buffer::from_slice_ref([0, 1, 2, 3, 4]);
    let array = Int32Array::from(vec![0, 1, 2, 3, 4]);

    assert_eq!(&expected_buffer, array.values.inner());
    assert_eq!(5, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(0, array.null_count());

    for (idx, expected) in (0..5_i32).enumerate() {
        assert!(!array.is_null(idx));
        assert!(array.is_valid(idx));
        assert_eq!(expected, array.value(idx));
    }
}
1744 | | |
#[test]
fn test_primitive_array_from_vec_option() {
    // Every odd position is null, every even position holds its own index
    let array = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4)]);
    assert_eq!(5, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(2, array.null_count());

    for idx in 0..5 {
        let expect_null = idx % 2 != 0;
        assert_eq!(expect_null, array.is_null(idx));
        assert_eq!(!expect_null, array.is_valid(idx));
        if !expect_null {
            assert_eq!(idx as i32, array.value(idx));
        }
    }
}
1763 | | |
#[test]
fn test_date64_array_from_vec_option() {
    // A Date64 array with a null in the middle; the valid slots must round-trip
    // through `value_as_datetime`
    const MILLIS: i64 = 1550902545147;

    let array: PrimitiveArray<Date64Type> = vec![Some(MILLIS), None, Some(MILLIS)].into();
    assert_eq!(3, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(1, array.null_count());

    for idx in 0..3 {
        if idx % 2 != 0 {
            assert!(array.is_null(idx));
            assert!(!array.is_valid(idx));
            continue;
        }

        assert!(!array.is_null(idx));
        assert!(array.is_valid(idx));
        assert_eq!(MILLIS, array.value(idx));

        // roundtrip to and from datetime
        let roundtrip = array
            .value_as_datetime(idx)
            .unwrap()
            .and_utc()
            .timestamp_millis();
        assert_eq!(MILLIS, roundtrip);
    }
}
1793 | | |
#[test]
fn test_time32_millisecond_array_from_vec() {
    // Milliseconds since midnight and the time of day each one should format to:
    //        1 -> 00:00:00.001
    // 37800005 -> 10:30:00.005
    // 86399210 -> 23:59:59.210
    let array: PrimitiveArray<Time32MillisecondType> = vec![1, 37_800_005, 86_399_210].into();
    assert_eq!(3, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(0, array.null_count());

    let expected_times = ["00:00:00.001", "10:30:00.005", "23:59:59.210"];
    for (idx, expected) in expected_times.iter().enumerate() {
        // time values must not convert to dates or datetimes
        assert_eq!(None, array.value_as_datetime(idx));
        assert_eq!(None, array.value_as_date(idx));

        let rendered = array
            .value_as_time(idx)
            .unwrap()
            .format("%H:%M:%S%.3f")
            .to_string();
        assert_eq!(*expected, rendered);
    }
}
1812 | | |
#[test]
fn test_time64_nanosecond_array_from_vec() {
    // Nanoseconds since midnight and the time of day each one should format to:
    //        1e6 -> 00:00:00.001
    // 37800005e6 -> 10:30:00.005
    // 86399210e6 -> 23:59:59.210
    let array: PrimitiveArray<Time64NanosecondType> =
        vec![1_000_000, 37_800_005_000_000, 86_399_210_000_000].into();
    assert_eq!(3, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(0, array.null_count());

    let expected_times = ["00:00:00.001", "10:30:00.005", "23:59:59.210"];
    for (idx, expected) in expected_times.iter().enumerate() {
        // time values must not convert to dates or datetimes
        assert_eq!(None, array.value_as_datetime(idx));
        assert_eq!(None, array.value_as_date(idx));

        let rendered = array
            .value_as_time(idx)
            .unwrap()
            .format("%H:%M:%S%.3f")
            .to_string();
        assert_eq!(*expected, rendered);
    }
}
1836 | | |
#[test]
fn test_interval_array_from_vec() {
    // Year-month intervals: values are plain integers
    let year_month = IntervalYearMonthArray::from(vec![Some(1), None, Some(-5)]);
    assert_eq!(3, year_month.len());
    assert_eq!(0, year_month.offset());
    assert_eq!(1, year_month.null_count());
    assert_eq!(1, year_month.value(0));
    assert_eq!(1, year_month.values()[0]);
    assert!(year_month.is_null(1));
    assert_eq!(-5, year_month.value(2));
    assert_eq!(-5, year_month.values()[2]);

    // Day-time intervals: values are `IntervalDayTime` structs
    let dt_first = IntervalDayTime {
        days: 34,
        milliseconds: 1,
    };
    let dt_last = IntervalDayTime {
        days: -2,
        milliseconds: -5,
    };
    let day_time = IntervalDayTimeArray::from(vec![Some(dt_first), None, Some(dt_last)]);
    assert_eq!(3, day_time.len());
    assert_eq!(0, day_time.offset());
    assert_eq!(1, day_time.null_count());
    assert_eq!(dt_first, day_time.value(0));
    assert_eq!(dt_first, day_time.values()[0]);
    assert!(day_time.is_null(1));
    assert_eq!(dt_last, day_time.value(2));
    assert_eq!(dt_last, day_time.values()[2]);

    // Month-day-nano intervals: values are `IntervalMonthDayNano` structs
    let mdn_first = IntervalMonthDayNano {
        months: 2,
        days: 34,
        nanoseconds: -1,
    };
    let mdn_last = IntervalMonthDayNano {
        months: -3,
        days: -2,
        nanoseconds: 4,
    };
    let month_day_nano =
        IntervalMonthDayNanoArray::from(vec![Some(mdn_first), None, Some(mdn_last)]);
    assert_eq!(3, month_day_nano.len());
    assert_eq!(0, month_day_nano.offset());
    assert_eq!(1, month_day_nano.null_count());
    assert_eq!(mdn_first, month_day_nano.value(0));
    assert_eq!(mdn_first, month_day_nano.values()[0]);
    assert!(month_day_nano.is_null(1));
    assert_eq!(mdn_last, month_day_nano.value(2));
    assert_eq!(mdn_last, month_day_nano.values()[2]);
}
1891 | | |
#[test]
fn test_duration_array_from_vec() {
    // The same checks apply to every duration unit, so they are written once
    // and expanded per array type.
    macro_rules! check_duration_array {
        ($array_ty:ty) => {{
            let arr = <$array_ty>::from(vec![Some(1), None, Some(-5)]);
            assert_eq!(3, arr.len());
            assert_eq!(0, arr.offset());
            assert_eq!(1, arr.null_count());
            assert_eq!(1, arr.value(0));
            assert_eq!(1, arr.values()[0]);
            assert!(arr.is_null(1));
            assert_eq!(-5, arr.value(2));
            assert_eq!(-5, arr.values()[2]);
        }};
    }

    check_duration_array!(DurationSecondArray);
    check_duration_array!(DurationMillisecondArray);
    check_duration_array!(DurationMicrosecondArray);
    check_duration_array!(DurationNanosecondArray);
}
1934 | | |
#[test]
fn test_timestamp_array_from_vec() {
    // The same checks apply to every timestamp unit, so they are written once
    // and expanded per array type.
    macro_rules! check_timestamp_array {
        ($array_ty:ty) => {{
            let arr = <$array_ty>::from(vec![1, -5]);
            assert_eq!(2, arr.len());
            assert_eq!(0, arr.offset());
            assert_eq!(0, arr.null_count());
            assert_eq!(1, arr.value(0));
            assert_eq!(-5, arr.value(1));
            assert_eq!(&[1, -5], arr.values());
        }};
    }

    check_timestamp_array!(TimestampSecondArray);
    check_timestamp_array!(TimestampMillisecondArray);
    check_timestamp_array!(TimestampMicrosecondArray);
    check_timestamp_array!(TimestampNanosecondArray);
}
1969 | | |
#[test]
fn test_primitive_array_slice() {
    let full = Int32Array::from(vec![
        Some(0),
        None,
        Some(2),
        None,
        Some(4),
        Some(5),
        Some(6),
        None,
        None,
    ]);
    assert_eq!(9, full.len());
    assert_eq!(0, full.offset());
    assert_eq!(4, full.null_count());

    // First slice: positions 2..7 of the full array, containing a single null
    let window = full.slice(2, 5);
    assert_eq!(5, window.len());
    assert_eq!(1, window.null_count());

    (0..window.len()).for_each(|idx| {
        assert_eq!(idx == 1, window.is_null(idx));
        assert_eq!(idx != 1, window.is_valid(idx));
    });

    let window_ints = window.as_any().downcast_ref::<Int32Array>().unwrap();
    assert_eq!(2, window_ints.values()[0]);
    assert_eq!(&[4, 5, 6], &window_ints.values()[2..5]);

    // Slice of the slice: only the three trailing non-null values remain
    let tail = window.slice(2, 3);
    assert_eq!(3, tail.len());
    assert_eq!(0, tail.null_count());

    let tail_ints = tail.as_any().downcast_ref::<Int32Array>().unwrap();
    assert_eq!(&[4, 5, 6], tail_ints.values());
    assert_eq!(4, tail_ints.value(0));
    assert_eq!(5, tail_ints.value(1));
    assert_eq!(6, tail_ints.value(2));
}
2009 | | |
#[test]
fn test_boolean_array_slice() {
    let full = BooleanArray::from(vec![
        Some(true),
        None,
        Some(false),
        None,
        Some(true),
        Some(false),
        Some(true),
        Some(false),
        None,
        Some(true),
    ]);

    assert_eq!(10, full.len());
    assert_eq!(0, full.offset());
    assert_eq!(3, full.null_count());

    // Positions 3..8 of the full array: one null followed by four values
    let window = full.slice(3, 5);
    assert_eq!(5, window.len());
    assert_eq!(3, window.offset());
    assert_eq!(1, window.null_count());

    let window_bools = window.as_any().downcast_ref::<BooleanArray>().unwrap();

    assert!(!window_bools.is_valid(0));

    let expected_values = [(1, true), (2, false), (3, true), (4, false)];
    for (idx, expected) in expected_values {
        assert!(window_bools.is_valid(idx));
        assert_eq!(expected, window_bools.value(idx));
    }
}
2050 | | |
#[test]
fn test_int32_fmt_debug() {
    // Debug output lists one value per line under the array type header
    let array = Int32Array::from(vec![0, 1, 2, 3, 4]);
    let expected = "PrimitiveArray<Int32>\n[\n 0,\n 1,\n 2,\n 3,\n 4,\n]";
    let rendered = format!("{array:?}");
    assert_eq!(expected, rendered);
}
2059 | | |
#[test]
fn test_fmt_debug_up_to_20_elements() {
    // For every length from 1 to 20, each element must appear on its own line
    for count in 1..=20 {
        let values: Vec<i16> = (0..count).collect();
        let rows: Vec<String> = values.iter().map(|v| format!(" {v},")).collect();
        let expected = format!("PrimitiveArray<Int16>\n[\n{}\n]", rows.join("\n"));

        let array = Int16Array::from(values);
        let rendered = format!("{array:?}");

        assert_eq!(expected, rendered);
    }
}
2077 | | |
#[test]
fn test_int32_with_null_fmt_debug() {
    // Build [0, 1, null, 3, 4] through the builder API
    let mut builder = Int32Array::builder(3);
    builder.append_slice(&[0, 1]);
    builder.append_null();
    builder.append_slice(&[3, 4]);
    let array = builder.finish();

    let expected = "PrimitiveArray<Int32>\n[\n 0,\n 1,\n null,\n 3,\n 4,\n]";
    let rendered = format!("{array:?}");
    assert_eq!(expected, rendered);
}
2090 | | |
#[test]
fn test_timestamp_fmt_debug() {
    // Without a timezone, values are printed as naive datetimes
    let millis = vec![1546214400000, 1546214400000, -1546214400000];
    let array: PrimitiveArray<TimestampMillisecondType> = TimestampMillisecondArray::from(millis);

    let expected = "PrimitiveArray<Timestamp(Millisecond, None)>\n[\n 2018-12-31T00:00:00,\n 2018-12-31T00:00:00,\n 1921-01-02T00:00:00,\n]";
    let rendered = format!("{array:?}");
    assert_eq!(expected, rendered);
}
2100 | | |
#[test]
fn test_timestamp_utc_fmt_debug() {
    // With the UTC timezone, every value is printed with a `+00:00` offset
    let millis = vec![1546214400000, 1546214400000, -1546214400000];
    let array: PrimitiveArray<TimestampMillisecondType> =
        TimestampMillisecondArray::from(millis).with_timezone_utc();

    let expected = "PrimitiveArray<Timestamp(Millisecond, Some(\"+00:00\"))>\n[\n 2018-12-31T00:00:00+00:00,\n 2018-12-31T00:00:00+00:00,\n 1921-01-02T00:00:00+00:00,\n]";
    let rendered = format!("{array:?}");
    assert_eq!(expected, rendered);
}
2111 | | |
#[test]
#[cfg(feature = "chrono-tz")]
fn test_timestamp_with_named_tz_fmt_debug() {
    // With `chrono-tz` enabled, a named timezone is resolved and values are
    // printed with its offset
    let millis = vec![1546214400000, 1546214400000, -1546214400000];
    let array: PrimitiveArray<TimestampMillisecondType> =
        TimestampMillisecondArray::from(millis).with_timezone("Asia/Taipei".to_string());

    let expected = "PrimitiveArray<Timestamp(Millisecond, Some(\"Asia/Taipei\"))>\n[\n 2018-12-31T08:00:00+08:00,\n 2018-12-31T08:00:00+08:00,\n 1921-01-02T08:00:00+08:00,\n]";
    let rendered = format!("{array:?}");
    assert_eq!(expected, rendered);
}
2123 | | |
#[test]
#[cfg(not(feature = "chrono-tz"))]
fn test_timestamp_with_named_tz_fmt_debug() {
    // Without `chrono-tz`, a named timezone is reported as unknown next to
    // each value
    let millis = vec![1546214400000, 1546214400000, -1546214400000];
    let array: PrimitiveArray<TimestampMillisecondType> =
        TimestampMillisecondArray::from(millis).with_timezone("Asia/Taipei".to_string());

    println!("{array:?}");

    let expected = "PrimitiveArray<Timestamp(Millisecond, Some(\"Asia/Taipei\"))>\n[\n 2018-12-31T00:00:00 (Unknown Time Zone 'Asia/Taipei'),\n 2018-12-31T00:00:00 (Unknown Time Zone 'Asia/Taipei'),\n 1921-01-02T00:00:00 (Unknown Time Zone 'Asia/Taipei'),\n]";
    let rendered = format!("{array:?}");
    assert_eq!(expected, rendered);
}
2138 | | |
#[test]
fn test_timestamp_with_fixed_offset_tz_fmt_debug() {
    // A fixed `+08:00` offset shifts the printed wall-clock time by eight hours
    let millis = vec![1546214400000, 1546214400000, -1546214400000];
    let array: PrimitiveArray<TimestampMillisecondType> =
        TimestampMillisecondArray::from(millis).with_timezone("+08:00".to_string());

    let expected = "PrimitiveArray<Timestamp(Millisecond, Some(\"+08:00\"))>\n[\n 2018-12-31T08:00:00+08:00,\n 2018-12-31T08:00:00+08:00,\n 1921-01-02T08:00:00+08:00,\n]";
    let rendered = format!("{array:?}");
    assert_eq!(expected, rendered);
}
2149 | | |
#[test]
fn test_timestamp_with_incorrect_tz_fmt_debug() {
    // A timezone string that cannot be parsed is reported next to each value
    let millis = vec![1546214400000, 1546214400000, -1546214400000];
    let array: PrimitiveArray<TimestampMillisecondType> =
        TimestampMillisecondArray::from(millis).with_timezone("xxx".to_string());

    let expected = "PrimitiveArray<Timestamp(Millisecond, Some(\"xxx\"))>\n[\n 2018-12-31T00:00:00 (Unknown Time Zone 'xxx'),\n 2018-12-31T00:00:00 (Unknown Time Zone 'xxx'),\n 1921-01-02T00:00:00 (Unknown Time Zone 'xxx'),\n]";
    let rendered = format!("{array:?}");
    assert_eq!(expected, rendered);
}
2160 | | |
#[test]
#[cfg(feature = "chrono-tz")]
fn test_timestamp_with_tz_with_daylight_saving_fmt_debug() {
    // Two pairs of instants one second apart; the expected output shows the
    // offset changing between the first pair
    let millis = vec![
        1647161999000,
        1647162000000,
        1667717999000,
        1667718000000,
    ];
    let array: PrimitiveArray<TimestampMillisecondType> =
        TimestampMillisecondArray::from(millis).with_timezone("America/Denver".to_string());

    let expected = "PrimitiveArray<Timestamp(Millisecond, Some(\"America/Denver\"))>\n[\n 2022-03-13T01:59:59-07:00,\n 2022-03-13T03:00:00-06:00,\n 2022-11-06T00:59:59-06:00,\n 2022-11-06T01:00:00-06:00,\n]";
    let rendered = format!("{array:?}");
    assert_eq!(expected, rendered);
}
2176 | | |
#[test]
fn test_date32_fmt_debug() {
    // Date32 values are printed as calendar dates, including a negative one
    let array: PrimitiveArray<Date32Type> = vec![12356, 13548, -365].into();
    let expected = "PrimitiveArray<Date32>\n[\n 2003-10-31,\n 2007-02-04,\n 1969-01-01,\n]";
    let rendered = format!("{array:?}");
    assert_eq!(expected, rendered);
}
2185 | | |
#[test]
fn test_time32second_fmt_debug() {
    // Seconds since midnight are printed as `HH:MM:SS`
    let array: PrimitiveArray<Time32SecondType> = vec![7201, 60054].into();
    let expected = "PrimitiveArray<Time32(Second)>\n[\n 02:00:01,\n 16:40:54,\n]";
    let rendered = format!("{array:?}");
    assert_eq!(expected, rendered);
}
2194 | | |
#[test]
fn test_time32second_invalid_neg() {
    // Negative values cannot be rendered as a time of day, so the Debug output
    // carries a cast error for each element instead
    let array: PrimitiveArray<Time32SecondType> = vec![-7201, -60054].into();
    let expected = "PrimitiveArray<Time32(Second)>\n[\n Cast error: Failed to convert -7201 to temporal for Time32(Second),\n Cast error: Failed to convert -60054 to temporal for Time32(Second),\n]";
    let rendered = format!("{array:?}");
    assert_eq!(expected, rendered)
}
2205 | | |
#[test]
fn test_timestamp_micros_out_of_range() {
    // Regression test for https://github.com/apache/arrow-datafusion/issues/3832:
    // an out-of-range microsecond timestamp is printed as `null`
    let array: PrimitiveArray<TimestampMicrosecondType> = vec![9065525203050843594].into();
    let expected = "PrimitiveArray<Timestamp(Microsecond, None)>\n[\n null,\n]";
    let rendered = format!("{array:?}");
    assert_eq!(expected, rendered)
}
2215 | | |
#[test]
fn test_primitive_array_builder() {
    // Test building a primitive array with ArrayData builder and offset
    let buf = Buffer::from_slice_ref([0i32, 1, 2, 3, 4, 5, 6]);
    // Bytes 8..28 of the buffer are the five `i32` values starting at element 2
    let buf2 = buf.slice_with_length(8, 20);
    let data = ArrayData::builder(DataType::Int32)
        .len(5)
        .offset(2)
        .add_buffer(buf)
        .build()
        .unwrap();
    let arr = Int32Array::from(data);
    assert_eq!(&buf2, arr.values.inner());
    assert_eq!(5, arr.len());
    assert_eq!(0, arr.null_count());
    // Check every element of the offset window, not just a prefix:
    // with offset 2 and length 5 the array holds 2, 3, 4, 5, 6
    for i in 0..arr.len() {
        assert_eq!((i + 2) as i32, arr.value(i));
    }
}
2235 | | |
#[test]
fn test_primitive_from_iter_values() {
    // `from_iter_values` must yield a null-free array with the iterator's values
    let array: PrimitiveArray<Int32Type> = PrimitiveArray::from_iter_values(0..10);
    assert_eq!(10, array.len());
    assert_eq!(0, array.null_count());
    for (idx, expected) in (0..10i32).enumerate() {
        assert_eq!(expected, array.value(idx));
    }
}
2246 | | |
#[test]
fn test_primitive_array_from_unbound_iter() {
    // The source iterator has no upper size bound of its own; `scan` stops it
    // after 10 items while `take` advertises an upper bound of 100
    let source = (0..)
        .scan(0usize, |emitted, value| {
            if *emitted >= 10 {
                // only 10 values are ever produced
                return None;
            }
            *emitted += 1;
            Some(Some(value))
        })
        .take(100);

    // the advertised upper bound comes from `take`
    let (_, upper_bound) = source.size_hint();
    assert_eq!(upper_bound, Some(100));

    // the collected array must reflect the real item count, not the size hint
    let array: PrimitiveArray<Int32Type> = source.collect();
    assert_eq!(array.len(), 10);
}
2270 | | |
#[test]
fn test_primitive_array_from_non_null_iter() {
    // Collecting only `Some` values must not produce a null buffer at all
    let array: PrimitiveArray<Int32Type> = (0..10_i32).map(Some).collect();
    assert_eq!(array.len(), 10);
    assert_eq!(array.null_count(), 0);
    assert!(array.nulls().is_none());
    assert_eq!(array.values(), &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
}
2280 | | |
#[test]
#[should_panic(
    expected = "PrimitiveArray data should contain a single buffer only (values buffer)"
)]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
fn test_primitive_array_invalid_buffer_len() {
    // Attach the same buffer twice so the data carries two buffers instead of one
    let values = Buffer::from_slice_ref([0i32, 1, 2, 3, 4]);
    let builder = ArrayData::builder(DataType::Int32)
        .add_buffer(values.clone())
        .add_buffer(values)
        .len(5);
    let data = unsafe { builder.build_unchecked() };

    // The conversion itself is expected to panic
    drop(Int32Array::from(data));
}
2299 | | |
#[test]
fn test_access_array_concurrently() {
    // The array is moved into another thread and read there
    let array = Int32Array::from(vec![5, 6, 7, 8, 9]);
    let handle = std::thread::spawn(move || array.value(3));
    let outcome = handle.join();

    assert!(outcome.is_ok());
    assert_eq!(8, outcome.ok().unwrap());
}
2308 | | |
#[test]
fn test_primitive_array_creation() {
    // Collecting plain values and collecting `Some(value)` must give equal arrays
    let raw = [10_i8, 11, 12, 13, 14];
    let from_values: Int8Array = raw.into_iter().collect();
    let from_options: Int8Array = raw.into_iter().map(Some).collect();

    assert_eq!(from_values, from_options);
}
2316 | | |
#[test]
#[should_panic(
    expected = "Trying to access an element at index 4 from a PrimitiveArray of length 3"
)]
fn test_string_array_get_value_index_out_of_bound() {
    // Despite the test name, this exercises an `Int8Array`: reading index 4 of
    // a three-element array must panic
    let three_values: Int8Array = [10_i8, 11, 12].into_iter().collect();

    three_values.value(4);
}
2326 | | |
#[test]
#[should_panic(expected = "PrimitiveArray expected data type Int64 got Int32")]
fn test_from_array_data_validation() {
    // Int32 data must be rejected when converted into an Int64 array
    let int32_array = PrimitiveArray::<Int32Type>::from_iter([1, 2, 3]);
    let int32_data = int32_array.into_data();
    let _ = PrimitiveArray::<Int64Type>::from(int32_data);
}
2333 | | |
#[test]
fn test_decimal32() {
    // Every construction path must preserve the values, including the extremes
    let expected: Vec<_> = vec![0, 1, -1, i32::MIN, i32::MAX];

    let collected: PrimitiveArray<Decimal32Type> = expected.iter().copied().collect();
    assert_eq!(collected.values(), &expected);

    let from_values: PrimitiveArray<Decimal32Type> =
        PrimitiveArray::from_iter_values(expected.iter().copied());
    assert_eq!(from_values.values(), &expected);

    let from_vec = PrimitiveArray::<Decimal32Type>::from(expected.clone());
    assert_eq!(from_vec.values(), &expected);

    // Round-trip through `ArrayData`
    let from_data = PrimitiveArray::<Decimal32Type>::from(from_vec.to_data());
    assert_eq!(from_data.values(), &expected);
}
2351 | | |
#[test]
fn test_decimal64() {
    // Every construction path must preserve the values, including the extremes
    let expected: Vec<_> = vec![0, 1, -1, i64::MIN, i64::MAX];

    let collected: PrimitiveArray<Decimal64Type> = expected.iter().copied().collect();
    assert_eq!(collected.values(), &expected);

    let from_values: PrimitiveArray<Decimal64Type> =
        PrimitiveArray::from_iter_values(expected.iter().copied());
    assert_eq!(from_values.values(), &expected);

    let from_vec = PrimitiveArray::<Decimal64Type>::from(expected.clone());
    assert_eq!(from_vec.values(), &expected);

    // Round-trip through `ArrayData`
    let from_data = PrimitiveArray::<Decimal64Type>::from(from_vec.to_data());
    assert_eq!(from_data.values(), &expected);
}
2369 | | |
// Every construction path for a `Decimal128Type` array must preserve the raw
// values, including the `i128` extremes.
#[test]
fn test_decimal128() {
    let expected: Vec<i128> = vec![0, 1, -1, i128::MIN, i128::MAX];

    // Via `FromIterator`.
    let from_iter = PrimitiveArray::<Decimal128Type>::from_iter(expected.iter().copied());
    assert_eq!(from_iter.values(), &expected);

    // Via `from_iter_values`.
    let from_iter_values =
        PrimitiveArray::<Decimal128Type>::from_iter_values(expected.iter().copied());
    assert_eq!(from_iter_values.values(), &expected);

    // Via `From<Vec<_>>`.
    let from_vec = PrimitiveArray::<Decimal128Type>::from(expected.clone());
    assert_eq!(from_vec.values(), &expected);

    // Round trip through `ArrayData`.
    let round_tripped = PrimitiveArray::<Decimal128Type>::from(from_vec.to_data());
    assert_eq!(round_tripped.values(), &expected);
}
2387 | | |
// Every construction path for a `Decimal256Type` array must preserve the raw
// values, including the `i256` extremes.
#[test]
fn test_decimal256() {
    let expected: Vec<i256> = vec![
        i256::ZERO,
        i256::ONE,
        i256::MINUS_ONE,
        i256::MIN,
        i256::MAX,
    ];

    // Via `FromIterator`.
    let from_iter = PrimitiveArray::<Decimal256Type>::from_iter(expected.iter().copied());
    assert_eq!(from_iter.values(), &expected);

    // Via `from_iter_values`.
    let from_iter_values =
        PrimitiveArray::<Decimal256Type>::from_iter_values(expected.iter().copied());
    assert_eq!(from_iter_values.values(), &expected);

    // Via `From<Vec<_>>`.
    let from_vec = PrimitiveArray::<Decimal256Type>::from(expected.clone());
    assert_eq!(from_vec.values(), &expected);

    // Round trip through `ArrayData`.
    let round_tripped = PrimitiveArray::<Decimal256Type>::from(from_vec.to_data());
    assert_eq!(round_tripped.values(), &expected);
}
2406 | | |
// Builds a `Decimal128Array` directly from a raw 32-byte buffer holding two
// 16-byte values and checks that both are read back as the expected `i128`s.
#[test]
fn test_decimal_array() {
    // Little-endian two's-complement encodings of the two values stored below:
    // let val_8887: [u8; 16] = [192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
    // let val_neg_8887: [u8; 16] = [64, 36, 75, 238, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255];
    let values: [u8; 32] = [
        192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 36, 75, 238, 253, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255,
    ];
    let array_data = ArrayData::builder(DataType::Decimal128(38, 6))
        .len(2)
        .add_buffer(Buffer::from(&values))
        .build()
        .unwrap();
    let decimal_array = Decimal128Array::from(array_data);
    assert_eq!(8_887_000_000_i128, decimal_array.value(0));
    assert_eq!(-8_887_000_000_i128, decimal_array.value(1));
}
2424 | | |
// Values too wide for the declared precision are accepted by the builder and by
// `with_precision_and_scale`; the overflow is only reported by an explicit
// `validate_decimal_precision` call.
#[test]
fn test_decimal_append_error_value() {
    // Case 1: 123456 has six digits, one more than precision 5 allows.
    let mut builder = Decimal128Builder::with_capacity(10);
    for value in [123456, 12345] {
        builder.append_value(value);
    }
    let arr = builder
        .finish()
        .with_precision_and_scale(5, 3)
        .unwrap();
    assert_eq!("12.345", arr.value_as_string(1));

    // Validate it explicitly
    let message = arr.validate_decimal_precision(5).unwrap_err().to_string();
    assert_eq!(
        "Invalid argument error: 123456 is too large to store in a Decimal128 of precision 5. Max is 99999",
        message
    );

    // Case 2: +/-100 has three digits, one more than precision 2 allows.
    let mut builder = Decimal128Builder::new();
    for value in [100, 99, -100, -99] {
        builder.append_value(value);
    }
    let arr = builder
        .finish()
        .with_precision_and_scale(2, 1)
        .unwrap();
    assert_eq!("9.9", arr.value_as_string(1));
    assert_eq!("-9.9", arr.value_as_string(3));

    // Validate it explicitly
    let message = arr.validate_decimal_precision(2).unwrap_err().to_string();
    assert_eq!(
        "Invalid argument error: 100 is too large to store in a Decimal128 of precision 2. Max is 99",
        message
    );
}
2462 | | |
// `from_iter_values` yields a non-null array typed `Decimal128(38, 10)`.
#[test]
fn test_decimal_from_iter_values() {
    let array = Decimal128Array::from_iter_values(vec![-100, 0, 101]);

    assert_eq!(array.len(), 3);
    assert_eq!(array.data_type(), &DataType::Decimal128(38, 10));

    // Each slot holds its input value and is valid.
    for (idx, want) in [-100_i128, 0, 101].into_iter().enumerate() {
        assert_eq!(want, array.value(idx));
        assert!(!array.is_null(idx));
    }
}
2475 | | |
// Collecting `Option`s keeps `None` entries as nulls and yields `Decimal128(38, 10)`.
#[test]
fn test_decimal_from_iter() {
    let array: Decimal128Array = vec![Some(-100), None, Some(101)].into_iter().collect();

    assert_eq!(array.len(), 3);
    assert_eq!(array.data_type(), &DataType::Decimal128(38, 10));

    // Slot 0: valid value.
    assert_eq!(-100_i128, array.value(0));
    assert!(!array.is_null(0));

    // Slot 1: null.
    assert!(array.is_null(1));

    // Slot 2: valid value.
    assert_eq!(101_i128, array.value(2));
    assert!(!array.is_null(2));
}
2487 | | |
// The array iterator's `size_hint` must be exact and shrink as items are consumed.
#[test]
fn test_decimal_iter_sized() {
    let array: Decimal128Array = vec![Some(-100), None, Some(101)].into_iter().collect();
    let mut remaining = array.iter();

    // is exact sized
    assert_eq!(array.len(), 3);

    // size_hint is reported correctly
    assert_eq!(remaining.size_hint(), (3, Some(3)));

    remaining.next().unwrap();
    assert_eq!(remaining.size_hint(), (2, Some(2)));

    remaining.next().unwrap();
    remaining.next().unwrap();
    assert_eq!(remaining.size_hint(), (0, Some(0)));

    // An exhausted iterator keeps reporting zero.
    assert!(remaining.next().is_none());
    assert_eq!(remaining.size_hint(), (0, Some(0)));
}
2507 | | |
// `value_as_string` renders scale-3 decimals with sign, leading zero and trailing zeros.
#[test]
fn test_decimal_array_value_as_string() {
    let collected: Decimal128Array = [123450, -123450, 100, -100, 10, -10, 0]
        .into_iter()
        .map(Some)
        .collect();
    let arr = collected.with_precision_and_scale(6, 3).unwrap();

    let expected = [
        "123.450", "-123.450", "0.100", "-0.100", "0.010", "-0.010", "0.000",
    ];
    for (idx, want) in expected.into_iter().enumerate() {
        assert_eq!(want, arr.value_as_string(idx));
    }
}
2525 | | |
// `with_precision_and_scale` updates the data type, and the new scale drives
// how values are rendered.
#[test]
fn test_decimal_array_with_precision_and_scale() {
    let unscaled = Decimal128Array::from_iter_values([12345, 456, 7890, -123223423432432]);
    let arr = unscaled.with_precision_and_scale(20, 2).unwrap();

    assert_eq!(arr.data_type(), &DataType::Decimal128(20, 2));
    assert_eq!(arr.precision(), 20);
    assert_eq!(arr.scale(), 2);

    let mut actual = Vec::with_capacity(arr.len());
    for row in 0..arr.len() {
        actual.push(arr.value_as_string(row));
    }
    let expected = vec!["123.45", "4.56", "78.90", "-1232234234324.32"];

    assert_eq!(actual, expected);
}
2541 | | |
// Explicit validation must reject a value that needs more digits than the precision.
#[test]
#[should_panic(
    expected = "-123223423432432 is too small to store in a Decimal128 of precision 5. Min is -99999"
)]
fn test_decimal_array_with_precision_and_scale_out_of_range() {
    let unchecked = Decimal128Array::from_iter_values([12345, 456, 7890, -123223423432432]);

    // precision is too small to hold value
    let narrowed = unchecked.with_precision_and_scale(5, 2).unwrap();

    narrowed.validate_decimal_precision(5).unwrap();
}
2553 | | |
// A precision of zero is rejected.
#[test]
#[should_panic(expected = "precision cannot be 0, has to be between [1, 38]")]
fn test_decimal_array_with_precision_zero() {
    let array = Decimal128Array::from_iter_values([12345, 456]);
    let outcome = array.with_precision_and_scale(0, 2);
    outcome.unwrap();
}
2561 | | |
// A precision above the 128-bit maximum of 38 is rejected.
#[test]
#[should_panic(expected = "precision 40 is greater than max 38")]
fn test_decimal_array_with_precision_and_scale_invalid_precision() {
    let array = Decimal128Array::from_iter_values([12345, 456]);
    let outcome = array.with_precision_and_scale(40, 2);
    outcome.unwrap();
}
2569 | | |
// A scale above the maximum of 38 is rejected.
#[test]
#[should_panic(expected = "scale 40 is greater than max 38")]
fn test_decimal_array_with_precision_and_scale_invalid_scale() {
    let array = Decimal128Array::from_iter_values([12345, 456]);
    let outcome = array.with_precision_and_scale(20, 40);
    outcome.unwrap();
}
2577 | | |
// A scale larger than the precision is rejected.
#[test]
#[should_panic(expected = "scale 10 is greater than precision 4")]
fn test_decimal_array_with_precision_and_scale_invalid_precision_and_scale() {
    let array = Decimal128Array::from_iter_values([12345, 456]);
    let outcome = array.with_precision_and_scale(4, 10);
    outcome.unwrap();
}
2585 | | |
// Values that do not fit in precision 5 become null; existing nulls stay null.
#[test]
fn test_decimal_array_set_null_if_overflow_with_precision() {
    let input = Decimal128Array::from(vec![Some(123456), Some(123), None, Some(123456)]);
    let expected = Decimal128Array::from(vec![None, Some(123), None, None]);

    let result = input.null_if_overflow_precision(5);

    assert_eq!(result, expected);
}
2593 | | |
// Iterating a built `Decimal256Array` yields the appended values, with nulls as `None`.
#[test]
fn test_decimal256_iter() {
    let first = i256::from_i128(12345);
    let second = i256::from_i128(56789);

    let mut builder = Decimal256Builder::with_capacity(30);
    builder.append_value(first);
    builder.append_null();
    builder.append_value(second);

    let array: Decimal256Array = builder
        .finish()
        .with_precision_and_scale(76, 6)
        .unwrap();

    let collected: Vec<_> = array.iter().collect();
    assert_eq!(vec![Some(first), None, Some(second)], collected);
}
2610 | | |
// A `Decimal256Array` collected from `Option`s keeps values, nulls and the
// precision/scale applied afterwards.
#[test]
fn test_from_iter_decimal256array() {
    let value1 = i256::from_i128(12345);
    let value2 = i256::from_i128(56789);

    let collected: Decimal256Array =
        vec![Some(value1), None, Some(value2)].into_iter().collect();
    let array = collected.with_precision_and_scale(76, 10).unwrap();

    assert_eq!(array.len(), 3);
    assert_eq!(array.data_type(), &DataType::Decimal256(76, 10));

    // Slot 0: valid value.
    assert_eq!(value1, array.value(0));
    assert!(!array.is_null(0));

    // Slot 1: null.
    assert!(array.is_null(1));

    // Slot 2: valid value.
    assert_eq!(value2, array.value(2));
    assert!(!array.is_null(2));
}
2627 | | |
// A `Decimal128Array` collected from `Option`s keeps values, nulls and the
// precision/scale applied afterwards.
#[test]
fn test_from_iter_decimal128array() {
    let collected: Decimal128Array = vec![Some(-100), None, Some(101)].into_iter().collect();
    let array = collected.with_precision_and_scale(38, 10).unwrap();

    assert_eq!(array.len(), 3);
    assert_eq!(array.data_type(), &DataType::Decimal128(38, 10));

    // Slot 0: valid value.
    assert_eq!(-100_i128, array.value(0));
    assert!(!array.is_null(0));

    // Slot 1: null.
    assert!(array.is_null(1));

    // Slot 2: valid value.
    assert_eq!(101_i128, array.value(2));
    assert!(!array.is_null(2));
}
2640 | | |
// Iterating a built `Decimal64Array` yields the appended values, with nulls as `None`.
#[test]
fn test_decimal64_iter() {
    let first = 12345;
    let second = 56789;

    let mut builder = Decimal64Builder::with_capacity(30);
    builder.append_value(first);
    builder.append_null();
    builder.append_value(second);

    let array: Decimal64Array = builder
        .finish()
        .with_precision_and_scale(18, 4)
        .unwrap();

    let collected: Vec<_> = array.iter().collect();
    assert_eq!(vec![Some(first), None, Some(second)], collected);
}
2657 | | |
// A `Decimal64Array` collected from `Option`s keeps values, nulls and the
// precision/scale applied afterwards.
#[test]
fn test_from_iter_decimal64array() {
    let value1 = 12345;
    let value2 = 56789;

    let collected: Decimal64Array =
        vec![Some(value1), None, Some(value2)].into_iter().collect();
    let array = collected.with_precision_and_scale(18, 4).unwrap();

    assert_eq!(array.len(), 3);
    assert_eq!(array.data_type(), &DataType::Decimal64(18, 4));

    // Slot 0: valid value.
    assert_eq!(value1, array.value(0));
    assert!(!array.is_null(0));

    // Slot 1: null.
    assert!(array.is_null(1));

    // Slot 2: valid value.
    assert_eq!(value2, array.value(2));
    assert!(!array.is_null(2));
}
2674 | | |
// Iterating a built `Decimal32Array` yields the appended values, with nulls as `None`.
#[test]
fn test_decimal32_iter() {
    let first = 12345;
    let second = 56789;

    let mut builder = Decimal32Builder::with_capacity(30);
    builder.append_value(first);
    builder.append_null();
    builder.append_value(second);

    let array: Decimal32Array = builder
        .finish()
        .with_precision_and_scale(9, 2)
        .unwrap();

    let collected: Vec<_> = array.iter().collect();
    assert_eq!(vec![Some(first), None, Some(second)], collected);
}
2691 | | |
// A `Decimal32Array` collected from `Option`s keeps values, nulls and the
// precision/scale applied afterwards.
#[test]
fn test_from_iter_decimal32array() {
    let value1 = 12345;
    let value2 = 56789;

    let collected: Decimal32Array =
        vec![Some(value1), None, Some(value2)].into_iter().collect();
    let array = collected.with_precision_and_scale(9, 2).unwrap();

    assert_eq!(array.len(), 3);
    assert_eq!(array.data_type(), &DataType::Decimal32(9, 2));

    // Slot 0: valid value.
    assert_eq!(value1, array.value(0));
    assert!(!array.is_null(0));

    // Slot 1: null.
    assert!(array.is_null(1));

    // Slot 2: valid value.
    assert_eq!(value2, array.value(2));
    assert!(!array.is_null(2));
}
2708 | | |
// `unary_opt` turns a `None` from the closure into a null, and input nulls stay
// null on a second pass.
#[test]
fn test_unary_opt() {
    // First pass: keep odd values only.
    let source = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7]);
    let odds = source.unary_opt::<_, Int32Type>(|x| if x % 2 != 0 { Some(x) } else { None });

    let expected_odds =
        Int32Array::from(vec![Some(1), None, Some(3), None, Some(5), None, Some(7)]);
    assert_eq!(odds, expected_odds);

    // Second pass over an array with nulls: also drop multiples of three.
    let not_threes =
        expected_odds.unary_opt::<_, Int32Type>(|x| if x % 3 != 0 { Some(x) } else { None });
    let expected_not_threes =
        Int32Array::from(vec![Some(1), None, None, None, Some(5), None, Some(7)]);
    assert_eq!(not_threes, expected_not_threes);
}
2721 | | |
// Despite the name, this indexes a `Decimal128Array`: reading index 4 of a
// three-element array must panic with the bounds message below.
#[test]
#[should_panic(
    expected = "Trying to access an element at index 4 from a PrimitiveArray of length 3"
)]
fn test_fixed_size_binary_array_get_value_index_out_of_bound() {
    let three_decimals = Decimal128Array::from(vec![-100, 0, 101]);

    three_decimals.value(4);
}
2730 | | |
// Once the other handle is dropped, an array converts into a builder whose
// values can be edited in place before finishing a new array.
#[test]
fn test_into_builder() {
    let source: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
    let boxed: ArrayRef = Arc::new(source);

    // Take a typed handle, then drop the boxed one before converting.
    let col: Int32Array = downcast_array(&boxed);
    drop(boxed);

    let mut builder = col.into_builder().unwrap();

    let slice = builder.values_slice_mut();
    assert_eq!(slice, &[1, 2, 3]);

    // Overwrite every slot through the mutable view.
    for (slot, replacement) in slice.iter_mut().zip([4, 2, 1]) {
        *slot = replacement;
    }

    let expected: Int32Array = vec![Some(4), Some(2), Some(1)].into_iter().collect();
    let rebuilt = builder.finish();

    assert_eq!(expected, rebuilt);
}
2753 | | |
// While `boxed` is still alive, `into_builder` must fail and hand the array back unchanged.
#[test]
fn test_into_builder_cloned_array() {
    let source: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
    let boxed: ArrayRef = Arc::new(source);

    // Build a second array from the boxed array's data; `boxed` is not dropped.
    let col: Int32Array = PrimitiveArray::<Int32Type>::from(boxed.to_data());

    let returned = match col.into_builder() {
        Err(original) => original,
        Ok(_) => panic!("Should not get builder from cloned array"),
    };

    let expected: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
    assert_eq!(expected, returned)
}
2771 | | |
// An array obtained from a slice cannot be turned into a builder.
#[test]
fn test_into_builder_on_sliced_array() {
    let full: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();

    let tail = full.slice(1, 2);
    let col: Int32Array = downcast_array(&tail);
    drop(tail);

    let outcome = col.into_builder();
    outcome.expect_err("Should not build builder from sliced array");
}
2783 | | |
// `unary_mut` applies the closure to every value and leaves nulls as nulls.
#[test]
fn test_unary_mut() {
    // Array built from `Some` values only.
    let all_valid: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
    let mapped = all_valid.unary_mut(|x| x * 2 + 1).unwrap();
    let expected: Int32Array = vec![3, 5, 7].into_iter().map(Some).collect();
    assert_eq!(expected, mapped);

    // Array with a trailing null.
    let with_null: Int32Array = Int32Array::from(vec![Some(5), Some(7), None]);
    let mapped = with_null.unary_mut(|x| x * 2 + 1).unwrap();
    let expected = Int32Array::from(vec![Some(11), Some(15), None]);
    assert_eq!(mapped, expected);
}
2797 | | |
// Interval units are part of the type check: `DayTime` data cannot back a
// `MonthDayNano` array.
#[test]
#[should_panic(
    expected = "PrimitiveArray expected data type Interval(MonthDayNano) got Interval(DayTime)"
)]
fn test_invalid_interval_type() {
    let day_time_data = IntervalDayTimeArray::from(vec![IntervalDayTime::ZERO]).into_data();
    let _ = IntervalMonthDayNanoArray::from(day_time_data);
}
2806 | | |
// A timestamp array has no timezone until `with_timezone` attaches one.
#[test]
fn test_timezone() {
    let without_tz = TimestampNanosecondArray::from_iter_values([1, 2]);
    assert_eq!(without_tz.timezone(), None);

    let with_tz = without_tz.with_timezone("+02:00");
    assert_eq!(with_tz.timezone(), Some("+02:00"));
}
2815 | | |
// `new` accepts an absent or length-matching null buffer; `try_new` reports a
// length mismatch as an error.
#[test]
fn test_try_new() {
    // No null buffer, then one of the right length.
    let _ = Int32Array::new(vec![1, 2, 3, 4].into(), None);
    let _ = Int32Array::new(vec![1, 2, 3, 4].into(), Some(NullBuffer::new_null(4)));

    // Four values against a three-entry null buffer.
    let short_nulls = Some(NullBuffer::new_null(3));
    let err = Int32Array::try_new(vec![1, 2, 3, 4].into(), short_nulls).unwrap_err();

    assert_eq!(
        err.to_string(),
        "Invalid argument error: Incorrect length of null buffer for PrimitiveArray, expected 4 got 3"
    );

    // Attaching a timezone-bearing timestamp type to a timestamp array must not panic.
    let zoned_type = DataType::Timestamp(TimeUnit::Nanosecond, Some("03:00".into()));
    let _ = TimestampNanosecondArray::new(vec![1, 2, 3, 4].into(), None).with_data_type(zoned_type);
}
2833 | | |
// `with_data_type` must panic when the requested type does not match the array's type.
#[test]
#[should_panic(expected = "PrimitiveArray expected data type Int32 got Date32")]
fn test_with_data_type() {
    let ints = Int32Array::new(vec![1, 2, 3, 4].into(), None);
    ints.with_data_type(DataType::Date32);
}
2839 | | |
// Debug output for `Time32(Second)`: values that cannot be converted to a time
// are rendered as cast errors, and nulls as `null`.
// NOTE(review): the expected text is whitespace-sensitive — confirm the spacing
// inside the literal against what the array's `Debug` impl actually emits.
#[test]
fn test_time_32second_output() {
    let array = Time32SecondArray::from(vec![
        Some(-1),
        Some(0),
        Some(86_399),
        Some(86_400),
        Some(86_401),
        None,
    ]);

    let expected = "PrimitiveArray<Time32(Second)>\n[\n Cast error: Failed to convert -1 to temporal for Time32(Second),\n 00:00:00,\n 23:59:59,\n Cast error: Failed to convert 86400 to temporal for Time32(Second),\n Cast error: Failed to convert 86401 to temporal for Time32(Second),\n null,\n]";
    let debug_str = format!("{array:?}");

    assert_eq!(expected, debug_str);
}
2856 | | |
// Debug output for `Time32(Millisecond)`: values that cannot be converted to a
// time are rendered as cast errors, and nulls as `null`.
// NOTE(review): the expected text is whitespace-sensitive — confirm the spacing
// inside the literal against what the array's `Debug` impl actually emits.
#[test]
fn test_time_32millisecond_debug_output() {
    let array = Time32MillisecondArray::from(vec![
        Some(-1),
        Some(0),
        Some(86_399_000),
        Some(86_400_000),
        Some(86_401_000),
        None,
    ]);

    let expected = "PrimitiveArray<Time32(Millisecond)>\n[\n Cast error: Failed to convert -1 to temporal for Time32(Millisecond),\n 00:00:00,\n 23:59:59,\n Cast error: Failed to convert 86400000 to temporal for Time32(Millisecond),\n Cast error: Failed to convert 86401000 to temporal for Time32(Millisecond),\n null,\n]";
    let debug_str = format!("{array:?}");

    assert_eq!(expected, debug_str);
}
2873 | | |
// Debug output for `Time64(Nanosecond)`: values that cannot be converted to a
// time are rendered as cast errors, and nulls as `null`.
// NOTE(review): the expected text is whitespace-sensitive — confirm the spacing
// inside the literal against what the array's `Debug` impl actually emits.
#[test]
fn test_time_64nanosecond_debug_output() {
    const NANOS_PER_SECOND: i64 = 1_000_000_000;

    let array = Time64NanosecondArray::from(vec![
        Some(-1),
        Some(0),
        Some(86_399 * NANOS_PER_SECOND),
        Some(86_400 * NANOS_PER_SECOND),
        Some(86_401 * NANOS_PER_SECOND),
        None,
    ]);

    let expected = "PrimitiveArray<Time64(Nanosecond)>\n[\n Cast error: Failed to convert -1 to temporal for Time64(Nanosecond),\n 00:00:00,\n 23:59:59,\n Cast error: Failed to convert 86400000000000 to temporal for Time64(Nanosecond),\n Cast error: Failed to convert 86401000000000 to temporal for Time64(Nanosecond),\n null,\n]";
    let debug_str = format!("{array:?}");

    assert_eq!(expected, debug_str);
}
2891 | | |
// Debug output for `Time64(Microsecond)`: values that cannot be converted to a
// time are rendered as cast errors, and nulls as `null`.
// NOTE(review): the expected text is whitespace-sensitive — confirm the spacing
// inside the literal against what the array's `Debug` impl actually emits.
#[test]
fn test_time_64microsecond_debug_output() {
    const MICROS_PER_SECOND: i64 = 1_000_000;

    let array = Time64MicrosecondArray::from(vec![
        Some(-1),
        Some(0),
        Some(86_399 * MICROS_PER_SECOND),
        Some(86_400 * MICROS_PER_SECOND),
        Some(86_401 * MICROS_PER_SECOND),
        None,
    ]);

    let expected = "PrimitiveArray<Time64(Microsecond)>\n[\n Cast error: Failed to convert -1 to temporal for Time64(Microsecond),\n 00:00:00,\n 23:59:59,\n Cast error: Failed to convert 86400000000 to temporal for Time64(Microsecond),\n Cast error: Failed to convert 86401000000 to temporal for Time64(Microsecond),\n null,\n]";
    let debug_str = format!("{array:?}");

    assert_eq!(expected, debug_str);
}
2906 | | |
// Smoke test: `into_builder` on an array containing nulls must not panic; the
// result itself is ignored.
#[test]
fn test_primitive_with_nulls_into_builder() {
    let values = vec![
        Some(1),
        None,
        Some(3),
        Some(4),
        None,
        Some(7),
        None,
        Some(8),
    ];
    let array: Int32Array = values.into_iter().collect();

    let _ = array.into_builder();
}
2923 | | } |