/Users/andrewlamb/Software/arrow-rs/arrow-arith/src/aggregate.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! Defines aggregations over Arrow arrays. |
19 | | |
20 | | use arrow_array::cast::*; |
21 | | use arrow_array::iterator::ArrayIter; |
22 | | use arrow_array::*; |
23 | | use arrow_buffer::{ArrowNativeType, NullBuffer}; |
24 | | use arrow_data::bit_iterator::try_for_each_valid_idx; |
25 | | use arrow_schema::*; |
26 | | use std::borrow::BorrowMut; |
27 | | use std::cmp::{self, Ordering}; |
28 | | use std::ops::{BitAnd, BitOr, BitXor}; |
29 | | use types::ByteViewType; |
30 | | |
/// An accumulator for primitive numeric values.
///
/// Implementations are `Copy + Default` so that a fixed-size array of
/// independent accumulators can be created, updated lane by lane and merged
/// into a single result afterwards.
trait NumericAccumulator<T: ArrowNativeTypeOp>: Copy + Default {
    /// Accumulate a non-null value.
    fn accumulate(&mut self, value: T);
    /// Accumulate a nullable value.
    /// If `valid` is false the `value` should not affect the accumulator state.
    fn accumulate_nullable(&mut self, value: T, valid: bool);
    /// Merge another accumulator into this accumulator
    fn merge(&mut self, other: Self);
    /// Return the aggregated value.
    fn finish(&mut self) -> T;
}
43 | | |
/// Picks `a` when `m` is true and `b` otherwise.
///
/// Both candidates are already evaluated by the caller, which is what makes a
/// branchless selection possible. The generated assembly was checked, so a
/// plain conditional expression is sufficient here.
#[inline(always)]
fn select<T: Copy>(m: bool, a: T, b: T) -> T {
    match m {
        true => a,
        false => b,
    }
}
54 | | |
55 | | #[derive(Clone, Copy)] |
56 | | struct SumAccumulator<T: ArrowNativeTypeOp> { |
57 | | sum: T, |
58 | | } |
59 | | |
60 | | impl<T: ArrowNativeTypeOp> Default for SumAccumulator<T> { |
61 | | fn default() -> Self { |
62 | | Self { sum: T::ZERO } |
63 | | } |
64 | | } |
65 | | |
66 | | impl<T: ArrowNativeTypeOp> NumericAccumulator<T> for SumAccumulator<T> { |
67 | | fn accumulate(&mut self, value: T) { |
68 | | self.sum = self.sum.add_wrapping(value); |
69 | | } |
70 | | |
71 | | fn accumulate_nullable(&mut self, value: T, valid: bool) { |
72 | | let sum = self.sum; |
73 | | self.sum = select(valid, sum.add_wrapping(value), sum) |
74 | | } |
75 | | |
76 | | fn merge(&mut self, other: Self) { |
77 | | self.sum = self.sum.add_wrapping(other.sum); |
78 | | } |
79 | | |
80 | | fn finish(&mut self) -> T { |
81 | | self.sum |
82 | | } |
83 | | } |
84 | | |
85 | | #[derive(Clone, Copy)] |
86 | | struct MinAccumulator<T: ArrowNativeTypeOp> { |
87 | | min: T, |
88 | | } |
89 | | |
90 | | impl<T: ArrowNativeTypeOp> Default for MinAccumulator<T> { |
91 | | fn default() -> Self { |
92 | | Self { |
93 | | min: T::MAX_TOTAL_ORDER, |
94 | | } |
95 | | } |
96 | | } |
97 | | |
98 | | impl<T: ArrowNativeTypeOp> NumericAccumulator<T> for MinAccumulator<T> { |
99 | | fn accumulate(&mut self, value: T) { |
100 | | let min = self.min; |
101 | | self.min = select(value.is_lt(min), value, min); |
102 | | } |
103 | | |
104 | | fn accumulate_nullable(&mut self, value: T, valid: bool) { |
105 | | let min = self.min; |
106 | | let is_lt = valid & value.is_lt(min); |
107 | | self.min = select(is_lt, value, min); |
108 | | } |
109 | | |
110 | | fn merge(&mut self, other: Self) { |
111 | | self.accumulate(other.min) |
112 | | } |
113 | | |
114 | | fn finish(&mut self) -> T { |
115 | | self.min |
116 | | } |
117 | | } |
118 | | |
119 | | #[derive(Clone, Copy)] |
120 | | struct MaxAccumulator<T: ArrowNativeTypeOp> { |
121 | | max: T, |
122 | | } |
123 | | |
124 | | impl<T: ArrowNativeTypeOp> Default for MaxAccumulator<T> { |
125 | | fn default() -> Self { |
126 | | Self { |
127 | | max: T::MIN_TOTAL_ORDER, |
128 | | } |
129 | | } |
130 | | } |
131 | | |
132 | | impl<T: ArrowNativeTypeOp> NumericAccumulator<T> for MaxAccumulator<T> { |
133 | | fn accumulate(&mut self, value: T) { |
134 | | let max = self.max; |
135 | | self.max = select(value.is_gt(max), value, max); |
136 | | } |
137 | | |
138 | | fn accumulate_nullable(&mut self, value: T, valid: bool) { |
139 | | let max = self.max; |
140 | | let is_gt = value.is_gt(max) & valid; |
141 | | self.max = select(is_gt, value, max); |
142 | | } |
143 | | |
144 | | fn merge(&mut self, other: Self) { |
145 | | self.accumulate(other.max) |
146 | | } |
147 | | |
148 | | fn finish(&mut self) -> T { |
149 | | self.max |
150 | | } |
151 | | } |
152 | | |
153 | | fn reduce_accumulators<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>( |
154 | | mut acc: [A; LANES], |
155 | | ) -> A { |
156 | | assert!(LANES > 0 && LANES.is_power_of_two()); |
157 | | let mut len = LANES; |
158 | | |
159 | | // attempt at tree reduction, unfortunately llvm does not fully recognize this pattern, |
160 | | // but the generated code is still a little faster than purely sequential reduction for floats. |
161 | | while len >= 2 { |
162 | | let mid = len / 2; |
163 | | let (h, t) = acc[..len].split_at_mut(mid); |
164 | | |
165 | | for i in 0..mid { |
166 | | h[i].merge(t[i]); |
167 | | } |
168 | | len /= 2; |
169 | | } |
170 | | acc[0] |
171 | | } |
172 | | |
173 | | #[inline(always)] |
174 | | fn aggregate_nonnull_chunk<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>( |
175 | | acc: &mut [A; LANES], |
176 | | values: &[T; LANES], |
177 | | ) { |
178 | | for i in 0..LANES { |
179 | | acc[i].accumulate(values[i]); |
180 | | } |
181 | | } |
182 | | |
183 | | #[inline(always)] |
184 | | fn aggregate_nullable_chunk<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>( |
185 | | acc: &mut [A; LANES], |
186 | | values: &[T; LANES], |
187 | | validity: u64, |
188 | | ) { |
189 | | let mut bit = 1; |
190 | | for i in 0..LANES { |
191 | | acc[i].accumulate_nullable(values[i], (validity & bit) != 0); |
192 | | bit <<= 1; |
193 | | } |
194 | | } |
195 | | |
196 | | fn aggregate_nonnull_simple<T: ArrowNativeTypeOp, A: NumericAccumulator<T>>(values: &[T]) -> T { |
197 | | values |
198 | | .iter() |
199 | | .copied() |
200 | | .fold(A::default(), |mut a, b| { |
201 | | a.accumulate(b); |
202 | | a |
203 | | }) |
204 | | .finish() |
205 | | } |
206 | | |
207 | | #[inline(never)] |
208 | | fn aggregate_nonnull_lanes<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>( |
209 | | values: &[T], |
210 | | ) -> T { |
211 | | // aggregating into multiple independent accumulators allows the compiler to use vector registers |
212 | | // with a single accumulator the compiler would not be allowed to reorder floating point addition |
213 | | let mut acc = [A::default(); LANES]; |
214 | | let mut chunks = values.chunks_exact(LANES); |
215 | | chunks.borrow_mut().for_each(|chunk| { |
216 | | aggregate_nonnull_chunk(&mut acc, chunk[..LANES].try_into().unwrap()); |
217 | | }); |
218 | | |
219 | | let remainder = chunks.remainder(); |
220 | | for i in 0..remainder.len() { |
221 | | acc[i].accumulate(remainder[i]); |
222 | | } |
223 | | |
224 | | reduce_accumulators(acc).finish() |
225 | | } |
226 | | |
227 | | #[inline(never)] |
228 | | fn aggregate_nullable_lanes<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>( |
229 | | values: &[T], |
230 | | validity: &NullBuffer, |
231 | | ) -> T { |
232 | | assert!(LANES > 0 && 64 % LANES == 0); |
233 | | assert_eq!(values.len(), validity.len()); |
234 | | |
235 | | // aggregating into multiple independent accumulators allows the compiler to use vector registers |
236 | | let mut acc = [A::default(); LANES]; |
237 | | // we process 64 bits of validity at a time |
238 | | let mut values_chunks = values.chunks_exact(64); |
239 | | let validity_chunks = validity.inner().bit_chunks(); |
240 | | let mut validity_chunks_iter = validity_chunks.iter(); |
241 | | |
242 | | values_chunks.borrow_mut().for_each(|chunk| { |
243 | | // Safety: we asserted that values and validity have the same length and trust the iterator impl |
244 | | let mut validity = unsafe { validity_chunks_iter.next().unwrap_unchecked() }; |
245 | | // chunk further based on the number of vector lanes |
246 | | chunk.chunks_exact(LANES).for_each(|chunk| { |
247 | | aggregate_nullable_chunk(&mut acc, chunk[..LANES].try_into().unwrap(), validity); |
248 | | validity >>= LANES; |
249 | | }); |
250 | | }); |
251 | | |
252 | | let remainder = values_chunks.remainder(); |
253 | | if !remainder.is_empty() { |
254 | | let mut validity = validity_chunks.remainder_bits(); |
255 | | |
256 | | let mut remainder_chunks = remainder.chunks_exact(LANES); |
257 | | remainder_chunks.borrow_mut().for_each(|chunk| { |
258 | | aggregate_nullable_chunk(&mut acc, chunk[..LANES].try_into().unwrap(), validity); |
259 | | validity >>= LANES; |
260 | | }); |
261 | | |
262 | | let remainder = remainder_chunks.remainder(); |
263 | | if !remainder.is_empty() { |
264 | | let mut bit = 1; |
265 | | for i in 0..remainder.len() { |
266 | | acc[i].accumulate_nullable(remainder[i], (validity & bit) != 0); |
267 | | bit <<= 1; |
268 | | } |
269 | | } |
270 | | } |
271 | | |
272 | | reduce_accumulators(acc).finish() |
273 | | } |
274 | | |
/// The preferred vector size in bytes for the target platform.
/// Note that the avx512 target feature is still unstable and this also means it is not detected on stable rust.
const PREFERRED_VECTOR_SIZE: usize = {
    let is_x86_64 = cfg!(target_arch = "x86_64");
    if is_x86_64 && cfg!(target_feature = "avx512f") {
        64
    } else if is_x86_64 && cfg!(target_feature = "avx") {
        32
    } else {
        16
    }
};

/// non-nullable aggregation requires fewer temporary registers so we can use more of them for accumulators
const PREFERRED_VECTOR_SIZE_NON_NULL: usize = 2 * PREFERRED_VECTOR_SIZE;
288 | | |
289 | | /// Generic aggregation for any primitive type. |
290 | | /// Returns None if there are no non-null values in `array`. |
291 | | fn aggregate<T: ArrowNativeTypeOp, P: ArrowPrimitiveType<Native = T>, A: NumericAccumulator<T>>( |
292 | | array: &PrimitiveArray<P>, |
293 | | ) -> Option<T> { |
294 | | let null_count = array.null_count(); |
295 | | if null_count == array.len() { |
296 | | return None; |
297 | | } |
298 | | let values = array.values().as_ref(); |
299 | | match array.nulls() { |
300 | | Some(nulls) if null_count > 0 => { |
301 | | // const generics depending on a generic type parameter are not supported |
302 | | // so we have to match and call aggregate with the corresponding constant |
303 | | match PREFERRED_VECTOR_SIZE / std::mem::size_of::<T>() { |
304 | | 64 => Some(aggregate_nullable_lanes::<T, A, 64>(values, nulls)), |
305 | | 32 => Some(aggregate_nullable_lanes::<T, A, 32>(values, nulls)), |
306 | | 16 => Some(aggregate_nullable_lanes::<T, A, 16>(values, nulls)), |
307 | | 8 => Some(aggregate_nullable_lanes::<T, A, 8>(values, nulls)), |
308 | | 4 => Some(aggregate_nullable_lanes::<T, A, 4>(values, nulls)), |
309 | | 2 => Some(aggregate_nullable_lanes::<T, A, 2>(values, nulls)), |
310 | | _ => Some(aggregate_nullable_lanes::<T, A, 1>(values, nulls)), |
311 | | } |
312 | | } |
313 | | _ => { |
314 | | let is_float = matches!( |
315 | | array.data_type(), |
316 | | DataType::Float16 | DataType::Float32 | DataType::Float64 |
317 | | ); |
318 | | if is_float { |
319 | | match PREFERRED_VECTOR_SIZE_NON_NULL / std::mem::size_of::<T>() { |
320 | | 64 => Some(aggregate_nonnull_lanes::<T, A, 64>(values)), |
321 | | 32 => Some(aggregate_nonnull_lanes::<T, A, 32>(values)), |
322 | | 16 => Some(aggregate_nonnull_lanes::<T, A, 16>(values)), |
323 | | 8 => Some(aggregate_nonnull_lanes::<T, A, 8>(values)), |
324 | | 4 => Some(aggregate_nonnull_lanes::<T, A, 4>(values)), |
325 | | 2 => Some(aggregate_nonnull_lanes::<T, A, 2>(values)), |
326 | | _ => Some(aggregate_nonnull_simple::<T, A>(values)), |
327 | | } |
328 | | } else { |
329 | | // for non-null integers its better to not chunk ourselves and instead |
330 | | // let llvm fully handle loop unrolling and vectorization |
331 | | Some(aggregate_nonnull_simple::<T, A>(values)) |
332 | | } |
333 | | } |
334 | | } |
335 | | } |
336 | | |
337 | | /// Returns the minimum value in the boolean array. |
338 | | /// |
339 | | /// ``` |
340 | | /// # use arrow_array::BooleanArray; |
341 | | /// # use arrow_arith::aggregate::min_boolean; |
342 | | /// |
343 | | /// let a = BooleanArray::from(vec![Some(true), None, Some(false)]); |
344 | | /// assert_eq!(min_boolean(&a), Some(false)) |
345 | | /// ``` |
346 | | pub fn min_boolean(array: &BooleanArray) -> Option<bool> { |
347 | | // short circuit if all nulls / zero length array |
348 | | if array.null_count() == array.len() { |
349 | | return None; |
350 | | } |
351 | | |
352 | | // Note the min bool is false (0), so short circuit as soon as we see it |
353 | | match array.nulls() { |
354 | | None => { |
355 | | let bit_chunks = array.values().bit_chunks(); |
356 | 0 | if bit_chunks.iter().any(|x| { |
357 | | // u64::MAX has all bits set, so if the value is not that, then there is a false |
358 | 0 | x != u64::MAX |
359 | 0 | }) { |
360 | | return Some(false); |
361 | | } |
362 | | // If the remainder bits are not all set, then there is a false |
363 | | if bit_chunks.remainder_bits().count_ones() as usize != bit_chunks.remainder_len() { |
364 | | Some(false) |
365 | | } else { |
366 | | Some(true) |
367 | | } |
368 | | } |
369 | | Some(nulls) => { |
370 | | let validity_chunks = nulls.inner().bit_chunks(); |
371 | | let value_chunks = array.values().bit_chunks(); |
372 | | |
373 | | if value_chunks |
374 | | .iter() |
375 | | .zip(validity_chunks.iter()) |
376 | 0 | .any(|(value, validity)| { |
377 | | // We are looking for a false value, but because applying the validity mask |
378 | | // can create a false for a true value (e.g. value: true, validity: false), we instead invert the value, so that we have to look for a true. |
379 | 0 | (!value & validity) != 0 |
380 | 0 | }) |
381 | | { |
382 | | return Some(false); |
383 | | } |
384 | | |
385 | | // Same trick as above: Instead of looking for a false, we invert the value bits and look for a true |
386 | | if (!value_chunks.remainder_bits() & validity_chunks.remainder_bits()) != 0 { |
387 | | Some(false) |
388 | | } else { |
389 | | Some(true) |
390 | | } |
391 | | } |
392 | | } |
393 | | } |
394 | | |
395 | | /// Returns the maximum value in the boolean array |
396 | | /// |
397 | | /// ``` |
398 | | /// # use arrow_array::BooleanArray; |
399 | | /// # use arrow_arith::aggregate::max_boolean; |
400 | | /// |
401 | | /// let a = BooleanArray::from(vec![Some(true), None, Some(false)]); |
402 | | /// assert_eq!(max_boolean(&a), Some(true)) |
403 | | /// ``` |
404 | | pub fn max_boolean(array: &BooleanArray) -> Option<bool> { |
405 | | // short circuit if all nulls / zero length array |
406 | | if array.null_count() == array.len() { |
407 | | return None; |
408 | | } |
409 | | |
410 | | // Note the max bool is true (1), so short circuit as soon as we see it |
411 | | match array.nulls() { |
412 | | None => array |
413 | | .values() |
414 | | .bit_chunks() |
415 | | .iter_padded() |
416 | | // We found a true if any bit is set |
417 | 0 | .map(|x| x != 0) |
418 | | .find(|b| *b) |
419 | | .or(Some(false)), |
420 | | Some(nulls) => { |
421 | | let validity_chunks = nulls.inner().bit_chunks().iter_padded(); |
422 | | let value_chunks = array.values().bit_chunks().iter_padded(); |
423 | | value_chunks |
424 | | .zip(validity_chunks) |
425 | | // We found a true if the value bit is 1, AND the validity bit is 1 for any bits in the chunk |
426 | 0 | .map(|(value_bits, validity_bits)| (value_bits & validity_bits) != 0) |
427 | | .find(|b| *b) |
428 | | .or(Some(false)) |
429 | | } |
430 | | } |
431 | | } |
432 | | |
433 | | /// Helper to compute min/max of [`ArrayAccessor`]. |
434 | 0 | fn min_max_helper<T, A: ArrayAccessor<Item = T>, F>(array: A, cmp: F) -> Option<T> |
435 | 0 | where |
436 | 0 | F: Fn(&T, &T) -> bool, |
437 | | { |
438 | 0 | let null_count = array.null_count(); |
439 | 0 | if null_count == array.len() { |
440 | 0 | None |
441 | 0 | } else if null_count == 0 { |
442 | | // JUSTIFICATION |
443 | | // Benefit: ~8% speedup |
444 | | // Soundness: `i` is always within the array bounds |
445 | 0 | (0..array.len()) |
446 | 0 | .map(|i| unsafe { array.value_unchecked(i) }) |
447 | 0 | .reduce(|acc, item| if cmp(&acc, &item) { item } else { acc }) |
448 | | } else { |
449 | 0 | let nulls = array.nulls().unwrap(); |
450 | | unsafe { |
451 | 0 | let idx = nulls.valid_indices().reduce(|acc_idx, idx| { |
452 | 0 | let acc = array.value_unchecked(acc_idx); |
453 | 0 | let item = array.value_unchecked(idx); |
454 | 0 | if cmp(&acc, &item) { |
455 | 0 | idx |
456 | | } else { |
457 | 0 | acc_idx |
458 | | } |
459 | 0 | }); |
460 | 0 | idx.map(|idx| array.value_unchecked(idx)) |
461 | | } |
462 | | } |
463 | 0 | } |
464 | | |
465 | | /// Helper to compute min/max of [`GenericByteViewArray<T>`]. |
466 | | /// The specialized min/max leverages the inlined values to compare the byte views. |
467 | | /// `swap_cond` is the condition to swap current min/max with the new value. |
468 | | /// For example, `Ordering::Greater` for max and `Ordering::Less` for min. |
469 | 0 | fn min_max_view_helper<T: ByteViewType>( |
470 | 0 | array: &GenericByteViewArray<T>, |
471 | 0 | swap_cond: cmp::Ordering, |
472 | 0 | ) -> Option<&T::Native> { |
473 | 0 | let null_count = array.null_count(); |
474 | 0 | if null_count == array.len() { |
475 | 0 | None |
476 | 0 | } else if null_count == 0 { |
477 | 0 | let target_idx = (0..array.len()).reduce(|acc, item| { |
478 | | // SAFETY: array's length is correct so item is within bounds |
479 | 0 | let cmp = unsafe { GenericByteViewArray::compare_unchecked(array, item, array, acc) }; |
480 | 0 | if cmp == swap_cond { |
481 | 0 | item |
482 | | } else { |
483 | 0 | acc |
484 | | } |
485 | 0 | }); |
486 | | // SAFETY: idx came from valid range `0..array.len()` |
487 | 0 | unsafe { target_idx.map(|idx| array.value_unchecked(idx)) } |
488 | | } else { |
489 | 0 | let nulls = array.nulls().unwrap(); |
490 | | |
491 | 0 | let target_idx = nulls.valid_indices().reduce(|acc_idx, idx| { |
492 | 0 | let cmp = |
493 | 0 | unsafe { GenericByteViewArray::compare_unchecked(array, idx, array, acc_idx) }; |
494 | 0 | if cmp == swap_cond { |
495 | 0 | idx |
496 | | } else { |
497 | 0 | acc_idx |
498 | | } |
499 | 0 | }); |
500 | | |
501 | | // SAFETY: idx came from valid range `0..array.len()` |
502 | 0 | unsafe { target_idx.map(|idx| array.value_unchecked(idx)) } |
503 | | } |
504 | 0 | } |
505 | | |
506 | | /// Returns the maximum value in the binary array, according to the natural order. |
507 | | pub fn max_binary<T: OffsetSizeTrait>(array: &GenericBinaryArray<T>) -> Option<&[u8]> { |
508 | | min_max_helper::<&[u8], _, _>(array, |a, b| *a < *b) |
509 | | } |
510 | | |
511 | | /// Returns the maximum value in the binary view array, according to the natural order. |
512 | | pub fn max_binary_view(array: &BinaryViewArray) -> Option<&[u8]> { |
513 | | min_max_view_helper(array, Ordering::Greater) |
514 | | } |
515 | | |
516 | | /// Returns the maximum value in the fixed size binary array, according to the natural order. |
517 | | pub fn max_fixed_size_binary(array: &FixedSizeBinaryArray) -> Option<&[u8]> { |
518 | 0 | min_max_helper::<&[u8], _, _>(array, |a, b| *a < *b) |
519 | | } |
520 | | |
521 | | /// Returns the minimum value in the binary array, according to the natural order. |
522 | | pub fn min_binary<T: OffsetSizeTrait>(array: &GenericBinaryArray<T>) -> Option<&[u8]> { |
523 | | min_max_helper::<&[u8], _, _>(array, |a, b| *a > *b) |
524 | | } |
525 | | |
526 | | /// Returns the minimum value in the binary view array, according to the natural order. |
527 | | pub fn min_binary_view(array: &BinaryViewArray) -> Option<&[u8]> { |
528 | | min_max_view_helper(array, Ordering::Less) |
529 | | } |
530 | | |
531 | | /// Returns the minimum value in the fixed size binary array, according to the natural order. |
532 | | pub fn min_fixed_size_binary(array: &FixedSizeBinaryArray) -> Option<&[u8]> { |
533 | 0 | min_max_helper::<&[u8], _, _>(array, |a, b| *a > *b) |
534 | | } |
535 | | |
536 | | /// Returns the maximum value in the string array, according to the natural order. |
537 | | pub fn max_string<T: OffsetSizeTrait>(array: &GenericStringArray<T>) -> Option<&str> { |
538 | | min_max_helper::<&str, _, _>(array, |a, b| *a < *b) |
539 | | } |
540 | | |
541 | | /// Returns the maximum value in the string view array, according to the natural order. |
542 | | pub fn max_string_view(array: &StringViewArray) -> Option<&str> { |
543 | | min_max_view_helper(array, Ordering::Greater) |
544 | | } |
545 | | |
546 | | /// Returns the minimum value in the string array, according to the natural order. |
547 | | pub fn min_string<T: OffsetSizeTrait>(array: &GenericStringArray<T>) -> Option<&str> { |
548 | | min_max_helper::<&str, _, _>(array, |a, b| *a > *b) |
549 | | } |
550 | | |
551 | | /// Returns the minimum value in the string view array, according to the natural order. |
552 | | pub fn min_string_view(array: &StringViewArray) -> Option<&str> { |
553 | | min_max_view_helper(array, Ordering::Less) |
554 | | } |
555 | | |
556 | | /// Returns the sum of values in the array. |
557 | | /// |
558 | | /// This doesn't detect overflow. Once overflowing, the result will wrap around. |
559 | | /// For an overflow-checking variant, use `sum_array_checked` instead. |
560 | | pub fn sum_array<T, A: ArrayAccessor<Item = T::Native>>(array: A) -> Option<T::Native> |
561 | | where |
562 | | T: ArrowNumericType, |
563 | | T::Native: ArrowNativeTypeOp, |
564 | | { |
565 | | match array.data_type() { |
566 | | DataType::Dictionary(_, _) => { |
567 | | let null_count = array.null_count(); |
568 | | |
569 | | if null_count == array.len() { |
570 | | return None; |
571 | | } |
572 | | |
573 | | let iter = ArrayIter::new(array); |
574 | | let sum = iter |
575 | | .into_iter() |
576 | | .fold(T::default_value(), |accumulator, value| { |
577 | | if let Some(value) = value { |
578 | | accumulator.add_wrapping(value) |
579 | | } else { |
580 | | accumulator |
581 | | } |
582 | | }); |
583 | | |
584 | | Some(sum) |
585 | | } |
586 | | _ => sum::<T>(as_primitive_array(&array)), |
587 | | } |
588 | | } |
589 | | |
590 | | /// Returns the sum of values in the array. |
591 | | /// |
592 | | /// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant, |
593 | | /// use `sum_array` instead. |
594 | | pub fn sum_array_checked<T, A: ArrayAccessor<Item = T::Native>>( |
595 | | array: A, |
596 | | ) -> Result<Option<T::Native>, ArrowError> |
597 | | where |
598 | | T: ArrowNumericType, |
599 | | T::Native: ArrowNativeTypeOp, |
600 | | { |
601 | | match array.data_type() { |
602 | | DataType::Dictionary(_, _) => { |
603 | | let null_count = array.null_count(); |
604 | | |
605 | | if null_count == array.len() { |
606 | | return Ok(None); |
607 | | } |
608 | | |
609 | | let iter = ArrayIter::new(array); |
610 | | let sum = iter |
611 | | .into_iter() |
612 | | .try_fold(T::default_value(), |accumulator, value| { |
613 | | if let Some(value) = value { |
614 | | accumulator.add_checked(value) |
615 | | } else { |
616 | | Ok(accumulator) |
617 | | } |
618 | | })?; |
619 | | |
620 | | Ok(Some(sum)) |
621 | | } |
622 | | _ => sum_checked::<T>(as_primitive_array(&array)), |
623 | | } |
624 | | } |
625 | | |
626 | | /// Returns the min of values in the array of `ArrowNumericType` type, or dictionary |
627 | | /// array with value of `ArrowNumericType` type. |
628 | | pub fn min_array<T, A: ArrayAccessor<Item = T::Native>>(array: A) -> Option<T::Native> |
629 | | where |
630 | | T: ArrowNumericType, |
631 | | T::Native: ArrowNativeType, |
632 | | { |
633 | | min_max_array_helper::<T, A, _, _>(array, |a, b| a.is_gt(*b), min) |
634 | | } |
635 | | |
636 | | /// Returns the max of values in the array of `ArrowNumericType` type, or dictionary |
637 | | /// array with value of `ArrowNumericType` type. |
638 | | pub fn max_array<T, A: ArrayAccessor<Item = T::Native>>(array: A) -> Option<T::Native> |
639 | | where |
640 | | T: ArrowNumericType, |
641 | | T::Native: ArrowNativeTypeOp, |
642 | | { |
643 | | min_max_array_helper::<T, A, _, _>(array, |a, b| a.is_lt(*b), max) |
644 | | } |
645 | | |
646 | | fn min_max_array_helper<T, A: ArrayAccessor<Item = T::Native>, F, M>( |
647 | | array: A, |
648 | | cmp: F, |
649 | | m: M, |
650 | | ) -> Option<T::Native> |
651 | | where |
652 | | T: ArrowNumericType, |
653 | | F: Fn(&T::Native, &T::Native) -> bool, |
654 | | M: Fn(&PrimitiveArray<T>) -> Option<T::Native>, |
655 | | { |
656 | | match array.data_type() { |
657 | | DataType::Dictionary(_, _) => min_max_helper::<T::Native, _, _>(array, cmp), |
658 | | _ => m(as_primitive_array(&array)), |
659 | | } |
660 | | } |
661 | | |
// Generates a public bitwise aggregation function over a primitive array.
//
// * `$NAME`    - name of the generated function
// * `$OP`      - method used to combine the accumulator with each value
// * `$NATIVE`  - operator trait the native type must implement
// * `$DEFAULT` - `-1` starts from `ONE.neg_wrapping()`, anything else from the default value
// * `$DOC`     - first line of the generated documentation
macro_rules! bit_operation {
    ($NAME:ident, $OP:ident, $NATIVE:ident, $DEFAULT:expr, $DOC:expr) => {
        #[doc = $DOC]
        ///
        /// Returns `None` if the array is empty or only contains null values.
        pub fn $NAME<T>(array: &PrimitiveArray<T>) -> Option<T::Native>
        where
            T: ArrowNumericType,
            T::Native: $NATIVE<Output = T::Native> + ArrowNativeTypeOp,
        {
            // Starting value of the fold.
            let initial = if $DEFAULT == -1 {
                T::Native::ONE.neg_wrapping()
            } else {
                T::default_value()
            };

            if array.null_count() == array.len() {
                return None;
            }

            let data: &[T::Native] = array.values();

            let combined = match array.nulls() {
                None => data
                    .iter()
                    .fold(initial, |accumulator, value| accumulator.$OP(*value)),
                Some(nulls) => {
                    let mut accumulator = initial;
                    let full_chunks = data.chunks_exact(64);
                    let tail = full_chunks.remainder();

                    // One 64-bit validity mask per chunk of 64 values;
                    // bit `i` of the mask belongs to value `i` of the chunk.
                    let bit_chunks = nulls.inner().bit_chunks();
                    for (chunk, mask) in full_chunks.zip(bit_chunks.iter()) {
                        for (i, value) in chunk.iter().enumerate() {
                            if mask & (1 << i) != 0 {
                                accumulator = accumulator.$OP(*value);
                            }
                        }
                    }

                    let tail_bits = bit_chunks.remainder_bits();
                    for (i, value) in tail.iter().enumerate() {
                        if tail_bits & (1 << i) != 0 {
                            accumulator = accumulator.$OP(*value);
                        }
                    }

                    accumulator
                }
            };

            Some(combined)
        }
    };
}
728 | | |
// `bit_and` starts from `-1`, which the macro turns into `ONE.neg_wrapping()`.
bit_operation!(
    bit_and,
    bitand,
    BitAnd,
    -1,
    "Returns the bitwise and of all non-null input values."
);
// `bit_or` starts from the type's default value.
bit_operation!(
    bit_or,
    bitor,
    BitOr,
    0,
    "Returns the bitwise or of all non-null input values."
);
// `bit_xor` starts from the type's default value.
bit_operation!(
    bit_xor,
    bitxor,
    BitXor,
    0,
    "Returns the bitwise xor of all non-null input values."
);
750 | | |
/// Returns true if all non-null input values are true, otherwise false.
///
/// Returns `None` if the array is empty or only contains null values.
///
/// This delegates to [`min_boolean`].
pub fn bool_and(array: &BooleanArray) -> Option<bool> {
    min_boolean(array)
}
757 | | |
/// Returns true if any non-null input value is true, otherwise false.
///
/// Returns `None` if the array is empty or only contains null values.
///
/// This delegates to [`max_boolean`].
pub fn bool_or(array: &BooleanArray) -> Option<bool> {
    max_boolean(array)
}
764 | | |
765 | | /// Returns the sum of values in the primitive array. |
766 | | /// |
767 | | /// Returns `Ok(None)` if the array is empty or only contains null values. |
768 | | /// |
769 | | /// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant, |
770 | | /// use `sum` instead. |
771 | | pub fn sum_checked<T>(array: &PrimitiveArray<T>) -> Result<Option<T::Native>, ArrowError> |
772 | | where |
773 | | T: ArrowNumericType, |
774 | | T::Native: ArrowNativeTypeOp, |
775 | | { |
776 | | let null_count = array.null_count(); |
777 | | |
778 | | if null_count == array.len() { |
779 | | return Ok(None); |
780 | | } |
781 | | |
782 | | let data: &[T::Native] = array.values(); |
783 | | |
784 | | match array.nulls() { |
785 | | None => { |
786 | | let sum = data |
787 | | .iter() |
788 | | .try_fold(T::default_value(), |accumulator, value| { |
789 | | accumulator.add_checked(*value) |
790 | | })?; |
791 | | |
792 | | Ok(Some(sum)) |
793 | | } |
794 | | Some(nulls) => { |
795 | | let mut sum = T::default_value(); |
796 | | |
797 | | try_for_each_valid_idx( |
798 | | nulls.len(), |
799 | | nulls.offset(), |
800 | | nulls.null_count(), |
801 | | Some(nulls.validity()), |
802 | | |idx| { |
803 | | unsafe { sum = sum.add_checked(array.value_unchecked(idx))? }; |
804 | | Ok::<_, ArrowError>(()) |
805 | | }, |
806 | | )?; |
807 | | |
808 | | Ok(Some(sum)) |
809 | | } |
810 | | } |
811 | | } |
812 | | |
/// Returns the sum of values in the primitive array.
///
/// Returns `None` if the array is empty or only contains null values.
///
/// This doesn't detect overflow: values are combined with `add_wrapping`, so once
/// overflowing, the result will wrap around. For an overflow-checking variant, use
/// `sum_checked` instead.
pub fn sum<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
where
    T::Native: ArrowNativeTypeOp,
{
    aggregate::<T::Native, T, SumAccumulator<T::Native>>(array)
}
825 | | |
/// Returns the minimum value in the array, according to the natural order.
/// For floating point arrays any NaN values are considered to be greater than any other non-null value
///
/// Returns `None` if the array is empty or only contains null values.
pub fn min<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
where
    T::Native: PartialOrd,
{
    aggregate::<T::Native, T, MinAccumulator<T::Native>>(array)
}
834 | | |
/// Returns the maximum value in the array, according to the natural order.
/// For floating point arrays any NaN values are considered to be greater than any other non-null value
///
/// Returns `None` if the array is empty or only contains null values.
pub fn max<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
where
    T::Native: PartialOrd,
{
    aggregate::<T::Native, T, MaxAccumulator<T::Native>>(array)
}
843 | | |
844 | | #[cfg(test)] |
845 | | mod tests { |
846 | | use super::*; |
847 | | use arrow_array::types::*; |
848 | | use builder::BooleanBuilder; |
849 | | use std::sync::Arc; |
850 | | |
/// Summing a non-null `Int32Array` adds up every element.
#[test]
fn test_primitive_array_sum() {
    let values = Int32Array::from(vec![1, 2, 3, 4, 5]);
    let total = sum(&values);
    assert_eq!(Some(15), total);
}
856 | | |
/// Summing a non-null `Float64Array` yields the floating point total.
#[test]
fn test_primitive_array_float_sum() {
    let values = Float64Array::from(vec![1.1, 2.2, 3.3, 4.4, 5.5]);
    let total = sum(&values);
    assert_eq!(Some(16.5), total);
}
862 | | |
/// Null slots must not contribute to the sum.
#[test]
fn test_primitive_array_sum_with_nulls() {
    let values = Int32Array::from(vec![None, Some(2), Some(3), None, Some(5)]);
    let total = sum(&values);
    assert_eq!(Some(10), total);
}
868 | | |
/// An array consisting only of nulls has no sum.
#[test]
fn test_primitive_array_sum_all_nulls() {
    let only_nulls = Int32Array::from(vec![None, None, None]);
    assert!(sum(&only_nulls).is_none());
}
874 | | |
/// Sums 100 `f64` values, first without a validity buffer and then with one.
#[test]
fn test_primitive_array_sum_large_float_64() {
    let dense = Float64Array::new((1..=100).map(|v| v as f64).collect(), None);
    let dense_expected = (1..=100).sum::<i64>() as f64;
    assert_eq!(Some(dense_expected), sum(&dense));

    // The value buffer keeps non-zero entries at the invalid slots, so the
    // aggregation has to honour the validity mask to get the right answer.
    let mask = NullBuffer::new((1..=100).map(|v| v % 3 == 0).collect());
    let sparse = Float64Array::new((1..=100).map(|v| v as f64).collect(), Some(mask));
    let sparse_expected = (1..=100).filter(|v| v % 3 == 0).sum::<i64>() as f64;

    assert_eq!(Some(sparse_expected), sum(&sparse));
}
889 | | |
/// Sums 100 `f32` values, first without a validity buffer and then with one.
#[test]
fn test_primitive_array_sum_large_float_32() {
    let dense = Float32Array::new((1..=100).map(|v| v as f32).collect(), None);
    let dense_expected = (1..=100).sum::<i64>() as f32;
    assert_eq!(Some(dense_expected), sum(&dense));

    // The value buffer keeps non-zero entries at the invalid slots, so the
    // aggregation has to honour the validity mask to get the right answer.
    let mask = NullBuffer::new((1..=100).map(|v| v % 3 == 0).collect());
    let sparse = Float32Array::new((1..=100).map(|v| v as f32).collect(), Some(mask));
    let sparse_expected = (1..=100).filter(|v| v % 3 == 0).sum::<i64>() as f32;

    assert_eq!(Some(sparse_expected), sum(&sparse));
}
904 | | |
/// Sums 100 `i64` values, first without a validity buffer and then with one.
#[test]
fn test_primitive_array_sum_large_64() {
    let dense = Int64Array::new((1..=100).collect(), None);
    let dense_expected: i64 = (1..=100).sum();
    assert_eq!(Some(dense_expected), sum(&dense));

    // The value buffer keeps non-zero entries at the invalid slots.
    let mask = NullBuffer::new((1..=100).map(|v| v % 3 == 0).collect());
    let sparse = Int64Array::new((1..=100).collect(), Some(mask));
    let sparse_expected: i64 = (1..=100).filter(|v| v % 3 == 0).sum();

    assert_eq!(Some(sparse_expected), sum(&sparse));
}
916 | | |
/// Sums 100 `i32` values, first without a validity buffer and then with one.
#[test]
fn test_primitive_array_sum_large_32() {
    let dense = Int32Array::new((1..=100).collect(), None);
    let dense_expected: i32 = (1..=100).sum();
    assert_eq!(Some(dense_expected), sum(&dense));

    // The value buffer keeps non-zero entries at the invalid slots.
    let mask = NullBuffer::new((1..=100).map(|v| v % 3 == 0).collect());
    let sparse = Int32Array::new((1..=100).collect(), Some(mask));
    let sparse_expected: i32 = (1..=100).filter(|v| v % 3 == 0).sum();
    assert_eq!(Some(sparse_expected), sum(&sparse));
}
927 | | |
/// Sums 100 `i16` values, first without a validity buffer and then with one.
#[test]
fn test_primitive_array_sum_large_16() {
    let dense = Int16Array::new((1..=100).collect(), None);
    let dense_expected: i16 = (1..=100).sum();
    assert_eq!(Some(dense_expected), sum(&dense));

    // The value buffer keeps non-zero entries at the invalid slots.
    let mask = NullBuffer::new((1..=100).map(|v| v % 3 == 0).collect());
    let sparse = Int16Array::new((1..=100).collect(), Some(mask));
    let sparse_expected: i16 = (1..=100).filter(|v| v % 3 == 0).sum();
    assert_eq!(Some(sparse_expected), sum(&sparse));
}
938 | | |
/// Sums 100 `u8` values; the expected totals are computed with wrapping addition
/// because they do not fit into a `u8`.
#[test]
fn test_primitive_array_sum_large_8() {
    let dense = UInt8Array::new((1..=100).collect(), None);
    let dense_expected: u8 = (1..=100).fold(0_u8, |acc, v| acc.wrapping_add(v));
    assert_eq!(Some(dense_expected), sum(&dense));

    // The value buffer keeps non-zero entries at the invalid slots.
    let mask = NullBuffer::new((1..=100).map(|v| v % 3 == 0).collect());
    let sparse = UInt8Array::new((1..=100).collect(), Some(mask));
    let sparse_expected: u8 = (1..=100)
        .filter(|v| v % 3 == 0)
        .fold(0_u8, |acc, v| acc.wrapping_add(v));
    assert_eq!(Some(sparse_expected), sum(&sparse));
}
959 | | |
/// Bitwise AND over 1..=5 clears every bit.
#[test]
fn test_primitive_array_bit_and() {
    let values = Int32Array::from(vec![1, 2, 3, 4, 5]);
    assert_eq!(Some(0), bit_and(&values));
}
965 | | |
/// Null slots are ignored by bitwise AND: only 2 & 3 is evaluated.
#[test]
fn test_primitive_array_bit_and_with_nulls() {
    let values = Int32Array::from(vec![None, Some(2), Some(3), None, None]);
    assert_eq!(Some(2), bit_and(&values));
}
971 | | |
/// Bitwise AND over an all-null array has no result.
#[test]
fn test_primitive_array_bit_and_all_nulls() {
    let only_nulls = Int32Array::from(vec![None, None, None]);
    assert!(bit_and(&only_nulls).is_none());
}
977 | | |
/// Bitwise OR over 1..=5 sets the three lowest bits.
#[test]
fn test_primitive_array_bit_or() {
    let values = Int32Array::from(vec![1, 2, 3, 4, 5]);
    assert_eq!(Some(7), bit_or(&values));
}
983 | | |
/// Null slots are ignored by bitwise OR: 2 | 3 | 5 is evaluated.
#[test]
fn test_primitive_array_bit_or_with_nulls() {
    let values = Int32Array::from(vec![None, Some(2), Some(3), None, Some(5)]);
    assert_eq!(Some(7), bit_or(&values));
}
989 | | |
/// Bitwise OR over an all-null array has no result.
#[test]
fn test_primitive_array_bit_or_all_nulls() {
    let only_nulls = Int32Array::from(vec![None, None, None]);
    assert!(bit_or(&only_nulls).is_none());
}
995 | | |
/// Bitwise XOR over 1..=5 leaves only the lowest bit set.
#[test]
fn test_primitive_array_bit_xor() {
    let values = Int32Array::from(vec![1, 2, 3, 4, 5]);
    assert_eq!(Some(1), bit_xor(&values));
}
1001 | | |
/// Null slots are ignored by bitwise XOR: 2 ^ 3 ^ 5 is evaluated.
#[test]
fn test_primitive_array_bit_xor_with_nulls() {
    let values = Int32Array::from(vec![None, Some(2), Some(3), None, Some(5)]);
    assert_eq!(Some(4), bit_xor(&values));
}
1007 | | |
/// Bitwise XOR over an all-null array has no result.
#[test]
fn test_primitive_array_bit_xor_all_nulls() {
    let only_nulls = Int32Array::from(vec![None, None, None]);
    assert!(bit_xor(&only_nulls).is_none());
}
1013 | | |
/// A single `false` makes the logical AND of the array `false`.
#[test]
fn test_primitive_array_bool_and() {
    let flags = BooleanArray::from(vec![true, false, true, false, true]);
    assert_eq!(Some(false), bool_and(&flags));
}
1019 | | |
/// Null slots are ignored: every valid slot is `true`, so the AND is `true`.
#[test]
fn test_primitive_array_bool_and_with_nulls() {
    let flags = BooleanArray::from(vec![None, Some(true), Some(true), None, Some(true)]);
    assert_eq!(Some(true), bool_and(&flags));
}
1025 | | |
/// Logical AND over an all-null array has no result.
#[test]
fn test_primitive_array_bool_and_all_nulls() {
    let only_nulls = BooleanArray::from(vec![None, None, None]);
    assert!(bool_and(&only_nulls).is_none());
}
1031 | | |
/// A single `true` makes the logical OR of the array `true`.
#[test]
fn test_primitive_array_bool_or() {
    let flags = BooleanArray::from(vec![true, false, true, false, true]);
    assert_eq!(Some(true), bool_or(&flags));
}
1037 | | |
/// Null slots are ignored: every valid slot is `false`, so the OR is `false`.
#[test]
fn test_primitive_array_bool_or_with_nulls() {
    let flags = BooleanArray::from(vec![None, Some(false), Some(false), None, Some(false)]);
    assert_eq!(Some(false), bool_or(&flags));
}
1043 | | |
/// Logical OR over an all-null array has no result.
#[test]
fn test_primitive_array_bool_or_all_nulls() {
    let only_nulls = BooleanArray::from(vec![None, None, None]);
    assert!(bool_or(&only_nulls).is_none());
}
1049 | | |
/// Min and max of a non-null, ascending `Int32Array`.
#[test]
fn test_primitive_array_min_max() {
    let values = Int32Array::from(vec![5, 6, 7, 8, 9]);
    let extremes = (min(&values), max(&values));
    assert_eq!((Some(5), Some(9)), extremes);
}
1056 | | |
/// Null slots in the middle of the array do not affect min or max.
#[test]
fn test_primitive_array_min_max_with_nulls() {
    let values = Int32Array::from(vec![Some(5), None, None, Some(8), Some(9)]);
    let extremes = (min(&values), max(&values));
    assert_eq!((Some(5), Some(9)), extremes);
}
1063 | | |
/// Leading nulls followed by descending values.
#[test]
fn test_primitive_min_max_1() {
    let values = Int32Array::from(vec![None, None, Some(5), Some(2)]);
    let smallest = min(&values);
    let largest = max(&values);
    assert_eq!(Some(2), smallest);
    assert_eq!(Some(5), largest);
}
1070 | | |
/// Min/max over non-null float arrays whose lengths sit around 256 elements.
#[test]
fn test_primitive_min_max_float_large_nonnull_array() {
    // Builds the array [1.0, 2.0, ..., len].
    let ascending =
        |len: i32| -> Float64Array { (1..=len).map(|v| Some(v as f64)).collect() };

    // min/max are on boundaries of chunked data
    let full = ascending(256);
    assert_eq!(Some(1.0), min(&full));
    assert_eq!(Some(256.0), max(&full));

    // max is last value in remainder after chunking
    let one_short = ascending(255);
    assert_eq!(Some(255.0), max(&one_short));

    // max is first value in remainder after chunking
    let one_over = ascending(257);
    assert_eq!(Some(257.0), max(&one_over));
}
1086 | | |
/// Min/max over nullable float arrays whose lengths sit around 256 elements.
///
/// Every array built here contains nulls, so the nullable aggregation path is
/// exercised for each length.
#[test]
fn test_primitive_min_max_float_large_nullable_array() {
    // Values 1.0..=256.0 with every multiple of three nulled out.
    let a: Float64Array = (1..=256)
        .map(|v| if v % 3 == 0 { None } else { Some(v as f64) })
        .collect();
    // min/max are on boundaries of chunked data
    assert_eq!(Some(1.0), min(&a));
    assert_eq!(Some(256.0), max(&a));

    // Values 1.0..=256.0 with only the first and the last slot nulled out.
    let a: Float64Array = (0..256)
        .map(|i| {
            if i == 0 || i == 255 {
                None
            } else {
                Some((i + 1) as f64)
            }
        })
        .collect();
    // boundaries of chunked data are null
    assert_eq!(Some(2.0), min(&a));
    assert_eq!(Some(255.0), max(&a));

    let a: Float64Array = (0..256)
        .map(|i| if i != 100 { None } else { Some(i as f64) })
        .collect();
    // a single non-null value somewhere in the middle
    assert_eq!(Some(100.0), min(&a));
    assert_eq!(Some(100.0), max(&a));

    // max is last value in remainder after chunking; every third slot
    // (indices 0, 3, 6, ...) is null, index 254 holding 255.0 stays valid
    let a: Float64Array = (0..255)
        .map(|i| if i % 3 == 0 { None } else { Some((i + 1) as f64) })
        .collect();
    assert_eq!(Some(255.0), max(&a));

    // max is first value in remainder after chunking; every third slot
    // (indices 0, 3, 6, ...) is null, index 256 holding 257.0 stays valid
    let a: Float64Array = (0..257)
        .map(|i| if i % 3 == 0 { None } else { Some((i + 1) as f64) })
        .collect();
    assert_eq!(Some(257.0), max(&a));
}
1130 | | |
/// An array filled with a single extreme value has that value as both min and max.
#[test]
fn test_primitive_min_max_float_edge_cases() {
    for extreme in [f64::NEG_INFINITY, f64::MIN, f64::MAX, f64::INFINITY] {
        let constant = Float64Array::from(vec![extreme; 100]);
        assert_eq!(Some(extreme), min(&constant));
        assert_eq!(Some(extreme), max(&constant));
    }
}
1149 | | |
/// When every element is NaN, both min and max are NaN.
#[test]
fn test_primitive_min_max_float_all_nans_non_null() {
    let nans = Float64Array::from(vec![f64::NAN; 100]);
    let largest = max(&nans).unwrap();
    let smallest = min(&nans).unwrap();
    assert!(largest.is_nan());
    assert!(smallest.is_nan());
}
1156 | | |
/// The sign of a NaN decides where it sorts: the positive NaN is the maximum and
/// the negative NaN is the minimum, even with both infinities present.
#[test]
fn test_primitive_min_max_float_negative_nan() {
    let values =
        Float64Array::from(vec![f64::NEG_INFINITY, f64::NAN, f64::INFINITY, -f64::NAN]);
    let largest = max(&values).unwrap();
    let smallest = min(&values).unwrap();

    assert!(largest.is_nan());
    assert!(largest.is_sign_positive());

    assert!(smallest.is_nan());
    assert!(smallest.is_sign_negative());
}
1169 | | |
/// A NaN in the first slot of a non-null array becomes the max but not the min.
#[test]
fn test_primitive_min_max_float_first_nan_nonnull() {
    // [NaN, 1.0, 2.0, ..., 99.0]
    let values: Float64Array = std::iter::once(f64::NAN)
        .chain((1..100).map(|i| i as f64))
        .map(Some)
        .collect();
    assert_eq!(Some(1.0), min(&values));
    assert!(max(&values).unwrap().is_nan());
}
1184 | | |
/// A NaN in the last slot of a non-null array becomes the max but not the min.
#[test]
fn test_primitive_min_max_float_last_nan_nonnull() {
    // [1.0, 2.0, ..., 99.0, NaN]
    let values: Float64Array = (1..=99)
        .map(|v| v as f64)
        .chain(std::iter::once(f64::NAN))
        .map(Some)
        .collect();
    assert_eq!(Some(1.0), min(&values));
    assert!(max(&values).unwrap().is_nan());
}
1199 | | |
/// A NaN in the first slot of a nullable array becomes the max but not the min.
#[test]
fn test_primitive_min_max_float_first_nan_nullable() {
    // Slot 0 holds NaN, the remaining even slots are null, odd slots hold their index.
    let values: Float64Array = (0..100)
        .map(|i| match i {
            0 => Some(f64::NAN),
            n if n % 2 == 0 => None,
            n => Some(n as f64),
        })
        .collect();
    assert_eq!(Some(1.0), min(&values));
    assert!(max(&values).unwrap().is_nan());
}
1216 | | |
/// A NaN in the last slot of a nullable array becomes the max but not the min.
#[test]
fn test_primitive_min_max_float_last_nan_nullable() {
    // Slot 99 holds NaN, even slots are null, the remaining odd slots hold their index.
    let values: Float64Array = (0..100)
        .map(|i| match i {
            99 => Some(f64::NAN),
            n if n % 2 == 0 => None,
            n => Some(n as f64),
        })
        .collect();
    assert_eq!(Some(1.0), min(&values));
    assert!(max(&values).unwrap().is_nan());
}
1233 | | |
/// Mixes infinities, finite extremes, NaNs and ordinary values in one array.
#[test]
fn test_primitive_min_max_float_inf_and_nans() {
    // Maps an index to a special value based on its last decimal digit; every
    // other index is used as the value itself.
    let value_at = |i: i32| -> f64 {
        match i % 10 {
            0 => f64::NEG_INFINITY,
            1 => f64::MIN,
            2 => f64::MAX,
            4 => f64::INFINITY,
            5 => f64::NAN,
            _ => i as f64,
        }
    };

    let values: Float64Array = (0..100).map(|i| Some(value_at(i))).collect();
    assert_eq!(Some(f64::NEG_INFINITY), min(&values));
    assert!(max(&values).unwrap().is_nan());
}
1252 | | |
1253 | | fn pad_inputs_and_test_fixed_size_binary( |
1254 | | input: Vec<Option<&[u8]>>, |
1255 | | expected_min: Option<&[u8]>, |
1256 | | expected_max: Option<&[u8]>, |
1257 | | ) { |
1258 | | fn pad_slice(slice: &[u8], len: usize) -> Vec<u8> { |
1259 | | let mut padded = vec![0; len]; |
1260 | | padded[..slice.len()].copy_from_slice(slice); |
1261 | | padded |
1262 | | } |
1263 | | |
1264 | | let max_len = input |
1265 | | .iter() |
1266 | | .filter_map(|x| x.as_ref().map(|b| b.len())) |
1267 | | .max() |
1268 | | .unwrap_or(0); |
1269 | | let padded_input = input |
1270 | | .iter() |
1271 | | .map(|x| x.as_ref().map(|b| pad_slice(b, max_len))); |
1272 | | let input_arr = |
1273 | | FixedSizeBinaryArray::try_from_sparse_iter_with_size(padded_input, max_len as i32) |
1274 | | .unwrap(); |
1275 | | let padded_expected_min = expected_min.map(|b| pad_slice(b, max_len)); |
1276 | | let padded_expected_max = expected_max.map(|b| pad_slice(b, max_len)); |
1277 | | |
1278 | | assert_eq!( |
1279 | | padded_expected_min.as_deref(), |
1280 | | min_fixed_size_binary(&input_arr) |
1281 | | ); |
1282 | | assert_eq!( |
1283 | | padded_expected_max.as_deref(), |
1284 | | max_fixed_size_binary(&input_arr) |
1285 | | ); |
1286 | | } |
1287 | | |
// Generates a `#[test]` named `$name` that checks the expected min/max of `$array`
// for `BinaryArray`, `LargeBinaryArray`, `BinaryViewArray` and (after zero padding)
// `FixedSizeBinaryArray`. `$array` is evaluated once per array flavour.
macro_rules! test_binary {
    ($name:ident, $array:expr, $expected_min:expr, $expected_max:expr) => {
        #[test]
        fn $name() {
            let small_offsets = BinaryArray::from($array);
            assert_eq!($expected_min, min_binary(&small_offsets));
            assert_eq!($expected_max, max_binary(&small_offsets));

            let large_offsets = LargeBinaryArray::from($array);
            assert_eq!($expected_min, min_binary(&large_offsets));
            assert_eq!($expected_max, max_binary(&large_offsets));

            let views = BinaryViewArray::from($array);
            assert_eq!($expected_min, min_binary_view(&views));
            assert_eq!($expected_max, max_binary_view(&views));

            pad_inputs_and_test_fixed_size_binary($array, $expected_min, $expected_max);
        }
    };
}
1308 | | |
1309 | | test_binary!( |
1310 | | test_binary_min_max_with_nulls, |
1311 | | vec![ |
1312 | | Some("b01234567890123".as_bytes()), // long bytes |
1313 | | None, |
1314 | | None, |
1315 | | Some(b"a"), |
1316 | | Some(b"c"), |
1317 | | Some(b"abcdedfg0123456"), |
1318 | | ], |
1319 | | Some("a".as_bytes()), |
1320 | | Some("c".as_bytes()) |
1321 | | ); |
1322 | | |
1323 | | test_binary!( |
1324 | | test_binary_min_max_no_null, |
1325 | | vec![ |
1326 | | Some("b".as_bytes()), |
1327 | | Some(b"abcdefghijklmnopqrst"), // long bytes |
1328 | | Some(b"c"), |
1329 | | Some(b"b01234567890123"), // long bytes for view types |
1330 | | ], |
1331 | | Some("abcdefghijklmnopqrst".as_bytes()), |
1332 | | Some("c".as_bytes()) |
1333 | | ); |
1334 | | |
1335 | | test_binary!(test_binary_min_max_all_nulls, vec![None, None], None, None); |
1336 | | |
1337 | | test_binary!( |
1338 | | test_binary_min_max_1, |
1339 | | vec![ |
1340 | | None, |
1341 | | Some("b01234567890123435".as_bytes()), // long bytes for view types |
1342 | | None, |
1343 | | Some(b"b0123xxxxxxxxxxx"), |
1344 | | Some(b"a") |
1345 | | ], |
1346 | | Some("a".as_bytes()), |
1347 | | Some("b0123xxxxxxxxxxx".as_bytes()) |
1348 | | ); |
1349 | | |
// Generates a `#[test]` named `$name` that checks the expected min/max of `$array`
// for `StringArray`, `LargeStringArray` and `StringViewArray`. `$array` is
// evaluated once per array flavour.
macro_rules! test_string {
    ($name:ident, $array:expr, $expected_min:expr, $expected_max:expr) => {
        #[test]
        fn $name() {
            let small_offsets = StringArray::from($array);
            assert_eq!($expected_min, min_string(&small_offsets));
            assert_eq!($expected_max, max_string(&small_offsets));

            let large_offsets = LargeStringArray::from($array);
            assert_eq!($expected_min, min_string(&large_offsets));
            assert_eq!($expected_max, max_string(&large_offsets));

            let views = StringViewArray::from($array);
            assert_eq!($expected_min, min_string_view(&views));
            assert_eq!($expected_max, max_string_view(&views));
        }
    };
}
1368 | | |
1369 | | test_string!( |
1370 | | test_string_min_max_with_nulls, |
1371 | | vec![ |
1372 | | Some("b012345678901234"), // long bytes for view types |
1373 | | None, |
1374 | | None, |
1375 | | Some("a"), |
1376 | | Some("c"), |
1377 | | Some("b0123xxxxxxxxxxx") |
1378 | | ], |
1379 | | Some("a"), |
1380 | | Some("c") |
1381 | | ); |
1382 | | |
1383 | | test_string!( |
1384 | | test_string_min_max_no_null, |
1385 | | vec![ |
1386 | | Some("b"), |
1387 | | Some("b012345678901234"), // long bytes for view types |
1388 | | Some("a"), |
1389 | | Some("b012xxxxxxxxxxxx") |
1390 | | ], |
1391 | | Some("a"), |
1392 | | Some("b012xxxxxxxxxxxx") |
1393 | | ); |
1394 | | |
1395 | | test_string!( |
1396 | | test_string_min_max_all_nulls, |
1397 | | Vec::<Option<&str>>::from_iter([None, None]), |
1398 | | None, |
1399 | | None |
1400 | | ); |
1401 | | |
1402 | | test_string!( |
1403 | | test_string_min_max_1, |
1404 | | vec![ |
1405 | | None, |
1406 | | Some("c12345678901234"), // long bytes for view types |
1407 | | None, |
1408 | | Some("b"), |
1409 | | Some("c1234xxxxxxxxxx") |
1410 | | ], |
1411 | | Some("b"), |
1412 | | Some("c1234xxxxxxxxxx") |
1413 | | ); |
1414 | | |
1415 | | test_string!( |
1416 | | test_string_min_max_empty, |
1417 | | Vec::<Option<&str>>::new(), |
1418 | | None, |
1419 | | None |
1420 | | ); |
1421 | | |
/// An empty boolean array has neither a minimum nor a maximum.
#[test]
fn test_boolean_min_max_empty() {
    let empty = BooleanArray::from(Vec::<Option<bool>>::new());
    assert!(min_boolean(&empty).is_none());
    assert!(max_boolean(&empty).is_none());
}
1428 | | |
/// An all-null boolean array has neither a minimum nor a maximum.
#[test]
fn test_boolean_min_max_all_null() {
    let only_nulls = BooleanArray::from(vec![None, None]);
    assert!(min_boolean(&only_nulls).is_none());
    assert!(max_boolean(&only_nulls).is_none());
}
1435 | | |
/// With both values present and no nulls, min is `false` and max is `true`.
#[test]
fn test_boolean_min_max_no_null() {
    let flags = BooleanArray::from(vec![Some(true), Some(false), Some(true)]);
    let extremes = (min_boolean(&flags), max_boolean(&flags));
    assert_eq!((Some(false), Some(true)), extremes);
}
1442 | | |
/// Table-driven check of boolean min/max over small arrays with and without nulls.
#[test]
fn test_boolean_min_max() {
    // (input values, expected minimum, expected maximum)
    let cases: Vec<(Vec<Option<bool>>, Option<bool>, Option<bool>)> = vec![
        (
            vec![Some(true), Some(true), None, Some(false), None],
            Some(false),
            Some(true),
        ),
        (
            vec![None, Some(true), None, Some(false), None],
            Some(false),
            Some(true),
        ),
        (
            vec![Some(false), Some(true), None, Some(false), None],
            Some(false),
            Some(true),
        ),
        (vec![Some(true), None], Some(true), Some(true)),
        (vec![Some(false), None], Some(false), Some(false)),
        (vec![Some(true)], Some(true), Some(true)),
        (vec![Some(false)], Some(false), Some(false)),
    ];

    for (values, want_min, want_max) in cases {
        let flags = BooleanArray::from(values);
        assert_eq!(want_min, min_boolean(&flags));
        assert_eq!(want_max, max_boolean(&flags));
    }
}
1473 | | |
/// One- and two-element arrays holding a single valid value: min and max are both
/// that value.
#[test]
fn test_boolean_min_max_smaller() {
    // (input values, the only valid value)
    let cases: Vec<(Vec<Option<bool>>, bool)> = vec![
        (vec![Some(false)], false),
        (vec![None, Some(false)], false),
        (vec![None, Some(true)], true),
        (vec![Some(true)], true),
    ];

    for (values, only_value) in cases {
        let flags = BooleanArray::from(values);
        assert_eq!(Some(only_value), min_boolean(&flags));
        assert_eq!(Some(only_value), max_boolean(&flags));
    }
}
1492 | | |
/// 128-slot arrays whose first half is `true` and whose second half starts with `false`.
#[test]
fn test_boolean_min_max_64_true_64_false() {
    let no_nulls = {
        let mut builder = BooleanBuilder::new();
        builder.append_slice(&[true; 64]);
        builder.append_slice(&[false; 64]);
        builder.finish()
    };

    assert_eq!(Some(false), min_boolean(&no_nulls));
    assert_eq!(Some(true), max_boolean(&no_nulls));

    // 31 x true, null, 32 x true, a single false, then 63 nulls.
    let with_nulls = {
        let mut builder = BooleanBuilder::new();
        builder.append_slice(&[true; 31]);
        builder.append_null();
        builder.append_slice(&[true; 32]);
        builder.append_slice(&[false; 1]);
        builder.append_nulls(63);
        builder.finish()
    };

    assert_eq!(Some(false), min_boolean(&with_nulls));
    assert_eq!(Some(true), max_boolean(&with_nulls));
}
1514 | | |
/// 128-slot arrays whose first half is `false` and whose second half starts with `true`.
#[test]
fn test_boolean_min_max_64_false_64_true() {
    let no_nulls = {
        let mut builder = BooleanBuilder::new();
        builder.append_slice(&[false; 64]);
        builder.append_slice(&[true; 64]);
        builder.finish()
    };

    assert_eq!(Some(false), min_boolean(&no_nulls));
    assert_eq!(Some(true), max_boolean(&no_nulls));

    // 31 x false, null, 32 x false, a single true, then 63 nulls.
    let with_nulls = {
        let mut builder = BooleanBuilder::new();
        builder.append_slice(&[false; 31]);
        builder.append_null();
        builder.append_slice(&[false; 32]);
        builder.append_slice(&[true; 1]);
        builder.append_nulls(63);
        builder.finish()
    };

    assert_eq!(Some(false), min_boolean(&with_nulls));
    assert_eq!(Some(true), max_boolean(&with_nulls));
}
1536 | | |
/// 96-slot arrays in which every valid slot is `true`.
#[test]
fn test_boolean_min_max_96_true() {
    let no_nulls = {
        let mut builder = BooleanBuilder::new();
        builder.append_slice(&[true; 96]);
        builder.finish()
    };

    assert_eq!(Some(true), min_boolean(&no_nulls));
    assert_eq!(Some(true), max_boolean(&no_nulls));

    // 31 x true, null, 32 x true, 31 x true, null.
    let with_nulls = {
        let mut builder = BooleanBuilder::new();
        builder.append_slice(&[true; 31]);
        builder.append_null();
        builder.append_slice(&[true; 32]);
        builder.append_slice(&[true; 31]);
        builder.append_null();
        builder.finish()
    };

    assert_eq!(Some(true), min_boolean(&with_nulls));
    assert_eq!(Some(true), max_boolean(&with_nulls));
}
1557 | | |
/// 96-slot arrays in which every valid slot is `false`.
#[test]
fn test_boolean_min_max_96_false() {
    let no_nulls = {
        let mut builder = BooleanBuilder::new();
        builder.append_slice(&[false; 96]);
        builder.finish()
    };

    assert_eq!(Some(false), min_boolean(&no_nulls));
    assert_eq!(Some(false), max_boolean(&no_nulls));

    // 31 x false, null, 32 x false, 31 x false, null.
    let with_nulls = {
        let mut builder = BooleanBuilder::new();
        builder.append_slice(&[false; 31]);
        builder.append_null();
        builder.append_slice(&[false; 32]);
        builder.append_slice(&[false; 31]);
        builder.append_null();
        builder.finish()
    };

    assert_eq!(Some(false), min_boolean(&with_nulls));
    assert_eq!(Some(false), max_boolean(&with_nulls));
}
1578 | | |
/// `sum_array` over dictionary-encoded and plain primitive arrays.
#[test]
fn test_sum_dyn() {
    let dictionary_values: ArrayRef =
        Arc::new(Int8Array::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]));

    // Keys 2, 3 and 4 select 12 + 13 + 14.
    let dense_keys = Int8Array::from_iter_values([2_i8, 3, 4]);
    let dict = DictionaryArray::new(dense_keys, dictionary_values.clone());
    let typed = dict.downcast_dict::<Int8Array>().unwrap();
    assert_eq!(Some(39), sum_array::<Int8Type, _>(typed));

    // A plain primitive array is accepted as well.
    let plain = Int32Array::from(vec![1, 2, 3, 4, 5]);
    assert_eq!(Some(15), sum_array::<Int32Type, _>(&plain));

    // A null key is skipped: keys 2 and 4 select 12 + 14.
    let sparse_keys = Int8Array::from(vec![Some(2_i8), None, Some(4)]);
    let dict = DictionaryArray::new(sparse_keys, dictionary_values.clone());
    let typed = dict.downcast_dict::<Int8Array>().unwrap();
    assert_eq!(Some(26), sum_array::<Int8Type, _>(typed));

    // Only null keys: there is nothing to sum.
    let null_keys = Int8Array::from(vec![None, None, None]);
    let dict = DictionaryArray::new(null_keys, dictionary_values.clone());
    let typed = dict.downcast_dict::<Int8Array>().unwrap();
    assert!(sum_array::<Int8Type, _>(typed).is_none());
}
1602 | | |
/// `max_array` / `min_array` over dictionary-encoded and plain primitive arrays.
#[test]
fn test_max_min_dyn() {
    let dictionary_values: ArrayRef =
        Arc::new(Int8Array::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]));

    // Keys 2, 3 and 4 select the values 12, 13 and 14.
    let dense_keys = Int8Array::from_iter_values([2_i8, 3, 4]);
    let dict = DictionaryArray::new(dense_keys, dictionary_values.clone());
    let typed = dict.downcast_dict::<Int8Array>().unwrap();
    assert_eq!(Some(14), max_array::<Int8Type, _>(typed));

    let typed = dict.downcast_dict::<Int8Array>().unwrap();
    assert_eq!(Some(12), min_array::<Int8Type, _>(typed));

    // A plain primitive array is accepted as well.
    let plain = Int32Array::from(vec![1, 2, 3, 4, 5]);
    assert_eq!(Some(5), max_array::<Int32Type, _>(&plain));
    assert_eq!(Some(1), min_array::<Int32Type, _>(&plain));

    // A null key is skipped: keys 2 and 7 select the values 12 and 17.
    let sparse_keys = Int8Array::from(vec![Some(2_i8), None, Some(7)]);
    let dict = DictionaryArray::new(sparse_keys, dictionary_values.clone());
    let typed = dict.downcast_dict::<Int8Array>().unwrap();
    assert_eq!(Some(17), max_array::<Int8Type, _>(typed));
    let typed = dict.downcast_dict::<Int8Array>().unwrap();
    assert_eq!(Some(12), min_array::<Int8Type, _>(typed));

    // Only null keys: there is neither a maximum nor a minimum.
    let null_keys = Int8Array::from(vec![None, None, None]);
    let dict = DictionaryArray::new(null_keys, dictionary_values.clone());
    let typed = dict.downcast_dict::<Int8Array>().unwrap();
    assert!(max_array::<Int8Type, _>(typed).is_none());
    let typed = dict.downcast_dict::<Int8Array>().unwrap();
    assert!(min_array::<Int8Type, _>(typed).is_none());
}
1634 | | |
/// A NaN dictionary value is the maximum but never the minimum.
#[test]
fn test_max_min_dyn_nan() {
    let dictionary_values = Float32Array::from(vec![5.0_f32, 2.0_f32, f32::NAN]);
    let all_keys = Int8Array::from_iter_values([0_i8, 1, 2]);
    let dict = DictionaryArray::new(all_keys, Arc::new(dictionary_values));

    let typed = dict.downcast_dict::<Float32Array>().unwrap();
    let largest = max_array::<Float32Type, _>(typed).unwrap();
    assert!(largest.is_nan());

    let typed = dict.downcast_dict::<Float32Array>().unwrap();
    let smallest = min_array::<Float32Type, _>(typed).unwrap();
    assert_eq!(2.0_f32, smallest);
}
1647 | | |
/// Min/max must give the same answer for a slice as for an equal unsliced array.
#[test]
fn test_min_max_sliced_primitive() {
    let expected = Some(4.0);
    // Asserts that both aggregates of `array` equal `expected`.
    let check = |array: &Float64Array| {
        assert_eq!(min(array), expected);
        assert_eq!(max(array), expected);
    };

    let input: Float64Array = vec![None, Some(4.0)].into_iter().collect();
    check(&input);

    // Slicing off the first four nulls leaves the same logical content as `input`.
    let longer: Float64Array = vec![None, None, None, None, None, Some(4.0)]
        .into_iter()
        .collect();
    let sliced_input = longer.slice(4, 2);

    assert_eq!(&sliced_input, &input);

    check(&sliced_input);
}
1669 | | |
/// Boolean min/max must give the same answer for a slice as for an equal unsliced array.
#[test]
fn test_min_max_sliced_boolean() {
    let expected = Some(true);
    // Asserts that both aggregates of `array` equal `expected`.
    let check = |array: &BooleanArray| {
        assert_eq!(min_boolean(array), expected);
        assert_eq!(max_boolean(array), expected);
    };

    let input: BooleanArray = vec![None, Some(true)].into_iter().collect();
    check(&input);

    // Slicing off the first four nulls leaves the same logical content as `input`.
    let longer: BooleanArray = vec![None, None, None, None, None, Some(true)]
        .into_iter()
        .collect();
    let sliced_input = longer.slice(4, 2);

    assert_eq!(sliced_input, input);

    check(&sliced_input);
}
1691 | | |
/// String min/max must give the same answer for a slice as for an equal unsliced array.
#[test]
fn test_min_max_sliced_string() {
    let expected = Some("foo");
    // Asserts that both aggregates of `array` equal `expected`.
    let check = |array: &StringArray| {
        assert_eq!(min_string(array), expected);
        assert_eq!(max_string(array), expected);
    };

    let input: StringArray = vec![None, Some("foo")].into_iter().collect();
    check(&input);

    // Slicing off the first four nulls leaves the same logical content as `input`.
    let longer: StringArray = vec![None, None, None, None, None, Some("foo")]
        .into_iter()
        .collect();
    let sliced_input = longer.slice(4, 2);

    assert_eq!(&sliced_input, &input);

    check(&sliced_input);
}
1713 | | |
/// Binary min/max must give the same answer for a slice as for an equal unsliced array.
#[test]
fn test_min_max_sliced_binary() {
    let expected: Option<&[u8]> = Some(&[5]);
    // Asserts that both aggregates of `array` equal `expected`.
    let check = |array: &BinaryArray| {
        assert_eq!(min_binary(array), expected);
        assert_eq!(max_binary(array), expected);
    };

    let input: BinaryArray = vec![None, Some(&[5])].into_iter().collect();
    check(&input);

    // Slicing off the first four nulls leaves the same logical content as `input`.
    let longer: BinaryArray = vec![None, None, None, None, None, Some(&[5])]
        .into_iter()
        .collect();
    let sliced_input = longer.slice(4, 2);

    assert_eq!(&sliced_input, &input);

    check(&sliced_input);
}
1735 | | |
/// The unchecked sums wrap around on overflow: `i32::MAX + 1` becomes `i32::MIN`.
#[test]
fn test_sum_overflow() {
    let overflowing = Int32Array::from(vec![i32::MAX, 1]);

    assert_eq!(sum(&overflowing), Some(i32::MIN));
    assert_eq!(sum_array::<Int32Type, _>(&overflowing), Some(i32::MIN));
}
1743 | | |
/// The checked sums report an error instead of wrapping around on overflow.
#[test]
fn test_sum_checked_overflow() {
    let overflowing = Int32Array::from(vec![i32::MAX, 1]);

    let primitive_result = sum_checked(&overflowing);
    primitive_result.expect_err("overflow should be detected");

    let accessor_result = sum_array_checked::<Int32Type, _>(&overflowing);
    accessor_result.expect_err("overflow should be detected");
}
1751 | | } |