Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-arith/src/aggregate.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! Defines aggregations over Arrow arrays.
19
20
use arrow_array::cast::*;
21
use arrow_array::iterator::ArrayIter;
22
use arrow_array::*;
23
use arrow_buffer::{ArrowNativeType, NullBuffer};
24
use arrow_data::bit_iterator::try_for_each_valid_idx;
25
use arrow_schema::*;
26
use std::borrow::BorrowMut;
27
use std::cmp::{self, Ordering};
28
use std::ops::{BitAnd, BitOr, BitXor};
29
use types::ByteViewType;
30
31
/// An accumulator for primitive numeric values.
32
trait NumericAccumulator<T: ArrowNativeTypeOp>: Copy + Default {
33
    /// Accumulate a non-null value.
34
    fn accumulate(&mut self, value: T);
35
    /// Accumulate a nullable value.
36
    /// If `valid` is false the `value` should not affect the accumulator state.
37
    fn accumulate_nullable(&mut self, value: T, valid: bool);
38
    /// Merge another accumulator into this accumulator
39
    fn merge(&mut self, other: Self);
40
    /// Return the aggregated value.
41
    fn finish(&mut self) -> T;
42
}
43
44
/// Helper for branchlessly selecting either `a` or `b` based on the boolean `m`.
45
/// After verifying the generated assembly this can be a simple `if`.
46
#[inline(always)]
47
fn select<T: Copy>(m: bool, a: T, b: T) -> T {
48
    if m {
49
        a
50
    } else {
51
        b
52
    }
53
}
54
55
#[derive(Clone, Copy)]
56
struct SumAccumulator<T: ArrowNativeTypeOp> {
57
    sum: T,
58
}
59
60
impl<T: ArrowNativeTypeOp> Default for SumAccumulator<T> {
61
    fn default() -> Self {
62
        Self { sum: T::ZERO }
63
    }
64
}
65
66
impl<T: ArrowNativeTypeOp> NumericAccumulator<T> for SumAccumulator<T> {
67
    fn accumulate(&mut self, value: T) {
68
        self.sum = self.sum.add_wrapping(value);
69
    }
70
71
    fn accumulate_nullable(&mut self, value: T, valid: bool) {
72
        let sum = self.sum;
73
        self.sum = select(valid, sum.add_wrapping(value), sum)
74
    }
75
76
    fn merge(&mut self, other: Self) {
77
        self.sum = self.sum.add_wrapping(other.sum);
78
    }
79
80
    fn finish(&mut self) -> T {
81
        self.sum
82
    }
83
}
84
85
#[derive(Clone, Copy)]
86
struct MinAccumulator<T: ArrowNativeTypeOp> {
87
    min: T,
88
}
89
90
impl<T: ArrowNativeTypeOp> Default for MinAccumulator<T> {
91
    fn default() -> Self {
92
        Self {
93
            min: T::MAX_TOTAL_ORDER,
94
        }
95
    }
96
}
97
98
impl<T: ArrowNativeTypeOp> NumericAccumulator<T> for MinAccumulator<T> {
99
    fn accumulate(&mut self, value: T) {
100
        let min = self.min;
101
        self.min = select(value.is_lt(min), value, min);
102
    }
103
104
    fn accumulate_nullable(&mut self, value: T, valid: bool) {
105
        let min = self.min;
106
        let is_lt = valid & value.is_lt(min);
107
        self.min = select(is_lt, value, min);
108
    }
109
110
    fn merge(&mut self, other: Self) {
111
        self.accumulate(other.min)
112
    }
113
114
    fn finish(&mut self) -> T {
115
        self.min
116
    }
117
}
118
119
#[derive(Clone, Copy)]
120
struct MaxAccumulator<T: ArrowNativeTypeOp> {
121
    max: T,
122
}
123
124
impl<T: ArrowNativeTypeOp> Default for MaxAccumulator<T> {
125
    fn default() -> Self {
126
        Self {
127
            max: T::MIN_TOTAL_ORDER,
128
        }
129
    }
130
}
131
132
impl<T: ArrowNativeTypeOp> NumericAccumulator<T> for MaxAccumulator<T> {
133
    fn accumulate(&mut self, value: T) {
134
        let max = self.max;
135
        self.max = select(value.is_gt(max), value, max);
136
    }
137
138
    fn accumulate_nullable(&mut self, value: T, valid: bool) {
139
        let max = self.max;
140
        let is_gt = value.is_gt(max) & valid;
141
        self.max = select(is_gt, value, max);
142
    }
143
144
    fn merge(&mut self, other: Self) {
145
        self.accumulate(other.max)
146
    }
147
148
    fn finish(&mut self) -> T {
149
        self.max
150
    }
151
}
152
153
fn reduce_accumulators<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
154
    mut acc: [A; LANES],
155
) -> A {
156
    assert!(LANES > 0 && LANES.is_power_of_two());
157
    let mut len = LANES;
158
159
    // attempt at tree reduction, unfortunately llvm does not fully recognize this pattern,
160
    // but the generated code is still a little faster than purely sequential reduction for floats.
161
    while len >= 2 {
162
        let mid = len / 2;
163
        let (h, t) = acc[..len].split_at_mut(mid);
164
165
        for i in 0..mid {
166
            h[i].merge(t[i]);
167
        }
168
        len /= 2;
169
    }
170
    acc[0]
171
}
172
173
#[inline(always)]
174
fn aggregate_nonnull_chunk<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
175
    acc: &mut [A; LANES],
176
    values: &[T; LANES],
177
) {
178
    for i in 0..LANES {
179
        acc[i].accumulate(values[i]);
180
    }
181
}
182
183
#[inline(always)]
184
fn aggregate_nullable_chunk<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
185
    acc: &mut [A; LANES],
186
    values: &[T; LANES],
187
    validity: u64,
188
) {
189
    let mut bit = 1;
190
    for i in 0..LANES {
191
        acc[i].accumulate_nullable(values[i], (validity & bit) != 0);
192
        bit <<= 1;
193
    }
194
}
195
196
fn aggregate_nonnull_simple<T: ArrowNativeTypeOp, A: NumericAccumulator<T>>(values: &[T]) -> T {
197
    values
198
        .iter()
199
        .copied()
200
        .fold(A::default(), |mut a, b| {
201
            a.accumulate(b);
202
            a
203
        })
204
        .finish()
205
}
206
207
#[inline(never)]
208
fn aggregate_nonnull_lanes<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
209
    values: &[T],
210
) -> T {
211
    // aggregating into multiple independent accumulators allows the compiler to use vector registers
212
    // with a single accumulator the compiler would not be allowed to reorder floating point addition
213
    let mut acc = [A::default(); LANES];
214
    let mut chunks = values.chunks_exact(LANES);
215
    chunks.borrow_mut().for_each(|chunk| {
216
        aggregate_nonnull_chunk(&mut acc, chunk[..LANES].try_into().unwrap());
217
    });
218
219
    let remainder = chunks.remainder();
220
    for i in 0..remainder.len() {
221
        acc[i].accumulate(remainder[i]);
222
    }
223
224
    reduce_accumulators(acc).finish()
225
}
226
227
#[inline(never)]
228
fn aggregate_nullable_lanes<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
229
    values: &[T],
230
    validity: &NullBuffer,
231
) -> T {
232
    assert!(LANES > 0 && 64 % LANES == 0);
233
    assert_eq!(values.len(), validity.len());
234
235
    // aggregating into multiple independent accumulators allows the compiler to use vector registers
236
    let mut acc = [A::default(); LANES];
237
    // we process 64 bits of validity at a time
238
    let mut values_chunks = values.chunks_exact(64);
239
    let validity_chunks = validity.inner().bit_chunks();
240
    let mut validity_chunks_iter = validity_chunks.iter();
241
242
    values_chunks.borrow_mut().for_each(|chunk| {
243
        // SAFETY: we asserted that values and validity have the same length and trust the iterator impl
244
        let mut validity = unsafe { validity_chunks_iter.next().unwrap_unchecked() };
245
        // chunk further based on the number of vector lanes
246
        chunk.chunks_exact(LANES).for_each(|chunk| {
247
            aggregate_nullable_chunk(&mut acc, chunk[..LANES].try_into().unwrap(), validity);
248
            validity >>= LANES;
249
        });
250
    });
251
252
    let remainder = values_chunks.remainder();
253
    if !remainder.is_empty() {
254
        let mut validity = validity_chunks.remainder_bits();
255
256
        let mut remainder_chunks = remainder.chunks_exact(LANES);
257
        remainder_chunks.borrow_mut().for_each(|chunk| {
258
            aggregate_nullable_chunk(&mut acc, chunk[..LANES].try_into().unwrap(), validity);
259
            validity >>= LANES;
260
        });
261
262
        let remainder = remainder_chunks.remainder();
263
        if !remainder.is_empty() {
264
            let mut bit = 1;
265
            for i in 0..remainder.len() {
266
                acc[i].accumulate_nullable(remainder[i], (validity & bit) != 0);
267
                bit <<= 1;
268
            }
269
        }
270
    }
271
272
    reduce_accumulators(acc).finish()
273
}
274
275
/// The preferred vector size in bytes for the target platform.
276
/// Note that the avx512 target feature is still unstable, which means it is not detected on stable Rust.
277
const PREFERRED_VECTOR_SIZE: usize =
278
    if cfg!(all(target_arch = "x86_64", target_feature = "avx512f")) {
279
        64
280
    } else if cfg!(all(target_arch = "x86_64", target_feature = "avx")) {
281
        32
282
    } else {
283
        16
284
    };
285
286
/// Non-nullable aggregation requires fewer temporary registers, so we can use more of them for accumulators.
287
const PREFERRED_VECTOR_SIZE_NON_NULL: usize = PREFERRED_VECTOR_SIZE * 2;
288
289
/// Generic aggregation for any primitive type.
290
/// Returns None if there are no non-null values in `array`.
291
fn aggregate<T: ArrowNativeTypeOp, P: ArrowPrimitiveType<Native = T>, A: NumericAccumulator<T>>(
292
    array: &PrimitiveArray<P>,
293
) -> Option<T> {
294
    let null_count = array.null_count();
295
    if null_count == array.len() {
296
        return None;
297
    }
298
    let values = array.values().as_ref();
299
    match array.nulls() {
300
        Some(nulls) if null_count > 0 => {
301
            // const generics depending on a generic type parameter are not supported
302
            // so we have to match and call aggregate with the corresponding constant
303
            match PREFERRED_VECTOR_SIZE / std::mem::size_of::<T>() {
304
                64 => Some(aggregate_nullable_lanes::<T, A, 64>(values, nulls)),
305
                32 => Some(aggregate_nullable_lanes::<T, A, 32>(values, nulls)),
306
                16 => Some(aggregate_nullable_lanes::<T, A, 16>(values, nulls)),
307
                8 => Some(aggregate_nullable_lanes::<T, A, 8>(values, nulls)),
308
                4 => Some(aggregate_nullable_lanes::<T, A, 4>(values, nulls)),
309
                2 => Some(aggregate_nullable_lanes::<T, A, 2>(values, nulls)),
310
                _ => Some(aggregate_nullable_lanes::<T, A, 1>(values, nulls)),
311
            }
312
        }
313
        _ => {
314
            let is_float = matches!(
315
                array.data_type(),
316
                DataType::Float16 | DataType::Float32 | DataType::Float64
317
            );
318
            if is_float {
319
                match PREFERRED_VECTOR_SIZE_NON_NULL / std::mem::size_of::<T>() {
320
                    64 => Some(aggregate_nonnull_lanes::<T, A, 64>(values)),
321
                    32 => Some(aggregate_nonnull_lanes::<T, A, 32>(values)),
322
                    16 => Some(aggregate_nonnull_lanes::<T, A, 16>(values)),
323
                    8 => Some(aggregate_nonnull_lanes::<T, A, 8>(values)),
324
                    4 => Some(aggregate_nonnull_lanes::<T, A, 4>(values)),
325
                    2 => Some(aggregate_nonnull_lanes::<T, A, 2>(values)),
326
                    _ => Some(aggregate_nonnull_simple::<T, A>(values)),
327
                }
328
            } else {
329
                // for non-null integers it's better to not chunk ourselves and instead
330
                // let llvm fully handle loop unrolling and vectorization
331
                Some(aggregate_nonnull_simple::<T, A>(values))
332
            }
333
        }
334
    }
335
}
336
337
/// Returns the minimum value in the boolean array.
338
///
339
/// ```
340
/// # use arrow_array::BooleanArray;
341
/// # use arrow_arith::aggregate::min_boolean;
342
///
343
/// let a = BooleanArray::from(vec![Some(true), None, Some(false)]);
344
/// assert_eq!(min_boolean(&a), Some(false))
345
/// ```
346
pub fn min_boolean(array: &BooleanArray) -> Option<bool> {
347
    // short circuit if all nulls / zero length array
348
    if array.null_count() == array.len() {
349
        return None;
350
    }
351
352
    // Note the min bool is false (0), so short circuit as soon as we see it
353
    match array.nulls() {
354
        None => {
355
            let bit_chunks = array.values().bit_chunks();
356
0
            if bit_chunks.iter().any(|x| {
357
                // u64::MAX has all bits set, so if the value is not that, then there is a false
358
0
                x != u64::MAX
359
0
            }) {
360
                return Some(false);
361
            }
362
            // If the remainder bits are not all set, then there is a false
363
            if bit_chunks.remainder_bits().count_ones() as usize != bit_chunks.remainder_len() {
364
                Some(false)
365
            } else {
366
                Some(true)
367
            }
368
        }
369
        Some(nulls) => {
370
            let validity_chunks = nulls.inner().bit_chunks();
371
            let value_chunks = array.values().bit_chunks();
372
373
            if value_chunks
374
                .iter()
375
                .zip(validity_chunks.iter())
376
0
                .any(|(value, validity)| {
377
                    // We are looking for a false value, but because applying the validity mask
378
                    // can create a false for a true value (e.g. value: true, validity: false), we instead invert the value, so that we have to look for a true.
379
0
                    (!value & validity) != 0
380
0
                })
381
            {
382
                return Some(false);
383
            }
384
385
            // Same trick as above: Instead of looking for a false, we invert the value bits and look for a true
386
            if (!value_chunks.remainder_bits() & validity_chunks.remainder_bits()) != 0 {
387
                Some(false)
388
            } else {
389
                Some(true)
390
            }
391
        }
392
    }
393
}
394
395
/// Returns the maximum value in the boolean array
396
///
397
/// ```
398
/// # use arrow_array::BooleanArray;
399
/// # use arrow_arith::aggregate::max_boolean;
400
///
401
/// let a = BooleanArray::from(vec![Some(true), None, Some(false)]);
402
/// assert_eq!(max_boolean(&a), Some(true))
403
/// ```
404
pub fn max_boolean(array: &BooleanArray) -> Option<bool> {
405
    // short circuit if all nulls / zero length array
406
    if array.null_count() == array.len() {
407
        return None;
408
    }
409
410
    // Note the max bool is true (1), so short circuit as soon as we see it
411
    match array.nulls() {
412
        None => array
413
            .values()
414
            .bit_chunks()
415
            .iter_padded()
416
            // We found a true if any bit is set
417
0
            .map(|x| x != 0)
418
            .find(|b| *b)
419
            .or(Some(false)),
420
        Some(nulls) => {
421
            let validity_chunks = nulls.inner().bit_chunks().iter_padded();
422
            let value_chunks = array.values().bit_chunks().iter_padded();
423
            value_chunks
424
                .zip(validity_chunks)
425
                // We found a true if the value bit is 1, AND the validity bit is 1 for any bits in the chunk
426
0
                .map(|(value_bits, validity_bits)| (value_bits & validity_bits) != 0)
427
                .find(|b| *b)
428
                .or(Some(false))
429
        }
430
    }
431
}
432
433
/// Helper to compute min/max of [`ArrayAccessor`].
434
0
fn min_max_helper<T, A: ArrayAccessor<Item = T>, F>(array: A, cmp: F) -> Option<T>
435
0
where
436
0
    F: Fn(&T, &T) -> bool,
437
{
438
0
    let null_count = array.null_count();
439
0
    if null_count == array.len() {
440
0
        None
441
0
    } else if null_count == 0 {
442
        // JUSTIFICATION
443
        //  Benefit:  ~8% speedup
444
        //  Soundness: `i` is always within the array bounds
445
0
        (0..array.len())
446
0
            .map(|i| unsafe { array.value_unchecked(i) })
447
0
            .reduce(|acc, item| if cmp(&acc, &item) { item } else { acc })
448
    } else {
449
0
        let nulls = array.nulls().unwrap();
450
        unsafe {
451
0
            let idx = nulls.valid_indices().reduce(|acc_idx, idx| {
452
0
                let acc = array.value_unchecked(acc_idx);
453
0
                let item = array.value_unchecked(idx);
454
0
                if cmp(&acc, &item) {
455
0
                    idx
456
                } else {
457
0
                    acc_idx
458
                }
459
0
            });
460
0
            idx.map(|idx| array.value_unchecked(idx))
461
        }
462
    }
463
0
}
464
465
/// Helper to compute min/max of [`GenericByteViewArray<T>`].
466
/// The specialized min/max leverages the inlined values to compare the byte views.
467
/// `swap_cond` is the condition to swap current min/max with the new value.
468
/// For example, `Ordering::Greater` for max and `Ordering::Less` for min.
469
0
fn min_max_view_helper<T: ByteViewType>(
470
0
    array: &GenericByteViewArray<T>,
471
0
    swap_cond: cmp::Ordering,
472
0
) -> Option<&T::Native> {
473
0
    let null_count = array.null_count();
474
0
    if null_count == array.len() {
475
0
        None
476
0
    } else if null_count == 0 {
477
0
        let target_idx = (0..array.len()).reduce(|acc, item| {
478
            // SAFETY:  array's length is correct so item is within bounds
479
0
            let cmp = unsafe { GenericByteViewArray::compare_unchecked(array, item, array, acc) };
480
0
            if cmp == swap_cond {
481
0
                item
482
            } else {
483
0
                acc
484
            }
485
0
        });
486
        // SAFETY: idx came from valid range `0..array.len()`
487
0
        unsafe { target_idx.map(|idx| array.value_unchecked(idx)) }
488
    } else {
489
0
        let nulls = array.nulls().unwrap();
490
491
0
        let target_idx = nulls.valid_indices().reduce(|acc_idx, idx| {
492
0
            let cmp =
493
0
                unsafe { GenericByteViewArray::compare_unchecked(array, idx, array, acc_idx) };
494
0
            if cmp == swap_cond {
495
0
                idx
496
            } else {
497
0
                acc_idx
498
            }
499
0
        });
500
501
        // SAFETY: idx came from valid range `0..array.len()`
502
0
        unsafe { target_idx.map(|idx| array.value_unchecked(idx)) }
503
    }
504
0
}
505
506
/// Returns the maximum value in the binary array, according to the natural order.
507
pub fn max_binary<T: OffsetSizeTrait>(array: &GenericBinaryArray<T>) -> Option<&[u8]> {
508
    min_max_helper::<&[u8], _, _>(array, |a, b| *a < *b)
509
}
510
511
/// Returns the maximum value in the binary view array, according to the natural order.
512
pub fn max_binary_view(array: &BinaryViewArray) -> Option<&[u8]> {
513
    min_max_view_helper(array, Ordering::Greater)
514
}
515
516
/// Returns the maximum value in the fixed size binary array, according to the natural order.
517
pub fn max_fixed_size_binary(array: &FixedSizeBinaryArray) -> Option<&[u8]> {
518
0
    min_max_helper::<&[u8], _, _>(array, |a, b| *a < *b)
519
}
520
521
/// Returns the minimum value in the binary array, according to the natural order.
522
pub fn min_binary<T: OffsetSizeTrait>(array: &GenericBinaryArray<T>) -> Option<&[u8]> {
523
    min_max_helper::<&[u8], _, _>(array, |a, b| *a > *b)
524
}
525
526
/// Returns the minimum value in the binary view array, according to the natural order.
527
pub fn min_binary_view(array: &BinaryViewArray) -> Option<&[u8]> {
528
    min_max_view_helper(array, Ordering::Less)
529
}
530
531
/// Returns the minimum value in the fixed size binary array, according to the natural order.
532
pub fn min_fixed_size_binary(array: &FixedSizeBinaryArray) -> Option<&[u8]> {
533
0
    min_max_helper::<&[u8], _, _>(array, |a, b| *a > *b)
534
}
535
536
/// Returns the maximum value in the string array, according to the natural order.
537
pub fn max_string<T: OffsetSizeTrait>(array: &GenericStringArray<T>) -> Option<&str> {
538
    min_max_helper::<&str, _, _>(array, |a, b| *a < *b)
539
}
540
541
/// Returns the maximum value in the string view array, according to the natural order.
542
pub fn max_string_view(array: &StringViewArray) -> Option<&str> {
543
    min_max_view_helper(array, Ordering::Greater)
544
}
545
546
/// Returns the minimum value in the string array, according to the natural order.
547
pub fn min_string<T: OffsetSizeTrait>(array: &GenericStringArray<T>) -> Option<&str> {
548
    min_max_helper::<&str, _, _>(array, |a, b| *a > *b)
549
}
550
551
/// Returns the minimum value in the string view array, according to the natural order.
552
pub fn min_string_view(array: &StringViewArray) -> Option<&str> {
553
    min_max_view_helper(array, Ordering::Less)
554
}
555
556
/// Returns the sum of values in the array.
557
///
558
/// This doesn't detect overflow. Once overflowing, the result will wrap around.
559
/// For an overflow-checking variant, use `sum_array_checked` instead.
560
pub fn sum_array<T, A: ArrayAccessor<Item = T::Native>>(array: A) -> Option<T::Native>
561
where
562
    T: ArrowNumericType,
563
    T::Native: ArrowNativeTypeOp,
564
{
565
    match array.data_type() {
566
        DataType::Dictionary(_, _) => {
567
            let null_count = array.null_count();
568
569
            if null_count == array.len() {
570
                return None;
571
            }
572
573
            let iter = ArrayIter::new(array);
574
            let sum = iter
575
                .into_iter()
576
                .fold(T::default_value(), |accumulator, value| {
577
                    if let Some(value) = value {
578
                        accumulator.add_wrapping(value)
579
                    } else {
580
                        accumulator
581
                    }
582
                });
583
584
            Some(sum)
585
        }
586
        _ => sum::<T>(as_primitive_array(&array)),
587
    }
588
}
589
590
/// Returns the sum of values in the array.
591
///
592
/// This detects overflow and returns an `Err` for that. For a non-overflow-checking variant,
593
/// use `sum_array` instead.
594
pub fn sum_array_checked<T, A: ArrayAccessor<Item = T::Native>>(
595
    array: A,
596
) -> Result<Option<T::Native>, ArrowError>
597
where
598
    T: ArrowNumericType,
599
    T::Native: ArrowNativeTypeOp,
600
{
601
    match array.data_type() {
602
        DataType::Dictionary(_, _) => {
603
            let null_count = array.null_count();
604
605
            if null_count == array.len() {
606
                return Ok(None);
607
            }
608
609
            let iter = ArrayIter::new(array);
610
            let sum = iter
611
                .into_iter()
612
                .try_fold(T::default_value(), |accumulator, value| {
613
                    if let Some(value) = value {
614
                        accumulator.add_checked(value)
615
                    } else {
616
                        Ok(accumulator)
617
                    }
618
                })?;
619
620
            Ok(Some(sum))
621
        }
622
        _ => sum_checked::<T>(as_primitive_array(&array)),
623
    }
624
}
625
626
/// Returns the min of values in the array of `ArrowNumericType` type, or dictionary
627
/// array with value of `ArrowNumericType` type.
628
pub fn min_array<T, A: ArrayAccessor<Item = T::Native>>(array: A) -> Option<T::Native>
629
where
630
    T: ArrowNumericType,
631
    T::Native: ArrowNativeType,
632
{
633
    min_max_array_helper::<T, A, _, _>(array, |a, b| a.is_gt(*b), min)
634
}
635
636
/// Returns the max of values in the array of `ArrowNumericType` type, or dictionary
637
/// array with value of `ArrowNumericType` type.
638
pub fn max_array<T, A: ArrayAccessor<Item = T::Native>>(array: A) -> Option<T::Native>
639
where
640
    T: ArrowNumericType,
641
    T::Native: ArrowNativeTypeOp,
642
{
643
    min_max_array_helper::<T, A, _, _>(array, |a, b| a.is_lt(*b), max)
644
}
645
646
fn min_max_array_helper<T, A: ArrayAccessor<Item = T::Native>, F, M>(
647
    array: A,
648
    cmp: F,
649
    m: M,
650
) -> Option<T::Native>
651
where
652
    T: ArrowNumericType,
653
    F: Fn(&T::Native, &T::Native) -> bool,
654
    M: Fn(&PrimitiveArray<T>) -> Option<T::Native>,
655
{
656
    match array.data_type() {
657
        DataType::Dictionary(_, _) => min_max_helper::<T::Native, _, _>(array, cmp),
658
        _ => m(as_primitive_array(&array)),
659
    }
660
}
661
662
macro_rules! bit_operation {
663
    ($NAME:ident, $OP:ident, $NATIVE:ident, $DEFAULT:expr, $DOC:expr) => {
664
        #[doc = $DOC]
665
        ///
666
        /// Returns `None` if the array is empty or only contains null values.
667
        pub fn $NAME<T>(array: &PrimitiveArray<T>) -> Option<T::Native>
668
        where
669
            T: ArrowNumericType,
670
            T::Native: $NATIVE<Output = T::Native> + ArrowNativeTypeOp,
671
        {
672
            let default;
673
            if $DEFAULT == -1 {
674
                default = T::Native::ONE.neg_wrapping();
675
            } else {
676
                default = T::default_value();
677
            }
678
679
            let null_count = array.null_count();
680
681
            if null_count == array.len() {
682
                return None;
683
            }
684
685
            let data: &[T::Native] = array.values();
686
687
            match array.nulls() {
688
                None => {
689
                    let result = data
690
                        .iter()
691
                        .fold(default, |accumulator, value| accumulator.$OP(*value));
692
693
                    Some(result)
694
                }
695
                Some(nulls) => {
696
                    let mut result = default;
697
                    let data_chunks = data.chunks_exact(64);
698
                    let remainder = data_chunks.remainder();
699
700
                    let bit_chunks = nulls.inner().bit_chunks();
701
                    data_chunks
702
                        .zip(bit_chunks.iter())
703
                        .for_each(|(chunk, mask)| {
704
                            // index_mask has value 1 << i in the loop
705
                            let mut index_mask = 1;
706
                            chunk.iter().for_each(|value| {
707
                                if (mask & index_mask) != 0 {
708
                                    result = result.$OP(*value);
709
                                }
710
                                index_mask <<= 1;
711
                            });
712
                        });
713
714
                    let remainder_bits = bit_chunks.remainder_bits();
715
716
                    remainder.iter().enumerate().for_each(|(i, value)| {
717
                        if remainder_bits & (1 << i) != 0 {
718
                            result = result.$OP(*value);
719
                        }
720
                    });
721
722
                    Some(result)
723
                }
724
            }
725
        }
726
    };
727
}
728
729
bit_operation!(
730
    bit_and,
731
    bitand,
732
    BitAnd,
733
    -1,
734
    "Returns the bitwise and of all non-null input values."
735
);
736
bit_operation!(
737
    bit_or,
738
    bitor,
739
    BitOr,
740
    0,
741
    "Returns the bitwise or of all non-null input values."
742
);
743
bit_operation!(
744
    bit_xor,
745
    bitxor,
746
    BitXor,
747
    0,
748
    "Returns the bitwise xor of all non-null input values."
749
);
750
751
/// Returns true if all non-null input values are true, otherwise false.
752
///
753
/// Returns `None` if the array is empty or only contains null values.
754
pub fn bool_and(array: &BooleanArray) -> Option<bool> {
755
    min_boolean(array)
756
}
757
758
/// Returns true if any non-null input value is true, otherwise false.
759
///
760
/// Returns `None` if the array is empty or only contains null values.
761
pub fn bool_or(array: &BooleanArray) -> Option<bool> {
762
    max_boolean(array)
763
}
764
765
/// Returns the sum of values in the primitive array.
766
///
767
/// Returns `Ok(None)` if the array is empty or only contains null values.
768
///
769
/// This detects overflow and returns an `Err` for that. For a non-overflow-checking variant,
770
/// use `sum` instead.
771
pub fn sum_checked<T>(array: &PrimitiveArray<T>) -> Result<Option<T::Native>, ArrowError>
772
where
773
    T: ArrowNumericType,
774
    T::Native: ArrowNativeTypeOp,
775
{
776
    let null_count = array.null_count();
777
778
    if null_count == array.len() {
779
        return Ok(None);
780
    }
781
782
    let data: &[T::Native] = array.values();
783
784
    match array.nulls() {
785
        None => {
786
            let sum = data
787
                .iter()
788
                .try_fold(T::default_value(), |accumulator, value| {
789
                    accumulator.add_checked(*value)
790
                })?;
791
792
            Ok(Some(sum))
793
        }
794
        Some(nulls) => {
795
            let mut sum = T::default_value();
796
797
            try_for_each_valid_idx(
798
                nulls.len(),
799
                nulls.offset(),
800
                nulls.null_count(),
801
                Some(nulls.validity()),
802
                |idx| {
803
                    unsafe { sum = sum.add_checked(array.value_unchecked(idx))? };
804
                    Ok::<_, ArrowError>(())
805
                },
806
            )?;
807
808
            Ok(Some(sum))
809
        }
810
    }
811
}
812
813
/// Returns the sum of values in the primitive array.
814
///
815
/// Returns `None` if the array is empty or only contains null values.
816
///
817
/// This doesn't detect overflow (wrapping addition is used). Once overflowing, the result will
818
/// wrap around. For an overflow-checking variant, use `sum_checked` instead.
819
pub fn sum<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
820
where
821
    T::Native: ArrowNativeTypeOp,
822
{
823
    aggregate::<T::Native, T, SumAccumulator<T::Native>>(array)
824
}
825
826
/// Returns the minimum value in the array, according to the natural order.
827
/// For floating point arrays any NaN values are considered to be greater than any other non-null value
828
pub fn min<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
829
where
830
    T::Native: PartialOrd,
831
{
832
    aggregate::<T::Native, T, MinAccumulator<T::Native>>(array)
833
}
834
835
/// Returns the maximum value in the array, according to the natural order.
836
/// For floating point arrays any NaN values are considered to be greater than any other non-null value
837
pub fn max<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
838
where
839
    T::Native: PartialOrd,
840
{
841
    aggregate::<T::Native, T, MaxAccumulator<T::Native>>(array)
842
}
843
844
#[cfg(test)]
845
mod tests {
846
    use super::*;
847
    use arrow_array::types::*;
848
    use builder::BooleanBuilder;
849
    use std::sync::Arc;
850
851
    #[test]
    fn test_primitive_array_sum() {
        // 1 + 2 + 3 + 4 + 5
        let values = Int32Array::from(vec![1, 2, 3, 4, 5]);
        assert_eq!(sum(&values), Some(15));
    }
856
857
    #[test]
    fn test_primitive_array_float_sum() {
        let values = Float64Array::from(vec![1.1, 2.2, 3.3, 4.4, 5.5]);
        assert_eq!(sum(&values), Some(16.5));
    }
862
863
    #[test]
    fn test_primitive_array_sum_with_nulls() {
        // Only the valid slots (2, 3 and 5) contribute to the total.
        let values = Int32Array::from(vec![None, Some(2), Some(3), None, Some(5)]);
        assert_eq!(sum(&values), Some(10));
    }
868
869
    #[test]
    fn test_primitive_array_sum_all_nulls() {
        // With no valid slot there is nothing to add up.
        let values = Int32Array::from(vec![None, None, None]);
        assert!(sum(&values).is_none());
    }
874
875
    #[test]
    fn test_primitive_array_sum_large_float_64() {
        // Dense case: every slot is valid.
        let dense = Float64Array::new((1..=100).map(|v| v as f64).collect(), None);
        let dense_total = (1..=100).sum::<i64>() as f64;
        assert_eq!(sum(&dense), Some(dense_total));

        // Sparse case: only multiples of three are valid, and the masked-out slots
        // deliberately hold non-zero values.
        let mask = NullBuffer::new((1..=100).map(|v| v % 3 == 0).collect());
        let sparse = Float64Array::new((1..=100).map(|v| v as f64).collect(), Some(mask));
        let sparse_total = (1..=100).filter(|v| v % 3 == 0).sum::<i64>() as f64;
        assert_eq!(sum(&sparse), Some(sparse_total));
    }
889
890
    #[test]
    fn test_primitive_array_sum_large_float_32() {
        // Dense case: every slot is valid.
        let dense = Float32Array::new((1..=100).map(|v| v as f32).collect(), None);
        let dense_total = (1..=100).sum::<i64>() as f32;
        assert_eq!(sum(&dense), Some(dense_total));

        // Sparse case: only multiples of three are valid, and the masked-out slots
        // deliberately hold non-zero values.
        let mask = NullBuffer::new((1..=100).map(|v| v % 3 == 0).collect());
        let sparse = Float32Array::new((1..=100).map(|v| v as f32).collect(), Some(mask));
        let sparse_total = (1..=100).filter(|v| v % 3 == 0).sum::<i64>() as f32;
        assert_eq!(sum(&sparse), Some(sparse_total));
    }
904
905
    #[test]
    fn test_primitive_array_sum_large_64() {
        // Dense case: every slot is valid.
        let dense = Int64Array::new((1..=100).collect(), None);
        let dense_total: i64 = (1..=100).sum();
        assert_eq!(sum(&dense), Some(dense_total));

        // Sparse case: the masked-out slots deliberately hold non-zero values.
        let mask = NullBuffer::new((1..=100).map(|v| v % 3 == 0).collect());
        let sparse = Int64Array::new((1..=100).collect(), Some(mask));
        let sparse_total: i64 = (1..=100).filter(|v| v % 3 == 0).sum();
        assert_eq!(sum(&sparse), Some(sparse_total));
    }
916
917
    #[test]
    fn test_primitive_array_sum_large_32() {
        // Dense case: every slot is valid.
        let dense = Int32Array::new((1..=100).collect(), None);
        let dense_total: i32 = (1..=100).sum();
        assert_eq!(sum(&dense), Some(dense_total));

        // Sparse case: the masked-out slots deliberately hold non-zero values.
        let mask = NullBuffer::new((1..=100).map(|v| v % 3 == 0).collect());
        let sparse = Int32Array::new((1..=100).collect(), Some(mask));
        let sparse_total: i32 = (1..=100).filter(|v| v % 3 == 0).sum();
        assert_eq!(sum(&sparse), Some(sparse_total));
    }
927
928
    #[test]
    fn test_primitive_array_sum_large_16() {
        // Dense case: every slot is valid.
        let dense = Int16Array::new((1..=100).collect(), None);
        let dense_total: i16 = (1..=100).sum();
        assert_eq!(sum(&dense), Some(dense_total));

        // Sparse case: the masked-out slots deliberately hold non-zero values.
        let mask = NullBuffer::new((1..=100).map(|v| v % 3 == 0).collect());
        let sparse = Int16Array::new((1..=100).collect(), Some(mask));
        let sparse_total: i16 = (1..=100).filter(|v| v % 3 == 0).sum();
        assert_eq!(sum(&sparse), Some(sparse_total));
    }
938
939
    #[test]
    fn test_primitive_array_sum_large_8() {
        // The total of 1..=100 does not fit in a u8, so the expectation is built with
        // wrapping addition.
        let dense = UInt8Array::new((1..=100).collect(), None);
        let dense_total = (1..=100).fold(0_u8, u8::wrapping_add);
        assert_eq!(sum(&dense), Some(dense_total));

        // Sparse case: the masked-out slots deliberately hold non-zero values.
        let mask = NullBuffer::new((1..=100).map(|v| v % 3 == 0).collect());
        let sparse = UInt8Array::new((1..=100).collect(), Some(mask));
        let sparse_total = (1..=100)
            .filter(|v| v % 3 == 0)
            .fold(0_u8, u8::wrapping_add);
        assert_eq!(sum(&sparse), Some(sparse_total));
    }
959
960
    #[test]
    fn test_primitive_array_bit_and() {
        // 1 & 2 & 3 & 4 & 5 has no bit in common.
        let values = Int32Array::from(vec![1, 2, 3, 4, 5]);
        assert_eq!(bit_and(&values), Some(0));
    }
965
966
    #[test]
    fn test_primitive_array_bit_and_with_nulls() {
        // Only the valid slots take part: 2 & 3 == 2.
        let values = Int32Array::from(vec![None, Some(2), Some(3), None, None]);
        assert_eq!(bit_and(&values), Some(2));
    }
971
972
    #[test]
    fn test_primitive_array_bit_and_all_nulls() {
        let values = Int32Array::from(vec![None, None, None]);
        assert!(bit_and(&values).is_none());
    }
977
978
    #[test]
    fn test_primitive_array_bit_or() {
        // 1 | 2 | 3 | 4 | 5 sets the three lowest bits.
        let values = Int32Array::from(vec![1, 2, 3, 4, 5]);
        assert_eq!(bit_or(&values), Some(7));
    }
983
984
    #[test]
    fn test_primitive_array_bit_or_with_nulls() {
        // Only the valid slots take part: 2 | 3 | 5 == 7.
        let values = Int32Array::from(vec![None, Some(2), Some(3), None, Some(5)]);
        assert_eq!(bit_or(&values), Some(7));
    }
989
990
    #[test]
    fn test_primitive_array_bit_or_all_nulls() {
        let values = Int32Array::from(vec![None, None, None]);
        assert!(bit_or(&values).is_none());
    }
995
996
    #[test]
    fn test_primitive_array_bit_xor() {
        // 1 ^ 2 ^ 3 ^ 4 ^ 5 == 1
        let values = Int32Array::from(vec![1, 2, 3, 4, 5]);
        assert_eq!(bit_xor(&values), Some(1));
    }
1001
1002
    #[test]
    fn test_primitive_array_bit_xor_with_nulls() {
        // Only the valid slots take part: 2 ^ 3 ^ 5 == 4.
        let values = Int32Array::from(vec![None, Some(2), Some(3), None, Some(5)]);
        assert_eq!(bit_xor(&values), Some(4));
    }
1007
1008
    #[test]
    fn test_primitive_array_bit_xor_all_nulls() {
        let values = Int32Array::from(vec![None, None, None]);
        assert!(bit_xor(&values).is_none());
    }
1013
1014
    #[test]
    fn test_primitive_array_bool_and() {
        // A single `false` is enough to make the conjunction false.
        let flags = BooleanArray::from(vec![true, false, true, false, true]);
        assert_eq!(bool_and(&flags), Some(false));
    }
1019
1020
    #[test]
    fn test_primitive_array_bool_and_with_nulls() {
        // Every valid slot is `true`; the null slots are expected not to matter.
        let flags = BooleanArray::from(vec![None, Some(true), Some(true), None, Some(true)]);
        assert_eq!(bool_and(&flags), Some(true));
    }
1025
1026
    #[test]
    fn test_primitive_array_bool_and_all_nulls() {
        let flags = BooleanArray::from(vec![None, None, None]);
        assert!(bool_and(&flags).is_none());
    }
1031
1032
    #[test]
    fn test_primitive_array_bool_or() {
        // A single `true` is enough to make the disjunction true.
        let flags = BooleanArray::from(vec![true, false, true, false, true]);
        assert_eq!(bool_or(&flags), Some(true));
    }
1037
1038
    #[test]
    fn test_primitive_array_bool_or_with_nulls() {
        // Every valid slot is `false`; the null slots are expected not to matter.
        let flags = BooleanArray::from(vec![None, Some(false), Some(false), None, Some(false)]);
        assert_eq!(bool_or(&flags), Some(false));
    }
1043
1044
    #[test]
    fn test_primitive_array_bool_or_all_nulls() {
        let flags = BooleanArray::from(vec![None, None, None]);
        assert!(bool_or(&flags).is_none());
    }
1049
1050
    #[test]
    fn test_primitive_array_min_max() {
        let values = Int32Array::from(vec![5, 6, 7, 8, 9]);
        assert_eq!(min(&values), Some(5));
        assert_eq!(max(&values), Some(9));
    }
1056
1057
    #[test]
    fn test_primitive_array_min_max_with_nulls() {
        // The extremes are taken over the valid slots only.
        let values = Int32Array::from(vec![Some(5), None, None, Some(8), Some(9)]);
        assert_eq!(min(&values), Some(5));
        assert_eq!(max(&values), Some(9));
    }
1063
1064
    #[test]
    fn test_primitive_min_max_1() {
        // Leading nulls followed by values in descending order.
        let values = Int32Array::from(vec![None, None, Some(5), Some(2)]);
        let (lowest, highest) = (min(&values), max(&values));
        assert_eq!(lowest, Some(2));
        assert_eq!(highest, Some(5));
    }
1070
1071
    #[test]
    fn test_primitive_min_max_float_large_nonnull_array() {
        /// Builds an array holding 1.0, 2.0, ..., `len` where every slot is `Some`.
        fn ascending(len: i32) -> Float64Array {
            (1..=len).map(|v| Some(f64::from(v))).collect()
        }

        // min/max are on boundaries of chunked data
        let full = ascending(256);
        assert_eq!(min(&full), Some(1.0));
        assert_eq!(max(&full), Some(256.0));

        // max is last value in remainder after chunking
        assert_eq!(max(&ascending(255)), Some(255.0));

        // max is first value in remainder after chunking
        assert_eq!(max(&ascending(257)), Some(257.0));
    }
1086
1087
    /// Exercises `min`/`max` on long `Float64Array`s that contain nulls.
    ///
    /// Every array built here contains at least one `None`. The two "remainder" cases
    /// previously used fully non-null arrays, which only repeated what
    /// `test_primitive_min_max_float_large_nonnull_array` already checks.
    #[test]
    fn test_primitive_min_max_float_large_nullable_array() {
        let a: Float64Array = (0..256)
            .map(|i| {
                if (i + 1) % 3 == 0 {
                    None
                } else {
                    Some((i + 1) as f64)
                }
            })
            .collect();
        // min/max are on boundaries of chunked data
        assert_eq!(Some(1.0), min(&a));
        assert_eq!(Some(256.0), max(&a));

        let a: Float64Array = (0..256)
            .map(|i| {
                if i == 0 || i == 255 {
                    None
                } else {
                    Some((i + 1) as f64)
                }
            })
            .collect();
        // boundaries of chunked data are null
        assert_eq!(Some(2.0), min(&a));
        assert_eq!(Some(255.0), max(&a));

        let a: Float64Array = (0..256)
            .map(|i| if i != 100 { None } else { Some((i) as f64) })
            .collect();
        // a single non-null value somewhere in the middle
        assert_eq!(Some(100.0), min(&a));
        assert_eq!(Some(100.0), max(&a));

        // max is last value in remainder after chunking; the first slot is null so the
        // array really is nullable
        let a: Float64Array = (0..255)
            .map(|i| if i == 0 { None } else { Some((i + 1) as f64) })
            .collect();
        assert_eq!(Some(255.0), max(&a));

        // max is first value in remainder after chunking; the first slot is null so the
        // array really is nullable
        let a: Float64Array = (0..257)
            .map(|i| if i == 0 { None } else { Some((i + 1) as f64) })
            .collect();
        assert_eq!(Some(257.0), max(&a));
    }
1130
1131
    #[test]
    fn test_primitive_min_max_float_edge_cases() {
        // An array filled with one extreme value must report that value as both its
        // minimum and its maximum.
        for extreme in [f64::NEG_INFINITY, f64::MIN, f64::MAX, f64::INFINITY] {
            let repeated: Float64Array = (0..100).map(|_| Some(extreme)).collect();
            assert_eq!(min(&repeated), Some(extreme));
            assert_eq!(max(&repeated), Some(extreme));
        }
    }
1149
1150
    #[test]
    fn test_primitive_min_max_float_all_nans_non_null() {
        // With nothing but NaN in the array, both extremes must be NaN.
        let nans: Float64Array = (0..100).map(|_| Some(f64::NAN)).collect();
        assert!(matches!(max(&nans), Some(v) if v.is_nan()));
        assert!(matches!(min(&nans), Some(v) if v.is_nan()));
    }
1156
1157
    #[test]
    fn test_primitive_min_max_float_negative_nan() {
        let values =
            Float64Array::from(vec![f64::NEG_INFINITY, f64::NAN, f64::INFINITY, -f64::NAN]);
        let largest = max(&values).unwrap();
        let smallest = min(&values).unwrap();

        // The maximum is expected to be the NaN with a positive sign ...
        assert!(largest.is_nan());
        assert!(largest.is_sign_positive());

        // ... and the minimum the NaN with a negative sign.
        assert!(smallest.is_nan());
        assert!(smallest.is_sign_negative());
    }
1169
1170
    #[test]
    fn test_primitive_min_max_float_first_nan_nonnull() {
        // NaN in the first slot, followed by 1.0, 2.0, ..., 99.0.
        let values: Float64Array = (0..100)
            .map(|i| Some(if i == 0 { f64::NAN } else { i as f64 }))
            .collect();
        assert_eq!(min(&values), Some(1.0));
        assert!(matches!(max(&values), Some(v) if v.is_nan()));
    }
1184
1185
    #[test]
    fn test_primitive_min_max_float_last_nan_nonnull() {
        // 1.0, 2.0, ..., 99.0 followed by NaN in the last slot.
        let values: Float64Array = (0..100)
            .map(|i| Some(if i == 99 { f64::NAN } else { (i + 1) as f64 }))
            .collect();
        assert_eq!(min(&values), Some(1.0));
        assert!(matches!(max(&values), Some(v) if v.is_nan()));
    }
1199
1200
    #[test]
    fn test_primitive_min_max_float_first_nan_nullable() {
        // NaN in the first slot, nulls at the remaining even indices, values elsewhere.
        let values: Float64Array = (0..100)
            .map(|i| match i {
                0 => Some(f64::NAN),
                _ if i % 2 == 0 => None,
                _ => Some(i as f64),
            })
            .collect();
        assert_eq!(min(&values), Some(1.0));
        assert!(matches!(max(&values), Some(v) if v.is_nan()));
    }
1216
1217
    #[test]
    fn test_primitive_min_max_float_last_nan_nullable() {
        // NaN in the last slot, nulls at the even indices, values at the other odd ones.
        let values: Float64Array = (0..100)
            .map(|i| match i {
                99 => Some(f64::NAN),
                _ if i % 2 == 0 => None,
                _ => Some(i as f64),
            })
            .collect();
        assert_eq!(min(&values), Some(1.0));
        assert!(matches!(max(&values), Some(v) if v.is_nan()));
    }
1233
1234
    #[test]
    fn test_primitive_min_max_float_inf_and_nans() {
        // Within every run of ten slots, positions 0, 1, 2, 4 and 5 hold a special value;
        // the remaining positions hold their own index.
        let values: Float64Array = (0..100)
            .map(|i| {
                let slot = i % 10;
                let x = if slot == 0 {
                    f64::NEG_INFINITY
                } else if slot == 1 {
                    f64::MIN
                } else if slot == 2 {
                    f64::MAX
                } else if slot == 4 {
                    f64::INFINITY
                } else if slot == 5 {
                    f64::NAN
                } else {
                    i as f64
                };
                Some(x)
            })
            .collect();
        assert_eq!(min(&values), Some(f64::NEG_INFINITY));
        assert!(matches!(max(&values), Some(v) if v.is_nan()));
    }
1252
1253
    fn pad_inputs_and_test_fixed_size_binary(
1254
        input: Vec<Option<&[u8]>>,
1255
        expected_min: Option<&[u8]>,
1256
        expected_max: Option<&[u8]>,
1257
    ) {
1258
        fn pad_slice(slice: &[u8], len: usize) -> Vec<u8> {
1259
            let mut padded = vec![0; len];
1260
            padded[..slice.len()].copy_from_slice(slice);
1261
            padded
1262
        }
1263
1264
        let max_len = input
1265
            .iter()
1266
            .filter_map(|x| x.as_ref().map(|b| b.len()))
1267
            .max()
1268
            .unwrap_or(0);
1269
        let padded_input = input
1270
            .iter()
1271
            .map(|x| x.as_ref().map(|b| pad_slice(b, max_len)));
1272
        let input_arr =
1273
            FixedSizeBinaryArray::try_from_sparse_iter_with_size(padded_input, max_len as i32)
1274
                .unwrap();
1275
        let padded_expected_min = expected_min.map(|b| pad_slice(b, max_len));
1276
        let padded_expected_max = expected_max.map(|b| pad_slice(b, max_len));
1277
1278
        assert_eq!(
1279
            padded_expected_min.as_deref(),
1280
            min_fixed_size_binary(&input_arr)
1281
        );
1282
        assert_eq!(
1283
            padded_expected_max.as_deref(),
1284
            max_fixed_size_binary(&input_arr)
1285
        );
1286
    }
1287
1288
    /// Generates a `#[test]` that checks min/max of the same input across `BinaryArray`,
    /// `LargeBinaryArray`, `BinaryViewArray` and (zero-padded) `FixedSizeBinaryArray`.
    ///
    /// The input expression is expanded once per array flavour.
    macro_rules! test_binary {
        ($test_name:ident, $input:expr, $want_min:expr, $want_max:expr) => {
            #[test]
            fn $test_name() {
                let small = BinaryArray::from($input);
                assert_eq!(min_binary(&small), $want_min);
                assert_eq!(max_binary(&small), $want_max);

                let large = LargeBinaryArray::from($input);
                assert_eq!(min_binary(&large), $want_min);
                assert_eq!(max_binary(&large), $want_max);

                let view = BinaryViewArray::from($input);
                assert_eq!(min_binary_view(&view), $want_min);
                assert_eq!(max_binary_view(&view), $want_max);

                pad_inputs_and_test_fixed_size_binary($input, $want_min, $want_max);
            }
        };
    }
1308
1309
    test_binary!(
1310
        test_binary_min_max_with_nulls,
1311
        vec![
1312
            Some("b01234567890123".as_bytes()), // long bytes
1313
            None,
1314
            None,
1315
            Some(b"a"),
1316
            Some(b"c"),
1317
            Some(b"abcdedfg0123456"),
1318
        ],
1319
        Some("a".as_bytes()),
1320
        Some("c".as_bytes())
1321
    );
1322
1323
    test_binary!(
1324
        test_binary_min_max_no_null,
1325
        vec![
1326
            Some("b".as_bytes()),
1327
            Some(b"abcdefghijklmnopqrst"), // long bytes
1328
            Some(b"c"),
1329
            Some(b"b01234567890123"), // long bytes for view types
1330
        ],
1331
        Some("abcdefghijklmnopqrst".as_bytes()),
1332
        Some("c".as_bytes())
1333
    );
1334
1335
    test_binary!(test_binary_min_max_all_nulls, vec![None, None], None, None);
1336
1337
    test_binary!(
1338
        test_binary_min_max_1,
1339
        vec![
1340
            None,
1341
            Some("b01234567890123435".as_bytes()), // long bytes for view types
1342
            None,
1343
            Some(b"b0123xxxxxxxxxxx"),
1344
            Some(b"a")
1345
        ],
1346
        Some("a".as_bytes()),
1347
        Some("b0123xxxxxxxxxxx".as_bytes())
1348
    );
1349
1350
    /// Generates a `#[test]` that checks min/max of the same input across `StringArray`,
    /// `LargeStringArray` and `StringViewArray`.
    ///
    /// The input expression is expanded once per array flavour.
    macro_rules! test_string {
        ($test_name:ident, $input:expr, $want_min:expr, $want_max:expr) => {
            #[test]
            fn $test_name() {
                let small = StringArray::from($input);
                assert_eq!(min_string(&small), $want_min);
                assert_eq!(max_string(&small), $want_max);

                let large = LargeStringArray::from($input);
                assert_eq!(min_string(&large), $want_min);
                assert_eq!(max_string(&large), $want_max);

                let view = StringViewArray::from($input);
                assert_eq!(min_string_view(&view), $want_min);
                assert_eq!(max_string_view(&view), $want_max);
            }
        };
    }
1368
1369
    test_string!(
1370
        test_string_min_max_with_nulls,
1371
        vec![
1372
            Some("b012345678901234"), // long bytes for view types
1373
            None,
1374
            None,
1375
            Some("a"),
1376
            Some("c"),
1377
            Some("b0123xxxxxxxxxxx")
1378
        ],
1379
        Some("a"),
1380
        Some("c")
1381
    );
1382
1383
    test_string!(
1384
        test_string_min_max_no_null,
1385
        vec![
1386
            Some("b"),
1387
            Some("b012345678901234"), // long bytes for view types
1388
            Some("a"),
1389
            Some("b012xxxxxxxxxxxx")
1390
        ],
1391
        Some("a"),
1392
        Some("b012xxxxxxxxxxxx")
1393
    );
1394
1395
    test_string!(
1396
        test_string_min_max_all_nulls,
1397
        Vec::<Option<&str>>::from_iter([None, None]),
1398
        None,
1399
        None
1400
    );
1401
1402
    test_string!(
1403
        test_string_min_max_1,
1404
        vec![
1405
            None,
1406
            Some("c12345678901234"), // long bytes for view types
1407
            None,
1408
            Some("b"),
1409
            Some("c1234xxxxxxxxxx")
1410
        ],
1411
        Some("b"),
1412
        Some("c1234xxxxxxxxxx")
1413
    );
1414
1415
    test_string!(
1416
        test_string_min_max_empty,
1417
        Vec::<Option<&str>>::new(),
1418
        None,
1419
        None
1420
    );
1421
1422
    #[test]
    fn test_boolean_min_max_empty() {
        // An array without any element has neither a minimum nor a maximum.
        let empty = BooleanArray::from(Vec::<Option<bool>>::new());
        assert!(min_boolean(&empty).is_none());
        assert!(max_boolean(&empty).is_none());
    }
1428
1429
    #[test]
    fn test_boolean_min_max_all_null() {
        // Two slots, both null.
        let nulls = BooleanArray::from(vec![None::<bool>, None]);
        assert!(min_boolean(&nulls).is_none());
        assert!(max_boolean(&nulls).is_none());
    }
1435
1436
    #[test]
    fn test_boolean_min_max_no_null() {
        // `false` is expected to order below `true`.
        let flags = BooleanArray::from(vec![Some(true), Some(false), Some(true)]);
        assert_eq!(min_boolean(&flags), Some(false));
        assert_eq!(max_boolean(&flags), Some(true));
    }
1442
1443
    #[test]
    fn test_boolean_min_max() {
        // Each entry is (input values, expected minimum, expected maximum).
        let cases: [(Vec<Option<bool>>, bool, bool); 7] = [
            (vec![Some(true), Some(true), None, Some(false), None], false, true),
            (vec![None, Some(true), None, Some(false), None], false, true),
            (vec![Some(false), Some(true), None, Some(false), None], false, true),
            (vec![Some(true), None], true, true),
            (vec![Some(false), None], false, false),
            (vec![Some(true)], true, true),
            (vec![Some(false)], false, false),
        ];

        for (values, lowest, highest) in cases {
            let flags = BooleanArray::from(values);
            assert_eq!(min_boolean(&flags), Some(lowest));
            assert_eq!(max_boolean(&flags), Some(highest));
        }
    }
1473
1474
    #[test]
    fn test_boolean_min_max_smaller() {
        // Each array holds exactly one valid value, so it is both the minimum and the maximum.
        let cases: [(Vec<Option<bool>>, bool); 4] = [
            (vec![Some(false)], false),
            (vec![None, Some(false)], false),
            (vec![None, Some(true)], true),
            (vec![Some(true)], true),
        ];

        for (values, only) in cases {
            let flags = BooleanArray::from(values);
            assert_eq!(min_boolean(&flags), Some(only));
            assert_eq!(max_boolean(&flags), Some(only));
        }
    }
1492
1493
    #[test]
    fn test_boolean_min_max_64_true_64_false() {
        // 64 `true` values followed by 64 `false` values, without any null.
        let mut dense_builder = BooleanBuilder::new();
        dense_builder.append_slice(&[true; 64]);
        dense_builder.append_slice(&[false; 64]);
        let dense = dense_builder.finish();

        assert_eq!(min_boolean(&dense), Some(false));
        assert_eq!(max_boolean(&dense), Some(true));

        // Same 128-slot layout, but with a null inside the first half and only a single
        // valid `false` in the second half.
        let mut sparse_builder = BooleanBuilder::new();
        sparse_builder.append_slice(&[true; 31]);
        sparse_builder.append_null();
        sparse_builder.append_slice(&[true; 32]);
        sparse_builder.append_slice(&[false]);
        sparse_builder.append_nulls(63);
        let sparse = sparse_builder.finish();

        assert_eq!(min_boolean(&sparse), Some(false));
        assert_eq!(max_boolean(&sparse), Some(true));
    }
1514
1515
    #[test]
    fn test_boolean_min_max_64_false_64_true() {
        // 64 `false` values followed by 64 `true` values, without any null.
        let mut dense_builder = BooleanBuilder::new();
        dense_builder.append_slice(&[false; 64]);
        dense_builder.append_slice(&[true; 64]);
        let dense = dense_builder.finish();

        assert_eq!(min_boolean(&dense), Some(false));
        assert_eq!(max_boolean(&dense), Some(true));

        // Same 128-slot layout, but with a null inside the first half and only a single
        // valid `true` in the second half.
        let mut sparse_builder = BooleanBuilder::new();
        sparse_builder.append_slice(&[false; 31]);
        sparse_builder.append_null();
        sparse_builder.append_slice(&[false; 32]);
        sparse_builder.append_slice(&[true]);
        sparse_builder.append_nulls(63);
        let sparse = sparse_builder.finish();

        assert_eq!(min_boolean(&sparse), Some(false));
        assert_eq!(max_boolean(&sparse), Some(true));
    }
1536
1537
    #[test]
    fn test_boolean_min_max_96_true() {
        // 96 `true` values without any null.
        let mut dense_builder = BooleanBuilder::new();
        dense_builder.append_slice(&[true; 96]);
        let dense = dense_builder.finish();

        assert_eq!(min_boolean(&dense), Some(true));
        assert_eq!(max_boolean(&dense), Some(true));

        // 96 slots again, with nulls at index 31 and at the very end.
        let mut sparse_builder = BooleanBuilder::new();
        sparse_builder.append_slice(&[true; 31]);
        sparse_builder.append_null();
        sparse_builder.append_slice(&[true; 32]);
        sparse_builder.append_slice(&[true; 31]);
        sparse_builder.append_null();
        let sparse = sparse_builder.finish();

        assert_eq!(min_boolean(&sparse), Some(true));
        assert_eq!(max_boolean(&sparse), Some(true));
    }
1557
1558
    #[test]
    fn test_boolean_min_max_96_false() {
        // 96 `false` values without any null.
        let mut dense_builder = BooleanBuilder::new();
        dense_builder.append_slice(&[false; 96]);
        let dense = dense_builder.finish();

        assert_eq!(min_boolean(&dense), Some(false));
        assert_eq!(max_boolean(&dense), Some(false));

        // 96 slots again, with nulls at index 31 and at the very end.
        let mut sparse_builder = BooleanBuilder::new();
        sparse_builder.append_slice(&[false; 31]);
        sparse_builder.append_null();
        sparse_builder.append_slice(&[false; 32]);
        sparse_builder.append_slice(&[false; 31]);
        sparse_builder.append_null();
        let sparse = sparse_builder.finish();

        assert_eq!(min_boolean(&sparse), Some(false));
        assert_eq!(max_boolean(&sparse), Some(false));
    }
1578
1579
    #[test]
    fn test_sum_dyn() {
        let dictionary_values =
            Arc::new(Int8Array::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17])) as ArrayRef;

        // Sums the dictionary values that the given keys point at.
        let sum_through_keys = |keys: Int8Array| {
            let dict = DictionaryArray::new(keys, dictionary_values.clone());
            let typed = dict.downcast_dict::<Int8Array>().unwrap();
            sum_array::<Int8Type, _>(typed)
        };

        // Keys 2, 3 and 4 select 12 + 13 + 14.
        let all_valid = Int8Array::from_iter_values([2_i8, 3, 4]);
        assert_eq!(sum_through_keys(all_valid), Some(39));

        // A plain primitive array is accepted as well.
        let plain = Int32Array::from(vec![1, 2, 3, 4, 5]);
        assert_eq!(sum_array::<Int32Type, _>(&plain), Some(15));

        // A null key is left out: 12 + 14.
        let one_null = Int8Array::from(vec![Some(2_i8), None, Some(4)]);
        assert_eq!(sum_through_keys(one_null), Some(26));

        // Only null keys: there is nothing to sum.
        let only_nulls = Int8Array::from(vec![None, None, None]);
        assert!(sum_through_keys(only_nulls).is_none());
    }
1602
1603
    #[test]
    fn test_max_min_dyn() {
        let dictionary_values =
            Arc::new(Int8Array::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17])) as ArrayRef;

        // Returns (max, min) of the dictionary values that the given keys point at.
        let extremes_through_keys = |keys: Int8Array| {
            let dict = DictionaryArray::new(keys, dictionary_values.clone());
            let typed = dict.downcast_dict::<Int8Array>().unwrap();
            let highest = max_array::<Int8Type, _>(typed);
            let typed = dict.downcast_dict::<Int8Array>().unwrap();
            let lowest = min_array::<Int8Type, _>(typed);
            (highest, lowest)
        };

        // Keys 2, 3 and 4 select the values 12, 13 and 14.
        let all_valid = Int8Array::from_iter_values([2_i8, 3, 4]);
        assert_eq!(extremes_through_keys(all_valid), (Some(14), Some(12)));

        // A plain primitive array is accepted as well.
        let plain = Int32Array::from(vec![1, 2, 3, 4, 5]);
        assert_eq!(max_array::<Int32Type, _>(&plain), Some(5));
        assert_eq!(min_array::<Int32Type, _>(&plain), Some(1));

        // A null key is left out; keys 2 and 7 select the values 12 and 17.
        let one_null = Int8Array::from(vec![Some(2_i8), None, Some(7)]);
        assert_eq!(extremes_through_keys(one_null), (Some(17), Some(12)));

        // Only null keys: neither extreme exists.
        let only_nulls = Int8Array::from(vec![None, None, None]);
        assert_eq!(extremes_through_keys(only_nulls), (None, None));
    }
1634
1635
    #[test]
    fn test_max_min_dyn_nan() {
        // Every dictionary value, including the NaN, is referenced by exactly one key.
        let dictionary_values = Float32Array::from(vec![5.0_f32, 2.0_f32, f32::NAN]);
        let keys = Int8Array::from_iter_values([0_i8, 1, 2]);
        let dict = DictionaryArray::new(keys, Arc::new(dictionary_values));

        let typed = dict.downcast_dict::<Float32Array>().unwrap();
        let highest = max_array::<Float32Type, _>(typed);
        assert!(highest.unwrap().is_nan());

        let typed = dict.downcast_dict::<Float32Array>().unwrap();
        let lowest = min_array::<Float32Type, _>(typed);
        assert_eq!(lowest, Some(2.0_f32));
    }
1647
1648
    #[test]
    fn test_min_max_sliced_primitive() {
        let expected = Some(4.0);

        // Reference array built directly.
        let reference: Float64Array = vec![None, Some(4.0)].into_iter().collect();

        // The same logical content, obtained as the last two slots of a longer array.
        let backing: Float64Array = vec![None, None, None, None, None, Some(4.0)]
            .into_iter()
            .collect();
        let window = backing.slice(4, 2);
        assert_eq!(&window, &reference);

        for array in [&reference, &window] {
            assert_eq!(min(array), expected);
            assert_eq!(max(array), expected);
        }
    }
1669
1670
    /// `min_boolean`/`max_boolean` must give the same answer whether the
    /// array was built directly or obtained by slicing a longer array.
    #[test]
    fn test_min_max_sliced_boolean() {
        let want = Some(true);

        // Reference array: one null followed by the only valid value.
        let reference: BooleanArray = vec![None, Some(true)].into_iter().collect();
        assert_eq!(min_boolean(&reference), want);
        assert_eq!(max_boolean(&reference), want);

        // Six elements; the slice keeps the last two (a null and the value).
        let padded: BooleanArray = vec![None, None, None, None, None, Some(true)]
            .into_iter()
            .collect();
        let window = padded.slice(4, 2);

        // The slice must be logically equal to the directly built array.
        assert_eq!(&window, &reference);

        assert_eq!(min_boolean(&window), want);
        assert_eq!(max_boolean(&window), want);
    }
1691
1692
    /// `min_string`/`max_string` must give the same answer whether the array
    /// was built directly or obtained by slicing a longer array.
    #[test]
    fn test_min_max_sliced_string() {
        let want = Some("foo");

        // Reference array: one null followed by the only valid value.
        let reference: StringArray = vec![None, Some("foo")].into_iter().collect();
        assert_eq!(min_string(&reference), want);
        assert_eq!(max_string(&reference), want);

        // Six elements; the slice keeps the last two (a null and the value).
        let padded: StringArray = vec![None, None, None, None, None, Some("foo")]
            .into_iter()
            .collect();
        let window = padded.slice(4, 2);

        // The slice must be logically equal to the directly built array.
        assert_eq!(&window, &reference);

        assert_eq!(min_string(&window), want);
        assert_eq!(max_string(&window), want);
    }
1713
1714
    /// `min_binary`/`max_binary` must give the same answer whether the array
    /// was built directly or obtained by slicing a longer array.
    #[test]
    fn test_min_max_sliced_binary() {
        let want: Option<&[u8]> = Some(&[5]);

        // Reference array: one null followed by the only valid value.
        let reference: BinaryArray = vec![None, Some(&[5])].into_iter().collect();
        assert_eq!(min_binary(&reference), want);
        assert_eq!(max_binary(&reference), want);

        // Six elements; the slice keeps the last two (a null and the value).
        let padded: BinaryArray = vec![None, None, None, None, None, Some(&[5])]
            .into_iter()
            .collect();
        let window = padded.slice(4, 2);

        // The slice must be logically equal to the directly built array.
        assert_eq!(&window, &reference);

        assert_eq!(min_binary(&window), want);
        assert_eq!(max_binary(&window), want);
    }
1735
1736
    /// The unchecked sum kernels are expected to wrap on overflow:
    /// `i32::MAX + 1` must come back as `i32::MIN`.
    #[test]
    fn test_sum_overflow() {
        let overflowing = Int32Array::from(vec![i32::MAX, 1]);

        let via_sum = sum(&overflowing).unwrap();
        assert_eq!(via_sum, i32::MIN);

        let via_sum_array = sum_array::<Int32Type, _>(&overflowing).unwrap();
        assert_eq!(via_sum_array, i32::MIN);
    }
1743
1744
    /// The checked sum kernels must return an error for `i32::MAX + 1`
    /// rather than a value.
    #[test]
    fn test_sum_checked_overflow() {
        let overflowing = Int32Array::from(vec![i32::MAX, 1]);
        let why = "overflow should be detected";

        sum_checked(&overflowing).expect_err(why);
        sum_array_checked::<Int32Type, _>(&overflowing).expect_err(why);
    }
1751
}