Coverage Report

Created: 2025-11-17 14:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-select/src/zip.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! [`zip`]: Combine values from two arrays based on boolean mask
19
20
use crate::filter::{SlicesIterator, prep_null_mask_filter};
21
use arrow_array::cast::AsArray;
22
use arrow_array::types::{BinaryType, ByteArrayType, LargeBinaryType, LargeUtf8Type, Utf8Type};
23
use arrow_array::*;
24
use arrow_buffer::{
25
    BooleanBuffer, Buffer, MutableBuffer, NullBuffer, OffsetBuffer, OffsetBufferBuilder,
26
    ScalarBuffer,
27
};
28
use arrow_data::ArrayData;
29
use arrow_data::transform::MutableArrayData;
30
use arrow_schema::{ArrowError, DataType};
31
use std::fmt::{Debug, Formatter};
32
use std::hash::Hash;
33
use std::marker::PhantomData;
34
use std::ops::Not;
35
use std::sync::Arc;
36
37
/// Zip two arrays by some boolean mask.
38
///
39
/// - Where `mask` is `true`, values of `truthy` are taken
40
/// - Where `mask` is `false` or `NULL`, values of `falsy` are taken
41
///
42
/// # Example: `zip` two arrays
43
/// ```
44
/// # use std::sync::Arc;
45
/// # use arrow_array::{ArrayRef, BooleanArray, Int32Array};
46
/// # use arrow_select::zip::zip;
47
/// // mask: [true, true, false, NULL, true]
48
/// let mask = BooleanArray::from(vec![
49
///   Some(true), Some(true), Some(false), None, Some(true)
50
/// ]);
51
/// // truthy array: [1, NULL, 3, 4, 5]
52
/// let truthy = Int32Array::from(vec![
53
///   Some(1), None, Some(3), Some(4), Some(5)
54
/// ]);
55
/// // falsy array: [10, 20, 30, 40, 50]
56
/// let falsy = Int32Array::from(vec![
57
///   Some(10), Some(20), Some(30), Some(40), Some(50)
58
/// ]);
59
/// // zip with this mask select the first, second and last value from `truthy`
60
/// // and the third and fourth value from `falsy`
61
/// let result = zip(&mask, &truthy, &falsy).unwrap();
62
/// // Expected: [1, NULL, 30, 40, 5]
63
/// let expected: ArrayRef = Arc::new(Int32Array::from(vec![
64
///   Some(1), None, Some(30), Some(40), Some(5)
65
/// ]));
66
/// assert_eq!(&result, &expected);
67
/// ```
68
///
69
/// # Example: `zip` and array with a scalar
70
///
71
/// Use `zip` to replace certain values in an array with a scalar
72
///
73
/// ```
74
/// # use std::sync::Arc;
75
/// # use arrow_array::{ArrayRef, BooleanArray, Int32Array};
76
/// # use arrow_select::zip::zip;
77
/// // mask: [true, true, false, NULL, true]
78
/// let mask = BooleanArray::from(vec![
79
///   Some(true), Some(true), Some(false), None, Some(true)
80
/// ]);
81
/// //  array: [1, NULL, 3, 4, 5]
82
/// let arr = Int32Array::from(vec![
83
///   Some(1), None, Some(3), Some(4), Some(5)
84
/// ]);
85
/// // scalar: 42
86
/// let scalar = Int32Array::new_scalar(42);
87
/// // zip the array with the  mask select the first, second and last value from `arr`
88
/// // and fill the third and fourth value with the scalar 42
89
/// let result = zip(&mask, &arr, &scalar).unwrap();
90
/// // Expected: [1, NULL, 42, 42, 5]
91
/// let expected: ArrayRef = Arc::new(Int32Array::from(vec![
92
///   Some(1), None, Some(42), Some(42), Some(5)
93
/// ]));
94
/// assert_eq!(&result, &expected);
95
/// ```
96
78
pub fn zip(
97
78
    mask: &BooleanArray,
98
78
    truthy: &dyn Datum,
99
78
    falsy: &dyn Datum,
100
78
) -> Result<ArrayRef, ArrowError> {
101
78
    let (truthy_array, truthy_is_scalar) = truthy.get();
102
78
    let (falsy_array, falsy_is_scalar) = falsy.get();
103
104
78
    if falsy_is_scalar && 
truthy_is_scalar54
{
105
39
        let zipper = ScalarZipper::try_new(truthy, falsy)
?0
;
106
39
        return zipper.zip_impl.create_output(mask);
107
39
    }
108
109
39
    let truthy = truthy_array;
110
39
    let falsy = falsy_array;
111
112
39
    if truthy.data_type() != falsy.data_type() {
113
0
        return Err(ArrowError::InvalidArgumentError(
114
0
            "arguments need to have the same data type".into(),
115
0
        ));
116
39
    }
117
118
39
    if truthy_is_scalar && 
truthy.len() != 114
{
119
0
        return Err(ArrowError::InvalidArgumentError(
120
0
            "scalar arrays must have 1 element".into(),
121
0
        ));
122
39
    }
123
39
    if !truthy_is_scalar && 
truthy25
.
len25
() != mask.len() {
124
0
        return Err(ArrowError::InvalidArgumentError(
125
0
            "all arrays should have the same length".into(),
126
0
        ));
127
39
    }
128
39
    if falsy_is_scalar && 
falsy.len() != 115
{
129
0
        return Err(ArrowError::InvalidArgumentError(
130
0
            "scalar arrays must have 1 element".into(),
131
0
        ));
132
39
    }
133
39
    if !falsy_is_scalar && 
falsy24
.
len24
() != mask.len() {
134
0
        return Err(ArrowError::InvalidArgumentError(
135
0
            "all arrays should have the same length".into(),
136
0
        ));
137
39
    }
138
139
39
    let falsy = falsy.to_data();
140
39
    let truthy = truthy.to_data();
141
142
39
    zip_impl(mask, &truthy, truthy_is_scalar, &falsy, falsy_is_scalar)
143
78
}
144
145
40
fn zip_impl(
146
40
    mask: &BooleanArray,
147
40
    truthy: &ArrayData,
148
40
    truthy_is_scalar: bool,
149
40
    falsy: &ArrayData,
150
40
    falsy_is_scalar: bool,
151
40
) -> Result<ArrayRef, ArrowError> {
152
40
    let mut mutable = MutableArrayData::new(vec![truthy, falsy], false, truthy.len());
153
154
    // the SlicesIterator slices only the true values. So the gaps left by this iterator we need to
155
    // fill with falsy values
156
157
    // keep track of how much is filled
158
40
    let mut filled = 0;
159
160
40
    let mask_buffer = maybe_prep_null_mask_filter(mask);
161
40
    SlicesIterator::from(&mask_buffer).for_each(|(start, end)| 
{35
162
        // the gap needs to be filled with falsy values
163
35
        if start > filled {
164
8
            if falsy_is_scalar {
165
7
                for _ in 
filled4
..
start4
{
166
7
                    // Copy the first item from the 'falsy' array into the output buffer.
167
7
                    mutable.extend(1, 0, 1);
168
7
                }
169
4
            } else {
170
4
                mutable.extend(1, filled, start);
171
4
            }
172
27
        }
173
        // fill with truthy values
174
35
        if truthy_is_scalar {
175
20
            for _ in 
start14
..
end14
{
176
20
                // Copy the first item from the 'truthy' array into the output buffer.
177
20
                mutable.extend(0, 0, 1);
178
20
            }
179
21
        } else {
180
21
            mutable.extend(0, start, end);
181
21
        }
182
35
        filled = end;
183
35
    });
184
    // the remaining part is falsy
185
40
    if filled < mask.len() {
186
27
        if falsy_is_scalar {
187
15
            for _ in 
filled11
..
mask11
.
len11
() {
188
15
                // Copy the first item from the 'falsy' array into the output buffer.
189
15
                mutable.extend(1, 0, 1);
190
15
            }
191
16
        } else {
192
16
            mutable.extend(1, filled, mask.len());
193
16
        }
194
13
    }
195
196
40
    let data = mutable.freeze();
197
40
    Ok(make_array(data))
198
40
}
199
200
/// Zipper for 2 scalars
201
///
202
/// Useful for using in `IF <expr> THEN <scalar> ELSE <scalar> END` expressions
203
///
204
/// # Example
205
/// ```
206
/// # use std::sync::Arc;
207
/// # use arrow_array::{ArrayRef, BooleanArray, Int32Array, Scalar, cast::AsArray, types::Int32Type};
208
///
209
/// # use arrow_select::zip::ScalarZipper;
210
/// let scalar_truthy = Scalar::new(Int32Array::from_value(42, 1));
211
/// let scalar_falsy = Scalar::new(Int32Array::from_value(123, 1));
212
/// let zipper = ScalarZipper::try_new(&scalar_truthy, &scalar_falsy).unwrap();
213
///
214
/// // Later when we have a boolean mask
215
/// let mask = BooleanArray::from(vec![true, false, true, false, true]);
216
/// let result = zipper.zip(&mask).unwrap();
217
/// let actual = result.as_primitive::<Int32Type>();
218
/// let expected = Int32Array::from(vec![Some(42), Some(123), Some(42), Some(123), Some(42)]);
219
/// ```
220
///
221
#[derive(Debug, Clone)]
222
pub struct ScalarZipper {
223
    zip_impl: Arc<dyn ZipImpl>,
224
}
225
226
impl ScalarZipper {
227
    /// Try to create a new ScalarZipper from two scalar Datum
228
    ///
229
    /// # Errors
230
    /// returns error if:
231
    /// - the two Datum have different data types
232
    /// - either Datum is not a scalar (or has more than 1 element)
233
    ///
234
40
    pub fn try_new(truthy: &dyn Datum, falsy: &dyn Datum) -> Result<Self, ArrowError> {
235
40
        let (truthy, truthy_is_scalar) = truthy.get();
236
40
        let (falsy, falsy_is_scalar) = falsy.get();
237
238
40
        if truthy.data_type() != falsy.data_type() {
239
0
            return Err(ArrowError::InvalidArgumentError(
240
0
                "arguments need to have the same data type".into(),
241
0
            ));
242
40
        }
243
244
40
        if !truthy_is_scalar {
245
0
            return Err(ArrowError::InvalidArgumentError(
246
0
                "only scalar arrays are supported".into(),
247
0
            ));
248
40
        }
249
250
40
        if !falsy_is_scalar {
251
0
            return Err(ArrowError::InvalidArgumentError(
252
0
                "only scalar arrays are supported".into(),
253
0
            ));
254
40
        }
255
256
40
        if truthy.len() != 1 {
257
0
            return Err(ArrowError::InvalidArgumentError(
258
0
                "scalar arrays must have 1 element".into(),
259
0
            ));
260
40
        }
261
40
        if falsy.len() != 1 {
262
0
            return Err(ArrowError::InvalidArgumentError(
263
0
                "scalar arrays must have 1 element".into(),
264
0
            ));
265
40
        }
266
267
        macro_rules! primitive_size_helper {
268
            ($t:ty) => {
269
                Arc::new(PrimitiveScalarImpl::<$t>::new(truthy, falsy)) as Arc<dyn ZipImpl>
270
            };
271
        }
272
273
40
        let zip_impl = 
downcast_primitive!0
{
274
24
            truthy.data_type() => (primitive_size_helper),
275
            DataType::Utf8 => {
276
11
                Arc::new(BytesScalarImpl::<Utf8Type>::new(truthy, falsy)) as Arc<dyn ZipImpl>
277
            },
278
            DataType::LargeUtf8 => {
279
1
                Arc::new(BytesScalarImpl::<LargeUtf8Type>::new(truthy, falsy)) as Arc<dyn ZipImpl>
280
            },
281
            DataType::Binary => {
282
1
                Arc::new(BytesScalarImpl::<BinaryType>::new(truthy, falsy)) as Arc<dyn ZipImpl>
283
            },
284
            DataType::LargeBinary => {
285
1
                Arc::new(BytesScalarImpl::<LargeBinaryType>::new(truthy, falsy)) as Arc<dyn ZipImpl>
286
            },
287
            // TODO: Handle Utf8View https://github.com/apache/arrow-rs/issues/8724
288
            _ => {
289
1
                Arc::new(FallbackImpl::new(truthy, falsy)) as Arc<dyn ZipImpl>
290
            },
291
        };
292
293
40
        Ok(Self { zip_impl })
294
40
    }
295
296
    /// Creating output array based on input boolean array and the two scalar values the zipper was created with
297
    /// See struct level documentation for examples.
298
2
    pub fn zip(&self, mask: &BooleanArray) -> Result<ArrayRef, ArrowError> {
299
2
        self.zip_impl.create_output(mask)
300
2
    }
301
}
302
303
/// Impl for creating output array based on a mask
304
trait ZipImpl: Debug + Send + Sync {
305
    /// Creating output array based on input boolean array
306
    fn create_output(&self, input: &BooleanArray) -> Result<ArrayRef, ArrowError>;
307
}
308
309
#[derive(Debug, PartialEq)]
310
struct FallbackImpl {
311
    truthy: ArrayData,
312
    falsy: ArrayData,
313
}
314
315
impl FallbackImpl {
316
1
    fn new(left: &dyn Array, right: &dyn Array) -> Self {
317
1
        Self {
318
1
            truthy: left.to_data(),
319
1
            falsy: right.to_data(),
320
1
        }
321
1
    }
322
}
323
324
impl ZipImpl for FallbackImpl {
325
1
    fn create_output(&self, predicate: &BooleanArray) -> Result<ArrayRef, ArrowError> {
326
1
        zip_impl(predicate, &self.truthy, true, &self.falsy, true)
327
1
    }
328
}
329
330
struct PrimitiveScalarImpl<T: ArrowPrimitiveType> {
331
    data_type: DataType,
332
    truthy: Option<T::Native>,
333
    falsy: Option<T::Native>,
334
}
335
336
impl<T: ArrowPrimitiveType> Debug for PrimitiveScalarImpl<T> {
337
0
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
338
0
        f.debug_struct("PrimitiveScalarImpl")
339
0
            .field("data_type", &self.data_type)
340
0
            .field("truthy", &self.truthy)
341
0
            .field("falsy", &self.falsy)
342
0
            .finish()
343
0
    }
344
}
345
346
impl<T: ArrowPrimitiveType> PrimitiveScalarImpl<T> {
347
25
    fn new(truthy: &dyn Array, falsy: &dyn Array) -> Self {
348
25
        Self {
349
25
            data_type: truthy.data_type().clone(),
350
25
            truthy: Self::get_value_from_scalar(truthy),
351
25
            falsy: Self::get_value_from_scalar(falsy),
352
25
        }
353
25
    }
354
355
50
    fn get_value_from_scalar(scalar: &dyn Array) -> Option<T::Native> {
356
50
        if scalar.is_null(0) {
357
16
            None
358
        } else {
359
34
            let value = scalar.as_primitive::<T>().value(0);
360
361
34
            Some(value)
362
        }
363
50
    }
364
365
    /// return an output array that has
366
    /// `value` in all locations where predicate is true
367
    /// `null` otherwise
368
14
    fn get_scalar_and_null_buffer_for_single_non_nullable(
369
14
        predicate: BooleanBuffer,
370
14
        value: T::Native,
371
14
    ) -> (Vec<T::Native>, Option<NullBuffer>) {
372
14
        let result_len = predicate.len();
373
14
        let nulls = NullBuffer::new(predicate);
374
14
        let scalars = vec![value; result_len];
375
376
14
        (scalars, Some(nulls))
377
14
    }
378
}
379
380
impl<T: ArrowPrimitiveType> ZipImpl for PrimitiveScalarImpl<T> {
381
26
    fn create_output(&self, predicate: &BooleanArray) -> Result<ArrayRef, ArrowError> {
382
26
        let result_len = predicate.len();
383
        // Nulls are treated as false
384
26
        let predicate = maybe_prep_null_mask_filter(predicate);
385
386
26
        let (scalars, nulls): (Vec<T::Native>, Option<NullBuffer>) = match (self.truthy, self.falsy)
387
        {
388
11
            (Some(truthy_val), Some(falsy_val)) => {
389
11
                let scalars: Vec<T::Native> = predicate
390
11
                    .iter()
391
85
                    .
map11
(|b| if b {
truthy_val42
} else {
falsy_val43
})
392
11
                    .collect();
393
394
11
                (scalars, None)
395
            }
396
8
            (Some(truthy_val), None) => {
397
                // If a value is true we need the TRUTHY and the null buffer will have 1 (meaning not null)
398
                // If a value is false we need the FALSY and the null buffer will have 0 (meaning null)
399
400
8
                Self::get_scalar_and_null_buffer_for_single_non_nullable(predicate, truthy_val)
401
            }
402
6
            (None, Some(falsy_val)) => {
403
                // Flipping the boolean buffer as we want the opposite of the TRUE case
404
                //
405
                // if the condition is true we want null so we need to NOT the value so we get 0 (meaning null)
406
                // if the condition is false we want the FALSY value so we need to NOT the value so we get 1 (meaning not null)
407
6
                let predicate = predicate.not();
408
409
6
                Self::get_scalar_and_null_buffer_for_single_non_nullable(predicate, falsy_val)
410
            }
411
            (None, None) => {
412
                // All values are null
413
1
                let nulls = NullBuffer::new_null(result_len);
414
1
                let scalars = vec![T::default_value(); result_len];
415
416
1
                (scalars, Some(nulls))
417
            }
418
        };
419
420
26
        let scalars = ScalarBuffer::<T::Native>::from(scalars);
421
26
        let output = PrimitiveArray::<T>::try_new(scalars, nulls)
?0
;
422
423
        // Keep decimal precisions, scales or timestamps timezones
424
26
        let output = output.with_data_type(self.data_type.clone());
425
426
26
        Ok(Arc::new(output))
427
26
    }
428
}
429
430
#[derive(PartialEq, Hash)]
431
struct BytesScalarImpl<T: ByteArrayType> {
432
    truthy: Option<Vec<u8>>,
433
    falsy: Option<Vec<u8>>,
434
    phantom: PhantomData<T>,
435
}
436
437
impl<T: ByteArrayType> Debug for BytesScalarImpl<T> {
438
0
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
439
0
        f.debug_struct("BytesScalarImpl")
440
0
            .field("truthy", &self.truthy)
441
0
            .field("falsy", &self.falsy)
442
0
            .finish()
443
0
    }
444
}
445
446
impl<T: ByteArrayType> BytesScalarImpl<T> {
447
14
    fn new(truthy_value: &dyn Array, falsy_value: &dyn Array) -> Self {
448
14
        Self {
449
14
            truthy: Self::get_value_from_scalar(truthy_value),
450
14
            falsy: Self::get_value_from_scalar(falsy_value),
451
14
            phantom: PhantomData,
452
14
        }
453
14
    }
454
455
28
    fn get_value_from_scalar(scalar: &dyn Array) -> Option<Vec<u8>> {
456
28
        if scalar.is_null(0) {
457
8
            None
458
        } else {
459
20
            let bytes: &[u8] = scalar.as_bytes::<T>().value(0).as_ref();
460
461
20
            Some(bytes.to_vec())
462
        }
463
28
    }
464
465
    /// return an output array that has
466
    /// `value` in all locations where predicate is true
467
    /// `null` otherwise
468
6
    fn get_scalar_and_null_buffer_for_single_non_nullable(
469
6
        predicate: BooleanBuffer,
470
6
        value: &[u8],
471
6
    ) -> (Buffer, OffsetBuffer<T::Offset>, Option<NullBuffer>) {
472
6
        let value_length = value.len();
473
474
6
        let number_of_true = predicate.count_set_bits();
475
476
        // Fast path for all nulls
477
6
        if number_of_true == 0 {
478
            // All values are null
479
2
            let nulls = NullBuffer::new_null(predicate.len());
480
481
2
            return (
482
2
                // Empty bytes
483
2
                Buffer::from(&[]),
484
2
                // All nulls so all lengths are 0
485
2
                OffsetBuffer::<T::Offset>::new_zeroed(predicate.len()),
486
2
                Some(nulls),
487
2
            );
488
4
        }
489
490
4
        let offsets = OffsetBuffer::<T::Offset>::from_lengths(
491
20
            
predicate4
.
iter4
().
map4
(|b| if b {
value_length15
} else {
05
}),
492
        );
493
494
4
        let mut bytes = MutableBuffer::with_capacity(0);
495
4
        bytes.repeat_slice_n_times(value, number_of_true);
496
497
4
        let bytes = Buffer::from(bytes);
498
499
        // If a value is true we need the TRUTHY and the null buffer will have 1 (meaning not null)
500
        // If a value is false we need the FALSY and the null buffer will have 0 (meaning null)
501
4
        let nulls = NullBuffer::new(predicate);
502
503
4
        (bytes, offsets, Some(nulls))
504
6
    }
505
506
    /// Create a [`Buffer`] where `value` slice is repeated `number_of_values` times
507
    /// and [`OffsetBuffer`] where there are `number_of_values` lengths, and all equals to `value` length
508
2
    fn get_bytes_and_offset_for_all_same_value(
509
2
        number_of_values: usize,
510
2
        value: &[u8],
511
2
    ) -> (Buffer, OffsetBuffer<T::Offset>) {
512
2
        let value_length = value.len();
513
514
2
        let offsets =
515
2
            OffsetBuffer::<T::Offset>::from_repeated_length(value_length, number_of_values);
516
517
2
        let mut bytes = MutableBuffer::with_capacity(0);
518
2
        bytes.repeat_slice_n_times(value, number_of_values);
519
2
        let bytes = Buffer::from(bytes);
520
521
2
        (bytes, offsets)
522
2
    }
523
524
7
    fn create_output_on_non_nulls(
525
7
        predicate: &BooleanBuffer,
526
7
        truthy_val: &[u8],
527
7
        falsy_val: &[u8],
528
7
    ) -> (Buffer, OffsetBuffer<<T as ByteArrayType>::Offset>) {
529
7
        let true_count = predicate.count_set_bits();
530
531
6
        match true_count {
532
            0 => {
533
                // All values are falsy
534
535
1
                let (bytes, offsets) =
536
1
                    Self::get_bytes_and_offset_for_all_same_value(predicate.len(), falsy_val);
537
538
1
                return (bytes, offsets);
539
            }
540
6
            
n1
if n == predicate.len(
)1
=> {
541
                // All values are truthy
542
1
                let (bytes, offsets) =
543
1
                    Self::get_bytes_and_offset_for_all_same_value(predicate.len(), truthy_val);
544
545
1
                return (bytes, offsets);
546
            }
547
548
5
            _ => {
549
5
                // Fallback
550
5
            }
551
        }
552
553
5
        let total_number_of_bytes =
554
5
            true_count * truthy_val.len() + (predicate.len() - true_count) * falsy_val.len();
555
5
        let mut mutable = MutableBuffer::with_capacity(total_number_of_bytes);
556
5
        let mut offset_buffer_builder = OffsetBufferBuilder::<T::Offset>::new(predicate.len());
557
558
        // keep track of how much is filled
559
5
        let mut filled = 0;
560
561
5
        let truthy_len = truthy_val.len();
562
5
        let falsy_len = falsy_val.len();
563
564
12
        
SlicesIterator::from5
(
predicate5
).
for_each5
(|(start, end)| {
565
            // the gap needs to be filled with falsy values
566
12
            if start > filled {
567
7
                let false_repeat_count = start - filled;
568
                // Push false value `repeat_count` times
569
7
                mutable.repeat_slice_n_times(falsy_val, false_repeat_count);
570
571
7
                for _ in 0..false_repeat_count {
572
7
                    offset_buffer_builder.push_length(falsy_len)
573
                }
574
5
            }
575
576
12
            let true_repeat_count = end - start;
577
            // fill with truthy values
578
12
            mutable.repeat_slice_n_times(truthy_val, true_repeat_count);
579
580
12
            for _ in 0..true_repeat_count {
581
14
                offset_buffer_builder.push_length(truthy_len)
582
            }
583
12
            filled = end;
584
12
        });
585
        // the remaining part is falsy
586
5
        if filled < predicate.len() {
587
2
            let false_repeat_count = predicate.len() - filled;
588
            // Copy the first item from the 'falsy' array into the output buffer.
589
2
            mutable.repeat_slice_n_times(falsy_val, false_repeat_count);
590
591
2
            for _ in 0..false_repeat_count {
592
6
                offset_buffer_builder.push_length(falsy_len)
593
            }
594
3
        }
595
596
5
        (mutable.into(), offset_buffer_builder.finish())
597
7
    }
598
}
599
600
impl<T: ByteArrayType> ZipImpl for BytesScalarImpl<T> {
601
14
    fn create_output(&self, predicate: &BooleanArray) -> Result<ArrayRef, ArrowError> {
602
14
        let result_len = predicate.len();
603
        // Nulls are treated as false
604
14
        let predicate = maybe_prep_null_mask_filter(predicate);
605
606
14
        let (bytes, offsets, nulls): (Buffer, OffsetBuffer<T::Offset>, Option<NullBuffer>) =
607
14
            match (self.truthy.as_deref(), self.falsy.as_deref()) {
608
7
                (Some(truthy_val), Some(falsy_val)) => {
609
7
                    let (bytes, offsets) =
610
7
                        Self::create_output_on_non_nulls(&predicate, truthy_val, falsy_val);
611
612
7
                    (bytes, offsets, None)
613
                }
614
3
                (Some(truthy_val), None) => {
615
3
                    Self::get_scalar_and_null_buffer_for_single_non_nullable(predicate, truthy_val)
616
                }
617
3
                (None, Some(falsy_val)) => {
618
                    // Flipping the boolean buffer as we want the opposite of the TRUE case
619
                    //
620
                    // if the condition is true we want null so we need to NOT the value so we get 0 (meaning null)
621
                    // if the condition is false we want the FALSE value so we need to NOT the value so we get 1 (meaning not null)
622
3
                    let predicate = predicate.not();
623
3
                    Self::get_scalar_and_null_buffer_for_single_non_nullable(predicate, falsy_val)
624
                }
625
                (None, None) => {
626
                    // All values are null
627
1
                    let nulls = NullBuffer::new_null(result_len);
628
629
1
                    (
630
1
                        // Empty bytes
631
1
                        Buffer::from(&[]),
632
1
                        // All nulls so all lengths are 0
633
1
                        OffsetBuffer::<T::Offset>::new_zeroed(predicate.len()),
634
1
                        Some(nulls),
635
1
                    )
636
                }
637
            };
638
639
14
        let output = unsafe {
640
            // Safety: the values are based on valid inputs
641
            // and `try_new` is expensive for strings as it validate that the input is valid utf8
642
14
            GenericByteArray::<T>::new_unchecked(offsets, bytes, nulls)
643
        };
644
645
14
        Ok(Arc::new(output))
646
14
    }
647
}
648
649
80
fn maybe_prep_null_mask_filter(predicate: &BooleanArray) -> BooleanBuffer {
650
    // Nulls are treated as false
651
80
    if predicate.null_count() == 0 {
652
76
        predicate.values().clone()
653
    } else {
654
4
        let cleaned = prep_null_mask_filter(predicate);
655
4
        let (boolean_buffer, _) = cleaned.into_parts();
656
4
        boolean_buffer
657
    }
658
80
}
659
660
#[cfg(test)]
661
mod test {
662
    use super::*;
663
    use arrow_array::types::Int32Type;
664
665
    #[test]
666
1
    fn test_zip_kernel_one() {
667
1
        let a = Int32Array::from(vec![Some(5), None, Some(7), None, Some(1)]);
668
1
        let b = Int32Array::from(vec![None, Some(3), Some(6), Some(7), Some(3)]);
669
1
        let mask = BooleanArray::from(vec![true, true, false, false, true]);
670
1
        let out = zip(&mask, &a, &b).unwrap();
671
1
        let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
672
1
        let expected = Int32Array::from(vec![Some(5), None, Some(6), Some(7), Some(1)]);
673
1
        assert_eq!(actual, &expected);
674
1
    }
675
676
    #[test]
677
1
    fn test_zip_kernel_two() {
678
1
        let a = Int32Array::from(vec![Some(5), None, Some(7), None, Some(1)]);
679
1
        let b = Int32Array::from(vec![None, Some(3), Some(6), Some(7), Some(3)]);
680
1
        let mask = BooleanArray::from(vec![false, false, true, true, false]);
681
1
        let out = zip(&mask, &a, &b).unwrap();
682
1
        let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
683
1
        let expected = Int32Array::from(vec![None, Some(3), Some(7), None, Some(3)]);
684
1
        assert_eq!(actual, &expected);
685
1
    }
686
687
    #[test]
688
1
    fn test_zip_kernel_scalar_falsy_1() {
689
1
        let a = Int32Array::from(vec![Some(5), None, Some(7), None, Some(1)]);
690
691
1
        let fallback = Scalar::new(Int32Array::from_value(42, 1));
692
693
1
        let mask = BooleanArray::from(vec![true, true, false, false, true]);
694
1
        let out = zip(&mask, &a, &fallback).unwrap();
695
1
        let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
696
1
        let expected = Int32Array::from(vec![Some(5), None, Some(42), Some(42), Some(1)]);
697
1
        assert_eq!(actual, &expected);
698
1
    }
699
700
    #[test]
701
1
    fn test_zip_kernel_scalar_falsy_2() {
702
1
        let a = Int32Array::from(vec![Some(5), None, Some(7), None, Some(1)]);
703
704
1
        let fallback = Scalar::new(Int32Array::from_value(42, 1));
705
706
1
        let mask = BooleanArray::from(vec![false, false, true, true, false]);
707
1
        let out = zip(&mask, &a, &fallback).unwrap();
708
1
        let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
709
1
        let expected = Int32Array::from(vec![Some(42), Some(42), Some(7), None, Some(42)]);
710
1
        assert_eq!(actual, &expected);
711
1
    }
712
713
    #[test]
714
1
    fn test_zip_kernel_scalar_truthy_1() {
715
1
        let a = Int32Array::from(vec![Some(5), None, Some(7), None, Some(1)]);
716
717
1
        let fallback = Scalar::new(Int32Array::from_value(42, 1));
718
719
1
        let mask = BooleanArray::from(vec![true, true, false, false, true]);
720
1
        let out = zip(&mask, &fallback, &a).unwrap();
721
1
        let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
722
1
        let expected = Int32Array::from(vec![Some(42), Some(42), Some(7), None, Some(42)]);
723
1
        assert_eq!(actual, &expected);
724
1
    }
725
726
    #[test]
727
1
    fn test_zip_kernel_scalar_truthy_2() {
728
1
        let a = Int32Array::from(vec![Some(5), None, Some(7), None, Some(1)]);
729
730
1
        let fallback = Scalar::new(Int32Array::from_value(42, 1));
731
732
1
        let mask = BooleanArray::from(vec![false, false, true, true, false]);
733
1
        let out = zip(&mask, &fallback, &a).unwrap();
734
1
        let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
735
1
        let expected = Int32Array::from(vec![Some(5), None, Some(42), Some(42), Some(1)]);
736
1
        assert_eq!(actual, &expected);
737
1
    }
738
739
    #[test]
740
1
    fn test_zip_kernel_scalar_both_mask_ends_with_true() {
741
1
        let scalar_truthy = Scalar::new(Int32Array::from_value(42, 1));
742
1
        let scalar_falsy = Scalar::new(Int32Array::from_value(123, 1));
743
744
1
        let mask = BooleanArray::from(vec![true, true, false, false, true]);
745
1
        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
746
1
        let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
747
1
        let expected = Int32Array::from(vec![Some(42), Some(42), Some(123), Some(123), Some(42)]);
748
1
        assert_eq!(actual, &expected);
749
1
    }
750
751
    #[test]
752
1
    fn test_zip_kernel_scalar_both_mask_ends_with_false() {
753
1
        let scalar_truthy = Scalar::new(Int32Array::from_value(42, 1));
754
1
        let scalar_falsy = Scalar::new(Int32Array::from_value(123, 1));
755
756
1
        let mask = BooleanArray::from(vec![true, true, false, true, false, false]);
757
1
        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
758
1
        let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
759
1
        let expected = Int32Array::from(vec![
760
1
            Some(42),
761
1
            Some(42),
762
1
            Some(123),
763
1
            Some(42),
764
1
            Some(123),
765
1
            Some(123),
766
        ]);
767
1
        assert_eq!(actual, &expected);
768
1
    }
769
770
    #[test]
771
1
    fn test_zip_kernel_primitive_scalar_none_1() {
772
1
        let scalar_truthy = Scalar::new(Int32Array::from_value(42, 1));
773
1
        let scalar_falsy = Scalar::new(Int32Array::new_null(1));
774
775
1
        let mask = BooleanArray::from(vec![true, true, false, false, true]);
776
1
        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
777
1
        let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
778
1
        let expected = Int32Array::from(vec![Some(42), Some(42), None, None, Some(42)]);
779
1
        assert_eq!(actual, &expected);
780
1
    }
781
782
    #[test]
783
1
    fn test_zip_kernel_primitive_scalar_none_2() {
784
1
        let scalar_truthy = Scalar::new(Int32Array::from_value(42, 1));
785
1
        let scalar_falsy = Scalar::new(Int32Array::new_null(1));
786
787
1
        let mask = BooleanArray::from(vec![false, false, true, true, false]);
788
1
        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
789
1
        let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
790
1
        let expected = Int32Array::from(vec![None, None, Some(42), Some(42), None]);
791
1
        assert_eq!(actual, &expected);
792
1
    }
793
794
    #[test]
795
1
    fn test_zip_kernel_primitive_scalar_both_null() {
796
1
        let scalar_truthy = Scalar::new(Int32Array::new_null(1));
797
1
        let scalar_falsy = Scalar::new(Int32Array::new_null(1));
798
799
1
        let mask = BooleanArray::from(vec![false, false, true, true, false]);
800
1
        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
801
1
        let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
802
1
        let expected = Int32Array::from(vec![None, None, None, None, None]);
803
1
        assert_eq!(actual, &expected);
804
1
    }
805
806
    #[test]
807
1
    fn test_zip_primitive_array_with_nulls_is_mask_should_be_treated_as_false() {
808
1
        let truthy = Int32Array::from_iter_values(vec![1, 2, 3, 4, 5, 6]);
809
1
        let falsy = Int32Array::from_iter_values(vec![7, 8, 9, 10, 11, 12]);
810
811
1
        let mask = {
812
1
            let booleans = BooleanBuffer::from(vec![true, true, false, true, false, false]);
813
1
            let nulls = NullBuffer::from(vec![
814
                true, true, true,
815
                false, // null treated as false even though in the original mask it was true
816
                true, true,
817
            ]);
818
1
            BooleanArray::new(booleans, Some(nulls))
819
        };
820
1
        let out = zip(&mask, &truthy, &falsy).unwrap();
821
1
        let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
822
1
        let expected = Int32Array::from(vec![
823
1
            Some(1),
824
1
            Some(2),
825
1
            Some(9),
826
1
            Some(10), // true in mask but null
827
1
            Some(11),
828
1
            Some(12),
829
        ]);
830
1
        assert_eq!(actual, &expected);
831
1
    }
832
833
    #[test]
834
1
    fn test_zip_kernel_primitive_scalar_with_boolean_array_mask_with_nulls_should_be_treated_as_false()
835
     {
836
1
        let scalar_truthy = Scalar::new(Int32Array::from_value(42, 1));
837
1
        let scalar_falsy = Scalar::new(Int32Array::from_value(123, 1));
838
839
1
        let mask = {
840
1
            let booleans = BooleanBuffer::from(vec![true, true, false, true, false, false]);
841
1
            let nulls = NullBuffer::from(vec![
842
                true, true, true,
843
                false, // null treated as false even though in the original mask it was true
844
                true, true,
845
            ]);
846
1
            BooleanArray::new(booleans, Some(nulls))
847
        };
848
1
        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
849
1
        let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
850
1
        let expected = Int32Array::from(vec![
851
1
            Some(42),
852
1
            Some(42),
853
1
            Some(123),
854
1
            Some(123), // true in mask but null
855
1
            Some(123),
856
1
            Some(123),
857
        ]);
858
1
        assert_eq!(actual, &expected);
859
1
    }
860
861
    #[test]
862
1
    fn test_zip_string_array_with_nulls_is_mask_should_be_treated_as_false() {
863
1
        let truthy = StringArray::from_iter_values(vec!["1", "2", "3", "4", "5", "6"]);
864
1
        let falsy = StringArray::from_iter_values(vec!["7", "8", "9", "10", "11", "12"]);
865
866
1
        let mask = {
867
1
            let booleans = BooleanBuffer::from(vec![true, true, false, true, false, false]);
868
1
            let nulls = NullBuffer::from(vec![
869
                true, true, true,
870
                false, // null treated as false even though in the original mask it was true
871
                true, true,
872
            ]);
873
1
            BooleanArray::new(booleans, Some(nulls))
874
        };
875
1
        let out = zip(&mask, &truthy, &falsy).unwrap();
876
1
        let actual = out.as_string::<i32>();
877
1
        let expected = StringArray::from_iter_values(vec![
878
1
            "1", "2", "9", "10", // true in mask but null
879
1
            "11", "12",
880
        ]);
881
1
        assert_eq!(actual, &expected);
882
1
    }
883
884
    #[test]
885
1
    fn test_zip_kernel_large_string_scalar_with_boolean_array_mask_with_nulls_should_be_treated_as_false()
886
     {
887
1
        let scalar_truthy = Scalar::new(LargeStringArray::from_iter_values(["test"]));
888
1
        let scalar_falsy = Scalar::new(LargeStringArray::from_iter_values(["something else"]));
889
890
1
        let mask = {
891
1
            let booleans = BooleanBuffer::from(vec![true, true, false, true, false, false]);
892
1
            let nulls = NullBuffer::from(vec![
893
                true, true, true,
894
                false, // null treated as false even though in the original mask it was true
895
                true, true,
896
            ]);
897
1
            BooleanArray::new(booleans, Some(nulls))
898
        };
899
1
        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
900
1
        let actual = out.as_any().downcast_ref::<LargeStringArray>().unwrap();
901
1
        let expected = LargeStringArray::from_iter(vec![
902
1
            Some("test"),
903
1
            Some("test"),
904
1
            Some("something else"),
905
1
            Some("something else"), // true in mask but null
906
1
            Some("something else"),
907
1
            Some("something else"),
908
        ]);
909
1
        assert_eq!(actual, &expected);
910
1
    }
911
912
    #[test]
913
1
    fn test_zip_kernel_bytes_scalar_none_1() {
914
1
        let scalar_truthy = Scalar::new(StringArray::from_iter_values(["hello"]));
915
1
        let scalar_falsy = Scalar::new(StringArray::new_null(1));
916
917
1
        let mask = BooleanArray::from(vec![true, true, false, false, true]);
918
1
        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
919
1
        let actual = out.as_any().downcast_ref::<StringArray>().unwrap();
920
1
        let expected = StringArray::from_iter(vec![
921
1
            Some("hello"),
922
1
            Some("hello"),
923
1
            None,
924
1
            None,
925
1
            Some("hello"),
926
        ]);
927
1
        assert_eq!(actual, &expected);
928
1
    }
929
930
    #[test]
931
1
    fn test_zip_kernel_bytes_scalar_none_2() {
932
1
        let scalar_truthy = Scalar::new(StringArray::new_null(1));
933
1
        let scalar_falsy = Scalar::new(StringArray::from_iter_values(["hello"]));
934
935
1
        let mask = BooleanArray::from(vec![true, true, false, false, true]);
936
1
        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
937
1
        let actual = out.as_any().downcast_ref::<StringArray>().unwrap();
938
1
        let expected = StringArray::from_iter(vec![None, None, Some("hello"), Some("hello"), None]);
939
1
        assert_eq!(actual, &expected);
940
1
    }
941
942
    #[test]
943
1
    fn test_zip_kernel_bytes_scalar_both() {
944
1
        let scalar_truthy = Scalar::new(StringArray::from_iter_values(["test"]));
945
1
        let scalar_falsy = Scalar::new(StringArray::from_iter_values(["something else"]));
946
947
        // mask ends with false
948
1
        let mask = BooleanArray::from(vec![true, true, false, true, false, false]);
949
1
        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
950
1
        let actual = out.as_any().downcast_ref::<StringArray>().unwrap();
951
1
        let expected = StringArray::from_iter(vec![
952
1
            Some("test"),
953
1
            Some("test"),
954
1
            Some("something else"),
955
1
            Some("test"),
956
1
            Some("something else"),
957
1
            Some("something else"),
958
        ]);
959
1
        assert_eq!(actual, &expected);
960
1
    }
961
962
    #[test]
963
1
    fn test_zip_scalar_bytes_only_taking_one_side() {
964
1
        let mask_len = 5;
965
1
        let all_true_mask = BooleanArray::from(vec![true; mask_len]);
966
1
        let all_false_mask = BooleanArray::from(vec![false; mask_len]);
967
968
1
        let null_scalar = Scalar::new(StringArray::new_null(1));
969
1
        let non_null_scalar_1 = Scalar::new(StringArray::from_iter_values(["test"]));
970
1
        let non_null_scalar_2 = Scalar::new(StringArray::from_iter_values(["something else"]));
971
972
        {
973
            // 1. Test where left is null and right is non-null
974
            //    and mask is all true
975
1
            let out = zip(&all_true_mask, &null_scalar, &non_null_scalar_1).unwrap();
976
1
            let actual = out.as_string::<i32>();
977
1
            let expected = StringArray::from_iter(std::iter::repeat_n(None::<&str>, mask_len));
978
1
            assert_eq!(actual, &expected);
979
        }
980
981
        {
982
            // 2. Test where left is null and right is non-null
983
            //    and mask is all false
984
1
            let out = zip(&all_false_mask, &null_scalar, &non_null_scalar_1).unwrap();
985
1
            let actual = out.as_string::<i32>();
986
1
            let expected = StringArray::from_iter(std::iter::repeat_n(Some("test"), mask_len));
987
1
            assert_eq!(actual, &expected);
988
        }
989
990
        {
991
            // 3. Test where left is non-null and right is null
992
            //    and mask is all true
993
1
            let out = zip(&all_true_mask, &non_null_scalar_1, &null_scalar).unwrap();
994
1
            let actual = out.as_string::<i32>();
995
1
            let expected = StringArray::from_iter(std::iter::repeat_n(Some("test"), mask_len));
996
1
            assert_eq!(actual, &expected);
997
        }
998
999
        {
1000
            // 4. Test where left is non-null and right is null
1001
            //    and mask is all false
1002
1
            let out = zip(&all_false_mask, &non_null_scalar_1, &null_scalar).unwrap();
1003
1
            let actual = out.as_string::<i32>();
1004
1
            let expected = StringArray::from_iter(std::iter::repeat_n(None::<&str>, mask_len));
1005
1
            assert_eq!(actual, &expected);
1006
        }
1007
1008
        {
1009
            // 5. Test where both left and right are not null
1010
            //    and mask is all true
1011
1
            let out = zip(&all_true_mask, &non_null_scalar_1, &non_null_scalar_2).unwrap();
1012
1
            let actual = out.as_string::<i32>();
1013
1
            let expected = StringArray::from_iter(std::iter::repeat_n(Some("test"), mask_len));
1014
1
            assert_eq!(actual, &expected);
1015
        }
1016
1017
        {
1018
            // 6. Test where both left and right are not null
1019
            //    and mask is all false
1020
1
            let out = zip(&all_false_mask, &non_null_scalar_1, &non_null_scalar_2).unwrap();
1021
1
            let actual = out.as_string::<i32>();
1022
1
            let expected =
1023
1
                StringArray::from_iter(std::iter::repeat_n(Some("something else"), mask_len));
1024
1
            assert_eq!(actual, &expected);
1025
        }
1026
1027
        {
1028
            // 7. Test where both left and right are null
1029
            //    and mask is random
1030
1
            let mask = BooleanArray::from(vec![true, false, true, false, true]);
1031
1
            let out = zip(&mask, &null_scalar, &null_scalar).unwrap();
1032
1
            let actual = out.as_string::<i32>();
1033
1
            let expected = StringArray::from_iter(std::iter::repeat_n(None::<&str>, mask_len));
1034
1
            assert_eq!(actual, &expected);
1035
        }
1036
1
    }
1037
1038
    #[test]
1039
1
    fn test_scalar_zipper() {
1040
1
        let scalar_truthy = Scalar::new(Int32Array::from_value(42, 1));
1041
1
        let scalar_falsy = Scalar::new(Int32Array::from_value(123, 1));
1042
1043
1
        let mask = BooleanArray::from(vec![false, false, true, true, false]);
1044
1045
1
        let scalar_zipper = ScalarZipper::try_new(&scalar_truthy, &scalar_falsy).unwrap();
1046
1
        let out = scalar_zipper.zip(&mask).unwrap();
1047
1
        let actual = out.as_primitive::<Int32Type>();
1048
1
        let expected = Int32Array::from(vec![Some(123), Some(123), Some(42), Some(42), Some(123)]);
1049
1
        assert_eq!(actual, &expected);
1050
1051
        // test with different mask length as well
1052
1
        let mask = BooleanArray::from(vec![true, false, true]);
1053
1
        let out = scalar_zipper.zip(&mask).unwrap();
1054
1
        let actual = out.as_primitive::<Int32Type>();
1055
1
        let expected = Int32Array::from(vec![Some(42), Some(123), Some(42)]);
1056
1
        assert_eq!(actual, &expected);
1057
1
    }
1058
1059
    #[test]
1060
1
    fn test_zip_kernel_scalar_strings() {
1061
1
        let scalar_truthy = Scalar::new(StringArray::from(vec!["hello"]));
1062
1
        let scalar_falsy = Scalar::new(StringArray::from(vec!["world"]));
1063
1064
1
        let mask = BooleanArray::from(vec![true, false, true, false, true]);
1065
1
        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
1066
1
        let actual = out.as_string::<i32>();
1067
1
        let expected = StringArray::from(vec![
1068
1
            Some("hello"),
1069
1
            Some("world"),
1070
1
            Some("hello"),
1071
1
            Some("world"),
1072
1
            Some("hello"),
1073
        ]);
1074
1
        assert_eq!(actual, &expected);
1075
1
    }
1076
1077
    #[test]
1078
1
    fn test_zip_kernel_scalar_binary() {
1079
1
        let truthy_bytes: &[u8] = b"\xFF\xFE\xFD";
1080
1
        let falsy_bytes: &[u8] = b"world";
1081
1
        let scalar_truthy = Scalar::new(BinaryArray::from_iter_values(
1082
            // Non valid UTF8 bytes
1083
1
            vec![truthy_bytes],
1084
        ));
1085
1
        let scalar_falsy = Scalar::new(BinaryArray::from_iter_values(vec![falsy_bytes]));
1086
1087
1
        let mask = BooleanArray::from(vec![true, false, true, false, true]);
1088
1
        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
1089
1
        let actual = out.as_binary::<i32>();
1090
1
        let expected = BinaryArray::from(vec![
1091
1
            Some(truthy_bytes),
1092
1
            Some(falsy_bytes),
1093
1
            Some(truthy_bytes),
1094
1
            Some(falsy_bytes),
1095
1
            Some(truthy_bytes),
1096
        ]);
1097
1
        assert_eq!(actual, &expected);
1098
1
    }
1099
1100
    #[test]
1101
1
    fn test_zip_kernel_scalar_large_binary() {
1102
1
        let truthy_bytes: &[u8] = b"hey";
1103
1
        let falsy_bytes: &[u8] = b"world";
1104
1
        let scalar_truthy = Scalar::new(LargeBinaryArray::from_iter_values(vec![truthy_bytes]));
1105
1
        let scalar_falsy = Scalar::new(LargeBinaryArray::from_iter_values(vec![falsy_bytes]));
1106
1107
1
        let mask = BooleanArray::from(vec![true, false, true, false, true]);
1108
1
        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
1109
1
        let actual = out.as_binary::<i64>();
1110
1
        let expected = LargeBinaryArray::from(vec![
1111
1
            Some(truthy_bytes),
1112
1
            Some(falsy_bytes),
1113
1
            Some(truthy_bytes),
1114
1
            Some(falsy_bytes),
1115
1
            Some(truthy_bytes),
1116
        ]);
1117
1
        assert_eq!(actual, &expected);
1118
1
    }
1119
1120
    // Test to ensure that the precision and scale are kept when zipping Decimal128 data
1121
    #[test]
1122
1
    fn test_zip_decimal_with_custom_precision_and_scale() {
1123
1
        let arr = Decimal128Array::from_iter_values([12345, 456, 7890, -123223423432432])
1124
1
            .with_precision_and_scale(20, 2)
1125
1
            .unwrap();
1126
1127
1
        let arr: ArrayRef = Arc::new(arr);
1128
1129
1
        let scalar_1 = Scalar::new(arr.slice(0, 1));
1130
1
        let scalar_2 = Scalar::new(arr.slice(1, 1));
1131
1
        let null_scalar = Scalar::new(new_null_array(arr.data_type(), 1));
1132
1
        let array_1: ArrayRef = arr.slice(0, 2);
1133
1
        let array_2: ArrayRef = arr.slice(2, 2);
1134
1135
1
        test_zip_output_data_types_for_input(scalar_1, scalar_2, null_scalar, array_1, array_2);
1136
1
    }
1137
1138
    // Test to ensure that the timezone is kept when zipping TimestampArray data
1139
    #[test]
1140
1
    fn test_zip_timestamp_with_timezone() {
1141
1
        let arr = TimestampSecondArray::from(vec![0, 1000, 2000, 4000])
1142
1
            .with_timezone("+01:00".to_string());
1143
1144
1
        let arr: ArrayRef = Arc::new(arr);
1145
1146
1
        let scalar_1 = Scalar::new(arr.slice(0, 1));
1147
1
        let scalar_2 = Scalar::new(arr.slice(1, 1));
1148
1
        let null_scalar = Scalar::new(new_null_array(arr.data_type(), 1));
1149
1
        let array_1: ArrayRef = arr.slice(0, 2);
1150
1
        let array_2: ArrayRef = arr.slice(2, 2);
1151
1152
1
        test_zip_output_data_types_for_input(scalar_1, scalar_2, null_scalar, array_1, array_2);
1153
1
    }
1154
1155
2
    fn test_zip_output_data_types_for_input(
1156
2
        scalar_1: Scalar<ArrayRef>,
1157
2
        scalar_2: Scalar<ArrayRef>,
1158
2
        null_scalar: Scalar<ArrayRef>,
1159
2
        array_1: ArrayRef,
1160
2
        array_2: ArrayRef,
1161
2
    ) {
1162
        // non null Scalar vs non null Scalar
1163
2
        test_zip_output_data_type(&scalar_1, &scalar_2, 10);
1164
1165
        // null Scalar vs non-null Scalar (and vice versa)
1166
2
        test_zip_output_data_type(&null_scalar, &scalar_1, 10);
1167
2
        test_zip_output_data_type(&scalar_1, &null_scalar, 10);
1168
1169
        // non-null Scalar and array (and vice versa)
1170
2
        test_zip_output_data_type(&array_1.as_ref(), &scalar_1, array_1.len());
1171
2
        test_zip_output_data_type(&scalar_1, &array_1.as_ref(), array_1.len());
1172
1173
        // Array and null scalar (and vice versa)
1174
2
        test_zip_output_data_type(&array_1.as_ref(), &null_scalar, array_1.len());
1175
1176
2
        test_zip_output_data_type(&null_scalar, &array_1.as_ref(), array_1.len());
1177
1178
        // Both arrays
1179
2
        test_zip_output_data_type(&array_1.as_ref(), &array_2.as_ref(), array_1.len());
1180
2
    }
1181
1182
16
    fn test_zip_output_data_type(truthy: &dyn Datum, falsy: &dyn Datum, mask_length: usize) {
1183
16
        let expected_data_type = truthy.get().0.data_type().clone();
1184
16
        assert_eq!(&expected_data_type, falsy.get().0.data_type());
1185
1186
        // Try different masks to test different paths
1187
16
        let mask_all_true = BooleanArray::from(vec![true; mask_length]);
1188
16
        let mask_all_false = BooleanArray::from(vec![false; mask_length]);
1189
16
        let mask_some_true_and_false =
1190
80
            
BooleanArray::from16
(
(0..mask_length)16
.
map16
(|i| i % 2 == 0).
collect16
::<Vec<bool>>());
1191
1192
48
        for mask in [
&mask_all_true16
,
&mask_all_false16
,
&mask_some_true_and_false16
] {
1193
48
            let out = zip(mask, truthy, falsy).unwrap();
1194
48
            assert_eq!(out.data_type(), &expected_data_type);
1195
        }
1196
16
    }
1197
1198
    #[test]
1199
1
    fn zip_scalar_fallback_impl() {
1200
1
        let truthy_list_item_scalar = Some(vec![Some(1), None, Some(3)]);
1201
1
        let truthy_list_array_scalar =
1202
1
            Scalar::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
1203
1
                truthy_list_item_scalar.clone(),
1204
            ]));
1205
1
        let falsy_list_item_scalar = Some(vec![None, Some(2), Some(4)]);
1206
1
        let falsy_list_array_scalar =
1207
1
            Scalar::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
1208
1
                falsy_list_item_scalar.clone(),
1209
            ]));
1210
1
        let mask = BooleanArray::from(vec![true, false, true, false, false, true, false]);
1211
1
        let out = zip(&mask, &truthy_list_array_scalar, &falsy_list_array_scalar).unwrap();
1212
1
        let actual = out.as_list::<i32>();
1213
1214
1
        let expected = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
1215
1
            truthy_list_item_scalar.clone(),
1216
1
            falsy_list_item_scalar.clone(),
1217
1
            truthy_list_item_scalar.clone(),
1218
1
            falsy_list_item_scalar.clone(),
1219
1
            falsy_list_item_scalar.clone(),
1220
1
            truthy_list_item_scalar.clone(),
1221
1
            falsy_list_item_scalar.clone(),
1222
        ]);
1223
1
        assert_eq!(actual, &expected);
1224
1
    }
1225
}