Coverage Report

Created: 2025-11-17 14:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-select/src/union_extract.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! Defines union_extract kernel for [UnionArray]
19
20
use crate::take::take;
21
use arrow_array::{
22
    Array, ArrayRef, BooleanArray, Int32Array, Scalar, UnionArray, make_array, new_empty_array,
23
    new_null_array,
24
};
25
use arrow_buffer::{BooleanBuffer, MutableBuffer, NullBuffer, ScalarBuffer, bit_util};
26
use arrow_data::layout;
27
use arrow_schema::{ArrowError, DataType, UnionFields};
28
use std::cmp::Ordering;
29
use std::sync::Arc;
30
31
/// Returns the value of the target field when selected, or NULL otherwise.
32
/// ```text
33
/// ┌─────────────────┐                                   ┌─────────────────┐
34
/// │       A=1       │                                   │        1        │
35
/// ├─────────────────┤                                   ├─────────────────┤
36
/// │      A=NULL     │                                   │       NULL      │
37
/// ├─────────────────┤    union_extract(values, 'A')     ├─────────────────┤
38
/// │      B='t'      │  ────────────────────────────▶    │       NULL      │
39
/// ├─────────────────┤                                   ├─────────────────┤
40
/// │       A=3       │                                   │        3        │
41
/// ├─────────────────┤                                   ├─────────────────┤
42
/// │      B=NULL     │                                   │       NULL      │
43
/// └─────────────────┘                                   └─────────────────┘
44
///    union array                                              result
45
/// ```
46
/// # Errors
47
///
48
/// Returns error if target field is not found
49
///
50
/// # Examples
51
/// ```
52
/// # use std::sync::Arc;
53
/// # use arrow_schema::{DataType, Field, UnionFields};
54
/// # use arrow_array::{UnionArray, StringArray, Int32Array};
55
/// # use arrow_select::union_extract::union_extract;
56
/// let fields = UnionFields::new(
57
///     [1, 3],
58
///     [
59
///         Field::new("A", DataType::Int32, true),
60
///         Field::new("B", DataType::Utf8, true)
61
///     ]
62
/// );
63
///
64
/// let union = UnionArray::try_new(
65
///     fields,
66
///     vec![1, 1, 3, 1, 3].into(),
67
///     None,
68
///     vec![
69
///         Arc::new(Int32Array::from(vec![Some(1), None, None, Some(3), Some(0)])),
70
///         Arc::new(StringArray::from(vec![None, None, Some("t"), Some("."), None]))
71
///     ]
72
/// ).unwrap();
73
///
74
/// // Extract field A
75
/// let extracted = union_extract(&union, "A").unwrap();
76
///
77
/// assert_eq!(*extracted, Int32Array::from(vec![Some(1), None, None, Some(3), None]));
78
/// ```
79
32
pub fn union_extract(union_array: &UnionArray, target: &str) -> Result<ArrayRef, ArrowError> {
80
32
    let fields = match union_array.data_type() {
81
32
        DataType::Union(fields, _) => fields,
82
0
        _ => unreachable!(),
83
    };
84
85
32
    let (
target_type_id30
, _) = fields
86
32
        .iter()
87
39
        .
find32
(|field| field.1.name() == target)
88
32
        .ok_or_else(|| 
{2
89
2
            ArrowError::InvalidArgumentError(format!("field {target} not found on union"))
90
2
        })?;
91
92
30
    match union_array.offsets() {
93
20
        Some(_) => extract_dense(union_array, fields, target_type_id),
94
10
        None => extract_sparse(union_array, fields, target_type_id),
95
    }
96
32
}
97
98
10
fn extract_sparse(
99
10
    union_array: &UnionArray,
100
10
    fields: &UnionFields,
101
10
    target_type_id: i8,
102
10
) -> Result<ArrayRef, ArrowError> {
103
10
    let target = union_array.child(target_type_id);
104
105
10
    if fields.len() == 1 // case 1.1: if there is a single field, all type ids are the same, and since union doesn't have a null mask, the result array is exactly the same as it only child
106
9
        || union_array.is_empty() // case 1.2: sparse union length and childrens length must match, if the union is empty, so is any children
107
8
        || target.null_count() == target.len() || 
target.data_type()7
.
is_null7
()
108
    // case 1.3: if all values of the target children are null, regardless of selected type ids, the result will also be completely null
109
    {
110
4
        Ok(Arc::clone(target))
111
    } else {
112
6
        match eq_scalar(union_array.type_ids(), target_type_id) {
113
            // case 2: all type ids equals our target, and since unions doesn't have a null mask, the result array is exactly the same as our target
114
1
            BoolValue::Scalar(true) => Ok(Arc::clone(target)),
115
            // case 3: none type_id matches our target, the result is a null array
116
            BoolValue::Scalar(false) => {
117
2
                if layout(target.data_type()).can_contain_null_mask {
118
                    // case 3.1: target array can contain a null mask
119
                    //SAFETY: The only change to the array data is the addition of a null mask, and if the target data type can contain a null mask was just checked above
120
1
                    let data = unsafe {
121
1
                        target
122
1
                            .into_data()
123
1
                            .into_builder()
124
1
                            .nulls(Some(NullBuffer::new_null(target.len())))
125
1
                            .build_unchecked()
126
                    };
127
128
1
                    Ok(make_array(data))
129
                } else {
130
                    // case 3.2: target can't contain a null mask
131
1
                    Ok(new_null_array(target.data_type(), target.len()))
132
                }
133
            }
134
            // case 4: some but not all type_id matches our target
135
3
            BoolValue::Buffer(selected) => {
136
3
                if layout(target.data_type()).can_contain_null_mask {
137
                    // case 4.1: target array can contain a null mask
138
2
                    let nulls = match target.nulls().filter(|n| 
n1
.
null_count1
() > 0) {
139
                        // case 4.1.1: our target child has nulls and types other than our target are selected, union the masks
140
                        // the case where n.null_count() == n.len() is cheaply handled at case 1.3
141
1
                        Some(nulls) => &selected & nulls.inner(),
142
                        // case 4.1.2: target child has no nulls, but types other than our target are selected, use the selected mask as a null mask
143
1
                        None => selected,
144
                    };
145
146
                    //SAFETY: The only change to the array data is the addition of a null mask, and if the target data type can contain a null mask was just checked above
147
2
                    let data = unsafe {
148
2
                        assert_eq!(nulls.len(), target.len());
149
150
2
                        target
151
2
                            .into_data()
152
2
                            .into_builder()
153
2
                            .nulls(Some(nulls.into()))
154
2
                            .build_unchecked()
155
                    };
156
157
2
                    Ok(make_array(data))
158
                } else {
159
                    // case 4.2: target can't containt a null mask, zip the values that match with a null value
160
1
                    Ok(crate::zip::zip(
161
1
                        &BooleanArray::new(selected, None),
162
1
                        target,
163
1
                        &Scalar::new(new_null_array(target.data_type(), 1)),
164
0
                    )?)
165
                }
166
            }
167
        }
168
    }
169
10
}
170
171
20
fn extract_dense(
172
20
    union_array: &UnionArray,
173
20
    fields: &UnionFields,
174
20
    target_type_id: i8,
175
20
) -> Result<ArrayRef, ArrowError> {
176
20
    let target = union_array.child(target_type_id);
177
20
    let offsets = union_array.offsets().unwrap();
178
179
20
    if union_array.is_empty() {
180
        // case 1: the union is empty
181
2
        if target.is_empty() {
182
            // case 1.1: the target is also empty, do a cheap Arc::clone instead of allocating a new empty array
183
1
            Ok(Arc::clone(target))
184
        } else {
185
            // case 1.2: the target is not empty, allocate a new empty array
186
1
            Ok(new_empty_array(target.data_type()))
187
        }
188
18
    } else if target.is_empty() {
189
        // case 2: the union is not empty but the target is, which implies that none type_id points to it. The result is a null array
190
1
        Ok(new_null_array(target.data_type(), union_array.len()))
191
17
    } else if target.null_count() == target.len() || 
target.data_type()14
.
is_null14
() {
192
        // case 3: since all values on our target are null, regardless of selected type ids and offsets, the result is a null array
193
3
        match target.len().cmp(&union_array.len()) {
194
            // case 3.1: since the target is smaller than the union, allocate a new correclty sized null array
195
1
            Ordering::Less => Ok(new_null_array(target.data_type(), union_array.len())),
196
            // case 3.2: target equals the union len, return it direcly
197
1
            Ordering::Equal => Ok(Arc::clone(target)),
198
            // case 3.3: target len is bigger than the union len, slice it
199
1
            Ordering::Greater => Ok(target.slice(0, union_array.len())),
200
        }
201
14
    } else if fields.len() == 1 // case A: since there's a single field, our target, every type id must matches our target
202
11
        || fields
203
11
            .iter()
204
20
            .
filter11
(|(field_type_id, _)| *field_type_id != target_type_id)
205
11
            .all(|(sibling_type_id, _)| union_array.child(sibling_type_id).is_empty())
206
    // case B: since siblings are empty, every type id must matches our target
207
    {
208
        // case 4: every type id matches our target
209
6
        Ok(extract_dense_all_selected(union_array, target, offsets)
?0
)
210
    } else {
211
8
        match eq_scalar(union_array.type_ids(), target_type_id) {
212
            // case 4C: all type ids matches our target.
213
            // Non empty sibling without any selected value may happen after slicing the parent union,
214
            // since only type_ids and offsets are sliced, not the children
215
            BoolValue::Scalar(true) => {
216
3
                Ok(extract_dense_all_selected(union_array, target, offsets)
?0
)
217
            }
218
            BoolValue::Scalar(false) => {
219
                // case 5: none type_id matches our target, so the result array will be completely null
220
                // Non empty target without any selected value may happen after slicing the parent union,
221
                // since only type_ids and offsets are sliced, not the children
222
4
                match (target.len().cmp(&union_array.len()), layout(target.data_type()).can_contain_null_mask) {
223
                    (Ordering::Less, _) // case 5.1A: our target is smaller than the parent union, allocate a new correclty sized null array
224
                    | (_, false) => { // case 5.1B: target array can't contain a null mask
225
2
                        Ok(new_null_array(target.data_type(), union_array.len()))
226
                    }
227
                    // case 5.2: target and parent union lengths are equal, and the target can contain a null mask, let's set it to a all-null null-buffer
228
                    (Ordering::Equal, true) => {
229
                        //SAFETY: The only change to the array data is the addition of a null mask, and if the target data type can contain a null mask was just checked above
230
1
                        let data = unsafe {
231
1
                            target
232
1
                                .into_data()
233
1
                                .into_builder()
234
1
                                .nulls(Some(NullBuffer::new_null(union_array.len())))
235
1
                                .build_unchecked()
236
                        };
237
238
1
                        Ok(make_array(data))
239
                    }
240
                    // case 5.3: target is bigger than it's parent union and can contain a null mask, let's slice it, and set it's nulls to a all-null null-buffer
241
                    (Ordering::Greater, true) => {
242
                        //SAFETY: The only change to the array data is the addition of a null mask, and if the target data type can contain a null mask was just checked above
243
1
                        let data = unsafe {
244
1
                            target
245
1
                                .into_data()
246
1
                                .slice(0, union_array.len())
247
1
                                .into_builder()
248
1
                                .nulls(Some(NullBuffer::new_null(union_array.len())))
249
1
                                .build_unchecked()
250
                        };
251
252
1
                        Ok(make_array(data))
253
                    }
254
                }
255
            }
256
1
            BoolValue::Buffer(selected) => {
257
                //case 6: some type_ids matches our target, but not all. For selected values, take the value pointed by the offset. For unselected, use a valid null
258
1
                Ok(take(
259
1
                    target,
260
1
                    &Int32Array::try_new(offsets.clone(), Some(selected.into()))
?0
,
261
1
                    None,
262
0
                )?)
263
            }
264
        }
265
    }
266
20
}
267
268
9
fn extract_dense_all_selected(
269
9
    union_array: &UnionArray,
270
9
    target: &Arc<dyn Array>,
271
9
    offsets: &ScalarBuffer<i32>,
272
9
) -> Result<ArrayRef, ArrowError> {
273
9
    let sequential =
274
9
        target.len() - offsets[0] as usize >= union_array.len() && is_sequential(offsets);
275
276
9
    if sequential && 
target6
.
len6
() == union_array.len() {
277
        // case 1: all offsets are sequential and both lengths match, return the array directly
278
3
        Ok(Arc::clone(target))
279
6
    } else if sequential && 
target3
.
len3
() > union_array.len() {
280
        // case 2: All offsets are sequential, but our target is bigger than our union, slice it, starting at the first offset
281
3
        Ok(target.slice(offsets[0] as usize, union_array.len()))
282
    } else {
283
        // case 3: Since offsets are not sequential, take them from the child to a new sequential and correcly sized array
284
3
        let indices = Int32Array::try_new(offsets.clone(), None)
?0
;
285
286
3
        Ok(take(target, &indices, None)
?0
)
287
    }
288
9
}
289
290
const EQ_SCALAR_CHUNK_SIZE: usize = 512;
291
292
/// The result of checking which type_ids matches the target type_id
293
#[derive(Debug, PartialEq)]
294
enum BoolValue {
295
    /// If true, all type_ids matches the target type_id
296
    /// If false, none type_ids matches the target type_id
297
    Scalar(bool),
298
    /// A mask represeting which type_ids matches the target type_id
299
    Buffer(BooleanBuffer),
300
}
301
302
14
fn eq_scalar(type_ids: &[i8], target: i8) -> BoolValue {
303
14
    eq_scalar_inner(EQ_SCALAR_CHUNK_SIZE, type_ids, target)
304
14
}
305
306
1.56k
fn count_first_run(chunk_size: usize, type_ids: &[i8], mut f: impl FnMut(i8) -> bool) -> usize {
307
1.56k
    type_ids
308
1.56k
        .chunks(chunk_size)
309
133k
        .
take_while1.56k
(|chunk|
chunk44.7k
.
iter44.7k
().
copied44.7k
().
fold44.7k
(true, |b, v| b & f(v)))
310
43.7k
        .
map1.56k
(|chunk| chunk.len())
311
1.56k
        .sum()
312
1.56k
}
313
314
// This is like MutableBuffer::collect_bool(type_ids.len(), |i| type_ids[i] == target) with fast paths for all true or all false values.
315
1.04k
fn eq_scalar_inner(chunk_size: usize, type_ids: &[i8], target: i8) -> BoolValue {
316
67.6k
    let 
true_bits1.04k
=
count_first_run1.04k
(
chunk_size1.04k
,
type_ids1.04k
, |v| v == target);
317
318
1.04k
    let (
set_bits516
,
val516
) = if true_bits == type_ids.len() {
319
262
        return BoolValue::Scalar(true);
320
779
    } else if true_bits == 0 {
321
66.0k
        let 
false_bits526
=
count_first_run526
(
chunk_size526
,
type_ids526
, |v| v != target);
322
323
526
        if false_bits == type_ids.len() {
324
263
            return BoolValue::Scalar(false);
325
        } else {
326
263
            (false_bits, false)
327
        }
328
    } else {
329
253
        (true_bits, true)
330
    };
331
332
    // restrict to chunk boundaries
333
516
    let set_bits = set_bits - set_bits % 64;
334
335
516
    let mut buffer =
336
516
        MutableBuffer::new(bit_util::ceil(type_ids.len(), 8)).with_bitset(set_bits / 8, val);
337
338
1.29k
    
buffer516
.
extend516
(
type_ids[set_bits..]516
.
chunks516
(64).
map516
(|chunk| {
339
1.29k
        chunk
340
1.29k
            .iter()
341
1.29k
            .copied()
342
1.29k
            .enumerate()
343
82.3k
            .
fold1.29k
(0, |packed, (bit_idx, v)| {
344
82.3k
                packed | (((v == target) as u64) << bit_idx)
345
82.3k
            })
346
1.29k
    }));
347
348
516
    BoolValue::Buffer(BooleanBuffer::new(buffer.into(), 0, type_ids.len()))
349
1.04k
}
350
351
const IS_SEQUENTIAL_CHUNK_SIZE: usize = 64;
352
353
9
fn is_sequential(offsets: &[i32]) -> bool {
354
9
    is_sequential_generic::<IS_SEQUENTIAL_CHUNK_SIZE>(offsets)
355
9
}
356
357
65
fn is_sequential_generic<const N: usize>(offsets: &[i32]) -> bool {
358
65
    if offsets.is_empty() {
359
1
        return true;
360
64
    }
361
362
    // fast check this common combination:
363
    // 1: sequential nulls are represented as a single null value on the values array, pointed by the same offset multiple times
364
    // 2: valid values offsets increase one by one.
365
    // example for an union with a single field A with type_id 0:
366
    // union    = A=7 A=NULL A=NULL A=5 A=9
367
    // a values = 7 NULL 5 9
368
    // offsets  = 0 1 1 2 3
369
    // type_ids = 0 0 0 0 0
370
    // this also checks if the last chunk/remainder is sequential relative to the first offset
371
64
    if offsets[0] + offsets.len() as i32 - 1 != offsets[offsets.len() - 1] {
372
29
        return false;
373
35
    }
374
375
35
    let chunks = offsets.chunks_exact(N);
376
377
35
    let remainder = chunks.remainder();
378
379
39
    
chunks.enumerate()35
.
all35
(|(i, chunk)| {
380
39
        let chunk_array = <&[i32; N]>::try_from(chunk).unwrap();
381
382
        //checks if values within chunk are sequential
383
39
        chunk_array
384
39
            .iter()
385
39
            .copied()
386
39
            .enumerate()
387
117
            .
fold39
(true, |acc, (i, offset)| {
388
117
                acc & (offset == chunk_array[0] + i as i32)
389
117
            })
390
20
            && offsets[0] + (i * N) as i32 == chunk_array[0] //checks if chunk is sequential relative to the first offset
391
39
    }) && 
remainder16
392
16
        .iter()
393
16
        .copied()
394
16
        .enumerate()
395
25
        .
fold16
(true, |acc, (i, offset)| {
396
25
            acc & (offset == remainder[0] + i as i32)
397
25
        }) //if the remainder is sequential relative to the first offset is checked at the start of the function
398
65
}
399
400
#[cfg(test)]
401
mod tests {
402
    use super::{BoolValue, eq_scalar_inner, is_sequential_generic, union_extract};
403
    use arrow_array::{Array, Int32Array, NullArray, StringArray, UnionArray, new_null_array};
404
    use arrow_buffer::{BooleanBuffer, ScalarBuffer};
405
    use arrow_schema::{ArrowError, DataType, Field, UnionFields, UnionMode};
406
    use std::sync::Arc;
407
408
    #[test]
409
1
    fn test_eq_scalar() {
410
        //multiple all equal chunks, so it's loop and sum logic it's tested
411
        //multiple chunks after, so it's loop logic it's tested
412
        const ARRAY_LEN: usize = 64 * 4;
413
414
        //so out of 64 boundaries chunks can be generated and checked for
415
        const EQ_SCALAR_CHUNK_SIZE: usize = 3;
416
417
1.02k
        fn eq_scalar(type_ids: &[i8], target: i8) -> BoolValue {
418
1.02k
            eq_scalar_inner(EQ_SCALAR_CHUNK_SIZE, type_ids, target)
419
1.02k
        }
420
421
512
        fn cross_check(left: &[i8], right: i8) -> BooleanBuffer {
422
131k
            
BooleanBuffer::collect_bool512
(
left512
.
len512
(), |i| left[i] == right)
423
512
        }
424
425
1
        assert_eq!(eq_scalar(&[], 1), BoolValue::Scalar(true));
426
427
1
        assert_eq!(eq_scalar(&[1], 1), BoolValue::Scalar(true));
428
1
        assert_eq!(eq_scalar(&[2], 1), BoolValue::Scalar(false));
429
430
1
        let mut values = [1; ARRAY_LEN];
431
432
1
        assert_eq!(eq_scalar(&values, 1), BoolValue::Scalar(true));
433
1
        assert_eq!(eq_scalar(&values, 2), BoolValue::Scalar(false));
434
435
        //every subslice should return the same value
436
256
        for 
i255
in 1..ARRAY_LEN {
437
255
            assert_eq!(eq_scalar(&values[..i], 1), BoolValue::Scalar(true));
438
255
            assert_eq!(eq_scalar(&values[..i], 2), BoolValue::Scalar(false));
439
        }
440
441
        // test that a single change anywhere is checked for
442
257
        for 
i256
in 0..ARRAY_LEN {
443
256
            values[i] = 2;
444
445
256
            assert_eq!(
446
256
                eq_scalar(&values, 1),
447
256
                BoolValue::Buffer(cross_check(&values, 1))
448
            );
449
256
            assert_eq!(
450
256
                eq_scalar(&values, 2),
451
256
                BoolValue::Buffer(cross_check(&values, 2))
452
            );
453
454
256
            values[i] = 1;
455
        }
456
1
    }
457
458
    #[test]
459
1
    fn test_is_sequential() {
460
        /*
461
        the smallest value that satisfies:
462
        >1 so the fold logic of a exact chunk executes
463
        >2 so a >1 non-exact remainder can exist, and it's fold logic executes
464
         */
465
        const CHUNK_SIZE: usize = 3;
466
        //we test arrays of size up to 8 = 2 * CHUNK_SIZE + 2:
467
        //multiple(2) exact chunks, so the AND logic between them executes
468
        //a >1(2) remainder, so:
469
        //    the AND logic between all exact chunks and the remainder executes
470
        //    the remainder fold logic executes
471
472
56
        fn is_sequential(v: &[i32]) -> bool {
473
56
            is_sequential_generic::<CHUNK_SIZE>(v)
474
56
        }
475
476
1
        assert!(is_sequential(&[])); //empty
477
1
        assert!(is_sequential(&[1])); //single
478
479
1
        assert!(is_sequential(&[1, 2]));
480
1
        assert!(is_sequential(&[1, 2, 3]));
481
1
        assert!(is_sequential(&[1, 2, 3, 4]));
482
1
        assert!(is_sequential(&[1, 2, 3, 4, 5]));
483
1
        assert!(is_sequential(&[1, 2, 3, 4, 5, 6]));
484
1
        assert!(is_sequential(&[1, 2, 3, 4, 5, 6, 7]));
485
1
        assert!(is_sequential(&[1, 2, 3, 4, 5, 6, 7, 8]));
486
487
1
        assert!(!is_sequential(&[8, 7]));
488
1
        assert!(!is_sequential(&[8, 7, 6]));
489
1
        assert!(!is_sequential(&[8, 7, 6, 5]));
490
1
        assert!(!is_sequential(&[8, 7, 6, 5, 4]));
491
1
        assert!(!is_sequential(&[8, 7, 6, 5, 4, 3]));
492
1
        assert!(!is_sequential(&[8, 7, 6, 5, 4, 3, 2]));
493
1
        assert!(!is_sequential(&[8, 7, 6, 5, 4, 3, 2, 1]));
494
495
1
        assert!(!is_sequential(&[0, 2]));
496
1
        assert!(!is_sequential(&[1, 0]));
497
498
1
        assert!(!is_sequential(&[0, 2, 3]));
499
1
        assert!(!is_sequential(&[1, 0, 3]));
500
1
        assert!(!is_sequential(&[1, 2, 0]));
501
502
1
        assert!(!is_sequential(&[0, 2, 3, 4]));
503
1
        assert!(!is_sequential(&[1, 0, 3, 4]));
504
1
        assert!(!is_sequential(&[1, 2, 0, 4]));
505
1
        assert!(!is_sequential(&[1, 2, 3, 0]));
506
507
1
        assert!(!is_sequential(&[0, 2, 3, 4, 5]));
508
1
        assert!(!is_sequential(&[1, 0, 3, 4, 5]));
509
1
        assert!(!is_sequential(&[1, 2, 0, 4, 5]));
510
1
        assert!(!is_sequential(&[1, 2, 3, 0, 5]));
511
1
        assert!(!is_sequential(&[1, 2, 3, 4, 0]));
512
513
1
        assert!(!is_sequential(&[0, 2, 3, 4, 5, 6]));
514
1
        assert!(!is_sequential(&[1, 0, 3, 4, 5, 6]));
515
1
        assert!(!is_sequential(&[1, 2, 0, 4, 5, 6]));
516
1
        assert!(!is_sequential(&[1, 2, 3, 0, 5, 6]));
517
1
        assert!(!is_sequential(&[1, 2, 3, 4, 0, 6]));
518
1
        assert!(!is_sequential(&[1, 2, 3, 4, 5, 0]));
519
520
1
        assert!(!is_sequential(&[0, 2, 3, 4, 5, 6, 7]));
521
1
        assert!(!is_sequential(&[1, 0, 3, 4, 5, 6, 7]));
522
1
        assert!(!is_sequential(&[1, 2, 0, 4, 5, 6, 7]));
523
1
        assert!(!is_sequential(&[1, 2, 3, 0, 5, 6, 7]));
524
1
        assert!(!is_sequential(&[1, 2, 3, 4, 0, 6, 7]));
525
1
        assert!(!is_sequential(&[1, 2, 3, 4, 5, 0, 7]));
526
1
        assert!(!is_sequential(&[1, 2, 3, 4, 5, 6, 0]));
527
528
1
        assert!(!is_sequential(&[0, 2, 3, 4, 5, 6, 7, 8]));
529
1
        assert!(!is_sequential(&[1, 0, 3, 4, 5, 6, 7, 8]));
530
1
        assert!(!is_sequential(&[1, 2, 0, 4, 5, 6, 7, 8]));
531
1
        assert!(!is_sequential(&[1, 2, 3, 0, 5, 6, 7, 8]));
532
1
        assert!(!is_sequential(&[1, 2, 3, 4, 0, 6, 7, 8]));
533
1
        assert!(!is_sequential(&[1, 2, 3, 4, 5, 0, 7, 8]));
534
1
        assert!(!is_sequential(&[1, 2, 3, 4, 5, 6, 0, 8]));
535
1
        assert!(!is_sequential(&[1, 2, 3, 4, 5, 6, 7, 0]));
536
537
        // checks increments at the chunk boundary
538
1
        assert!(!is_sequential(&[1, 2, 3, 5]));
539
1
        assert!(!is_sequential(&[1, 2, 3, 5, 6]));
540
1
        assert!(!is_sequential(&[1, 2, 3, 5, 6, 7]));
541
1
        assert!(!is_sequential(&[1, 2, 3, 4, 5, 6, 8]));
542
1
        assert!(!is_sequential(&[1, 2, 3, 4, 5, 6, 8, 9]));
543
1
    }
544
545
7
    fn str1() -> UnionFields {
546
7
        UnionFields::new(vec![1], vec![Field::new("str", DataType::Utf8, true)])
547
7
    }
548
549
22
    fn str1_int3() -> UnionFields {
550
22
        UnionFields::new(
551
22
            vec![1, 3],
552
22
            vec![
553
22
                Field::new("str", DataType::Utf8, true),
554
22
                Field::new("int", DataType::Int32, true),
555
            ],
556
        )
557
22
    }
558
559
    #[test]
560
1
    fn sparse_1_1_single_field() {
561
1
        let union = UnionArray::try_new(
562
            //single field
563
1
            str1(),
564
1
            ScalarBuffer::from(vec![1, 1]), // non empty, every type id must match
565
1
            None,                           //sparse
566
1
            vec![
567
1
                Arc::new(StringArray::from(vec!["a", "b"])), // not null
568
            ],
569
        )
570
1
        .unwrap();
571
572
1
        let expected = StringArray::from(vec!["a", "b"]);
573
1
        let extracted = union_extract(&union, "str").unwrap();
574
575
1
        assert_eq!(extracted.into_data(), expected.into_data());
576
1
    }
577
578
    #[test]
579
1
    fn sparse_1_2_empty() {
580
1
        let union = UnionArray::try_new(
581
            // multiple fields
582
1
            str1_int3(),
583
1
            ScalarBuffer::from(vec![]), //empty union
584
1
            None,                       // sparse
585
1
            vec![
586
1
                Arc::new(StringArray::new_null(0)),
587
1
                Arc::new(Int32Array::new_null(0)),
588
            ],
589
        )
590
1
        .unwrap();
591
592
1
        let expected = StringArray::new_null(0);
593
1
        let extracted = union_extract(&union, "str").unwrap(); //target type is not Null
594
595
1
        assert_eq!(extracted.into_data(), expected.into_data());
596
1
    }
597
598
    #[test]
599
1
    fn sparse_1_3a_null_target() {
600
1
        let union = UnionArray::try_new(
601
            // multiple fields
602
1
            UnionFields::new(
603
1
                vec![1, 3],
604
1
                vec![
605
1
                    Field::new("str", DataType::Utf8, true),
606
1
                    Field::new("null", DataType::Null, true), // target type is Null
607
                ],
608
            ),
609
1
            ScalarBuffer::from(vec![1]), //not empty
610
1
            None,                        // sparse
611
1
            vec![
612
1
                Arc::new(StringArray::new_null(1)),
613
1
                Arc::new(NullArray::new(1)), // null data type
614
            ],
615
        )
616
1
        .unwrap();
617
618
1
        let expected = NullArray::new(1);
619
1
        let extracted = union_extract(&union, "null").unwrap();
620
621
1
        assert_eq!(extracted.into_data(), expected.into_data());
622
1
    }
623
624
    #[test]
625
1
    fn sparse_1_3b_null_target() {
626
1
        let union = UnionArray::try_new(
627
            // multiple fields
628
1
            str1_int3(),
629
1
            ScalarBuffer::from(vec![1]), //not empty
630
1
            None,                        // sparse
631
1
            vec![
632
1
                Arc::new(StringArray::new_null(1)), //all null
633
1
                Arc::new(Int32Array::new_null(1)),
634
            ],
635
        )
636
1
        .unwrap();
637
638
1
        let expected = StringArray::new_null(1);
639
1
        let extracted = union_extract(&union, "str").unwrap(); //target type is not Null
640
641
1
        assert_eq!(extracted.into_data(), expected.into_data());
642
1
    }
643
644
    #[test]
645
1
    fn sparse_2_all_types_match() {
646
1
        let union = UnionArray::try_new(
647
            //multiple fields
648
1
            str1_int3(),
649
1
            ScalarBuffer::from(vec![3, 3]), // all types match
650
1
            None,                           //sparse
651
1
            vec![
652
1
                Arc::new(StringArray::new_null(2)),
653
1
                Arc::new(Int32Array::from(vec![1, 4])), // not null
654
            ],
655
        )
656
1
        .unwrap();
657
658
1
        let expected = Int32Array::from(vec![1, 4]);
659
1
        let extracted = union_extract(&union, "int").unwrap();
660
661
1
        assert_eq!(extracted.into_data(), expected.into_data());
662
1
    }
663
664
    /// Sparse union where no slot selects the target: even though the `int`
    /// child holds some non-null values, the extracted array must be all null.
    #[test]
    fn sparse_3_1_none_match_target_can_contain_null_mask() {
        let want = Int32Array::new_null(4);

        // None of the type ids match the target field.
        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![1, 1, 1, 1]);
        let array = UnionArray::try_new(
            str1_int3(), // multiple fields
            type_ids,
            None, // no offsets: sparse
            vec![
                Arc::new(StringArray::new_null(4)),
                // the target child itself is not entirely null
                Arc::new(Int32Array::from(vec![None, Some(4), None, Some(8)])),
            ],
        )
        .unwrap();

        let got = union_extract(&array, "int").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
683
684
3
    fn str1_union3(union3_datatype: DataType) -> UnionFields {
685
3
        UnionFields::new(
686
3
            vec![1, 3],
687
3
            vec![
688
3
                Field::new("str", DataType::Utf8, true),
689
3
                Field::new("union", union3_datatype, true),
690
            ],
691
        )
692
3
    }
693
694
    /// Sparse union where no slot selects the target and the target is itself
    /// a union: the result must equal `new_null_array` for the target type.
    #[test]
    fn sparse_3_2_none_match_cant_contain_null_mask_union_target() {
        let target_fields = str1();
        let target_type = DataType::Union(target_fields.clone(), UnionMode::Sparse);

        // The target child is a non-null nested sparse union.
        let nested = UnionArray::try_new(
            target_fields.clone(),
            ScalarBuffer::from(vec![1, 1]),
            None,
            vec![Arc::new(StringArray::from(vec!["a", "b"]))],
        )
        .unwrap();

        // None of the outer type ids match the target field.
        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![1, 1]);
        let array = UnionArray::try_new(
            str1_union3(target_type.clone()), // multiple fields
            type_ids,
            None, // no offsets: sparse
            vec![Arc::new(StringArray::new_null(2)), Arc::new(nested)],
        )
        .unwrap();

        let want = new_null_array(&target_type, 2);
        let got = union_extract(&array, "union").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
725
726
    /// Sparse union with a mix of selected types and a target child that
    /// already contains nulls: unselected slots become null in the result.
    #[test]
    fn sparse_4_1_1_target_with_nulls() {
        let want = Int32Array::from(vec![None, Some(4), None, None]);

        // Multiple selected types: the first two slots differ from the last two.
        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![3, 3, 1, 1]);
        let array = UnionArray::try_new(
            str1_int3(), // multiple fields
            type_ids,
            None, // no offsets: sparse
            vec![
                Arc::new(StringArray::new_null(4)),
                // target with nulls
                Arc::new(Int32Array::from(vec![None, Some(4), None, Some(8)])),
            ],
        )
        .unwrap();

        let got = union_extract(&array, "int").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
745
746
    /// Sparse union with a mix of selected types and a target child without
    /// nulls: only the unselected slot is null in the result.
    #[test]
    fn sparse_4_1_2_target_without_nulls() {
        let want = Int32Array::from(vec![None, Some(4), Some(8)]);

        // Multiple selected types.
        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![1, 3, 3]);
        let array = UnionArray::try_new(
            str1_int3(), // multiple fields
            type_ids,
            None, // no offsets: sparse
            vec![
                Arc::new(StringArray::new_null(3)),
                Arc::new(Int32Array::from(vec![2, 4, 8])), // target without nulls
            ],
        )
        .unwrap();

        let got = union_extract(&array, "int").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
765
766
    /// Sparse union where only some slots select a union-typed target: the
    /// expected result is a union whose child is null at the unselected slot.
    #[test]
    fn sparse_4_2_some_match_target_cant_contain_null_mask() {
        let target_fields = str1();
        let target_type = DataType::Union(target_fields.clone(), UnionMode::Sparse);

        // Target child: a nested sparse union holding "a" and "b".
        let nested = UnionArray::try_new(
            target_fields.clone(),
            ScalarBuffer::from(vec![1, 1]),
            None,
            vec![Arc::new(StringArray::from(vec!["a", "b"]))],
        )
        .unwrap();

        // Some types match, but not all.
        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![3, 1]);
        let array = UnionArray::try_new(
            str1_union3(target_type), // multiple fields
            type_ids,
            None, // no offsets: sparse
            vec![Arc::new(StringArray::new_null(2)), Arc::new(nested)],
        )
        .unwrap();

        let want = UnionArray::try_new(
            target_fields,
            ScalarBuffer::from(vec![1, 1]),
            None,
            vec![Arc::new(StringArray::from(vec![Some("a"), None]))],
        )
        .unwrap();
        let got = union_extract(&array, "union").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
802
803
    /// Dense union with zero slots and an empty target child: the result is an
    /// empty string array.
    #[test]
    fn dense_1_1_both_empty() {
        let want = StringArray::new_null(0);

        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![]); // empty union
        let offsets: ScalarBuffer<i32> = ScalarBuffer::from(vec![]);
        let array = UnionArray::try_new(
            str1_int3(),
            type_ids,
            Some(offsets), // offsets present: dense
            vec![
                Arc::new(StringArray::new_null(0)), // empty target
                Arc::new(Int32Array::new_null(0)),
            ],
        )
        .unwrap();

        let got = union_extract(&array, "str").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
821
822
    /// Dense union with zero slots but a one-element target child: the result
    /// is still an empty string array.
    #[test]
    fn dense_1_2_empty_union_target_non_empty() {
        let want = StringArray::new_null(0);

        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![]); // empty union
        let offsets: ScalarBuffer<i32> = ScalarBuffer::from(vec![]);
        let array = UnionArray::try_new(
            str1_int3(),
            type_ids,
            Some(offsets), // offsets present: dense
            vec![
                Arc::new(StringArray::new_null(1)), // non empty target
                Arc::new(Int32Array::new_null(0)),
            ],
        )
        .unwrap();

        let got = union_extract(&array, "str").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
840
841
    /// Dense union with two slots and an empty target child: the result is a
    /// null string array with one entry per union slot.
    #[test]
    fn dense_2_non_empty_union_target_empty() {
        let want = StringArray::new_null(2);

        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![3, 3]); // non empty union
        let offsets: ScalarBuffer<i32> = ScalarBuffer::from(vec![0, 1]);
        let array = UnionArray::try_new(
            str1_int3(),
            type_ids,
            Some(offsets), // offsets present: dense
            vec![
                Arc::new(StringArray::new_null(0)), // empty target
                Arc::new(Int32Array::new_null(2)),
            ],
        )
        .unwrap();

        let got = union_extract(&array, "str").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
859
860
    /// Dense union with an all-null target child shorter than the union: the
    /// result is a null string array of the union's length.
    #[test]
    fn dense_3_1_null_target_smaller_len() {
        let want = StringArray::new_null(2);

        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![3, 3]); // non empty union
        let offsets: ScalarBuffer<i32> = ScalarBuffer::from(vec![0, 0]);
        let array = UnionArray::try_new(
            str1_int3(),
            type_ids,
            Some(offsets), // offsets present: dense
            vec![
                Arc::new(StringArray::new_null(1)), // smaller target
                Arc::new(Int32Array::new_null(2)),
            ],
        )
        .unwrap();

        let got = union_extract(&array, "str").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
878
879
    /// Dense union with an all-null target child as long as the union: the
    /// result is a null string array of the union's length.
    #[test]
    fn dense_3_2_null_target_equal_len() {
        let want = StringArray::new_null(2);

        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![3, 3]); // non empty union
        let offsets: ScalarBuffer<i32> = ScalarBuffer::from(vec![0, 0]);
        let array = UnionArray::try_new(
            str1_int3(),
            type_ids,
            Some(offsets), // offsets present: dense
            vec![
                Arc::new(StringArray::new_null(2)), // equal len
                Arc::new(Int32Array::new_null(2)),
            ],
        )
        .unwrap();

        let got = union_extract(&array, "str").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
897
898
    /// Dense union with an all-null target child longer than the union: the
    /// result is a null string array of the union's length, not the child's.
    #[test]
    fn dense_3_3_null_target_bigger_len() {
        let want = StringArray::new_null(2);

        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![3, 3]); // non empty union
        let offsets: ScalarBuffer<i32> = ScalarBuffer::from(vec![0, 0]);
        let array = UnionArray::try_new(
            str1_int3(),
            type_ids,
            Some(offsets), // offsets present: dense
            vec![
                Arc::new(StringArray::new_null(3)), // bigger len
                Arc::new(Int32Array::new_null(3)),
            ],
        )
        .unwrap();

        let got = union_extract(&array, "str").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
916
917
    /// Single-field dense union with sequential offsets and a child exactly as
    /// long as the union: the child's values come back unchanged.
    #[test]
    fn dense_4_1a_single_type_sequential_offsets_equal_len() {
        let want = StringArray::from(vec!["a1", "b2"]);

        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![1, 1]); // non empty union
        let offsets: ScalarBuffer<i32> = ScalarBuffer::from(vec![0, 1]); // sequential
        let array = UnionArray::try_new(
            str1(), // single field
            type_ids,
            Some(offsets),
            vec![
                Arc::new(StringArray::from(vec!["a1", "b2"])), // equal len, non null
            ],
        )
        .unwrap();

        let got = union_extract(&array, "str").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
935
936
    /// Single-field dense union with sequential offsets and a child longer
    /// than the union: only the first two child values are expected.
    #[test]
    fn dense_4_2a_single_type_sequential_offsets_bigger() {
        let want = StringArray::from(vec!["a1", "b2"]);

        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![1, 1]); // non empty union
        let offsets: ScalarBuffer<i32> = ScalarBuffer::from(vec![0, 1]); // sequential
        let array = UnionArray::try_new(
            str1(), // single field
            type_ids,
            Some(offsets),
            vec![
                // three values for a two-slot union: bigger len, non null
                Arc::new(StringArray::from(vec!["a1", "b2", "c3"])),
            ],
        )
        .unwrap();

        let got = union_extract(&array, "str").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
954
955
    /// Single-field dense union whose offsets skip a child value: the result
    /// holds the values at offsets 0 and 2.
    #[test]
    fn dense_4_3a_single_type_non_sequential() {
        let want = StringArray::from(vec!["a1", "c3"]);

        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![1, 1]); // non empty union
        let offsets: ScalarBuffer<i32> = ScalarBuffer::from(vec![0, 2]); // non sequential
        let array = UnionArray::try_new(
            str1(), // single field
            type_ids,
            Some(offsets),
            vec![
                // three non-null values for a two-slot union
                Arc::new(StringArray::from(vec!["a1", "b2", "c3"])),
            ],
        )
        .unwrap();

        let got = union_extract(&array, "str").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
973
974
    /// Dense union with an empty sibling child, sequential offsets and a target
    /// exactly as long as the union: the target's values come back unchanged.
    #[test]
    fn dense_4_1b_empty_siblings_sequential_equal_len() {
        let want = StringArray::from(vec!["a", "b"]);

        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![1, 1]); // non empty union
        let offsets: ScalarBuffer<i32> = ScalarBuffer::from(vec![0, 1]); // sequential
        let array = UnionArray::try_new(
            str1_int3(), // multiple fields
            type_ids,
            Some(offsets),
            vec![
                Arc::new(StringArray::from(vec!["a", "b"])), // equal len, non null
                Arc::new(Int32Array::new_null(0)),           // empty sibling
            ],
        )
        .unwrap();

        let got = union_extract(&array, "str").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
993
994
    /// Dense union with an empty sibling child, sequential offsets and a target
    /// longer than the union: only the first two target values are expected.
    #[test]
    fn dense_4_2b_empty_siblings_sequential_bigger_len() {
        let want = StringArray::from(vec!["a", "b"]);

        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![1, 1]); // non empty union
        let offsets: ScalarBuffer<i32> = ScalarBuffer::from(vec![0, 1]); // sequential
        let array = UnionArray::try_new(
            str1_int3(), // multiple fields
            type_ids,
            Some(offsets),
            vec![
                Arc::new(StringArray::from(vec!["a", "b", "c"])), // bigger len, non null
                Arc::new(Int32Array::new_null(0)),                // empty sibling
            ],
        )
        .unwrap();

        let got = union_extract(&array, "str").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
1013
1014
    /// Dense union with an empty sibling child and offsets that skip a target
    /// value: the result holds the values at offsets 0 and 2.
    #[test]
    fn dense_4_3b_empty_sibling_non_sequential() {
        let want = StringArray::from(vec!["a", "c"]);

        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![1, 1]); // non empty union
        let offsets: ScalarBuffer<i32> = ScalarBuffer::from(vec![0, 2]); // non sequential
        let array = UnionArray::try_new(
            str1_int3(), // multiple fields
            type_ids,
            Some(offsets),
            vec![
                Arc::new(StringArray::from(vec!["a", "b", "c"])), // non null
                Arc::new(Int32Array::new_null(0)),                // empty sibling
            ],
        )
        .unwrap();

        let got = union_extract(&array, "str").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
1033
1034
    /// Dense union where every slot selects the target, the sibling child is
    /// non-empty, offsets are sequential and the target is as long as the union.
    #[test]
    fn dense_4_1c_all_types_match_sequential_equal_len() {
        let want = StringArray::from(vec!["a1", "b2"]);

        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![1, 1]); // all types match
        let offsets: ScalarBuffer<i32> = ScalarBuffer::from(vec![0, 1]); // sequential
        let array = UnionArray::try_new(
            str1_int3(), // multiple fields
            type_ids,
            Some(offsets),
            vec![
                Arc::new(StringArray::from(vec!["a1", "b2"])), // equal len
                Arc::new(Int32Array::new_null(2)),             // non empty sibling
            ],
        )
        .unwrap();

        let got = union_extract(&array, "str").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
1053
1054
    /// Dense union where every slot selects the target, the sibling child is
    /// non-empty, offsets are sequential and the target is longer than the union.
    #[test]
    fn dense_4_2c_all_types_match_sequential_bigger_len() {
        let want = StringArray::from(vec!["a1", "b2"]);

        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![1, 1]); // all types match
        let offsets: ScalarBuffer<i32> = ScalarBuffer::from(vec![0, 1]); // sequential
        let array = UnionArray::try_new(
            str1_int3(), // multiple fields
            type_ids,
            Some(offsets),
            vec![
                Arc::new(StringArray::from(vec!["a1", "b2", "b3"])), // bigger len
                Arc::new(Int32Array::new_null(2)),                   // non empty sibling
            ],
        )
        .unwrap();

        let got = union_extract(&array, "str").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
1073
1074
    /// Dense union where every slot selects the target, the sibling child is
    /// non-empty and the offsets skip a target value.
    #[test]
    fn dense_4_3c_all_types_match_non_sequential() {
        let want = StringArray::from(vec!["a1", "b3"]);

        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![1, 1]); // all types match
        let offsets: ScalarBuffer<i32> = ScalarBuffer::from(vec![0, 2]); // non sequential
        let array = UnionArray::try_new(
            str1_int3(), // multiple fields
            type_ids,
            Some(offsets),
            vec![
                Arc::new(StringArray::from(vec!["a1", "b2", "b3"])),
                Arc::new(Int32Array::new_null(2)), // non empty sibling
            ],
        )
        .unwrap();

        let got = union_extract(&array, "str").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
1093
1094
    /// Dense union where no slot selects the target and the target child is
    /// shorter than the union: the result is all null with the union's length.
    #[test]
    fn dense_5_1a_none_match_less_len() {
        let want = StringArray::new_null(5);

        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![3, 3, 3, 3, 3]); // none matches
        let offsets: ScalarBuffer<i32> = ScalarBuffer::from(vec![0, 0, 0, 1, 1]);
        let array = UnionArray::try_new(
            str1_int3(), // multiple fields
            type_ids,
            Some(offsets), // offsets present: dense
            vec![
                Arc::new(StringArray::from(vec!["a1", "b2", "c3"])), // less len
                Arc::new(Int32Array::from(vec![1, 2])),
            ],
        )
        .unwrap();

        let got = union_extract(&array, "str").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
1113
1114
    /// Dense union where no slot selects a union-typed target: the result must
    /// equal `new_null_array` for the target type with the union's length.
    #[test]
    fn dense_5_1b_cant_contain_null_mask() {
        let target_fields = str1();
        let target_type = DataType::Union(target_fields.clone(), UnionMode::Sparse);

        // Target child: a non-empty nested sparse union with a single value.
        let nested = UnionArray::try_new(
            target_fields.clone(),
            ScalarBuffer::from(vec![1]),
            None,
            vec![Arc::new(StringArray::from(vec!["a"]))],
        )
        .unwrap();

        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![1, 1, 1, 1, 1]); // none matches
        let offsets: ScalarBuffer<i32> = ScalarBuffer::from(vec![0, 0, 0, 1, 1]);
        let array = UnionArray::try_new(
            str1_union3(target_type.clone()), // multiple fields
            type_ids,
            Some(offsets), // offsets present: dense
            vec![
                Arc::new(StringArray::from(vec!["a1", "b2", "c3"])), // less len
                Arc::new(nested),                                    // non empty
            ],
        )
        .unwrap();

        let want = new_null_array(&target_type, 5);
        let got = union_extract(&array, "union").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
1144
1145
    /// Dense union where no slot selects the target and the target child is as
    /// long as the union: the result is all null.
    #[test]
    fn dense_5_2_none_match_equal_len() {
        let want = StringArray::new_null(5);

        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![3, 3, 3, 3, 3]); // none matches
        let offsets: ScalarBuffer<i32> = ScalarBuffer::from(vec![0, 0, 0, 1, 1]);
        let array = UnionArray::try_new(
            str1_int3(), // multiple fields
            type_ids,
            Some(offsets), // offsets present: dense
            vec![
                // equal len
                Arc::new(StringArray::from(vec!["a1", "b2", "c3", "d4", "e5"])),
                Arc::new(Int32Array::from(vec![1, 2])),
            ],
        )
        .unwrap();

        let got = union_extract(&array, "str").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
1164
1165
    /// Dense union where no slot selects the target and the target child is
    /// longer than the union: the result is all null with the union's length.
    #[test]
    fn dense_5_3_none_match_greater_len() {
        let want = StringArray::new_null(5);

        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![3, 3, 3, 3, 3]); // none matches
        let offsets: ScalarBuffer<i32> = ScalarBuffer::from(vec![0, 0, 0, 1, 1]);
        let array = UnionArray::try_new(
            str1_int3(), // multiple fields
            type_ids,
            Some(offsets), // offsets present: dense
            vec![
                // greater len
                Arc::new(StringArray::from(vec!["a1", "b2", "c3", "d4", "e5", "f6"])),
                Arc::new(Int32Array::from(vec![1, 2])), // non null
            ],
        )
        .unwrap();

        let got = union_extract(&array, "str").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
1184
1185
    /// Dense union where only the first two slots select the target: their
    /// values are extracted and the remaining three slots become null.
    #[test]
    fn dense_6_some_matches() {
        let want = Int32Array::from(vec![Some(1), Some(2), None, None, None]);

        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![3, 3, 1, 1, 1]); // some matches
        let offsets: ScalarBuffer<i32> = ScalarBuffer::from(vec![0, 1, 0, 1, 2]);
        let array = UnionArray::try_new(
            str1_int3(), // multiple fields
            type_ids,
            Some(offsets), // offsets present: dense
            vec![
                Arc::new(StringArray::from(vec!["a1", "b2", "c3"])), // non null
                Arc::new(Int32Array::from(vec![1, 2])),
            ],
        )
        .unwrap();

        let got = union_extract(&array, "int").unwrap();

        assert_eq!(got.into_data(), want.into_data());
    }
1204
1205
    /// A sparse union with no fields cannot contain the requested field, so
    /// extraction must fail with the "not found" invalid-argument error.
    #[test]
    fn empty_sparse_union() {
        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![]);
        let array = UnionArray::try_new(UnionFields::empty(), type_ids, None, vec![]).unwrap();

        let actual_message = union_extract(&array, "a").unwrap_err().to_string();
        let expected_message =
            ArrowError::InvalidArgumentError("field a not found on union".into()).to_string();

        assert_eq!(actual_message, expected_message);
    }
1220
1221
    /// A dense union with no fields cannot contain the requested field, so
    /// extraction must fail with the "not found" invalid-argument error.
    #[test]
    fn empty_dense_union() {
        let type_ids: ScalarBuffer<i8> = ScalarBuffer::from(vec![]);
        let offsets: ScalarBuffer<i32> = ScalarBuffer::from(vec![]);
        let array =
            UnionArray::try_new(UnionFields::empty(), type_ids, Some(offsets), vec![]).unwrap();

        let actual_message = union_extract(&array, "a").unwrap_err().to_string();
        let expected_message =
            ArrowError::InvalidArgumentError("field a not found on union".into()).to_string();

        assert_eq!(actual_message, expected_message);
    }
1236
}