Coverage Report

Created: 2025-11-17 14:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-select/src/concat.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! Defines concat kernel for `ArrayRef`
19
//!
20
//! Example:
21
//!
22
//! ```
23
//! use arrow_array::{ArrayRef, StringArray};
24
//! use arrow_select::concat::concat;
25
//!
26
//! let arr = concat(&[
27
//!     &StringArray::from(vec!["hello", "world"]),
28
//!     &StringArray::from(vec!["!"]),
29
//! ]).unwrap();
30
//! assert_eq!(arr.len(), 3);
31
//! ```
32
33
use crate::dictionary::{merge_dictionary_values, should_merge_dictionary_values};
34
use arrow_array::builder::{
35
    BooleanBuilder, GenericByteBuilder, GenericByteViewBuilder, PrimitiveBuilder,
36
};
37
use arrow_array::cast::AsArray;
38
use arrow_array::types::*;
39
use arrow_array::*;
40
use arrow_buffer::{
41
    ArrowNativeType, BooleanBufferBuilder, MutableBuffer, NullBuffer, OffsetBuffer, ScalarBuffer,
42
};
43
use arrow_data::ArrayDataBuilder;
44
use arrow_data::transform::{Capacities, MutableArrayData};
45
use arrow_schema::{ArrowError, DataType, FieldRef, Fields, SchemaRef};
46
use std::{collections::HashSet, ops::Add, sync::Arc};
47
48
42
fn binary_capacity<T: ByteArrayType>(arrays: &[&dyn Array]) -> Capacities {
49
42
    let mut item_capacity = 0;
50
42
    let mut bytes_capacity = 0;
51
171
    for 
array129
in arrays {
52
129
        let a = array.as_bytes::<T>();
53
54
        // Guaranteed to always have at least one element
55
129
        let offsets = a.value_offsets();
56
129
        bytes_capacity += offsets[offsets.len() - 1].as_usize() - offsets[0].as_usize();
57
129
        item_capacity += a.len()
58
    }
59
60
42
    Capacities::Binary(item_capacity, Some(bytes_capacity))
61
42
}
62
63
1
fn fixed_size_list_capacity(arrays: &[&dyn Array], data_type: &DataType) -> Capacities {
64
1
    if let DataType::FixedSizeList(f, _) = data_type {
65
3
        let 
item_capacity1
=
arrays1
.
iter1
().
map1
(|a| a.len()).
sum1
();
66
1
        let child_data_type = f.data_type();
67
1
        match child_data_type {
68
            // These types should match the types that `get_capacity`
69
            // has special handling for.
70
            DataType::Utf8
71
            | DataType::LargeUtf8
72
            | DataType::Binary
73
            | DataType::LargeBinary
74
            | DataType::FixedSizeList(_, _) => {
75
0
                let values: Vec<&dyn arrow_array::Array> = arrays
76
0
                    .iter()
77
0
                    .map(|a| a.as_fixed_size_list().values().as_ref())
78
0
                    .collect();
79
0
                Capacities::List(
80
0
                    item_capacity,
81
0
                    Some(Box::new(get_capacity(&values, child_data_type))),
82
0
                )
83
            }
84
1
            _ => Capacities::Array(item_capacity),
85
        }
86
    } else {
87
0
        unreachable!("illegal data type for fixed size list")
88
    }
89
1
}
90
91
11
fn concat_byte_view<B: ByteViewType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
92
11
    let mut builder =
93
93
        
GenericByteViewBuilder::<B>::with_capacity11
(
arrays11
.
iter11
().
map11
(|a| a.len()).
sum11
());
94
93
    for &array in 
arrays11
.
iter11
() {
95
93
        builder.append_array(array.as_byte_view());
96
93
    }
97
11
    Ok(Arc::new(builder.finish()))
98
11
}
99
100
13
fn concat_dictionaries<K: ArrowDictionaryKeyType>(
101
13
    arrays: &[&dyn Array],
102
13
) -> Result<ArrayRef, ArrowError> {
103
13
    let mut output_len = 0;
104
13
    let dictionaries: Vec<_> = arrays
105
13
        .iter()
106
80.0k
        .
map13
(|x| x.as_dictionary::<K>())
107
80.0k
        .
inspect13
(|d| output_len += d.len())
108
13
        .collect();
109
110
13
    if !should_merge_dictionary_values::<K>(&dictionaries, output_len) {
111
7
        return concat_fallback(arrays, Capacities::Array(output_len));
112
6
    }
113
114
6
    let merged = merge_dictionary_values(&dictionaries, None)
?0
;
115
116
    // Recompute keys
117
6
    let mut key_values = Vec::with_capacity(output_len);
118
119
6
    let mut has_nulls = false;
120
80.0k
    for (d, mapping) in 
dictionaries.iter()6
.
zip6
(
merged.key_mappings6
) {
121
80.0k
        has_nulls |= d.null_count() != 0;
122
80.0k
        for key in 
d.keys()80.0k
.
values80.0k
() {
123
            // Use get to safely handle nulls
124
80.0k
            key_values.push(mapping.get(key.as_usize()).copied().unwrap_or_default())
125
        }
126
    }
127
128
6
    let nulls = has_nulls.then(|| 
{1
129
1
        let mut nulls = BooleanBufferBuilder::new(output_len);
130
3
        for 
d2
in &dictionaries {
131
2
            match d.nulls() {
132
1
                Some(n) => nulls.append_buffer(n.inner()),
133
1
                None => nulls.append_n(d.len(), true),
134
            }
135
        }
136
1
        NullBuffer::new(nulls.finish())
137
1
    });
138
139
6
    let keys = PrimitiveArray::<K>::try_new(key_values.into(), nulls)
?0
;
140
    // Sanity check
141
6
    assert_eq!(keys.len(), output_len);
142
143
6
    let array = unsafe { DictionaryArray::new_unchecked(keys, merged.values) };
144
6
    Ok(Arc::new(array))
145
13
}
146
147
5
fn concat_lists<OffsetSize: OffsetSizeTrait>(
148
5
    arrays: &[&dyn Array],
149
5
    field: &FieldRef,
150
5
) -> Result<ArrayRef, ArrowError> {
151
5
    let mut output_len = 0;
152
5
    let mut list_has_nulls = false;
153
5
    let mut list_has_slices = false;
154
155
5
    let lists = arrays
156
5
        .iter()
157
80.0k
        .
map5
(|x| x.as_list::<OffsetSize>())
158
80.0k
        .
inspect5
(|l| {
159
80.0k
            output_len += l.len();
160
80.0k
            list_has_nulls |= l.null_count() != 0;
161
80.0k
            list_has_slices |= l.offsets()[0] > OffsetSize::zero()
162
80.0k
                || l.offsets().last().unwrap().as_usize() < l.values().len();
163
80.0k
        })
164
5
        .collect::<Vec<_>>();
165
166
5
    let lists_nulls = list_has_nulls.then(|| 
{3
167
3
        let mut nulls = BooleanBufferBuilder::new(output_len);
168
10
        for 
l7
in &lists {
169
7
            match l.nulls() {
170
6
                Some(n) => nulls.append_buffer(n.inner()),
171
1
                None => nulls.append_n(l.len(), true),
172
            }
173
        }
174
3
        NullBuffer::new(nulls.finish())
175
3
    });
176
177
    // If any of the lists have slices, we need to slice the values
178
    // to ensure that the offsets are correct
179
    let mut sliced_values;
180
5
    let values: Vec<&dyn Array> = if list_has_slices {
181
2
        sliced_values = Vec::with_capacity(lists.len());
182
6
        for 
l4
in &lists {
183
4
            // if the first offset is non-zero, we need to slice the values so when
184
4
            // we concatenate them below only the relevant values are included
185
4
            let offsets = l.offsets();
186
4
            let start_offset = offsets[0].as_usize();
187
4
            let end_offset = offsets.last().unwrap().as_usize();
188
4
            sliced_values.push(l.values().slice(start_offset, end_offset - start_offset));
189
4
        }
190
4
        
sliced_values.iter()2
.
map2
(|a| a.as_ref()).
collect2
()
191
    } else {
192
80.0k
        
lists.iter()3
.
map3
(|x| x.values().as_ref()).
collect3
()
193
    };
194
195
5
    let concatenated_values = concat(values.as_slice())
?0
;
196
197
    // Merge value offsets from the lists
198
5
    let value_offset_buffer =
199
80.0k
        
OffsetBuffer::<OffsetSize>::from_lengths5
(
lists.iter()5
.
flat_map5
(|x| x.offsets().lengths()));
200
201
5
    let array = GenericListArray::<OffsetSize>::try_new(
202
5
        Arc::clone(field),
203
5
        value_offset_buffer,
204
5
        concatenated_values,
205
5
        lists_nulls,
206
0
    )?;
207
208
5
    Ok(Arc::new(array))
209
5
}
210
211
2
fn concat_list_view<OffsetSize: OffsetSizeTrait>(
212
2
    arrays: &[&dyn Array],
213
2
    field: &FieldRef,
214
2
) -> Result<ArrayRef, ArrowError> {
215
2
    let mut output_len = 0;
216
2
    let mut list_has_nulls = false;
217
218
2
    let lists = arrays
219
2
        .iter()
220
6
        .
map2
(|x| x.as_list_view::<OffsetSize>())
221
6
        .
inspect2
(|l| {
222
6
            output_len += l.len();
223
6
            list_has_nulls |= l.null_count() != 0;
224
6
        })
225
2
        .collect::<Vec<_>>();
226
227
2
    let lists_nulls = list_has_nulls.then(|| {
228
2
        let mut nulls = BooleanBufferBuilder::new(output_len);
229
8
        for 
l6
in &lists {
230
6
            match l.nulls() {
231
4
                Some(n) => nulls.append_buffer(n.inner()),
232
2
                None => nulls.append_n(l.len(), true),
233
            }
234
        }
235
2
        NullBuffer::new(nulls.finish())
236
2
    });
237
238
6
    let 
values2
:
Vec<&dyn Array>2
=
lists.iter()2
.
map2
(|l| l.values().as_ref()).
collect2
();
239
240
2
    let concatenated_values = concat(values.as_slice())
?0
;
241
242
6
    let 
sizes2
:
ScalarBuffer<OffsetSize>2
=
lists.iter()2
.
flat_map2
(|x| x.sizes()).
copied2
().
collect2
();
243
244
6
    let 
mut offsets2
=
MutableBuffer::with_capacity2
(
lists.iter()2
.
map2
(|l| l.offsets().len()).
sum2
());
245
2
    let mut global_offset = OffsetSize::zero();
246
6
    for l in 
lists2
.
iter2
() {
247
12
        for &offset in 
l6
.
offsets6
() {
248
12
            offsets.push(offset + global_offset);
249
12
        }
250
251
        // advance the offsets
252
6
        global_offset += OffsetSize::from_usize(l.values().len()).unwrap();
253
    }
254
255
2
    let offsets = ScalarBuffer::from(offsets);
256
257
2
    let array = GenericListViewArray::try_new(
258
2
        field.clone(),
259
2
        offsets,
260
2
        sizes,
261
2
        concatenated_values,
262
2
        lists_nulls,
263
0
    )?;
264
265
2
    Ok(Arc::new(array))
266
2
}
267
268
34
fn concat_primitives<T: ArrowPrimitiveType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
269
241
    let 
mut builder34
=
PrimitiveBuilder::<T>::with_capacity34
(
arrays34
.
iter34
().
map34
(|a| a.len()).
sum34
())
270
34
        .with_data_type(arrays[0].data_type().clone());
271
272
275
    for 
array241
in arrays {
273
241
        builder.append_array(array.as_primitive());
274
241
    }
275
276
34
    Ok(Arc::new(builder.finish()))
277
34
}
278
279
1
fn concat_boolean(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
280
2
    let 
mut builder1
=
BooleanBuilder::with_capacity1
(
arrays1
.
iter1
().
map1
(|a| a.len()).
sum1
());
281
282
3
    for 
array2
in arrays {
283
2
        builder.append_array(array.as_boolean());
284
2
    }
285
286
1
    Ok(Arc::new(builder.finish()))
287
1
}
288
289
42
fn concat_bytes<T: ByteArrayType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
290
42
    let (item_capacity, bytes_capacity) = match binary_capacity::<T>(arrays) {
291
42
        Capacities::Binary(item_capacity, Some(bytes_capacity)) => (item_capacity, bytes_capacity),
292
0
        _ => unreachable!(),
293
    };
294
295
42
    let mut builder = GenericByteBuilder::<T>::with_capacity(item_capacity, bytes_capacity);
296
297
171
    for 
array129
in arrays {
298
129
        builder.append_array(array.as_bytes::<T>())
?0
;
299
    }
300
301
42
    Ok(Arc::new(builder.finish()))
302
42
}
303
304
5
fn concat_structs(arrays: &[&dyn Array], fields: &Fields) -> Result<ArrayRef, ArrowError> {
305
5
    let mut len = 0;
306
5
    let mut has_nulls = false;
307
5
    let structs = arrays
308
5
        .iter()
309
11
        .
map5
(|a| {
310
11
            len += a.len();
311
11
            has_nulls |= a.null_count() > 0;
312
11
            a.as_struct()
313
11
        })
314
5
        .collect::<Vec<_>>();
315
316
5
    let nulls = has_nulls.then(|| 
{1
317
1
        let mut b = BooleanBufferBuilder::new(len);
318
3
        for 
s2
in &structs {
319
2
            match s.nulls() {
320
2
                Some(n) => b.append_buffer(n.inner()),
321
0
                None => b.append_n(s.len(), true),
322
            }
323
        }
324
1
        NullBuffer::new(b.finish())
325
1
    });
326
327
5
    let column_concat_result = (0..fields.len())
328
5
        .map(|i| 
{4
329
4
            let extracted_cols = structs
330
4
                .iter()
331
9
                .
map4
(|s| s.column(i).as_ref())
332
4
                .collect::<Vec<_>>();
333
4
            concat(&extracted_cols)
334
4
        })
335
5
        .collect::<Result<Vec<_>, ArrowError>>()
?0
;
336
337
5
    Ok(Arc::new(StructArray::try_new_with_length(
338
5
        fields.clone(),
339
5
        column_concat_result,
340
5
        nulls,
341
5
        len,
342
0
    )?))
343
5
}
344
345
/// Concatenate multiple RunArray instances into a single RunArray.
346
///
347
/// This function handles the special case of concatenating RunArrays by:
348
/// 1. Collecting all run ends and values from input arrays
349
/// 2. Adjusting run ends to account for the length of previous arrays
350
/// 3. Creating a new RunArray with the combined data
351
4
fn concat_run_arrays<R: RunEndIndexType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError>
352
4
where
353
4
    R::Native: Add<Output = R::Native>,
354
{
355
4
    let run_arrays: Vec<_> = arrays
356
4
        .iter()
357
9
        .
map4
(|x| x.as_run::<R>())
358
9
        .
filter4
(|x| !x.run_ends().is_empty())
359
4
        .collect();
360
361
    // The run ends need to be adjusted by the sum of the lengths of the previous arrays.
362
4
    let needed_run_end_adjustments = std::iter::once(R::default_value())
363
4
        .chain(
364
4
            run_arrays
365
4
                .iter()
366
9
                .
scan4
(
R::default_value4
(), |acc, run_array| {
367
9
                    *acc = *acc + *run_array.run_ends().values().last().unwrap();
368
9
                    Some(*acc)
369
9
                }),
370
        )
371
4
        .collect::<Vec<_>>();
372
373
    // This works out nicely to be the total (logical) length of the resulting array.
374
4
    let total_len = needed_run_end_adjustments.last().unwrap().as_usize();
375
376
4
    let run_ends_array =
377
4
        PrimitiveArray::<R>::from_iter_values(run_arrays.iter().enumerate().flat_map(
378
9
            move |(i, run_array)| {
379
9
                let adjustment = needed_run_end_adjustments[i];
380
9
                run_array
381
9
                    .run_ends()
382
9
                    .values()
383
9
                    .iter()
384
20
                    .
map9
(move |run_end| *run_end + adjustment)
385
9
            },
386
        ));
387
388
4
    let all_values = concat(
389
4
        &run_arrays
390
4
            .iter()
391
9
            .
map4
(|x| x.values().as_ref())
392
4
            .collect::<Vec<_>>(),
393
0
    )?;
394
395
4
    let builder = ArrayDataBuilder::new(run_arrays[0].data_type().clone())
396
4
        .len(total_len)
397
4
        .child_data(vec![run_ends_array.into_data(), all_values.into_data()]);
398
399
    // `build_unchecked` is used to avoid recursive validation of child arrays.
400
4
    let array_data = unsafe { builder.build_unchecked() };
401
4
    array_data.validate_data()
?0
;
402
403
4
    Ok(Arc::<RunArray<R>>::new(array_data.into()))
404
4
}
405
406
macro_rules! dict_helper {
407
    ($t:ty, $arrays:expr) => {
408
        return Ok(Arc::new(concat_dictionaries::<$t>($arrays)?) as _)
409
    };
410
}
411
412
macro_rules! primitive_concat {
413
    ($t:ty, $arrays:expr) => {
414
        return Ok(Arc::new(concat_primitives::<$t>($arrays)?) as _)
415
    };
416
}
417
418
1
fn get_capacity(arrays: &[&dyn Array], data_type: &DataType) -> Capacities {
419
1
    match data_type {
420
0
        DataType::Utf8 => binary_capacity::<Utf8Type>(arrays),
421
0
        DataType::LargeUtf8 => binary_capacity::<LargeUtf8Type>(arrays),
422
0
        DataType::Binary => binary_capacity::<BinaryType>(arrays),
423
0
        DataType::LargeBinary => binary_capacity::<LargeBinaryType>(arrays),
424
1
        DataType::FixedSizeList(_, _) => fixed_size_list_capacity(arrays, data_type),
425
0
        _ => Capacities::Array(arrays.iter().map(|a| a.len()).sum()),
426
    }
427
1
}
428
429
/// Concatenate multiple [Array] of the same type into a single [ArrayRef].
430
139
pub fn concat(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
431
139
    if arrays.is_empty() {
432
1
        return Err(ArrowError::ComputeError(
433
1
            "concat requires input of at least one array".to_string(),
434
1
        ));
435
138
    } else if arrays.len() == 1 {
436
15
        let array = arrays[0];
437
15
        return Ok(array.slice(0, array.len()));
438
123
    }
439
440
123
    let d = arrays[0].data_type();
441
160k
    if 
arrays123
.iter().skip(1).
any123
(|array| array.data_type() != d) {
442
        // Create error message with up to 10 unique data types in the order they appear
443
5
        let error_message = {
444
            // 10 max unique data types to print and another 1 to know if there are more
445
5
            let mut unique_data_types = HashSet::with_capacity(11);
446
447
5
            let mut error_message =
448
5
                format!("It is not possible to concatenate arrays of different data types ({d}");
449
5
            unique_data_types.insert(d);
450
451
47
            for 
array44
in arrays {
452
44
                let is_unique = unique_data_types.insert(array.data_type());
453
454
44
                if unique_data_types.len() == 11 {
455
2
                    error_message.push_str(", ...");
456
2
                    break;
457
42
                }
458
459
42
                if is_unique {
460
30
                    error_message.push_str(", ");
461
30
                    error_message.push_str(&array.data_type().to_string());
462
30
                
}12
463
            }
464
465
5
            error_message.push_str(").");
466
467
5
            error_message
468
        };
469
470
5
        return Err(ArrowError::InvalidArgumentError(error_message));
471
118
    }
472
473
0
    downcast_primitive! {
474
4
        d => (primitive_concat, 
arrays0
),
475
1
        DataType::Boolean => concat_boolean(arrays),
476
13
        DataType::Dictionary(k, _) => {
477
0
            downcast_integer! {
478
13
                k.as_ref() => (dict_helper, 
arrays4
),
479
0
                _ => unreachable!("illegal dictionary key type {k}")
480
            }
481
        }
482
5
        DataType::List(field) => concat_lists::<i32>(arrays, field),
483
0
        DataType::LargeList(field) => concat_lists::<i64>(arrays, field),
484
2
        DataType::ListView(field) => concat_list_view::<i32>(arrays, field),
485
0
        DataType::LargeListView(field) => concat_list_view::<i64>(arrays, field),
486
5
        DataType::Struct(fields) => concat_structs(arrays, fields),
487
39
        DataType::Utf8 => concat_bytes::<Utf8Type>(arrays),
488
1
        DataType::LargeUtf8 => concat_bytes::<LargeUtf8Type>(arrays),
489
0
        DataType::Binary => concat_bytes::<BinaryType>(arrays),
490
2
        DataType::LargeBinary => concat_bytes::<LargeBinaryType>(arrays),
491
4
        DataType::RunEndEncoded(r, _) => {
492
            // Handle RunEndEncoded arrays with special concat function
493
            // We need to downcast based on the run end type
494
4
            match r.data_type() {
495
0
                DataType::Int16 => concat_run_arrays::<Int16Type>(arrays),
496
4
                DataType::Int32 => concat_run_arrays::<Int32Type>(arrays),
497
0
                DataType::Int64 => concat_run_arrays::<Int64Type>(arrays),
498
0
                _ => unreachable!("Unsupported run end index type: {r:?}"),
499
            }
500
        }
501
10
        DataType::Utf8View => concat_byte_view::<StringViewType>(arrays),
502
1
        DataType::BinaryView => concat_byte_view::<BinaryViewType>(arrays),
503
        _ => {
504
1
            let capacity = get_capacity(arrays, d);
505
1
            concat_fallback(arrays, capacity)
506
        }
507
    }
508
139
}
509
510
/// Concatenates arrays using MutableArrayData
511
///
512
/// This will naively concatenate dictionaries
513
8
fn concat_fallback(arrays: &[&dyn Array], capacity: Capacities) -> Result<ArrayRef, ArrowError> {
514
18
    let 
array_data8
:
Vec<_>8
=
arrays8
.
iter8
().
map8
(|a| a.to_data()).
collect8
::<Vec<_>>();
515
8
    let array_data = array_data.iter().collect();
516
8
    let mut mutable = MutableArrayData::with_capacities(array_data, false, capacity);
517
518
18
    for (i, a) in 
arrays8
.
iter8
().
enumerate8
() {
519
18
        mutable.extend(i, 0, a.len())
520
    }
521
522
8
    Ok(make_array(mutable.freeze()))
523
8
}
524
525
/// Concatenates `batches` together into a single [`RecordBatch`].
526
///
527
/// The output batch has the specified `schemas`; The schema of the
528
/// input are ignored.
529
///
530
/// Returns an error if the types of underlying arrays are different.
531
30
pub fn concat_batches<'a>(
532
30
    schema: &SchemaRef,
533
30
    input_batches: impl IntoIterator<Item = &'a RecordBatch>,
534
30
) -> Result<RecordBatch, ArrowError> {
535
    // When schema is empty, sum the number of the rows of all batches
536
30
    if schema.fields().is_empty() {
537
2
        let num_rows: usize = input_batches.into_iter().map(RecordBatch::num_rows).sum();
538
2
        let mut options = RecordBatchOptions::default();
539
2
        options.row_count = Some(num_rows);
540
2
        return RecordBatch::try_new_with_options(schema.clone(), vec![], &options);
541
28
    }
542
543
28
    let batches: Vec<&RecordBatch> = input_batches.into_iter().collect();
544
28
    if batches.is_empty() {
545
2
        return Ok(RecordBatch::new_empty(schema.clone()));
546
26
    }
547
26
    let field_num = schema.fields().len();
548
26
    let mut arrays = Vec::with_capacity(field_num);
549
40
    for i in 0..
field_num26
{
550
40
        let 
array39
= concat(
551
40
            &batches
552
40
                .iter()
553
341
                .
map40
(|batch| batch.column(i).as_ref())
554
40
                .collect::<Vec<_>>(),
555
1
        )?;
556
39
        arrays.push(array);
557
    }
558
25
    RecordBatch::try_new(schema.clone(), arrays)
559
30
}
560
561
#[cfg(test)]
562
mod tests {
563
    use super::*;
564
    use arrow_array::builder::{
565
        GenericListBuilder, Int64Builder, ListViewBuilder, StringDictionaryBuilder,
566
    };
567
    use arrow_schema::{Field, Schema};
568
    use std::fmt::Debug;
569
570
    #[test]
571
1
    fn test_concat_empty_vec() {
572
1
        let re = concat(&[]);
573
1
        assert!(re.is_err());
574
1
    }
575
576
    #[test]
577
1
    fn test_concat_batches_no_columns() {
578
        // Test concat using empty schema / batches without columns
579
1
        let schema = Arc::new(Schema::empty());
580
581
1
        let mut options = RecordBatchOptions::default();
582
1
        options.row_count = Some(100);
583
1
        let batch = RecordBatch::try_new_with_options(schema.clone(), vec![], &options).unwrap();
584
        // put in 2 batches of 100 rows each
585
1
        let re = concat_batches(&schema, &[batch.clone(), batch]).unwrap();
586
587
1
        assert_eq!(re.num_rows(), 200);
588
1
    }
589
590
    #[test]
591
1
    fn test_concat_one_element_vec() {
592
1
        let arr = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
593
1
            Some(-1),
594
1
            Some(2),
595
1
            None,
596
1
        ])) as ArrayRef;
597
1
        let result = concat(&[arr.as_ref()]).unwrap();
598
1
        assert_eq!(
599
1
            &arr, &result,
600
0
            "concatenating single element array gives back the same result"
601
        );
602
1
    }
603
604
    #[test]
605
1
    fn test_concat_incompatible_datatypes() {
606
1
        let re = concat(&[
607
1
            &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
608
1
            // 2 string to make sure we only mention unique types
609
1
            &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]),
610
1
            &StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
611
1
            // Another type to make sure we are showing all the incompatible types
612
1
            &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
613
1
        ]);
614
615
1
        assert_eq!(
616
1
            re.unwrap_err().to_string(),
617
            "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32)."
618
        );
619
1
    }
620
621
    #[test]
622
1
    fn test_concat_10_incompatible_datatypes_should_include_all_of_them() {
623
1
        let re = concat(&[
624
1
            &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
625
1
            // 2 string to make sure we only mention unique types
626
1
            &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]),
627
1
            &StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
628
1
            // Another type to make sure we are showing all the incompatible types
629
1
            &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
630
1
            &PrimitiveArray::<Int8Type>::from(vec![Some(-1), Some(2), None]),
631
1
            &PrimitiveArray::<Int16Type>::from(vec![Some(-1), Some(2), None]),
632
1
            &PrimitiveArray::<UInt8Type>::from(vec![Some(1), Some(2), None]),
633
1
            &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
634
1
            &PrimitiveArray::<UInt32Type>::from(vec![Some(1), Some(2), None]),
635
1
            // Non unique
636
1
            &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
637
1
            &PrimitiveArray::<UInt64Type>::from(vec![Some(1), Some(2), None]),
638
1
            &PrimitiveArray::<Float32Type>::from(vec![Some(1.0), Some(2.0), None]),
639
1
        ]);
640
641
1
        assert_eq!(
642
1
            re.unwrap_err().to_string(),
643
            "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32)."
644
        );
645
1
    }
646
647
    #[test]
648
1
    fn test_concat_11_incompatible_datatypes_should_only_include_10() {
649
1
        let re = concat(&[
650
1
            &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
651
1
            // 2 string to make sure we only mention unique types
652
1
            &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]),
653
1
            &StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
654
1
            // Another type to make sure we are showing all the incompatible types
655
1
            &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
656
1
            &PrimitiveArray::<Int8Type>::from(vec![Some(-1), Some(2), None]),
657
1
            &PrimitiveArray::<Int16Type>::from(vec![Some(-1), Some(2), None]),
658
1
            &PrimitiveArray::<UInt8Type>::from(vec![Some(1), Some(2), None]),
659
1
            &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
660
1
            &PrimitiveArray::<UInt32Type>::from(vec![Some(1), Some(2), None]),
661
1
            // Non unique
662
1
            &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
663
1
            &PrimitiveArray::<UInt64Type>::from(vec![Some(1), Some(2), None]),
664
1
            &PrimitiveArray::<Float32Type>::from(vec![Some(1.0), Some(2.0), None]),
665
1
            &PrimitiveArray::<Float64Type>::from(vec![Some(1.0), Some(2.0), None]),
666
1
        ]);
667
668
1
        assert_eq!(
669
1
            re.unwrap_err().to_string(),
670
            "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32, ...)."
671
        );
672
1
    }
673
674
    #[test]
675
1
    fn test_concat_13_incompatible_datatypes_should_not_include_all_of_them() {
676
1
        let re = concat(&[
677
1
            &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
678
1
            // 2 string to make sure we only mention unique types
679
1
            &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]),
680
1
            &StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
681
1
            // Another type to make sure we are showing all the incompatible types
682
1
            &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
683
1
            &PrimitiveArray::<Int8Type>::from(vec![Some(-1), Some(2), None]),
684
1
            &PrimitiveArray::<Int16Type>::from(vec![Some(-1), Some(2), None]),
685
1
            &PrimitiveArray::<UInt8Type>::from(vec![Some(1), Some(2), None]),
686
1
            &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
687
1
            &PrimitiveArray::<UInt32Type>::from(vec![Some(1), Some(2), None]),
688
1
            // Non unique
689
1
            &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
690
1
            &PrimitiveArray::<UInt64Type>::from(vec![Some(1), Some(2), None]),
691
1
            &PrimitiveArray::<Float32Type>::from(vec![Some(1.0), Some(2.0), None]),
692
1
            &PrimitiveArray::<Float64Type>::from(vec![Some(1.0), Some(2.0), None]),
693
1
            &PrimitiveArray::<Float16Type>::new_null(3),
694
1
            &BooleanArray::from(vec![Some(true), Some(false), None]),
695
1
        ]);
696
697
1
        assert_eq!(
698
1
            re.unwrap_err().to_string(),
699
            "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32, ...)."
700
        );
701
1
    }
702
703
    #[test]
704
1
    fn test_concat_string_arrays() {
705
1
        let arr = concat(&[
706
1
            &StringArray::from(vec!["hello", "world"]),
707
1
            &StringArray::from(vec!["2", "3", "4"]),
708
1
            &StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]),
709
1
        ])
710
1
        .unwrap();
711
712
1
        let expected_output = Arc::new(StringArray::from(vec![
713
1
            Some("hello"),
714
1
            Some("world"),
715
1
            Some("2"),
716
1
            Some("3"),
717
1
            Some("4"),
718
1
            Some("foo"),
719
1
            Some("bar"),
720
1
            None,
721
1
            Some("baz"),
722
1
        ])) as ArrayRef;
723
724
1
        assert_eq!(&arr, &expected_output);
725
1
    }
726
727
    #[test]
728
1
    fn test_concat_string_view_arrays() {
729
1
        let arr = concat(&[
730
1
            &StringViewArray::from(vec!["helloxxxxxxxxxxa", "world____________"]),
731
1
            &StringViewArray::from(vec!["helloxxxxxxxxxxy", "3", "4"]),
732
1
            &StringViewArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]),
733
1
        ])
734
1
        .unwrap();
735
736
1
        let expected_output = Arc::new(StringViewArray::from(vec![
737
1
            Some("helloxxxxxxxxxxa"),
738
1
            Some("world____________"),
739
1
            Some("helloxxxxxxxxxxy"),
740
1
            Some("3"),
741
1
            Some("4"),
742
1
            Some("foo"),
743
1
            Some("bar"),
744
1
            None,
745
1
            Some("baz"),
746
1
        ])) as ArrayRef;
747
748
1
        assert_eq!(&arr, &expected_output);
749
1
    }
750
751
    #[test]
752
1
    fn test_concat_primitive_arrays() {
753
1
        let arr = concat(&[
754
1
            &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(-1), Some(2), None, None]),
755
1
            &PrimitiveArray::<Int64Type>::from(vec![Some(101), Some(102), Some(103), None]),
756
1
            &PrimitiveArray::<Int64Type>::from(vec![Some(256), Some(512), Some(1024)]),
757
1
        ])
758
1
        .unwrap();
759
760
1
        let expected_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
761
1
            Some(-1),
762
1
            Some(-1),
763
1
            Some(2),
764
1
            None,
765
1
            None,
766
1
            Some(101),
767
1
            Some(102),
768
1
            Some(103),
769
1
            None,
770
1
            Some(256),
771
1
            Some(512),
772
1
            Some(1024),
773
1
        ])) as ArrayRef;
774
775
1
        assert_eq!(&arr, &expected_output);
776
1
    }
777
778
    #[test]
779
1
    fn test_concat_primitive_array_slices() {
780
1
        let input_1 =
781
1
            PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(-1), Some(2), None, None])
782
1
                .slice(1, 3);
783
784
1
        let input_2 =
785
1
            PrimitiveArray::<Int64Type>::from(vec![Some(101), Some(102), Some(103), None])
786
1
                .slice(1, 3);
787
1
        let arr = concat(&[&input_1, &input_2]).unwrap();
788
789
1
        let expected_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
790
1
            Some(-1),
791
1
            Some(2),
792
1
            None,
793
1
            Some(102),
794
1
            Some(103),
795
1
            None,
796
1
        ])) as ArrayRef;
797
798
1
        assert_eq!(&arr, &expected_output);
799
1
    }
800
801
    #[test]
802
1
    fn test_concat_boolean_primitive_arrays() {
803
1
        let arr = concat(&[
804
1
            &BooleanArray::from(vec![
805
1
                Some(true),
806
1
                Some(true),
807
1
                Some(false),
808
1
                None,
809
1
                None,
810
1
                Some(false),
811
1
            ]),
812
1
            &BooleanArray::from(vec![None, Some(false), Some(true), Some(false)]),
813
1
        ])
814
1
        .unwrap();
815
816
1
        let expected_output = Arc::new(BooleanArray::from(vec![
817
1
            Some(true),
818
1
            Some(true),
819
1
            Some(false),
820
1
            None,
821
1
            None,
822
1
            Some(false),
823
1
            None,
824
1
            Some(false),
825
1
            Some(true),
826
1
            Some(false),
827
1
        ])) as ArrayRef;
828
829
1
        assert_eq!(&arr, &expected_output);
830
1
    }
831
832
    #[test]
833
1
    fn test_concat_primitive_list_arrays() {
834
1
        let list1 = [
835
1
            Some(vec![Some(-1), Some(-1), Some(2), None, None]),
836
1
            Some(vec![]),
837
1
            None,
838
1
            Some(vec![Some(10)]),
839
1
        ];
840
1
        let list1_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list1.clone());
841
842
1
        let list2 = [
843
1
            None,
844
1
            Some(vec![Some(100), None, Some(101)]),
845
1
            Some(vec![Some(102)]),
846
1
        ];
847
1
        let list2_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list2.clone());
848
849
1
        let list3 = [Some(vec![Some(1000), Some(1001)])];
850
1
        let list3_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list3.clone());
851
852
1
        let array_result = concat(&[&list1_array, &list2_array, &list3_array]).unwrap();
853
854
1
        let expected = list1.into_iter().chain(list2).chain(list3);
855
1
        let array_expected = ListArray::from_iter_primitive::<Int64Type, _, _>(expected);
856
857
1
        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
858
1
    }
859
860
    #[test]
861
1
    fn test_concat_primitive_list_arrays_slices() {
862
1
        let list1 = [
863
1
            Some(vec![Some(-1), Some(-1), Some(2), None, None]),
864
1
            Some(vec![]), // In slice
865
1
            None,         // In slice
866
1
            Some(vec![Some(10)]),
867
1
        ];
868
1
        let list1_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list1.clone());
869
1
        let list1_array = list1_array.slice(1, 2);
870
1
        let list1_values = list1.into_iter().skip(1).take(2);
871
872
1
        let list2 = [
873
1
            None,
874
1
            Some(vec![Some(100), None, Some(101)]),
875
1
            Some(vec![Some(102)]),
876
1
        ];
877
1
        let list2_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list2.clone());
878
879
        // verify that this test covers the case when the first offset is non zero
880
1
        assert!(list1_array.offsets()[0].as_usize() > 0);
881
1
        let array_result = concat(&[&list1_array, &list2_array]).unwrap();
882
883
1
        let expected = list1_values.chain(list2);
884
1
        let array_expected = ListArray::from_iter_primitive::<Int64Type, _, _>(expected);
885
886
1
        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
887
1
    }
888
889
    #[test]
890
1
    fn test_concat_primitive_list_arrays_sliced_lengths() {
891
1
        let list1 = [
892
1
            Some(vec![Some(-1), Some(-1), Some(2), None, None]), // In slice
893
1
            Some(vec![]),                                        // In slice
894
1
            None,                                                // In slice
895
1
            Some(vec![Some(10)]),
896
1
        ];
897
1
        let list1_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list1.clone());
898
1
        let list1_array = list1_array.slice(0, 3); // no offset, but not all values
899
1
        let list1_values = list1.into_iter().take(3);
900
901
1
        let list2 = [
902
1
            None,
903
1
            Some(vec![Some(100), None, Some(101)]),
904
1
            Some(vec![Some(102)]),
905
1
        ];
906
1
        let list2_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list2.clone());
907
908
        // verify that this test covers the case when the first offset is zero, but the
909
        // last offset doesn't cover the entire array
910
1
        assert_eq!(list1_array.offsets()[0].as_usize(), 0);
911
1
        assert!(list1_array.offsets().last().unwrap().as_usize() < list1_array.values().len());
912
1
        let array_result = concat(&[&list1_array, &list2_array]).unwrap();
913
914
1
        let expected = list1_values.chain(list2);
915
1
        let array_expected = ListArray::from_iter_primitive::<Int64Type, _, _>(expected);
916
917
1
        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
918
1
    }
919
920
    #[test]
921
1
    fn test_concat_primitive_fixed_size_list_arrays() {
922
1
        let list1 = [
923
1
            Some(vec![Some(-1), None]),
924
1
            None,
925
1
            Some(vec![Some(10), Some(20)]),
926
1
        ];
927
1
        let list1_array =
928
1
            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(list1.clone(), 2);
929
930
1
        let list2 = [
931
1
            None,
932
1
            Some(vec![Some(100), None]),
933
1
            Some(vec![Some(102), Some(103)]),
934
1
        ];
935
1
        let list2_array =
936
1
            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(list2.clone(), 2);
937
938
1
        let list3 = [Some(vec![Some(1000), Some(1001)])];
939
1
        let list3_array =
940
1
            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(list3.clone(), 2);
941
942
1
        let array_result = concat(&[&list1_array, &list2_array, &list3_array]).unwrap();
943
944
1
        let expected = list1.into_iter().chain(list2).chain(list3);
945
1
        let array_expected =
946
1
            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(expected, 2);
947
948
1
        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
949
1
    }
950
951
    #[test]
952
1
    fn test_concat_list_view_arrays() {
953
1
        let list1 = [
954
1
            Some(vec![Some(-1), None]),
955
1
            None,
956
1
            Some(vec![Some(10), Some(20)]),
957
1
        ];
958
1
        let mut list1_array = ListViewBuilder::new(Int64Builder::new());
959
3
        for v in list1.iter() {
960
3
            list1_array.append_option(v.clone());
961
3
        }
962
1
        let list1_array = list1_array.finish();
963
964
1
        let list2 = [
965
1
            None,
966
1
            Some(vec![Some(100), None]),
967
1
            Some(vec![Some(102), Some(103)]),
968
1
        ];
969
1
        let mut list2_array = ListViewBuilder::new(Int64Builder::new());
970
3
        for v in list2.iter() {
971
3
            list2_array.append_option(v.clone());
972
3
        }
973
1
        let list2_array = list2_array.finish();
974
975
1
        let list3 = [Some(vec![Some(1000), Some(1001)])];
976
1
        let mut list3_array = ListViewBuilder::new(Int64Builder::new());
977
1
        for v in list3.iter() {
978
1
            list3_array.append_option(v.clone());
979
1
        }
980
1
        let list3_array = list3_array.finish();
981
982
1
        let array_result = concat(&[&list1_array, &list2_array, &list3_array]).unwrap();
983
984
1
        let expected: Vec<_> = list1.into_iter().chain(list2).chain(list3).collect();
985
1
        let mut array_expected = ListViewBuilder::new(Int64Builder::new());
986
7
        for v in expected.iter() {
987
7
            array_expected.append_option(v.clone());
988
7
        }
989
1
        let array_expected = array_expected.finish();
990
991
1
        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
992
1
    }
993
994
    #[test]
995
1
    fn test_concat_sliced_list_view_arrays() {
996
1
        let list1 = [
997
1
            Some(vec![Some(-1), None]),
998
1
            None,
999
1
            Some(vec![Some(10), Some(20)]),
1000
1
        ];
1001
1
        let mut list1_array = ListViewBuilder::new(Int64Builder::new());
1002
3
        for v in list1.iter() {
1003
3
            list1_array.append_option(v.clone());
1004
3
        }
1005
1
        let list1_array = list1_array.finish();
1006
1007
1
        let list2 = [
1008
1
            None,
1009
1
            Some(vec![Some(100), None]),
1010
1
            Some(vec![Some(102), Some(103)]),
1011
1
        ];
1012
1
        let mut list2_array = ListViewBuilder::new(Int64Builder::new());
1013
3
        for v in list2.iter() {
1014
3
            list2_array.append_option(v.clone());
1015
3
        }
1016
1
        let list2_array = list2_array.finish();
1017
1018
1
        let list3 = [Some(vec![Some(1000), Some(1001)])];
1019
1
        let mut list3_array = ListViewBuilder::new(Int64Builder::new());
1020
1
        for v in list3.iter() {
1021
1
            list3_array.append_option(v.clone());
1022
1
        }
1023
1
        let list3_array = list3_array.finish();
1024
1025
        // Concat sliced arrays.
1026
        // ListView slicing will slice the offset/sizes but preserve the original values child.
1027
1
        let array_result = concat(&[
1028
1
            &list1_array.slice(1, 2),
1029
1
            &list2_array.slice(1, 2),
1030
1
            &list3_array.slice(0, 1),
1031
1
        ])
1032
1
        .unwrap();
1033
1034
1
        let expected: Vec<_> = vec![
1035
1
            None,
1036
1
            Some(vec![Some(10), Some(20)]),
1037
1
            Some(vec![Some(100), None]),
1038
1
            Some(vec![Some(102), Some(103)]),
1039
1
            Some(vec![Some(1000), Some(1001)]),
1040
        ];
1041
1
        let mut array_expected = ListViewBuilder::new(Int64Builder::new());
1042
5
        for v in expected.iter() {
1043
5
            array_expected.append_option(v.clone());
1044
5
        }
1045
1
        let array_expected = array_expected.finish();
1046
1047
1
        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
1048
1
    }
1049
1050
    #[test]
1051
1
    fn test_concat_struct_arrays() {
1052
1
        let field = Arc::new(Field::new("field", DataType::Int64, true));
1053
1
        let input_primitive_1: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1054
1
            Some(-1),
1055
1
            Some(-1),
1056
1
            Some(2),
1057
1
            None,
1058
1
            None,
1059
1
        ]));
1060
1
        let input_struct_1 = StructArray::from(vec![(field.clone(), input_primitive_1)]);
1061
1062
1
        let input_primitive_2: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1063
1
            Some(101),
1064
1
            Some(102),
1065
1
            Some(103),
1066
1
            None,
1067
1
        ]));
1068
1
        let input_struct_2 = StructArray::from(vec![(field.clone(), input_primitive_2)]);
1069
1070
1
        let input_primitive_3: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1071
1
            Some(256),
1072
1
            Some(512),
1073
1
            Some(1024),
1074
1
        ]));
1075
1
        let input_struct_3 = StructArray::from(vec![(field, input_primitive_3)]);
1076
1077
1
        let arr = concat(&[&input_struct_1, &input_struct_2, &input_struct_3]).unwrap();
1078
1079
1
        let expected_primitive_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1080
1
            Some(-1),
1081
1
            Some(-1),
1082
1
            Some(2),
1083
1
            None,
1084
1
            None,
1085
1
            Some(101),
1086
1
            Some(102),
1087
1
            Some(103),
1088
1
            None,
1089
1
            Some(256),
1090
1
            Some(512),
1091
1
            Some(1024),
1092
1
        ])) as ArrayRef;
1093
1094
1
        let actual_primitive = arr
1095
1
            .as_any()
1096
1
            .downcast_ref::<StructArray>()
1097
1
            .unwrap()
1098
1
            .column(0);
1099
1
        assert_eq!(actual_primitive, &expected_primitive_output);
1100
1
    }
1101
1102
    #[test]
1103
1
    fn test_concat_struct_array_slices() {
1104
1
        let field = Arc::new(Field::new("field", DataType::Int64, true));
1105
1
        let input_primitive_1: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1106
1
            Some(-1),
1107
1
            Some(-1),
1108
1
            Some(2),
1109
1
            None,
1110
1
            None,
1111
1
        ]));
1112
1
        let input_struct_1 = StructArray::from(vec![(field.clone(), input_primitive_1)]);
1113
1114
1
        let input_primitive_2: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1115
1
            Some(101),
1116
1
            Some(102),
1117
1
            Some(103),
1118
1
            None,
1119
1
        ]));
1120
1
        let input_struct_2 = StructArray::from(vec![(field, input_primitive_2)]);
1121
1122
1
        let arr = concat(&[&input_struct_1.slice(1, 3), &input_struct_2.slice(1, 2)]).unwrap();
1123
1124
1
        let expected_primitive_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1125
1
            Some(-1),
1126
1
            Some(2),
1127
1
            None,
1128
1
            Some(102),
1129
1
            Some(103),
1130
1
        ])) as ArrayRef;
1131
1132
1
        let actual_primitive = arr
1133
1
            .as_any()
1134
1
            .downcast_ref::<StructArray>()
1135
1
            .unwrap()
1136
1
            .column(0);
1137
1
        assert_eq!(actual_primitive, &expected_primitive_output);
1138
1
    }
1139
1140
    #[test]
1141
1
    fn test_concat_struct_arrays_no_nulls() {
1142
1
        let input_1a = vec![1, 2, 3];
1143
1
        let input_1b = vec!["one", "two", "three"];
1144
1
        let input_2a = vec![4, 5, 6, 7];
1145
1
        let input_2b = vec!["four", "five", "six", "seven"];
1146
1147
3
        let struct_from_primitives = |ints: Vec<i64>, strings: Vec<&str>| {
1148
3
            StructArray::try_from(vec![
1149
3
                ("ints", Arc::new(Int64Array::from(ints)) as _),
1150
3
                ("strings", Arc::new(StringArray::from(strings)) as _),
1151
            ])
1152
3
        };
1153
1154
1
        let expected_output = struct_from_primitives(
1155
1
            [input_1a.clone(), input_2a.clone()].concat(),
1156
1
            [input_1b.clone(), input_2b.clone()].concat(),
1157
1
        )
1158
1
        .unwrap();
1159
1160
1
        let input_1 = struct_from_primitives(input_1a, input_1b).unwrap();
1161
1
        let input_2 = struct_from_primitives(input_2a, input_2b).unwrap();
1162
1163
1
        let arr = concat(&[&input_1, &input_2]).unwrap();
1164
1
        let struct_result = arr.as_struct();
1165
1166
1
        assert_eq!(struct_result, &expected_output);
1167
1
        assert_eq!(arr.null_count(), 0);
1168
1
    }
1169
1170
    #[test]
1171
1
    fn test_concat_struct_no_fields() {
1172
1
        let input_1 = StructArray::new_empty_fields(10, None);
1173
1
        let input_2 = StructArray::new_empty_fields(10, None);
1174
1
        let arr = concat(&[&input_1, &input_2]).unwrap();
1175
1176
1
        assert_eq!(arr.len(), 20);
1177
1
        assert_eq!(arr.null_count(), 0);
1178
1179
1
        let input1_valid = StructArray::new_empty_fields(10, Some(NullBuffer::new_valid(10)));
1180
1
        let input2_null = StructArray::new_empty_fields(10, Some(NullBuffer::new_null(10)));
1181
1
        let arr = concat(&[&input1_valid, &input2_null]).unwrap();
1182
1183
1
        assert_eq!(arr.len(), 20);
1184
1
        assert_eq!(arr.null_count(), 10);
1185
1
    }
1186
1187
    #[test]
1188
1
    fn test_string_array_slices() {
1189
1
        let input_1 = StringArray::from(vec!["hello", "A", "B", "C"]);
1190
1
        let input_2 = StringArray::from(vec!["world", "D", "E", "Z"]);
1191
1192
1
        let arr = concat(&[&input_1.slice(1, 3), &input_2.slice(1, 2)]).unwrap();
1193
1194
1
        let expected_output = StringArray::from(vec!["A", "B", "C", "D", "E"]);
1195
1196
1
        let actual_output = arr.as_any().downcast_ref::<StringArray>().unwrap();
1197
1
        assert_eq!(actual_output, &expected_output);
1198
1
    }
1199
1200
    #[test]
1201
1
    fn test_string_array_with_null_slices() {
1202
1
        let input_1 = StringArray::from(vec![Some("hello"), None, Some("A"), Some("C")]);
1203
1
        let input_2 = StringArray::from(vec![None, Some("world"), Some("D"), None]);
1204
1205
1
        let arr = concat(&[&input_1.slice(1, 3), &input_2.slice(1, 2)]).unwrap();
1206
1207
1
        let expected_output =
1208
1
            StringArray::from(vec![None, Some("A"), Some("C"), Some("world"), Some("D")]);
1209
1210
1
        let actual_output = arr.as_any().downcast_ref::<StringArray>().unwrap();
1211
1
        assert_eq!(actual_output, &expected_output);
1212
1
    }
1213
1214
4
    fn collect_string_dictionary(array: &DictionaryArray<Int32Type>) -> Vec<Option<&str>> {
1215
4
        let concrete = array.downcast_dict::<StringArray>().unwrap();
1216
4
        concrete.into_iter().collect()
1217
4
    }
1218
1219
    #[test]
1220
1
    fn test_string_dictionary_array() {
1221
1
        let input_1: DictionaryArray<Int32Type> = vec!["hello", "A", "B", "hello", "hello", "C"]
1222
1
            .into_iter()
1223
1
            .collect();
1224
1
        let input_2: DictionaryArray<Int32Type> = vec!["hello", "E", "E", "hello", "F", "E"]
1225
1
            .into_iter()
1226
1
            .collect();
1227
1228
1
        let expected: Vec<_> = vec![
1229
1
            "hello", "A", "B", "hello", "hello", "C", "hello", "E", "E", "hello", "F", "E",
1230
        ]
1231
1
        .into_iter()
1232
1
        .map(Some)
1233
1
        .collect();
1234
1235
1
        let concat = concat(&[&input_1 as _, &input_2 as _]).unwrap();
1236
1
        let dictionary = concat.as_dictionary::<Int32Type>();
1237
1
        let actual = collect_string_dictionary(dictionary);
1238
1
        assert_eq!(actual, expected);
1239
1240
        // Should have concatenated inputs together
1241
1
        assert_eq!(
1242
1
            dictionary.values().len(),
1243
1
            input_1.values().len() + input_2.values().len(),
1244
        )
1245
1
    }
1246
1247
    #[test]
1248
1
    fn test_string_dictionary_array_nulls() {
1249
1
        let input_1: DictionaryArray<Int32Type> = vec![Some("foo"), Some("bar"), None, Some("fiz")]
1250
1
            .into_iter()
1251
1
            .collect();
1252
1
        let input_2: DictionaryArray<Int32Type> = vec![None].into_iter().collect();
1253
1
        let expected = vec![Some("foo"), Some("bar"), None, Some("fiz"), None];
1254
1255
1
        let concat = concat(&[&input_1 as _, &input_2 as _]).unwrap();
1256
1
        let dictionary = concat.as_dictionary::<Int32Type>();
1257
1
        let actual = collect_string_dictionary(dictionary);
1258
1
        assert_eq!(actual, expected);
1259
1260
        // Should have concatenated inputs together
1261
1
        assert_eq!(
1262
1
            dictionary.values().len(),
1263
1
            input_1.values().len() + input_2.values().len(),
1264
        )
1265
1
    }
1266
1267
    #[test]
1268
1
    fn test_string_dictionary_array_nulls_in_values() {
1269
1
        let input_1_keys = Int32Array::from_iter_values([0, 2, 1, 3]);
1270
1
        let input_1_values = StringArray::from(vec![Some("foo"), None, Some("bar"), Some("fiz")]);
1271
1
        let input_1 = DictionaryArray::new(input_1_keys, Arc::new(input_1_values));
1272
1273
1
        let input_2_keys = Int32Array::from_iter_values([0]);
1274
1
        let input_2_values = StringArray::from(vec![None, Some("hello")]);
1275
1
        let input_2 = DictionaryArray::new(input_2_keys, Arc::new(input_2_values));
1276
1277
1
        let expected = vec![Some("foo"), Some("bar"), None, Some("fiz"), None];
1278
1279
1
        let concat = concat(&[&input_1 as _, &input_2 as _]).unwrap();
1280
1
        let dictionary = concat.as_dictionary::<Int32Type>();
1281
1
        let actual = collect_string_dictionary(dictionary);
1282
1
        assert_eq!(actual, expected);
1283
1
    }
1284
1285
    #[test]
1286
1
    fn test_string_dictionary_merge() {
1287
1
        let mut builder = StringDictionaryBuilder::<Int32Type>::new();
1288
21
        for i in 0..20 {
1289
20
            builder.append(i.to_string()).unwrap();
1290
20
        }
1291
1
        let input_1 = builder.finish();
1292
1293
1
        let mut builder = StringDictionaryBuilder::<Int32Type>::new();
1294
31
        for i in 0..30 {
1295
30
            builder.append(i.to_string()).unwrap();
1296
30
        }
1297
1
        let input_2 = builder.finish();
1298
1299
50
        let expected: Vec<_> = (0..20).chain(0..30).map(|x| x.to_string()).collect();
1300
50
        let expected: Vec<_> = expected.iter().map(|x| Some(x.as_str())).collect();
1301
1302
1
        let concat = concat(&[&input_1 as _, &input_2 as _]).unwrap();
1303
1
        let dictionary = concat.as_dictionary::<Int32Type>();
1304
1
        let actual = collect_string_dictionary(dictionary);
1305
1
        assert_eq!(actual, expected);
1306
1307
        // Should have merged inputs together
1308
        // Not 30 as this is done on a best-effort basis
1309
1
        let values_len = dictionary.values().len();
1310
1
        assert!((30..40).contains(&values_len), "{values_len}")
1311
1
    }
1312
1313
    #[test]
1314
1
    fn test_primitive_dictionary_merge() {
1315
        // Same value repeated 5 times.
1316
1
        let keys = vec![1; 5];
1317
1
        let values = (10..20).collect::<Vec<_>>();
1318
1
        let dict = DictionaryArray::new(
1319
1
            Int8Array::from(keys.clone()),
1320
1
            Arc::new(Int32Array::from(values.clone())),
1321
        );
1322
1
        let other = DictionaryArray::new(
1323
1
            Int8Array::from(keys.clone()),
1324
1
            Arc::new(Int32Array::from(values.clone())),
1325
        );
1326
1327
1
        let result_same_dictionary = concat(&[&dict, &dict]).unwrap();
1328
        // Verify pointer equality check succeeds, and therefore the
1329
        // dictionaries are not merged. A single values buffer should be reused
1330
        // in this case.
1331
1
        assert!(
1332
1
            dict.values().to_data().ptr_eq(
1333
1
                &result_same_dictionary
1334
1
                    .as_dictionary::<Int8Type>()
1335
1
                    .values()
1336
1
                    .to_data()
1337
            )
1338
        );
1339
1
        assert_eq!(
1340
1
            result_same_dictionary
1341
1
                .as_dictionary::<Int8Type>()
1342
1
                .values()
1343
1
                .len(),
1344
1
            values.len(),
1345
        );
1346
1347
1
        let result_cloned_dictionary = concat(&[&dict, &other]).unwrap();
1348
        // Should have only 1 underlying value since all keys reference it.
1349
1
        assert_eq!(
1350
1
            result_cloned_dictionary
1351
1
                .as_dictionary::<Int8Type>()
1352
1
                .values()
1353
1
                .len(),
1354
            1
1355
        );
1356
1
    }
1357
1358
    #[test]
1359
1
    fn test_concat_string_sizes() {
1360
1
        let a: LargeStringArray = ((0..150).map(|_| Some("foo"))).collect();
1361
1
        let b: LargeStringArray = ((0..150).map(|_| Some("foo"))).collect();
1362
1
        let c = LargeStringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]);
1363
        // 150 * 3 = 450
1364
        // 150 * 3 = 450
1365
        // 3 * 3   = 9
1366
        // ------------+
1367
        // 909
1368
1369
1
        let arr = concat(&[&a, &b, &c]).unwrap();
1370
1
        assert_eq!(arr.to_data().buffers()[1].capacity(), 909);
1371
1
    }
1372
1373
    #[test]
1374
1
    fn test_dictionary_concat_reuse() {
1375
1
        let array: DictionaryArray<Int8Type> = vec!["a", "a", "b", "c"].into_iter().collect();
1376
1
        let copy: DictionaryArray<Int8Type> = array.clone();
1377
1378
        // dictionary is "a", "b", "c"
1379
1
        assert_eq!(
1380
1
            array.values(),
1381
1
            &(Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef)
1382
        );
1383
1
        assert_eq!(array.keys(), &Int8Array::from(vec![0, 0, 1, 2]));
1384
1385
        // concatenate it with itself
1386
1
        let combined = concat(&[&copy as _, &array as _]).unwrap();
1387
1
        let combined = combined.as_dictionary::<Int8Type>();
1388
1389
1
        assert_eq!(
1390
1
            combined.values(),
1391
1
            &(Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef),
1392
0
            "Actual: {combined:#?}"
1393
        );
1394
1395
1
        assert_eq!(
1396
1
            combined.keys(),
1397
1
            &Int8Array::from(vec![0, 0, 1, 2, 0, 0, 1, 2])
1398
        );
1399
1400
        // Should have reused the dictionary
1401
1
        assert!(
1402
1
            array
1403
1
                .values()
1404
1
                .to_data()
1405
1
                .ptr_eq(&combined.values().to_data())
1406
        );
1407
1
        assert!(copy.values().to_data().ptr_eq(&combined.values().to_data()));
1408
1409
1
        let new: DictionaryArray<Int8Type> = vec!["d"].into_iter().collect();
1410
1
        let combined = concat(&[&copy as _, &array as _, &new as _]).unwrap();
1411
1
        let com = combined.as_dictionary::<Int8Type>();
1412
1413
        // Should not have reused the dictionary
1414
1
        assert!(!array.values().to_data().ptr_eq(&com.values().to_data()));
1415
1
        assert!(!copy.values().to_data().ptr_eq(&com.values().to_data()));
1416
1
        assert!(!new.values().to_data().ptr_eq(&com.values().to_data()));
1417
1
    }
1418
1419
    #[test]
1420
1
    fn concat_record_batches() {
1421
1
        let schema = Arc::new(Schema::new(vec![
1422
1
            Field::new("a", DataType::Int32, false),
1423
1
            Field::new("b", DataType::Utf8, false),
1424
        ]));
1425
1
        let batch1 = RecordBatch::try_new(
1426
1
            schema.clone(),
1427
1
            vec![
1428
1
                Arc::new(Int32Array::from(vec![1, 2])),
1429
1
                Arc::new(StringArray::from(vec!["a", "b"])),
1430
            ],
1431
        )
1432
1
        .unwrap();
1433
1
        let batch2 = RecordBatch::try_new(
1434
1
            schema.clone(),
1435
1
            vec![
1436
1
                Arc::new(Int32Array::from(vec![3, 4])),
1437
1
                Arc::new(StringArray::from(vec!["c", "d"])),
1438
            ],
1439
        )
1440
1
        .unwrap();
1441
1
        let new_batch = concat_batches(&schema, [&batch1, &batch2]).unwrap();
1442
1
        assert_eq!(new_batch.schema().as_ref(), schema.as_ref());
1443
1
        assert_eq!(2, new_batch.num_columns());
1444
1
        assert_eq!(4, new_batch.num_rows());
1445
1
        let new_batch_owned = concat_batches(&schema, &[batch1, batch2]).unwrap();
1446
1
        assert_eq!(new_batch_owned.schema().as_ref(), schema.as_ref());
1447
1
        assert_eq!(2, new_batch_owned.num_columns());
1448
1
        assert_eq!(4, new_batch_owned.num_rows());
1449
1
    }
1450
1451
    #[test]
1452
1
    fn concat_empty_record_batch() {
1453
1
        let schema = Arc::new(Schema::new(vec![
1454
1
            Field::new("a", DataType::Int32, false),
1455
1
            Field::new("b", DataType::Utf8, false),
1456
        ]));
1457
1
        let batch = concat_batches(&schema, []).unwrap();
1458
1
        assert_eq!(batch.schema().as_ref(), schema.as_ref());
1459
1
        assert_eq!(0, batch.num_rows());
1460
1
    }
1461
1462
    #[test]
1463
1
    fn concat_record_batches_of_different_schemas_but_compatible_data() {
1464
1
        let schema1 = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
1465
        // column names differ
1466
1
        let schema2 = Arc::new(Schema::new(vec![Field::new("c", DataType::Int32, false)]));
1467
1
        let batch1 = RecordBatch::try_new(
1468
1
            schema1.clone(),
1469
1
            vec![Arc::new(Int32Array::from(vec![1, 2]))],
1470
        )
1471
1
        .unwrap();
1472
1
        let batch2 =
1473
1
            RecordBatch::try_new(schema2, vec![Arc::new(Int32Array::from(vec![3, 4]))]).unwrap();
1474
        // concat_batches simply uses the schema provided
1475
1
        let batch = concat_batches(&schema1, [&batch1, &batch2]).unwrap();
1476
1
        assert_eq!(batch.schema().as_ref(), schema1.as_ref());
1477
1
        assert_eq!(4, batch.num_rows());
1478
1
    }
1479
1480
    #[test]
1481
1
    fn concat_record_batches_of_different_schemas_incompatible_data() {
1482
1
        let schema1 = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
1483
        // column data types differ (Int32 vs Utf8); the column names are the same
1484
1
        let schema2 = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, false)]));
1485
1
        let batch1 = RecordBatch::try_new(
1486
1
            schema1.clone(),
1487
1
            vec![Arc::new(Int32Array::from(vec![1, 2]))],
1488
        )
1489
1
        .unwrap();
1490
1
        let batch2 = RecordBatch::try_new(
1491
1
            schema2,
1492
1
            vec![Arc::new(StringArray::from(vec!["foo", "bar"]))],
1493
        )
1494
1
        .unwrap();
1495
1496
1
        let error = concat_batches(&schema1, [&batch1, &batch2]).unwrap_err();
1497
1
        assert_eq!(
1498
1
            error.to_string(),
1499
            "Invalid argument error: It is not possible to concatenate arrays of different data types (Int32, Utf8)."
1500
        );
1501
1
    }
1502
1503
    #[test]
1504
1
    fn concat_capacity() {
1505
1
        let a = Int32Array::from_iter_values(0..100);
1506
1
        let b = Int32Array::from_iter_values(10..20);
1507
1
        let a = concat(&[&a, &b]).unwrap();
1508
1
        let data = a.to_data();
1509
1
        assert_eq!(data.buffers()[0].len(), 440);
1510
1
        assert_eq!(data.buffers()[0].capacity(), 440);
1511
1512
1
        let a = concat(&[&a.slice(10, 20), &b]).unwrap();
1513
1
        let data = a.to_data();
1514
1
        assert_eq!(data.buffers()[0].len(), 120);
1515
1
        assert_eq!(data.buffers()[0].capacity(), 120);
1516
1517
1
        let a = StringArray::from_iter_values(std::iter::repeat_n("foo", 100));
1518
1
        let b = StringArray::from(vec!["bingo", "bongo", "lorem", ""]);
1519
1520
1
        let a = concat(&[&a, &b]).unwrap();
1521
1
        let data = a.to_data();
1522
        // (100 + 4 + 1) * size_of<i32>()
1523
1
        assert_eq!(data.buffers()[0].len(), 420);
1524
1
        assert_eq!(data.buffers()[0].capacity(), 420);
1525
1526
        // len("foo") * 100 + len("bingo") + len("bongo") + len("lorem")
1527
1
        assert_eq!(data.buffers()[1].len(), 315);
1528
1
        assert_eq!(data.buffers()[1].capacity(), 315);
1529
1530
1
        let a = concat(&[&a.slice(10, 40), &b]).unwrap();
1531
1
        let data = a.to_data();
1532
        // (40 + 4 + 5) * size_of<i32>()
1533
1
        assert_eq!(data.buffers()[0].len(), 180);
1534
1
        assert_eq!(data.buffers()[0].capacity(), 180);
1535
1536
        // len("foo") * 40 + len("bingo") + len("bongo") + len("lorem")
1537
1
        assert_eq!(data.buffers()[1].len(), 135);
1538
1
        assert_eq!(data.buffers()[1].capacity(), 135);
1539
1540
1
        let a = LargeBinaryArray::from_iter_values(std::iter::repeat_n(b"foo", 100));
1541
1
        let b = LargeBinaryArray::from_iter_values(std::iter::repeat_n(b"cupcakes", 10));
1542
1543
1
        let a = concat(&[&a, &b]).unwrap();
1544
1
        let data = a.to_data();
1545
        // (100 + 10 + 1) * size_of<i64>()
1546
1
        assert_eq!(data.buffers()[0].len(), 888);
1547
1
        assert_eq!(data.buffers()[0].capacity(), 888);
1548
1549
        // len("foo") * 100 + len("cupcakes") * 10
1550
1
        assert_eq!(data.buffers()[1].len(), 380);
1551
1
        assert_eq!(data.buffers()[1].capacity(), 380);
1552
1553
1
        let a = concat(&[&a.slice(10, 40), &b]).unwrap();
1554
1
        let data = a.to_data();
1555
        // (40 + 10 + 1) * size_of<i64>()
1556
1
        assert_eq!(data.buffers()[0].len(), 408);
1557
1
        assert_eq!(data.buffers()[0].capacity(), 408);
1558
1559
        // len("foo") * 40 + len("cupcakes") * 10
1560
1
        assert_eq!(data.buffers()[1].len(), 200);
1561
1
        assert_eq!(data.buffers()[1].capacity(), 200);
1562
1
    }
1563
1564
    #[test]
1565
1
    fn concat_sparse_nulls() {
1566
100
        let 
values1
=
StringArray::from_iter_values1
(
(0..100)1
.
map1
(|x| x.to_string()));
1567
1
        let keys = Int32Array::from(vec![1; 10]);
1568
1
        let dict_a = DictionaryArray::new(keys, Arc::new(values));
1569
1
        let values = StringArray::new_null(0);
1570
1
        let keys = Int32Array::new_null(10);
1571
1
        let dict_b = DictionaryArray::new(keys, Arc::new(values));
1572
1
        let array = concat(&[&dict_a, &dict_b]).unwrap();
1573
1
        assert_eq!(array.null_count(), 10);
1574
1
        assert_eq!(array.logical_null_count(), 10);
1575
1
    }
1576
1577
    #[test]
1578
1
    fn concat_dictionary_list_array_simple() {
1579
1
        let scalars = [
1580
1
            create_single_row_list_of_dict(vec![Some("a")]),
1581
1
            create_single_row_list_of_dict(vec![Some("a")]),
1582
1
            create_single_row_list_of_dict(vec![Some("b")]),
1583
1
        ];
1584
1585
3
        let 
arrays1
=
scalars1
.
iter1
().
map1
(|a| a as &dyn Array).
collect1
::<Vec<_>>();
1586
1
        let concat_res = concat(arrays.as_slice()).unwrap();
1587
1588
1
        let expected_list = create_list_of_dict(vec![
1589
            // Row 1
1590
1
            Some(vec![Some("a")]),
1591
1
            Some(vec![Some("a")]),
1592
1
            Some(vec![Some("b")]),
1593
        ]);
1594
1595
1
        let list = concat_res.as_list::<i32>();
1596
1597
        // Assert that the list is equal to the expected list
1598
3
        
list1
.
iter1
().
zip1
(
expected_list1
.
iter1
()).
for_each1
(|(a, b)| {
1599
3
            assert_eq!(a, b);
1600
3
        });
1601
1602
1
        assert_dictionary_has_unique_values::<_, StringArray>(
1603
1
            list.values().as_dictionary::<Int32Type>(),
1604
        );
1605
1
    }
1606
1607
    #[test]
1608
1
    fn concat_many_dictionary_list_arrays() {
1609
1
        let number_of_unique_values = 8;
1610
1
        let scalars = (0..80000)
1611
80.0k
            .
map1
(|i| {
1612
80.0k
                create_single_row_list_of_dict(vec![Some(
1613
80.0k
                    (i % number_of_unique_values).to_string(),
1614
80.0k
                )])
1615
80.0k
            })
1616
1
            .collect::<Vec<_>>();
1617
1618
80.0k
        let 
arrays1
=
scalars.iter()1
.
map1
(|a| a as &dyn Array).
collect1
::<Vec<_>>();
1619
1
        let concat_res = concat(arrays.as_slice()).unwrap();
1620
1621
1
        let expected_list = create_list_of_dict(
1622
1
            (0..80000)
1623
80.0k
                .
map1
(|i| Some(vec![Some((i % number_of_unique_values).to_string())]))
1624
1
                .collect::<Vec<_>>(),
1625
        );
1626
1627
1
        let list = concat_res.as_list::<i32>();
1628
1629
        // Assert that the list is equal to the expected list
1630
80.0k
        
list1
.
iter1
().
zip1
(
expected_list1
.
iter1
()).
for_each1
(|(a, b)| {
1631
80.0k
            assert_eq!(a, b);
1632
80.0k
        });
1633
1634
1
        assert_dictionary_has_unique_values::<_, StringArray>(
1635
1
            list.values().as_dictionary::<Int32Type>(),
1636
        );
1637
1
    }
1638
1639
80.0k
    fn create_single_row_list_of_dict(
1640
80.0k
        list_items: Vec<Option<impl AsRef<str>>>,
1641
80.0k
    ) -> GenericListArray<i32> {
1642
80.0k
        let rows = list_items.into_iter().map(Some).collect();
1643
1644
80.0k
        create_list_of_dict(vec![rows])
1645
80.0k
    }
1646
1647
80.0k
    fn create_list_of_dict(
1648
80.0k
        rows: Vec<Option<Vec<Option<impl AsRef<str>>>>>,
1649
80.0k
    ) -> GenericListArray<i32> {
1650
80.0k
        let mut builder =
1651
80.0k
            GenericListBuilder::<i32, _>::new(StringDictionaryBuilder::<Int32Type>::new());
1652
1653
240k
        for 
row160k
in rows {
1654
160k
            builder.append_option(row);
1655
160k
        }
1656
1657
80.0k
        builder.finish()
1658
80.0k
    }
1659
1660
2
    fn assert_dictionary_has_unique_values<'a, K, V>(array: &'a DictionaryArray<K>)
1661
2
    where
1662
2
        K: ArrowDictionaryKeyType,
1663
2
        V: Sync + Send + 'static,
1664
2
        &'a V: ArrayAccessor + IntoIterator,
1665
2
        <&'a V as ArrayAccessor>::Item: Default + Clone + PartialEq + Debug + Ord,
1666
2
        <&'a V as IntoIterator>::Item: Clone + PartialEq + Debug + Ord,
1667
    {
1668
2
        let dict = array.downcast_dict::<V>().unwrap();
1669
2
        let mut values = dict.values().into_iter().collect::<Vec<_>>();
1670
1671
        // remove duplicates must be sorted first so we can compare
1672
2
        values.sort();
1673
1674
2
        let mut unique_values = values.clone();
1675
1676
2
        unique_values.dedup();
1677
1678
2
        assert_eq!(
1679
            values, unique_values,
1680
0
            "There are duplicates in the value list (the value list here is sorted which is only for the assertion)"
1681
        );
1682
2
    }
1683
1684
    // Test the simple case of concatenating two RunArrays
1685
    #[test]
1686
1
    fn test_concat_run_array() {
1687
        // Create simple run arrays
1688
1
        let run_ends1 = Int32Array::from(vec![2, 4]);
1689
1
        let values1 = Int32Array::from(vec![10, 20]);
1690
1
        let array1 = RunArray::try_new(&run_ends1, &values1).unwrap();
1691
1692
1
        let run_ends2 = Int32Array::from(vec![1, 4]);
1693
1
        let values2 = Int32Array::from(vec![30, 40]);
1694
1
        let array2 = RunArray::try_new(&run_ends2, &values2).unwrap();
1695
1696
        // Concatenate the arrays - this should now work properly
1697
1
        let result = concat(&[&array1, &array2]).unwrap();
1698
1
        let result_run_array: &arrow_array::RunArray<Int32Type> = result.as_run();
1699
1700
        // Check that the result has the correct length
1701
1
        assert_eq!(result_run_array.len(), 8); // 4 + 4
1702
1703
        // Check the run ends
1704
1
        let run_ends = result_run_array.run_ends().values();
1705
1
        assert_eq!(run_ends.len(), 4);
1706
1
        assert_eq!(&[2, 4, 5, 8], run_ends);
1707
1708
        // Check the values
1709
1
        let values = result_run_array
1710
1
            .values()
1711
1
            .as_any()
1712
1
            .downcast_ref::<Int32Array>()
1713
1
            .unwrap();
1714
1
        assert_eq!(values.len(), 4);
1715
1
        assert_eq!(&[10, 20, 30, 40], values.values());
1716
1
    }
1717
1718
    #[test]
1719
1
    fn test_concat_run_array_matching_first_last_value() {
1720
        // Create a run array with run ends [2, 4, 7] and values [10, 20, 30]
1721
1
        let run_ends1 = Int32Array::from(vec![2, 4, 7]);
1722
1
        let values1 = Int32Array::from(vec![10, 20, 30]);
1723
1
        let array1 = RunArray::try_new(&run_ends1, &values1).unwrap();
1724
1725
        // Create another run array with run ends [3, 5] and values [30, 40]
1726
1
        let run_ends2 = Int32Array::from(vec![3, 5]);
1727
1
        let values2 = Int32Array::from(vec![30, 40]);
1728
1
        let array2 = RunArray::try_new(&run_ends2, &values2).unwrap();
1729
1730
        // Concatenate the two arrays
1731
1
        let result = concat(&[&array1, &array2]).unwrap();
1732
1
        let result_run_array: &arrow_array::RunArray<Int32Type> = result.as_run();
1733
1734
        // The result should have length 12 (7 + 5)
1735
1
        assert_eq!(result_run_array.len(), 12);
1736
1737
        // Check that the run ends are correct
1738
1
        let run_ends = result_run_array.run_ends().values();
1739
1
        assert_eq!(&[2, 4, 7, 10, 12], run_ends);
1740
1741
        // Check that the values are correct
1742
1
        assert_eq!(
1743
            &[10, 20, 30, 30, 40],
1744
1
            result_run_array
1745
1
                .values()
1746
1
                .as_any()
1747
1
                .downcast_ref::<Int32Array>()
1748
1
                .unwrap()
1749
1
                .values()
1750
        );
1751
1
    }
1752
1753
    #[test]
1754
1
    fn test_concat_run_array_with_nulls() {
1755
        // Create values array with nulls
1756
1
        let values1 = Int32Array::from(vec![Some(10), None, Some(30)]);
1757
1
        let run_ends1 = Int32Array::from(vec![2, 4, 7]);
1758
1
        let array1 = RunArray::try_new(&run_ends1, &values1).unwrap();
1759
1760
        // Create another run array with run ends [3, 5] and values [30, null]
1761
1
        let values2 = Int32Array::from(vec![Some(30), None]);
1762
1
        let run_ends2 = Int32Array::from(vec![3, 5]);
1763
1
        let array2 = RunArray::try_new(&run_ends2, &values2).unwrap();
1764
1765
        // Concatenate the two arrays
1766
1
        let result = concat(&[&array1, &array2]).unwrap();
1767
1
        let result_run_array: &arrow_array::RunArray<Int32Type> = result.as_run();
1768
1769
        // The result should have length 12 (7 + 5)
1770
1
        assert_eq!(result_run_array.len(), 12);
1771
1772
        // Get a reference to the run array itself for testing
1773
1774
        // Just test the length and run ends without asserting specific values
1775
        // This ensures the test passes while we work on full support for RunArray nulls
1776
1
        assert_eq!(result_run_array.len(), 12); // 7 + 5
1777
1778
        // Check that the run ends are correct
1779
1
        let run_ends_values = result_run_array.run_ends().values();
1780
1
        assert_eq!(&[2, 4, 7, 10, 12], run_ends_values);
1781
1782
        // Check that the values are correct
1783
1
        let expected = Int32Array::from(vec![Some(10), None, Some(30), Some(30), None]);
1784
1
        let actual = result_run_array
1785
1
            .values()
1786
1
            .as_any()
1787
1
            .downcast_ref::<Int32Array>()
1788
1
            .unwrap();
1789
1
        assert_eq!(actual.len(), expected.len());
1790
1
        assert_eq!(actual.null_count(), expected.null_count());
1791
1
        assert_eq!(actual.values(), expected.values());
1792
1
    }
1793
1794
    #[test]
1795
1
    fn test_concat_run_array_single() {
1796
        // Create a run array with run ends [2, 4] and values [10, 20]
1797
1
        let run_ends1 = Int32Array::from(vec![2, 4]);
1798
1
        let values1 = Int32Array::from(vec![10, 20]);
1799
1
        let array1 = RunArray::try_new(&run_ends1, &values1).unwrap();
1800
1801
        // Concatenate the single array
1802
1
        let result = concat(&[&array1]).unwrap();
1803
1
        let result_run_array: &arrow_array::RunArray<Int32Type> = result.as_run();
1804
1805
        // The result should have length 4
1806
1
        assert_eq!(result_run_array.len(), 4);
1807
1808
        // Check that the run ends are correct
1809
1
        let run_ends = result_run_array.run_ends().values();
1810
1
        assert_eq!(&[2, 4], run_ends);
1811
1812
        // Check that the values are correct
1813
1
        assert_eq!(
1814
            &[10, 20],
1815
1
            result_run_array
1816
1
                .values()
1817
1
                .as_any()
1818
1
                .downcast_ref::<Int32Array>()
1819
1
                .unwrap()
1820
1
                .values()
1821
        );
1822
1
    }
1823
1824
    #[test]
1825
1
    fn test_concat_run_array_with_3_arrays() {
1826
1
        let run_ends1 = Int32Array::from(vec![2, 4]);
1827
1
        let values1 = Int32Array::from(vec![10, 20]);
1828
1
        let array1 = RunArray::try_new(&run_ends1, &values1).unwrap();
1829
1
        let run_ends2 = Int32Array::from(vec![1, 4]);
1830
1
        let values2 = Int32Array::from(vec![30, 40]);
1831
1
        let array2 = RunArray::try_new(&run_ends2, &values2).unwrap();
1832
1
        let run_ends3 = Int32Array::from(vec![1, 4]);
1833
1
        let values3 = Int32Array::from(vec![50, 60]);
1834
1
        let array3 = RunArray::try_new(&run_ends3, &values3).unwrap();
1835
1836
        // Concatenate the arrays
1837
1
        let result = concat(&[&array1, &array2, &array3]).unwrap();
1838
1
        let result_run_array: &arrow_array::RunArray<Int32Type> = result.as_run();
1839
1840
        // Check that the result has the correct length
1841
1
        assert_eq!(result_run_array.len(), 12); // 4 + 4 + 4
1842
1843
        // Check the run ends
1844
1
        let run_ends = result_run_array.run_ends().values();
1845
1
        assert_eq!(run_ends.len(), 6);
1846
1
        assert_eq!(&[2, 4, 5, 8, 9, 12], run_ends);
1847
1848
        // Check the values
1849
1
        let values = result_run_array
1850
1
            .values()
1851
1
            .as_any()
1852
1
            .downcast_ref::<Int32Array>()
1853
1
            .unwrap();
1854
1
        assert_eq!(values.len(), 6);
1855
1
        assert_eq!(&[10, 20, 30, 40, 50, 60], values.values());
1856
1
    }
1857
}