Coverage Report

Created: 2025-11-17 14:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-array/src/array/list_array.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use crate::array::{get_offsets, make_array, print_long_array};
19
use crate::builder::{GenericListBuilder, PrimitiveBuilder};
20
use crate::{
21
    Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray,
22
    iterator::GenericListArrayIter, new_empty_array,
23
};
24
use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
25
use arrow_data::{ArrayData, ArrayDataBuilder};
26
use arrow_schema::{ArrowError, DataType, FieldRef};
27
use num_integer::Integer;
28
use std::any::Any;
29
use std::sync::Arc;
30
31
/// A type that can be used within a variable-size array to encode offset information
32
///
33
/// See [`ListArray`], [`LargeListArray`], [`BinaryArray`], [`LargeBinaryArray`],
34
/// [`StringArray`] and [`LargeStringArray`]
35
///
36
/// [`BinaryArray`]: crate::array::BinaryArray
37
/// [`LargeBinaryArray`]: crate::array::LargeBinaryArray
38
/// [`StringArray`]: crate::array::StringArray
39
/// [`LargeStringArray`]: crate::array::LargeStringArray
40
pub trait OffsetSizeTrait:
41
    ArrowNativeType + std::ops::AddAssign + Integer + num_traits::CheckedAdd
42
{
43
    /// True for 64 bit offset size and false for 32 bit offset size
44
    const IS_LARGE: bool;
45
    /// Prefix for the offset size
46
    const PREFIX: &'static str;
47
    /// The max `usize` offset
48
    const MAX_OFFSET: usize;
49
}
50
51
impl OffsetSizeTrait for i32 {
52
    const IS_LARGE: bool = false;
53
    const PREFIX: &'static str = "";
54
    const MAX_OFFSET: usize = i32::MAX as usize;
55
}
56
57
impl OffsetSizeTrait for i64 {
58
    const IS_LARGE: bool = true;
59
    const PREFIX: &'static str = "Large";
60
    const MAX_OFFSET: usize = i64::MAX as usize;
61
}
62
63
/// An array of [variable length lists], similar to JSON arrays
64
/// (e.g. `["A", "B", "C"]`). This struct specifically represents
65
/// the [list layout]. Refer to [`GenericListViewArray`] for the
66
/// [list-view layout].
67
///
68
/// Lists are represented using `offsets` into a `values` child
69
/// array. Offsets are stored in two adjacent entries of an
70
/// [`OffsetBuffer`].
71
///
72
/// Arrow defines [`ListArray`] with `i32` offsets and
73
/// [`LargeListArray`] with `i64` offsets.
74
///
75
/// Use [`GenericListBuilder`] to construct a [`GenericListArray`].
76
///
77
/// # Representation
78
///
79
/// A [`ListArray`] can represent a list of values of any other
80
/// supported Arrow type. Each element of the `ListArray` itself is
81
/// a list which may be empty, may contain NULL and non-null values,
82
/// or may itself be NULL.
83
///
84
/// For example, the `ListArray` shown in the following diagram stores
85
/// lists of strings. Note that `[]` represents an empty (length
86
/// 0), but non NULL list.
87
///
88
/// ```text
89
/// ┌─────────────┐
90
/// │   [A,B,C]   │
91
/// ├─────────────┤
92
/// │     []      │
93
/// ├─────────────┤
94
/// │    NULL     │
95
/// ├─────────────┤
96
/// │     [D]     │
97
/// ├─────────────┤
98
/// │  [NULL, F]  │
99
/// └─────────────┘
100
/// ```
101
///
102
/// The `values` are stored in a child [`StringArray`] and the offsets
103
/// are stored in an [`OffsetBuffer`] as shown in the following
104
/// diagram. The logical values and offsets are shown on the left, and
105
/// the actual `ListArray` encoding on the right.
106
///
107
/// ```text
108
///                                         ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
109
///                                                                 ┌ ─ ─ ─ ─ ─ ─ ┐    │
110
///  ┌─────────────┐  ┌───────┐             │     ┌───┐   ┌───┐       ┌───┐ ┌───┐
111
///  │   [A,B,C]   │  │ (0,3) │                   │ 1 │   │ 0 │     │ │ 1 │ │ A │ │ 0  │
112
///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤       ├───┤ ├───┤
113
///  │ [] (empty)  │  │ (3,3) │                   │ 1 │   │ 3 │     │ │ 1 │ │ B │ │ 1  │
114
///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤       ├───┤ ├───┤
115
///  │    NULL     │  │ (3,3) │                   │ 0 │   │ 3 │     │ │ 1 │ │ C │ │ 2  │
116
///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤       ├───┤ ├───┤
117
///  │     [D]     │  │ (3,4) │                   │ 1 │   │ 3 │     │ │ 1 │ │ D │ │ 3  │
118
///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤       ├───┤ ├───┤
119
///  │  [NULL, F]  │  │ (4,6) │                   │ 1 │   │ 4 │     │ │ 0 │ │ ? │ │ 4  │
120
///  └─────────────┘  └───────┘             │     └───┘   ├───┤       ├───┤ ├───┤
121
///                                                       │ 6 │     │ │ 1 │ │ F │ │ 5  │
122
///                                         │  Validity   └───┘       └───┘ └───┘
123
///     Logical       Logical                  (nulls)   Offsets    │    Values   │    │
124
///      Values       Offsets               │                           (Array)
125
///                                                                 └ ─ ─ ─ ─ ─ ─ ┘    │
126
///                 (offsets[i],            │   ListArray
127
///                offsets[i+1])                                                       │
128
///                                         └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
129
/// ```
130
///
131
/// # Slicing
132
///
133
/// Slicing a `ListArray` creates a new `ListArray` without copying any data,
134
/// but this means the [`Self::values`] and [`Self::offsets`] may have "unused" data
135
///
136
/// For example, calling `slice(1, 3)` on the `ListArray` in the above example
137
/// would result in the following. Note
138
///
139
/// 1. `Values` array is unchanged
140
/// 2. `Offsets` do not start at `0`, nor cover all values in the Values array.
141
///
142
/// ```text
143
///                                 ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
144
///                                                         ┌ ─ ─ ─ ─ ─ ─ ┐    │  ╔═══╗
145
///                                 │                         ╔═══╗ ╔═══╗         ║   ║  Not used
146
///                                                         │ ║ 1 ║ ║ A ║ │ 0  │  ╚═══╝
147
///  ┌─────────────┐  ┌───────┐     │     ┌───┐   ┌───┐       ╠═══╣ ╠═══╣
148
///  │ [] (empty)  │  │ (3,3) │           │ 1 │   │ 3 │     │ ║ 1 ║ ║ B ║ │ 1  │
149
///  ├─────────────┤  ├───────┤     │     ├───┤   ├───┤       ╠═══╣ ╠═══╣
150
///  │    NULL     │  │ (3,3) │           │ 0 │   │ 3 │     │ ║ 1 ║ ║ C ║ │ 2  │
151
///  ├─────────────┤  ├───────┤     │     ├───┤   ├───┤       ╚═══╝ ╚═══╝
152
///  │     [D]     │  │ (3,4) │           │ 1 │   │ 3 │     │ │ 1 │ │ D │ │ 3  │
153
///  └─────────────┘  └───────┘     │     └───┘   ├───┤       ╔═══╗ ╔═══╗
154
///                                               │ 4 │     │ ║ 0 ║ ║ ? ║ │ 4  │
155
///                                 │             └───┘       ╠═══╣ ╠═══╣
156
///                                                         │ ║ 1 ║ ║ F ║ │ 5  │
157
///                                 │  Validity               ╚═══╝ ╚═══╝
158
///     Logical       Logical          (nulls)   Offsets    │    Values   │    │
159
///      Values       Offsets       │                           (Array)
160
///                                                         └ ─ ─ ─ ─ ─ ─ ┘    │
161
///                 (offsets[i],    │   ListArray
162
///                offsets[i+1])                                               │
163
///                                 └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
164
/// ```
165
///
166
/// [`StringArray`]: crate::array::StringArray
167
/// [`GenericListViewArray`]: crate::array::GenericListViewArray
168
/// [variable length lists]: https://arrow.apache.org/docs/format/Columnar.html#variable-size-list-layout
169
/// [list layout]: https://arrow.apache.org/docs/format/Columnar.html#list-layout
170
/// [list-view layout]: https://arrow.apache.org/docs/format/Columnar.html#listview-layout
171
pub struct GenericListArray<OffsetSize: OffsetSizeTrait> {
172
    data_type: DataType,
173
    nulls: Option<NullBuffer>,
174
    values: ArrayRef,
175
    value_offsets: OffsetBuffer<OffsetSize>,
176
}
177
178
impl<OffsetSize: OffsetSizeTrait> Clone for GenericListArray<OffsetSize> {
179
29
    fn clone(&self) -> Self {
180
29
        Self {
181
29
            data_type: self.data_type.clone(),
182
29
            nulls: self.nulls.clone(),
183
29
            values: self.values.clone(),
184
29
            value_offsets: self.value_offsets.clone(),
185
29
        }
186
29
    }
187
}
188
189
impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
190
    /// The data type constructor of list array.
191
    /// The input is the schema of the child array and
192
    /// the output is the [`DataType`], List or LargeList.
193
    pub const DATA_TYPE_CONSTRUCTOR: fn(FieldRef) -> DataType = if OffsetSize::IS_LARGE {
194
        DataType::LargeList
195
    } else {
196
        DataType::List
197
    };
198
199
    /// Create a new [`GenericListArray`] from the provided parts
200
    ///
201
    /// # Errors
202
    ///
203
    /// Errors if
204
    ///
205
    /// * `offsets.len() - 1 != nulls.len()`
206
    /// * `offsets.last() > values.len()`
207
    /// * `!field.is_nullable() && values.is_nullable()`
208
    /// * `field.data_type() != values.data_type()`
209
80.0k
    pub fn try_new(
210
80.0k
        field: FieldRef,
211
80.0k
        offsets: OffsetBuffer<OffsetSize>,
212
80.0k
        values: ArrayRef,
213
80.0k
        nulls: Option<NullBuffer>,
214
80.0k
    ) -> Result<Self, ArrowError> {
215
80.0k
        let len = offsets.len() - 1; // Offsets guaranteed to not be empty
216
80.0k
        let end_offset = offsets.last().unwrap().as_usize();
217
        // don't need to check other values of `offsets` because they are checked
218
        // during construction of `OffsetBuffer`
219
80.0k
        if end_offset > values.len() {
220
0
            return Err(ArrowError::InvalidArgumentError(format!(
221
0
                "Max offset of {end_offset} exceeds length of values {}",
222
0
                values.len()
223
0
            )));
224
80.0k
        }
225
226
80.0k
        if let Some(
n15
) = nulls.as_ref() {
227
15
            if n.len() != len {
228
0
                return Err(ArrowError::InvalidArgumentError(format!(
229
0
                    "Incorrect length of null buffer for {}ListArray, expected {len} got {}",
230
0
                    OffsetSize::PREFIX,
231
0
                    n.len(),
232
0
                )));
233
15
            }
234
80.0k
        }
235
80.0k
        if !field.is_nullable() && 
values0
.
is_nullable0
() {
236
0
            return Err(ArrowError::InvalidArgumentError(format!(
237
0
                "Non-nullable field of {}ListArray {:?} cannot contain nulls",
238
0
                OffsetSize::PREFIX,
239
0
                field.name()
240
0
            )));
241
80.0k
        }
242
243
80.0k
        if field.data_type() != values.data_type() {
244
0
            return Err(ArrowError::InvalidArgumentError(format!(
245
0
                "{}ListArray expected data type {} got {} for {:?}",
246
0
                OffsetSize::PREFIX,
247
0
                field.data_type(),
248
0
                values.data_type(),
249
0
                field.name()
250
0
            )));
251
80.0k
        }
252
253
80.0k
        Ok(Self {
254
80.0k
            data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
255
80.0k
            nulls,
256
80.0k
            values,
257
80.0k
            value_offsets: offsets,
258
80.0k
        })
259
80.0k
    }
260
261
    /// Create a new [`GenericListArray`] from the provided parts
262
    ///
263
    /// # Panics
264
    ///
265
    /// Panics if [`Self::try_new`] returns an error
266
80.0k
    pub fn new(
267
80.0k
        field: FieldRef,
268
80.0k
        offsets: OffsetBuffer<OffsetSize>,
269
80.0k
        values: ArrayRef,
270
80.0k
        nulls: Option<NullBuffer>,
271
80.0k
    ) -> Self {
272
80.0k
        Self::try_new(field, offsets, values, nulls).unwrap()
273
80.0k
    }
274
275
    /// Create a new [`GenericListArray`] of length `len` where all values are null
276
    pub fn new_null(field: FieldRef, len: usize) -> Self {
277
        let values = new_empty_array(field.data_type());
278
        Self {
279
            data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
280
            nulls: Some(NullBuffer::new_null(len)),
281
            value_offsets: OffsetBuffer::new_zeroed(len),
282
            values,
283
        }
284
    }
285
286
    /// Deconstruct this array into its constituent parts
287
    pub fn into_parts(
288
        self,
289
    ) -> (
290
        FieldRef,
291
        OffsetBuffer<OffsetSize>,
292
        ArrayRef,
293
        Option<NullBuffer>,
294
    ) {
295
        let f = match self.data_type {
296
            DataType::List(f) | DataType::LargeList(f) => f,
297
            _ => unreachable!(),
298
        };
299
        (f, self.value_offsets, self.values, self.nulls)
300
    }
301
302
    /// Returns a reference to the offsets of this list
303
    ///
304
    /// Unlike [`Self::value_offsets`] this returns the [`OffsetBuffer`]
305
    /// allowing for zero-copy cloning.
306
    ///
307
    /// Notes: The `offsets` may not start at 0 and may not cover all values in
308
    /// [`Self::values`]. This can happen when the list array was sliced via
309
    /// [`Self::slice`]. See documentation for [`Self`] for more details.
310
    #[inline]
311
240k
    pub fn offsets(&self) -> &OffsetBuffer<OffsetSize> {
312
240k
        &self.value_offsets
313
240k
    }
314
315
    /// Returns a reference to the values of this list
316
    ///
317
    /// Note: The list array may not refer to all values in the `values` array.
318
    /// For example if the list array was sliced via [`Self::slice`] values will
319
    /// still contain values both before and after the slice. See documentation
320
    /// for [`Self`] for more details.
321
    #[inline]
322
160k
    pub fn values(&self) -> &ArrayRef {
323
160k
        &self.values
324
160k
    }
325
326
    /// Returns a clone of the value type of this list.
327
    pub fn value_type(&self) -> DataType {
328
        self.values.data_type().clone()
329
    }
330
331
    /// Returns ith value of this list array.
332
    ///
333
    /// Note: This method does not check for nulls and the value is arbitrary
334
    /// if [`is_null`](Self::is_null) returns true for the index.
335
    ///
336
    /// # Safety
337
    /// Caller must ensure that the index is within the array bounds
338
    pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
339
        let end = unsafe { self.value_offsets().get_unchecked(i + 1).as_usize() };
340
        let start = unsafe { self.value_offsets().get_unchecked(i).as_usize() };
341
        self.values.slice(start, end - start)
342
    }
343
344
    /// Returns ith value of this list array.
345
    ///
346
    /// Note: This method does not check for nulls and the value is arbitrary
347
    /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index.
348
    ///
349
    /// # Panics
350
    /// Panics if index `i` is out of bounds
351
160k
    pub fn value(&self, i: usize) -> ArrayRef {
352
160k
        let end = self.value_offsets()[i + 1].as_usize();
353
160k
        let start = self.value_offsets()[i].as_usize();
354
160k
        self.values.slice(start, end - start)
355
160k
    }
356
357
    /// Returns the offset values in the offsets buffer.
358
    ///
359
    /// See [`Self::offsets`] for more details.
360
    #[inline]
361
320k
    pub fn value_offsets(&self) -> &[OffsetSize] {
362
320k
        &self.value_offsets
363
320k
    }
364
365
    /// Returns the length for value at index `i`.
366
    #[inline]
367
    pub fn value_length(&self, i: usize) -> OffsetSize {
368
        let offsets = self.value_offsets();
369
        offsets[i + 1] - offsets[i]
370
    }
371
372
    /// constructs a new iterator
373
4
    pub fn iter<'a>(&'a self) -> GenericListArrayIter<'a, OffsetSize> {
374
4
        GenericListArrayIter::<'a, OffsetSize>::new(self)
375
4
    }
376
377
    #[inline]
378
26
    fn get_type(data_type: &DataType) -> Option<&DataType> {
379
26
        match (OffsetSize::IS_LARGE, data_type) {
380
14
            (true, DataType::LargeList(
child12
)) | (false, DataType::List(child)) => {
381
26
                Some(child.data_type())
382
            }
383
0
            _ => None,
384
        }
385
26
    }
386
387
    /// Returns a zero-copy slice of this array with the indicated offset and length.
388
    ///
389
    /// Notes: this method does *NOT* slice the underlying values array or modify
390
    /// the values in the offsets buffer. See [`Self::values`] and
391
    /// [`Self::offsets`] for more information.
392
2
    pub fn slice(&self, offset: usize, length: usize) -> Self {
393
        Self {
394
2
            data_type: self.data_type.clone(),
395
2
            nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
396
2
            values: self.values.clone(),
397
2
            value_offsets: self.value_offsets.slice(offset, length),
398
        }
399
2
    }
400
401
    /// Creates a [`GenericListArray`] from an iterator of primitive values
402
    /// # Example
403
    /// ```
404
    /// # use arrow_array::ListArray;
405
    /// # use arrow_array::types::Int32Type;
406
    ///
407
    /// let data = vec![
408
    ///    Some(vec![Some(0), Some(1), Some(2)]),
409
    ///    None,
410
    ///    Some(vec![Some(3), None, Some(5)]),
411
    ///    Some(vec![Some(6), Some(7)]),
412
    /// ];
413
    /// let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
414
    /// println!("{:?}", list_array);
415
    /// ```
416
15
    pub fn from_iter_primitive<T, P, I>(iter: I) -> Self
417
15
    where
418
15
        T: ArrowPrimitiveType,
419
15
        P: IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>,
420
15
        I: IntoIterator<Item = Option<P>>,
421
    {
422
15
        let iter = iter.into_iter();
423
15
        let size_hint = iter.size_hint().0;
424
15
        let mut builder =
425
15
            GenericListBuilder::with_capacity(PrimitiveBuilder::<T>::new(), size_hint);
426
427
71
        for 
i56
in iter {
428
56
            match i {
429
44
                Some(p) => {
430
148
                    for 
t104
in p {
431
104
                        builder.values().append_option(t);
432
104
                    }
433
44
                    builder.append(true);
434
                }
435
12
                None => builder.append(false),
436
            }
437
        }
438
15
        builder.finish()
439
15
    }
440
}
441
442
impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListArray<OffsetSize> {
443
26
    fn from(data: ArrayData) -> Self {
444
26
        Self::try_new_from_array_data(data)
445
26
            .expect("Expected infallible creation of GenericListArray from ArrayDataRef failed")
446
26
    }
447
}
448
449
impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>> for ArrayData {
450
30
    fn from(array: GenericListArray<OffsetSize>) -> Self {
451
30
        let len = array.len();
452
30
        let builder = ArrayDataBuilder::new(array.data_type)
453
30
            .len(len)
454
30
            .nulls(array.nulls)
455
30
            .buffers(vec![array.value_offsets.into_inner().into_inner()])
456
30
            .child_data(vec![array.values.to_data()]);
457
458
30
        unsafe { builder.build_unchecked() }
459
30
    }
460
}
461
462
impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<OffsetSize> {
463
    fn from(value: FixedSizeListArray) -> Self {
464
        let (field, size) = match value.data_type() {
465
            DataType::FixedSizeList(f, size) => (f, *size as usize),
466
            _ => unreachable!(),
467
        };
468
469
        let offsets = OffsetBuffer::from_repeated_length(size, value.len());
470
471
        Self {
472
            data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
473
            nulls: value.nulls().cloned(),
474
            values: value.values().clone(),
475
            value_offsets: offsets,
476
        }
477
    }
478
}
479
480
impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
481
26
    fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
482
26
        if data.buffers().len() != 1 {
483
0
            return Err(ArrowError::InvalidArgumentError(format!(
484
0
                "ListArray data should contain a single buffer only (value offsets), had {}",
485
0
                data.buffers().len()
486
0
            )));
487
26
        }
488
489
26
        if data.child_data().len() != 1 {
490
0
            return Err(ArrowError::InvalidArgumentError(format!(
491
0
                "ListArray should contain a single child array (values array), had {}",
492
0
                data.child_data().len()
493
0
            )));
494
26
        }
495
496
26
        let values = data.child_data()[0].clone();
497
498
26
        if let Some(child_data_type) = Self::get_type(data.data_type()) {
499
26
            if values.data_type() != child_data_type {
500
0
                return Err(ArrowError::InvalidArgumentError(format!(
501
0
                    "[Large]ListArray's child datatype {:?} does not \
502
0
                             correspond to the List's datatype {:?}",
503
0
                    values.data_type(),
504
0
                    child_data_type
505
0
                )));
506
26
            }
507
        } else {
508
0
            return Err(ArrowError::InvalidArgumentError(format!(
509
0
                "[Large]ListArray's datatype must be [Large]ListArray(). It is {:?}",
510
0
                data.data_type()
511
0
            )));
512
        }
513
514
26
        let values = make_array(values);
515
        // SAFETY:
516
        // ArrayData is valid, and verified type above
517
26
        let value_offsets = unsafe { get_offsets(&data) };
518
519
26
        Ok(Self {
520
26
            data_type: data.data_type().clone(),
521
26
            nulls: data.nulls().cloned(),
522
26
            values,
523
26
            value_offsets,
524
26
        })
525
26
    }
526
}
527
528
impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
529
80.0k
    fn as_any(&self) -> &dyn Any {
530
80.0k
        self
531
80.0k
    }
532
533
29
    fn to_data(&self) -> ArrayData {
534
29
        self.clone().into()
535
29
    }
536
537
1
    fn into_data(self) -> ArrayData {
538
1
        self.into()
539
1
    }
540
541
80.0k
    fn data_type(&self) -> &DataType {
542
80.0k
        &self.data_type
543
80.0k
    }
544
545
0
    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
546
0
        Arc::new(self.slice(offset, length))
547
0
    }
548
549
80.0k
    fn len(&self) -> usize {
550
80.0k
        self.value_offsets.len() - 1
551
80.0k
    }
552
553
0
    fn is_empty(&self) -> bool {
554
0
        self.value_offsets.len() <= 1
555
0
    }
556
557
0
    fn shrink_to_fit(&mut self) {
558
0
        if let Some(nulls) = &mut self.nulls {
559
0
            nulls.shrink_to_fit();
560
0
        }
561
0
        self.values.shrink_to_fit();
562
0
        self.value_offsets.shrink_to_fit();
563
0
    }
564
565
0
    fn offset(&self) -> usize {
566
0
        0
567
0
    }
568
569
80.0k
    fn nulls(&self) -> Option<&NullBuffer> {
570
80.0k
        self.nulls.as_ref()
571
80.0k
    }
572
573
0
    fn logical_null_count(&self) -> usize {
574
        // More efficient that the default implementation
575
0
        self.null_count()
576
0
    }
577
578
0
    fn get_buffer_memory_size(&self) -> usize {
579
0
        let mut size = self.values.get_buffer_memory_size();
580
0
        size += self.value_offsets.inner().inner().capacity();
581
0
        if let Some(n) = self.nulls.as_ref() {
582
0
            size += n.buffer().capacity();
583
0
        }
584
0
        size
585
0
    }
586
587
0
    fn get_array_memory_size(&self) -> usize {
588
0
        let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size();
589
0
        size += self.value_offsets.inner().inner().capacity();
590
0
        if let Some(n) = self.nulls.as_ref() {
591
0
            size += n.buffer().capacity();
592
0
        }
593
0
        size
594
0
    }
595
}
596
597
impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListArray<OffsetSize> {
598
    type Item = ArrayRef;
599
600
    fn value(&self, index: usize) -> Self::Item {
601
        GenericListArray::value(self, index)
602
    }
603
604
160k
    unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
605
160k
        GenericListArray::value(self, index)
606
160k
    }
607
}
608
609
impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListArray<OffsetSize> {
610
0
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
611
0
        let prefix = OffsetSize::PREFIX;
612
613
0
        write!(f, "{prefix}ListArray\n[\n")?;
614
0
        print_long_array(self, f, |array, index, f| {
615
0
            std::fmt::Debug::fmt(&array.value(index), f)
616
0
        })?;
617
0
        write!(f, "]")
618
0
    }
619
}
620
621
/// A [`GenericListArray`] of variable size lists, storing offsets as `i32`.
622
///
623
/// See [`ListBuilder`](crate::builder::ListBuilder) for how to construct a [`ListArray`]
624
pub type ListArray = GenericListArray<i32>;
625
626
/// A [`GenericListArray`] of variable size lists, storing offsets as `i64`.
627
///
628
/// See [`LargeListBuilder`](crate::builder::LargeListBuilder) for how to construct a [`LargeListArray`]
///
/// The counterpart that stores offsets as `i32` is [`ListArray`].
629
pub type LargeListArray = GenericListArray<i64>;
630
631
#[cfg(test)]
632
mod tests {
633
    use super::*;
634
    use crate::builder::{FixedSizeListBuilder, Int32Builder, ListBuilder, UnionBuilder};
635
    use crate::cast::AsArray;
636
    use crate::types::Int32Type;
637
    use crate::{Int32Array, Int64Array};
638
    use arrow_buffer::{Buffer, ScalarBuffer, bit_util};
639
    use arrow_schema::Field;
640
641
    fn create_from_buffers() -> ListArray {
642
        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
643
        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
644
        let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 8]));
645
        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
646
        ListArray::new(field, offsets, Arc::new(values), None)
647
    }
648
649
    #[test]
    fn test_from_iter_primitive() {
        // Same logical content as `create_from_buffers`: [[0, 1, 2], [3, 4, 5], [6, 7]]
        let rows = [vec![0, 1, 2], vec![3, 4, 5], vec![6, 7]];
        let from_iter = ListArray::from_iter_primitive::<Int32Type, _, _>(
            rows.into_iter()
                .map(|row| Some(row.into_iter().map(Some).collect::<Vec<_>>())),
        );

        let from_buffers = create_from_buffers();
        assert_eq!(from_iter, from_buffers);
    }
661
662
    #[test]
    fn test_empty_list_array() {
        // Zero-length child values backed by an empty buffer
        let empty_values = ArrayData::builder(DataType::Int32)
            .len(0)
            .add_buffer(Buffer::from([]))
            .build()
            .unwrap();

        // A zero-length list whose offsets buffer is empty as well
        let item_field = Arc::new(Field::new_list_field(DataType::Int32, false));
        let empty_list = ListArray::from(
            ArrayData::builder(DataType::List(item_field))
                .len(0)
                .add_buffer(Buffer::from([]))
                .add_child_data(empty_values)
                .build()
                .unwrap(),
        );

        assert_eq!(empty_list.len(), 0);
    }
687
688
    #[test]
    fn test_list_array() {
        // Flat child values 0..=7
        let child = ArrayData::builder(DataType::Int32)
            .len(8)
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
            .build()
            .unwrap();

        // Offsets describing the nested array [[0, 1, 2], [3, 4, 5], [6, 7]]
        let offsets = Buffer::from_slice_ref([0, 3, 6, 8]);
        let list_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));

        let whole = ListArray::from(
            ArrayData::builder(list_type.clone())
                .len(3)
                .add_buffer(offsets.clone())
                .add_child_data(child.clone())
                .build()
                .unwrap(),
        );

        assert_eq!(child, whole.values().to_data());
        assert_eq!(DataType::Int32, whole.value_type());
        assert_eq!(3, whole.len());
        assert_eq!(0, whole.null_count());
        assert_eq!(6, whole.value_offsets()[2]);
        assert_eq!(2, whole.value_length(2));
        assert_eq!(0, whole.value(0).as_primitive::<Int32Type>().value(0));
        // SAFETY: index 0 is within the 3 lists of `whole`
        let first = unsafe { whole.value_unchecked(0) };
        assert_eq!(0, first.as_primitive::<Int32Type>().value(0));
        for row in 0..3 {
            assert!(whole.is_valid(row));
            assert!(!whole.is_null(row));
        }

        // Same buffers viewed through an ArrayData offset of 1 (first list skipped):
        //  [[3, 4, 5], [6, 7]]
        let tail = ListArray::from(
            ArrayData::builder(list_type)
                .len(2)
                .offset(1)
                .add_buffer(offsets)
                .add_child_data(child.clone())
                .build()
                .unwrap(),
        );

        assert_eq!(child, tail.values().to_data());
        assert_eq!(DataType::Int32, tail.value_type());
        assert_eq!(2, tail.len());
        assert_eq!(0, tail.null_count());
        assert_eq!(6, tail.value_offsets()[1]);
        assert_eq!(2, tail.value_length(1));
        assert_eq!(3, tail.value(0).as_primitive::<Int32Type>().value(0));
        // SAFETY: index 0 is within the 2 lists of `tail`
        let first = unsafe { tail.value_unchecked(0) };
        assert_eq!(3, first.as_primitive::<Int32Type>().value(0));
    }
757
758
    #[test]
    fn test_large_list_array() {
        // Flat child values 0..=7
        let child = ArrayData::builder(DataType::Int32)
            .len(8)
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
            .build()
            .unwrap();

        // `i64` offsets describing the nested array [[0, 1, 2], [3, 4, 5], [6, 7]]
        let offsets = Buffer::from_slice_ref([0i64, 3, 6, 8]);
        let list_type = DataType::new_large_list(DataType::Int32, false);

        let whole = LargeListArray::from(
            ArrayData::builder(list_type.clone())
                .len(3)
                .add_buffer(offsets.clone())
                .add_child_data(child.clone())
                .build()
                .unwrap(),
        );

        assert_eq!(child, whole.values().to_data());
        assert_eq!(DataType::Int32, whole.value_type());
        assert_eq!(3, whole.len());
        assert_eq!(0, whole.null_count());
        assert_eq!(6, whole.value_offsets()[2]);
        assert_eq!(2, whole.value_length(2));
        assert_eq!(0, whole.value(0).as_primitive::<Int32Type>().value(0));
        // SAFETY: index 0 is within the 3 lists of `whole`
        let first = unsafe { whole.value_unchecked(0) };
        assert_eq!(0, first.as_primitive::<Int32Type>().value(0));
        for row in 0..3 {
            assert!(whole.is_valid(row));
            assert!(!whole.is_null(row));
        }

        // Same buffers viewed through an ArrayData offset of 1:
        //  [[3, 4, 5], [6, 7]]
        let tail = LargeListArray::from(
            ArrayData::builder(list_type)
                .len(2)
                .offset(1)
                .add_buffer(offsets)
                .add_child_data(child.clone())
                .build()
                .unwrap(),
        );

        assert_eq!(child, tail.values().to_data());
        assert_eq!(DataType::Int32, tail.value_type());
        assert_eq!(2, tail.len());
        assert_eq!(0, tail.null_count());
        assert_eq!(6, tail.value_offsets()[1]);
        assert_eq!(2, tail.value_length(1));
        assert_eq!(3, tail.value(0).as_primitive::<Int32Type>().value(0));
        // SAFETY: index 0 is within the 2 lists of `tail`
        let first = unsafe { tail.value_unchecked(0) };
        assert_eq!(3, first.as_primitive::<Int32Type>().value(0));
    }
826
827
    #[test]
    fn test_list_array_slice() {
        // Flat child values 0..=9
        let child = ArrayData::builder(DataType::Int32)
            .len(10)
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
            .build()
            .unwrap();

        // Offsets for the nested array:
        //  [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
        let offsets = Buffer::from_slice_ref([0, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
        // Validity bitmap 01011001 00000001: lists 0, 3, 4, 6 and 8 are valid
        let mut validity: [u8; 2] = [0; 2];
        for valid_idx in [0, 3, 4, 6, 8] {
            bit_util::set_bit(&mut validity, valid_idx);
        }

        let list_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
        let list = ListArray::from(
            ArrayData::builder(list_type)
                .len(9)
                .add_buffer(offsets)
                .add_child_data(child.clone())
                .null_bit_buffer(Some(Buffer::from(validity)))
                .build()
                .unwrap(),
        );

        assert_eq!(child, list.values().to_data());
        assert_eq!(DataType::Int32, list.value_type());
        assert_eq!(9, list.len());
        assert_eq!(4, list.null_count());
        assert_eq!(2, list.value_offsets()[3]);
        assert_eq!(2, list.value_length(3));

        let sliced = list.slice(1, 6);
        assert_eq!(6, sliced.len());
        assert_eq!(3, sliced.null_count());

        // Element `i` of the slice corresponds to bit `1 + i` of the original bitmap
        for i in 0..sliced.len() {
            match bit_util::get_bit(&validity, 1 + i) {
                true => assert!(sliced.is_valid(i)),
                false => assert!(sliced.is_null(i)),
            }
        }

        // Check offset and length for each non-null value: (index, offset, length)
        let sliced_list = sliced.as_any().downcast_ref::<ListArray>().unwrap();
        for (idx, offset, length) in [(2_usize, 2, 2), (3, 4, 2), (5, 6, 3)] {
            assert_eq!(offset, sliced_list.value_offsets()[idx]);
            assert_eq!(length, sliced_list.value_length(idx));
        }
    }
888
889
    #[test]
    fn test_large_list_array_slice() {
        // Flat child values 0..=9
        let child = ArrayData::builder(DataType::Int32)
            .len(10)
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
            .build()
            .unwrap();

        // `i64` offsets for the nested array:
        //  [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
        let offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
        // Validity bitmap 01011001 00000001: lists 0, 3, 4, 6 and 8 are valid
        let mut validity: [u8; 2] = [0; 2];
        for valid_idx in [0, 3, 4, 6, 8] {
            bit_util::set_bit(&mut validity, valid_idx);
        }

        let list = LargeListArray::from(
            ArrayData::builder(DataType::new_large_list(DataType::Int32, false))
                .len(9)
                .add_buffer(offsets)
                .add_child_data(child.clone())
                .null_bit_buffer(Some(Buffer::from(validity)))
                .build()
                .unwrap(),
        );

        assert_eq!(child, list.values().to_data());
        assert_eq!(DataType::Int32, list.value_type());
        assert_eq!(9, list.len());
        assert_eq!(4, list.null_count());
        assert_eq!(2, list.value_offsets()[3]);
        assert_eq!(2, list.value_length(3));

        let sliced = list.slice(1, 6);
        assert_eq!(6, sliced.len());
        assert_eq!(3, sliced.null_count());

        // Element `i` of the slice corresponds to bit `1 + i` of the original bitmap
        for i in 0..sliced.len() {
            match bit_util::get_bit(&validity, 1 + i) {
                true => assert!(sliced.is_valid(i)),
                false => assert!(sliced.is_null(i)),
            }
        }

        // Check offset and length for each non-null value: (index, offset, length)
        let sliced_list = sliced.as_any().downcast_ref::<LargeListArray>().unwrap();
        for (idx, offset, length) in [(2_usize, 2, 2), (3, 4, 2), (5, 6, 3)] {
            assert_eq!(offset, sliced_list.value_offsets()[idx]);
            assert_eq!(length, sliced_list.value_length(idx));
        }
    }
952
953
    #[test]
    #[should_panic(expected = "index out of bounds: the len is 10 but the index is 11")]
    fn test_list_array_index_out_of_bound() {
        // Flat child values 0..=9
        let child = ArrayData::builder(DataType::Int32)
            .len(10)
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
            .build()
            .unwrap();

        // `i64` offsets for the nested array:
        //  [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
        let offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
        // Validity bitmap 01011001 00000001: lists 0, 3, 4, 6 and 8 are valid
        let mut validity: [u8; 2] = [0; 2];
        for valid_idx in [0, 3, 4, 6, 8] {
            bit_util::set_bit(&mut validity, valid_idx);
        }

        let list = LargeListArray::from(
            ArrayData::builder(DataType::new_large_list(DataType::Int32, false))
                .len(9)
                .add_buffer(offsets)
                .add_child_data(child)
                .null_bit_buffer(Some(Buffer::from(validity)))
                .build()
                .unwrap(),
        );
        assert_eq!(9, list.len());

        // Index 10 is past the 9 lists; this call is expected to panic
        list.value(10);
    }
988
    #[test]
    #[should_panic(expected = "ListArray data should contain a single buffer only (value offsets)")]
    // Different error messages, so skip for now
    // https://github.com/apache/arrow-rs/issues/1545
    #[cfg(not(feature = "force_validate"))]
    fn test_list_array_invalid_buffer_len() {
        // Child values 0..=7, built without validation
        let child = unsafe {
            ArrayData::builder(DataType::Int32)
                .len(8)
                .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
                .build_unchecked()
        };

        // List data with a child but deliberately no offsets buffer
        let item_field = Arc::new(Field::new_list_field(DataType::Int32, false));
        let missing_offsets = unsafe {
            ArrayData::builder(DataType::List(item_field))
                .len(3)
                .add_child_data(child)
                .build_unchecked()
        };

        // The conversion is expected to panic
        drop(ListArray::from(missing_offsets));
    }
1010
1011
    #[test]
    #[should_panic(expected = "ListArray should contain a single child array (values array)")]
    // Different error messages, so skip for now
    // https://github.com/apache/arrow-rs/issues/1545
    #[cfg(not(feature = "force_validate"))]
    fn test_list_array_invalid_child_array_len() {
        let item_field = Arc::new(Field::new_list_field(DataType::Int32, false));

        // List data with offsets but deliberately no child array
        let missing_child = unsafe {
            ArrayData::builder(DataType::List(item_field))
                .len(3)
                .add_buffer(Buffer::from_slice_ref([0, 2, 5, 7]))
                .build_unchecked()
        };

        // The conversion is expected to panic
        drop(ListArray::from(missing_child));
    }
1028
1029
    #[test]
    #[should_panic(expected = "[Large]ListArray's datatype must be [Large]ListArray(). It is List")]
    fn test_from_array_data_validation() {
        // Build a one-element `ListArray` ...
        let mut list_builder = ListBuilder::new(Int32Builder::new());
        list_builder.values().append_value(1);
        list_builder.append(true);
        let list_data = list_builder.finish().into_data();

        // ... and hand its data to the `i64`-offset type, which is expected to panic
        drop(LargeListArray::from(list_data));
    }
1038
1039
    #[test]
    fn test_list_array_offsets_need_not_start_at_zero() {
        // Flat child values 0..=7
        let child = ArrayData::builder(DataType::Int32)
            .len(8)
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
            .build()
            .unwrap();

        // The first offset is 2 rather than 0
        let item_field = Arc::new(Field::new_list_field(DataType::Int32, false));
        let list = ListArray::from(
            ArrayData::builder(DataType::List(item_field))
                .len(3)
                .add_buffer(Buffer::from_slice_ref([2, 2, 5, 7]))
                .add_child_data(child)
                .build()
                .unwrap(),
        );

        // Lengths are the differences between consecutive offsets
        for (row, expected_len) in [0, 3, 2].into_iter().enumerate() {
            assert_eq!(list.value_length(row), expected_len);
        }
    }
1063
1064
    #[test]
    #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
    // Different error messages, so skip for now
    // https://github.com/apache/arrow-rs/issues/1545
    #[cfg(not(feature = "force_validate"))]
    fn test_primitive_array_alignment() {
        // One byte into a `u64`-backed buffer
        let shifted = Buffer::from_slice_ref([0_u64]).slice(1);

        let unaligned_data = unsafe {
            ArrayData::builder(DataType::Int32)
                .add_buffer(shifted)
                .build_unchecked()
        };

        // The conversion is expected to panic
        drop(Int32Array::from(unaligned_data));
    }
1079
1080
    #[test]
    #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
    // Different error messages, so skip for now
    // https://github.com/apache/arrow-rs/issues/1545
    #[cfg(not(feature = "force_validate"))]
    fn test_list_array_alignment() {
        // One byte into a `u64`-backed buffer, used below as the offsets buffer
        let shifted_offsets = Buffer::from_slice_ref([0_u64]).slice(1);

        // Child buffer holding eight zeroed `i32`s
        let child = unsafe {
            ArrayData::builder(DataType::Int32)
                .add_buffer(Buffer::from_slice_ref([0_i32; 8]))
                .build_unchecked()
        };

        let item_field = Arc::new(Field::new_list_field(DataType::Int32, false));
        let unaligned_list = unsafe {
            ArrayData::builder(DataType::List(item_field))
                .add_buffer(shifted_offsets)
                .add_child_data(child)
                .build_unchecked()
        };

        // The conversion is expected to panic
        drop(ListArray::from(unaligned_list));
    }
1106
1107
    #[test]
    fn list_array_equality() {
        type Rows = Vec<Option<Vec<Option<i32>>>>;

        // Builds both sides as `ListArray` and then as `LargeListArray`, and checks
        // that `==` yields `expected` for each offset width.
        fn check(left: Rows, right: Rows, expected: bool) {
            let small_l = ListArray::from_iter_primitive::<Int32Type, _, _>(left.clone());
            let small_r = ListArray::from_iter_primitive::<Int32Type, _, _>(right.clone());
            assert_eq!(small_l == small_r, expected);

            let large_l = LargeListArray::from_iter_primitive::<Int32Type, _, _>(left);
            let large_r = LargeListArray::from_iter_primitive::<Int32Type, _, _>(right);
            assert_eq!(large_l == large_r, expected);
        }

        // [[0, 1, 2], null, [3, null, 5], [6, 7]]
        let reference = || -> Rows {
            vec![
                Some(vec![Some(0), Some(1), Some(2)]),
                None,
                Some(vec![Some(3), None, Some(5)]),
                Some(vec![Some(6), Some(7)]),
            ]
        };
        // [null, null, [3, null, 5], [6, 7]]
        let leading_nulls = || -> Rows {
            vec![
                None,
                None,
                Some(vec![Some(3), None, Some(5)]),
                Some(vec![Some(6), Some(7)]),
            ]
        };

        let cases: [(Rows, Rows, bool); 4] = [
            // Identical content
            (reference(), reference(), true),
            // First list null on one side only
            (leading_nulls(), reference(), false),
            // Last list differs in values
            (
                leading_nulls(),
                vec![
                    None,
                    None,
                    Some(vec![Some(3), None, Some(5)]),
                    Some(vec![Some(0), Some(0)]),
                ],
                false,
            ),
            // Single differing value after leading nulls
            (
                vec![None, None, Some(vec![Some(1)])],
                vec![None, None, Some(vec![Some(2)])],
                false,
            ),
        ];

        for (left, right, expected) in cases {
            check(left, right, expected);
        }
    }
1178
1179
    /// A zero-length list built from an empty offsets buffer must still expose a
    /// single `0` offset, for both `ListArray` and `LargeListArray`.
    #[test]
    fn test_empty_offsets() {
        let f = Arc::new(Field::new("element", DataType::Int32, true));

        // `i32` offsets
        let list = ListArray::from(
            ArrayData::builder(DataType::List(f.clone()))
                .buffers(vec![Buffer::from(&[])])
                .add_child_data(ArrayData::new_empty(&DataType::Int32))
                .build()
                .unwrap(),
        );
        // Mirrors the length check made for the large variant below
        assert_eq!(list.len(), 0);
        assert_eq!(list.value_offsets(), &[0]);

        // `i64` offsets
        let large_list = LargeListArray::from(
            ArrayData::builder(DataType::LargeList(f))
                .buffers(vec![Buffer::from(&[])])
                .add_child_data(ArrayData::new_empty(&DataType::Int32))
                .build()
                .unwrap(),
        );
        assert_eq!(large_list.len(), 0);
        assert_eq!(large_list.value_offsets(), &[0]);
    }
1200
1201
    #[test]
    fn test_try_new() {
        // Extracts the error message from a construction that is expected to fail
        let failure = |result: Result<LargeListArray, ArrowError>| result.unwrap_err().to_string();

        let int32_values: ArrayRef = Arc::new(Int32Array::new(vec![1, 2, 3, 4, 5].into(), None));
        let int32_field = Arc::new(Field::new("element", DataType::Int32, false));

        // Three lists over five values: valid with and without a null buffer
        let three_lists = OffsetBuffer::new(vec![0, 1, 4, 5].into());
        ListArray::new(
            int32_field.clone(),
            three_lists.clone(),
            int32_values.clone(),
            None,
        );
        ListArray::new(
            int32_field.clone(),
            three_lists,
            int32_values.clone(),
            Some(NullBuffer::new_null(3)),
        );

        // Four lists but a null buffer of length three
        let four_lists = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
        assert_eq!(
            failure(LargeListArray::try_new(
                int32_field,
                four_lists.clone(),
                int32_values.clone(),
                Some(NullBuffer::new_null(3)),
            )),
            "Invalid argument error: Incorrect length of null buffer for LargeListArray, expected 4 got 3"
        );

        // Field declares Int64 but the values are Int32
        let int64_field = Arc::new(Field::new("element", DataType::Int64, false));
        assert_eq!(
            failure(LargeListArray::try_new(
                int64_field.clone(),
                four_lists.clone(),
                int32_values.clone(),
                None,
            )),
            "Invalid argument error: LargeListArray expected data type Int64 got Int32 for \"element\""
        );

        // Non-nullable field with child values that carry nulls
        let null_int64_values: ArrayRef = Arc::new(Int64Array::new(
            vec![0; 7].into(),
            Some(NullBuffer::new_null(7)),
        ));
        assert_eq!(
            failure(LargeListArray::try_new(
                int64_field,
                four_lists.clone(),
                null_int64_values.clone(),
                None,
            )),
            "Invalid argument error: Non-nullable field of LargeListArray \"element\" cannot contain nulls"
        );

        // The same values are accepted once the field is nullable
        let nullable_field = Arc::new(Field::new("element", DataType::Int64, true));
        LargeListArray::new(
            nullable_field.clone(),
            four_lists.clone(),
            null_int64_values,
            None,
        );

        // Offsets reach 5 but only two values are supplied
        let short_values = Int64Array::new(vec![0; 2].into(), None);
        assert_eq!(
            failure(LargeListArray::try_new(
                nullable_field,
                four_lists,
                Arc::new(short_values),
                None,
            )),
            "Invalid argument error: Max offset of 5 exceeds length of values 2"
        );
    }
1255
1256
    #[test]
    fn test_from_fixed_size_list() {
        // Three fixed-size lists of three items each; the middle one is null
        let mut fixed_builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);
        for (items, is_valid) in [([1, 2, 3], true), ([0, 0, 0], false), ([4, 5, 6], true)] {
            fixed_builder.values().append_slice(&items);
            fixed_builder.append(is_valid);
        }
        let list = ListArray::from(fixed_builder.finish());

        let rows = list
            .iter()
            .map(|row| row.map(|arr| arr.as_primitive::<Int32Type>().values().to_vec()))
            .collect::<Vec<_>>();
        assert_eq!(rows, vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])]);
    }
1273
1274
    #[test]
    fn test_nullable_union() {
        // Dense union child with five `Int32` entries spread over fields "a" and "b"
        let mut union_builder = UnionBuilder::new_dense();
        for (name, v) in [("a", 1), ("b", 2), ("b", 3), ("a", 4), ("a", 5)] {
            union_builder.append::<Int32Type>(name, v).unwrap();
        }
        let union_values = union_builder.build().unwrap();

        // Constructing the list over a non-nullable union field must not panic
        let item_field = Arc::new(Field::new(
            "element",
            union_values.data_type().clone(),
            false,
        ));
        let list_offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
        ListArray::new(item_field, list_offsets, Arc::new(union_values), None);
    }
1287
}