Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-array/src/array/list_array.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use crate::array::{get_offsets, make_array, print_long_array};
19
use crate::builder::{GenericListBuilder, PrimitiveBuilder};
20
use crate::{
21
    iterator::GenericListArrayIter, new_empty_array, Array, ArrayAccessor, ArrayRef,
22
    ArrowPrimitiveType, FixedSizeListArray,
23
};
24
use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
25
use arrow_data::{ArrayData, ArrayDataBuilder};
26
use arrow_schema::{ArrowError, DataType, FieldRef};
27
use num::Integer;
28
use std::any::Any;
29
use std::sync::Arc;
30
31
/// A type that can be used within a variable-size array to encode offset information
32
///
33
/// See [`ListArray`], [`LargeListArray`], [`BinaryArray`], [`LargeBinaryArray`],
34
/// [`StringArray`] and [`LargeStringArray`]
35
///
36
/// [`BinaryArray`]: crate::array::BinaryArray
37
/// [`LargeBinaryArray`]: crate::array::LargeBinaryArray
38
/// [`StringArray`]: crate::array::StringArray
39
/// [`LargeStringArray`]: crate::array::LargeStringArray
40
pub trait OffsetSizeTrait: ArrowNativeType + std::ops::AddAssign + Integer {
41
    /// True for 64 bit offset size and false for 32 bit offset size
42
    const IS_LARGE: bool;
43
    /// Prefix for the offset size
44
    const PREFIX: &'static str;
45
    /// The max `usize` offset
46
    const MAX_OFFSET: usize;
47
}
48
49
impl OffsetSizeTrait for i32 {
50
    const IS_LARGE: bool = false;
51
    const PREFIX: &'static str = "";
52
    const MAX_OFFSET: usize = i32::MAX as usize;
53
}
54
55
impl OffsetSizeTrait for i64 {
56
    const IS_LARGE: bool = true;
57
    const PREFIX: &'static str = "Large";
58
    const MAX_OFFSET: usize = i64::MAX as usize;
59
}
60
61
/// An array of [variable length lists], similar to JSON arrays
62
/// (e.g. `["A", "B", "C"]`). This struct specifically represents
63
/// the [list layout]. Refer to [`GenericListViewArray`] for the
64
/// [list-view layout].
65
///
66
/// Lists are represented using `offsets` into a `values` child
67
/// array. Offsets are stored in two adjacent entries of an
68
/// [`OffsetBuffer`].
69
///
70
/// Arrow defines [`ListArray`] with `i32` offsets and
71
/// [`LargeListArray`] with `i64` offsets.
72
///
73
/// Use [`GenericListBuilder`] to construct a [`GenericListArray`].
74
///
75
/// # Representation
76
///
77
/// A [`ListArray`] can represent a list of values of any other
78
/// supported Arrow type. Each element of the `ListArray` itself is
79
/// a list which may be empty, may contain NULL and non-null values,
80
/// or may itself be NULL.
81
///
82
/// For example, the `ListArray` shown in the following diagram stores
83
/// lists of strings. Note that `[]` represents an empty (length
84
/// 0), but non NULL list.
85
///
86
/// ```text
87
/// ┌─────────────┐
88
/// │   [A,B,C]   │
89
/// ├─────────────┤
90
/// │     []      │
91
/// ├─────────────┤
92
/// │    NULL     │
93
/// ├─────────────┤
94
/// │     [D]     │
95
/// ├─────────────┤
96
/// │  [NULL, F]  │
97
/// └─────────────┘
98
/// ```
99
///
100
/// The `values` are stored in a child [`StringArray`] and the offsets
101
/// are stored in an [`OffsetBuffer`] as shown in the following
102
/// diagram. The logical values and offsets are shown on the left, and
103
/// the actual `ListArray` encoding on the right.
104
///
105
/// ```text
106
///                                         ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
107
///                                                                 ┌ ─ ─ ─ ─ ─ ─ ┐    │
108
///  ┌─────────────┐  ┌───────┐             │     ┌───┐   ┌───┐       ┌───┐ ┌───┐
109
///  │   [A,B,C]   │  │ (0,3) │                   │ 1 │   │ 0 │     │ │ 1 │ │ A │ │ 0  │
110
///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤       ├───┤ ├───┤
111
///  │      []     │  │ (3,3) │                   │ 1 │   │ 3 │     │ │ 1 │ │ B │ │ 1  │
112
///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤       ├───┤ ├───┤
113
///  │    NULL     │  │ (3,4) │                   │ 0 │   │ 3 │     │ │ 1 │ │ C │ │ 2  │
114
///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤       ├───┤ ├───┤
115
///  │     [D]     │  │ (4,5) │                   │ 1 │   │ 4 │     │ │ ? │ │ ? │ │ 3  │
116
///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤       ├───┤ ├───┤
117
///  │  [NULL, F]  │  │ (5,7) │                   │ 1 │   │ 5 │     │ │ 1 │ │ D │ │ 4  │
118
///  └─────────────┘  └───────┘             │     └───┘   ├───┤       ├───┤ ├───┤
119
///                                                       │ 7 │     │ │ 0 │ │ ? │ │ 5  │
120
///                                         │  Validity   └───┘       ├───┤ ├───┤
121
///     Logical       Logical                  (nulls)   Offsets    │ │ 1 │ │ F │ │ 6  │
122
///      Values       Offsets               │                         └───┘ └───┘
123
///                                                                 │    Values   │    │
124
///                 (offsets[i],            │   ListArray               (Array)
125
///                offsets[i+1])                                    └ ─ ─ ─ ─ ─ ─ ┘    │
126
///                                         └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
127
/// ```
128
///
129
/// # Slicing
130
///
131
/// Slicing a `ListArray` creates a new `ListArray` without copying any data,
132
/// but this means the [`Self::values`] and [`Self::offsets`] may have "unused" data
133
///
134
/// For example, calling `slice(1, 3)` on the `ListArray` in the above example
135
/// would result in the following. Note
136
///
137
/// 1. `Values` array is unchanged
138
/// 2. `Offsets` do not start at `0`, nor cover all values in the Values array.
139
///
140
/// ```text
141
///                                 ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
142
///                                                         ┌ ─ ─ ─ ─ ─ ─ ┐    │  ╔═══╗
143
///                                 │                         ╔═══╗ ╔═══╗         ║   ║  Not used
144
///                                                         │ ║ 1 ║ ║ A ║ │ 0  │  ╚═══╝
145
///  ┌─────────────┐  ┌───────┐     │     ┌───┐   ┌───┐       ╠═══╣ ╠═══╣
146
///  │ [] (empty)  │  │ (3,3) │           │ 1 │   │ 3 │     │ ║ 1 ║ ║ B ║ │ 1  │
147
///  ├─────────────┤  ├───────┤     │     ├───┤   ├───┤       ╠═══╣ ╠═══╣
148
///  │    NULL     │  │ (3,4) │           │ 0 │   │ 3 │     │ ║ 1 ║ ║ C ║ │ 2  │
149
///  ├─────────────┤  ├───────┤     │     ├───┤   ├───┤       ╠───╣ ╠───╣
150
///  │     [D]     │  │ (4,5) │           │ 1 │   │ 4 │     │ │ 0 │ │ ? │ │ 3  │
151
///  └─────────────┘  └───────┘     │     └───┘   ├───┤       ├───┤ ├───┤
152
///                                               │ 5 │     │ │ 1 │ │ D │ │ 4  │
153
///                                 │             └───┘       ├───┤ ├───┤
154
///                                                         │ │ 0 │ │ ? │ │ 5  │
155
///                                 │  Validity               ╠═══╣ ╠═══╣
156
///     Logical       Logical          (nulls)   Offsets    │ ║ 1 ║ ║ F ║ │ 6  │
157
///      Values       Offsets       │                         ╚═══╝ ╚═══╝
158
///                                                         │    Values   │    │
159
///                 (offsets[i],    │   ListArray               (Array)
160
///                offsets[i+1])                            └ ─ ─ ─ ─ ─ ─ ┘    │
161
///                                 └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
162
/// ```
163
///
164
/// [`StringArray`]: crate::array::StringArray
165
/// [`GenericListViewArray`]: crate::array::GenericListViewArray
166
/// [variable length lists]: https://arrow.apache.org/docs/format/Columnar.html#variable-size-list-layout
167
/// [list layout]: https://arrow.apache.org/docs/format/Columnar.html#list-layout
168
/// [list-view layout]: https://arrow.apache.org/docs/format/Columnar.html#listview-layout
169
pub struct GenericListArray<OffsetSize: OffsetSizeTrait> {
170
    data_type: DataType,
171
    nulls: Option<NullBuffer>,
172
    values: ArrayRef,
173
    value_offsets: OffsetBuffer<OffsetSize>,
174
}
175
176
impl<OffsetSize: OffsetSizeTrait> Clone for GenericListArray<OffsetSize> {
177
86
    fn clone(&self) -> Self {
178
86
        Self {
179
86
            data_type: self.data_type.clone(),
180
86
            nulls: self.nulls.clone(),
181
86
            values: self.values.clone(),
182
86
            value_offsets: self.value_offsets.clone(),
183
86
        }
184
86
    }
185
}
186
187
impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
188
    /// The data type constructor of list array.
189
    /// The input is the schema of the child array and
190
    /// the output is the [`DataType`], List or LargeList.
191
    pub const DATA_TYPE_CONSTRUCTOR: fn(FieldRef) -> DataType = if OffsetSize::IS_LARGE {
192
        DataType::LargeList
193
    } else {
194
        DataType::List
195
    };
196
197
    /// Create a new [`GenericListArray`] from the provided parts
198
    ///
199
    /// # Errors
200
    ///
201
    /// Errors if
202
    ///
203
    /// * `offsets.len() - 1 != nulls.len()`
204
    /// * `offsets.last() > values.len()`
205
    /// * `!field.is_nullable() && values.is_nullable()`
206
    /// * `field.data_type() != values.data_type()`
207
92
    pub fn try_new(
208
92
        field: FieldRef,
209
92
        offsets: OffsetBuffer<OffsetSize>,
210
92
        values: ArrayRef,
211
92
        nulls: Option<NullBuffer>,
212
92
    ) -> Result<Self, ArrowError> {
213
92
        let len = offsets.len() - 1; // Offsets guaranteed to not be empty
214
92
        let end_offset = offsets.last().unwrap().as_usize();
215
        // don't need to check other values of `offsets` because they are checked
216
        // during construction of `OffsetBuffer`
217
92
        if end_offset > values.len() {
218
0
            return Err(ArrowError::InvalidArgumentError(format!(
219
0
                "Max offset of {end_offset} exceeds length of values {}",
220
0
                values.len()
221
0
            )));
222
92
        }
223
224
92
        if let Some(
n37
) = nulls.as_ref() {
225
37
            if n.len() != len {
226
0
                return Err(ArrowError::InvalidArgumentError(format!(
227
0
                    "Incorrect length of null buffer for {}ListArray, expected {len} got {}",
228
0
                    OffsetSize::PREFIX,
229
0
                    n.len(),
230
0
                )));
231
37
            }
232
55
        }
233
92
        if !field.is_nullable() && 
values9
.
is_nullable9
() {
234
0
            return Err(ArrowError::InvalidArgumentError(format!(
235
0
                "Non-nullable field of {}ListArray {:?} cannot contain nulls",
236
0
                OffsetSize::PREFIX,
237
0
                field.name()
238
0
            )));
239
92
        }
240
241
92
        if field.data_type() != values.data_type() {
242
0
            return Err(ArrowError::InvalidArgumentError(format!(
243
0
                "{}ListArray expected data type {} got {} for {:?}",
244
0
                OffsetSize::PREFIX,
245
0
                field.data_type(),
246
0
                values.data_type(),
247
0
                field.name()
248
0
            )));
249
92
        }
250
251
92
        Ok(Self {
252
92
            data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
253
92
            nulls,
254
92
            values,
255
92
            value_offsets: offsets,
256
92
        })
257
92
    }
258
259
    /// Create a new [`GenericListArray`] from the provided parts
260
    ///
261
    /// # Panics
262
    ///
263
    /// Panics if [`Self::try_new`] returns an error
264
84
    pub fn new(
265
84
        field: FieldRef,
266
84
        offsets: OffsetBuffer<OffsetSize>,
267
84
        values: ArrayRef,
268
84
        nulls: Option<NullBuffer>,
269
84
    ) -> Self {
270
84
        Self::try_new(field, offsets, values, nulls).unwrap()
271
84
    }
272
273
    /// Create a new [`GenericListArray`] of length `len` where all values are null
274
    pub fn new_null(field: FieldRef, len: usize) -> Self {
275
        let values = new_empty_array(field.data_type());
276
        Self {
277
            data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
278
            nulls: Some(NullBuffer::new_null(len)),
279
            value_offsets: OffsetBuffer::new_zeroed(len),
280
            values,
281
        }
282
    }
283
284
    /// Deconstruct this array into its constituent parts
285
    pub fn into_parts(
286
        self,
287
    ) -> (
288
        FieldRef,
289
        OffsetBuffer<OffsetSize>,
290
        ArrayRef,
291
        Option<NullBuffer>,
292
    ) {
293
        let f = match self.data_type {
294
            DataType::List(f) | DataType::LargeList(f) => f,
295
            _ => unreachable!(),
296
        };
297
        (f, self.value_offsets, self.values, self.nulls)
298
    }
299
300
    /// Returns a reference to the offsets of this list
301
    ///
302
    /// Unlike [`Self::value_offsets`] this returns the [`OffsetBuffer`]
303
    /// allowing for zero-copy cloning.
304
    ///
305
    /// Notes: The `offsets` may not start at 0 and may not cover all values in
306
    /// [`Self::values`]. This can happen when the list array was sliced via
307
    /// [`Self::slice`]. See documentation for [`Self`] for more details.
308
    #[inline]
309
69
    pub fn offsets(&self) -> &OffsetBuffer<OffsetSize> {
310
69
        &self.value_offsets
311
69
    }
312
313
    /// Returns a reference to the values of this list
314
    ///
315
    /// Note: The list array may not refer to all values in the `values` array.
316
    /// For example if the list array was sliced via [`Self::slice`] values will
317
    /// still contain values both before and after the slice. See documentation
318
    /// for [`Self`] for more details.
319
    #[inline]
320
52
    pub fn values(&self) -> &ArrayRef {
321
52
        &self.values
322
52
    }
323
324
    /// Returns a clone of the value type of this list.
325
    pub fn value_type(&self) -> DataType {
326
        self.values.data_type().clone()
327
    }
328
329
    /// Returns ith value of this list array.
330
    ///
331
    /// Note: This method does not check for nulls and the value is arbitrary
332
    /// if [`is_null`](Self::is_null) returns true for the index.
333
    ///
334
    /// # Safety
335
    /// Caller must ensure that the index is within the array bounds
336
    pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
337
        let end = self.value_offsets().get_unchecked(i + 1).as_usize();
338
        let start = self.value_offsets().get_unchecked(i).as_usize();
339
        self.values.slice(start, end - start)
340
    }
341
342
    /// Returns ith value of this list array.
343
    ///
344
    /// Note: This method does not check for nulls and the value is arbitrary
345
    /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index.
346
    ///
347
    /// # Panics
348
    /// Panics if index `i` is out of bounds
349
2
    pub fn value(&self, i: usize) -> ArrayRef {
350
2
        let end = self.value_offsets()[i + 1].as_usize();
351
2
        let start = self.value_offsets()[i].as_usize();
352
2
        self.values.slice(start, end - start)
353
2
    }
354
355
    /// Returns the offset values in the offsets buffer.
356
    ///
357
    /// See [`Self::offsets`] for more details.
358
    #[inline]
359
10
    pub fn value_offsets(&self) -> &[OffsetSize] {
360
10
        &self.value_offsets
361
10
    }
362
363
    /// Returns the length for value at index `i`.
364
    #[inline]
365
4
    pub fn value_length(&self, i: usize) -> OffsetSize {
366
4
        let offsets = self.value_offsets();
367
4
        offsets[i + 1] - offsets[i]
368
4
    }
369
370
    /// constructs a new iterator
371
    pub fn iter<'a>(&'a self) -> GenericListArrayIter<'a, OffsetSize> {
372
        GenericListArrayIter::<'a, OffsetSize>::new(self)
373
    }
374
375
    #[inline]
376
11
    fn get_type(data_type: &DataType) -> Option<&DataType> {
377
11
        match (OffsetSize::IS_LARGE, data_type) {
378
11
            (true, DataType::LargeList(
child0
)) | (false, DataType::List(child)) => {
379
11
                Some(child.data_type())
380
            }
381
0
            _ => None,
382
        }
383
11
    }
384
385
    /// Returns a zero-copy slice of this array with the indicated offset and length.
386
    ///
387
    /// Notes: this method does *NOT* slice the underlying values array or modify
388
    /// the values in the offsets buffer. See [`Self::values`] and
389
    /// [`Self::offsets`] for more information.
390
27
    pub fn slice(&self, offset: usize, length: usize) -> Self {
391
        Self {
392
27
            data_type: self.data_type.clone(),
393
27
            nulls: self.nulls.as_ref().map(|n| 
n8
.
slice8
(
offset8
,
length8
)),
394
27
            values: self.values.clone(),
395
27
            value_offsets: self.value_offsets.slice(offset, length),
396
        }
397
27
    }
398
399
    /// Creates a [`GenericListArray`] from an iterator of primitive values
400
    /// # Example
401
    /// ```
402
    /// # use arrow_array::ListArray;
403
    /// # use arrow_array::types::Int32Type;
404
    ///
405
    /// let data = vec![
406
    ///    Some(vec![Some(0), Some(1), Some(2)]),
407
    ///    None,
408
    ///    Some(vec![Some(3), None, Some(5)]),
409
    ///    Some(vec![Some(6), Some(7)]),
410
    /// ];
411
    /// let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
412
    /// println!("{:?}", list_array);
413
    /// ```
414
    pub fn from_iter_primitive<T, P, I>(iter: I) -> Self
415
    where
416
        T: ArrowPrimitiveType,
417
        P: IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>,
418
        I: IntoIterator<Item = Option<P>>,
419
    {
420
        let iter = iter.into_iter();
421
        let size_hint = iter.size_hint().0;
422
        let mut builder =
423
            GenericListBuilder::with_capacity(PrimitiveBuilder::<T>::new(), size_hint);
424
425
        for i in iter {
426
            match i {
427
                Some(p) => {
428
                    for t in p {
429
                        builder.values().append_option(t);
430
                    }
431
                    builder.append(true);
432
                }
433
                None => builder.append(false),
434
            }
435
        }
436
        builder.finish()
437
    }
438
}
439
440
impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListArray<OffsetSize> {
441
11
    fn from(data: ArrayData) -> Self {
442
11
        Self::try_new_from_array_data(data)
443
11
            .expect("Expected infallible creation of GenericListArray from ArrayDataRef failed")
444
11
    }
445
}
446
447
impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>> for ArrayData {
448
87
    fn from(array: GenericListArray<OffsetSize>) -> Self {
449
87
        let len = array.len();
450
87
        let builder = ArrayDataBuilder::new(array.data_type)
451
87
            .len(len)
452
87
            .nulls(array.nulls)
453
87
            .buffers(vec![array.value_offsets.into_inner().into_inner()])
454
87
            .child_data(vec![array.values.to_data()]);
455
456
87
        unsafe { builder.build_unchecked() }
457
87
    }
458
}
459
460
impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<OffsetSize> {
461
0
    fn from(value: FixedSizeListArray) -> Self {
462
0
        let (field, size) = match value.data_type() {
463
0
            DataType::FixedSizeList(f, size) => (f, *size as usize),
464
0
            _ => unreachable!(),
465
        };
466
467
0
        let offsets = OffsetBuffer::from_lengths(std::iter::repeat_n(size, value.len()));
468
469
0
        Self {
470
0
            data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
471
0
            nulls: value.nulls().cloned(),
472
0
            values: value.values().clone(),
473
0
            value_offsets: offsets,
474
0
        }
475
0
    }
476
}
477
478
impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
479
11
    fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
480
11
        if data.buffers().len() != 1 {
481
0
            return Err(ArrowError::InvalidArgumentError(format!(
482
0
                "ListArray data should contain a single buffer only (value offsets), had {}",
483
0
                data.buffers().len()
484
0
            )));
485
11
        }
486
487
11
        if data.child_data().len() != 1 {
488
0
            return Err(ArrowError::InvalidArgumentError(format!(
489
0
                "ListArray should contain a single child array (values array), had {}",
490
0
                data.child_data().len()
491
0
            )));
492
11
        }
493
494
11
        let values = data.child_data()[0].clone();
495
496
11
        if let Some(child_data_type) = Self::get_type(data.data_type()) {
497
11
            if values.data_type() != child_data_type {
498
0
                return Err(ArrowError::InvalidArgumentError(format!(
499
0
                    "[Large]ListArray's child datatype {:?} does not \
500
0
                             correspond to the List's datatype {:?}",
501
0
                    values.data_type(),
502
0
                    child_data_type
503
0
                )));
504
11
            }
505
        } else {
506
0
            return Err(ArrowError::InvalidArgumentError(format!(
507
0
                "[Large]ListArray's datatype must be [Large]ListArray(). It is {:?}",
508
0
                data.data_type()
509
0
            )));
510
        }
511
512
11
        let values = make_array(values);
513
        // SAFETY:
514
        // ArrayData is valid, and verified type above
515
11
        let value_offsets = unsafe { get_offsets(&data) };
516
517
11
        Ok(Self {
518
11
            data_type: data.data_type().clone(),
519
11
            nulls: data.nulls().cloned(),
520
11
            values,
521
11
            value_offsets,
522
11
        })
523
11
    }
524
}
525
526
impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
527
31
    fn as_any(&self) -> &dyn Any {
528
31
        self
529
31
    }
530
531
86
    fn to_data(&self) -> ArrayData {
532
86
        self.clone().into()
533
86
    }
534
535
1
    fn into_data(self) -> ArrayData {
536
1
        self.into()
537
1
    }
538
539
146
    fn data_type(&self) -> &DataType {
540
146
        &self.data_type
541
146
    }
542
543
27
    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
544
27
        Arc::new(self.slice(offset, length))
545
27
    }
546
547
287
    fn len(&self) -> usize {
548
287
        self.value_offsets.len() - 1
549
287
    }
550
551
0
    fn is_empty(&self) -> bool {
552
0
        self.value_offsets.len() <= 1
553
0
    }
554
555
0
    fn shrink_to_fit(&mut self) {
556
0
        if let Some(nulls) = &mut self.nulls {
557
0
            nulls.shrink_to_fit();
558
0
        }
559
0
        self.values.shrink_to_fit();
560
0
        self.value_offsets.shrink_to_fit();
561
0
    }
562
563
0
    fn offset(&self) -> usize {
564
0
        0
565
0
    }
566
567
61
    fn nulls(&self) -> Option<&NullBuffer> {
568
61
        self.nulls.as_ref()
569
61
    }
570
571
1
    fn logical_null_count(&self) -> usize {
572
        // More efficient that the default implementation
573
1
        self.null_count()
574
1
    }
575
576
0
    fn get_buffer_memory_size(&self) -> usize {
577
0
        let mut size = self.values.get_buffer_memory_size();
578
0
        size += self.value_offsets.inner().inner().capacity();
579
0
        if let Some(n) = self.nulls.as_ref() {
580
0
            size += n.buffer().capacity();
581
0
        }
582
0
        size
583
0
    }
584
585
0
    fn get_array_memory_size(&self) -> usize {
586
0
        let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size();
587
0
        size += self.value_offsets.inner().inner().capacity();
588
0
        if let Some(n) = self.nulls.as_ref() {
589
0
            size += n.buffer().capacity();
590
0
        }
591
0
        size
592
0
    }
593
}
594
595
impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListArray<OffsetSize> {
596
    type Item = ArrayRef;
597
598
    fn value(&self, index: usize) -> Self::Item {
599
        GenericListArray::value(self, index)
600
    }
601
602
    unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
603
        GenericListArray::value(self, index)
604
    }
605
}
606
607
impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListArray<OffsetSize> {
608
0
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
609
0
        let prefix = OffsetSize::PREFIX;
610
611
0
        write!(f, "{prefix}ListArray\n[\n")?;
612
0
        print_long_array(self, f, |array, index, f| {
613
0
            std::fmt::Debug::fmt(&array.value(index), f)
614
0
        })?;
615
0
        write!(f, "]")
616
0
    }
617
}
618
619
/// A [`GenericListArray`] of variable size lists, storing offsets as `i32`.
620
///
621
/// See [`ListBuilder`](crate::builder::ListBuilder) for how to construct a [`ListArray`]
622
pub type ListArray = GenericListArray<i32>;
623
624
/// A [`GenericListArray`] of variable size lists, storing offsets as `i64`.
625
///
626
/// See [`LargeListBuilder`](crate::builder::LargeListBuilder) for how to construct a [`LargeListArray`]
627
pub type LargeListArray = GenericListArray<i64>;
628
629
#[cfg(test)]
630
mod tests {
631
    use super::*;
632
    use crate::builder::{FixedSizeListBuilder, Int32Builder, ListBuilder, UnionBuilder};
633
    use crate::cast::AsArray;
634
    use crate::types::Int32Type;
635
    use crate::{Int32Array, Int64Array};
636
    use arrow_buffer::{bit_util, Buffer, ScalarBuffer};
637
    use arrow_schema::Field;
638
639
    // Test helper: builds a `ListArray` from an explicit values array and
    // offset buffer, with a nullable Int32 list field and no null buffer.
    fn create_from_buffers() -> ListArray {
640
        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
641
        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
642
        let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 8]));
643
        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
644
        ListArray::new(field, offsets, Arc::new(values), None)
645
    }
646
647
    // `from_iter_primitive` over three non-null lists must produce an array
    // equal to the one assembled by `create_from_buffers`.
    #[test]
648
    fn test_from_iter_primitive() {
649
        let data = vec![
650
            Some(vec![Some(0), Some(1), Some(2)]),
651
            Some(vec![Some(3), Some(4), Some(5)]),
652
            Some(vec![Some(6), Some(7)]),
653
        ];
654
        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
655
656
        let another = create_from_buffers();
657
        assert_eq!(list_array, another)
658
    }
659
660
    // A list array built from empty value and offset buffers has length 0.
    #[test]
661
    fn test_empty_list_array() {
662
        // Construct an empty value array
663
        let value_data = ArrayData::builder(DataType::Int32)
664
            .len(0)
665
            .add_buffer(Buffer::from([]))
666
            .build()
667
            .unwrap();
668
669
        // Construct an empty offset buffer
670
        let value_offsets = Buffer::from([]);
671
672
        // Construct a list array from the above two
673
        let list_data_type =
674
            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
675
        let list_data = ArrayData::builder(list_data_type)
676
            .len(0)
677
            .add_buffer(value_offsets)
678
            .add_child_data(value_data)
679
            .build()
680
            .unwrap();
681
682
        let list_array = ListArray::from(list_data);
683
        assert_eq!(list_array.len(), 0)
684
    }
685
686
    // Checks the basic accessors of a 3-element `ListArray` built from
    // `ArrayData`, then repeats the checks with an `ArrayData` offset of 1.
    #[test]
687
    fn test_list_array() {
688
        // Construct a value array
689
        let value_data = ArrayData::builder(DataType::Int32)
690
            .len(8)
691
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
692
            .build()
693
            .unwrap();
694
695
        // Construct a buffer for value offsets, for the nested array:
696
        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
697
        let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]);
698
699
        // Construct a list array from the above two
700
        let list_data_type =
701
            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
702
        let list_data = ArrayData::builder(list_data_type.clone())
703
            .len(3)
704
            .add_buffer(value_offsets.clone())
705
            .add_child_data(value_data.clone())
706
            .build()
707
            .unwrap();
708
        let list_array = ListArray::from(list_data);
709
710
        let values = list_array.values();
711
        assert_eq!(value_data, values.to_data());
712
        assert_eq!(DataType::Int32, list_array.value_type());
713
        assert_eq!(3, list_array.len());
714
        assert_eq!(0, list_array.null_count());
715
        assert_eq!(6, list_array.value_offsets()[2]);
716
        assert_eq!(2, list_array.value_length(2));
717
        assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
718
        assert_eq!(
719
            0,
720
            unsafe { list_array.value_unchecked(0) }
721
                .as_primitive::<Int32Type>()
722
                .value(0)
723
        );
724
        for i in 0..3 {
725
            assert!(list_array.is_valid(i));
726
            assert!(!list_array.is_null(i));
727
        }
728
729
        // Now test with a non-zero offset (skip first element)
730
        //  [[3, 4, 5], [6, 7]]
731
        let list_data = ArrayData::builder(list_data_type)
732
            .len(2)
733
            .offset(1)
734
            .add_buffer(value_offsets)
735
            .add_child_data(value_data.clone())
736
            .build()
737
            .unwrap();
738
        let list_array = ListArray::from(list_data);
739
740
        // The child values are still the full, unsliced value data.
        let values = list_array.values();
741
        assert_eq!(value_data, values.to_data());
742
        assert_eq!(DataType::Int32, list_array.value_type());
743
        assert_eq!(2, list_array.len());
744
        assert_eq!(0, list_array.null_count());
745
        assert_eq!(6, list_array.value_offsets()[1]);
746
        assert_eq!(2, list_array.value_length(1));
747
        assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
748
        assert_eq!(
749
            3,
750
            unsafe { list_array.value_unchecked(0) }
751
                .as_primitive::<Int32Type>()
752
                .value(0)
753
        );
754
    }
755
756
    // Same checks as the `ListArray` accessor test, but for `LargeListArray`
    // with `i64` offsets.
    #[test]
757
    fn test_large_list_array() {
758
        // Construct a value array
759
        let value_data = ArrayData::builder(DataType::Int32)
760
            .len(8)
761
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
762
            .build()
763
            .unwrap();
764
765
        // Construct a buffer for value offsets, for the nested array:
766
        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
767
        let value_offsets = Buffer::from_slice_ref([0i64, 3, 6, 8]);
768
769
        // Construct a list array from the above two
770
        let list_data_type = DataType::new_large_list(DataType::Int32, false);
771
        let list_data = ArrayData::builder(list_data_type.clone())
772
            .len(3)
773
            .add_buffer(value_offsets.clone())
774
            .add_child_data(value_data.clone())
775
            .build()
776
            .unwrap();
777
        let list_array = LargeListArray::from(list_data);
778
779
        let values = list_array.values();
780
        assert_eq!(value_data, values.to_data());
781
        assert_eq!(DataType::Int32, list_array.value_type());
782
        assert_eq!(3, list_array.len());
783
        assert_eq!(0, list_array.null_count());
784
        assert_eq!(6, list_array.value_offsets()[2]);
785
        assert_eq!(2, list_array.value_length(2));
786
        assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
787
        assert_eq!(
788
            0,
789
            unsafe { list_array.value_unchecked(0) }
790
                .as_primitive::<Int32Type>()
791
                .value(0)
792
        );
793
        for i in 0..3 {
794
            assert!(list_array.is_valid(i));
795
            assert!(!list_array.is_null(i));
796
        }
797
798
        // Now test with a non-zero offset
799
        //  [[3, 4, 5], [6, 7]]
800
        let list_data = ArrayData::builder(list_data_type)
801
            .len(2)
802
            .offset(1)
803
            .add_buffer(value_offsets)
804
            .add_child_data(value_data.clone())
805
            .build()
806
            .unwrap();
807
        let list_array = LargeListArray::from(list_data);
808
809
        // The child values are still the full, unsliced value data.
        let values = list_array.values();
810
        assert_eq!(value_data, values.to_data());
811
        assert_eq!(DataType::Int32, list_array.value_type());
812
        assert_eq!(2, list_array.len());
813
        assert_eq!(0, list_array.null_count());
814
        assert_eq!(6, list_array.value_offsets()[1]);
815
        assert_eq!(2, list_array.value_length(1));
816
        assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
817
        assert_eq!(
818
            3,
819
            unsafe { list_array.value_unchecked(0) }
820
                .as_primitive::<Int32Type>()
821
                .value(0)
822
        );
823
    }
824
825
    // Slices a 9-element `ListArray` containing nulls and checks the length,
    // null count, validity and offsets seen through the slice.
    #[test]
826
    fn test_list_array_slice() {
827
        // Construct a value array
828
        let value_data = ArrayData::builder(DataType::Int32)
829
            .len(10)
830
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
831
            .build()
832
            .unwrap();
833
834
        // Construct a buffer for value offsets, for the nested array:
835
        //  [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
836
        let value_offsets = Buffer::from_slice_ref([0, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
837
        // 01011001 00000001
838
        let mut null_bits: [u8; 2] = [0; 2];
839
        bit_util::set_bit(&mut null_bits, 0);
840
        bit_util::set_bit(&mut null_bits, 3);
841
        bit_util::set_bit(&mut null_bits, 4);
842
        bit_util::set_bit(&mut null_bits, 6);
843
        bit_util::set_bit(&mut null_bits, 8);
844
845
        // Construct a list array from the above two
846
        let list_data_type =
847
            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
848
        let list_data = ArrayData::builder(list_data_type)
849
            .len(9)
850
            .add_buffer(value_offsets)
851
            .add_child_data(value_data.clone())
852
            .null_bit_buffer(Some(Buffer::from(null_bits)))
853
            .build()
854
            .unwrap();
855
        let list_array = ListArray::from(list_data);
856
857
        let values = list_array.values();
858
        assert_eq!(value_data, values.to_data());
859
        assert_eq!(DataType::Int32, list_array.value_type());
860
        assert_eq!(9, list_array.len());
861
        assert_eq!(4, list_array.null_count());
862
        assert_eq!(2, list_array.value_offsets()[3]);
863
        assert_eq!(2, list_array.value_length(3));
864
865
        let sliced_array = list_array.slice(1, 6);
866
        assert_eq!(6, sliced_array.len());
867
        assert_eq!(3, sliced_array.null_count());
868
869
        // The slice starts at element 1, so slice index `i` corresponds to
        // validity bit `1 + i` of the original bitmap.
        for i in 0..sliced_array.len() {
870
            if bit_util::get_bit(&null_bits, 1 + i) {
871
                assert!(sliced_array.is_valid(i));
872
            } else {
873
                assert!(sliced_array.is_null(i));
874
            }
875
        }
876
877
        // Check offset and length for each non-null value.
878
        // Slice indices 2, 3 and 5 are original elements [2, 3], [4, 5] and [6, 7, 8].
        let sliced_list_array = sliced_array.as_any().downcast_ref::<ListArray>().unwrap();
879
        assert_eq!(2, sliced_list_array.value_offsets()[2]);
880
        assert_eq!(2, sliced_list_array.value_length(2));
881
        assert_eq!(4, sliced_list_array.value_offsets()[3]);
882
        assert_eq!(2, sliced_list_array.value_length(3));
883
        assert_eq!(6, sliced_list_array.value_offsets()[5]);
884
        assert_eq!(3, sliced_list_array.value_length(5));
885
    }
886
887
    // Slices a 9-element `LargeListArray` containing nulls and checks the
    // length, null count, validity and offsets seen through the slice.
    #[test]
888
    fn test_large_list_array_slice() {
889
        // Construct a value array
890
        let value_data = ArrayData::builder(DataType::Int32)
891
            .len(10)
892
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
893
            .build()
894
            .unwrap();
895
896
        // Construct a buffer for value offsets, for the nested array:
897
        //  [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
898
        let value_offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
899
        // 01011001 00000001
900
        let mut null_bits: [u8; 2] = [0; 2];
901
        bit_util::set_bit(&mut null_bits, 0);
902
        bit_util::set_bit(&mut null_bits, 3);
903
        bit_util::set_bit(&mut null_bits, 4);
904
        bit_util::set_bit(&mut null_bits, 6);
905
        bit_util::set_bit(&mut null_bits, 8);
906
907
        // Construct a list array from the above two
908
        let list_data_type = DataType::new_large_list(DataType::Int32, false);
909
        let list_data = ArrayData::builder(list_data_type)
910
            .len(9)
911
            .add_buffer(value_offsets)
912
            .add_child_data(value_data.clone())
913
            .null_bit_buffer(Some(Buffer::from(null_bits)))
914
            .build()
915
            .unwrap();
916
        let list_array = LargeListArray::from(list_data);
917
918
        let values = list_array.values();
919
        assert_eq!(value_data, values.to_data());
920
        assert_eq!(DataType::Int32, list_array.value_type());
921
        assert_eq!(9, list_array.len());
922
        assert_eq!(4, list_array.null_count());
923
        assert_eq!(2, list_array.value_offsets()[3]);
924
        assert_eq!(2, list_array.value_length(3));
925
926
        let sliced_array = list_array.slice(1, 6);
927
        assert_eq!(6, sliced_array.len());
928
        assert_eq!(3, sliced_array.null_count());
929
930
        // The slice starts at element 1, so slice index `i` corresponds to
        // validity bit `1 + i` of the original bitmap.
        for i in 0..sliced_array.len() {
931
            if bit_util::get_bit(&null_bits, 1 + i) {
932
                assert!(sliced_array.is_valid(i));
933
            } else {
934
                assert!(sliced_array.is_null(i));
935
            }
936
        }
937
938
        // Check offset and length for each non-null value.
939
        // Slice indices 2, 3 and 5 are original elements [2, 3], [4, 5] and [6, 7, 8].
        let sliced_list_array = sliced_array
940
            .as_any()
941
            .downcast_ref::<LargeListArray>()
942
            .unwrap();
943
        assert_eq!(2, sliced_list_array.value_offsets()[2]);
944
        assert_eq!(2, sliced_list_array.value_length(2));
945
        assert_eq!(4, sliced_list_array.value_offsets()[3]);
946
        assert_eq!(2, sliced_list_array.value_length(3));
947
        assert_eq!(6, sliced_list_array.value_offsets()[5]);
948
        assert_eq!(3, sliced_list_array.value_length(5));
949
    }
950
951
    // Calling `value` with an index past the end of a 9-element
    // `LargeListArray` must panic with the message given in `should_panic`.
    #[test]
952
    #[should_panic(expected = "index out of bounds: the len is 10 but the index is 11")]
953
    fn test_list_array_index_out_of_bound() {
954
        // Construct a value array
955
        let value_data = ArrayData::builder(DataType::Int32)
956
            .len(10)
957
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
958
            .build()
959
            .unwrap();
960
961
        // Construct a buffer for value offsets, for the nested array:
962
        //  [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
963
        let value_offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
964
        // 01011001 00000001
965
        let mut null_bits: [u8; 2] = [0; 2];
966
        bit_util::set_bit(&mut null_bits, 0);
967
        bit_util::set_bit(&mut null_bits, 3);
968
        bit_util::set_bit(&mut null_bits, 4);
969
        bit_util::set_bit(&mut null_bits, 6);
970
        bit_util::set_bit(&mut null_bits, 8);
971
972
        // Construct a list array from the above two
973
        let list_data_type = DataType::new_large_list(DataType::Int32, false);
974
        let list_data = ArrayData::builder(list_data_type)
975
            .len(9)
976
            .add_buffer(value_offsets)
977
            .add_child_data(value_data)
978
            .null_bit_buffer(Some(Buffer::from(null_bits)))
979
            .build()
980
            .unwrap();
981
        let list_array = LargeListArray::from(list_data);
982
        assert_eq!(9, list_array.len());
983
984
        // Index 10 is past the last element (valid indices are 0..=8).
        // Presumably the panic comes from reading offset `index + 1` out of
        // the 10 offsets — confirm against `value`.
        list_array.value(10);
985
    }
986
    // Converting unchecked `ArrayData` that has a child array but no offsets
    // buffer into a `ListArray` must panic.
    #[test]
987
    #[should_panic(expected = "ListArray data should contain a single buffer only (value offsets)")]
988
    // Different error messages, so skip for now
989
    // https://github.com/apache/arrow-rs/issues/1545
990
    #[cfg(not(feature = "force_validate"))]
991
    fn test_list_array_invalid_buffer_len() {
992
        let value_data = unsafe {
993
            ArrayData::builder(DataType::Int32)
994
                .len(8)
995
                .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
996
                .build_unchecked()
997
        };
998
        let list_data_type =
999
            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1000
        // No `add_buffer` call here: the offsets buffer is deliberately missing.
        let list_data = unsafe {
1001
            ArrayData::builder(list_data_type)
1002
                .len(3)
1003
                .add_child_data(value_data)
1004
                .build_unchecked()
1005
        };
1006
        drop(ListArray::from(list_data));
1007
    }
1008
1009
    // Converting unchecked `ArrayData` that has an offsets buffer but no child
    // array into a `ListArray` must panic.
    #[test]
1010
    #[should_panic(expected = "ListArray should contain a single child array (values array)")]
1011
    // Different error messages, so skip for now
1012
    // https://github.com/apache/arrow-rs/issues/1545
1013
    #[cfg(not(feature = "force_validate"))]
1014
    fn test_list_array_invalid_child_array_len() {
1015
        let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]);
1016
        let list_data_type =
1017
            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1018
        // No `add_child_data` call here: the values array is deliberately missing.
        let list_data = unsafe {
1019
            ArrayData::builder(list_data_type)
1020
                .len(3)
1021
                .add_buffer(value_offsets)
1022
                .build_unchecked()
1023
        };
1024
        drop(ListArray::from(list_data));
1025
    }
1026
1027
    // Converting the `ArrayData` of a `ListBuilder`-produced array into a
    // `LargeListArray` must panic because of the data type mismatch.
    #[test]
1028
    #[should_panic(expected = "[Large]ListArray's datatype must be [Large]ListArray(). It is List")]
1029
    fn test_from_array_data_validation() {
1030
        let mut builder = ListBuilder::new(Int32Builder::new());
1031
        builder.values().append_value(1);
1032
        builder.append(true);
1033
        let array = builder.finish();
1034
        let _ = LargeListArray::from(array.into_data());
1035
    }
1036
1037
    // An offsets buffer whose first entry is non-zero is accepted, and value
    // lengths are the differences between consecutive offsets.
    #[test]
1038
    fn test_list_array_offsets_need_not_start_at_zero() {
1039
        let value_data = ArrayData::builder(DataType::Int32)
1040
            .len(8)
1041
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
1042
            .build()
1043
            .unwrap();
1044
1045
        // First offset is 2 rather than 0.
        let value_offsets = Buffer::from_slice_ref([2, 2, 5, 7]);
1046
1047
        let list_data_type =
1048
            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1049
        let list_data = ArrayData::builder(list_data_type)
1050
            .len(3)
1051
            .add_buffer(value_offsets)
1052
            .add_child_data(value_data)
1053
            .build()
1054
            .unwrap();
1055
1056
        let list_array = ListArray::from(list_data);
1057
        // Lengths: 2 - 2 = 0, 5 - 2 = 3, 7 - 5 = 2.
        assert_eq!(list_array.value_length(0), 0);
1058
        assert_eq!(list_array.value_length(1), 3);
1059
        assert_eq!(list_array.value_length(2), 2);
1060
    }
1061
1062
    // Building an `Int32Array` from unchecked `ArrayData` backed by a sliced
    // buffer must panic with the alignment message in `should_panic`.
    #[test]
1063
    #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
1064
    // Different error messages, so skip for now
1065
    // https://github.com/apache/arrow-rs/issues/1545
1066
    #[cfg(not(feature = "force_validate"))]
1067
    fn test_primitive_array_alignment() {
1068
        let buf = Buffer::from_slice_ref([0_u64]);
1069
        // Presumably a 1-byte offset into the buffer, which leaves the pointer
        // misaligned for `i32` — confirm against `Buffer::slice`.
        let buf2 = buf.slice(1);
1070
        let array_data = unsafe {
1071
            ArrayData::builder(DataType::Int32)
1072
                .add_buffer(buf2)
1073
                .build_unchecked()
1074
        };
1075
        drop(Int32Array::from(array_data));
1076
    }
1077
1078
    // Building a `ListArray` from unchecked `ArrayData` whose offsets buffer
    // is a sliced buffer must panic with the alignment message in `should_panic`.
    #[test]
1079
    #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
1080
    // Different error messages, so skip for now
1081
    // https://github.com/apache/arrow-rs/issues/1545
1082
    #[cfg(not(feature = "force_validate"))]
1083
    fn test_list_array_alignment() {
1084
        let buf = Buffer::from_slice_ref([0_u64]);
1085
        // Presumably a 1-byte offset into the buffer, which leaves the pointer
        // misaligned for the `i32` offsets — confirm against `Buffer::slice`.
        let buf2 = buf.slice(1);
1086
1087
        let values: [i32; 8] = [0; 8];
1088
        let value_data = unsafe {
1089
            ArrayData::builder(DataType::Int32)
1090
                .add_buffer(Buffer::from_slice_ref(values))
1091
                .build_unchecked()
1092
        };
1093
1094
        let list_data_type =
1095
            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1096
        // The sliced buffer is used as the offsets buffer of the list.
        let list_data = unsafe {
1097
            ArrayData::builder(list_data_type)
1098
                .add_buffer(buf2)
1099
                .add_child_data(value_data)
1100
                .build_unchecked()
1101
        };
1102
        drop(ListArray::from(list_data));
1103
    }
1104
1105
    // Checks `==` on list arrays for equal and unequal contents, for both
    // `ListArray` and `LargeListArray`.
    #[test]
1106
    fn list_array_equality() {
1107
        // test scaffold
1108
        // Builds both inputs as `ListArray` and then as `LargeListArray`, and
        // asserts that `lhs == rhs` matches `should_equal` each time.
        fn do_comparison(
1109
            lhs_data: Vec<Option<Vec<Option<i32>>>>,
1110
            rhs_data: Vec<Option<Vec<Option<i32>>>>,
1111
            should_equal: bool,
1112
        ) {
1113
            let lhs = ListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data.clone());
1114
            let rhs = ListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data.clone());
1115
            assert_eq!(lhs == rhs, should_equal);
1116
1117
            let lhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data);
1118
            let rhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data);
1119
            assert_eq!(lhs == rhs, should_equal);
1120
        }
1121
1122
        // Identical contents, including a null list and a null item: equal.
        do_comparison(
1123
            vec![
1124
                Some(vec![Some(0), Some(1), Some(2)]),
1125
                None,
1126
                Some(vec![Some(3), None, Some(5)]),
1127
                Some(vec![Some(6), Some(7)]),
1128
            ],
1129
            vec![
1130
                Some(vec![Some(0), Some(1), Some(2)]),
1131
                None,
1132
                Some(vec![Some(3), None, Some(5)]),
1133
                Some(vec![Some(6), Some(7)]),
1134
            ],
1135
            true,
1136
        );
1137
1138
        // First list differs (null vs. [0, 1, 2]): not equal.
        do_comparison(
1139
            vec![
1140
                None,
1141
                None,
1142
                Some(vec![Some(3), None, Some(5)]),
1143
                Some(vec![Some(6), Some(7)]),
1144
            ],
1145
            vec![
1146
                Some(vec![Some(0), Some(1), Some(2)]),
1147
                None,
1148
                Some(vec![Some(3), None, Some(5)]),
1149
                Some(vec![Some(6), Some(7)]),
1150
            ],
1151
            false,
1152
        );
1153
1154
        // Last list differs ([6, 7] vs. [0, 0]): not equal.
        do_comparison(
1155
            vec![
1156
                None,
1157
                None,
1158
                Some(vec![Some(3), None, Some(5)]),
1159
                Some(vec![Some(6), Some(7)]),
1160
            ],
1161
            vec![
1162
                None,
1163
                None,
1164
                Some(vec![Some(3), None, Some(5)]),
1165
                Some(vec![Some(0), Some(0)]),
1166
            ],
1167
            false,
1168
        );
1169
1170
        // Only the value inside the single non-null list differs: not equal.
        do_comparison(
1171
            vec![None, None, Some(vec![Some(1)])],
1172
            vec![None, None, Some(vec![Some(2)])],
1173
            false,
1174
        );
1175
    }
1176
1177
    // A list array built from an empty offsets buffer exposes the single
    // offset `[0]`, for both `ListArray` and `LargeListArray`.
    #[test]
1178
    fn test_empty_offsets() {
1179
        let f = Arc::new(Field::new("element", DataType::Int32, true));
1180
        // i32-offset variant.
        let string = ListArray::from(
1181
            ArrayData::builder(DataType::List(f.clone()))
1182
                .buffers(vec![Buffer::from(&[])])
1183
                .add_child_data(ArrayData::new_empty(&DataType::Int32))
1184
                .build()
1185
                .unwrap(),
1186
        );
1187
        assert_eq!(string.value_offsets(), &[0]);
1188
        // i64-offset variant.
        let string = LargeListArray::from(
1189
            ArrayData::builder(DataType::LargeList(f))
1190
                .buffers(vec![Buffer::from(&[])])
1191
                .add_child_data(ArrayData::new_empty(&DataType::Int32))
1192
                .build()
1193
                .unwrap(),
1194
        );
1195
        assert_eq!(string.len(), 0);
1196
        assert_eq!(string.value_offsets(), &[0]);
1197
    }
1198
1199
    // Exercises `new` / `try_new` with valid inputs and checks the exact error
    // message produced for each kind of invalid input.
    #[test]
1200
    fn test_try_new() {
1201
        let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
1202
        let values = Int32Array::new(vec![1, 2, 3, 4, 5].into(), None);
1203
        let values = Arc::new(values) as ArrayRef;
1204
1205
        // Valid: 4 offsets describe 3 lists over 5 values, without and with a
        // 3-entry null buffer.
        let field = Arc::new(Field::new("element", DataType::Int32, false));
1206
        ListArray::new(field.clone(), offsets.clone(), values.clone(), None);
1207
1208
        let nulls = NullBuffer::new_null(3);
1209
        ListArray::new(field.clone(), offsets, values.clone(), Some(nulls));
1210
1211
        // Invalid: 5 offsets describe 4 lists, but the null buffer has 3 entries.
        let nulls = NullBuffer::new_null(3);
1212
        let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
1213
        let err = LargeListArray::try_new(field, offsets.clone(), values.clone(), Some(nulls))
1214
            .unwrap_err();
1215
1216
        assert_eq!(
1217
            err.to_string(),
1218
            "Invalid argument error: Incorrect length of null buffer for LargeListArray, expected 4 got 3"
1219
        );
1220
1221
        // Invalid: the field declares Int64 items but `values` is still Int32.
        let field = Arc::new(Field::new("element", DataType::Int64, false));
1222
        let err = LargeListArray::try_new(field.clone(), offsets.clone(), values.clone(), None)
1223
            .unwrap_err();
1224
1225
        assert_eq!(
1226
            err.to_string(),
1227
            "Invalid argument error: LargeListArray expected data type Int64 got Int32 for \"element\""
1228
        );
1229
1230
        // Invalid: values carrying a null buffer with a non-nullable field.
        let nulls = NullBuffer::new_null(7);
1231
        let values = Int64Array::new(vec![0; 7].into(), Some(nulls));
1232
        let values = Arc::new(values);
1233
1234
        let err =
1235
            LargeListArray::try_new(field, offsets.clone(), values.clone(), None).unwrap_err();
1236
1237
        assert_eq!(
1238
            err.to_string(),
1239
            "Invalid argument error: Non-nullable field of LargeListArray \"element\" cannot contain nulls"
1240
        );
1241
1242
        // Valid: the same values are accepted once the field is nullable.
        let field = Arc::new(Field::new("element", DataType::Int64, true));
1243
        LargeListArray::new(field.clone(), offsets.clone(), values, None);
1244
1245
        // Invalid: only 2 values, but the last offset is 5.
        let values = Int64Array::new(vec![0; 2].into(), None);
1246
        let err = LargeListArray::try_new(field, offsets, Arc::new(values), None).unwrap_err();
1247
1248
        assert_eq!(
1249
            err.to_string(),
1250
            "Invalid argument error: Max offset of 5 exceeds length of values 2"
1251
        );
1252
    }
1253
1254
    // Converts a `FixedSizeListBuilder` result (list size 3, with a null
    // middle entry) into a `ListArray` and checks the iterated contents.
    #[test]
1255
    fn test_from_fixed_size_list() {
1256
        let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);
1257
        builder.values().append_slice(&[1, 2, 3]);
1258
        builder.append(true);
1259
        // The null entry still appends three child values before `append(false)`.
        builder.values().append_slice(&[0, 0, 0]);
1260
        builder.append(false);
1261
        builder.values().append_slice(&[4, 5, 6]);
1262
        builder.append(true);
1263
        let list: ListArray = builder.finish().into();
1264
1265
        let values: Vec<_> = list
1266
            .iter()
1267
            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1268
            .collect();
1269
        assert_eq!(values, vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])])
1270
    }
1271
1272
    // Constructs a `ListArray` over a dense union values array using a field
    // declared non-nullable; the test passes if `ListArray::new` does not panic.
    #[test]
1273
    fn test_nullable_union() {
1274
        let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
1275
        let mut builder = UnionBuilder::new_dense();
1276
        builder.append::<Int32Type>("a", 1).unwrap();
1277
        builder.append::<Int32Type>("b", 2).unwrap();
1278
        builder.append::<Int32Type>("b", 3).unwrap();
1279
        builder.append::<Int32Type>("a", 4).unwrap();
1280
        builder.append::<Int32Type>("a", 5).unwrap();
1281
        let values = builder.build().unwrap();
1282
        let field = Arc::new(Field::new("element", values.data_type().clone(), false));
1283
        ListArray::new(field.clone(), offsets, Arc::new(values), None);
1284
    }
1285
}