Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-array/src/array/struct_array.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use crate::array::print_long_array;
19
use crate::{make_array, new_null_array, Array, ArrayRef, RecordBatch};
20
use arrow_buffer::{BooleanBuffer, Buffer, NullBuffer};
21
use arrow_data::{ArrayData, ArrayDataBuilder};
22
use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields};
23
use std::sync::Arc;
24
use std::{any::Any, ops::Index};
25
26
/// An array of [structs](https://arrow.apache.org/docs/format/Columnar.html#struct-layout)
27
///
28
/// Each child (called *field*) is represented by a separate array.
29
///
30
/// # Comparison with [RecordBatch]
31
///
32
/// Both [`RecordBatch`] and [`StructArray`] represent a collection of columns / arrays with the
33
/// same length.
34
///
35
/// However, there are a couple of key differences:
36
///
37
/// * [`StructArray`] can be nested within other [`Array`], including itself
38
/// * [`RecordBatch`] can contain top-level metadata on its associated [`Schema`][arrow_schema::Schema]
39
/// * [`StructArray`] can contain top-level nulls, i.e. `null`
40
/// * [`RecordBatch`] can only represent nulls in its child columns, i.e. `{"field": null}`
41
///
42
/// [`StructArray`] is therefore a more general data container than [`RecordBatch`], and as such
43
/// code that needs to handle both will typically share an implementation in terms of
44
/// [`StructArray`] and convert to/from [`RecordBatch`] as necessary.
45
///
46
/// [`From`] implementations are provided to facilitate this conversion, however, converting
47
/// from a [`StructArray`] containing top-level nulls to a [`RecordBatch`] will panic, as there
48
/// is no way to preserve them.
49
///
50
/// # Example: Create an array from a vector of fields
51
///
52
/// ```
53
/// use std::sync::Arc;
54
/// use arrow_array::{Array, ArrayRef, BooleanArray, Int32Array, StructArray};
55
/// use arrow_schema::{DataType, Field};
56
///
57
/// let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true]));
58
/// let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
59
///
60
/// let struct_array = StructArray::from(vec![
61
///     (
62
///         Arc::new(Field::new("b", DataType::Boolean, false)),
63
///         boolean.clone() as ArrayRef,
64
///     ),
65
///     (
66
///         Arc::new(Field::new("c", DataType::Int32, false)),
67
///         int.clone() as ArrayRef,
68
///     ),
69
/// ]);
70
/// assert_eq!(struct_array.column(0).as_ref(), boolean.as_ref());
71
/// assert_eq!(struct_array.column(1).as_ref(), int.as_ref());
72
/// assert_eq!(4, struct_array.len());
73
/// assert_eq!(0, struct_array.null_count());
74
/// assert_eq!(0, struct_array.offset());
75
/// ```
76
#[derive(Clone)]
77
pub struct StructArray {
78
    len: usize,
79
    data_type: DataType,
80
    nulls: Option<NullBuffer>,
81
    fields: Vec<ArrayRef>,
82
}
83
84
impl StructArray {
85
    /// Create a new [`StructArray`] from the provided parts, panicking on failure
86
    ///
87
    /// # Panics
88
    ///
89
    /// Panics if [`Self::try_new`] returns an error
90
87
    pub fn new(fields: Fields, arrays: Vec<ArrayRef>, nulls: Option<NullBuffer>) -> Self {
91
87
        Self::try_new(fields, arrays, nulls).unwrap()
92
87
    }
93
94
    /// Create a new [`StructArray`] from the provided parts, returning an error on failure
95
    ///
96
    /// The length will be inferred from the length of the child arrays.  Returns an error if
97
    /// there are no child arrays.  Consider using [`Self::try_new_with_length`] if the length
98
    /// is known to avoid this.
99
    ///
100
    /// # Errors
101
    ///
102
    /// Errors if
103
    ///
104
    /// * `fields.len() == 0`
105
    /// * Any reason that [`Self::try_new_with_length`] would error
106
87
    pub fn try_new(
107
87
        fields: Fields,
108
87
        arrays: Vec<ArrayRef>,
109
87
        nulls: Option<NullBuffer>,
110
87
    ) -> Result<Self, ArrowError> {
111
87
        let len = arrays.first().map(|x| x.len()).ok_or_else(||ArrowError::InvalidArgumentError(
"use StructArray::try_new_with_length or StructArray::new_empty_fields to create a struct array with no fields so that the length can be set correctly"0
.
to_string0
()))
?0
;
112
113
87
        Self::try_new_with_length(fields, arrays, nulls, len)
114
87
    }
115
116
    /// Create a new [`StructArray`] from the provided parts, returning an error on failure
117
    ///
118
    /// # Errors
119
    ///
120
    /// Errors if
121
    ///
122
    /// * `fields.len() != arrays.len()`
123
    /// * `fields[i].data_type() != arrays[i].data_type()`
124
    /// * `arrays[i].len() != arrays[j].len()`
125
    /// * `arrays[i].len() != nulls.len()`
126
    /// * `!fields[i].is_nullable() && !nulls.contains(arrays[i].nulls())`
127
92
    pub fn try_new_with_length(
128
92
        fields: Fields,
129
92
        arrays: Vec<ArrayRef>,
130
92
        nulls: Option<NullBuffer>,
131
92
        len: usize,
132
92
    ) -> Result<Self, ArrowError> {
133
92
        if fields.len() != arrays.len() {
134
0
            return Err(ArrowError::InvalidArgumentError(format!(
135
0
                "Incorrect number of arrays for StructArray fields, expected {} got {}",
136
0
                fields.len(),
137
0
                arrays.len()
138
0
            )));
139
92
        }
140
141
92
        if let Some(
n23
) = nulls.as_ref() {
142
23
            if n.len() != len {
143
0
                return Err(ArrowError::InvalidArgumentError(format!(
144
0
                    "Incorrect number of nulls for StructArray, expected {len} got {}",
145
0
                    n.len(),
146
0
                )));
147
23
            }
148
69
        }
149
150
161
        for (f, a) in 
fields.iter()92
.
zip92
(
&arrays92
) {
151
161
            if f.data_type() != a.data_type() {
152
0
                return Err(ArrowError::InvalidArgumentError(format!(
153
0
                    "Incorrect datatype for StructArray field {:?}, expected {} got {}",
154
0
                    f.name(),
155
0
                    f.data_type(),
156
0
                    a.data_type()
157
0
                )));
158
161
            }
159
160
161
            if a.len() != len {
161
0
                return Err(ArrowError::InvalidArgumentError(format!(
162
0
                    "Incorrect array length for StructArray field {:?}, expected {} got {}",
163
0
                    f.name(),
164
0
                    len,
165
0
                    a.len()
166
0
                )));
167
161
            }
168
169
161
            if !f.is_nullable() {
170
56
                if let Some(
a2
) = a.logical_nulls() {
171
2
                    if !nulls.as_ref().map(|n| n.contains(&a)).unwrap_or_default()
172
0
                        && a.null_count() > 0
173
                    {
174
0
                        return Err(ArrowError::InvalidArgumentError(format!(
175
0
                            "Found unmasked nulls for non-nullable StructArray field {:?}",
176
0
                            f.name()
177
0
                        )));
178
2
                    }
179
54
                }
180
105
            }
181
        }
182
183
        Ok(Self {
184
92
            len,
185
92
            data_type: DataType::Struct(fields),
186
92
            nulls: nulls.filter(|n| 
n23
.
null_count23
() > 0),
187
92
            fields: arrays,
188
        })
189
92
    }
190
191
    /// Create a new [`StructArray`] of length `len` where all values are null
192
0
    pub fn new_null(fields: Fields, len: usize) -> Self {
193
0
        let arrays = fields
194
0
            .iter()
195
0
            .map(|f| new_null_array(f.data_type(), len))
196
0
            .collect();
197
198
0
        Self {
199
0
            len,
200
0
            data_type: DataType::Struct(fields),
201
0
            nulls: Some(NullBuffer::new_null(len)),
202
0
            fields: arrays,
203
0
        }
204
0
    }
205
206
    /// Create a new [`StructArray`] from the provided parts without validation
207
    ///
208
    /// The length will be inferred from the length of the child arrays.  Panics if there are no
209
    /// child arrays.  Consider using [`Self::new_unchecked_with_length`] if the length is known
210
    /// to avoid this.
211
    ///
212
    /// # Safety
213
    ///
214
    /// Safe if [`Self::new`] would not panic with the given arguments
215
0
    pub unsafe fn new_unchecked(
216
0
        fields: Fields,
217
0
        arrays: Vec<ArrayRef>,
218
0
        nulls: Option<NullBuffer>,
219
0
    ) -> Self {
220
0
        if cfg!(feature = "force_validate") {
221
0
            return Self::new(fields, arrays, nulls);
222
0
        }
223
224
0
        let len = arrays.first().map(|x| x.len()).expect(
225
0
            "cannot use StructArray::new_unchecked if there are no fields, length is unknown",
226
        );
227
0
        Self {
228
0
            len,
229
0
            data_type: DataType::Struct(fields),
230
0
            nulls,
231
0
            fields: arrays,
232
0
        }
233
0
    }
234
235
    /// Create a new [`StructArray`] from the provided parts without validation
236
    ///
237
    /// # Safety
238
    ///
239
    /// Safe if [`Self::new`] would not panic with the given arguments
240
0
    pub unsafe fn new_unchecked_with_length(
241
0
        fields: Fields,
242
0
        arrays: Vec<ArrayRef>,
243
0
        nulls: Option<NullBuffer>,
244
0
        len: usize,
245
0
    ) -> Self {
246
0
        if cfg!(feature = "force_validate") {
247
0
            return Self::try_new_with_length(fields, arrays, nulls, len).unwrap();
248
0
        }
249
250
0
        Self {
251
0
            len,
252
0
            data_type: DataType::Struct(fields),
253
0
            nulls,
254
0
            fields: arrays,
255
0
        }
256
0
    }
257
258
    /// Create a new [`StructArray`] containing no fields
259
    ///
260
    /// # Panics
261
    ///
262
    /// If `len != nulls.len()`
263
0
    pub fn new_empty_fields(len: usize, nulls: Option<NullBuffer>) -> Self {
264
0
        if let Some(n) = &nulls {
265
0
            assert_eq!(len, n.len())
266
0
        }
267
0
        Self {
268
0
            len,
269
0
            data_type: DataType::Struct(Fields::empty()),
270
0
            fields: vec![],
271
0
            nulls,
272
0
        }
273
0
    }
274
275
    /// Deconstruct this array into its constituent parts
276
0
    pub fn into_parts(self) -> (Fields, Vec<ArrayRef>, Option<NullBuffer>) {
277
0
        let f = match self.data_type {
278
0
            DataType::Struct(f) => f,
279
0
            _ => unreachable!(),
280
        };
281
0
        (f, self.fields, self.nulls)
282
0
    }
283
284
    /// Returns the field at `pos`.
285
39
    pub fn column(&self, pos: usize) -> &ArrayRef {
286
39
        &self.fields[pos]
287
39
    }
288
289
    /// Return the number of fields in this struct array
290
0
    pub fn num_columns(&self) -> usize {
291
0
        self.fields.len()
292
0
    }
293
294
    /// Returns the fields of the struct array
295
20
    pub fn columns(&self) -> &[ArrayRef] {
296
20
        &self.fields
297
20
    }
298
299
    /// Return field names in this struct array
300
12
    pub fn column_names(&self) -> Vec<&str> {
301
12
        match self.data_type() {
302
12
            DataType::Struct(fields) => fields
303
12
                .iter()
304
19
                .
map12
(|f| f.name().as_str())
305
12
                .collect::<Vec<&str>>(),
306
0
            _ => unreachable!("Struct array's data type is not struct!"),
307
        }
308
12
    }
309
310
    /// Returns the [`Fields`] of this [`StructArray`]
311
0
    pub fn fields(&self) -> &Fields {
312
0
        match self.data_type() {
313
0
            DataType::Struct(f) => f,
314
0
            _ => unreachable!(),
315
        }
316
0
    }
317
318
    /// Return child array whose field name equals to column_name
319
    ///
320
    /// Note: A schema can currently have duplicate field names, in which case
321
    /// the first field will always be selected.
322
    /// This issue will be addressed in [ARROW-11178](https://issues.apache.org/jira/browse/ARROW-11178)
323
12
    pub fn column_by_name(&self, column_name: &str) -> Option<&ArrayRef> {
324
12
        self.column_names()
325
12
            .iter()
326
14
            .
position12
(|c| c == &column_name)
327
12
            .map(|pos| self.column(pos))
328
12
    }
329
330
    /// Returns a zero-copy slice of this array with the indicated offset and length.
331
17
    pub fn slice(&self, offset: usize, len: usize) -> Self {
332
17
        assert!(
333
17
            offset.saturating_add(len) <= self.len,
334
0
            "the length + offset of the sliced StructArray cannot exceed the existing length"
335
        );
336
337
31
        let 
fields17
=
self.fields.iter()17
.
map17
(|a| a.slice(offset, len)).
collect17
();
338
339
        Self {
340
17
            len,
341
17
            data_type: self.data_type.clone(),
342
17
            nulls: self.nulls.as_ref().map(|n| 
n5
.
slice5
(
offset5
,
len5
)),
343
17
            fields,
344
        }
345
17
    }
346
}
347
348
impl From<ArrayData> for StructArray {
349
15
    fn from(data: ArrayData) -> Self {
350
15
        let parent_offset = data.offset();
351
15
        let parent_len = data.len();
352
353
15
        let fields = data
354
15
            .child_data()
355
15
            .iter()
356
25
            .
map15
(|cd| {
357
25
                if parent_offset != 0 || parent_len != cd.len() {
358
0
                    make_array(cd.slice(parent_offset, parent_len))
359
                } else {
360
25
                    make_array(cd.clone())
361
                }
362
25
            })
363
15
            .collect();
364
365
15
        Self {
366
15
            len: data.len(),
367
15
            data_type: data.data_type().clone(),
368
15
            nulls: data.nulls().cloned(),
369
15
            fields,
370
15
        }
371
15
    }
372
}
373
374
impl From<StructArray> for ArrayData {
375
99
    fn from(array: StructArray) -> Self {
376
99
        let builder = ArrayDataBuilder::new(array.data_type)
377
99
            .len(array.len)
378
99
            .nulls(array.nulls)
379
172
            .
child_data99
(
array.fields.iter()99
.
map99
(|x| x.to_data()).
collect99
());
380
381
99
        unsafe { builder.build_unchecked() }
382
99
    }
383
}
384
385
impl TryFrom<Vec<(&str, ArrayRef)>> for StructArray {
386
    type Error = ArrowError;
387
388
    /// builds a StructArray from a vector of names and arrays.
389
0
    fn try_from(values: Vec<(&str, ArrayRef)>) -> Result<Self, ArrowError> {
390
0
        let (fields, arrays): (Vec<_>, _) = values
391
0
            .into_iter()
392
0
            .map(|(name, array)| {
393
0
                (
394
0
                    Field::new(name, array.data_type().clone(), array.is_nullable()),
395
0
                    array,
396
0
                )
397
0
            })
398
0
            .unzip();
399
400
0
        StructArray::try_new(fields.into(), arrays, None)
401
0
    }
402
}
403
404
impl Array for StructArray {
405
23
    fn as_any(&self) -> &dyn Any {
406
23
        self
407
23
    }
408
409
95
    fn to_data(&self) -> ArrayData {
410
95
        self.clone().into()
411
95
    }
412
413
4
    fn into_data(self) -> ArrayData {
414
4
        self.into()
415
4
    }
416
417
139
    fn data_type(&self) -> &DataType {
418
139
        &self.data_type
419
139
    }
420
421
16
    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
422
16
        Arc::new(self.slice(offset, length))
423
16
    }
424
425
151
    fn len(&self) -> usize {
426
151
        self.len
427
151
    }
428
429
0
    fn is_empty(&self) -> bool {
430
0
        self.len == 0
431
0
    }
432
433
0
    fn shrink_to_fit(&mut self) {
434
0
        if let Some(nulls) = &mut self.nulls {
435
0
            nulls.shrink_to_fit();
436
0
        }
437
0
        self.fields.iter_mut().for_each(|n| n.shrink_to_fit());
438
0
    }
439
440
0
    fn offset(&self) -> usize {
441
0
        0
442
0
    }
443
444
59
    fn nulls(&self) -> Option<&NullBuffer> {
445
59
        self.nulls.as_ref()
446
59
    }
447
448
3
    fn logical_null_count(&self) -> usize {
449
        // More efficient that the default implementation
450
3
        self.null_count()
451
3
    }
452
453
0
    fn get_buffer_memory_size(&self) -> usize {
454
0
        let mut size = self.fields.iter().map(|a| a.get_buffer_memory_size()).sum();
455
0
        if let Some(n) = self.nulls.as_ref() {
456
0
            size += n.buffer().capacity();
457
0
        }
458
0
        size
459
0
    }
460
461
0
    fn get_array_memory_size(&self) -> usize {
462
0
        let mut size = self.fields.iter().map(|a| a.get_array_memory_size()).sum();
463
0
        size += std::mem::size_of::<Self>();
464
0
        if let Some(n) = self.nulls.as_ref() {
465
0
            size += n.buffer().capacity();
466
0
        }
467
0
        size
468
0
    }
469
}
470
471
impl From<Vec<(FieldRef, ArrayRef)>> for StructArray {
472
5
    fn from(v: Vec<(FieldRef, ArrayRef)>) -> Self {
473
5
        let (fields, arrays): (Vec<_>, _) = v.into_iter().unzip();
474
5
        StructArray::new(fields.into(), arrays, None)
475
5
    }
476
}
477
478
impl std::fmt::Debug for StructArray {
479
0
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
480
0
        writeln!(f, "StructArray")?;
481
0
        writeln!(f, "-- validity:")?;
482
0
        writeln!(f, "[")?;
483
0
        print_long_array(self, f, |_array, _index, f| write!(f, "valid"))?;
484
0
        writeln!(f, "]\n[")?;
485
0
        for (child_index, name) in self.column_names().iter().enumerate() {
486
0
            let column = self.column(child_index);
487
0
            writeln!(
488
0
                f,
489
0
                "-- child {}: \"{}\" ({:?})",
490
                child_index,
491
                name,
492
0
                column.data_type()
493
0
            )?;
494
0
            std::fmt::Debug::fmt(column, f)?;
495
0
            writeln!(f)?;
496
        }
497
0
        write!(f, "]")
498
0
    }
499
}
500
501
impl From<(Vec<(FieldRef, ArrayRef)>, Buffer)> for StructArray {
502
0
    fn from(pair: (Vec<(FieldRef, ArrayRef)>, Buffer)) -> Self {
503
0
        let len = pair.0.first().map(|x| x.1.len()).unwrap_or_default();
504
0
        let (fields, arrays): (Vec<_>, Vec<_>) = pair.0.into_iter().unzip();
505
0
        let nulls = NullBuffer::new(BooleanBuffer::new(pair.1, 0, len));
506
0
        Self::new(fields.into(), arrays, Some(nulls))
507
0
    }
508
}
509
510
impl From<RecordBatch> for StructArray {
511
0
    fn from(value: RecordBatch) -> Self {
512
0
        Self {
513
0
            len: value.num_rows(),
514
0
            data_type: DataType::Struct(value.schema().fields().clone()),
515
0
            nulls: None,
516
0
            fields: value.columns().to_vec(),
517
0
        }
518
0
    }
519
}
520
521
impl Index<&str> for StructArray {
522
    type Output = ArrayRef;
523
524
    /// Get a reference to a column's array by name.
525
    ///
526
    /// Note: A schema can currently have duplicate field names, in which case
527
    /// the first field will always be selected.
528
    /// This issue will be addressed in [ARROW-11178](https://issues.apache.org/jira/browse/ARROW-11178)
529
    ///
530
    /// # Panics
531
    ///
532
    /// Panics if the name is not in the schema.
533
0
    fn index(&self, name: &str) -> &Self::Output {
534
0
        self.column_by_name(name).unwrap()
535
0
    }
536
}
537
538
#[cfg(test)]
539
mod tests {
540
    use super::*;
541
542
    use crate::{BooleanArray, Float32Array, Float64Array, Int32Array, Int64Array, StringArray};
543
    use arrow_buffer::ToByteSlice;
544
545
    #[test]
546
    fn test_struct_array_builder() {
547
        let boolean_array = BooleanArray::from(vec![false, false, true, true]);
548
        let int_array = Int64Array::from(vec![42, 28, 19, 31]);
549
550
        let fields = vec![
551
            Field::new("a", DataType::Boolean, false),
552
            Field::new("b", DataType::Int64, false),
553
        ];
554
        let struct_array_data = ArrayData::builder(DataType::Struct(fields.into()))
555
            .len(4)
556
            .add_child_data(boolean_array.to_data())
557
            .add_child_data(int_array.to_data())
558
            .build()
559
            .unwrap();
560
        let struct_array = StructArray::from(struct_array_data);
561
562
        assert_eq!(struct_array.column(0).as_ref(), &boolean_array);
563
        assert_eq!(struct_array.column(1).as_ref(), &int_array);
564
    }
565
566
    #[test]
567
    fn test_struct_array_from() {
568
        let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true]));
569
        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
570
571
        let struct_array = StructArray::from(vec![
572
            (
573
                Arc::new(Field::new("b", DataType::Boolean, false)),
574
                boolean.clone() as ArrayRef,
575
            ),
576
            (
577
                Arc::new(Field::new("c", DataType::Int32, false)),
578
                int.clone() as ArrayRef,
579
            ),
580
        ]);
581
        assert_eq!(struct_array.column(0).as_ref(), boolean.as_ref());
582
        assert_eq!(struct_array.column(1).as_ref(), int.as_ref());
583
        assert_eq!(4, struct_array.len());
584
        assert_eq!(0, struct_array.null_count());
585
        assert_eq!(0, struct_array.offset());
586
    }
587
588
    #[test]
589
    fn test_struct_array_from_data_with_offset_and_length() {
590
        // Various ways to make the struct array:
591
        //
592
        // [{x: 2}, {x: 3}, None]
593
        //
594
        // from slicing larger buffers/arrays with offsets and lengths
595
        let int_arr = Int32Array::from(vec![1, 2, 3, 4, 5]);
596
        let int_field = Field::new("x", DataType::Int32, false);
597
        let struct_nulls = NullBuffer::new(BooleanBuffer::from(vec![true, true, false]));
598
        let int_data = int_arr.to_data();
599
        // Case 1: Offset + length, nulls are not sliced
600
        let case1 = ArrayData::builder(DataType::Struct(Fields::from(vec![int_field.clone()])))
601
            .len(3)
602
            .offset(1)
603
            .nulls(Some(struct_nulls))
604
            .add_child_data(int_data.clone())
605
            .build()
606
            .unwrap();
607
608
        // Case 2: Offset + length, nulls are sliced
609
        let struct_nulls =
610
            NullBuffer::new(BooleanBuffer::from(vec![true, true, true, false, true]).slice(1, 3));
611
        let case2 = ArrayData::builder(DataType::Struct(Fields::from(vec![int_field.clone()])))
612
            .len(3)
613
            .offset(1)
614
            .nulls(Some(struct_nulls.clone()))
615
            .add_child_data(int_data.clone())
616
            .build()
617
            .unwrap();
618
619
        // Case 3: struct length is smaller than child length but no offset
620
        let offset_int_data = int_data.slice(1, 4);
621
        let case3 = ArrayData::builder(DataType::Struct(Fields::from(vec![int_field.clone()])))
622
            .len(3)
623
            .nulls(Some(struct_nulls))
624
            .add_child_data(offset_int_data)
625
            .build()
626
            .unwrap();
627
628
        let expected = StructArray::new(
629
            Fields::from(vec![int_field.clone()]),
630
            vec![Arc::new(int_arr)],
631
            Some(NullBuffer::new(BooleanBuffer::from(vec![
632
                true, true, true, false, true,
633
            ]))),
634
        )
635
        .slice(1, 3);
636
637
        for case in [case1, case2, case3] {
638
            let struct_arr_from_data = StructArray::from(case);
639
            assert_eq!(struct_arr_from_data, expected);
640
            assert_eq!(struct_arr_from_data.column(0), expected.column(0));
641
        }
642
    }
643
644
    #[test]
645
    #[should_panic(expected = "assertion failed: (offset + length) <= self.len()")]
646
    fn test_struct_array_from_data_with_offset_and_length_error() {
647
        let int_arr = Int32Array::from(vec![1, 2, 3, 4, 5]);
648
        let int_field = Field::new("x", DataType::Int32, false);
649
        let struct_nulls = NullBuffer::new(BooleanBuffer::from(vec![true, true, false]));
650
        let int_data = int_arr.to_data();
651
        // If parent offset is 3 and len is 3 then child must have 6 items
652
        let struct_data =
653
            ArrayData::builder(DataType::Struct(Fields::from(vec![int_field.clone()])))
654
                .len(3)
655
                .offset(3)
656
                .nulls(Some(struct_nulls))
657
                .add_child_data(int_data)
658
                .build()
659
                .unwrap();
660
        let _ = StructArray::from(struct_data);
661
    }
662
663
    /// validates that struct can be accessed using `column_name` as index i.e. `struct_array["column_name"]`.
664
    #[test]
665
    fn test_struct_array_index_access() {
666
        let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true]));
667
        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
668
669
        let struct_array = StructArray::from(vec![
670
            (
671
                Arc::new(Field::new("b", DataType::Boolean, false)),
672
                boolean.clone() as ArrayRef,
673
            ),
674
            (
675
                Arc::new(Field::new("c", DataType::Int32, false)),
676
                int.clone() as ArrayRef,
677
            ),
678
        ]);
679
        assert_eq!(struct_array["b"].as_ref(), boolean.as_ref());
680
        assert_eq!(struct_array["c"].as_ref(), int.as_ref());
681
    }
682
683
    /// validates that the in-memory representation follows [the spec](https://arrow.apache.org/docs/format/Columnar.html#struct-layout)
684
    #[test]
685
    fn test_struct_array_from_vec() {
686
        let strings: ArrayRef = Arc::new(StringArray::from(vec![
687
            Some("joe"),
688
            None,
689
            None,
690
            Some("mark"),
691
        ]));
692
        let ints: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)]));
693
694
        let arr =
695
            StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]).unwrap();
696
697
        let struct_data = arr.into_data();
698
        assert_eq!(4, struct_data.len());
699
        assert_eq!(0, struct_data.null_count());
700
701
        let expected_string_data = ArrayData::builder(DataType::Utf8)
702
            .len(4)
703
            .null_bit_buffer(Some(Buffer::from(&[9_u8])))
704
            .add_buffer(Buffer::from([0, 3, 3, 3, 7].to_byte_slice()))
705
            .add_buffer(Buffer::from(b"joemark"))
706
            .build()
707
            .unwrap();
708
709
        let expected_int_data = ArrayData::builder(DataType::Int32)
710
            .len(4)
711
            .null_bit_buffer(Some(Buffer::from(&[11_u8])))
712
            .add_buffer(Buffer::from([1, 2, 0, 4].to_byte_slice()))
713
            .build()
714
            .unwrap();
715
716
        assert_eq!(expected_string_data, struct_data.child_data()[0]);
717
        assert_eq!(expected_int_data, struct_data.child_data()[1]);
718
    }
719
720
    #[test]
721
    fn test_struct_array_from_vec_error() {
722
        let strings: ArrayRef = Arc::new(StringArray::from(vec![
723
            Some("joe"),
724
            None,
725
            None,
726
            // 3 elements, not 4
727
        ]));
728
        let ints: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)]));
729
730
        let err = StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
731
            .unwrap_err()
732
            .to_string();
733
734
        assert_eq!(
735
            err,
736
            "Invalid argument error: Incorrect array length for StructArray field \"f2\", expected 3 got 4"
737
        )
738
    }
739
740
    #[test]
741
    #[should_panic(
742
        expected = "Incorrect datatype for StructArray field \\\"b\\\", expected Int16 got Boolean"
743
    )]
744
    fn test_struct_array_from_mismatched_types_single() {
745
        drop(StructArray::from(vec![(
746
            Arc::new(Field::new("b", DataType::Int16, false)),
747
            Arc::new(BooleanArray::from(vec![false, false, true, true])) as Arc<dyn Array>,
748
        )]));
749
    }
750
751
    #[test]
752
    #[should_panic(
753
        expected = "Incorrect datatype for StructArray field \\\"b\\\", expected Int16 got Boolean"
754
    )]
755
    fn test_struct_array_from_mismatched_types_multiple() {
756
        drop(StructArray::from(vec![
757
            (
758
                Arc::new(Field::new("b", DataType::Int16, false)),
759
                Arc::new(BooleanArray::from(vec![false, false, true, true])) as Arc<dyn Array>,
760
            ),
761
            (
762
                Arc::new(Field::new("c", DataType::Utf8, false)),
763
                Arc::new(Int32Array::from(vec![42, 28, 19, 31])),
764
            ),
765
        ]));
766
    }
767
768
    /// Builds a 5-row `StructArray` from raw `ArrayData` (a Boolean child "a" and an
    /// Int32 child "b", each with its own validity bitmap), checks the struct and
    /// both columns, then checks `slice(2, 3)` and the columns of the sliced array.
    #[test]
769
    fn test_struct_array_slice() {
770
        // Boolean child. Per the `c0` assertions below, validity 0b00010001 leaves
        // rows 0 and 4 valid, and values 0b00010000 make row 4 `true` and row 0 `false`.
        let boolean_data = ArrayData::builder(DataType::Boolean)
771
            .len(5)
772
            .add_buffer(Buffer::from([0b00010000]))
773
            .null_bit_buffer(Some(Buffer::from([0b00010001])))
774
            .build()
775
            .unwrap();
776
        // Int32 child. Per the `c1` assertions below, validity 0b00000110 leaves
        // only rows 1 and 2 valid (values 28 and 42).
        let int_data = ArrayData::builder(DataType::Int32)
777
            .len(5)
778
            .add_buffer(Buffer::from([0, 28, 42, 0, 0].to_byte_slice()))
779
            .null_bit_buffer(Some(Buffer::from([0b00000110])))
780
            .build()
781
            .unwrap();
782
783
        // Both struct fields are declared nullable.
        let field_types = vec![
784
            Field::new("a", DataType::Boolean, true),
785
            Field::new("b", DataType::Int32, true),
786
        ];
787
        // Struct-level validity 0b00010111: per the assertions below, only row 3 is null.
        let struct_array_data = ArrayData::builder(DataType::Struct(field_types.into()))
788
            .len(5)
789
            .add_child_data(boolean_data.clone())
790
            .add_child_data(int_data.clone())
791
            .null_bit_buffer(Some(Buffer::from([0b00010111])))
792
            .build()
793
            .unwrap();
794
        let struct_array = StructArray::from(struct_array_data);
795
796
        // Length and validity of the struct itself.
        assert_eq!(5, struct_array.len());
797
        assert_eq!(1, struct_array.null_count());
798
        assert!(struct_array.is_valid(0));
799
        assert!(struct_array.is_valid(1));
800
        assert!(struct_array.is_valid(2));
801
        assert!(struct_array.is_null(3));
802
        assert!(struct_array.is_valid(4));
803
        // Each column must convert back to exactly the child data it was built from.
        assert_eq!(boolean_data, struct_array.column(0).to_data());
804
        assert_eq!(int_data, struct_array.column(1).to_data());
805
806
        // Column 0 ("a"), checked through its concrete `BooleanArray` type.
        let c0 = struct_array.column(0);
807
        let c0 = c0.as_any().downcast_ref::<BooleanArray>().unwrap();
808
        assert_eq!(5, c0.len());
809
        assert_eq!(3, c0.null_count());
810
        assert!(c0.is_valid(0));
811
        assert!(!c0.value(0));
812
        assert!(c0.is_null(1));
813
        assert!(c0.is_null(2));
814
        assert!(c0.is_null(3));
815
        assert!(c0.is_valid(4));
816
        assert!(c0.value(4));
817
818
        // Column 1 ("b"), checked through its concrete `Int32Array` type.
        let c1 = struct_array.column(1);
819
        let c1 = c1.as_any().downcast_ref::<Int32Array>().unwrap();
820
        assert_eq!(5, c1.len());
821
        assert_eq!(3, c1.null_count());
822
        assert!(c1.is_null(0));
823
        assert!(c1.is_valid(1));
824
        assert_eq!(28, c1.value(1));
825
        assert!(c1.is_valid(2));
826
        assert_eq!(42, c1.value(2));
827
        assert!(c1.is_null(3));
828
        assert!(c1.is_null(4));
829
830
        // Slice of 3 rows starting at row 2: sliced rows 0..3 correspond to original
        // rows 2, 3 and 4, so the struct-level null (original row 3) lands at index 1.
        let sliced_array = struct_array.slice(2, 3);
831
        let sliced_array = sliced_array.as_any().downcast_ref::<StructArray>().unwrap();
832
        assert_eq!(3, sliced_array.len());
833
        assert_eq!(1, sliced_array.null_count());
834
        assert!(sliced_array.is_valid(0));
835
        assert!(sliced_array.is_null(1));
836
        assert!(sliced_array.is_valid(2));
837
838
        // The sliced boolean column must show the same three-row window
        // (original rows 2..5: null, null, true).
        let sliced_c0 = sliced_array.column(0);
839
        let sliced_c0 = sliced_c0.as_any().downcast_ref::<BooleanArray>().unwrap();
840
        assert_eq!(3, sliced_c0.len());
841
        assert!(sliced_c0.is_null(0));
842
        assert!(sliced_c0.is_null(1));
843
        assert!(sliced_c0.is_valid(2));
844
        assert!(sliced_c0.value(2));
845
846
        // The sliced int column must show the same window
        // (original rows 2..5: 42, null, null).
        let sliced_c1 = sliced_array.column(1);
847
        let sliced_c1 = sliced_c1.as_any().downcast_ref::<Int32Array>().unwrap();
848
        assert_eq!(3, sliced_c1.len());
849
        assert!(sliced_c1.is_valid(0));
850
        assert_eq!(42, sliced_c1.value(0));
851
        assert!(sliced_c1.is_null(1));
852
        assert!(sliced_c1.is_null(2));
853
    }
854
855
    /// Building a `StructArray` from `(field, array)` pairs whose child arrays have
    /// different lengths (1 row for "b", 2 rows for "c") is expected to panic with a
    /// message that names field "c" and reports `expected 1 got 2`.
    #[test]
856
    #[should_panic(
857
        expected = "Incorrect array length for StructArray field \\\"c\\\", expected 1 got 2"
858
    )]
859
    fn test_invalid_struct_child_array_lengths() {
860
        // `drop` discards the value; the construction itself is what should panic.
        drop(StructArray::from(vec![
861
            (
862
                Arc::new(Field::new("b", DataType::Float32, false)),
863
                // One row. The cast to a trait object gives both tuples the same element type.
                Arc::new(Float32Array::from(vec![1.1])) as Arc<dyn Array>,
864
            ),
865
            (
866
                Arc::new(Field::new("c", DataType::Float64, false)),
867
                // Two rows: the length mismatch with "b" is the error under test.
                Arc::new(Float64Array::from(vec![2.2, 3.3])),
868
            ),
869
        ]));
870
    }
871
872
    /// `StructArray::from` on an empty vec of columns is expected to panic, with a
    /// message that points to `StructArray::try_new_with_length`.
    #[test]
873
    #[should_panic(expected = "use StructArray::try_new_with_length")]
874
    fn test_struct_array_from_empty() {
875
        // This can't work because we don't know how many rows the array should have.  Previously we inferred 0 but
876
        // that often led to bugs.
877
        let _ = StructArray::from(vec![]);
878
    }
879
880
    /// Covers struct arrays that have no fields: `try_new` with empty fields is
    /// expected to return an error, while `new_empty_fields` and
    /// `try_new_with_length` build 10-row arrays, both without a null buffer and
    /// with an all-null one.
    #[test]
881
    fn test_empty_struct_array() {
882
        // `try_new` is given no columns and no explicit length, and must fail.
        assert!(StructArray::try_new(Fields::empty(), vec![], None).is_err());
883
884
        // No null buffer: 10 rows, zero nulls, zero columns.
        let arr = StructArray::new_empty_fields(10, None);
885
        assert_eq!(arr.len(), 10);
886
        assert_eq!(arr.null_count(), 0);
887
        assert_eq!(arr.num_columns(), 0);
888
889
        // Same shape through the fallible constructor with an explicit length.
        let arr2 = StructArray::try_new_with_length(Fields::empty(), vec![], None, 10).unwrap();
890
        assert_eq!(arr2.len(), 10);
891
892
        // With `NullBuffer::new_null(10)` every one of the 10 rows is null.
        let arr = StructArray::new_empty_fields(10, Some(NullBuffer::new_null(10)));
893
        assert_eq!(arr.len(), 10);
894
        assert_eq!(arr.null_count(), 10);
895
        assert_eq!(arr.num_columns(), 0);
896
897
        // Fallible constructor with the same null buffer and explicit length.
        let arr2 = StructArray::try_new_with_length(
898
            Fields::empty(),
899
            vec![],
900
            Some(NullBuffer::new_null(10)),
901
            10,
902
        )
903
        .unwrap();
904
        assert_eq!(arr2.len(), 10);
905
    }
906
907
    /// A child array that contains a null, placed under a field declared
    /// non-nullable, is expected to panic with the "unmasked nulls" message for
    /// field "c".
    #[test]
908
    #[should_panic(expected = "Found unmasked nulls for non-nullable StructArray field \\\"c\\\"")]
909
    fn test_struct_array_from_mismatched_nullability() {
910
        // `drop` discards the value; the construction itself is what should panic.
        drop(StructArray::from(vec![(
911
            // Field "c" is declared non-nullable (third argument is `false`)...
            Arc::new(Field::new("c", DataType::Int32, false)),
912
            // ...but the middle element of its data is `None`.
            Arc::new(Int32Array::from(vec![Some(42), None, Some(19)])) as ArrayRef,
913
        )]));
914
    }
915
916
    /// Checks the `Debug` output of a 30-row struct array with one Int32 child and
    /// alternating struct-level validity. The expected string lists the first 10
    /// and last 10 entries of both the validity and the child, with
    /// `...10 elements...` standing in for the middle.
    #[test]
917
    fn test_struct_array_fmt_debug() {
918
        let arr: StructArray = StructArray::new(
919
            vec![Arc::new(Field::new("c", DataType::Int32, true))].into(),
920
            // Child values are 0..30.
            vec![Arc::new(Int32Array::from((0..30).collect::<Vec<_>>())) as ArrayRef],
921
            // Even-indexed rows are valid, odd-indexed rows are null.
            Some(NullBuffer::new(BooleanBuffer::from(
922
                (0..30).map(|i| i % 2 == 0).collect::<Vec<_>>(),
923
            ))),
924
        );
925
        // Exact-match comparison: any change to the debug layout fails this test.
        assert_eq!(format!("{arr:?}"), "StructArray\n-- validity:\n[\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  ...10 elements...,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n]\n[\n-- child 0: \"c\" (Int32)\nPrimitiveArray<Int32>\n[\n  0,\n  1,\n  2,\n  3,\n  4,\n  5,\n  6,\n  7,\n  8,\n  9,\n  ...10 elements...,\n  20,\n  21,\n  22,\n  23,\n  24,\n  25,\n  26,\n  27,\n  28,\n  29,\n]\n]")
926
    }
927
928
    /// A non-nullable field whose child array carries a null buffer with every bit
    /// valid (so it holds no actual nulls) must be accepted by
    /// `StructArray::try_new`.
    #[test]
929
    fn test_struct_array_logical_nulls() {
930
        // Field is non-nullable
931
        let field = Field::new("a", DataType::Int32, false);
932
        let values = vec![1, 2, 3];
933
        // Create a NullBuffer with all bits set to valid (true)
934
        let nulls = NullBuffer::from(vec![true, true, true]);
935
        let array = Int32Array::new(values.into(), Some(nulls));
936
        let child = Arc::new(array) as ArrayRef;
937
        // The child does report a logical null buffer, but it contains zero nulls.
        assert!(child.logical_nulls().is_some());
938
        assert_eq!(child.logical_nulls().unwrap().null_count(), 0);
939
940
        let fields = Fields::from(vec![field]);
941
        let arrays = vec![child];
942
        // No struct-level validity buffer.
        let nulls = None;
943
944
        // Construction must succeed; a failure panics with the `expect` message.
        StructArray::try_new(fields, arrays, nulls).expect("should not error");
945
    }
946
}