Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-array/src/array/mod.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! The concrete array definitions
19
20
mod binary_array;
21
22
use crate::types::*;
23
use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer, ScalarBuffer};
24
use arrow_data::ArrayData;
25
use arrow_schema::{DataType, IntervalUnit, TimeUnit};
26
use std::any::Any;
27
use std::sync::Arc;
28
29
pub use binary_array::*;
30
31
mod boolean_array;
32
pub use boolean_array::*;
33
34
mod byte_array;
35
pub use byte_array::*;
36
37
mod dictionary_array;
38
pub use dictionary_array::*;
39
40
mod fixed_size_binary_array;
41
pub use fixed_size_binary_array::*;
42
43
mod fixed_size_list_array;
44
pub use fixed_size_list_array::*;
45
46
mod list_array;
47
pub use list_array::*;
48
49
mod map_array;
50
pub use map_array::*;
51
52
mod null_array;
53
pub use null_array::*;
54
55
mod primitive_array;
56
pub use primitive_array::*;
57
58
mod string_array;
59
pub use string_array::*;
60
61
mod struct_array;
62
pub use struct_array::*;
63
64
mod union_array;
65
pub use union_array::*;
66
67
mod run_array;
68
69
pub use run_array::*;
70
71
mod byte_view_array;
72
73
pub use byte_view_array::*;
74
75
mod list_view_array;
76
77
pub use list_view_array::*;
78
79
use crate::iterator::ArrayIter;
80
81
/// An array in the [arrow columnar format](https://arrow.apache.org/docs/format/Columnar.html)
82
pub trait Array: std::fmt::Debug + Send + Sync {
83
    /// Returns the array as [`Any`] so that it can be
84
    /// downcasted to a specific implementation.
85
    ///
86
    /// # Example:
87
    ///
88
    /// ```
89
    /// # use std::sync::Arc;
90
    /// # use arrow_array::{Int32Array, RecordBatch};
91
    /// # use arrow_schema::{Schema, Field, DataType, ArrowError};
92
    ///
93
    /// let id = Int32Array::from(vec![1, 2, 3, 4, 5]);
94
    /// let batch = RecordBatch::try_new(
95
    ///     Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])),
96
    ///     vec![Arc::new(id)]
97
    /// ).unwrap();
98
    ///
99
    /// let int32array = batch
100
    ///     .column(0)
101
    ///     .as_any()
102
    ///     .downcast_ref::<Int32Array>()
103
    ///     .expect("Failed to downcast");
104
    /// ```
105
    fn as_any(&self) -> &dyn Any;
106
107
    /// Returns the underlying data of this array
108
    fn to_data(&self) -> ArrayData;
109
110
    /// Returns the underlying data of this array
111
    ///
112
    /// Unlike [`Array::to_data`] this consumes self, allowing it to avoid unnecessary clones
113
    fn into_data(self) -> ArrayData;
114
115
    /// Returns a reference to the [`DataType`] of this array.
116
    ///
117
    /// # Example:
118
    ///
119
    /// ```
120
    /// use arrow_schema::DataType;
121
    /// use arrow_array::{Array, Int32Array};
122
    ///
123
    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
124
    ///
125
    /// assert_eq!(*array.data_type(), DataType::Int32);
126
    /// ```
127
    fn data_type(&self) -> &DataType;
128
129
    /// Returns a zero-copy slice of this array with the indicated offset and length.
130
    ///
131
    /// # Example:
132
    ///
133
    /// ```
134
    /// use arrow_array::{Array, Int32Array};
135
    ///
136
    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
137
    /// // Make slice over the values [2, 3, 4]
138
    /// let array_slice = array.slice(1, 3);
139
    ///
140
    /// assert_eq!(&array_slice, &Int32Array::from(vec![2, 3, 4]));
141
    /// ```
142
    fn slice(&self, offset: usize, length: usize) -> ArrayRef;
143
144
    /// Returns the length (i.e., number of elements) of this array.
145
    ///
146
    /// # Example:
147
    ///
148
    /// ```
149
    /// use arrow_array::{Array, Int32Array};
150
    ///
151
    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
152
    ///
153
    /// assert_eq!(array.len(), 5);
154
    /// ```
155
    fn len(&self) -> usize;
156
157
    /// Returns whether this array is empty.
158
    ///
159
    /// # Example:
160
    ///
161
    /// ```
162
    /// use arrow_array::{Array, Int32Array};
163
    ///
164
    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
165
    ///
166
    /// assert_eq!(array.is_empty(), false);
167
    /// ```
168
    fn is_empty(&self) -> bool;
169
170
    /// Shrinks the capacity of any exclusively owned buffer as much as possible
171
    ///
172
    /// Shared or externally allocated buffers will be ignored, and
173
    /// any buffer offsets will be preserved.
174
0
    fn shrink_to_fit(&mut self) {}
175
176
    /// Returns the offset into the underlying data used by this array(-slice).
177
    /// Note that the underlying data can be shared by many arrays.
178
    /// This defaults to `0`.
179
    ///
180
    /// # Example:
181
    ///
182
    /// ```
183
    /// use arrow_array::{Array, BooleanArray};
184
    ///
185
    /// let array = BooleanArray::from(vec![false, false, true, true]);
186
    /// let array_slice = array.slice(1, 3);
187
    ///
188
    /// assert_eq!(array.offset(), 0);
189
    /// assert_eq!(array_slice.offset(), 1);
190
    /// ```
191
    fn offset(&self) -> usize;
192
193
    /// Returns the null buffer of this array if any.
194
    ///
195
    /// The null buffer contains the "physical" nulls of an array, that is how
196
    /// the nulls are represented in the underlying arrow format.
197
    ///
198
    /// The physical representation is efficient, but is sometimes non-intuitive
199
    /// for certain array types such as those with nullable child arrays like
200
    /// [`DictionaryArray::values`], [`RunArray::values`] or [`UnionArray`], or without a
201
    /// null buffer, such as [`NullArray`].
202
    ///
203
    /// To determine if each element of such an array is "logically" null,
204
    /// use the slower [`Array::logical_nulls`] to obtain a computed mask.
205
    fn nulls(&self) -> Option<&NullBuffer>;
206
207
    /// Returns a potentially computed [`NullBuffer`] that represents the logical
208
    /// null values of this array, if any.
209
    ///
210
    /// Logical nulls represent the values that are null in the array,
211
    /// regardless of the underlying physical arrow representation.
212
    ///
213
    /// For most array types, this is equivalent to the "physical" nulls
214
    /// returned by [`Array::nulls`]. It is different for the following cases, because which
215
    /// elements are null is not encoded in a single null buffer:
216
    ///
217
    /// * [`DictionaryArray`] where [`DictionaryArray::values`] contains nulls
218
    /// * [`RunArray`] where [`RunArray::values`] contains nulls
219
    /// * [`NullArray`] where all indices are nulls
220
    /// * [`UnionArray`] where the selected values contain nulls
221
    ///
222
    /// In these cases a logical [`NullBuffer`] will be computed, encoding the
223
    /// logical nullability of these arrays, beyond what is encoded in
224
    /// [`Array::nulls`]
225
56
    fn logical_nulls(&self) -> Option<NullBuffer> {
226
56
        self.nulls().cloned()
227
56
    }
228
229
    /// Returns whether the element at `index` is null according to [`Array::nulls`]
230
    ///
231
    /// Note: For performance reasons, this method returns nullability solely as determined by the
232
    /// null buffer. This difference can lead to surprising results, for example, [`NullArray::is_null`] always
233
    /// returns `false` as the array lacks a null buffer. Similarly [`DictionaryArray`], [`RunArray`] and [`UnionArray`] may
234
    /// encode nullability in their children. See [`Self::logical_nulls`] for more information.
235
    ///
236
    /// # Example:
237
    ///
238
    /// ```
239
    /// use arrow_array::{Array, Int32Array, NullArray};
240
    ///
241
    /// let array = Int32Array::from(vec![Some(1), None]);
242
    /// assert_eq!(array.is_null(0), false);
243
    /// assert_eq!(array.is_null(1), true);
244
    ///
245
    /// // NullArrays do not have a null buffer, and therefore always
246
    /// // return false for is_null.
247
    /// let array = NullArray::new(1);
248
    /// assert_eq!(array.is_null(0), false);
249
    /// ```
250
36
    fn is_null(&self, index: usize) -> bool {
251
36
        self.nulls().map(|n| n.is_null(index)).unwrap_or_default()
252
36
    }
253
254
    /// Returns whether the element at `index` is *not* null, the
255
    /// opposite of [`Self::is_null`].
256
    ///
257
    /// # Example:
258
    ///
259
    /// ```
260
    /// use arrow_array::{Array, Int32Array};
261
    ///
262
    /// let array = Int32Array::from(vec![Some(1), None]);
263
    ///
264
    /// assert_eq!(array.is_valid(0), true);
265
    /// assert_eq!(array.is_valid(1), false);
266
    /// ```
267
32
    fn is_valid(&self, index: usize) -> bool {
268
32
        !self.is_null(index)
269
32
    }
270
271
    /// Returns the total number of physical null values in this array.
272
    ///
273
    /// Note: this method returns the physical null count, i.e. that encoded in [`Array::nulls`],
274
    /// see [`Array::logical_nulls`] for logical nullability
275
    ///
276
    /// # Example:
277
    ///
278
    /// ```
279
    /// use arrow_array::{Array, Int32Array};
280
    ///
281
    /// // Construct an array with values [1, NULL, NULL]
282
    /// let array = Int32Array::from(vec![Some(1), None, None]);
283
    ///
284
    /// assert_eq!(array.null_count(), 2);
285
    /// ```
286
154
    fn null_count(&self) -> usize {
287
154
        self.nulls().map(|n| n.null_count()).unwrap_or_default()
288
154
    }
289
290
    /// Returns the total number of logical null values in this array.
291
    ///
292
    /// Note: this method returns the logical null count, i.e. that encoded in
293
    /// [`Array::logical_nulls`]. In general this is equivalent to [`Array::null_count`] but may differ in the
294
    /// presence of logical nullability, see [`Array::nulls`] and [`Array::logical_nulls`].
295
    ///
296
    /// # Example:
297
    ///
298
    /// ```
299
    /// use arrow_array::{Array, Int32Array};
300
    ///
301
    /// // Construct an array with values [1, NULL, NULL]
302
    /// let array = Int32Array::from(vec![Some(1), None, None]);
303
    ///
304
    /// assert_eq!(array.logical_null_count(), 2);
305
    /// ```
306
0
    fn logical_null_count(&self) -> usize {
307
0
        self.logical_nulls()
308
0
            .map(|n| n.null_count())
309
0
            .unwrap_or_default()
310
0
    }
311
312
    /// Returns `false` if the array is guaranteed to not contain any logical nulls
313
    ///
314
    /// This is generally equivalent to `Array::logical_null_count() != 0` unless determining
315
    /// the logical nulls is expensive, in which case this method can return true even for an
316
    /// array without nulls.
317
    ///
318
    /// This is also generally equivalent to `Array::null_count() != 0` but may differ in the
319
    /// presence of logical nullability, see [`Array::logical_null_count`] and [`Array::null_count`].
320
    ///
321
    /// Implementations will return `true` unless they can cheaply prove no logical nulls
322
    /// are present. For example a [`DictionaryArray`] with nullable values will still return true,
323
    /// even if the nulls present in [`DictionaryArray::values`] are not referenced by any key,
324
    /// and therefore would not appear in [`Array::logical_nulls`].
325
9
    fn is_nullable(&self) -> bool {
326
9
        self.logical_null_count() != 0
327
9
    }
328
329
    /// Returns the total number of bytes of memory pointed to by this array.
330
    /// The buffers store bytes in the Arrow memory format, and include the data as well as the validity map.
331
    /// Note that this does not always correspond to the exact memory usage of an array,
332
    /// since multiple arrays can share the same buffers or slices thereof.
333
    fn get_buffer_memory_size(&self) -> usize;
334
335
    /// Returns the total number of bytes of memory occupied physically by this array.
336
    /// This value will always be greater than the value returned by `get_buffer_memory_size()` and
337
    /// includes the overhead of the data structures that contain the pointers to the various buffers.
338
    fn get_array_memory_size(&self) -> usize;
339
}
340
341
/// A reference-counted reference to a generic `Array`
342
pub type ArrayRef = Arc<dyn Array>;
343
344
/// Ergonomics: Allow use of an ArrayRef as an `&dyn Array`
345
impl Array for ArrayRef {
346
76
    fn as_any(&self) -> &dyn Any {
347
76
        self.as_ref().as_any()
348
76
    }
349
350
331
    fn to_data(&self) -> ArrayData {
351
331
        self.as_ref().to_data()
352
331
    }
353
354
0
    fn into_data(self) -> ArrayData {
355
0
        self.to_data()
356
0
    }
357
358
2.01k
    fn data_type(&self) -> &DataType {
359
2.01k
        self.as_ref().data_type()
360
2.01k
    }
361
362
33
    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
363
33
        self.as_ref().slice(offset, length)
364
33
    }
365
366
2.16k
    fn len(&self) -> usize {
367
2.16k
        self.as_ref().len()
368
2.16k
    }
369
370
0
    fn is_empty(&self) -> bool {
371
0
        self.as_ref().is_empty()
372
0
    }
373
374
    /// For shared buffers, this is a no-op.
375
0
    fn shrink_to_fit(&mut self) {
376
0
        if let Some(slf) = Arc::get_mut(self) {
377
0
            slf.shrink_to_fit();
378
0
        } else {
379
0
            // We ignore shared buffers.
380
0
        }
381
0
    }
382
383
0
    fn offset(&self) -> usize {
384
0
        self.as_ref().offset()
385
0
    }
386
387
0
    fn nulls(&self) -> Option<&NullBuffer> {
388
0
        self.as_ref().nulls()
389
0
    }
390
391
56
    fn logical_nulls(&self) -> Option<NullBuffer> {
392
56
        self.as_ref().logical_nulls()
393
56
    }
394
395
0
    fn is_null(&self, index: usize) -> bool {
396
0
        self.as_ref().is_null(index)
397
0
    }
398
399
0
    fn is_valid(&self, index: usize) -> bool {
400
0
        self.as_ref().is_valid(index)
401
0
    }
402
403
79
    fn null_count(&self) -> usize {
404
79
        self.as_ref().null_count()
405
79
    }
406
407
0
    fn logical_null_count(&self) -> usize {
408
0
        self.as_ref().logical_null_count()
409
0
    }
410
411
10
    fn is_nullable(&self) -> bool {
412
10
        self.as_ref().is_nullable()
413
10
    }
414
415
0
    fn get_buffer_memory_size(&self) -> usize {
416
0
        self.as_ref().get_buffer_memory_size()
417
0
    }
418
419
0
    fn get_array_memory_size(&self) -> usize {
420
0
        self.as_ref().get_array_memory_size()
421
0
    }
422
}
423
424
impl<T: Array> Array for &T {
425
0
    fn as_any(&self) -> &dyn Any {
426
0
        T::as_any(self)
427
0
    }
428
429
0
    fn to_data(&self) -> ArrayData {
430
0
        T::to_data(self)
431
0
    }
432
433
0
    fn into_data(self) -> ArrayData {
434
0
        self.to_data()
435
0
    }
436
437
0
    fn data_type(&self) -> &DataType {
438
0
        T::data_type(self)
439
0
    }
440
441
0
    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
442
0
        T::slice(self, offset, length)
443
0
    }
444
445
46
    fn len(&self) -> usize {
446
46
        T::len(self)
447
46
    }
448
449
0
    fn is_empty(&self) -> bool {
450
0
        T::is_empty(self)
451
0
    }
452
453
0
    fn offset(&self) -> usize {
454
0
        T::offset(self)
455
0
    }
456
457
42
    fn nulls(&self) -> Option<&NullBuffer> {
458
42
        T::nulls(self)
459
42
    }
460
461
0
    fn logical_nulls(&self) -> Option<NullBuffer> {
462
0
        T::logical_nulls(self)
463
0
    }
464
465
0
    fn is_null(&self, index: usize) -> bool {
466
0
        T::is_null(self, index)
467
0
    }
468
469
0
    fn is_valid(&self, index: usize) -> bool {
470
0
        T::is_valid(self, index)
471
0
    }
472
473
29
    fn null_count(&self) -> usize {
474
29
        T::null_count(self)
475
29
    }
476
477
0
    fn logical_null_count(&self) -> usize {
478
0
        T::logical_null_count(self)
479
0
    }
480
481
0
    fn is_nullable(&self) -> bool {
482
0
        T::is_nullable(self)
483
0
    }
484
485
0
    fn get_buffer_memory_size(&self) -> usize {
486
0
        T::get_buffer_memory_size(self)
487
0
    }
488
489
0
    fn get_array_memory_size(&self) -> usize {
490
0
        T::get_array_memory_size(self)
491
0
    }
492
}
493
494
/// A generic trait for accessing the values of an [`Array`]
495
///
496
/// This trait helps write specialized implementations of algorithms for
497
/// different array types. Specialized implementations allow the compiler
498
/// to optimize the code for the specific array type, which can lead to
499
/// significant performance improvements.
500
///
501
/// # Example
502
/// For example, to write three different implementations of a string length function
503
/// for [`StringArray`], [`LargeStringArray`], and [`StringViewArray`], you can write
504
///
505
/// ```
506
/// # use std::sync::Arc;
507
/// # use arrow_array::{ArrayAccessor, ArrayRef, ArrowPrimitiveType, OffsetSizeTrait, PrimitiveArray};
508
/// # use arrow_buffer::ArrowNativeType;
509
/// # use arrow_array::cast::AsArray;
510
/// # use arrow_array::iterator::ArrayIter;
511
/// # use arrow_array::types::{Int32Type, Int64Type};
512
/// # use arrow_schema::{ArrowError, DataType};
513
/// /// This function takes a dynamically typed `ArrayRef` and calls
514
/// /// one of three specialized implementations
515
/// fn character_length(arg: ArrayRef) -> Result<ArrayRef, ArrowError> {
516
///     match arg.data_type() {
517
///         DataType::Utf8 => {
518
///             // downcast the ArrayRef to a StringArray and call the specialized implementation
519
///             let string_array = arg.as_string::<i32>();
520
///             character_length_general::<Int32Type, _>(string_array)
521
///         }
522
///         DataType::LargeUtf8 => {
523
///             character_length_general::<Int64Type, _>(arg.as_string::<i64>())
524
///         }
525
///         DataType::Utf8View => {
526
///             character_length_general::<Int32Type, _>(arg.as_string_view())
527
///         }
528
///         _ => Err(ArrowError::InvalidArgumentError("Unsupported data type".to_string())),
529
///     }
530
/// }
531
///
532
/// /// A generic implementation of the character_length function
533
/// /// This function uses the `ArrayAccessor` trait to access the values of the array
534
/// /// so the compiler can generate specialized implementations for different array types
535
/// ///
536
/// /// Returns a new array with the length of each string in the input array
537
/// /// * Int32Array for Utf8 and Utf8View arrays (lengths are 32-bit integers)
538
/// /// * Int64Array for LargeUtf8 arrays (lengths are 64-bit integers)
539
/// ///
540
/// /// This is generic on the type of the primitive array (different string arrays have
541
/// /// different lengths) and the type of the array accessor (different string arrays
542
/// /// have different ways to access the values)
543
/// fn character_length_general<'a, T: ArrowPrimitiveType, V: ArrayAccessor<Item = &'a str>>(
544
///     array: V,
545
/// ) -> Result<ArrayRef, ArrowError>
546
/// where
547
///     T::Native: OffsetSizeTrait,
548
/// {
549
///     let iter = ArrayIter::new(array);
550
///     // Create an Int32Array / Int64Array with the length of each string
551
///     let result = iter
552
///         .map(|string| {
553
///             string.map(|string: &str| {
554
///                 T::Native::from_usize(string.chars().count())
555
///                     .expect("should not fail as string.chars will always return integer")
556
///             })
557
///         })
558
///         .collect::<PrimitiveArray<T>>();
559
///
560
///     /// Return the result as a new ArrayRef (dynamically typed)
561
///     Ok(Arc::new(result) as ArrayRef)
562
/// }
563
/// ```
564
///
565
/// # Validity
566
///
567
/// An [`ArrayAccessor`] must always return a well-defined value for an index
568
/// that is within the bounds `0..Array::len`, including for null indexes where
569
/// [`Array::is_null`] is true.
570
///
571
/// The value at null indexes is unspecified, and implementations must not rely
572
/// on a specific value such as [`Default::default`] being returned, however, it
573
/// must not be undefined
574
pub trait ArrayAccessor: Array {
575
    /// The Arrow type of the element being accessed.
576
    type Item: Send + Sync;
577
578
    /// Returns the element at index `index`
579
    /// # Panics
580
    /// Panics if `index` is outside the bounds of the array
581
    fn value(&self, index: usize) -> Self::Item;
582
583
    /// Returns the element at index `index`
584
    /// # Safety
585
    /// Caller is responsible for ensuring that the index is within the bounds of the array
586
    unsafe fn value_unchecked(&self, index: usize) -> Self::Item;
587
}
588
589
/// A trait for Arrow String Arrays, currently three types are supported:
590
/// - `StringArray`
591
/// - `LargeStringArray`
592
/// - `StringViewArray`
593
///
594
/// This trait helps to abstract over the different types of string arrays
595
/// so that we don't need to duplicate the implementation for each type.
596
pub trait StringArrayType<'a>: ArrayAccessor<Item = &'a str> + Sized {
597
    /// Returns true if all data within this string array is ASCII
598
    fn is_ascii(&self) -> bool;
599
600
    /// Constructs a new iterator
601
    fn iter(&self) -> ArrayIter<Self>;
602
}
603
604
impl<'a, O: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray<O> {
605
0
    fn is_ascii(&self) -> bool {
606
0
        GenericStringArray::<O>::is_ascii(self)
607
0
    }
608
609
0
    fn iter(&self) -> ArrayIter<Self> {
610
0
        GenericStringArray::<O>::iter(self)
611
0
    }
612
}
613
impl<'a> StringArrayType<'a> for &'a StringViewArray {
614
0
    fn is_ascii(&self) -> bool {
615
0
        StringViewArray::is_ascii(self)
616
0
    }
617
618
0
    fn iter(&self) -> ArrayIter<Self> {
619
0
        StringViewArray::iter(self)
620
0
    }
621
}
622
623
/// A trait for Arrow Binary Arrays, currently three types are supported:
624
/// - `BinaryArray`
625
/// - `LargeBinaryArray`
626
/// - `BinaryViewArray`
627
///
628
/// This trait helps to abstract over the different types of binary arrays
629
/// so that we don't need to duplicate the implementation for each type.
630
pub trait BinaryArrayType<'a>: ArrayAccessor<Item = &'a [u8]> + Sized {
631
    /// Constructs a new iterator
632
    fn iter(&self) -> ArrayIter<Self>;
633
}
634
635
impl<'a, O: OffsetSizeTrait> BinaryArrayType<'a> for &'a GenericBinaryArray<O> {
636
0
    fn iter(&self) -> ArrayIter<Self> {
637
0
        GenericBinaryArray::<O>::iter(self)
638
0
    }
639
}
640
impl<'a> BinaryArrayType<'a> for &'a BinaryViewArray {
641
0
    fn iter(&self) -> ArrayIter<Self> {
642
0
        BinaryViewArray::iter(self)
643
0
    }
644
}
645
646
impl PartialEq for dyn Array + '_ {
647
435
    fn eq(&self, other: &Self) -> bool {
648
435
        self.to_data().eq(&other.to_data())
649
435
    }
650
}
651
652
impl<T: Array> PartialEq<T> for dyn Array + '_ {
653
    fn eq(&self, other: &T) -> bool {
654
        self.to_data().eq(&other.to_data())
655
    }
656
}
657
658
impl PartialEq for NullArray {
659
0
    fn eq(&self, other: &NullArray) -> bool {
660
0
        self.to_data().eq(&other.to_data())
661
0
    }
662
}
663
664
impl<T: ArrowPrimitiveType> PartialEq for PrimitiveArray<T> {
665
3
    fn eq(&self, other: &PrimitiveArray<T>) -> bool {
666
3
        self.to_data().eq(&other.to_data())
667
3
    }
668
}
669
670
impl<K: ArrowDictionaryKeyType> PartialEq for DictionaryArray<K> {
671
    fn eq(&self, other: &Self) -> bool {
672
        self.to_data().eq(&other.to_data())
673
    }
674
}
675
676
impl PartialEq for BooleanArray {
677
0
    fn eq(&self, other: &BooleanArray) -> bool {
678
0
        self.to_data().eq(&other.to_data())
679
0
    }
680
}
681
682
impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericStringArray<OffsetSize> {
683
    fn eq(&self, other: &Self) -> bool {
684
        self.to_data().eq(&other.to_data())
685
    }
686
}
687
688
impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericBinaryArray<OffsetSize> {
689
    fn eq(&self, other: &Self) -> bool {
690
        self.to_data().eq(&other.to_data())
691
    }
692
}
693
694
impl PartialEq for FixedSizeBinaryArray {
695
1
    fn eq(&self, other: &Self) -> bool {
696
1
        self.to_data().eq(&other.to_data())
697
1
    }
698
}
699
700
impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListArray<OffsetSize> {
701
    fn eq(&self, other: &Self) -> bool {
702
        self.to_data().eq(&other.to_data())
703
    }
704
}
705
706
impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListViewArray<OffsetSize> {
707
    fn eq(&self, other: &Self) -> bool {
708
        self.to_data().eq(&other.to_data())
709
    }
710
}
711
712
impl PartialEq for MapArray {
713
0
    fn eq(&self, other: &Self) -> bool {
714
0
        self.to_data().eq(&other.to_data())
715
0
    }
716
}
717
718
impl PartialEq for FixedSizeListArray {
719
0
    fn eq(&self, other: &Self) -> bool {
720
0
        self.to_data().eq(&other.to_data())
721
0
    }
722
}
723
724
impl PartialEq for StructArray {
725
0
    fn eq(&self, other: &Self) -> bool {
726
0
        self.to_data().eq(&other.to_data())
727
0
    }
728
}
729
730
impl<T: ByteViewType + ?Sized> PartialEq for GenericByteViewArray<T> {
731
    fn eq(&self, other: &Self) -> bool {
732
        self.to_data().eq(&other.to_data())
733
    }
734
}
735
736
impl<R: RunEndIndexType> PartialEq for RunArray<R> {
737
    fn eq(&self, other: &Self) -> bool {
738
        self.to_data().eq(&other.to_data())
739
    }
740
}
741
742
/// Constructs an array using the input `data`.
743
/// Returns a reference-counted `Array` instance.
744
42
pub fn make_array(data: ArrayData) -> ArrayRef {
745
42
    match data.data_type() {
746
1
        DataType::Boolean => Arc::new(BooleanArray::from(data)) as ArrayRef,
747
0
        DataType::Int8 => Arc::new(Int8Array::from(data)) as ArrayRef,
748
0
        DataType::Int16 => Arc::new(Int16Array::from(data)) as ArrayRef,
749
4
        DataType::Int32 => Arc::new(Int32Array::from(data)) as ArrayRef,
750
3
        DataType::Int64 => Arc::new(Int64Array::from(data)) as ArrayRef,
751
0
        DataType::UInt8 => Arc::new(UInt8Array::from(data)) as ArrayRef,
752
0
        DataType::UInt16 => Arc::new(UInt16Array::from(data)) as ArrayRef,
753
0
        DataType::UInt32 => Arc::new(UInt32Array::from(data)) as ArrayRef,
754
0
        DataType::UInt64 => Arc::new(UInt64Array::from(data)) as ArrayRef,
755
0
        DataType::Float16 => Arc::new(Float16Array::from(data)) as ArrayRef,
756
1
        DataType::Float32 => Arc::new(Float32Array::from(data)) as ArrayRef,
757
2
        DataType::Float64 => Arc::new(Float64Array::from(data)) as ArrayRef,
758
0
        DataType::Date32 => Arc::new(Date32Array::from(data)) as ArrayRef,
759
0
        DataType::Date64 => Arc::new(Date64Array::from(data)) as ArrayRef,
760
0
        DataType::Time32(TimeUnit::Second) => Arc::new(Time32SecondArray::from(data)) as ArrayRef,
761
        DataType::Time32(TimeUnit::Millisecond) => {
762
0
            Arc::new(Time32MillisecondArray::from(data)) as ArrayRef
763
        }
764
        DataType::Time64(TimeUnit::Microsecond) => {
765
0
            Arc::new(Time64MicrosecondArray::from(data)) as ArrayRef
766
        }
767
        DataType::Time64(TimeUnit::Nanosecond) => {
768
0
            Arc::new(Time64NanosecondArray::from(data)) as ArrayRef
769
        }
770
        DataType::Timestamp(TimeUnit::Second, _) => {
771
0
            Arc::new(TimestampSecondArray::from(data)) as ArrayRef
772
        }
773
        DataType::Timestamp(TimeUnit::Millisecond, _) => {
774
0
            Arc::new(TimestampMillisecondArray::from(data)) as ArrayRef
775
        }
776
        DataType::Timestamp(TimeUnit::Microsecond, _) => {
777
0
            Arc::new(TimestampMicrosecondArray::from(data)) as ArrayRef
778
        }
779
        DataType::Timestamp(TimeUnit::Nanosecond, _) => {
780
0
            Arc::new(TimestampNanosecondArray::from(data)) as ArrayRef
781
        }
782
        DataType::Interval(IntervalUnit::YearMonth) => {
783
0
            Arc::new(IntervalYearMonthArray::from(data)) as ArrayRef
784
        }
785
        DataType::Interval(IntervalUnit::DayTime) => {
786
0
            Arc::new(IntervalDayTimeArray::from(data)) as ArrayRef
787
        }
788
        DataType::Interval(IntervalUnit::MonthDayNano) => {
789
0
            Arc::new(IntervalMonthDayNanoArray::from(data)) as ArrayRef
790
        }
791
        DataType::Duration(TimeUnit::Second) => {
792
0
            Arc::new(DurationSecondArray::from(data)) as ArrayRef
793
        }
794
        DataType::Duration(TimeUnit::Millisecond) => {
795
0
            Arc::new(DurationMillisecondArray::from(data)) as ArrayRef
796
        }
797
        DataType::Duration(TimeUnit::Microsecond) => {
798
0
            Arc::new(DurationMicrosecondArray::from(data)) as ArrayRef
799
        }
800
        DataType::Duration(TimeUnit::Nanosecond) => {
801
0
            Arc::new(DurationNanosecondArray::from(data)) as ArrayRef
802
        }
803
0
        DataType::Binary => Arc::new(BinaryArray::from(data)) as ArrayRef,
804
0
        DataType::LargeBinary => Arc::new(LargeBinaryArray::from(data)) as ArrayRef,
805
3
        DataType::FixedSizeBinary(_) => Arc::new(FixedSizeBinaryArray::from(data)) as ArrayRef,
806
0
        DataType::BinaryView => Arc::new(BinaryViewArray::from(data)) as ArrayRef,
807
12
        DataType::Utf8 => Arc::new(StringArray::from(data)) as ArrayRef,
808
0
        DataType::LargeUtf8 => Arc::new(LargeStringArray::from(data)) as ArrayRef,
809
0
        DataType::Utf8View => Arc::new(StringViewArray::from(data)) as ArrayRef,
810
6
        DataType::List(_) => Arc::new(ListArray::from(data)) as ArrayRef,
811
0
        DataType::LargeList(_) => Arc::new(LargeListArray::from(data)) as ArrayRef,
812
0
        DataType::ListView(_) => Arc::new(ListViewArray::from(data)) as ArrayRef,
813
0
        DataType::LargeListView(_) => Arc::new(LargeListViewArray::from(data)) as ArrayRef,
814
7
        DataType::Struct(_) => Arc::new(StructArray::from(data)) as ArrayRef,
815
3
        DataType::Map(_, _) => Arc::new(MapArray::from(data)) as ArrayRef,
816
0
        DataType::Union(_, _) => Arc::new(UnionArray::from(data)) as ArrayRef,
817
0
        DataType::FixedSizeList(_, _) => Arc::new(FixedSizeListArray::from(data)) as ArrayRef,
818
0
        DataType::Dictionary(ref key_type, _) => match key_type.as_ref() {
819
0
            DataType::Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)) as ArrayRef,
820
0
            DataType::Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)) as ArrayRef,
821
0
            DataType::Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)) as ArrayRef,
822
0
            DataType::Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)) as ArrayRef,
823
0
            DataType::UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)) as ArrayRef,
824
0
            DataType::UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)) as ArrayRef,
825
0
            DataType::UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)) as ArrayRef,
826
0
            DataType::UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)) as ArrayRef,
827
0
            dt => panic!("Unexpected dictionary key type {dt:?}"),
828
        },
829
0
        DataType::RunEndEncoded(ref run_ends_type, _) => match run_ends_type.data_type() {
830
0
            DataType::Int16 => Arc::new(RunArray::<Int16Type>::from(data)) as ArrayRef,
831
0
            DataType::Int32 => Arc::new(RunArray::<Int32Type>::from(data)) as ArrayRef,
832
0
            DataType::Int64 => Arc::new(RunArray::<Int64Type>::from(data)) as ArrayRef,
833
0
            dt => panic!("Unexpected data type for run_ends array {dt:?}"),
834
        },
835
0
        DataType::Null => Arc::new(NullArray::from(data)) as ArrayRef,
836
0
        DataType::Decimal32(_, _) => Arc::new(Decimal32Array::from(data)) as ArrayRef,
837
0
        DataType::Decimal64(_, _) => Arc::new(Decimal64Array::from(data)) as ArrayRef,
838
0
        DataType::Decimal128(_, _) => Arc::new(Decimal128Array::from(data)) as ArrayRef,
839
0
        DataType::Decimal256(_, _) => Arc::new(Decimal256Array::from(data)) as ArrayRef,
840
0
        dt => panic!("Unexpected data type {dt:?}"),
841
    }
842
42
}
843
844
/// Creates a new empty array
845
///
846
/// ```
847
/// use std::sync::Arc;
848
/// use arrow_schema::DataType;
849
/// use arrow_array::{ArrayRef, Int32Array, new_empty_array};
850
///
851
/// let empty_array = new_empty_array(&DataType::Int32);
852
/// let array: ArrayRef = Arc::new(Int32Array::from(vec![] as Vec<i32>));
853
///
854
/// assert_eq!(&array, &empty_array);
855
/// ```
856
0
pub fn new_empty_array(data_type: &DataType) -> ArrayRef {
857
0
    let data = ArrayData::new_empty(data_type);
858
0
    make_array(data)
859
0
}
860
861
/// Creates a new array of `data_type` of length `length` filled
862
/// entirely of `NULL` values
863
///
864
/// ```
865
/// use std::sync::Arc;
866
/// use arrow_schema::DataType;
867
/// use arrow_array::{ArrayRef, Int32Array, new_null_array};
868
///
869
/// let null_array = new_null_array(&DataType::Int32, 3);
870
/// let array: ArrayRef = Arc::new(Int32Array::from(vec![None, None, None]));
871
///
872
/// assert_eq!(&array, &null_array);
873
/// ```
874
0
pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
875
0
    make_array(ArrayData::new_null(data_type, length))
876
0
}
877
878
/// Helper function that gets offset from an [`ArrayData`]
879
///
880
/// # Safety
881
///
882
/// - ArrayData must contain a valid [`OffsetBuffer`] as its first buffer
883
53
unsafe fn get_offsets<O: ArrowNativeType>(data: &ArrayData) -> OffsetBuffer<O> {
884
53
    match data.is_empty() && 
data3
.buffers()[0].
is_empty3
() {
885
0
        true => OffsetBuffer::new_empty(),
886
        false => {
887
53
            let buffer =
888
53
                ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len() + 1);
889
            // Safety:
890
            // ArrayData is valid
891
53
            unsafe { OffsetBuffer::new_unchecked(buffer) }
892
        }
893
    }
894
53
}
895
896
/// Helper function for printing potentially long arrays.
897
0
fn print_long_array<A, F>(array: &A, f: &mut std::fmt::Formatter, print_item: F) -> std::fmt::Result
898
0
where
899
0
    A: Array,
900
0
    F: Fn(&A, usize, &mut std::fmt::Formatter) -> std::fmt::Result,
901
{
902
0
    let head = std::cmp::min(10, array.len());
903
904
0
    for i in 0..head {
905
0
        if array.is_null(i) {
906
0
            writeln!(f, "  null,")?;
907
        } else {
908
0
            write!(f, "  ")?;
909
0
            print_item(array, i, f)?;
910
0
            writeln!(f, ",")?;
911
        }
912
    }
913
0
    if array.len() > 10 {
914
0
        if array.len() > 20 {
915
0
            writeln!(f, "  ...{} elements...,", array.len() - 20)?;
916
0
        }
917
918
0
        let tail = std::cmp::max(head, array.len() - 10);
919
920
0
        for i in tail..array.len() {
921
0
            if array.is_null(i) {
922
0
                writeln!(f, "  null,")?;
923
            } else {
924
0
                write!(f, "  ")?;
925
0
                print_item(array, i, f)?;
926
0
                writeln!(f, ",")?;
927
            }
928
        }
929
0
    }
930
0
    Ok(())
931
0
}
932
933
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cast::{as_union_array, downcast_array};
    use crate::downcast_run_array;
    use arrow_buffer::MutableBuffer;
    use arrow_schema::{Field, Fields, UnionFields, UnionMode};

    /// Asserts that each of the first `count` slots of `array` is null.
    fn assert_all_null<A: Array>(array: &A, count: usize) {
        for idx in 0..count {
            assert!(array.is_null(idx));
        }
    }

    #[test]
    fn test_empty_primitive() {
        let empty = new_empty_array(&DataType::Int32);
        let ints = empty.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(ints.len(), 0);
        let no_values: &[i32] = &[];
        assert_eq!(ints.values(), no_values);
    }

    #[test]
    fn test_empty_variable_sized() {
        let empty = new_empty_array(&DataType::Utf8);
        let strings = empty.as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(strings.len(), 0);
        assert_eq!(strings.value_offsets()[0], 0i32);
    }

    #[test]
    fn test_empty_list_primitive() {
        let item_field = Field::new_list_field(DataType::Int32, false);
        let data_type = DataType::List(Arc::new(item_field));
        let empty = new_empty_array(&data_type);
        let lists = empty.as_any().downcast_ref::<ListArray>().unwrap();
        assert_eq!(lists.len(), 0);
        assert_eq!(lists.value_offsets()[0], 0i32);
    }

    #[test]
    fn test_null_boolean() {
        let nulls = new_null_array(&DataType::Boolean, 9);
        let bools = nulls.as_any().downcast_ref::<BooleanArray>().unwrap();
        assert_eq!(bools.len(), 9);
        assert_all_null(bools, 9);
    }

    #[test]
    fn test_null_primitive() {
        let nulls = new_null_array(&DataType::Int32, 9);
        let ints = nulls.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(ints.len(), 9);
        assert_all_null(ints, 9);
    }

    #[test]
    fn test_null_struct() {
        // A null struct may wrap a non-nullable child,
        // see https://github.com/apache/arrow-rs/pull/3244 for details
        let child_fields = vec![Field::new("data", DataType::Int64, false)];
        let struct_type = DataType::Struct(child_fields.into());
        let nulls = new_null_array(&struct_type, 9);

        let structs = nulls.as_any().downcast_ref::<StructArray>().unwrap();
        assert_eq!(structs.len(), 9);
        assert_eq!(structs.column(0).len(), 9);
        assert_all_null(structs, 9);

        // Slicing the resulting array must not panic.
        structs.slice(0, 5);
    }

    #[test]
    fn test_null_variable_sized() {
        let nulls = new_null_array(&DataType::Utf8, 9);
        let strings = nulls.as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(strings.len(), 9);
        assert_eq!(strings.value_offsets()[9], 0i32);
        assert_all_null(strings, 9);
    }

    #[test]
    fn test_null_list_primitive() {
        let item_field = Field::new_list_field(DataType::Int32, true);
        let data_type = DataType::List(Arc::new(item_field));
        let nulls = new_null_array(&data_type, 9);
        let lists = nulls.as_any().downcast_ref::<ListArray>().unwrap();
        assert_eq!(lists.len(), 9);
        assert_eq!(lists.value_offsets()[9], 0i32);
        assert_all_null(lists, 9);
    }

    #[test]
    fn test_null_map() {
        let entry_type = DataType::Struct(Fields::from(vec![
            Field::new("key", DataType::Utf8, false),
            Field::new("value", DataType::Int32, true),
        ]));
        let entry_field = Field::new("entry", entry_type, false);
        let data_type = DataType::Map(Arc::new(entry_field), false);

        let nulls = new_null_array(&data_type, 9);
        let maps = nulls.as_any().downcast_ref::<MapArray>().unwrap();
        assert_eq!(maps.len(), 9);
        assert_eq!(maps.value_offsets()[9], 0i32);
        assert_all_null(maps, 9);
    }

    #[test]
    fn test_null_dictionary() {
        let values: Vec<Option<&str>> = vec![None; 9];

        let dictionary: DictionaryArray<Int8Type> = values.into_iter().collect();
        let array = Arc::new(dictionary) as ArrayRef;

        let null_array = new_null_array(array.data_type(), 9);
        assert_eq!(&array, &null_array);

        let collected_key_bytes = array.to_data().buffers()[0].len();
        let null_key_bytes = null_array.to_data().buffers()[0].len();
        assert_eq!(collected_key_bytes, null_key_bytes);
    }

    #[test]
    fn test_null_union() {
        for mode in [UnionMode::Sparse, UnionMode::Dense] {
            let fields = UnionFields::new(
                vec![2, 1],
                vec![
                    Field::new("foo", DataType::Int32, true),
                    Field::new("bar", DataType::Int64, true),
                ],
            );
            let data_type = DataType::Union(fields, mode);
            let nulls = new_null_array(&data_type, 4);

            let union = as_union_array(nulls.as_ref());
            assert_eq!(union.len(), 4);
            assert_eq!(union.null_count(), 0);
            assert_eq!(union.logical_null_count(), 4);

            for idx in 0..4 {
                let slot = union.value(idx);
                assert_eq!(slot.len(), 1);
                assert_eq!(slot.null_count(), 1);
                assert_eq!(slot.logical_null_count(), 1);
                assert!(slot.is_null(0));
            }

            union.to_data().validate_full().unwrap();
        }
    }

    #[test]
    #[allow(unused_parens)]
    fn test_null_runs() {
        for run_end_type in [DataType::Int16, DataType::Int32, DataType::Int64] {
            let run_ends_field = Arc::new(Field::new("run_ends", run_end_type, false));
            let values_field = Arc::new(Field::new("values", DataType::Utf8, true));
            let data_type = DataType::RunEndEncoded(run_ends_field, values_field);

            let array = new_null_array(&data_type, 4);
            let array = array.as_ref();

            downcast_run_array! {
                array => {
                    assert_eq!(array.len(), 4);
                    assert_eq!(array.null_count(), 0);
                    assert_eq!(array.logical_null_count(), 4);
                    assert_eq!(array.values().len(), 1);
                    assert_eq!(array.values().null_count(), 1);
                    assert_eq!(array.run_ends().len(), 4);
                    assert_eq!(array.run_ends().values(), &[4]);

                    let idx = array.get_physical_indices(&[0, 1, 2, 3]).unwrap();
                    assert_eq!(idx, &[0,0,0,0]);
                }
                d => unreachable!("{d}")
            }
        }
    }

    #[test]
    fn test_null_fixed_size_binary() {
        for size in [1, 2, 7] {
            let nulls = new_null_array(&DataType::FixedSizeBinary(size), 6);
            let binary = nulls
                .as_ref()
                .as_any()
                .downcast_ref::<FixedSizeBinaryArray>()
                .unwrap();

            assert_eq!(binary.len(), 6);
            assert_eq!(binary.null_count(), 6);
            assert_eq!(binary.logical_null_count(), 6);
            for item in binary.iter() {
                assert!(item.is_none());
            }
        }
    }

    #[test]
    fn test_memory_size_null() {
        let null_arr = NullArray::new(32);

        assert_eq!(0, null_arr.get_buffer_memory_size());

        let reported = null_arr.get_array_memory_size();
        assert_eq!(std::mem::size_of::<usize>(), reported);
    }

    #[test]
    fn test_memory_size_primitive() {
        let filled = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
        let empty = PrimitiveArray::<Int64Type>::from(ArrayData::new_empty(filled.data_type()));

        // measure relative to an empty array so the fixed per-array overhead cancels out
        let payload = filled.get_array_memory_size() - empty.get_array_memory_size();
        assert_eq!(payload, 128 * std::mem::size_of::<i64>());
    }

    #[test]
    fn test_memory_size_primitive_sliced() {
        let whole = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
        let first_half = whole.slice(0, 64);
        let second_half = whole.slice(64, 64);

        // each slice reports the full buffer memory usage, even though the buffers are shared
        let whole_size = whole.get_array_memory_size();
        assert_eq!(first_half.get_array_memory_size(), whole_size);
        assert_eq!(second_half.get_array_memory_size(), whole_size);
    }

    #[test]
    fn test_memory_size_primitive_nullable() {
        let arr: PrimitiveArray<Int64Type> = (0..128)
            .map(|i| if i % 20 == 0 { Some(i) } else { None })
            .collect();

        let bitmap_only = ArrayData::builder(arr.data_type().clone())
            .add_buffer(MutableBuffer::new(0).into())
            .null_bit_buffer(Some(MutableBuffer::new_null(0).into()))
            .build()
            .unwrap();
        let empty_with_bitmap = PrimitiveArray::<Int64Type>::from(bitmap_only);

        // expected size is the size of the PrimitiveArray struct,
        // which includes the optional validity buffer
        // plus one buffer on the heap
        assert_eq!(
            std::mem::size_of::<PrimitiveArray<Int64Type>>(),
            empty_with_bitmap.get_array_memory_size()
        );

        // measure relative to the empty array so the fixed overhead cancels out;
        // the size of the validity bitmap is rounded up to 64 bytes
        let payload = arr.get_array_memory_size() - empty_with_bitmap.get_array_memory_size();
        assert_eq!(payload, 128 * std::mem::size_of::<i64>() + 64);
    }

    #[test]
    fn test_memory_size_dictionary() {
        let values = PrimitiveArray::<Int64Type>::from_iter_values(0..16);
        let keys = PrimitiveArray::<Int16Type>::from_iter_values(
            (0..256).map(|i| (i % values.len()) as i16),
        );

        let dict_data_type = DataType::Dictionary(
            Box::new(keys.data_type().clone()),
            Box::new(values.data_type().clone()),
        );
        let dict_data = keys
            .into_data()
            .into_builder()
            .data_type(dict_data_type)
            .child_data(vec![values.into_data()])
            .build()
            .unwrap();

        let empty_type =
            DataType::Dictionary(Box::new(DataType::Int16), Box::new(DataType::Int64));
        let empty_data = ArrayData::new_empty(&empty_type);

        let arr = DictionaryArray::<Int16Type>::from(dict_data);
        let empty = DictionaryArray::<Int16Type>::from(empty_data);

        let expected_keys_size = 256 * std::mem::size_of::<i16>();
        let keys_size = arr.keys().get_array_memory_size() - empty.keys().get_array_memory_size();
        assert_eq!(keys_size, expected_keys_size);

        let expected_values_size = 16 * std::mem::size_of::<i64>();
        let values_size =
            arr.values().get_array_memory_size() - empty.values().get_array_memory_size();
        assert_eq!(values_size, expected_values_size);

        let expected_size = expected_keys_size + expected_values_size;
        let total_size = arr.get_array_memory_size() - empty.get_array_memory_size();
        assert_eq!(total_size, expected_size);
    }

    /// Stand-in for any function whose parameter is a `&dyn Array`.
    fn compute_my_thing(arr: &dyn Array) -> bool {
        !arr.is_empty()
    }

    #[test]
    fn test_array_ref_as_array() {
        let concrete: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();

        // a concrete array can be passed directly
        assert!(compute_my_thing(&concrete));

        // the same must hold once it is wrapped as an ArrayRef
        let shared: ArrayRef = Arc::new(concrete);
        assert!(compute_my_thing(&shared));
        assert!(compute_my_thing(shared.as_ref()));
    }

    #[test]
    fn test_downcast_array() {
        let original: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();

        let boxed: ArrayRef = Arc::new(original);
        let recovered: Int32Array = downcast_array(&boxed);

        let expected: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
        assert_eq!(recovered, expected);
    }
}