Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-array/src/array/map_array.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use crate::array::{get_offsets, print_long_array};
19
use crate::iterator::MapArrayIter;
20
use crate::{make_array, Array, ArrayAccessor, ArrayRef, ListArray, StringArray, StructArray};
21
use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, OffsetBuffer, ToByteSlice};
22
use arrow_data::{ArrayData, ArrayDataBuilder};
23
use arrow_schema::{ArrowError, DataType, Field, FieldRef};
24
use std::any::Any;
25
use std::sync::Arc;
26
27
/// An array of key-value maps
28
///
29
/// Keys should always be non-null, but values can be null.
30
///
31
/// [`MapArray`] is physically a [`ListArray`] of key-value pairs stored as an `entries`
32
/// [`StructArray`] with 2 child fields.
33
///
34
/// See [`MapBuilder`](crate::builder::MapBuilder) for how to construct a [`MapArray`]
35
#[derive(Clone)]
36
pub struct MapArray {
37
    data_type: DataType,
38
    nulls: Option<NullBuffer>,
39
    /// The [`StructArray`] that is the direct child of this array
40
    entries: StructArray,
41
    /// The start and end offsets of each entry
42
    value_offsets: OffsetBuffer<i32>,
43
}
44
45
impl MapArray {
46
    /// Create a new [`MapArray`] from the provided parts
47
    ///
48
    /// See [`MapBuilder`](crate::builder::MapBuilder) for a higher-level interface
49
    /// to construct a [`MapArray`]
50
    ///
51
    /// # Errors
52
    ///
53
    /// Errors if
54
    ///
55
    /// * `offsets.len() - 1 != nulls.len()`
56
    /// * `offsets.last() > entries.len()`
57
    /// * `field.is_nullable()`
58
    /// * `entries.null_count() != 0`
59
    /// * `entries.columns().len() != 2`
60
    /// * `field.data_type() != entries.data_type()`
61
20
    pub fn try_new(
62
20
        field: FieldRef,
63
20
        offsets: OffsetBuffer<i32>,
64
20
        entries: StructArray,
65
20
        nulls: Option<NullBuffer>,
66
20
        ordered: bool,
67
20
    ) -> Result<Self, ArrowError> {
68
20
        let len = offsets.len() - 1; // Offsets guaranteed to not be empty
69
20
        let end_offset = offsets.last().unwrap().as_usize();
70
        // don't need to check other values of `offsets` because they are checked
71
        // during construction of `OffsetBuffer`
72
20
        if end_offset > entries.len() {
73
0
            return Err(ArrowError::InvalidArgumentError(format!(
74
0
                "Max offset of {end_offset} exceeds length of entries {}",
75
0
                entries.len()
76
0
            )));
77
20
        }
78
79
20
        if let Some(n) = nulls.as_ref() {
80
7
            if n.len() != len {
81
0
                return Err(ArrowError::InvalidArgumentError(format!(
82
0
                    "Incorrect length of null buffer for MapArray, expected {len} got {}",
83
0
                    n.len(),
84
0
                )));
85
7
            }
86
13
        }
87
20
        if field.is_nullable() || entries.null_count() != 0 {
88
0
            return Err(ArrowError::InvalidArgumentError(
89
0
                "MapArray entries cannot contain nulls".to_string(),
90
0
            ));
91
20
        }
92
93
20
        if field.data_type() != entries.data_type() {
94
0
            return Err(ArrowError::InvalidArgumentError(format!(
95
0
                "MapArray expected data type {} got {} for {:?}",
96
0
                field.data_type(),
97
0
                entries.data_type(),
98
0
                field.name()
99
0
            )));
100
20
        }
101
102
20
        if entries.columns().len() != 2 {
103
0
            return Err(ArrowError::InvalidArgumentError(format!(
104
0
                "MapArray entries must contain two children, got {}",
105
0
                entries.columns().len()
106
0
            )));
107
20
        }
108
109
20
        Ok(Self {
110
20
            data_type: DataType::Map(field, ordered),
111
20
            nulls,
112
20
            entries,
113
20
            value_offsets: offsets,
114
20
        })
115
20
    }
116
117
    /// Create a new [`MapArray`] from the provided parts
118
    ///
119
    /// See [`MapBuilder`](crate::builder::MapBuilder) for a higher-level interface
120
    /// to construct a [`MapArray`]
121
    ///
122
    /// # Panics
123
    ///
124
    /// Panics if [`Self::try_new`] returns an error
125
20
    pub fn new(
126
20
        field: FieldRef,
127
20
        offsets: OffsetBuffer<i32>,
128
20
        entries: StructArray,
129
20
        nulls: Option<NullBuffer>,
130
20
        ordered: bool,
131
20
    ) -> Self {
132
20
        Self::try_new(field, offsets, entries, nulls, ordered).unwrap()
133
20
    }
134
135
    /// Deconstruct this array into its constituent parts
136
0
    pub fn into_parts(
137
0
        self,
138
0
    ) -> (
139
0
        FieldRef,
140
0
        OffsetBuffer<i32>,
141
0
        StructArray,
142
0
        Option<NullBuffer>,
143
0
        bool,
144
0
    ) {
145
0
        let (f, ordered) = match self.data_type {
146
0
            DataType::Map(f, ordered) => (f, ordered),
147
0
            _ => unreachable!(),
148
        };
149
0
        (f, self.value_offsets, self.entries, self.nulls, ordered)
150
0
    }
151
152
    /// Returns a reference to the offsets of this map
153
    ///
154
    /// Unlike [`Self::value_offsets`] this returns the [`OffsetBuffer`]
155
    /// allowing for zero-copy cloning
156
    #[inline]
157
0
    pub fn offsets(&self) -> &OffsetBuffer<i32> {
158
0
        &self.value_offsets
159
0
    }
160
161
    /// Returns a reference to the keys of this map
162
0
    pub fn keys(&self) -> &ArrayRef {
163
0
        self.entries.column(0)
164
0
    }
165
166
    /// Returns a reference to the values of this map
167
0
    pub fn values(&self) -> &ArrayRef {
168
0
        self.entries.column(1)
169
0
    }
170
171
    /// Returns a reference to the [`StructArray`] entries of this map
172
0
    pub fn entries(&self) -> &StructArray {
173
0
        &self.entries
174
0
    }
175
176
    /// Returns the data type of the map's keys.
177
0
    pub fn key_type(&self) -> &DataType {
178
0
        self.keys().data_type()
179
0
    }
180
181
    /// Returns the data type of the map's values.
182
0
    pub fn value_type(&self) -> &DataType {
183
0
        self.values().data_type()
184
0
    }
185
186
    /// Returns ith value of this map array.
187
    ///
188
    /// Note: This method does not check for nulls and the value is arbitrary
189
    /// if [`is_null`](Self::is_null) returns true for the index.
190
    ///
191
    /// # Safety
192
    /// Caller must ensure that the index is within the array bounds
193
0
    pub unsafe fn value_unchecked(&self, i: usize) -> StructArray {
194
0
        let end = *self.value_offsets().get_unchecked(i + 1);
195
0
        let start = *self.value_offsets().get_unchecked(i);
196
0
        self.entries
197
0
            .slice(start.to_usize().unwrap(), (end - start).to_usize().unwrap())
198
0
    }
199
200
    /// Returns ith value of this map array.
201
    ///
202
    /// This is a [`StructArray`] containing two fields
203
    ///
204
    /// Note: This method does not check for nulls and the value is arbitrary
205
    /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index.
206
    ///
207
    /// # Panics
208
    /// Panics if index `i` is out of bounds
209
1
    pub fn value(&self, i: usize) -> StructArray {
210
1
        let end = self.value_offsets()[i + 1] as usize;
211
1
        let start = self.value_offsets()[i] as usize;
212
1
        self.entries.slice(start, end - start)
213
1
    }
214
215
    /// Returns the offset values in the offsets buffer
216
    #[inline]
217
4
    pub fn value_offsets(&self) -> &[i32] {
218
4
        &self.value_offsets
219
4
    }
220
221
    /// Returns the length for value at index `i`.
222
    #[inline]
223
2
    pub fn value_length(&self, i: usize) -> i32 {
224
2
        let offsets = self.value_offsets();
225
2
        offsets[i + 1] - offsets[i]
226
2
    }
227
228
    /// Returns a zero-copy slice of this array with the indicated offset and length.
229
6
    pub fn slice(&self, offset: usize, length: usize) -> Self {
230
        Self {
231
6
            data_type: self.data_type.clone(),
232
6
            nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
233
6
            entries: self.entries.clone(),
234
6
            value_offsets: self.value_offsets.slice(offset, length),
235
        }
236
6
    }
237
238
    /// constructs a new iterator
239
0
    pub fn iter(&self) -> MapArrayIter<'_> {
240
0
        MapArrayIter::new(self)
241
0
    }
242
}
243
244
impl From<ArrayData> for MapArray {
245
6
    fn from(data: ArrayData) -> Self {
246
6
        Self::try_new_from_array_data(data)
247
6
            .expect("Expected infallible creation of MapArray from ArrayData failed")
248
6
    }
249
}
250
251
impl From<MapArray> for ArrayData {
252
27
    fn from(array: MapArray) -> Self {
253
27
        let len = array.len();
254
27
        let builder = ArrayDataBuilder::new(array.data_type)
255
27
            .len(len)
256
27
            .nulls(array.nulls)
257
27
            .buffers(vec![array.value_offsets.into_inner().into_inner()])
258
27
            .child_data(vec![array.entries.to_data()]);
259
260
27
        unsafe { builder.build_unchecked() }
261
27
    }
262
}
263
264
impl MapArray {
265
6
    fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
266
6
        if !matches!(data.data_type(), DataType::Map(_, _)) {
267
0
            return Err(ArrowError::InvalidArgumentError(format!(
268
0
                "MapArray expected ArrayData with DataType::Map got {}",
269
0
                data.data_type()
270
0
            )));
271
6
        }
272
273
6
        if data.buffers().len() != 1 {
274
0
            return Err(ArrowError::InvalidArgumentError(format!(
275
0
                "MapArray data should contain a single buffer only (value offsets), had {}",
276
0
                data.len()
277
0
            )));
278
6
        }
279
280
6
        if data.child_data().len() != 1 {
281
0
            return Err(ArrowError::InvalidArgumentError(format!(
282
0
                "MapArray should contain a single child array (values array), had {}",
283
0
                data.child_data().len()
284
0
            )));
285
6
        }
286
287
6
        let entries = data.child_data()[0].clone();
288
289
6
        if let DataType::Struct(fields) = entries.data_type() {
290
6
            if fields.len() != 2 {
291
0
                return Err(ArrowError::InvalidArgumentError(format!(
292
0
                    "MapArray should contain a struct array with 2 fields, have {} fields",
293
0
                    fields.len()
294
0
                )));
295
6
            }
296
        } else {
297
0
            return Err(ArrowError::InvalidArgumentError(format!(
298
0
                "MapArray should contain a struct array child, found {:?}",
299
0
                entries.data_type()
300
0
            )));
301
        }
302
6
        let entries = entries.into();
303
304
        // SAFETY:
305
        // ArrayData is valid, and verified type above
306
6
        let value_offsets = unsafe { get_offsets(&data) };
307
308
6
        Ok(Self {
309
6
            data_type: data.data_type().clone(),
310
6
            nulls: data.nulls().cloned(),
311
6
            entries,
312
6
            value_offsets,
313
6
        })
314
6
    }
315
316
    /// Creates map array from provided keys, values and entry_offsets.
317
    pub fn new_from_strings<'a>(
318
        keys: impl Iterator<Item = &'a str>,
319
        values: &dyn Array,
320
        entry_offsets: &[u32],
321
    ) -> Result<Self, ArrowError> {
322
        let entry_offsets_buffer = Buffer::from(entry_offsets.to_byte_slice());
323
        let keys_data = StringArray::from_iter_values(keys);
324
325
        let keys_field = Arc::new(Field::new("keys", DataType::Utf8, false));
326
        let values_field = Arc::new(Field::new(
327
            "values",
328
            values.data_type().clone(),
329
            values.null_count() > 0,
330
        ));
331
332
        let entry_struct = StructArray::from(vec![
333
            (keys_field, Arc::new(keys_data) as ArrayRef),
334
            (values_field, make_array(values.to_data())),
335
        ]);
336
337
        let map_data_type = DataType::Map(
338
            Arc::new(Field::new(
339
                "entries",
340
                entry_struct.data_type().clone(),
341
                false,
342
            )),
343
            false,
344
        );
345
        let map_data = ArrayData::builder(map_data_type)
346
            .len(entry_offsets.len() - 1)
347
            .add_buffer(entry_offsets_buffer)
348
            .add_child_data(entry_struct.into_data())
349
            .build()?;
350
351
        Ok(MapArray::from(map_data))
352
    }
353
}
354
355
impl Array for MapArray {
356
2
    fn as_any(&self) -> &dyn Any {
357
2
        self
358
2
    }
359
360
27
    fn to_data(&self) -> ArrayData {
361
27
        self.clone().into_data()
362
27
    }
363
364
27
    fn into_data(self) -> ArrayData {
365
27
        self.into()
366
27
    }
367
368
38
    fn data_type(&self) -> &DataType {
369
38
        &self.data_type
370
38
    }
371
372
6
    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
373
6
        Arc::new(self.slice(offset, length))
374
6
    }
375
376
80
    fn len(&self) -> usize {
377
80
        self.value_offsets.len() - 1
378
80
    }
379
380
0
    fn is_empty(&self) -> bool {
381
0
        self.value_offsets.len() <= 1
382
0
    }
383
384
0
    fn shrink_to_fit(&mut self) {
385
0
        if let Some(nulls) = &mut self.nulls {
386
0
            nulls.shrink_to_fit();
387
0
        }
388
0
        self.entries.shrink_to_fit();
389
0
        self.value_offsets.shrink_to_fit();
390
0
    }
391
392
0
    fn offset(&self) -> usize {
393
0
        0
394
0
    }
395
396
0
    fn nulls(&self) -> Option<&NullBuffer> {
397
0
        self.nulls.as_ref()
398
0
    }
399
400
0
    fn logical_null_count(&self) -> usize {
401
        // More efficient than the default implementation
402
0
        self.null_count()
403
0
    }
404
405
0
    fn get_buffer_memory_size(&self) -> usize {
406
0
        let mut size = self.entries.get_buffer_memory_size();
407
0
        size += self.value_offsets.inner().inner().capacity();
408
0
        if let Some(n) = self.nulls.as_ref() {
409
0
            size += n.buffer().capacity();
410
0
        }
411
0
        size
412
0
    }
413
414
0
    fn get_array_memory_size(&self) -> usize {
415
0
        let mut size = std::mem::size_of::<Self>() + self.entries.get_array_memory_size();
416
0
        size += self.value_offsets.inner().inner().capacity();
417
0
        if let Some(n) = self.nulls.as_ref() {
418
0
            size += n.buffer().capacity();
419
0
        }
420
0
        size
421
0
    }
422
}
423
424
impl ArrayAccessor for &MapArray {
425
    type Item = StructArray;
426
427
0
    fn value(&self, index: usize) -> Self::Item {
428
0
        MapArray::value(self, index)
429
0
    }
430
431
0
    unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
432
0
        MapArray::value(self, index)
433
0
    }
434
}
435
436
impl std::fmt::Debug for MapArray {
437
0
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
438
0
        write!(f, "MapArray\n[\n")?;
439
0
        print_long_array(self, f, |array, index, f| {
440
0
            std::fmt::Debug::fmt(&array.value(index), f)
441
0
        })?;
442
0
        write!(f, "]")
443
0
    }
444
}
445
446
impl From<MapArray> for ListArray {
447
0
    fn from(value: MapArray) -> Self {
448
0
        let field = match value.data_type() {
449
0
            DataType::Map(field, _) => field,
450
0
            _ => unreachable!("This should be a map type."),
451
        };
452
0
        let data_type = DataType::List(field.clone());
453
0
        let builder = value.into_data().into_builder().data_type(data_type);
454
0
        let array_data = unsafe { builder.build_unchecked() };
455
456
0
        ListArray::from(array_data)
457
0
    }
458
}
459
460
#[cfg(test)]
461
mod tests {
462
    use crate::cast::AsArray;
463
    use crate::types::UInt32Type;
464
    use crate::{Int32Array, UInt32Array};
465
    use arrow_schema::Fields;
466
467
    use super::*;
468
469
    fn create_from_buffers() -> MapArray {
470
        // Construct key and values
471
        let keys_data = ArrayData::builder(DataType::Int32)
472
            .len(8)
473
            .add_buffer(Buffer::from([0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
474
            .build()
475
            .unwrap();
476
        let values_data = ArrayData::builder(DataType::UInt32)
477
            .len(8)
478
            .add_buffer(Buffer::from(
479
                [0u32, 10, 20, 30, 40, 50, 60, 70].to_byte_slice(),
480
            ))
481
            .build()
482
            .unwrap();
483
484
        // Construct a buffer for value offsets, for the nested array:
485
        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
486
        let entry_offsets = Buffer::from([0, 3, 6, 8].to_byte_slice());
487
488
        let keys = Arc::new(Field::new("keys", DataType::Int32, false));
489
        let values = Arc::new(Field::new("values", DataType::UInt32, false));
490
        let entry_struct = StructArray::from(vec![
491
            (keys, make_array(keys_data)),
492
            (values, make_array(values_data)),
493
        ]);
494
495
        // Construct a map array from the above two
496
        let map_data_type = DataType::Map(
497
            Arc::new(Field::new(
498
                "entries",
499
                entry_struct.data_type().clone(),
500
                false,
501
            )),
502
            false,
503
        );
504
        let map_data = ArrayData::builder(map_data_type)
505
            .len(3)
506
            .add_buffer(entry_offsets)
507
            .add_child_data(entry_struct.into_data())
508
            .build()
509
            .unwrap();
510
        MapArray::from(map_data)
511
    }
512
513
    #[test]
514
    fn test_map_array() {
515
        // Construct key and values
516
        let key_data = ArrayData::builder(DataType::Int32)
517
            .len(8)
518
            .add_buffer(Buffer::from([0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
519
            .build()
520
            .unwrap();
521
        let value_data = ArrayData::builder(DataType::UInt32)
522
            .len(8)
523
            .add_buffer(Buffer::from(
524
                [0u32, 10, 20, 0, 40, 0, 60, 70].to_byte_slice(),
525
            ))
526
            .null_bit_buffer(Some(Buffer::from(&[0b11010110])))
527
            .build()
528
            .unwrap();
529
530
        // Construct a buffer for value offsets, for the nested array:
531
        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
532
        let entry_offsets = Buffer::from([0, 3, 6, 8].to_byte_slice());
533
534
        let keys_field = Arc::new(Field::new("keys", DataType::Int32, false));
535
        let values_field = Arc::new(Field::new("values", DataType::UInt32, true));
536
        let entry_struct = StructArray::from(vec![
537
            (keys_field.clone(), make_array(key_data)),
538
            (values_field.clone(), make_array(value_data.clone())),
539
        ]);
540
541
        // Construct a map array from the above two
542
        let map_data_type = DataType::Map(
543
            Arc::new(Field::new(
544
                "entries",
545
                entry_struct.data_type().clone(),
546
                false,
547
            )),
548
            false,
549
        );
550
        let map_data = ArrayData::builder(map_data_type)
551
            .len(3)
552
            .add_buffer(entry_offsets)
553
            .add_child_data(entry_struct.into_data())
554
            .build()
555
            .unwrap();
556
        let map_array = MapArray::from(map_data);
557
558
        assert_eq!(value_data, map_array.values().to_data());
559
        assert_eq!(&DataType::UInt32, map_array.value_type());
560
        assert_eq!(3, map_array.len());
561
        assert_eq!(0, map_array.null_count());
562
        assert_eq!(6, map_array.value_offsets()[2]);
563
        assert_eq!(2, map_array.value_length(2));
564
565
        let key_array = Arc::new(Int32Array::from(vec![0, 1, 2])) as ArrayRef;
566
        let value_array =
567
            Arc::new(UInt32Array::from(vec![None, Some(10u32), Some(20)])) as ArrayRef;
568
        let struct_array = StructArray::from(vec![
569
            (keys_field.clone(), key_array),
570
            (values_field.clone(), value_array),
571
        ]);
572
        assert_eq!(
573
            struct_array,
574
            StructArray::from(map_array.value(0).into_data())
575
        );
576
        assert_eq!(
577
            &struct_array,
578
            unsafe { map_array.value_unchecked(0) }
579
                .as_any()
580
                .downcast_ref::<StructArray>()
581
                .unwrap()
582
        );
583
        for i in 0..3 {
584
            assert!(map_array.is_valid(i));
585
            assert!(!map_array.is_null(i));
586
        }
587
588
        // Now test with a non-zero offset
589
        let map_array = map_array.slice(1, 2);
590
591
        assert_eq!(value_data, map_array.values().to_data());
592
        assert_eq!(&DataType::UInt32, map_array.value_type());
593
        assert_eq!(2, map_array.len());
594
        assert_eq!(0, map_array.null_count());
595
        assert_eq!(6, map_array.value_offsets()[1]);
596
        assert_eq!(2, map_array.value_length(1));
597
598
        let key_array = Arc::new(Int32Array::from(vec![3, 4, 5])) as ArrayRef;
599
        let value_array = Arc::new(UInt32Array::from(vec![None, Some(40), None])) as ArrayRef;
600
        let struct_array =
601
            StructArray::from(vec![(keys_field, key_array), (values_field, value_array)]);
602
        assert_eq!(
603
            &struct_array,
604
            map_array
605
                .value(0)
606
                .as_any()
607
                .downcast_ref::<StructArray>()
608
                .unwrap()
609
        );
610
        assert_eq!(
611
            &struct_array,
612
            unsafe { map_array.value_unchecked(0) }
613
                .as_any()
614
                .downcast_ref::<StructArray>()
615
                .unwrap()
616
        );
617
    }
618
619
    #[test]
620
    #[ignore = "Test fails because slice of <list<struct>> is still buggy"]
621
    fn test_map_array_slice() {
622
        let map_array = create_from_buffers();
623
624
        let sliced_array = map_array.slice(1, 2);
625
        assert_eq!(2, sliced_array.len());
626
        assert_eq!(1, sliced_array.offset());
627
        let sliced_array_data = sliced_array.to_data();
628
        for array_data in sliced_array_data.child_data() {
629
            assert_eq!(array_data.offset(), 1);
630
        }
631
632
        // Check offset and length for each non-null value.
633
        let sliced_map_array = sliced_array.as_any().downcast_ref::<MapArray>().unwrap();
634
        assert_eq!(3, sliced_map_array.value_offsets()[0]);
635
        assert_eq!(3, sliced_map_array.value_length(0));
636
        assert_eq!(6, sliced_map_array.value_offsets()[1]);
637
        assert_eq!(2, sliced_map_array.value_length(1));
638
639
        // Construct key and values
640
        let keys_data = ArrayData::builder(DataType::Int32)
641
            .len(5)
642
            .add_buffer(Buffer::from([3, 4, 5, 6, 7].to_byte_slice()))
643
            .build()
644
            .unwrap();
645
        let values_data = ArrayData::builder(DataType::UInt32)
646
            .len(5)
647
            .add_buffer(Buffer::from([30u32, 40, 50, 60, 70].to_byte_slice()))
648
            .build()
649
            .unwrap();
650
651
        // Construct a buffer for value offsets, for the nested array:
652
        //  [[3, 4, 5], [6, 7]]
653
        let entry_offsets = Buffer::from([0, 3, 5].to_byte_slice());
654
655
        let keys = Arc::new(Field::new("keys", DataType::Int32, false));
656
        let values = Arc::new(Field::new("values", DataType::UInt32, false));
657
        let entry_struct = StructArray::from(vec![
658
            (keys, make_array(keys_data)),
659
            (values, make_array(values_data)),
660
        ]);
661
662
        // Construct a map array from the above two
663
        let map_data_type = DataType::Map(
664
            Arc::new(Field::new(
665
                "entries",
666
                entry_struct.data_type().clone(),
667
                false,
668
            )),
669
            false,
670
        );
671
        let expected_map_data = ArrayData::builder(map_data_type)
672
            .len(2)
673
            .add_buffer(entry_offsets)
674
            .add_child_data(entry_struct.into_data())
675
            .build()
676
            .unwrap();
677
        let expected_map_array = MapArray::from(expected_map_data);
678
679
        assert_eq!(&expected_map_array, sliced_map_array)
680
    }
681
682
    #[test]
683
    #[should_panic(expected = "index out of bounds: the len is ")]
684
    fn test_map_array_index_out_of_bound() {
685
        let map_array = create_from_buffers();
686
687
        map_array.value(map_array.len());
688
    }
689
690
    #[test]
691
    #[should_panic(expected = "MapArray expected ArrayData with DataType::Map got Dictionary")]
692
    fn test_from_array_data_validation() {
693
        // A DictionaryArray has similar buffer layout to a MapArray
694
        // but the meaning of the values differs
695
        let struct_t = DataType::Struct(Fields::from(vec![
696
            Field::new("keys", DataType::Int32, true),
697
            Field::new("values", DataType::UInt32, true),
698
        ]));
699
        let dict_t = DataType::Dictionary(Box::new(DataType::Int32), Box::new(struct_t));
700
        let _ = MapArray::from(ArrayData::new_empty(&dict_t));
701
    }
702
703
    #[test]
704
    fn test_new_from_strings() {
705
        let keys = vec!["a", "b", "c", "d", "e", "f", "g", "h"];
706
        let values_data = UInt32Array::from(vec![0u32, 10, 20, 30, 40, 50, 60, 70]);
707
708
        // Construct a buffer for value offsets, for the nested array:
709
        //  [[a, b, c], [d, e, f], [g, h]]
710
        let entry_offsets = [0, 3, 6, 8];
711
712
        let map_array =
713
            MapArray::new_from_strings(keys.clone().into_iter(), &values_data, &entry_offsets)
714
                .unwrap();
715
716
        assert_eq!(
717
            &values_data,
718
            map_array.values().as_primitive::<UInt32Type>()
719
        );
720
        assert_eq!(&DataType::UInt32, map_array.value_type());
721
        assert_eq!(3, map_array.len());
722
        assert_eq!(0, map_array.null_count());
723
        assert_eq!(6, map_array.value_offsets()[2]);
724
        assert_eq!(2, map_array.value_length(2));
725
726
        let key_array = Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef;
727
        let value_array = Arc::new(UInt32Array::from(vec![0u32, 10, 20])) as ArrayRef;
728
        let keys_field = Arc::new(Field::new("keys", DataType::Utf8, false));
729
        let values_field = Arc::new(Field::new("values", DataType::UInt32, false));
730
        let struct_array =
731
            StructArray::from(vec![(keys_field, key_array), (values_field, value_array)]);
732
        assert_eq!(
733
            struct_array,
734
            StructArray::from(map_array.value(0).into_data())
735
        );
736
        assert_eq!(
737
            &struct_array,
738
            unsafe { map_array.value_unchecked(0) }
739
                .as_any()
740
                .downcast_ref::<StructArray>()
741
                .unwrap()
742
        );
743
        for i in 0..3 {
744
            assert!(map_array.is_valid(i));
745
            assert!(!map_array.is_null(i));
746
        }
747
    }
748
749
    #[test]
750
    fn test_try_new() {
751
        let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
752
        let fields = Fields::from(vec![
753
            Field::new("key", DataType::Int32, false),
754
            Field::new("values", DataType::Int32, false),
755
        ]);
756
        let columns = vec![
757
            Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
758
            Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
759
        ];
760
761
        let entries = StructArray::new(fields.clone(), columns, None);
762
        let field = Arc::new(Field::new("entries", DataType::Struct(fields), false));
763
764
        MapArray::new(field.clone(), offsets.clone(), entries.clone(), None, false);
765
766
        let nulls = NullBuffer::new_null(3);
767
        MapArray::new(field.clone(), offsets, entries.clone(), Some(nulls), false);
768
769
        let nulls = NullBuffer::new_null(3);
770
        let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
771
        let err = MapArray::try_new(
772
            field.clone(),
773
            offsets.clone(),
774
            entries.clone(),
775
            Some(nulls),
776
            false,
777
        )
778
        .unwrap_err();
779
780
        assert_eq!(
781
            err.to_string(),
782
            "Invalid argument error: Incorrect length of null buffer for MapArray, expected 4 got 3"
783
        );
784
785
        let err = MapArray::try_new(field, offsets.clone(), entries.slice(0, 2), None, false)
786
            .unwrap_err();
787
788
        assert_eq!(
789
            err.to_string(),
790
            "Invalid argument error: Max offset of 5 exceeds length of entries 2"
791
        );
792
793
        let field = Arc::new(Field::new("element", DataType::Int64, false));
794
        let err = MapArray::try_new(field, offsets.clone(), entries, None, false)
795
            .unwrap_err()
796
            .to_string();
797
798
        assert!(
799
            err.starts_with("Invalid argument error: MapArray expected data type Int64 got Struct"),
800
            "{err}"
801
        );
802
803
        let fields = Fields::from(vec![
804
            Field::new("a", DataType::Int32, false),
805
            Field::new("b", DataType::Int32, false),
806
            Field::new("c", DataType::Int32, false),
807
        ]);
808
        let columns = vec![
809
            Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
810
            Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
811
            Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
812
        ];
813
814
        let s = StructArray::new(fields.clone(), columns, None);
815
        let field = Arc::new(Field::new("entries", DataType::Struct(fields), false));
816
        let err = MapArray::try_new(field, offsets, s, None, false).unwrap_err();
817
818
        assert_eq!(
819
            err.to_string(),
820
            "Invalid argument error: MapArray entries must contain two children, got 3"
821
        );
822
    }
823
}