/Users/andrewlamb/Software/arrow-rs/arrow-array/src/array/map_array.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use crate::array::{get_offsets, print_long_array}; |
19 | | use crate::iterator::MapArrayIter; |
20 | | use crate::{make_array, Array, ArrayAccessor, ArrayRef, ListArray, StringArray, StructArray}; |
21 | | use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, OffsetBuffer, ToByteSlice}; |
22 | | use arrow_data::{ArrayData, ArrayDataBuilder}; |
23 | | use arrow_schema::{ArrowError, DataType, Field, FieldRef}; |
24 | | use std::any::Any; |
25 | | use std::sync::Arc; |
26 | | |
27 | | /// An array of key-value maps |
28 | | /// |
29 | | /// Keys should always be non-null, but values can be null. |
30 | | /// |
31 | | /// [`MapArray`] is physically a [`ListArray`] of key values pairs stored as an `entries` |
32 | | /// [`StructArray`] with 2 child fields. |
33 | | /// |
34 | | /// See [`MapBuilder`](crate::builder::MapBuilder) for how to construct a [`MapArray`] |
35 | | #[derive(Clone)] |
36 | | pub struct MapArray { |
37 | | data_type: DataType, |
38 | | nulls: Option<NullBuffer>, |
39 | | /// The [`StructArray`] that is the direct child of this array |
40 | | entries: StructArray, |
41 | | /// The start and end offsets of each entry |
42 | | value_offsets: OffsetBuffer<i32>, |
43 | | } |
44 | | |
45 | | impl MapArray { |
46 | | /// Create a new [`MapArray`] from the provided parts |
47 | | /// |
48 | | /// See [`MapBuilder`](crate::builder::MapBuilder) for a higher-level interface |
49 | | /// to construct a [`MapArray`] |
50 | | /// |
51 | | /// # Errors |
52 | | /// |
53 | | /// Errors if |
54 | | /// |
55 | | /// * `offsets.len() - 1 != nulls.len()` |
56 | | /// * `offsets.last() > entries.len()` |
57 | | /// * `field.is_nullable()` |
58 | | /// * `entries.null_count() != 0` |
59 | | /// * `entries.columns().len() != 2` |
60 | | /// * `field.data_type() != entries.data_type()` |
61 | 20 | pub fn try_new( |
62 | 20 | field: FieldRef, |
63 | 20 | offsets: OffsetBuffer<i32>, |
64 | 20 | entries: StructArray, |
65 | 20 | nulls: Option<NullBuffer>, |
66 | 20 | ordered: bool, |
67 | 20 | ) -> Result<Self, ArrowError> { |
68 | 20 | let len = offsets.len() - 1; // Offsets guaranteed to not be empty |
69 | 20 | let end_offset = offsets.last().unwrap().as_usize(); |
70 | | // don't need to check other values of `offsets` because they are checked |
71 | | // during construction of `OffsetBuffer` |
72 | 20 | if end_offset > entries.len() { |
73 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
74 | 0 | "Max offset of {end_offset} exceeds length of entries {}", |
75 | 0 | entries.len() |
76 | 0 | ))); |
77 | 20 | } |
78 | | |
79 | 20 | if let Some(n7 ) = nulls.as_ref() { |
80 | 7 | if n.len() != len { |
81 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
82 | 0 | "Incorrect length of null buffer for MapArray, expected {len} got {}", |
83 | 0 | n.len(), |
84 | 0 | ))); |
85 | 7 | } |
86 | 13 | } |
87 | 20 | if field.is_nullable() || entries.null_count() != 0 { |
88 | 0 | return Err(ArrowError::InvalidArgumentError( |
89 | 0 | "MapArray entries cannot contain nulls".to_string(), |
90 | 0 | )); |
91 | 20 | } |
92 | | |
93 | 20 | if field.data_type() != entries.data_type() { |
94 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
95 | 0 | "MapArray expected data type {} got {} for {:?}", |
96 | 0 | field.data_type(), |
97 | 0 | entries.data_type(), |
98 | 0 | field.name() |
99 | 0 | ))); |
100 | 20 | } |
101 | | |
102 | 20 | if entries.columns().len() != 2 { |
103 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
104 | 0 | "MapArray entries must contain two children, got {}", |
105 | 0 | entries.columns().len() |
106 | 0 | ))); |
107 | 20 | } |
108 | | |
109 | 20 | Ok(Self { |
110 | 20 | data_type: DataType::Map(field, ordered), |
111 | 20 | nulls, |
112 | 20 | entries, |
113 | 20 | value_offsets: offsets, |
114 | 20 | }) |
115 | 20 | } |
116 | | |
117 | | /// Create a new [`MapArray`] from the provided parts |
118 | | /// |
119 | | /// See [`MapBuilder`](crate::builder::MapBuilder) for a higher-level interface |
120 | | /// to construct a [`MapArray`] |
121 | | /// |
122 | | /// # Panics |
123 | | /// |
124 | | /// Panics if [`Self::try_new`] returns an error |
125 | 20 | pub fn new( |
126 | 20 | field: FieldRef, |
127 | 20 | offsets: OffsetBuffer<i32>, |
128 | 20 | entries: StructArray, |
129 | 20 | nulls: Option<NullBuffer>, |
130 | 20 | ordered: bool, |
131 | 20 | ) -> Self { |
132 | 20 | Self::try_new(field, offsets, entries, nulls, ordered).unwrap() |
133 | 20 | } |
134 | | |
135 | | /// Deconstruct this array into its constituent parts |
136 | 0 | pub fn into_parts( |
137 | 0 | self, |
138 | 0 | ) -> ( |
139 | 0 | FieldRef, |
140 | 0 | OffsetBuffer<i32>, |
141 | 0 | StructArray, |
142 | 0 | Option<NullBuffer>, |
143 | 0 | bool, |
144 | 0 | ) { |
145 | 0 | let (f, ordered) = match self.data_type { |
146 | 0 | DataType::Map(f, ordered) => (f, ordered), |
147 | 0 | _ => unreachable!(), |
148 | | }; |
149 | 0 | (f, self.value_offsets, self.entries, self.nulls, ordered) |
150 | 0 | } |
151 | | |
152 | | /// Returns a reference to the offsets of this map |
153 | | /// |
154 | | /// Unlike [`Self::value_offsets`] this returns the [`OffsetBuffer`] |
155 | | /// allowing for zero-copy cloning |
156 | | #[inline] |
157 | 0 | pub fn offsets(&self) -> &OffsetBuffer<i32> { |
158 | 0 | &self.value_offsets |
159 | 0 | } |
160 | | |
161 | | /// Returns a reference to the keys of this map |
162 | 0 | pub fn keys(&self) -> &ArrayRef { |
163 | 0 | self.entries.column(0) |
164 | 0 | } |
165 | | |
166 | | /// Returns a reference to the values of this map |
167 | 0 | pub fn values(&self) -> &ArrayRef { |
168 | 0 | self.entries.column(1) |
169 | 0 | } |
170 | | |
171 | | /// Returns a reference to the [`StructArray`] entries of this map |
172 | 0 | pub fn entries(&self) -> &StructArray { |
173 | 0 | &self.entries |
174 | 0 | } |
175 | | |
176 | | /// Returns the data type of the map's keys. |
177 | 0 | pub fn key_type(&self) -> &DataType { |
178 | 0 | self.keys().data_type() |
179 | 0 | } |
180 | | |
181 | | /// Returns the data type of the map's values. |
182 | 0 | pub fn value_type(&self) -> &DataType { |
183 | 0 | self.values().data_type() |
184 | 0 | } |
185 | | |
186 | | /// Returns ith value of this map array. |
187 | | /// |
188 | | /// Note: This method does not check for nulls and the value is arbitrary |
189 | | /// if [`is_null`](Self::is_null) returns true for the index. |
190 | | /// |
191 | | /// # Safety |
192 | | /// Caller must ensure that the index is within the array bounds |
193 | 0 | pub unsafe fn value_unchecked(&self, i: usize) -> StructArray { |
194 | 0 | let end = *self.value_offsets().get_unchecked(i + 1); |
195 | 0 | let start = *self.value_offsets().get_unchecked(i); |
196 | 0 | self.entries |
197 | 0 | .slice(start.to_usize().unwrap(), (end - start).to_usize().unwrap()) |
198 | 0 | } |
199 | | |
200 | | /// Returns ith value of this map array. |
201 | | /// |
202 | | /// This is a [`StructArray`] containing two fields |
203 | | /// |
204 | | /// Note: This method does not check for nulls and the value is arbitrary |
205 | | /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index. |
206 | | /// |
207 | | /// # Panics |
208 | | /// Panics if index `i` is out of bounds |
209 | 1 | pub fn value(&self, i: usize) -> StructArray { |
210 | 1 | let end = self.value_offsets()[i + 1] as usize; |
211 | 1 | let start = self.value_offsets()[i] as usize; |
212 | 1 | self.entries.slice(start, end - start) |
213 | 1 | } |
214 | | |
215 | | /// Returns the offset values in the offsets buffer |
216 | | #[inline] |
217 | 4 | pub fn value_offsets(&self) -> &[i32] { |
218 | 4 | &self.value_offsets |
219 | 4 | } |
220 | | |
221 | | /// Returns the length for value at index `i`. |
222 | | #[inline] |
223 | 2 | pub fn value_length(&self, i: usize) -> i32 { |
224 | 2 | let offsets = self.value_offsets(); |
225 | 2 | offsets[i + 1] - offsets[i] |
226 | 2 | } |
227 | | |
228 | | /// Returns a zero-copy slice of this array with the indicated offset and length. |
229 | 6 | pub fn slice(&self, offset: usize, length: usize) -> Self { |
230 | | Self { |
231 | 6 | data_type: self.data_type.clone(), |
232 | 6 | nulls: self.nulls.as_ref().map(|n| n2 .slice2 (offset2 , length2 )), |
233 | 6 | entries: self.entries.clone(), |
234 | 6 | value_offsets: self.value_offsets.slice(offset, length), |
235 | | } |
236 | 6 | } |
237 | | |
238 | | /// constructs a new iterator |
239 | 0 | pub fn iter(&self) -> MapArrayIter<'_> { |
240 | 0 | MapArrayIter::new(self) |
241 | 0 | } |
242 | | } |
243 | | |
244 | | impl From<ArrayData> for MapArray { |
245 | 6 | fn from(data: ArrayData) -> Self { |
246 | 6 | Self::try_new_from_array_data(data) |
247 | 6 | .expect("Expected infallible creation of MapArray from ArrayData failed") |
248 | 6 | } |
249 | | } |
250 | | |
251 | | impl From<MapArray> for ArrayData { |
252 | 27 | fn from(array: MapArray) -> Self { |
253 | 27 | let len = array.len(); |
254 | 27 | let builder = ArrayDataBuilder::new(array.data_type) |
255 | 27 | .len(len) |
256 | 27 | .nulls(array.nulls) |
257 | 27 | .buffers(vec![array.value_offsets.into_inner().into_inner()]) |
258 | 27 | .child_data(vec![array.entries.to_data()]); |
259 | | |
260 | 27 | unsafe { builder.build_unchecked() } |
261 | 27 | } |
262 | | } |
263 | | |
264 | | impl MapArray { |
265 | 6 | fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> { |
266 | 6 | if !matches!0 (data.data_type(), DataType::Map(_, _)) { |
267 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
268 | 0 | "MapArray expected ArrayData with DataType::Map got {}", |
269 | 0 | data.data_type() |
270 | 0 | ))); |
271 | 6 | } |
272 | | |
273 | 6 | if data.buffers().len() != 1 { |
274 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
275 | 0 | "MapArray data should contain a single buffer only (value offsets), had {}", |
276 | 0 | data.len() |
277 | 0 | ))); |
278 | 6 | } |
279 | | |
280 | 6 | if data.child_data().len() != 1 { |
281 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
282 | 0 | "MapArray should contain a single child array (values array), had {}", |
283 | 0 | data.child_data().len() |
284 | 0 | ))); |
285 | 6 | } |
286 | | |
287 | 6 | let entries = data.child_data()[0].clone(); |
288 | | |
289 | 6 | if let DataType::Struct(fields) = entries.data_type() { |
290 | 6 | if fields.len() != 2 { |
291 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
292 | 0 | "MapArray should contain a struct array with 2 fields, have {} fields", |
293 | 0 | fields.len() |
294 | 0 | ))); |
295 | 6 | } |
296 | | } else { |
297 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
298 | 0 | "MapArray should contain a struct array child, found {:?}", |
299 | 0 | entries.data_type() |
300 | 0 | ))); |
301 | | } |
302 | 6 | let entries = entries.into(); |
303 | | |
304 | | // SAFETY: |
305 | | // ArrayData is valid, and verified type above |
306 | 6 | let value_offsets = unsafe { get_offsets(&data) }; |
307 | | |
308 | 6 | Ok(Self { |
309 | 6 | data_type: data.data_type().clone(), |
310 | 6 | nulls: data.nulls().cloned(), |
311 | 6 | entries, |
312 | 6 | value_offsets, |
313 | 6 | }) |
314 | 6 | } |
315 | | |
316 | | /// Creates map array from provided keys, values and entry_offsets. |
317 | | pub fn new_from_strings<'a>( |
318 | | keys: impl Iterator<Item = &'a str>, |
319 | | values: &dyn Array, |
320 | | entry_offsets: &[u32], |
321 | | ) -> Result<Self, ArrowError> { |
322 | | let entry_offsets_buffer = Buffer::from(entry_offsets.to_byte_slice()); |
323 | | let keys_data = StringArray::from_iter_values(keys); |
324 | | |
325 | | let keys_field = Arc::new(Field::new("keys", DataType::Utf8, false)); |
326 | | let values_field = Arc::new(Field::new( |
327 | | "values", |
328 | | values.data_type().clone(), |
329 | | values.null_count() > 0, |
330 | | )); |
331 | | |
332 | | let entry_struct = StructArray::from(vec![ |
333 | | (keys_field, Arc::new(keys_data) as ArrayRef), |
334 | | (values_field, make_array(values.to_data())), |
335 | | ]); |
336 | | |
337 | | let map_data_type = DataType::Map( |
338 | | Arc::new(Field::new( |
339 | | "entries", |
340 | | entry_struct.data_type().clone(), |
341 | | false, |
342 | | )), |
343 | | false, |
344 | | ); |
345 | | let map_data = ArrayData::builder(map_data_type) |
346 | | .len(entry_offsets.len() - 1) |
347 | | .add_buffer(entry_offsets_buffer) |
348 | | .add_child_data(entry_struct.into_data()) |
349 | | .build()?; |
350 | | |
351 | | Ok(MapArray::from(map_data)) |
352 | | } |
353 | | } |
354 | | |
355 | | impl Array for MapArray { |
356 | 2 | fn as_any(&self) -> &dyn Any { |
357 | 2 | self |
358 | 2 | } |
359 | | |
360 | 27 | fn to_data(&self) -> ArrayData { |
361 | 27 | self.clone().into_data() |
362 | 27 | } |
363 | | |
364 | 27 | fn into_data(self) -> ArrayData { |
365 | 27 | self.into() |
366 | 27 | } |
367 | | |
368 | 38 | fn data_type(&self) -> &DataType { |
369 | 38 | &self.data_type |
370 | 38 | } |
371 | | |
372 | 6 | fn slice(&self, offset: usize, length: usize) -> ArrayRef { |
373 | 6 | Arc::new(self.slice(offset, length)) |
374 | 6 | } |
375 | | |
376 | 80 | fn len(&self) -> usize { |
377 | 80 | self.value_offsets.len() - 1 |
378 | 80 | } |
379 | | |
380 | 0 | fn is_empty(&self) -> bool { |
381 | 0 | self.value_offsets.len() <= 1 |
382 | 0 | } |
383 | | |
384 | 0 | fn shrink_to_fit(&mut self) { |
385 | 0 | if let Some(nulls) = &mut self.nulls { |
386 | 0 | nulls.shrink_to_fit(); |
387 | 0 | } |
388 | 0 | self.entries.shrink_to_fit(); |
389 | 0 | self.value_offsets.shrink_to_fit(); |
390 | 0 | } |
391 | | |
392 | 0 | fn offset(&self) -> usize { |
393 | 0 | 0 |
394 | 0 | } |
395 | | |
396 | 0 | fn nulls(&self) -> Option<&NullBuffer> { |
397 | 0 | self.nulls.as_ref() |
398 | 0 | } |
399 | | |
400 | 0 | fn logical_null_count(&self) -> usize { |
401 | | // More efficient that the default implementation |
402 | 0 | self.null_count() |
403 | 0 | } |
404 | | |
405 | 0 | fn get_buffer_memory_size(&self) -> usize { |
406 | 0 | let mut size = self.entries.get_buffer_memory_size(); |
407 | 0 | size += self.value_offsets.inner().inner().capacity(); |
408 | 0 | if let Some(n) = self.nulls.as_ref() { |
409 | 0 | size += n.buffer().capacity(); |
410 | 0 | } |
411 | 0 | size |
412 | 0 | } |
413 | | |
414 | 0 | fn get_array_memory_size(&self) -> usize { |
415 | 0 | let mut size = std::mem::size_of::<Self>() + self.entries.get_array_memory_size(); |
416 | 0 | size += self.value_offsets.inner().inner().capacity(); |
417 | 0 | if let Some(n) = self.nulls.as_ref() { |
418 | 0 | size += n.buffer().capacity(); |
419 | 0 | } |
420 | 0 | size |
421 | 0 | } |
422 | | } |
423 | | |
424 | | impl ArrayAccessor for &MapArray { |
425 | | type Item = StructArray; |
426 | | |
427 | 0 | fn value(&self, index: usize) -> Self::Item { |
428 | 0 | MapArray::value(self, index) |
429 | 0 | } |
430 | | |
431 | 0 | unsafe fn value_unchecked(&self, index: usize) -> Self::Item { |
432 | 0 | MapArray::value(self, index) |
433 | 0 | } |
434 | | } |
435 | | |
436 | | impl std::fmt::Debug for MapArray { |
437 | 0 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { |
438 | 0 | write!(f, "MapArray\n[\n")?; |
439 | 0 | print_long_array(self, f, |array, index, f| { |
440 | 0 | std::fmt::Debug::fmt(&array.value(index), f) |
441 | 0 | })?; |
442 | 0 | write!(f, "]") |
443 | 0 | } |
444 | | } |
445 | | |
446 | | impl From<MapArray> for ListArray { |
447 | 0 | fn from(value: MapArray) -> Self { |
448 | 0 | let field = match value.data_type() { |
449 | 0 | DataType::Map(field, _) => field, |
450 | 0 | _ => unreachable!("This should be a map type."), |
451 | | }; |
452 | 0 | let data_type = DataType::List(field.clone()); |
453 | 0 | let builder = value.into_data().into_builder().data_type(data_type); |
454 | 0 | let array_data = unsafe { builder.build_unchecked() }; |
455 | | |
456 | 0 | ListArray::from(array_data) |
457 | 0 | } |
458 | | } |
459 | | |
#[cfg(test)]
mod tests {
    use crate::cast::AsArray;
    use crate::types::UInt32Type;
    use crate::{Int32Array, UInt32Array};
    use arrow_schema::Fields;

    use super::*;

    /// Wraps `entries` in a non-nullable, unordered map made of `len` maps
    /// delimited by `offsets`.
    fn map_from_entries(entries: StructArray, offsets: Buffer, len: usize) -> MapArray {
        let entries_field = Field::new("entries", entries.data_type().clone(), false);
        let map_type = DataType::Map(Arc::new(entries_field), false);
        let data = ArrayData::builder(map_type)
            .len(len)
            .add_buffer(offsets)
            .add_child_data(entries.into_data())
            .build()
            .unwrap();
        MapArray::from(data)
    }

    /// A fresh five-row `Int32` column holding `1..=5`.
    fn int_column() -> ArrayRef {
        Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as ArrayRef
    }

    fn create_from_buffers() -> MapArray {
        // Keys 0..=7 and values 0, 10, ..., 70
        let key_data = ArrayData::builder(DataType::Int32)
            .len(8)
            .add_buffer(Buffer::from([0i32, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
            .build()
            .unwrap();
        let value_data = ArrayData::builder(DataType::UInt32)
            .len(8)
            .add_buffer(Buffer::from(
                [0u32, 10, 20, 30, 40, 50, 60, 70].to_byte_slice(),
            ))
            .build()
            .unwrap();

        // Offsets describing the nested layout
        // [[0, 1, 2], [3, 4, 5], [6, 7]]
        let offsets = Buffer::from([0i32, 3, 6, 8].to_byte_slice());

        let key_field = Arc::new(Field::new("keys", DataType::Int32, false));
        let value_field = Arc::new(Field::new("values", DataType::UInt32, false));
        let entries = StructArray::from(vec![
            (key_field, make_array(key_data)),
            (value_field, make_array(value_data)),
        ]);

        map_from_entries(entries, offsets, 3)
    }

    #[test]
    fn test_map_array() {
        // Keys 0..=7; values whose slots 0, 3 and 5 are masked out as null
        let key_data = ArrayData::builder(DataType::Int32)
            .len(8)
            .add_buffer(Buffer::from([0i32, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
            .build()
            .unwrap();
        let value_data = ArrayData::builder(DataType::UInt32)
            .len(8)
            .add_buffer(Buffer::from(
                [0u32, 10, 20, 0, 40, 0, 60, 70].to_byte_slice(),
            ))
            .null_bit_buffer(Some(Buffer::from(&[0b11010110])))
            .build()
            .unwrap();

        // Offsets describing the nested layout
        // [[0, 1, 2], [3, 4, 5], [6, 7]]
        let offsets = Buffer::from([0i32, 3, 6, 8].to_byte_slice());

        let keys_field = Arc::new(Field::new("keys", DataType::Int32, false));
        let values_field = Arc::new(Field::new("values", DataType::UInt32, true));
        let entries = StructArray::from(vec![
            (keys_field.clone(), make_array(key_data)),
            (values_field.clone(), make_array(value_data.clone())),
        ]);

        let map_array = map_from_entries(entries, offsets, 3);

        assert_eq!(value_data, map_array.values().to_data());
        assert_eq!(&DataType::UInt32, map_array.value_type());
        assert_eq!(3, map_array.len());
        assert_eq!(0, map_array.null_count());
        assert_eq!(6, map_array.value_offsets()[2]);
        assert_eq!(2, map_array.value_length(2));

        // The first map covers entries 0..3
        let first_keys = Arc::new(Int32Array::from(vec![0, 1, 2])) as ArrayRef;
        let first_values =
            Arc::new(UInt32Array::from(vec![None, Some(10u32), Some(20)])) as ArrayRef;
        let first_map = StructArray::from(vec![
            (keys_field.clone(), first_keys),
            (values_field.clone(), first_values),
        ]);
        assert_eq!(
            first_map,
            StructArray::from(map_array.value(0).into_data())
        );
        assert_eq!(
            &first_map,
            unsafe { map_array.value_unchecked(0) }
                .as_any()
                .downcast_ref::<StructArray>()
                .unwrap()
        );
        for row in 0..3 {
            assert!(map_array.is_valid(row));
            assert!(!map_array.is_null(row));
        }

        // Repeat the checks on a slice that starts at a non-zero offset
        let sliced = map_array.slice(1, 2);

        assert_eq!(value_data, sliced.values().to_data());
        assert_eq!(&DataType::UInt32, sliced.value_type());
        assert_eq!(2, sliced.len());
        assert_eq!(0, sliced.null_count());
        assert_eq!(6, sliced.value_offsets()[1]);
        assert_eq!(2, sliced.value_length(1));

        // The first map of the slice covers entries 3..6
        let second_keys = Arc::new(Int32Array::from(vec![3, 4, 5])) as ArrayRef;
        let second_values = Arc::new(UInt32Array::from(vec![None, Some(40), None])) as ArrayRef;
        let second_map = StructArray::from(vec![
            (keys_field, second_keys),
            (values_field, second_values),
        ]);
        assert_eq!(
            &second_map,
            sliced
                .value(0)
                .as_any()
                .downcast_ref::<StructArray>()
                .unwrap()
        );
        assert_eq!(
            &second_map,
            unsafe { sliced.value_unchecked(0) }
                .as_any()
                .downcast_ref::<StructArray>()
                .unwrap()
        );
    }

    #[test]
    #[ignore = "Test fails because slice of <list<struct>> is still buggy"]
    fn test_map_array_slice() {
        let map_array = create_from_buffers();

        let sliced_array = map_array.slice(1, 2);
        assert_eq!(2, sliced_array.len());
        assert_eq!(1, sliced_array.offset());
        let sliced_array_data = sliced_array.to_data();
        for child in sliced_array_data.child_data() {
            assert_eq!(child.offset(), 1);
        }

        // Offset and length of every (non-null) value in the slice
        let sliced_map_array = sliced_array.as_any().downcast_ref::<MapArray>().unwrap();
        assert_eq!(3, sliced_map_array.value_offsets()[0]);
        assert_eq!(3, sliced_map_array.value_length(0));
        assert_eq!(6, sliced_map_array.value_offsets()[1]);
        assert_eq!(2, sliced_map_array.value_length(1));

        // Build the expected result directly: keys 3..=7 and values 30, ..., 70
        let key_data = ArrayData::builder(DataType::Int32)
            .len(5)
            .add_buffer(Buffer::from([3i32, 4, 5, 6, 7].to_byte_slice()))
            .build()
            .unwrap();
        let value_data = ArrayData::builder(DataType::UInt32)
            .len(5)
            .add_buffer(Buffer::from([30u32, 40, 50, 60, 70].to_byte_slice()))
            .build()
            .unwrap();

        // Offsets describing the nested layout
        // [[3, 4, 5], [6, 7]]
        let offsets = Buffer::from([0i32, 3, 5].to_byte_slice());

        let key_field = Arc::new(Field::new("keys", DataType::Int32, false));
        let value_field = Arc::new(Field::new("values", DataType::UInt32, false));
        let entries = StructArray::from(vec![
            (key_field, make_array(key_data)),
            (value_field, make_array(value_data)),
        ]);

        let expected_map_array = map_from_entries(entries, offsets, 2);

        assert_eq!(&expected_map_array, sliced_map_array)
    }

    #[test]
    #[should_panic(expected = "index out of bounds: the len is ")]
    fn test_map_array_index_out_of_bound() {
        let map_array = create_from_buffers();

        let one_past_the_end = map_array.len();
        map_array.value(one_past_the_end);
    }

    #[test]
    #[should_panic(expected = "MapArray expected ArrayData with DataType::Map got Dictionary")]
    fn test_from_array_data_validation() {
        // The buffer layout of a DictionaryArray resembles that of a MapArray,
        // yet its values mean something different
        let entry_fields = Fields::from(vec![
            Field::new("keys", DataType::Int32, true),
            Field::new("values", DataType::UInt32, true),
        ]);
        let dict_t = DataType::Dictionary(
            Box::new(DataType::Int32),
            Box::new(DataType::Struct(entry_fields)),
        );
        let _ = MapArray::from(ArrayData::new_empty(&dict_t));
    }

    #[test]
    fn test_new_from_strings() {
        let keys = vec!["a", "b", "c", "d", "e", "f", "g", "h"];
        let values_data = UInt32Array::from(vec![0u32, 10, 20, 30, 40, 50, 60, 70]);

        // Offsets describing the nested layout
        // [[a, b, c], [d, e, f], [g, h]]
        let entry_offsets = [0, 3, 6, 8];

        let map_array =
            MapArray::new_from_strings(keys.iter().copied(), &values_data, &entry_offsets)
                .unwrap();

        assert_eq!(
            &values_data,
            map_array.values().as_primitive::<UInt32Type>()
        );
        assert_eq!(&DataType::UInt32, map_array.value_type());
        assert_eq!(3, map_array.len());
        assert_eq!(0, map_array.null_count());
        assert_eq!(6, map_array.value_offsets()[2]);
        assert_eq!(2, map_array.value_length(2));

        // The first map covers entries 0..3
        let expected_keys = Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef;
        let expected_values = Arc::new(UInt32Array::from(vec![0u32, 10, 20])) as ArrayRef;
        let keys_field = Arc::new(Field::new("keys", DataType::Utf8, false));
        let values_field = Arc::new(Field::new("values", DataType::UInt32, false));
        let expected_first = StructArray::from(vec![
            (keys_field, expected_keys),
            (values_field, expected_values),
        ]);
        assert_eq!(
            expected_first,
            StructArray::from(map_array.value(0).into_data())
        );
        assert_eq!(
            &expected_first,
            unsafe { map_array.value_unchecked(0) }
                .as_any()
                .downcast_ref::<StructArray>()
                .unwrap()
        );
        for row in 0..3 {
            assert!(map_array.is_valid(row));
            assert!(!map_array.is_null(row));
        }
    }

    #[test]
    fn test_try_new() {
        let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
        let fields = Fields::from(vec![
            Field::new("key", DataType::Int32, false),
            Field::new("values", DataType::Int32, false),
        ]);
        let columns = vec![int_column(), int_column()];

        let entries = StructArray::new(fields.clone(), columns, None);
        let field = Arc::new(Field::new("entries", DataType::Struct(fields), false));

        // Valid without a null buffer ...
        let _ = MapArray::new(field.clone(), offsets.clone(), entries.clone(), None, false);

        // ... and with a null buffer of matching length
        let nulls = NullBuffer::new_null(3);
        let _ = MapArray::new(field.clone(), offsets, entries.clone(), Some(nulls), false);

        // Null buffer shorter than the number of maps
        let nulls = NullBuffer::new_null(3);
        let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
        let err = MapArray::try_new(
            field.clone(),
            offsets.clone(),
            entries.clone(),
            Some(nulls),
            false,
        )
        .unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: Incorrect length of null buffer for MapArray, expected 4 got 3"
        );

        // Offsets reaching past the end of the entries
        let err = MapArray::try_new(field, offsets.clone(), entries.slice(0, 2), None, false)
            .unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: Max offset of 5 exceeds length of entries 2"
        );

        // Field type that does not match the entries
        let field = Arc::new(Field::new("element", DataType::Int64, false));
        let err = MapArray::try_new(field, offsets.clone(), entries, None, false)
            .unwrap_err()
            .to_string();

        assert!(
            err.starts_with("Invalid argument error: MapArray expected data type Int64 got Struct"),
            "{err}"
        );

        // Entries with three children instead of two
        let fields = Fields::from(vec![
            Field::new("a", DataType::Int32, false),
            Field::new("b", DataType::Int32, false),
            Field::new("c", DataType::Int32, false),
        ]);
        let columns = vec![int_column(), int_column(), int_column()];

        let s = StructArray::new(fields.clone(), columns, None);
        let field = Arc::new(Field::new("entries", DataType::Struct(fields), false));
        let err = MapArray::try_new(field, offsets, s, None, false).unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: MapArray entries must contain two children, got 3"
        );
    }
}