/Users/andrewlamb/Software/arrow-rs/arrow-array/src/array/fixed_size_list_array.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use crate::array::print_long_array; |
19 | | use crate::builder::{FixedSizeListBuilder, PrimitiveBuilder}; |
20 | | use crate::iterator::FixedSizeListIter; |
21 | | use crate::{Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, make_array}; |
22 | | use arrow_buffer::ArrowNativeType; |
23 | | use arrow_buffer::buffer::NullBuffer; |
24 | | use arrow_data::{ArrayData, ArrayDataBuilder}; |
25 | | use arrow_schema::{ArrowError, DataType, FieldRef}; |
26 | | use std::any::Any; |
27 | | use std::sync::Arc; |
28 | | |
29 | | /// An array of [fixed length lists], similar to JSON arrays |
30 | | /// (e.g. `["A", "B"]`). |
31 | | /// |
32 | | /// Lists are represented using a `values` child |
33 | | /// array where each list has a fixed size of `value_length`. |
34 | | /// |
35 | | /// Use [`FixedSizeListBuilder`] to construct a [`FixedSizeListArray`]. |
36 | | /// |
37 | | /// # Representation |
38 | | /// |
39 | | /// A [`FixedSizeListArray`] can represent a list of values of any other |
40 | | /// supported Arrow type. Each element of the `FixedSizeListArray` itself is |
41 | | /// a list which may contain NULL and non-null values, |
42 | | /// or may itself be NULL. |
43 | | /// |
44 | | /// For example, this `FixedSizeListArray` stores lists of strings: |
45 | | /// |
46 | | /// ```text |
47 | | /// ┌─────────────┐ |
48 | | /// │ [A,B] │ |
49 | | /// ├─────────────┤ |
50 | | /// │ NULL │ |
51 | | /// ├─────────────┤ |
52 | | /// │ [C,NULL] │ |
53 | | /// └─────────────┘ |
54 | | /// ``` |
55 | | /// |
56 | | /// The `values` of this `FixedSizeListArray`s are stored in a child |
57 | | /// [`StringArray`] where logical null values take up `values_length` slots in the array |
58 | | /// as shown in the following diagram. The logical values |
59 | | /// are shown on the left, and the actual `FixedSizeListArray` encoding on the right |
60 | | /// |
61 | | /// ```text |
62 | | /// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ |
63 | | /// ┌ ─ ─ ─ ─ ─ ─ ─ ─┐ |
64 | | /// ┌─────────────┐ │ ┌───┐ ┌───┐ ┌──────┐ │ |
65 | | /// │ [A,B] │ │ 1 │ │ │ 1 │ │ A │ │ 0 |
66 | | /// ├─────────────┤ │ ├───┤ ├───┤ ├──────┤ │ |
67 | | /// │ NULL │ │ 0 │ │ │ 1 │ │ B │ │ 1 |
68 | | /// ├─────────────┤ │ ├───┤ ├───┤ ├──────┤ │ |
69 | | /// │ [C,NULL] │ │ 1 │ │ │ 0 │ │ ???? │ │ 2 |
70 | | /// └─────────────┘ │ └───┘ ├───┤ ├──────┤ │ |
71 | | /// | │ 0 │ │ ???? │ │ 3 |
72 | | /// Logical Values │ Validity ├───┤ ├──────┤ │ |
73 | | /// (nulls) │ │ 1 │ │ C │ │ 4 |
74 | | /// │ ├───┤ ├──────┤ │ |
75 | | /// │ │ 0 │ │ ???? │ │ 5 |
76 | | /// │ └───┘ └──────┘ │ |
77 | | /// │ Values │ |
78 | | /// │ FixedSizeListArray (Array) │ |
79 | | /// └ ─ ─ ─ ─ ─ ─ ─ ─┘ |
80 | | /// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ |
81 | | /// ``` |
82 | | /// |
83 | | /// # Example |
84 | | /// |
85 | | /// ``` |
86 | | /// # use std::sync::Arc; |
87 | | /// # use arrow_array::{Array, FixedSizeListArray, Int32Array}; |
88 | | /// # use arrow_data::ArrayData; |
89 | | /// # use arrow_schema::{DataType, Field}; |
90 | | /// # use arrow_buffer::Buffer; |
91 | | /// // Construct a value array |
92 | | /// let value_data = ArrayData::builder(DataType::Int32) |
93 | | /// .len(9) |
94 | | /// .add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7, 8])) |
95 | | /// .build() |
96 | | /// .unwrap(); |
97 | | /// let list_data_type = DataType::FixedSizeList( |
98 | | /// Arc::new(Field::new_list_field(DataType::Int32, false)), |
99 | | /// 3, |
100 | | /// ); |
101 | | /// let list_data = ArrayData::builder(list_data_type.clone()) |
102 | | /// .len(3) |
103 | | /// .add_child_data(value_data.clone()) |
104 | | /// .build() |
105 | | /// .unwrap(); |
106 | | /// let list_array = FixedSizeListArray::from(list_data); |
107 | | /// let list0 = list_array.value(0); |
108 | | /// let list1 = list_array.value(1); |
109 | | /// let list2 = list_array.value(2); |
110 | | /// |
111 | | /// assert_eq!( &[0, 1, 2], list0.as_any().downcast_ref::<Int32Array>().unwrap().values()); |
112 | | /// assert_eq!( &[3, 4, 5], list1.as_any().downcast_ref::<Int32Array>().unwrap().values()); |
113 | | /// assert_eq!( &[6, 7, 8], list2.as_any().downcast_ref::<Int32Array>().unwrap().values()); |
114 | | /// ``` |
115 | | /// |
116 | | /// [`StringArray`]: crate::array::StringArray |
117 | | /// [fixed size arrays](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-list-layout) |
118 | | #[derive(Clone)] |
119 | | pub struct FixedSizeListArray { |
120 | | data_type: DataType, // Must be DataType::FixedSizeList(value_length) |
121 | | values: ArrayRef, |
122 | | nulls: Option<NullBuffer>, |
123 | | value_length: i32, |
124 | | len: usize, |
125 | | } |
126 | | |
127 | | impl FixedSizeListArray { |
128 | | /// Create a new [`FixedSizeListArray`] with `size` element size, panicking on failure. |
129 | | /// |
130 | | /// Note that if `size == 0` and `nulls` is `None` (a degenerate, non-nullable |
131 | | /// `FixedSizeListArray`), this function will set the length of the array to 0. |
132 | | /// |
133 | | /// If you would like to have a degenerate, non-nullable `FixedSizeListArray` with arbitrary |
134 | | /// length, use the [`try_new_with_length()`] constructor. |
135 | | /// |
136 | | /// [`try_new_with_length()`]: Self::try_new_with_length |
137 | | /// |
138 | | /// # Panics |
139 | | /// |
140 | | /// Panics if [`Self::try_new`] returns an error |
141 | 11 | pub fn new(field: FieldRef, size: i32, values: ArrayRef, nulls: Option<NullBuffer>) -> Self { |
142 | 11 | Self::try_new(field, size, values, nulls).unwrap() |
143 | 11 | } |
144 | | |
145 | | /// Create a new [`FixedSizeListArray`] from the provided parts, returning an error on failure. |
146 | | /// |
147 | | /// Note that if `size == 0` and `nulls` is `None` (a degenerate, non-nullable |
148 | | /// `FixedSizeListArray`), this function will set the length of the array to 0. |
149 | | /// |
150 | | /// If you would like to have a degenerate, non-nullable `FixedSizeListArray` with arbitrary |
151 | | /// length, use the [`try_new_with_length()`] constructor. |
152 | | /// |
153 | | /// [`try_new_with_length()`]: Self::try_new_with_length |
154 | | /// |
155 | | /// # Errors |
156 | | /// |
157 | | /// * `size < 0` |
158 | | /// * `values.len() != nulls.len() * size` if `nulls` is `Some` |
159 | | /// * `values.data_type() != field.data_type()` |
160 | | /// * `!field.is_nullable() && !nulls.expand(size).contains(values.logical_nulls())` |
161 | 12 | pub fn try_new( |
162 | 12 | field: FieldRef, |
163 | 12 | size: i32, |
164 | 12 | values: ArrayRef, |
165 | 12 | nulls: Option<NullBuffer>, |
166 | 12 | ) -> Result<Self, ArrowError> { |
167 | 12 | let s = size.to_usize().ok_or_else(|| {0 |
168 | 0 | ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {size}")) |
169 | 0 | })?; |
170 | | |
171 | 12 | if s == 0 { |
172 | | // Note that for degenerate (`size == 0`) and non-nullable `FixedSizeList`s, we will set |
173 | | // the length to 0 (`_or_default`). |
174 | 0 | let len = nulls.as_ref().map(|x| x.len()).unwrap_or_default(); |
175 | | |
176 | 0 | Self::try_new_with_length(field, size, values, nulls, len) |
177 | | } else { |
178 | 12 | if values.len() % s != 0 { |
179 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
180 | 0 | "Incorrect length of values buffer for FixedSizeListArray, \ |
181 | 0 | expected a multiple of {s} got {}", |
182 | 0 | values.len(), |
183 | 0 | ))); |
184 | 12 | } |
185 | | |
186 | 12 | let len = values.len() / s; |
187 | | |
188 | | // Check that the null buffer length is correct (if it exists). |
189 | 12 | if let Some(null_buffer6 ) = &nulls { |
190 | 6 | if s * null_buffer.len() != values.len() { |
191 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
192 | 0 | "Incorrect length of values buffer for FixedSizeListArray, \ |
193 | 0 | expected {} got {}", |
194 | 0 | s * null_buffer.len(), |
195 | 0 | values.len(), |
196 | 0 | ))); |
197 | 6 | } |
198 | 6 | } |
199 | | |
200 | 12 | Self::try_new_with_length(field, size, values, nulls, len) |
201 | | } |
202 | 12 | } |
203 | | |
204 | | /// Create a new [`FixedSizeListArray`] from the provided parts, returning an error on failure. |
205 | | /// |
206 | | /// This method exists to allow the construction of arbitrary length degenerate (`size == 0`) |
207 | | /// and non-nullable `FixedSizeListArray`s. If you want a nullable `FixedSizeListArray`, then |
208 | | /// you can use [`try_new()`] instead. |
209 | | /// |
210 | | /// [`try_new()`]: Self::try_new |
211 | | /// |
212 | | /// # Errors |
213 | | /// |
214 | | /// * `size < 0` |
215 | | /// * `nulls.len() != len` if `nulls` is `Some` |
216 | | /// * `values.len() != len * size` |
217 | | /// * `values.data_type() != field.data_type()` |
218 | | /// * `!field.is_nullable() && !nulls.expand(size).contains(values.logical_nulls())` |
219 | 12 | pub fn try_new_with_length( |
220 | 12 | field: FieldRef, |
221 | 12 | size: i32, |
222 | 12 | values: ArrayRef, |
223 | 12 | nulls: Option<NullBuffer>, |
224 | 12 | len: usize, |
225 | 12 | ) -> Result<Self, ArrowError> { |
226 | 12 | let s = size.to_usize().ok_or_else(|| {0 |
227 | 0 | ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {size}")) |
228 | 0 | })?; |
229 | | |
230 | 12 | if let Some(null_buffer6 ) = &nulls { |
231 | 6 | if null_buffer.len() != len { |
232 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
233 | 0 | "Invalid null buffer for FixedSizeListArray, expected {len} found {}", |
234 | 0 | null_buffer.len() |
235 | 0 | ))); |
236 | 6 | } |
237 | 6 | } |
238 | | |
239 | 12 | if s == 0 && !values.is_empty()0 { |
240 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
241 | 0 | "An degenerate FixedSizeListArray should have no underlying values, found {} values", |
242 | 0 | values.len() |
243 | 0 | ))); |
244 | 12 | } |
245 | | |
246 | 12 | if values.len() != len * s { |
247 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
248 | 0 | "Incorrect length of values buffer for FixedSizeListArray, expected {} got {}", |
249 | 0 | len * s, |
250 | 0 | values.len(), |
251 | 0 | ))); |
252 | 12 | } |
253 | | |
254 | 12 | if field.data_type() != values.data_type() { |
255 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
256 | 0 | "FixedSizeListArray expected data type {} got {} for {:?}", |
257 | 0 | field.data_type(), |
258 | 0 | values.data_type(), |
259 | 0 | field.name() |
260 | 0 | ))); |
261 | 12 | } |
262 | | |
263 | 12 | if let Some(a8 ) = values.logical_nulls() { |
264 | 8 | let nulls_valid = field.is_nullable() |
265 | 0 | || nulls |
266 | 0 | .as_ref() |
267 | 0 | .map(|n| n.expand(size as _).contains(&a)) |
268 | 0 | .unwrap_or_default() |
269 | 0 | || (nulls.is_none() && a.null_count() == 0); |
270 | | |
271 | 8 | if !nulls_valid { |
272 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
273 | 0 | "Found unmasked nulls for non-nullable FixedSizeListArray field {:?}", |
274 | 0 | field.name() |
275 | 0 | ))); |
276 | 8 | } |
277 | 4 | } |
278 | | |
279 | 12 | let data_type = DataType::FixedSizeList(field, size); |
280 | 12 | Ok(Self { |
281 | 12 | data_type, |
282 | 12 | values, |
283 | 12 | value_length: size, |
284 | 12 | nulls, |
285 | 12 | len, |
286 | 12 | }) |
287 | 12 | } |
288 | | |
289 | | /// Create a new [`FixedSizeListArray`] of length `len` where all values are null |
290 | | /// |
291 | | /// # Panics |
292 | | /// |
293 | | /// Panics if |
294 | | /// |
295 | | /// * `size < 0` |
296 | | /// * `size * len` would overflow `usize` |
297 | 0 | pub fn new_null(field: FieldRef, size: i32, len: usize) -> Self { |
298 | 0 | let capacity = size.to_usize().unwrap().checked_mul(len).unwrap(); |
299 | 0 | Self { |
300 | 0 | values: make_array(ArrayData::new_null(field.data_type(), capacity)), |
301 | 0 | data_type: DataType::FixedSizeList(field, size), |
302 | 0 | nulls: Some(NullBuffer::new_null(len)), |
303 | 0 | value_length: size, |
304 | 0 | len, |
305 | 0 | } |
306 | 0 | } |
307 | | |
308 | | /// Deconstruct this array into its constituent parts |
309 | 0 | pub fn into_parts(self) -> (FieldRef, i32, ArrayRef, Option<NullBuffer>) { |
310 | 0 | let f = match self.data_type { |
311 | 0 | DataType::FixedSizeList(f, _) => f, |
312 | 0 | _ => unreachable!(), |
313 | | }; |
314 | 0 | (f, self.value_length, self.values, self.nulls) |
315 | 0 | } |
316 | | |
317 | | /// Returns a reference to the values of this list. |
318 | 5 | pub fn values(&self) -> &ArrayRef { |
319 | 5 | &self.values |
320 | 5 | } |
321 | | |
322 | | /// Returns a clone of the value type of this list. |
323 | 0 | pub fn value_type(&self) -> DataType { |
324 | 0 | self.values.data_type().clone() |
325 | 0 | } |
326 | | |
327 | | /// Returns ith value of this list array. |
328 | | /// |
329 | | /// Note: This method does not check for nulls and the value is arbitrary |
330 | | /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index. |
331 | | /// |
332 | | /// # Panics |
333 | | /// Panics if index `i` is out of bounds |
334 | 5 | pub fn value(&self, i: usize) -> ArrayRef { |
335 | 5 | self.values |
336 | 5 | .slice(self.value_offset_at(i), self.value_length() as usize) |
337 | 5 | } |
338 | | |
339 | | /// Returns the offset for value at index `i`. |
340 | | /// |
341 | | /// Note this doesn't do any bound checking, for performance reason. |
342 | | #[inline] |
343 | 20 | pub fn value_offset(&self, i: usize) -> i32 { |
344 | 20 | self.value_offset_at(i) as i32 |
345 | 20 | } |
346 | | |
347 | | /// Returns the length for an element. |
348 | | /// |
349 | | /// All elements have the same length as the array is a fixed size. |
350 | | #[inline] |
351 | 5 | pub const fn value_length(&self) -> i32 { |
352 | 5 | self.value_length |
353 | 5 | } |
354 | | |
355 | | #[inline] |
356 | 25 | const fn value_offset_at(&self, i: usize) -> usize { |
357 | 25 | i * self.value_length as usize |
358 | 25 | } |
359 | | |
360 | | /// Returns a zero-copy slice of this array with the indicated offset and length. |
361 | 0 | pub fn slice(&self, offset: usize, len: usize) -> Self { |
362 | 0 | assert!( |
363 | 0 | offset.saturating_add(len) <= self.len, |
364 | 0 | "the length + offset of the sliced FixedSizeListArray cannot exceed the existing length" |
365 | | ); |
366 | 0 | let size = self.value_length as usize; |
367 | | |
368 | | Self { |
369 | 0 | data_type: self.data_type.clone(), |
370 | 0 | values: self.values.slice(offset * size, len * size), |
371 | 0 | nulls: self.nulls.as_ref().map(|n| n.slice(offset, len)), |
372 | 0 | value_length: self.value_length, |
373 | 0 | len, |
374 | | } |
375 | 0 | } |
376 | | |
377 | | /// Creates a [`FixedSizeListArray`] from an iterator of primitive values |
378 | | /// # Example |
379 | | /// ``` |
380 | | /// # use arrow_array::FixedSizeListArray; |
381 | | /// # use arrow_array::types::Int32Type; |
382 | | /// |
383 | | /// let data = vec![ |
384 | | /// Some(vec![Some(0), Some(1), Some(2)]), |
385 | | /// None, |
386 | | /// Some(vec![Some(3), None, Some(5)]), |
387 | | /// Some(vec![Some(6), Some(7), Some(45)]), |
388 | | /// ]; |
389 | | /// let list_array = FixedSizeListArray::from_iter_primitive::<Int32Type, _, _>(data, 3); |
390 | | /// println!("{:?}", list_array); |
391 | | /// ``` |
392 | 11 | pub fn from_iter_primitive<T, P, I>(iter: I, length: i32) -> Self |
393 | 11 | where |
394 | 11 | T: ArrowPrimitiveType, |
395 | 11 | P: IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>, |
396 | 11 | I: IntoIterator<Item = Option<P>>, |
397 | | { |
398 | 11 | let l = length as usize; |
399 | 11 | let iter = iter.into_iter(); |
400 | 11 | let size_hint = iter.size_hint().0; |
401 | 11 | let mut builder = FixedSizeListBuilder::with_capacity( |
402 | 11 | PrimitiveBuilder::<T>::with_capacity(size_hint * l), |
403 | 11 | length, |
404 | 11 | size_hint, |
405 | | ); |
406 | | |
407 | 55 | for i44 in iter { |
408 | 44 | match i { |
409 | 36 | Some(p) => { |
410 | 112 | for t76 in p { |
411 | 76 | builder.values().append_option(t); |
412 | 76 | } |
413 | 36 | builder.append(true); |
414 | | } |
415 | | None => { |
416 | 8 | builder.values().append_nulls(l); |
417 | 8 | builder.append(false) |
418 | | } |
419 | | } |
420 | | } |
421 | 11 | builder.finish() |
422 | 11 | } |
423 | | |
424 | | /// constructs a new iterator |
425 | 0 | pub fn iter(&self) -> FixedSizeListIter<'_> { |
426 | 0 | FixedSizeListIter::new(self) |
427 | 0 | } |
428 | | } |
429 | | |
430 | | impl From<ArrayData> for FixedSizeListArray { |
431 | 10 | fn from(data: ArrayData) -> Self { |
432 | 10 | let value_length = match data.data_type() { |
433 | 10 | DataType::FixedSizeList(_, len) => *len, |
434 | 0 | data_type => { |
435 | 0 | panic!( |
436 | 0 | "FixedSizeListArray data should contain a FixedSizeList data type, got {data_type}" |
437 | | ) |
438 | | } |
439 | | }; |
440 | | |
441 | 10 | let size = value_length as usize; |
442 | 10 | let values = |
443 | 10 | make_array(data.child_data()[0].slice(data.offset() * size, data.len() * size)); |
444 | 10 | Self { |
445 | 10 | data_type: data.data_type().clone(), |
446 | 10 | values, |
447 | 10 | nulls: data.nulls().cloned(), |
448 | 10 | value_length, |
449 | 10 | len: data.len(), |
450 | 10 | } |
451 | 10 | } |
452 | | } |
453 | | |
454 | | impl From<FixedSizeListArray> for ArrayData { |
455 | 14 | fn from(array: FixedSizeListArray) -> Self { |
456 | 14 | let builder = ArrayDataBuilder::new(array.data_type) |
457 | 14 | .len(array.len) |
458 | 14 | .nulls(array.nulls) |
459 | 14 | .child_data(vec![array.values.to_data()]); |
460 | | |
461 | 14 | unsafe { builder.build_unchecked() } |
462 | 14 | } |
463 | | } |
464 | | |
465 | | impl Array for FixedSizeListArray { |
466 | 5 | fn as_any(&self) -> &dyn Any { |
467 | 5 | self |
468 | 5 | } |
469 | | |
470 | 14 | fn to_data(&self) -> ArrayData { |
471 | 14 | self.clone().into() |
472 | 14 | } |
473 | | |
474 | 0 | fn into_data(self) -> ArrayData { |
475 | 0 | self.into() |
476 | 0 | } |
477 | | |
478 | 18 | fn data_type(&self) -> &DataType { |
479 | 18 | &self.data_type |
480 | 18 | } |
481 | | |
482 | 0 | fn slice(&self, offset: usize, length: usize) -> ArrayRef { |
483 | 0 | Arc::new(self.slice(offset, length)) |
484 | 0 | } |
485 | | |
486 | 12 | fn len(&self) -> usize { |
487 | 12 | self.len |
488 | 12 | } |
489 | | |
490 | 0 | fn is_empty(&self) -> bool { |
491 | 0 | self.len == 0 |
492 | 0 | } |
493 | | |
494 | 0 | fn shrink_to_fit(&mut self) { |
495 | 0 | self.values.shrink_to_fit(); |
496 | 0 | if let Some(nulls) = &mut self.nulls { |
497 | 0 | nulls.shrink_to_fit(); |
498 | 0 | } |
499 | 0 | } |
500 | | |
501 | 0 | fn offset(&self) -> usize { |
502 | 0 | 0 |
503 | 0 | } |
504 | | |
505 | 13 | fn nulls(&self) -> Option<&NullBuffer> { |
506 | 13 | self.nulls.as_ref() |
507 | 13 | } |
508 | | |
509 | 0 | fn logical_null_count(&self) -> usize { |
510 | | // More efficient that the default implementation |
511 | 0 | self.null_count() |
512 | 0 | } |
513 | | |
514 | 0 | fn get_buffer_memory_size(&self) -> usize { |
515 | 0 | let mut size = self.values.get_buffer_memory_size(); |
516 | 0 | if let Some(n) = self.nulls.as_ref() { |
517 | 0 | size += n.buffer().capacity(); |
518 | 0 | } |
519 | 0 | size |
520 | 0 | } |
521 | | |
522 | 0 | fn get_array_memory_size(&self) -> usize { |
523 | 0 | let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size(); |
524 | 0 | if let Some(n) = self.nulls.as_ref() { |
525 | 0 | size += n.buffer().capacity(); |
526 | 0 | } |
527 | 0 | size |
528 | 0 | } |
529 | | } |
530 | | |
531 | | impl ArrayAccessor for FixedSizeListArray { |
532 | | type Item = ArrayRef; |
533 | | |
534 | 0 | fn value(&self, index: usize) -> Self::Item { |
535 | 0 | FixedSizeListArray::value(self, index) |
536 | 0 | } |
537 | | |
538 | 0 | unsafe fn value_unchecked(&self, index: usize) -> Self::Item { |
539 | 0 | FixedSizeListArray::value(self, index) |
540 | 0 | } |
541 | | } |
542 | | |
543 | | impl std::fmt::Debug for FixedSizeListArray { |
544 | 0 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { |
545 | 0 | write!(f, "FixedSizeListArray<{}>\n[\n", self.value_length())?; |
546 | 0 | print_long_array(self, f, |array, index, f| { |
547 | 0 | std::fmt::Debug::fmt(&array.value(index), f) |
548 | 0 | })?; |
549 | 0 | write!(f, "]") |
550 | 0 | } |
551 | | } |
552 | | |
553 | | impl ArrayAccessor for &FixedSizeListArray { |
554 | | type Item = ArrayRef; |
555 | | |
556 | 0 | fn value(&self, index: usize) -> Self::Item { |
557 | 0 | FixedSizeListArray::value(self, index) |
558 | 0 | } |
559 | | |
560 | 0 | unsafe fn value_unchecked(&self, index: usize) -> Self::Item { |
561 | 0 | FixedSizeListArray::value(self, index) |
562 | 0 | } |
563 | | } |
564 | | |
#[cfg(test)]
mod tests {
    use arrow_buffer::{BooleanBuffer, Buffer, bit_util};
    use arrow_schema::Field;

    use crate::cast::AsArray;
    use crate::types::Int32Type;
    use crate::{Int32Array, new_empty_array};

    use super::*;

    /// Builds Int32 [`ArrayData`] holding the values `0..count`.
    fn sequential_int32_data(count: i32) -> ArrayData {
        let values: Vec<i32> = (0..count).collect();
        ArrayData::builder(DataType::Int32)
            .len(values.len())
            .add_buffer(Buffer::from_slice_ref(values.as_slice()))
            .build()
            .unwrap()
    }

    /// `FixedSizeList` type of the given size over a non-nullable Int32 list field.
    fn int32_list_type(size: i32) -> DataType {
        DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, false)), size)
    }

    /// Validity bitmap for `[[0, 1], null, null, [6, 7], [8, 9]]`:
    /// bits 0, 3 and 4 are set, i.e. `0b00011001`.
    fn two_null_lists_validity() -> [u8; 1] {
        let mut null_bits: [u8; 1] = [0; 1];
        for valid_index in [0, 3, 4] {
            bit_util::set_bit(&mut null_bits, valid_index);
        }
        null_bits
    }

    #[test]
    fn test_fixed_size_list_array() {
        // Construct a value array
        let value_data = sequential_int32_data(9);

        // Construct a list array on top of the value array
        let list_data_type = int32_list_type(3);
        let list_data = ArrayData::builder(list_data_type.clone())
            .len(3)
            .add_child_data(value_data.clone())
            .build()
            .unwrap();
        let list_array = FixedSizeListArray::from(list_data);

        assert_eq!(value_data, list_array.values().to_data());
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(3, list_array.len());
        assert_eq!(0, list_array.null_count());
        assert_eq!(6, list_array.value_offset(2));
        assert_eq!(3, list_array.value_length());
        assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
        for i in 0..3 {
            assert!(list_array.is_valid(i));
            assert!(!list_array.is_null(i));
        }

        // Now test with a non-zero offset
        let list_data = ArrayData::builder(list_data_type)
            .len(2)
            .offset(1)
            .add_child_data(value_data.clone())
            .build()
            .unwrap();
        let list_array = FixedSizeListArray::from(list_data);

        assert_eq!(value_data.slice(3, 6), list_array.values().to_data());
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(2, list_array.len());
        assert_eq!(0, list_array.null_count());
        assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
        assert_eq!(3, list_array.value_offset(1));
        assert_eq!(3, list_array.value_length());
    }

    #[test]
    #[should_panic(expected = "assertion failed: (offset + length) <= self.len()")]
    // Different error messages, so skip for now
    // https://github.com/apache/arrow-rs/issues/1545
    #[cfg(not(feature = "force_validate"))]
    fn test_fixed_size_list_array_unequal_children() {
        // Only 8 child values: not enough for 3 lists of size 3
        let value_data = sequential_int32_data(8);

        // Skip validation so the mismatch reaches the conversion
        let list_data = unsafe {
            ArrayData::builder(int32_list_type(3))
                .len(3)
                .add_child_data(value_data)
                .build_unchecked()
        };
        drop(FixedSizeListArray::from(list_data));
    }

    #[test]
    fn test_fixed_size_list_array_slice() {
        // Construct a value array
        let value_data = sequential_int32_data(10);

        // Set null bits for the nested array:
        //  [[0, 1], null, null, [6, 7], [8, 9]]
        let null_bits = two_null_lists_validity();

        // Construct a fixed size list array from the above two
        let list_data = ArrayData::builder(int32_list_type(2))
            .len(5)
            .add_child_data(value_data.clone())
            .null_bit_buffer(Some(Buffer::from(null_bits)))
            .build()
            .unwrap();
        let list_array = FixedSizeListArray::from(list_data);

        assert_eq!(value_data, list_array.values().to_data());
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(5, list_array.len());
        assert_eq!(2, list_array.null_count());
        assert_eq!(6, list_array.value_offset(3));
        assert_eq!(2, list_array.value_length());

        let sliced_array = list_array.slice(1, 4);
        assert_eq!(4, sliced_array.len());
        assert_eq!(2, sliced_array.null_count());

        // Slot `i` of the slice corresponds to validity bit `1 + i` of the original
        for i in 0..sliced_array.len() {
            if bit_util::get_bit(&null_bits, 1 + i) {
                assert!(sliced_array.is_valid(i));
            } else {
                assert!(sliced_array.is_null(i));
            }
        }

        // Check offset and length for each non-null value.
        let sliced_list_array = sliced_array
            .as_any()
            .downcast_ref::<FixedSizeListArray>()
            .unwrap();
        assert_eq!(2, sliced_list_array.value_length());
        assert_eq!(4, sliced_list_array.value_offset(2));
        assert_eq!(6, sliced_list_array.value_offset(3));
    }

    #[test]
    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
    fn test_fixed_size_list_array_index_out_of_bound() {
        // Construct a value array
        let value_data = sequential_int32_data(10);

        // Set null bits for the nested array:
        //  [[0, 1], null, null, [6, 7], [8, 9]]
        let null_bits = two_null_lists_validity();

        // Construct a fixed size list array from the above two
        let list_data = ArrayData::builder(int32_list_type(2))
            .len(5)
            .add_child_data(value_data)
            .null_bit_buffer(Some(Buffer::from(null_bits)))
            .build()
            .unwrap();
        let list_array = FixedSizeListArray::from(list_data);

        // Index 10 is far past the 5 lists in the array
        list_array.value(10);
    }

    #[test]
    fn test_fixed_size_list_constructors() {
        let values = Arc::new(Int32Array::from_iter([
            Some(1),
            Some(2),
            None,
            None,
            Some(3),
            Some(4),
        ]));

        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
        let list = FixedSizeListArray::new(field.clone(), 2, values.clone(), None);
        assert_eq!(list.len(), 3);

        let nulls = NullBuffer::new_null(3);
        let list = FixedSizeListArray::new(field.clone(), 2, values.clone(), Some(nulls));
        assert_eq!(list.len(), 3);

        let list = FixedSizeListArray::new(field.clone(), 3, values.clone(), None);
        assert_eq!(list.len(), 2);

        // 6 values cannot be split into lists of 4
        let err = FixedSizeListArray::try_new(field.clone(), 4, values.clone(), None).unwrap_err();
        assert_eq!(
            err.to_string(),
            "Invalid argument error: Incorrect length of values buffer for FixedSizeListArray, \
             expected a multiple of 4 got 6",
        );

        let err =
            FixedSizeListArray::try_new_with_length(field.clone(), 4, values.clone(), None, 1)
                .unwrap_err();
        assert_eq!(
            err.to_string(),
            "Invalid argument error: Incorrect length of values buffer for FixedSizeListArray, expected 4 got 6"
        );

        let err = FixedSizeListArray::try_new(field.clone(), -1, values.clone(), None).unwrap_err();
        assert_eq!(
            err.to_string(),
            "Invalid argument error: Size cannot be negative, got -1"
        );

        // Null buffer describes 2 lists while the values describe 3
        let nulls = NullBuffer::new_null(2);
        let err = FixedSizeListArray::try_new(field, 2, values.clone(), Some(nulls)).unwrap_err();
        assert_eq!(
            err.to_string(),
            "Invalid argument error: Incorrect length of values buffer for FixedSizeListArray, expected 4 got 6"
        );

        let field = Arc::new(Field::new_list_field(DataType::Int32, false));
        let err = FixedSizeListArray::try_new(field.clone(), 2, values.clone(), None).unwrap_err();
        assert_eq!(
            err.to_string(),
            "Invalid argument error: Found unmasked nulls for non-nullable FixedSizeListArray field \"item\""
        );

        // Valid as nulls in child masked by parent
        let nulls = NullBuffer::new(BooleanBuffer::new(Buffer::from([0b0000101]), 0, 3));
        FixedSizeListArray::new(field, 2, values.clone(), Some(nulls));

        let field = Arc::new(Field::new_list_field(DataType::Int64, true));
        let err = FixedSizeListArray::try_new(field, 2, values, None).unwrap_err();
        assert_eq!(
            err.to_string(),
            "Invalid argument error: FixedSizeListArray expected data type Int64 got Int32 for \"item\""
        );
    }

    #[test]
    fn degenerate_fixed_size_list() {
        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
        let nulls = NullBuffer::new_null(2);
        let values = new_empty_array(&DataType::Int32);
        let list = FixedSizeListArray::new(field.clone(), 0, values.clone(), Some(nulls.clone()));
        assert_eq!(list.len(), 2);

        // Test invalid null buffer length.
        let err = FixedSizeListArray::try_new_with_length(
            field.clone(),
            0,
            values.clone(),
            Some(nulls),
            5,
        )
        .unwrap_err();
        assert_eq!(
            err.to_string(),
            "Invalid argument error: Invalid null buffer for FixedSizeListArray, expected 5 found 2"
        );

        // Test non-empty values for degenerate list.
        let non_empty_values = Arc::new(Int32Array::from(vec![1, 2, 3]));
        let err =
            FixedSizeListArray::try_new_with_length(field.clone(), 0, non_empty_values, None, 3)
                .unwrap_err();
        assert_eq!(
            err.to_string(),
            "Invalid argument error: An degenerate FixedSizeListArray should have no underlying values, found 3 values"
        );
    }
}