/Users/andrewlamb/Software/arrow-rs/arrow-array/src/array/list_view_array.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use arrow_buffer::{NullBuffer, ScalarBuffer}; |
19 | | use arrow_data::{ArrayData, ArrayDataBuilder}; |
20 | | use arrow_schema::{ArrowError, DataType, FieldRef}; |
21 | | use std::any::Any; |
22 | | use std::ops::Add; |
23 | | use std::sync::Arc; |
24 | | |
25 | | use crate::array::{make_array, print_long_array}; |
26 | | use crate::iterator::GenericListViewArrayIter; |
27 | | use crate::{Array, ArrayAccessor, ArrayRef, FixedSizeListArray, OffsetSizeTrait, new_empty_array}; |
28 | | |
29 | | /// A [`GenericListViewArray`] of variable size lists, storing offsets as `i32`. |
30 | | pub type ListViewArray = GenericListViewArray<i32>; |
31 | | |
32 | | /// A [`GenericListViewArray`] of variable size lists, storing offsets as `i64`. |
33 | | pub type LargeListViewArray = GenericListViewArray<i64>; |
34 | | |
35 | | /// An array of [variable length lists], specifically in the [list-view layout]. |
36 | | /// |
37 | | /// Differs from [`GenericListArray`] (which represents the [list layout]) in that |
38 | | /// the sizes of the child arrays are explicitly encoded in a separate buffer, instead |
39 | | /// of being derived from the difference between subsequent offsets in the offset buffer. |
40 | | /// |
41 | | /// This allows the offsets (and subsequently child data) to be out of order. It also |
42 | | /// allows take / filter operations to be implemented without copying the underlying data. |
43 | | /// |
44 | | /// # Representation |
45 | | /// |
46 | | /// Given the same example array from [`GenericListArray`], it would be represented |
47 | | /// as such via a list-view layout array: |
48 | | /// |
49 | | /// ```text |
50 | | /// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ |
51 | | /// ┌ ─ ─ ─ ─ ─ ─ ┐ │ |
52 | | /// ┌─────────────┐ ┌───────┐ │ ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ |
53 | | /// │ [A,B,C] │ │ (0,3) │ │ 1 │ │ 0 │ │ 3 │ │ │ 1 │ │ A │ │ 0 │ |
54 | | /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ ├───┤ |
55 | | /// │ [] │ │ (3,0) │ │ 1 │ │ 3 │ │ 0 │ │ │ 1 │ │ B │ │ 1 │ |
56 | | /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ ├───┤ |
57 | | /// │ NULL │ │ (?,?) │ │ 0 │ │ ? │ │ ? │ │ │ 1 │ │ C │ │ 2 │ |
58 | | /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ ├───┤ |
59 | | /// │ [D] │ │ (4,1) │ │ 1 │ │ 4 │ │ 1 │ │ │ ? │ │ ? │ │ 3 │ |
60 | | /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ ├───┤ |
61 | | /// │ [NULL, F] │ │ (5,2) │ │ 1 │ │ 5 │ │ 2 │ │ │ 1 │ │ D │ │ 4 │ |
62 | | /// └─────────────┘ └───────┘ │ └───┘ └───┘ └───┘ ├───┤ ├───┤ |
63 | | /// │ │ 0 │ │ ? │ │ 5 │ |
64 | | /// Logical Logical │ Validity Offsets Sizes ├───┤ ├───┤ |
65 | | /// Values Offset (nulls) │ │ 1 │ │ F │ │ 6 │ |
66 | | /// & Size │ └───┘ └───┘ |
67 | | /// │ Values │ │ |
68 | | /// (offsets[i], │ ListViewArray (Array) |
69 | | /// sizes[i]) └ ─ ─ ─ ─ ─ ─ ┘ │ |
70 | | /// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ |
71 | | /// ``` |
72 | | /// |
73 | | /// Another way of representing the same array but taking advantage of the offsets being out of order: |
74 | | /// |
75 | | /// ```text |
76 | | /// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ |
77 | | /// ┌ ─ ─ ─ ─ ─ ─ ┐ │ |
78 | | /// ┌─────────────┐ ┌───────┐ │ ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ |
79 | | /// │ [A,B,C] │ │ (2,3) │ │ 1 │ │ 2 │ │ 3 │ │ │ 0 │ │ ? │ │ 0 │ |
80 | | /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ ├───┤ |
81 | | /// │ [] │ │ (0,0) │ │ 1 │ │ 0 │ │ 0 │ │ │ 1 │ │ F │ │ 1 │ |
82 | | /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ ├───┤ |
83 | | /// │ NULL │ │ (?,?) │ │ 0 │ │ ? │ │ ? │ │ │ 1 │ │ A │ │ 2 │ |
84 | | /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ ├───┤ |
85 | | /// │ [D] │ │ (5,1) │ │ 1 │ │ 5 │ │ 1 │ │ │ 1 │ │ B │ │ 3 │ |
86 | | /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ ├───┤ |
87 | | /// │ [NULL, F] │ │ (0,2) │ │ 1 │ │ 0 │ │ 2 │ │ │ 1 │ │ C │ │ 4 │ |
88 | | /// └─────────────┘ └───────┘ │ └───┘ └───┘ └───┘ ├───┤ ├───┤ |
89 | | /// │ │ 1 │ │ D │ │ 5 │ |
90 | | /// Logical Logical │ Validity Offsets Sizes └───┘ └───┘ |
91 | | /// Values Offset (nulls) │ Values │ │ |
92 | | /// & Size │ (Array) |
93 | | /// └ ─ ─ ─ ─ ─ ─ ┘ │ |
94 | | /// (offsets[i], │ ListViewArray |
95 | | /// sizes[i]) │ |
96 | | /// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ |
97 | | /// ``` |
98 | | /// |
99 | | /// [`GenericListArray`]: crate::array::GenericListArray |
100 | | /// [variable length lists]: https://arrow.apache.org/docs/format/Columnar.html#variable-size-list-layout |
101 | | /// [list layout]: https://arrow.apache.org/docs/format/Columnar.html#list-layout |
102 | | /// [list-view layout]: https://arrow.apache.org/docs/format/Columnar.html#listview-layout |
103 | | #[derive(Clone)] |
104 | | pub struct GenericListViewArray<OffsetSize: OffsetSizeTrait> { |
105 | | data_type: DataType, |
106 | | nulls: Option<NullBuffer>, |
107 | | values: ArrayRef, |
108 | | // Unlike GenericListArray, we do not use OffsetBuffer here as offsets are not |
109 | | // guaranteed to be monotonically increasing. |
110 | | value_offsets: ScalarBuffer<OffsetSize>, |
111 | | value_sizes: ScalarBuffer<OffsetSize>, |
112 | | } |
113 | | |
114 | | impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> { |
115 | | /// The data type constructor of listview array. |
116 | | /// The input is the schema of the child array and |
117 | | /// the output is the [`DataType`], ListView or LargeListView. |
118 | | pub const DATA_TYPE_CONSTRUCTOR: fn(FieldRef) -> DataType = if OffsetSize::IS_LARGE { |
119 | | DataType::LargeListView |
120 | | } else { |
121 | | DataType::ListView |
122 | | }; |
123 | | |
124 | | /// Create a new [`GenericListViewArray`] from the provided parts |
125 | | /// |
126 | | /// # Errors |
127 | | /// |
128 | | /// Errors if |
129 | | /// |
130 | | /// * `offsets.len() != sizes.len()` |
131 | | /// * `offsets.len() != nulls.len()` |
132 | | /// * `offsets[i] > values.len()` |
133 | | /// * `!field.is_nullable() && values.is_nullable()` |
134 | | /// * `field.data_type() != values.data_type()` |
135 | | /// * `0 <= offsets[i] <= length of the child array` |
136 | | /// * `0 <= offsets[i] + size[i] <= length of the child array` |
137 | 34 | pub fn try_new( |
138 | 34 | field: FieldRef, |
139 | 34 | offsets: ScalarBuffer<OffsetSize>, |
140 | 34 | sizes: ScalarBuffer<OffsetSize>, |
141 | 34 | values: ArrayRef, |
142 | 34 | nulls: Option<NullBuffer>, |
143 | 34 | ) -> Result<Self, ArrowError> { |
144 | 34 | let len = offsets.len(); |
145 | 34 | if let Some(n28 ) = nulls.as_ref() { |
146 | 28 | if n.len() != len { |
147 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
148 | 0 | "Incorrect length of null buffer for {}ListViewArray, expected {len} got {}", |
149 | 0 | OffsetSize::PREFIX, |
150 | 0 | n.len(), |
151 | 0 | ))); |
152 | 28 | } |
153 | 6 | } |
154 | 34 | if len != sizes.len() { |
155 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
156 | 0 | "Length of offsets buffer and sizes buffer must be equal for {}ListViewArray, got {len} and {}", |
157 | 0 | OffsetSize::PREFIX, |
158 | 0 | sizes.len() |
159 | 0 | ))); |
160 | 34 | } |
161 | | |
162 | 122 | for (offset, size) in offsets.iter()34 .zip34 (sizes.iter()34 ) { |
163 | 122 | let offset = offset.as_usize(); |
164 | 122 | let size = size.as_usize(); |
165 | 122 | if offset.checked_add(size).ok_or_else(|| {0 |
166 | 0 | ArrowError::InvalidArgumentError(format!( |
167 | 0 | "Overflow in offset + size for {}ListViewArray", |
168 | 0 | OffsetSize::PREFIX |
169 | 0 | )) |
170 | 122 | }0 )?0 > values.len() |
171 | | { |
172 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
173 | 0 | "Offset + size for {}ListViewArray must be within the bounds of the child array, got offset: {offset}, size: {size}, child array length: {}", |
174 | 0 | OffsetSize::PREFIX, |
175 | 0 | values.len() |
176 | 0 | ))); |
177 | 122 | } |
178 | | } |
179 | | |
180 | 34 | if !field.is_nullable() && values0 .is_nullable0 () { |
181 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
182 | 0 | "Non-nullable field of {}ListViewArray {:?} cannot contain nulls", |
183 | 0 | OffsetSize::PREFIX, |
184 | 0 | field.name() |
185 | 0 | ))); |
186 | 34 | } |
187 | | |
188 | 34 | if field.data_type() != values.data_type() { |
189 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
190 | 0 | "{}ListViewArray expected data type {} got {} for {:?}", |
191 | 0 | OffsetSize::PREFIX, |
192 | 0 | field.data_type(), |
193 | 0 | values.data_type(), |
194 | 0 | field.name() |
195 | 0 | ))); |
196 | 34 | } |
197 | | |
198 | 34 | Ok(Self { |
199 | 34 | data_type: Self::DATA_TYPE_CONSTRUCTOR(field), |
200 | 34 | nulls, |
201 | 34 | values, |
202 | 34 | value_offsets: offsets, |
203 | 34 | value_sizes: sizes, |
204 | 34 | }) |
205 | 34 | } |
206 | | |
207 | | /// Create a new [`GenericListViewArray`] from the provided parts |
208 | | /// |
209 | | /// # Panics |
210 | | /// |
211 | | /// Panics if [`Self::try_new`] returns an error |
212 | 32 | pub fn new( |
213 | 32 | field: FieldRef, |
214 | 32 | offsets: ScalarBuffer<OffsetSize>, |
215 | 32 | sizes: ScalarBuffer<OffsetSize>, |
216 | 32 | values: ArrayRef, |
217 | 32 | nulls: Option<NullBuffer>, |
218 | 32 | ) -> Self { |
219 | 32 | Self::try_new(field, offsets, sizes, values, nulls).unwrap() |
220 | 32 | } |
221 | | |
222 | | /// Create a new [`GenericListViewArray`] of length `len` where all values are null |
223 | | pub fn new_null(field: FieldRef, len: usize) -> Self { |
224 | | let values = new_empty_array(field.data_type()); |
225 | | Self { |
226 | | data_type: Self::DATA_TYPE_CONSTRUCTOR(field), |
227 | | nulls: Some(NullBuffer::new_null(len)), |
228 | | value_offsets: ScalarBuffer::from(vec![]), |
229 | | value_sizes: ScalarBuffer::from(vec![]), |
230 | | values, |
231 | | } |
232 | | } |
233 | | |
234 | | /// Deconstruct this array into its constituent parts |
235 | | pub fn into_parts( |
236 | | self, |
237 | | ) -> ( |
238 | | FieldRef, |
239 | | ScalarBuffer<OffsetSize>, |
240 | | ScalarBuffer<OffsetSize>, |
241 | | ArrayRef, |
242 | | Option<NullBuffer>, |
243 | | ) { |
244 | | let f = match self.data_type { |
245 | | DataType::ListView(f) | DataType::LargeListView(f) => f, |
246 | | _ => unreachable!(), |
247 | | }; |
248 | | ( |
249 | | f, |
250 | | self.value_offsets, |
251 | | self.value_sizes, |
252 | | self.values, |
253 | | self.nulls, |
254 | | ) |
255 | | } |
256 | | |
257 | | /// Returns a reference to the offsets of this list |
258 | | /// |
259 | | /// Unlike [`Self::value_offsets`] this returns the [`ScalarBuffer`] |
260 | | /// allowing for zero-copy cloning |
261 | | #[inline] |
262 | 24 | pub fn offsets(&self) -> &ScalarBuffer<OffsetSize> { |
263 | 24 | &self.value_offsets |
264 | 24 | } |
265 | | |
266 | | /// Returns a reference to the values of this list |
267 | | #[inline] |
268 | 24 | pub fn values(&self) -> &ArrayRef { |
269 | 24 | &self.values |
270 | 24 | } |
271 | | |
272 | | /// Returns a reference to the sizes of this list |
273 | | /// |
274 | | /// Unlike [`Self::value_sizes`] this returns the [`ScalarBuffer`] |
275 | | /// allowing for zero-copy cloning |
276 | | #[inline] |
277 | 58 | pub fn sizes(&self) -> &ScalarBuffer<OffsetSize> { |
278 | 58 | &self.value_sizes |
279 | 58 | } |
280 | | |
281 | | /// Returns a clone of the value type of this list. |
282 | | pub fn value_type(&self) -> DataType { |
283 | | self.values.data_type().clone() |
284 | | } |
285 | | |
286 | | /// Returns ith value of this list view array. |
287 | | /// |
288 | | /// Note: This method does not check for nulls and the value is arbitrary |
289 | | /// if [`is_null`](Self::is_null) returns true for the index. |
290 | | /// |
291 | | /// # Safety |
292 | | /// Caller must ensure that the index is within the array bounds |
293 | | pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef { |
294 | | let offset = unsafe { self.value_offsets().get_unchecked(i).as_usize() }; |
295 | | let length = unsafe { self.value_sizes().get_unchecked(i).as_usize() }; |
296 | | self.values.slice(offset, length) |
297 | | } |
298 | | |
299 | | /// Returns ith value of this list view array. |
300 | | /// |
301 | | /// Note: This method does not check for nulls and the value is arbitrary |
302 | | /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index. |
303 | | /// |
304 | | /// # Panics |
305 | | /// Panics if the index is out of bounds |
306 | 0 | pub fn value(&self, i: usize) -> ArrayRef { |
307 | 0 | let offset = self.value_offsets()[i].as_usize(); |
308 | 0 | let length = self.value_sizes()[i].as_usize(); |
309 | 0 | self.values.slice(offset, length) |
310 | 0 | } |
311 | | |
312 | | /// Returns the offset values in the offsets buffer |
313 | | #[inline] |
314 | 0 | pub fn value_offsets(&self) -> &[OffsetSize] { |
315 | 0 | &self.value_offsets |
316 | 0 | } |
317 | | |
318 | | /// Returns the sizes values in the offsets buffer |
319 | | #[inline] |
320 | 0 | pub fn value_sizes(&self) -> &[OffsetSize] { |
321 | 0 | &self.value_sizes |
322 | 0 | } |
323 | | |
324 | | /// Returns the size for value at index `i`. |
325 | | #[inline] |
326 | | pub fn value_size(&self, i: usize) -> OffsetSize { |
327 | | self.value_sizes[i] |
328 | | } |
329 | | |
330 | | /// Returns the offset for value at index `i`. |
331 | | pub fn value_offset(&self, i: usize) -> OffsetSize { |
332 | | self.value_offsets[i] |
333 | | } |
334 | | |
335 | | /// Constructs a new iterator |
336 | | pub fn iter(&self) -> GenericListViewArrayIter<'_, OffsetSize> { |
337 | | GenericListViewArrayIter::<'_, OffsetSize>::new(self) |
338 | | } |
339 | | |
340 | | #[inline] |
341 | 12 | fn get_type(data_type: &DataType) -> Option<&DataType> { |
342 | 12 | match (OffsetSize::IS_LARGE, data_type) { |
343 | 6 | (true, DataType::LargeListView(child)) | (false, DataType::ListView(child)) => { |
344 | 12 | Some(child.data_type()) |
345 | | } |
346 | 0 | _ => None, |
347 | | } |
348 | 12 | } |
349 | | |
350 | | /// Returns a zero-copy slice of this array with the indicated offset and length. |
351 | 7 | pub fn slice(&self, offset: usize, length: usize) -> Self { |
352 | | Self { |
353 | 7 | data_type: self.data_type.clone(), |
354 | 7 | nulls: self.nulls.as_ref().map(|n| n6 .slice6 (offset6 , length6 )), |
355 | 7 | values: self.values.clone(), |
356 | 7 | value_offsets: self.value_offsets.slice(offset, length), |
357 | 7 | value_sizes: self.value_sizes.slice(offset, length), |
358 | | } |
359 | 7 | } |
360 | | } |
361 | | |
362 | | impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListViewArray<OffsetSize> { |
363 | | type Item = ArrayRef; |
364 | | |
365 | | fn value(&self, index: usize) -> Self::Item { |
366 | | GenericListViewArray::value(self, index) |
367 | | } |
368 | | |
369 | | unsafe fn value_unchecked(&self, index: usize) -> Self::Item { |
370 | | unsafe { GenericListViewArray::value_unchecked(self, index) } |
371 | | } |
372 | | } |
373 | | |
374 | | impl<OffsetSize: OffsetSizeTrait> Array for GenericListViewArray<OffsetSize> { |
375 | 30 | fn as_any(&self) -> &dyn Any { |
376 | 30 | self |
377 | 30 | } |
378 | | |
379 | 28 | fn to_data(&self) -> ArrayData { |
380 | 28 | self.clone().into() |
381 | 28 | } |
382 | | |
383 | 0 | fn into_data(self) -> ArrayData { |
384 | 0 | self.into() |
385 | 0 | } |
386 | | |
387 | 46 | fn data_type(&self) -> &DataType { |
388 | 46 | &self.data_type |
389 | 46 | } |
390 | | |
391 | 0 | fn slice(&self, offset: usize, length: usize) -> ArrayRef { |
392 | 0 | Arc::new(self.slice(offset, length)) |
393 | 0 | } |
394 | | |
395 | 40 | fn len(&self) -> usize { |
396 | 40 | self.sizes().len() |
397 | 40 | } |
398 | | |
399 | 0 | fn is_empty(&self) -> bool { |
400 | 0 | self.value_sizes.is_empty() |
401 | 0 | } |
402 | | |
403 | 0 | fn shrink_to_fit(&mut self) { |
404 | 0 | if let Some(nulls) = &mut self.nulls { |
405 | 0 | nulls.shrink_to_fit(); |
406 | 0 | } |
407 | 0 | self.values.shrink_to_fit(); |
408 | 0 | self.value_offsets.shrink_to_fit(); |
409 | 0 | self.value_sizes.shrink_to_fit(); |
410 | 0 | } |
411 | | |
412 | 0 | fn offset(&self) -> usize { |
413 | 0 | 0 |
414 | 0 | } |
415 | | |
416 | 24 | fn nulls(&self) -> Option<&NullBuffer> { |
417 | 24 | self.nulls.as_ref() |
418 | 24 | } |
419 | | |
420 | 0 | fn logical_null_count(&self) -> usize { |
421 | | // More efficient that the default implementation |
422 | 0 | self.null_count() |
423 | 0 | } |
424 | | |
425 | 0 | fn get_buffer_memory_size(&self) -> usize { |
426 | 0 | let mut size = self.values.get_buffer_memory_size(); |
427 | 0 | size += self.value_offsets.inner().capacity(); |
428 | 0 | size += self.value_sizes.inner().capacity(); |
429 | 0 | if let Some(n) = self.nulls.as_ref() { |
430 | 0 | size += n.buffer().capacity(); |
431 | 0 | } |
432 | 0 | size |
433 | 0 | } |
434 | | |
435 | 0 | fn get_array_memory_size(&self) -> usize { |
436 | 0 | let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size(); |
437 | 0 | size += self.value_offsets.inner().capacity(); |
438 | 0 | size += self.value_sizes.inner().capacity(); |
439 | 0 | if let Some(n) = self.nulls.as_ref() { |
440 | 0 | size += n.buffer().capacity(); |
441 | 0 | } |
442 | 0 | size |
443 | 0 | } |
444 | | } |
445 | | |
446 | | impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListViewArray<OffsetSize> { |
447 | 0 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { |
448 | 0 | let prefix = OffsetSize::PREFIX; |
449 | 0 | write!(f, "{prefix}ListViewArray\n[\n")?; |
450 | 0 | print_long_array(self, f, |array, index, f| { |
451 | 0 | std::fmt::Debug::fmt(&array.value(index), f) |
452 | 0 | })?; |
453 | 0 | write!(f, "]") |
454 | 0 | } |
455 | | } |
456 | | |
457 | | impl<OffsetSize: OffsetSizeTrait> From<GenericListViewArray<OffsetSize>> for ArrayData { |
458 | 28 | fn from(array: GenericListViewArray<OffsetSize>) -> Self { |
459 | 28 | let len = array.len(); |
460 | 28 | let builder = ArrayDataBuilder::new(array.data_type) |
461 | 28 | .len(len) |
462 | 28 | .nulls(array.nulls) |
463 | 28 | .buffers(vec![ |
464 | 28 | array.value_offsets.into_inner(), |
465 | 28 | array.value_sizes.into_inner(), |
466 | | ]) |
467 | 28 | .child_data(vec![array.values.to_data()]); |
468 | | |
469 | 28 | unsafe { builder.build_unchecked() } |
470 | 28 | } |
471 | | } |
472 | | |
473 | | impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListViewArray<OffsetSize> { |
474 | 12 | fn from(data: ArrayData) -> Self { |
475 | 12 | Self::try_new_from_array_data(data) |
476 | 12 | .expect("Expected infallible creation of GenericListViewArray from ArrayDataRef failed") |
477 | 12 | } |
478 | | } |
479 | | |
480 | | impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListViewArray<OffsetSize> { |
481 | | fn from(value: FixedSizeListArray) -> Self { |
482 | | let (field, size) = match value.data_type() { |
483 | | DataType::FixedSizeList(f, size) => (f, *size as usize), |
484 | | _ => unreachable!(), |
485 | | }; |
486 | | let mut acc = 0_usize; |
487 | | let iter = std::iter::repeat_n(size, value.len()); |
488 | | let mut sizes = Vec::with_capacity(iter.size_hint().0); |
489 | | let mut offsets = Vec::with_capacity(iter.size_hint().0); |
490 | | |
491 | | for size in iter { |
492 | | offsets.push(OffsetSize::usize_as(acc)); |
493 | | acc = acc.add(size); |
494 | | sizes.push(OffsetSize::usize_as(size)); |
495 | | } |
496 | | let sizes = ScalarBuffer::from(sizes); |
497 | | let offsets = ScalarBuffer::from(offsets); |
498 | | Self { |
499 | | data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()), |
500 | | nulls: value.nulls().cloned(), |
501 | | values: value.values().clone(), |
502 | | value_offsets: offsets, |
503 | | value_sizes: sizes, |
504 | | } |
505 | | } |
506 | | } |
507 | | |
508 | | impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> { |
509 | 12 | fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> { |
510 | 12 | if data.buffers().len() != 2 { |
511 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
512 | 0 | "ListViewArray data should contain two buffers (value offsets & value sizes), had {}", |
513 | 0 | data.buffers().len() |
514 | 0 | ))); |
515 | 12 | } |
516 | | |
517 | 12 | if data.child_data().len() != 1 { |
518 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
519 | 0 | "ListViewArray should contain a single child array (values array), had {}", |
520 | 0 | data.child_data().len() |
521 | 0 | ))); |
522 | 12 | } |
523 | | |
524 | 12 | let values = data.child_data()[0].clone(); |
525 | | |
526 | 12 | if let Some(child_data_type) = Self::get_type(data.data_type()) { |
527 | 12 | if values.data_type() != child_data_type { |
528 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
529 | 0 | "{}ListViewArray's child datatype {:?} does not \ |
530 | 0 | correspond to the List's datatype {:?}", |
531 | 0 | OffsetSize::PREFIX, |
532 | 0 | values.data_type(), |
533 | 0 | child_data_type |
534 | 0 | ))); |
535 | 12 | } |
536 | | } else { |
537 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
538 | 0 | "{}ListViewArray's datatype must be {}ListViewArray(). It is {:?}", |
539 | 0 | OffsetSize::PREFIX, |
540 | 0 | OffsetSize::PREFIX, |
541 | 0 | data.data_type() |
542 | 0 | ))); |
543 | | } |
544 | | |
545 | 12 | let values = make_array(values); |
546 | | // ArrayData is valid, and verified type above |
547 | 12 | let value_offsets = ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len()); |
548 | 12 | let value_sizes = ScalarBuffer::new(data.buffers()[1].clone(), data.offset(), data.len()); |
549 | | |
550 | 12 | Ok(Self { |
551 | 12 | data_type: data.data_type().clone(), |
552 | 12 | nulls: data.nulls().cloned(), |
553 | 12 | values, |
554 | 12 | value_offsets, |
555 | 12 | value_sizes, |
556 | 12 | }) |
557 | 12 | } |
558 | | } |
559 | | |
560 | | #[cfg(test)] |
561 | | mod tests { |
562 | | use arrow_buffer::{BooleanBuffer, Buffer, ScalarBuffer, bit_util}; |
563 | | use arrow_schema::Field; |
564 | | |
565 | | use crate::builder::{FixedSizeListBuilder, Int32Builder}; |
566 | | use crate::cast::AsArray; |
567 | | use crate::types::Int32Type; |
568 | | use crate::{Int32Array, Int64Array}; |
569 | | |
570 | | use super::*; |
571 | | |
572 | | #[test] |
573 | | fn test_empty_list_view_array() { |
574 | | // Construct an empty value array |
575 | | let vec: Vec<i32> = vec![]; |
576 | | let field = Arc::new(Field::new_list_field(DataType::Int32, true)); |
577 | | let sizes = ScalarBuffer::from(vec![]); |
578 | | let offsets = ScalarBuffer::from(vec![]); |
579 | | let values = Int32Array::from(vec); |
580 | | let list_array = LargeListViewArray::new(field, offsets, sizes, Arc::new(values), None); |
581 | | |
582 | | assert_eq!(list_array.len(), 0) |
583 | | } |
584 | | |
585 | | #[test] |
586 | | fn test_list_view_array() { |
587 | | // Construct a value array |
588 | | let value_data = ArrayData::builder(DataType::Int32) |
589 | | .len(8) |
590 | | .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7])) |
591 | | .build() |
592 | | .unwrap(); |
593 | | |
594 | | let field = Arc::new(Field::new_list_field(DataType::Int32, true)); |
595 | | let sizes = ScalarBuffer::from(vec![3i32, 3, 2]); |
596 | | let offsets = ScalarBuffer::from(vec![0i32, 3, 6]); |
597 | | let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]); |
598 | | let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), None); |
599 | | |
600 | | let values = list_array.values(); |
601 | | assert_eq!(value_data, values.to_data()); |
602 | | assert_eq!(DataType::Int32, list_array.value_type()); |
603 | | assert_eq!(3, list_array.len()); |
604 | | assert_eq!(0, list_array.null_count()); |
605 | | assert_eq!(6, list_array.value_offsets()[2]); |
606 | | assert_eq!(2, list_array.value_sizes()[2]); |
607 | | assert_eq!(2, list_array.value_size(2)); |
608 | | assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0)); |
609 | | assert_eq!( |
610 | | 0, |
611 | | unsafe { list_array.value_unchecked(0) } |
612 | | .as_primitive::<Int32Type>() |
613 | | .value(0) |
614 | | ); |
615 | | for i in 0..3 { |
616 | | assert!(list_array.is_valid(i)); |
617 | | assert!(!list_array.is_null(i)); |
618 | | } |
619 | | } |
620 | | |
621 | | #[test] |
622 | | fn test_large_list_view_array() { |
623 | | // Construct a value array |
624 | | let value_data = ArrayData::builder(DataType::Int32) |
625 | | .len(8) |
626 | | .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7])) |
627 | | .build() |
628 | | .unwrap(); |
629 | | |
630 | | let field = Arc::new(Field::new_list_field(DataType::Int32, true)); |
631 | | let sizes = ScalarBuffer::from(vec![3i64, 3, 2]); |
632 | | let offsets = ScalarBuffer::from(vec![0i64, 3, 6]); |
633 | | let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]); |
634 | | let list_array = LargeListViewArray::new(field, offsets, sizes, Arc::new(values), None); |
635 | | |
636 | | let values = list_array.values(); |
637 | | assert_eq!(value_data, values.to_data()); |
638 | | assert_eq!(DataType::Int32, list_array.value_type()); |
639 | | assert_eq!(3, list_array.len()); |
640 | | assert_eq!(0, list_array.null_count()); |
641 | | assert_eq!(6, list_array.value_offsets()[2]); |
642 | | assert_eq!(2, list_array.value_sizes()[2]); |
643 | | assert_eq!(2, list_array.value_size(2)); |
644 | | assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0)); |
645 | | assert_eq!( |
646 | | 0, |
647 | | unsafe { list_array.value_unchecked(0) } |
648 | | .as_primitive::<Int32Type>() |
649 | | .value(0) |
650 | | ); |
651 | | for i in 0..3 { |
652 | | assert!(list_array.is_valid(i)); |
653 | | assert!(!list_array.is_null(i)); |
654 | | } |
655 | | } |
656 | | |
657 | | #[test] |
658 | | fn test_list_view_array_slice() { |
659 | | // Construct a value array |
660 | | let value_data = ArrayData::builder(DataType::Int32) |
661 | | .len(10) |
662 | | .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])) |
663 | | .build() |
664 | | .unwrap(); |
665 | | |
666 | | // 01011001 00000001 |
667 | | let mut null_bits: [u8; 2] = [0; 2]; |
668 | | bit_util::set_bit(&mut null_bits, 0); |
669 | | bit_util::set_bit(&mut null_bits, 3); |
670 | | bit_util::set_bit(&mut null_bits, 4); |
671 | | bit_util::set_bit(&mut null_bits, 6); |
672 | | bit_util::set_bit(&mut null_bits, 8); |
673 | | let buffer = BooleanBuffer::new(Buffer::from(null_bits), 0, 9); |
674 | | let null_buffer = NullBuffer::new(buffer); |
675 | | |
676 | | let field = Arc::new(Field::new_list_field(DataType::Int32, true)); |
677 | | let sizes = ScalarBuffer::from(vec![2, 0, 0, 2, 2, 0, 3, 0, 1]); |
678 | | let offsets = ScalarBuffer::from(vec![0, 2, 2, 2, 4, 6, 6, 9, 9]); |
679 | | let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); |
680 | | let list_array = |
681 | | ListViewArray::new(field, offsets, sizes, Arc::new(values), Some(null_buffer)); |
682 | | |
683 | | let values = list_array.values(); |
684 | | assert_eq!(value_data, values.to_data()); |
685 | | assert_eq!(DataType::Int32, list_array.value_type()); |
686 | | assert_eq!(9, list_array.len()); |
687 | | assert_eq!(4, list_array.null_count()); |
688 | | assert_eq!(2, list_array.value_offsets()[3]); |
689 | | assert_eq!(2, list_array.value_sizes()[3]); |
690 | | assert_eq!(2, list_array.value_size(3)); |
691 | | |
692 | | let sliced_array = list_array.slice(1, 6); |
693 | | assert_eq!(6, sliced_array.len()); |
694 | | assert_eq!(3, sliced_array.null_count()); |
695 | | |
696 | | for i in 0..sliced_array.len() { |
697 | | if bit_util::get_bit(&null_bits, 1 + i) { |
698 | | assert!(sliced_array.is_valid(i)); |
699 | | } else { |
700 | | assert!(sliced_array.is_null(i)); |
701 | | } |
702 | | } |
703 | | |
704 | | // Check offset and length for each non-null value. |
705 | | let sliced_list_array = sliced_array |
706 | | .as_any() |
707 | | .downcast_ref::<ListViewArray>() |
708 | | .unwrap(); |
709 | | assert_eq!(2, sliced_list_array.value_offsets()[2]); |
710 | | assert_eq!(2, sliced_list_array.value_sizes()[2]); |
711 | | assert_eq!(2, sliced_list_array.value_size(2)); |
712 | | |
713 | | assert_eq!(4, sliced_list_array.value_offsets()[3]); |
714 | | assert_eq!(2, sliced_list_array.value_sizes()[3]); |
715 | | assert_eq!(2, sliced_list_array.value_size(3)); |
716 | | |
717 | | assert_eq!(6, sliced_list_array.value_offsets()[5]); |
718 | | assert_eq!(3, sliced_list_array.value_sizes()[5]); |
719 | | assert_eq!(3, sliced_list_array.value_size(5)); |
720 | | } |
721 | | |
722 | | #[test] |
723 | | fn test_large_list_view_array_slice() { |
724 | | // Construct a value array |
725 | | let value_data = ArrayData::builder(DataType::Int32) |
726 | | .len(10) |
727 | | .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])) |
728 | | .build() |
729 | | .unwrap(); |
730 | | |
731 | | // 01011001 00000001 |
732 | | let mut null_bits: [u8; 2] = [0; 2]; |
733 | | bit_util::set_bit(&mut null_bits, 0); |
734 | | bit_util::set_bit(&mut null_bits, 3); |
735 | | bit_util::set_bit(&mut null_bits, 4); |
736 | | bit_util::set_bit(&mut null_bits, 6); |
737 | | bit_util::set_bit(&mut null_bits, 8); |
738 | | let buffer = BooleanBuffer::new(Buffer::from(null_bits), 0, 9); |
739 | | let null_buffer = NullBuffer::new(buffer); |
740 | | |
741 | | // Construct a large list view array from the above two |
742 | | let field = Arc::new(Field::new_list_field(DataType::Int32, true)); |
743 | | let sizes = ScalarBuffer::from(vec![2i64, 0, 0, 2, 2, 0, 3, 0, 1]); |
744 | | let offsets = ScalarBuffer::from(vec![0i64, 2, 2, 2, 4, 6, 6, 9, 9]); |
745 | | let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); |
746 | | let list_array = |
747 | | LargeListViewArray::new(field, offsets, sizes, Arc::new(values), Some(null_buffer)); |
748 | | |
749 | | let values = list_array.values(); |
750 | | assert_eq!(value_data, values.to_data()); |
751 | | assert_eq!(DataType::Int32, list_array.value_type()); |
752 | | assert_eq!(9, list_array.len()); |
753 | | assert_eq!(4, list_array.null_count()); |
754 | | assert_eq!(2, list_array.value_offsets()[3]); |
755 | | assert_eq!(2, list_array.value_sizes()[3]); |
756 | | assert_eq!(2, list_array.value_size(3)); |
757 | | |
758 | | let sliced_array = list_array.slice(1, 6); |
759 | | assert_eq!(6, sliced_array.len()); |
760 | | assert_eq!(3, sliced_array.null_count()); |
761 | | |
762 | | for i in 0..sliced_array.len() { |
763 | | if bit_util::get_bit(&null_bits, 1 + i) { |
764 | | assert!(sliced_array.is_valid(i)); |
765 | | } else { |
766 | | assert!(sliced_array.is_null(i)); |
767 | | } |
768 | | } |
769 | | |
770 | | // Check offset and length for each non-null value. |
771 | | let sliced_list_array = sliced_array |
772 | | .as_any() |
773 | | .downcast_ref::<LargeListViewArray>() |
774 | | .unwrap(); |
775 | | assert_eq!(2, sliced_list_array.value_offsets()[2]); |
776 | | assert_eq!(2, sliced_list_array.value_size(2)); |
777 | | assert_eq!(2, sliced_list_array.value_sizes()[2]); |
778 | | |
779 | | assert_eq!(4, sliced_list_array.value_offsets()[3]); |
780 | | assert_eq!(2, sliced_list_array.value_size(3)); |
781 | | assert_eq!(2, sliced_list_array.value_sizes()[3]); |
782 | | |
783 | | assert_eq!(6, sliced_list_array.value_offsets()[5]); |
784 | | assert_eq!(3, sliced_list_array.value_size(5)); |
785 | | assert_eq!(2, sliced_list_array.value_sizes()[3]); |
786 | | } |
787 | | |
788 | | #[test] |
789 | | #[should_panic(expected = "index out of bounds: the len is 9 but the index is 10")] |
790 | | fn test_list_view_array_index_out_of_bound() { |
791 | | // 01011001 00000001 |
792 | | let mut null_bits: [u8; 2] = [0; 2]; |
793 | | bit_util::set_bit(&mut null_bits, 0); |
794 | | bit_util::set_bit(&mut null_bits, 3); |
795 | | bit_util::set_bit(&mut null_bits, 4); |
796 | | bit_util::set_bit(&mut null_bits, 6); |
797 | | bit_util::set_bit(&mut null_bits, 8); |
798 | | let buffer = BooleanBuffer::new(Buffer::from(null_bits), 0, 9); |
799 | | let null_buffer = NullBuffer::new(buffer); |
800 | | |
801 | | // Construct a buffer for value offsets, for the nested array: |
802 | | // [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]] |
803 | | // Construct a list array from the above two |
804 | | let field = Arc::new(Field::new_list_field(DataType::Int32, true)); |
805 | | let sizes = ScalarBuffer::from(vec![2i32, 0, 0, 2, 2, 0, 3, 0, 1]); |
806 | | let offsets = ScalarBuffer::from(vec![0i32, 2, 2, 2, 4, 6, 6, 9, 9]); |
807 | | let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); |
808 | | let list_array = |
809 | | ListViewArray::new(field, offsets, sizes, Arc::new(values), Some(null_buffer)); |
810 | | |
811 | | assert_eq!(9, list_array.len()); |
812 | | list_array.value(10); |
813 | | } |
814 | | #[test] |
815 | | #[should_panic( |
816 | | expected = "ListViewArray data should contain two buffers (value offsets & value sizes), had 0" |
817 | | )] |
818 | | #[cfg(not(feature = "force_validate"))] |
819 | | fn test_list_view_array_invalid_buffer_len() { |
820 | | let value_data = unsafe { |
821 | | ArrayData::builder(DataType::Int32) |
822 | | .len(8) |
823 | | .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7])) |
824 | | .build_unchecked() |
825 | | }; |
826 | | let list_data_type = |
827 | | DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false))); |
828 | | let list_data = unsafe { |
829 | | ArrayData::builder(list_data_type) |
830 | | .len(3) |
831 | | .add_child_data(value_data) |
832 | | .build_unchecked() |
833 | | }; |
834 | | drop(ListViewArray::from(list_data)); |
835 | | } |
836 | | |
837 | | #[test] |
838 | | #[should_panic( |
839 | | expected = "ListViewArray data should contain two buffers (value offsets & value sizes), had 1" |
840 | | )] |
841 | | #[cfg(not(feature = "force_validate"))] |
842 | | fn test_list_view_array_invalid_child_array_len() { |
843 | | let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]); |
844 | | let list_data_type = |
845 | | DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false))); |
846 | | let list_data = unsafe { |
847 | | ArrayData::builder(list_data_type) |
848 | | .len(3) |
849 | | .add_buffer(value_offsets) |
850 | | .build_unchecked() |
851 | | }; |
852 | | drop(ListViewArray::from(list_data)); |
853 | | } |
854 | | |
855 | | #[test] |
856 | | fn test_list_view_array_offsets_need_not_start_at_zero() { |
857 | | let field = Arc::new(Field::new_list_field(DataType::Int32, true)); |
858 | | let sizes = ScalarBuffer::from(vec![0i32, 0, 3]); |
859 | | let offsets = ScalarBuffer::from(vec![2i32, 2, 5]); |
860 | | let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]); |
861 | | let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), None); |
862 | | |
863 | | assert_eq!(list_array.value_size(0), 0); |
864 | | assert_eq!(list_array.value_size(1), 0); |
865 | | assert_eq!(list_array.value_size(2), 3); |
866 | | } |
867 | | |
868 | | #[test] |
869 | | #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")] |
870 | | #[cfg(not(feature = "force_validate"))] |
871 | | fn test_list_view_array_alignment() { |
872 | | let offset_buf = Buffer::from_slice_ref([0_u64]); |
873 | | let offset_buf2 = offset_buf.slice(1); |
874 | | |
875 | | let size_buf = Buffer::from_slice_ref([0_u64]); |
876 | | let size_buf2 = size_buf.slice(1); |
877 | | |
878 | | let values: [i32; 8] = [0; 8]; |
879 | | let value_data = unsafe { |
880 | | ArrayData::builder(DataType::Int32) |
881 | | .add_buffer(Buffer::from_slice_ref(values)) |
882 | | .build_unchecked() |
883 | | }; |
884 | | |
885 | | let list_data_type = |
886 | | DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false))); |
887 | | let list_data = unsafe { |
888 | | ArrayData::builder(list_data_type) |
889 | | .add_buffer(offset_buf2) |
890 | | .add_buffer(size_buf2) |
891 | | .add_child_data(value_data) |
892 | | .build_unchecked() |
893 | | }; |
894 | | drop(ListViewArray::from(list_data)); |
895 | | } |
896 | | |
897 | | #[test] |
898 | | fn test_empty_offsets() { |
899 | | let f = Arc::new(Field::new("element", DataType::Int32, true)); |
900 | | let string = ListViewArray::from( |
901 | | ArrayData::builder(DataType::ListView(f.clone())) |
902 | | .buffers(vec![Buffer::from(&[]), Buffer::from(&[])]) |
903 | | .add_child_data(ArrayData::new_empty(&DataType::Int32)) |
904 | | .build() |
905 | | .unwrap(), |
906 | | ); |
907 | | assert_eq!(string.value_offsets(), &[] as &[i32; 0]); |
908 | | assert_eq!(string.value_sizes(), &[] as &[i32; 0]); |
909 | | |
910 | | let string = LargeListViewArray::from( |
911 | | ArrayData::builder(DataType::LargeListView(f)) |
912 | | .buffers(vec![Buffer::from(&[]), Buffer::from(&[])]) |
913 | | .add_child_data(ArrayData::new_empty(&DataType::Int32)) |
914 | | .build() |
915 | | .unwrap(), |
916 | | ); |
917 | | assert_eq!(string.len(), 0); |
918 | | assert_eq!(string.value_offsets(), &[] as &[i64; 0]); |
919 | | assert_eq!(string.value_sizes(), &[] as &[i64; 0]); |
920 | | } |
921 | | |
922 | | #[test] |
923 | | fn test_try_new() { |
924 | | let offsets = ScalarBuffer::from(vec![0, 1, 4, 5]); |
925 | | let sizes = ScalarBuffer::from(vec![1, 3, 1, 0]); |
926 | | let values = Int32Array::new(vec![1, 2, 3, 4, 5].into(), None); |
927 | | let values = Arc::new(values) as ArrayRef; |
928 | | |
929 | | let field = Arc::new(Field::new("element", DataType::Int32, false)); |
930 | | ListViewArray::new( |
931 | | field.clone(), |
932 | | offsets.clone(), |
933 | | sizes.clone(), |
934 | | values.clone(), |
935 | | None, |
936 | | ); |
937 | | |
938 | | let nulls = NullBuffer::new_null(4); |
939 | | ListViewArray::new( |
940 | | field.clone(), |
941 | | offsets, |
942 | | sizes.clone(), |
943 | | values.clone(), |
944 | | Some(nulls), |
945 | | ); |
946 | | |
947 | | let nulls = NullBuffer::new_null(4); |
948 | | let offsets = ScalarBuffer::from(vec![0, 1, 2, 3, 4]); |
949 | | let sizes = ScalarBuffer::from(vec![1, 1, 1, 1, 0]); |
950 | | let err = LargeListViewArray::try_new( |
951 | | field, |
952 | | offsets.clone(), |
953 | | sizes.clone(), |
954 | | values.clone(), |
955 | | Some(nulls), |
956 | | ) |
957 | | .unwrap_err(); |
958 | | |
959 | | assert_eq!( |
960 | | err.to_string(), |
961 | | "Invalid argument error: Incorrect length of null buffer for LargeListViewArray, expected 5 got 4" |
962 | | ); |
963 | | |
964 | | let field = Arc::new(Field::new("element", DataType::Int64, false)); |
965 | | let err = LargeListViewArray::try_new( |
966 | | field.clone(), |
967 | | offsets.clone(), |
968 | | sizes.clone(), |
969 | | values.clone(), |
970 | | None, |
971 | | ) |
972 | | .unwrap_err(); |
973 | | |
974 | | assert_eq!( |
975 | | err.to_string(), |
976 | | "Invalid argument error: LargeListViewArray expected data type Int64 got Int32 for \"element\"" |
977 | | ); |
978 | | |
979 | | let nulls = NullBuffer::new_null(7); |
980 | | let values = Int64Array::new(vec![0; 7].into(), Some(nulls)); |
981 | | let values = Arc::new(values); |
982 | | |
983 | | let err = LargeListViewArray::try_new( |
984 | | field, |
985 | | offsets.clone(), |
986 | | sizes.clone(), |
987 | | values.clone(), |
988 | | None, |
989 | | ) |
990 | | .unwrap_err(); |
991 | | |
992 | | assert_eq!( |
993 | | err.to_string(), |
994 | | "Invalid argument error: Non-nullable field of LargeListViewArray \"element\" cannot contain nulls" |
995 | | ); |
996 | | } |
997 | | |
998 | | #[test] |
999 | | fn test_from_fixed_size_list() { |
1000 | | let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3); |
1001 | | builder.values().append_slice(&[1, 2, 3]); |
1002 | | builder.append(true); |
1003 | | builder.values().append_slice(&[0, 0, 0]); |
1004 | | builder.append(false); |
1005 | | builder.values().append_slice(&[4, 5, 6]); |
1006 | | builder.append(true); |
1007 | | let list: ListViewArray = builder.finish().into(); |
1008 | | let values: Vec<_> = list |
1009 | | .iter() |
1010 | | .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec())) |
1011 | | .collect(); |
1012 | | assert_eq!(values, vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])]); |
1013 | | let offsets = list.value_offsets(); |
1014 | | assert_eq!(offsets, &[0, 3, 6]); |
1015 | | let sizes = list.value_sizes(); |
1016 | | assert_eq!(sizes, &[3, 3, 3]); |
1017 | | } |
1018 | | |
1019 | | #[test] |
1020 | | fn test_list_view_array_overlap_lists() { |
1021 | | let value_data = unsafe { |
1022 | | ArrayData::builder(DataType::Int32) |
1023 | | .len(8) |
1024 | | .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7])) |
1025 | | .build_unchecked() |
1026 | | }; |
1027 | | let list_data_type = |
1028 | | DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false))); |
1029 | | let list_data = unsafe { |
1030 | | ArrayData::builder(list_data_type) |
1031 | | .len(2) |
1032 | | .add_buffer(Buffer::from_slice_ref([0, 3])) // offsets |
1033 | | .add_buffer(Buffer::from_slice_ref([5, 5])) // sizes |
1034 | | .add_child_data(value_data) |
1035 | | .build_unchecked() |
1036 | | }; |
1037 | | let array = ListViewArray::from(list_data); |
1038 | | |
1039 | | assert_eq!(array.len(), 2); |
1040 | | assert_eq!(array.value_size(0), 5); |
1041 | | assert_eq!(array.value_size(1), 5); |
1042 | | |
1043 | | let values: Vec<_> = array |
1044 | | .iter() |
1045 | | .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec())) |
1046 | | .collect(); |
1047 | | assert_eq!( |
1048 | | values, |
1049 | | vec![Some(vec![0, 1, 2, 3, 4]), Some(vec![3, 4, 5, 6, 7])] |
1050 | | ); |
1051 | | } |
1052 | | |
1053 | | #[test] |
1054 | | fn test_list_view_array_incomplete_offsets() { |
1055 | | let value_data = unsafe { |
1056 | | ArrayData::builder(DataType::Int32) |
1057 | | .len(50) |
1058 | | .add_buffer(Buffer::from_slice_ref((0..50).collect::<Vec<i32>>())) |
1059 | | .build_unchecked() |
1060 | | }; |
1061 | | let list_data_type = |
1062 | | DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false))); |
1063 | | let list_data = unsafe { |
1064 | | ArrayData::builder(list_data_type) |
1065 | | .len(3) |
1066 | | .add_buffer(Buffer::from_slice_ref([0, 5, 10])) // offsets |
1067 | | .add_buffer(Buffer::from_slice_ref([0, 5, 10])) // sizes |
1068 | | .add_child_data(value_data) |
1069 | | .build_unchecked() |
1070 | | }; |
1071 | | let array = ListViewArray::from(list_data); |
1072 | | |
1073 | | assert_eq!(array.len(), 3); |
1074 | | assert_eq!(array.value_size(0), 0); |
1075 | | assert_eq!(array.value_size(1), 5); |
1076 | | assert_eq!(array.value_size(2), 10); |
1077 | | |
1078 | | let values: Vec<_> = array |
1079 | | .iter() |
1080 | | .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec())) |
1081 | | .collect(); |
1082 | | assert_eq!( |
1083 | | values, |
1084 | | vec![ |
1085 | | Some(vec![]), |
1086 | | Some(vec![5, 6, 7, 8, 9]), |
1087 | | Some(vec![10, 11, 12, 13, 14, 15, 16, 17, 18, 19]) |
1088 | | ] |
1089 | | ); |
1090 | | } |
1091 | | |
1092 | | #[test] |
1093 | | fn test_list_view_array_empty_lists() { |
1094 | | let value_data = unsafe { |
1095 | | ArrayData::builder(DataType::Int32) |
1096 | | .len(0) |
1097 | | .add_buffer(Buffer::from_slice_ref::<i32, &[_; 0]>(&[])) |
1098 | | .build_unchecked() |
1099 | | }; |
1100 | | let list_data_type = |
1101 | | DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false))); |
1102 | | let list_data = unsafe { |
1103 | | ArrayData::builder(list_data_type) |
1104 | | .len(3) |
1105 | | .add_buffer(Buffer::from_slice_ref([0, 0, 0])) // offsets |
1106 | | .add_buffer(Buffer::from_slice_ref([0, 0, 0])) // sizes |
1107 | | .add_child_data(value_data) |
1108 | | .build_unchecked() |
1109 | | }; |
1110 | | let array = ListViewArray::from(list_data); |
1111 | | |
1112 | | assert_eq!(array.len(), 3); |
1113 | | assert_eq!(array.value_size(0), 0); |
1114 | | assert_eq!(array.value_size(1), 0); |
1115 | | assert_eq!(array.value_size(2), 0); |
1116 | | |
1117 | | let values: Vec<_> = array |
1118 | | .iter() |
1119 | | .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec())) |
1120 | | .collect(); |
1121 | | assert_eq!(values, vec![Some(vec![]), Some(vec![]), Some(vec![])]); |
1122 | | } |
1123 | | } |