/Users/andrewlamb/Software/arrow-rs/arrow-array/src/array/list_view_array.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use arrow_buffer::{NullBuffer, ScalarBuffer}; |
19 | | use arrow_data::{ArrayData, ArrayDataBuilder}; |
20 | | use arrow_schema::{ArrowError, DataType, FieldRef}; |
21 | | use std::any::Any; |
22 | | use std::ops::Add; |
23 | | use std::sync::Arc; |
24 | | |
25 | | use crate::array::{make_array, print_long_array}; |
26 | | use crate::iterator::GenericListViewArrayIter; |
27 | | use crate::{new_empty_array, Array, ArrayAccessor, ArrayRef, FixedSizeListArray, OffsetSizeTrait}; |
28 | | |
29 | | /// A [`GenericListViewArray`] of variable size lists, storing offsets as `i32`. |
30 | | pub type ListViewArray = GenericListViewArray<i32>; |
31 | | |
32 | | /// A [`GenericListViewArray`] of variable size lists, storing offsets as `i64`. |
33 | | pub type LargeListViewArray = GenericListViewArray<i64>; |
34 | | |
35 | | /// An array of [variable length lists], specifically in the [list-view layout]. |
36 | | /// |
37 | | /// Differs from [`GenericListArray`] (which represents the [list layout]) in that |
38 | | /// the sizes of the child arrays are explicitly encoded in a separate buffer, instead |
39 | | /// of being derived from the difference between subsequent offsets in the offset buffer. |
40 | | /// |
41 | | /// This allows the offsets (and subsequently child data) to be out of order. It also |
42 | | /// allows take / filter operations to be implemented without copying the underlying data. |
43 | | /// |
44 | | /// # Representation |
45 | | /// |
46 | | /// Given the same example array from [`GenericListArray`], it would be represented |
47 | | /// as such via a list-view layout array: |
48 | | /// |
49 | | /// ```text |
50 | | /// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ |
51 | | /// ┌ ─ ─ ─ ─ ─ ─ ┐ │ |
52 | | /// ┌─────────────┐ ┌───────┐ │ ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ |
53 | | /// │ [A,B,C] │ │ (0,3) │ │ 1 │ │ 0 │ │ 3 │ │ │ 1 │ │ A │ │ 0 │ |
54 | | /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ ├───┤ |
55 | | /// │ [] │ │ (3,0) │ │ 1 │ │ 3 │ │ 0 │ │ │ 1 │ │ B │ │ 1 │ |
56 | | /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ ├───┤ |
57 | | /// │ NULL │ │ (?,?) │ │ 0 │ │ ? │ │ ? │ │ │ 1 │ │ C │ │ 2 │ |
58 | | /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ ├───┤ |
59 | | /// │ [D] │ │ (4,1) │ │ 1 │ │ 4 │ │ 1 │ │ │ ? │ │ ? │ │ 3 │ |
60 | | /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ ├───┤ |
61 | | /// │ [NULL, F] │ │ (5,2) │ │ 1 │ │ 5 │ │ 2 │ │ │ 1 │ │ D │ │ 4 │ |
62 | | /// └─────────────┘ └───────┘ │ └───┘ └───┘ └───┘ ├───┤ ├───┤ |
63 | | /// │ │ 0 │ │ ? │ │ 5 │ |
64 | | /// Logical Logical │ Validity Offsets Sizes ├───┤ ├───┤ |
65 | | /// Values Offset (nulls) │ │ 1 │ │ F │ │ 6 │ |
66 | | /// & Size │ └───┘ └───┘ |
67 | | /// │ Values │ │ |
68 | | /// (offsets[i], │ ListViewArray (Array) |
69 | | /// sizes[i]) └ ─ ─ ─ ─ ─ ─ ┘ │ |
70 | | /// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ |
71 | | /// ``` |
72 | | /// |
73 | | /// Another way of representing the same array but taking advantage of the offsets being out of order: |
74 | | /// |
75 | | /// ```text |
76 | | /// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ |
77 | | /// ┌ ─ ─ ─ ─ ─ ─ ┐ │ |
78 | | /// ┌─────────────┐ ┌───────┐ │ ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ |
79 | | /// │ [A,B,C] │ │ (2,3) │ │ 1 │ │ 2 │ │ 3 │ │ │ 0 │ │ ? │ │ 0 │ |
80 | | /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ ├───┤ |
81 | | /// │ [] │ │ (0,0) │ │ 1 │ │ 0 │ │ 0 │ │ │ 1 │ │ F │ │ 1 │ |
82 | | /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ ├───┤ |
83 | | /// │ NULL │ │ (?,?) │ │ 0 │ │ ? │ │ ? │ │ │ 1 │ │ A │ │ 2 │ |
84 | | /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ ├───┤ |
85 | | /// │ [D] │ │ (5,1) │ │ 1 │ │ 5 │ │ 1 │ │ │ 1 │ │ B │ │ 3 │ |
86 | | /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ ├───┤ |
87 | | /// │ [NULL, F] │ │ (0,2) │ │ 1 │ │ 0 │ │ 2 │ │ │ 1 │ │ C │ │ 4 │ |
88 | | /// └─────────────┘ └───────┘ │ └───┘ └───┘ └───┘ ├───┤ ├───┤ |
89 | | /// │ │ 1 │ │ D │ │ 5 │ |
90 | | /// Logical Logical │ Validity Offsets Sizes └───┘ └───┘ |
91 | | /// Values Offset (nulls) │ Values │ │ |
92 | | /// & Size │ (Array) |
93 | | /// └ ─ ─ ─ ─ ─ ─ ┘ │ |
94 | | /// (offsets[i], │ ListViewArray |
95 | | /// sizes[i]) │ |
96 | | /// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ |
97 | | /// ``` |
98 | | /// |
99 | | /// [`GenericListArray`]: crate::array::GenericListArray |
100 | | /// [variable length lists]: https://arrow.apache.org/docs/format/Columnar.html#variable-size-list-layout |
101 | | /// [list layout]: https://arrow.apache.org/docs/format/Columnar.html#list-layout |
102 | | /// [list-view layout]: https://arrow.apache.org/docs/format/Columnar.html#listview-layout |
103 | | #[derive(Clone)] |
104 | | pub struct GenericListViewArray<OffsetSize: OffsetSizeTrait> { |
105 | | data_type: DataType, |
106 | | nulls: Option<NullBuffer>, |
107 | | values: ArrayRef, |
108 | | // Unlike GenericListArray, we do not use OffsetBuffer here as offsets are not |
109 | | // guaranteed to be monotonically increasing. |
110 | | value_offsets: ScalarBuffer<OffsetSize>, |
111 | | value_sizes: ScalarBuffer<OffsetSize>, |
112 | | } |
113 | | |
114 | | impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> { |
115 | | /// The data type constructor of listview array. |
116 | | /// The input is the schema of the child array and |
117 | | /// the output is the [`DataType`], ListView or LargeListView. |
118 | | pub const DATA_TYPE_CONSTRUCTOR: fn(FieldRef) -> DataType = if OffsetSize::IS_LARGE { |
119 | | DataType::LargeListView |
120 | | } else { |
121 | | DataType::ListView |
122 | | }; |
123 | | |
124 | | /// Create a new [`GenericListViewArray`] from the provided parts |
125 | | /// |
126 | | /// # Errors |
127 | | /// |
128 | | /// Errors if |
129 | | /// |
130 | | /// * `offsets.len() != sizes.len()` |
131 | | /// * `offsets.len() != nulls.len()` |
132 | | /// * `offsets[i] > values.len()` |
133 | | /// * `!field.is_nullable() && values.is_nullable()` |
134 | | /// * `field.data_type() != values.data_type()` |
135 | | /// * `0 <= offsets[i] <= length of the child array` |
136 | | /// * `0 <= offsets[i] + size[i] <= length of the child array` |
137 | 0 | pub fn try_new( |
138 | 0 | field: FieldRef, |
139 | 0 | offsets: ScalarBuffer<OffsetSize>, |
140 | 0 | sizes: ScalarBuffer<OffsetSize>, |
141 | 0 | values: ArrayRef, |
142 | 0 | nulls: Option<NullBuffer>, |
143 | 0 | ) -> Result<Self, ArrowError> { |
144 | 0 | let len = offsets.len(); |
145 | 0 | if let Some(n) = nulls.as_ref() { |
146 | 0 | if n.len() != len { |
147 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
148 | 0 | "Incorrect length of null buffer for {}ListViewArray, expected {len} got {}", |
149 | 0 | OffsetSize::PREFIX, |
150 | 0 | n.len(), |
151 | 0 | ))); |
152 | 0 | } |
153 | 0 | } |
154 | 0 | if len != sizes.len() { |
155 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
156 | 0 | "Length of offsets buffer and sizes buffer must be equal for {}ListViewArray, got {len} and {}", |
157 | 0 | OffsetSize::PREFIX, sizes.len() |
158 | 0 | ))); |
159 | 0 | } |
160 | | |
161 | 0 | for (offset, size) in offsets.iter().zip(sizes.iter()) { |
162 | 0 | let offset = offset.as_usize(); |
163 | 0 | let size = size.as_usize(); |
164 | 0 | if offset.checked_add(size).ok_or_else(|| { |
165 | 0 | ArrowError::InvalidArgumentError(format!( |
166 | 0 | "Overflow in offset + size for {}ListViewArray", |
167 | 0 | OffsetSize::PREFIX |
168 | 0 | )) |
169 | 0 | })? > values.len() |
170 | | { |
171 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
172 | 0 | "Offset + size for {}ListViewArray must be within the bounds of the child array, got offset: {offset}, size: {size}, child array length: {}", |
173 | 0 | OffsetSize::PREFIX, |
174 | 0 | values.len() |
175 | 0 | ))); |
176 | 0 | } |
177 | | } |
178 | | |
179 | 0 | if !field.is_nullable() && values.is_nullable() { |
180 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
181 | 0 | "Non-nullable field of {}ListViewArray {:?} cannot contain nulls", |
182 | 0 | OffsetSize::PREFIX, |
183 | 0 | field.name() |
184 | 0 | ))); |
185 | 0 | } |
186 | | |
187 | 0 | if field.data_type() != values.data_type() { |
188 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
189 | 0 | "{}ListViewArray expected data type {} got {} for {:?}", |
190 | 0 | OffsetSize::PREFIX, |
191 | 0 | field.data_type(), |
192 | 0 | values.data_type(), |
193 | 0 | field.name() |
194 | 0 | ))); |
195 | 0 | } |
196 | | |
197 | 0 | Ok(Self { |
198 | 0 | data_type: Self::DATA_TYPE_CONSTRUCTOR(field), |
199 | 0 | nulls, |
200 | 0 | values, |
201 | 0 | value_offsets: offsets, |
202 | 0 | value_sizes: sizes, |
203 | 0 | }) |
204 | 0 | } |
205 | | |
206 | | /// Create a new [`GenericListViewArray`] from the provided parts |
207 | | /// |
208 | | /// # Panics |
209 | | /// |
210 | | /// Panics if [`Self::try_new`] returns an error |
211 | 0 | pub fn new( |
212 | 0 | field: FieldRef, |
213 | 0 | offsets: ScalarBuffer<OffsetSize>, |
214 | 0 | sizes: ScalarBuffer<OffsetSize>, |
215 | 0 | values: ArrayRef, |
216 | 0 | nulls: Option<NullBuffer>, |
217 | 0 | ) -> Self { |
218 | 0 | Self::try_new(field, offsets, sizes, values, nulls).unwrap() |
219 | 0 | } |
220 | | |
221 | | /// Create a new [`GenericListViewArray`] of length `len` where all values are null |
222 | | pub fn new_null(field: FieldRef, len: usize) -> Self { |
223 | | let values = new_empty_array(field.data_type()); |
224 | | Self { |
225 | | data_type: Self::DATA_TYPE_CONSTRUCTOR(field), |
226 | | nulls: Some(NullBuffer::new_null(len)), |
227 | | value_offsets: ScalarBuffer::from(vec![]), |
228 | | value_sizes: ScalarBuffer::from(vec![]), |
229 | | values, |
230 | | } |
231 | | } |
232 | | |
233 | | /// Deconstruct this array into its constituent parts |
234 | | pub fn into_parts( |
235 | | self, |
236 | | ) -> ( |
237 | | FieldRef, |
238 | | ScalarBuffer<OffsetSize>, |
239 | | ScalarBuffer<OffsetSize>, |
240 | | ArrayRef, |
241 | | Option<NullBuffer>, |
242 | | ) { |
243 | | let f = match self.data_type { |
244 | | DataType::ListView(f) | DataType::LargeListView(f) => f, |
245 | | _ => unreachable!(), |
246 | | }; |
247 | | ( |
248 | | f, |
249 | | self.value_offsets, |
250 | | self.value_sizes, |
251 | | self.values, |
252 | | self.nulls, |
253 | | ) |
254 | | } |
255 | | |
256 | | /// Returns a reference to the offsets of this list |
257 | | /// |
258 | | /// Unlike [`Self::value_offsets`] this returns the [`ScalarBuffer`] |
259 | | /// allowing for zero-copy cloning |
260 | | #[inline] |
261 | | pub fn offsets(&self) -> &ScalarBuffer<OffsetSize> { |
262 | | &self.value_offsets |
263 | | } |
264 | | |
265 | | /// Returns a reference to the values of this list |
266 | | #[inline] |
267 | | pub fn values(&self) -> &ArrayRef { |
268 | | &self.values |
269 | | } |
270 | | |
271 | | /// Returns a reference to the sizes of this list |
272 | | /// |
273 | | /// Unlike [`Self::value_sizes`] this returns the [`ScalarBuffer`] |
274 | | /// allowing for zero-copy cloning |
275 | | #[inline] |
276 | 0 | pub fn sizes(&self) -> &ScalarBuffer<OffsetSize> { |
277 | 0 | &self.value_sizes |
278 | 0 | } |
279 | | |
280 | | /// Returns a clone of the value type of this list. |
281 | | pub fn value_type(&self) -> DataType { |
282 | | self.values.data_type().clone() |
283 | | } |
284 | | |
285 | | /// Returns ith value of this list view array. |
286 | | /// |
287 | | /// Note: This method does not check for nulls and the value is arbitrary |
288 | | /// if [`is_null`](Self::is_null) returns true for the index. |
289 | | /// |
290 | | /// # Safety |
291 | | /// Caller must ensure that the index is within the array bounds |
292 | | pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef { |
293 | | let offset = self.value_offsets().get_unchecked(i).as_usize(); |
294 | | let length = self.value_sizes().get_unchecked(i).as_usize(); |
295 | | self.values.slice(offset, length) |
296 | | } |
297 | | |
298 | | /// Returns ith value of this list view array. |
299 | | /// |
300 | | /// Note: This method does not check for nulls and the value is arbitrary |
301 | | /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index. |
302 | | /// |
303 | | /// # Panics |
304 | | /// Panics if the index is out of bounds |
305 | 0 | pub fn value(&self, i: usize) -> ArrayRef { |
306 | 0 | let offset = self.value_offsets()[i].as_usize(); |
307 | 0 | let length = self.value_sizes()[i].as_usize(); |
308 | 0 | self.values.slice(offset, length) |
309 | 0 | } |
310 | | |
311 | | /// Returns the offset values in the offsets buffer |
312 | | #[inline] |
313 | 0 | pub fn value_offsets(&self) -> &[OffsetSize] { |
314 | 0 | &self.value_offsets |
315 | 0 | } |
316 | | |
317 | | /// Returns the sizes values in the offsets buffer |
318 | | #[inline] |
319 | 0 | pub fn value_sizes(&self) -> &[OffsetSize] { |
320 | 0 | &self.value_sizes |
321 | 0 | } |
322 | | |
323 | | /// Returns the size for value at index `i`. |
324 | | #[inline] |
325 | | pub fn value_size(&self, i: usize) -> OffsetSize { |
326 | | self.value_sizes[i] |
327 | | } |
328 | | |
329 | | /// Returns the offset for value at index `i`. |
330 | | pub fn value_offset(&self, i: usize) -> OffsetSize { |
331 | | self.value_offsets[i] |
332 | | } |
333 | | |
334 | | /// Constructs a new iterator |
335 | | pub fn iter(&self) -> GenericListViewArrayIter<'_, OffsetSize> { |
336 | | GenericListViewArrayIter::<'_, OffsetSize>::new(self) |
337 | | } |
338 | | |
339 | | #[inline] |
340 | 0 | fn get_type(data_type: &DataType) -> Option<&DataType> { |
341 | 0 | match (OffsetSize::IS_LARGE, data_type) { |
342 | 0 | (true, DataType::LargeListView(child)) | (false, DataType::ListView(child)) => { |
343 | 0 | Some(child.data_type()) |
344 | | } |
345 | 0 | _ => None, |
346 | | } |
347 | 0 | } |
348 | | |
349 | | /// Returns a zero-copy slice of this array with the indicated offset and length. |
350 | 0 | pub fn slice(&self, offset: usize, length: usize) -> Self { |
351 | | Self { |
352 | 0 | data_type: self.data_type.clone(), |
353 | 0 | nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)), |
354 | 0 | values: self.values.clone(), |
355 | 0 | value_offsets: self.value_offsets.slice(offset, length), |
356 | 0 | value_sizes: self.value_sizes.slice(offset, length), |
357 | | } |
358 | 0 | } |
359 | | } |
360 | | |
361 | | impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListViewArray<OffsetSize> { |
362 | | type Item = ArrayRef; |
363 | | |
364 | | fn value(&self, index: usize) -> Self::Item { |
365 | | GenericListViewArray::value(self, index) |
366 | | } |
367 | | |
368 | | unsafe fn value_unchecked(&self, index: usize) -> Self::Item { |
369 | | GenericListViewArray::value_unchecked(self, index) |
370 | | } |
371 | | } |
372 | | |
373 | | impl<OffsetSize: OffsetSizeTrait> Array for GenericListViewArray<OffsetSize> { |
374 | 0 | fn as_any(&self) -> &dyn Any { |
375 | 0 | self |
376 | 0 | } |
377 | | |
378 | 0 | fn to_data(&self) -> ArrayData { |
379 | 0 | self.clone().into() |
380 | 0 | } |
381 | | |
382 | 0 | fn into_data(self) -> ArrayData { |
383 | 0 | self.into() |
384 | 0 | } |
385 | | |
386 | 0 | fn data_type(&self) -> &DataType { |
387 | 0 | &self.data_type |
388 | 0 | } |
389 | | |
390 | 0 | fn slice(&self, offset: usize, length: usize) -> ArrayRef { |
391 | 0 | Arc::new(self.slice(offset, length)) |
392 | 0 | } |
393 | | |
394 | 0 | fn len(&self) -> usize { |
395 | 0 | self.sizes().len() |
396 | 0 | } |
397 | | |
398 | 0 | fn is_empty(&self) -> bool { |
399 | 0 | self.value_sizes.is_empty() |
400 | 0 | } |
401 | | |
402 | 0 | fn shrink_to_fit(&mut self) { |
403 | 0 | if let Some(nulls) = &mut self.nulls { |
404 | 0 | nulls.shrink_to_fit(); |
405 | 0 | } |
406 | 0 | self.values.shrink_to_fit(); |
407 | 0 | self.value_offsets.shrink_to_fit(); |
408 | 0 | self.value_sizes.shrink_to_fit(); |
409 | 0 | } |
410 | | |
411 | 0 | fn offset(&self) -> usize { |
412 | 0 | 0 |
413 | 0 | } |
414 | | |
415 | 0 | fn nulls(&self) -> Option<&NullBuffer> { |
416 | 0 | self.nulls.as_ref() |
417 | 0 | } |
418 | | |
419 | 0 | fn logical_null_count(&self) -> usize { |
420 | | // More efficient that the default implementation |
421 | 0 | self.null_count() |
422 | 0 | } |
423 | | |
424 | 0 | fn get_buffer_memory_size(&self) -> usize { |
425 | 0 | let mut size = self.values.get_buffer_memory_size(); |
426 | 0 | size += self.value_offsets.inner().capacity(); |
427 | 0 | size += self.value_sizes.inner().capacity(); |
428 | 0 | if let Some(n) = self.nulls.as_ref() { |
429 | 0 | size += n.buffer().capacity(); |
430 | 0 | } |
431 | 0 | size |
432 | 0 | } |
433 | | |
434 | 0 | fn get_array_memory_size(&self) -> usize { |
435 | 0 | let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size(); |
436 | 0 | size += self.value_offsets.inner().capacity(); |
437 | 0 | size += self.value_sizes.inner().capacity(); |
438 | 0 | if let Some(n) = self.nulls.as_ref() { |
439 | 0 | size += n.buffer().capacity(); |
440 | 0 | } |
441 | 0 | size |
442 | 0 | } |
443 | | } |
444 | | |
445 | | impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListViewArray<OffsetSize> { |
446 | 0 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { |
447 | 0 | let prefix = OffsetSize::PREFIX; |
448 | 0 | write!(f, "{prefix}ListViewArray\n[\n")?; |
449 | 0 | print_long_array(self, f, |array, index, f| { |
450 | 0 | std::fmt::Debug::fmt(&array.value(index), f) |
451 | 0 | })?; |
452 | 0 | write!(f, "]") |
453 | 0 | } |
454 | | } |
455 | | |
456 | | impl<OffsetSize: OffsetSizeTrait> From<GenericListViewArray<OffsetSize>> for ArrayData { |
457 | 0 | fn from(array: GenericListViewArray<OffsetSize>) -> Self { |
458 | 0 | let len = array.len(); |
459 | 0 | let builder = ArrayDataBuilder::new(array.data_type) |
460 | 0 | .len(len) |
461 | 0 | .nulls(array.nulls) |
462 | 0 | .buffers(vec![ |
463 | 0 | array.value_offsets.into_inner(), |
464 | 0 | array.value_sizes.into_inner(), |
465 | | ]) |
466 | 0 | .child_data(vec![array.values.to_data()]); |
467 | | |
468 | 0 | unsafe { builder.build_unchecked() } |
469 | 0 | } |
470 | | } |
471 | | |
472 | | impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListViewArray<OffsetSize> { |
473 | 0 | fn from(data: ArrayData) -> Self { |
474 | 0 | Self::try_new_from_array_data(data) |
475 | 0 | .expect("Expected infallible creation of GenericListViewArray from ArrayDataRef failed") |
476 | 0 | } |
477 | | } |
478 | | |
479 | | impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListViewArray<OffsetSize> { |
480 | | fn from(value: FixedSizeListArray) -> Self { |
481 | | let (field, size) = match value.data_type() { |
482 | | DataType::FixedSizeList(f, size) => (f, *size as usize), |
483 | | _ => unreachable!(), |
484 | | }; |
485 | | let mut acc = 0_usize; |
486 | | let iter = std::iter::repeat_n(size, value.len()); |
487 | | let mut sizes = Vec::with_capacity(iter.size_hint().0); |
488 | | let mut offsets = Vec::with_capacity(iter.size_hint().0); |
489 | | |
490 | | for size in iter { |
491 | | offsets.push(OffsetSize::usize_as(acc)); |
492 | | acc = acc.add(size); |
493 | | sizes.push(OffsetSize::usize_as(size)); |
494 | | } |
495 | | let sizes = ScalarBuffer::from(sizes); |
496 | | let offsets = ScalarBuffer::from(offsets); |
497 | | Self { |
498 | | data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()), |
499 | | nulls: value.nulls().cloned(), |
500 | | values: value.values().clone(), |
501 | | value_offsets: offsets, |
502 | | value_sizes: sizes, |
503 | | } |
504 | | } |
505 | | } |
506 | | |
507 | | impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> { |
508 | 0 | fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> { |
509 | 0 | if data.buffers().len() != 2 { |
510 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
511 | 0 | "ListViewArray data should contain two buffers (value offsets & value sizes), had {}", |
512 | 0 | data.buffers().len() |
513 | 0 | ))); |
514 | 0 | } |
515 | | |
516 | 0 | if data.child_data().len() != 1 { |
517 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
518 | 0 | "ListViewArray should contain a single child array (values array), had {}", |
519 | 0 | data.child_data().len() |
520 | 0 | ))); |
521 | 0 | } |
522 | | |
523 | 0 | let values = data.child_data()[0].clone(); |
524 | | |
525 | 0 | if let Some(child_data_type) = Self::get_type(data.data_type()) { |
526 | 0 | if values.data_type() != child_data_type { |
527 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
528 | 0 | "{}ListViewArray's child datatype {:?} does not \ |
529 | 0 | correspond to the List's datatype {:?}", |
530 | 0 | OffsetSize::PREFIX, |
531 | 0 | values.data_type(), |
532 | 0 | child_data_type |
533 | 0 | ))); |
534 | 0 | } |
535 | | } else { |
536 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
537 | 0 | "{}ListViewArray's datatype must be {}ListViewArray(). It is {:?}", |
538 | 0 | OffsetSize::PREFIX, |
539 | 0 | OffsetSize::PREFIX, |
540 | 0 | data.data_type() |
541 | 0 | ))); |
542 | | } |
543 | | |
544 | 0 | let values = make_array(values); |
545 | | // ArrayData is valid, and verified type above |
546 | 0 | let value_offsets = ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len()); |
547 | 0 | let value_sizes = ScalarBuffer::new(data.buffers()[1].clone(), data.offset(), data.len()); |
548 | | |
549 | 0 | Ok(Self { |
550 | 0 | data_type: data.data_type().clone(), |
551 | 0 | nulls: data.nulls().cloned(), |
552 | 0 | values, |
553 | 0 | value_offsets, |
554 | 0 | value_sizes, |
555 | 0 | }) |
556 | 0 | } |
557 | | } |
558 | | |
559 | | #[cfg(test)] |
560 | | mod tests { |
561 | | use arrow_buffer::{bit_util, BooleanBuffer, Buffer, ScalarBuffer}; |
562 | | use arrow_schema::Field; |
563 | | |
564 | | use crate::builder::{FixedSizeListBuilder, Int32Builder}; |
565 | | use crate::cast::AsArray; |
566 | | use crate::types::Int32Type; |
567 | | use crate::{Int32Array, Int64Array}; |
568 | | |
569 | | use super::*; |
570 | | |
571 | | #[test] |
572 | | fn test_empty_list_view_array() { |
573 | | // Construct an empty value array |
574 | | let vec: Vec<i32> = vec![]; |
575 | | let field = Arc::new(Field::new_list_field(DataType::Int32, true)); |
576 | | let sizes = ScalarBuffer::from(vec![]); |
577 | | let offsets = ScalarBuffer::from(vec![]); |
578 | | let values = Int32Array::from(vec); |
579 | | let list_array = LargeListViewArray::new(field, offsets, sizes, Arc::new(values), None); |
580 | | |
581 | | assert_eq!(list_array.len(), 0) |
582 | | } |
583 | | |
584 | | #[test] |
585 | | fn test_list_view_array() { |
586 | | // Construct a value array |
587 | | let value_data = ArrayData::builder(DataType::Int32) |
588 | | .len(8) |
589 | | .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7])) |
590 | | .build() |
591 | | .unwrap(); |
592 | | |
593 | | let field = Arc::new(Field::new_list_field(DataType::Int32, true)); |
594 | | let sizes = ScalarBuffer::from(vec![3i32, 3, 2]); |
595 | | let offsets = ScalarBuffer::from(vec![0i32, 3, 6]); |
596 | | let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]); |
597 | | let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), None); |
598 | | |
599 | | let values = list_array.values(); |
600 | | assert_eq!(value_data, values.to_data()); |
601 | | assert_eq!(DataType::Int32, list_array.value_type()); |
602 | | assert_eq!(3, list_array.len()); |
603 | | assert_eq!(0, list_array.null_count()); |
604 | | assert_eq!(6, list_array.value_offsets()[2]); |
605 | | assert_eq!(2, list_array.value_sizes()[2]); |
606 | | assert_eq!(2, list_array.value_size(2)); |
607 | | assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0)); |
608 | | assert_eq!( |
609 | | 0, |
610 | | unsafe { list_array.value_unchecked(0) } |
611 | | .as_primitive::<Int32Type>() |
612 | | .value(0) |
613 | | ); |
614 | | for i in 0..3 { |
615 | | assert!(list_array.is_valid(i)); |
616 | | assert!(!list_array.is_null(i)); |
617 | | } |
618 | | } |
619 | | |
620 | | #[test] |
621 | | fn test_large_list_view_array() { |
622 | | // Construct a value array |
623 | | let value_data = ArrayData::builder(DataType::Int32) |
624 | | .len(8) |
625 | | .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7])) |
626 | | .build() |
627 | | .unwrap(); |
628 | | |
629 | | let field = Arc::new(Field::new_list_field(DataType::Int32, true)); |
630 | | let sizes = ScalarBuffer::from(vec![3i64, 3, 2]); |
631 | | let offsets = ScalarBuffer::from(vec![0i64, 3, 6]); |
632 | | let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]); |
633 | | let list_array = LargeListViewArray::new(field, offsets, sizes, Arc::new(values), None); |
634 | | |
635 | | let values = list_array.values(); |
636 | | assert_eq!(value_data, values.to_data()); |
637 | | assert_eq!(DataType::Int32, list_array.value_type()); |
638 | | assert_eq!(3, list_array.len()); |
639 | | assert_eq!(0, list_array.null_count()); |
640 | | assert_eq!(6, list_array.value_offsets()[2]); |
641 | | assert_eq!(2, list_array.value_sizes()[2]); |
642 | | assert_eq!(2, list_array.value_size(2)); |
643 | | assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0)); |
644 | | assert_eq!( |
645 | | 0, |
646 | | unsafe { list_array.value_unchecked(0) } |
647 | | .as_primitive::<Int32Type>() |
648 | | .value(0) |
649 | | ); |
650 | | for i in 0..3 { |
651 | | assert!(list_array.is_valid(i)); |
652 | | assert!(!list_array.is_null(i)); |
653 | | } |
654 | | } |
655 | | |
656 | | #[test] |
657 | | fn test_list_view_array_slice() { |
658 | | // Construct a value array |
659 | | let value_data = ArrayData::builder(DataType::Int32) |
660 | | .len(10) |
661 | | .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])) |
662 | | .build() |
663 | | .unwrap(); |
664 | | |
665 | | // 01011001 00000001 |
666 | | let mut null_bits: [u8; 2] = [0; 2]; |
667 | | bit_util::set_bit(&mut null_bits, 0); |
668 | | bit_util::set_bit(&mut null_bits, 3); |
669 | | bit_util::set_bit(&mut null_bits, 4); |
670 | | bit_util::set_bit(&mut null_bits, 6); |
671 | | bit_util::set_bit(&mut null_bits, 8); |
672 | | let buffer = BooleanBuffer::new(Buffer::from(null_bits), 0, 9); |
673 | | let null_buffer = NullBuffer::new(buffer); |
674 | | |
675 | | let field = Arc::new(Field::new_list_field(DataType::Int32, true)); |
676 | | let sizes = ScalarBuffer::from(vec![2, 0, 0, 2, 2, 0, 3, 0, 1]); |
677 | | let offsets = ScalarBuffer::from(vec![0, 2, 2, 2, 4, 6, 6, 9, 9]); |
678 | | let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); |
679 | | let list_array = |
680 | | ListViewArray::new(field, offsets, sizes, Arc::new(values), Some(null_buffer)); |
681 | | |
682 | | let values = list_array.values(); |
683 | | assert_eq!(value_data, values.to_data()); |
684 | | assert_eq!(DataType::Int32, list_array.value_type()); |
685 | | assert_eq!(9, list_array.len()); |
686 | | assert_eq!(4, list_array.null_count()); |
687 | | assert_eq!(2, list_array.value_offsets()[3]); |
688 | | assert_eq!(2, list_array.value_sizes()[3]); |
689 | | assert_eq!(2, list_array.value_size(3)); |
690 | | |
691 | | let sliced_array = list_array.slice(1, 6); |
692 | | assert_eq!(6, sliced_array.len()); |
693 | | assert_eq!(3, sliced_array.null_count()); |
694 | | |
695 | | for i in 0..sliced_array.len() { |
696 | | if bit_util::get_bit(&null_bits, 1 + i) { |
697 | | assert!(sliced_array.is_valid(i)); |
698 | | } else { |
699 | | assert!(sliced_array.is_null(i)); |
700 | | } |
701 | | } |
702 | | |
703 | | // Check offset and length for each non-null value. |
704 | | let sliced_list_array = sliced_array |
705 | | .as_any() |
706 | | .downcast_ref::<ListViewArray>() |
707 | | .unwrap(); |
708 | | assert_eq!(2, sliced_list_array.value_offsets()[2]); |
709 | | assert_eq!(2, sliced_list_array.value_sizes()[2]); |
710 | | assert_eq!(2, sliced_list_array.value_size(2)); |
711 | | |
712 | | assert_eq!(4, sliced_list_array.value_offsets()[3]); |
713 | | assert_eq!(2, sliced_list_array.value_sizes()[3]); |
714 | | assert_eq!(2, sliced_list_array.value_size(3)); |
715 | | |
716 | | assert_eq!(6, sliced_list_array.value_offsets()[5]); |
717 | | assert_eq!(3, sliced_list_array.value_sizes()[5]); |
718 | | assert_eq!(3, sliced_list_array.value_size(5)); |
719 | | } |
720 | | |
721 | | #[test] |
722 | | fn test_large_list_view_array_slice() { |
723 | | // Construct a value array |
724 | | let value_data = ArrayData::builder(DataType::Int32) |
725 | | .len(10) |
726 | | .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])) |
727 | | .build() |
728 | | .unwrap(); |
729 | | |
730 | | // 01011001 00000001 |
731 | | let mut null_bits: [u8; 2] = [0; 2]; |
732 | | bit_util::set_bit(&mut null_bits, 0); |
733 | | bit_util::set_bit(&mut null_bits, 3); |
734 | | bit_util::set_bit(&mut null_bits, 4); |
735 | | bit_util::set_bit(&mut null_bits, 6); |
736 | | bit_util::set_bit(&mut null_bits, 8); |
737 | | let buffer = BooleanBuffer::new(Buffer::from(null_bits), 0, 9); |
738 | | let null_buffer = NullBuffer::new(buffer); |
739 | | |
740 | | // Construct a large list view array from the above two |
741 | | let field = Arc::new(Field::new_list_field(DataType::Int32, true)); |
742 | | let sizes = ScalarBuffer::from(vec![2i64, 0, 0, 2, 2, 0, 3, 0, 1]); |
743 | | let offsets = ScalarBuffer::from(vec![0i64, 2, 2, 2, 4, 6, 6, 9, 9]); |
744 | | let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); |
745 | | let list_array = |
746 | | LargeListViewArray::new(field, offsets, sizes, Arc::new(values), Some(null_buffer)); |
747 | | |
748 | | let values = list_array.values(); |
749 | | assert_eq!(value_data, values.to_data()); |
750 | | assert_eq!(DataType::Int32, list_array.value_type()); |
751 | | assert_eq!(9, list_array.len()); |
752 | | assert_eq!(4, list_array.null_count()); |
753 | | assert_eq!(2, list_array.value_offsets()[3]); |
754 | | assert_eq!(2, list_array.value_sizes()[3]); |
755 | | assert_eq!(2, list_array.value_size(3)); |
756 | | |
757 | | let sliced_array = list_array.slice(1, 6); |
758 | | assert_eq!(6, sliced_array.len()); |
759 | | assert_eq!(3, sliced_array.null_count()); |
760 | | |
761 | | for i in 0..sliced_array.len() { |
762 | | if bit_util::get_bit(&null_bits, 1 + i) { |
763 | | assert!(sliced_array.is_valid(i)); |
764 | | } else { |
765 | | assert!(sliced_array.is_null(i)); |
766 | | } |
767 | | } |
768 | | |
769 | | // Check offset and length for each non-null value. |
770 | | let sliced_list_array = sliced_array |
771 | | .as_any() |
772 | | .downcast_ref::<LargeListViewArray>() |
773 | | .unwrap(); |
774 | | assert_eq!(2, sliced_list_array.value_offsets()[2]); |
775 | | assert_eq!(2, sliced_list_array.value_size(2)); |
776 | | assert_eq!(2, sliced_list_array.value_sizes()[2]); |
777 | | |
778 | | assert_eq!(4, sliced_list_array.value_offsets()[3]); |
779 | | assert_eq!(2, sliced_list_array.value_size(3)); |
780 | | assert_eq!(2, sliced_list_array.value_sizes()[3]); |
781 | | |
782 | | assert_eq!(6, sliced_list_array.value_offsets()[5]); |
783 | | assert_eq!(3, sliced_list_array.value_size(5)); |
784 | | assert_eq!(2, sliced_list_array.value_sizes()[3]); |
785 | | } |
786 | | |
787 | | #[test] |
788 | | #[should_panic(expected = "index out of bounds: the len is 9 but the index is 10")] |
789 | | fn test_list_view_array_index_out_of_bound() { |
790 | | // 01011001 00000001 |
791 | | let mut null_bits: [u8; 2] = [0; 2]; |
792 | | bit_util::set_bit(&mut null_bits, 0); |
793 | | bit_util::set_bit(&mut null_bits, 3); |
794 | | bit_util::set_bit(&mut null_bits, 4); |
795 | | bit_util::set_bit(&mut null_bits, 6); |
796 | | bit_util::set_bit(&mut null_bits, 8); |
797 | | let buffer = BooleanBuffer::new(Buffer::from(null_bits), 0, 9); |
798 | | let null_buffer = NullBuffer::new(buffer); |
799 | | |
800 | | // Construct a buffer for value offsets, for the nested array: |
801 | | // [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]] |
802 | | // Construct a list array from the above two |
803 | | let field = Arc::new(Field::new_list_field(DataType::Int32, true)); |
804 | | let sizes = ScalarBuffer::from(vec![2i32, 0, 0, 2, 2, 0, 3, 0, 1]); |
805 | | let offsets = ScalarBuffer::from(vec![0i32, 2, 2, 2, 4, 6, 6, 9, 9]); |
806 | | let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); |
807 | | let list_array = |
808 | | ListViewArray::new(field, offsets, sizes, Arc::new(values), Some(null_buffer)); |
809 | | |
810 | | assert_eq!(9, list_array.len()); |
811 | | list_array.value(10); |
812 | | } |
813 | | #[test] |
814 | | #[should_panic( |
815 | | expected = "ListViewArray data should contain two buffers (value offsets & value sizes), had 0" |
816 | | )] |
817 | | #[cfg(not(feature = "force_validate"))] |
818 | | fn test_list_view_array_invalid_buffer_len() { |
819 | | let value_data = unsafe { |
820 | | ArrayData::builder(DataType::Int32) |
821 | | .len(8) |
822 | | .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7])) |
823 | | .build_unchecked() |
824 | | }; |
825 | | let list_data_type = |
826 | | DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false))); |
827 | | let list_data = unsafe { |
828 | | ArrayData::builder(list_data_type) |
829 | | .len(3) |
830 | | .add_child_data(value_data) |
831 | | .build_unchecked() |
832 | | }; |
833 | | drop(ListViewArray::from(list_data)); |
834 | | } |
835 | | |
836 | | #[test] |
837 | | #[should_panic( |
838 | | expected = "ListViewArray data should contain two buffers (value offsets & value sizes), had 1" |
839 | | )] |
840 | | #[cfg(not(feature = "force_validate"))] |
841 | | fn test_list_view_array_invalid_child_array_len() { |
842 | | let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]); |
843 | | let list_data_type = |
844 | | DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false))); |
845 | | let list_data = unsafe { |
846 | | ArrayData::builder(list_data_type) |
847 | | .len(3) |
848 | | .add_buffer(value_offsets) |
849 | | .build_unchecked() |
850 | | }; |
851 | | drop(ListViewArray::from(list_data)); |
852 | | } |
853 | | |
854 | | #[test] |
855 | | fn test_list_view_array_offsets_need_not_start_at_zero() { |
856 | | let field = Arc::new(Field::new_list_field(DataType::Int32, true)); |
857 | | let sizes = ScalarBuffer::from(vec![0i32, 0, 3]); |
858 | | let offsets = ScalarBuffer::from(vec![2i32, 2, 5]); |
859 | | let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]); |
860 | | let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), None); |
861 | | |
862 | | assert_eq!(list_array.value_size(0), 0); |
863 | | assert_eq!(list_array.value_size(1), 0); |
864 | | assert_eq!(list_array.value_size(2), 3); |
865 | | } |
866 | | |
867 | | #[test] |
868 | | #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")] |
869 | | #[cfg(not(feature = "force_validate"))] |
870 | | fn test_list_view_array_alignment() { |
871 | | let offset_buf = Buffer::from_slice_ref([0_u64]); |
872 | | let offset_buf2 = offset_buf.slice(1); |
873 | | |
874 | | let size_buf = Buffer::from_slice_ref([0_u64]); |
875 | | let size_buf2 = size_buf.slice(1); |
876 | | |
877 | | let values: [i32; 8] = [0; 8]; |
878 | | let value_data = unsafe { |
879 | | ArrayData::builder(DataType::Int32) |
880 | | .add_buffer(Buffer::from_slice_ref(values)) |
881 | | .build_unchecked() |
882 | | }; |
883 | | |
884 | | let list_data_type = |
885 | | DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false))); |
886 | | let list_data = unsafe { |
887 | | ArrayData::builder(list_data_type) |
888 | | .add_buffer(offset_buf2) |
889 | | .add_buffer(size_buf2) |
890 | | .add_child_data(value_data) |
891 | | .build_unchecked() |
892 | | }; |
893 | | drop(ListViewArray::from(list_data)); |
894 | | } |
895 | | |
896 | | #[test] |
897 | | fn test_empty_offsets() { |
898 | | let f = Arc::new(Field::new("element", DataType::Int32, true)); |
899 | | let string = ListViewArray::from( |
900 | | ArrayData::builder(DataType::ListView(f.clone())) |
901 | | .buffers(vec![Buffer::from(&[]), Buffer::from(&[])]) |
902 | | .add_child_data(ArrayData::new_empty(&DataType::Int32)) |
903 | | .build() |
904 | | .unwrap(), |
905 | | ); |
906 | | assert_eq!(string.value_offsets(), &[] as &[i32; 0]); |
907 | | assert_eq!(string.value_sizes(), &[] as &[i32; 0]); |
908 | | |
909 | | let string = LargeListViewArray::from( |
910 | | ArrayData::builder(DataType::LargeListView(f)) |
911 | | .buffers(vec![Buffer::from(&[]), Buffer::from(&[])]) |
912 | | .add_child_data(ArrayData::new_empty(&DataType::Int32)) |
913 | | .build() |
914 | | .unwrap(), |
915 | | ); |
916 | | assert_eq!(string.len(), 0); |
917 | | assert_eq!(string.value_offsets(), &[] as &[i64; 0]); |
918 | | assert_eq!(string.value_sizes(), &[] as &[i64; 0]); |
919 | | } |
920 | | |
921 | | #[test] |
922 | | fn test_try_new() { |
923 | | let offsets = ScalarBuffer::from(vec![0, 1, 4, 5]); |
924 | | let sizes = ScalarBuffer::from(vec![1, 3, 1, 0]); |
925 | | let values = Int32Array::new(vec![1, 2, 3, 4, 5].into(), None); |
926 | | let values = Arc::new(values) as ArrayRef; |
927 | | |
928 | | let field = Arc::new(Field::new("element", DataType::Int32, false)); |
929 | | ListViewArray::new( |
930 | | field.clone(), |
931 | | offsets.clone(), |
932 | | sizes.clone(), |
933 | | values.clone(), |
934 | | None, |
935 | | ); |
936 | | |
937 | | let nulls = NullBuffer::new_null(4); |
938 | | ListViewArray::new( |
939 | | field.clone(), |
940 | | offsets, |
941 | | sizes.clone(), |
942 | | values.clone(), |
943 | | Some(nulls), |
944 | | ); |
945 | | |
946 | | let nulls = NullBuffer::new_null(4); |
947 | | let offsets = ScalarBuffer::from(vec![0, 1, 2, 3, 4]); |
948 | | let sizes = ScalarBuffer::from(vec![1, 1, 1, 1, 0]); |
949 | | let err = LargeListViewArray::try_new( |
950 | | field, |
951 | | offsets.clone(), |
952 | | sizes.clone(), |
953 | | values.clone(), |
954 | | Some(nulls), |
955 | | ) |
956 | | .unwrap_err(); |
957 | | |
958 | | assert_eq!( |
959 | | err.to_string(), |
960 | | "Invalid argument error: Incorrect length of null buffer for LargeListViewArray, expected 5 got 4" |
961 | | ); |
962 | | |
963 | | let field = Arc::new(Field::new("element", DataType::Int64, false)); |
964 | | let err = LargeListViewArray::try_new( |
965 | | field.clone(), |
966 | | offsets.clone(), |
967 | | sizes.clone(), |
968 | | values.clone(), |
969 | | None, |
970 | | ) |
971 | | .unwrap_err(); |
972 | | |
973 | | assert_eq!( |
974 | | err.to_string(), |
975 | | "Invalid argument error: LargeListViewArray expected data type Int64 got Int32 for \"element\"" |
976 | | ); |
977 | | |
978 | | let nulls = NullBuffer::new_null(7); |
979 | | let values = Int64Array::new(vec![0; 7].into(), Some(nulls)); |
980 | | let values = Arc::new(values); |
981 | | |
982 | | let err = LargeListViewArray::try_new( |
983 | | field, |
984 | | offsets.clone(), |
985 | | sizes.clone(), |
986 | | values.clone(), |
987 | | None, |
988 | | ) |
989 | | .unwrap_err(); |
990 | | |
991 | | assert_eq!( |
992 | | err.to_string(), |
993 | | "Invalid argument error: Non-nullable field of LargeListViewArray \"element\" cannot contain nulls" |
994 | | ); |
995 | | } |
996 | | |
997 | | #[test] |
998 | | fn test_from_fixed_size_list() { |
999 | | let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3); |
1000 | | builder.values().append_slice(&[1, 2, 3]); |
1001 | | builder.append(true); |
1002 | | builder.values().append_slice(&[0, 0, 0]); |
1003 | | builder.append(false); |
1004 | | builder.values().append_slice(&[4, 5, 6]); |
1005 | | builder.append(true); |
1006 | | let list: ListViewArray = builder.finish().into(); |
1007 | | let values: Vec<_> = list |
1008 | | .iter() |
1009 | | .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec())) |
1010 | | .collect(); |
1011 | | assert_eq!(values, vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])]); |
1012 | | let offsets = list.value_offsets(); |
1013 | | assert_eq!(offsets, &[0, 3, 6]); |
1014 | | let sizes = list.value_sizes(); |
1015 | | assert_eq!(sizes, &[3, 3, 3]); |
1016 | | } |
1017 | | |
1018 | | #[test] |
1019 | | fn test_list_view_array_overlap_lists() { |
1020 | | let value_data = unsafe { |
1021 | | ArrayData::builder(DataType::Int32) |
1022 | | .len(8) |
1023 | | .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7])) |
1024 | | .build_unchecked() |
1025 | | }; |
1026 | | let list_data_type = |
1027 | | DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false))); |
1028 | | let list_data = unsafe { |
1029 | | ArrayData::builder(list_data_type) |
1030 | | .len(2) |
1031 | | .add_buffer(Buffer::from_slice_ref([0, 3])) // offsets |
1032 | | .add_buffer(Buffer::from_slice_ref([5, 5])) // sizes |
1033 | | .add_child_data(value_data) |
1034 | | .build_unchecked() |
1035 | | }; |
1036 | | let array = ListViewArray::from(list_data); |
1037 | | |
1038 | | assert_eq!(array.len(), 2); |
1039 | | assert_eq!(array.value_size(0), 5); |
1040 | | assert_eq!(array.value_size(1), 5); |
1041 | | |
1042 | | let values: Vec<_> = array |
1043 | | .iter() |
1044 | | .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec())) |
1045 | | .collect(); |
1046 | | assert_eq!( |
1047 | | values, |
1048 | | vec![Some(vec![0, 1, 2, 3, 4]), Some(vec![3, 4, 5, 6, 7])] |
1049 | | ); |
1050 | | } |
1051 | | |
1052 | | #[test] |
1053 | | fn test_list_view_array_incomplete_offsets() { |
1054 | | let value_data = unsafe { |
1055 | | ArrayData::builder(DataType::Int32) |
1056 | | .len(50) |
1057 | | .add_buffer(Buffer::from_slice_ref((0..50).collect::<Vec<i32>>())) |
1058 | | .build_unchecked() |
1059 | | }; |
1060 | | let list_data_type = |
1061 | | DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false))); |
1062 | | let list_data = unsafe { |
1063 | | ArrayData::builder(list_data_type) |
1064 | | .len(3) |
1065 | | .add_buffer(Buffer::from_slice_ref([0, 5, 10])) // offsets |
1066 | | .add_buffer(Buffer::from_slice_ref([0, 5, 10])) // sizes |
1067 | | .add_child_data(value_data) |
1068 | | .build_unchecked() |
1069 | | }; |
1070 | | let array = ListViewArray::from(list_data); |
1071 | | |
1072 | | assert_eq!(array.len(), 3); |
1073 | | assert_eq!(array.value_size(0), 0); |
1074 | | assert_eq!(array.value_size(1), 5); |
1075 | | assert_eq!(array.value_size(2), 10); |
1076 | | |
1077 | | let values: Vec<_> = array |
1078 | | .iter() |
1079 | | .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec())) |
1080 | | .collect(); |
1081 | | assert_eq!( |
1082 | | values, |
1083 | | vec![ |
1084 | | Some(vec![]), |
1085 | | Some(vec![5, 6, 7, 8, 9]), |
1086 | | Some(vec![10, 11, 12, 13, 14, 15, 16, 17, 18, 19]) |
1087 | | ] |
1088 | | ); |
1089 | | } |
1090 | | |
1091 | | #[test] |
1092 | | fn test_list_view_array_empty_lists() { |
1093 | | let value_data = unsafe { |
1094 | | ArrayData::builder(DataType::Int32) |
1095 | | .len(0) |
1096 | | .add_buffer(Buffer::from_slice_ref::<i32, &[_; 0]>(&[])) |
1097 | | .build_unchecked() |
1098 | | }; |
1099 | | let list_data_type = |
1100 | | DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false))); |
1101 | | let list_data = unsafe { |
1102 | | ArrayData::builder(list_data_type) |
1103 | | .len(3) |
1104 | | .add_buffer(Buffer::from_slice_ref([0, 0, 0])) // offsets |
1105 | | .add_buffer(Buffer::from_slice_ref([0, 0, 0])) // sizes |
1106 | | .add_child_data(value_data) |
1107 | | .build_unchecked() |
1108 | | }; |
1109 | | let array = ListViewArray::from(list_data); |
1110 | | |
1111 | | assert_eq!(array.len(), 3); |
1112 | | assert_eq!(array.value_size(0), 0); |
1113 | | assert_eq!(array.value_size(1), 0); |
1114 | | assert_eq!(array.value_size(2), 0); |
1115 | | |
1116 | | let values: Vec<_> = array |
1117 | | .iter() |
1118 | | .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec())) |
1119 | | .collect(); |
1120 | | assert_eq!(values, vec![Some(vec![]), Some(vec![]), Some(vec![])]); |
1121 | | } |
1122 | | } |