/Users/andrewlamb/Software/arrow-rs/arrow-array/src/builder/generic_list_builder.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use crate::builder::ArrayBuilder; |
19 | | use crate::{Array, ArrayRef, GenericListArray, OffsetSizeTrait}; |
20 | | use arrow_buffer::NullBufferBuilder; |
21 | | use arrow_buffer::{Buffer, OffsetBuffer}; |
22 | | use arrow_schema::{Field, FieldRef}; |
23 | | use std::any::Any; |
24 | | use std::sync::Arc; |
25 | | |
/// Builder for [`GenericListArray`]
///
/// Use [`ListBuilder`] to build [`ListArray`]s and [`LargeListBuilder`] to build [`LargeListArray`]s.
///
/// # Example
///
/// Here is code that constructs a ListArray with the contents:
/// `[[A,B,C], [], NULL, [D], [NULL, F]]`
///
/// ```
/// # use std::sync::Arc;
/// # use arrow_array::{builder::ListBuilder, builder::StringBuilder, ArrayRef, StringArray, Array};
/// #
/// let values_builder = StringBuilder::new();
/// let mut builder = ListBuilder::new(values_builder);
///
/// // [A, B, C]
/// builder.values().append_value("A");
/// builder.values().append_value("B");
/// builder.values().append_value("C");
/// builder.append(true);
///
/// // [ ] (empty list)
/// builder.append(true);
///
/// // Null
/// builder.append(false);
///
/// // [D]
/// builder.values().append_value("D");
/// builder.append(true);
///
/// // [NULL, F]
/// builder.values().append_null();
/// builder.values().append_value("F");
/// builder.append(true);
///
/// // Build the array
/// let array = builder.finish();
///
/// // Values is a string array
/// // "A", "B", "C", "D", NULL, "F"
/// assert_eq!(
///   array.values().as_ref(),
///   &StringArray::from(vec![
///     Some("A"), Some("B"), Some("C"),
///     Some("D"), None, Some("F")
///   ])
/// );
///
/// // Offsets are indexes into the values array
/// assert_eq!(
///   array.value_offsets(),
///   &[0, 3, 3, 3, 4, 6]
/// );
/// ```
///
/// [`ListBuilder`]: crate::builder::ListBuilder
/// [`ListArray`]: crate::array::ListArray
/// [`LargeListBuilder`]: crate::builder::LargeListBuilder
/// [`LargeListArray`]: crate::array::LargeListArray
#[derive(Debug)]
pub struct GenericListBuilder<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> {
    // Offsets into the child values. Seeded with a single zero on construction
    // (and again after `finish`); every appended list slot pushes one more entry.
    offsets_builder: Vec<OffsetSize>,
    // Validity of each list slot (one entry per `append` / `append_null`).
    null_buffer_builder: NullBufferBuilder,
    // Builder for the child values shared by all list slots.
    values_builder: T,
    // Optional override for the field handed to `GenericListArray::new`;
    // when `None`, a nullable list field is derived from the child data type.
    field: Option<FieldRef>,
}
94 | | |
95 | | impl<O: OffsetSizeTrait, T: ArrayBuilder + Default> Default for GenericListBuilder<O, T> { |
96 | | fn default() -> Self { |
97 | | Self::new(T::default()) |
98 | | } |
99 | | } |
100 | | |
101 | | impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> GenericListBuilder<OffsetSize, T> { |
102 | | /// Creates a new [`GenericListBuilder`] from a given values array builder |
103 | 12 | pub fn new(values_builder: T) -> Self { |
104 | 12 | let capacity = values_builder.len(); |
105 | 12 | Self::with_capacity(values_builder, capacity) |
106 | 12 | } |
107 | | |
108 | | /// Creates a new [`GenericListBuilder`] from a given values array builder |
109 | | /// `capacity` is the number of items to pre-allocate space for in this builder |
110 | 12 | pub fn with_capacity(values_builder: T, capacity: usize) -> Self { |
111 | 12 | let mut offsets_builder = Vec::with_capacity(capacity + 1); |
112 | 12 | offsets_builder.push(OffsetSize::zero()); |
113 | 12 | Self { |
114 | 12 | offsets_builder, |
115 | 12 | null_buffer_builder: NullBufferBuilder::new(capacity), |
116 | 12 | values_builder, |
117 | 12 | field: None, |
118 | 12 | } |
119 | 12 | } |
120 | | |
121 | | /// Override the field passed to [`GenericListArray::new`] |
122 | | /// |
123 | | /// By default a nullable field is created with the name `item` |
124 | | /// |
125 | | /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the |
126 | | /// field's data type does not match that of `T` |
127 | 0 | pub fn with_field(self, field: impl Into<FieldRef>) -> Self { |
128 | 0 | Self { |
129 | 0 | field: Some(field.into()), |
130 | 0 | ..self |
131 | 0 | } |
132 | 0 | } |
133 | | } |
134 | | |
135 | | impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> ArrayBuilder |
136 | | for GenericListBuilder<OffsetSize, T> |
137 | | where |
138 | | T: 'static, |
139 | | { |
140 | | /// Returns the builder as a non-mutable `Any` reference. |
141 | 0 | fn as_any(&self) -> &dyn Any { |
142 | 0 | self |
143 | 0 | } |
144 | | |
145 | | /// Returns the builder as a mutable `Any` reference. |
146 | 2 | fn as_any_mut(&mut self) -> &mut dyn Any { |
147 | 2 | self |
148 | 2 | } |
149 | | |
150 | | /// Returns the boxed builder as a box of `Any`. |
151 | 0 | fn into_box_any(self: Box<Self>) -> Box<dyn Any> { |
152 | 0 | self |
153 | 0 | } |
154 | | |
155 | | /// Returns the number of array slots in the builder |
156 | 7 | fn len(&self) -> usize { |
157 | 7 | self.null_buffer_builder.len() |
158 | 7 | } |
159 | | |
160 | | /// Builds the array and reset this builder. |
161 | 5 | fn finish(&mut self) -> ArrayRef { |
162 | 5 | Arc::new(self.finish()) |
163 | 5 | } |
164 | | |
165 | | /// Builds the array without resetting the builder. |
166 | 0 | fn finish_cloned(&self) -> ArrayRef { |
167 | 0 | Arc::new(self.finish_cloned()) |
168 | 0 | } |
169 | | } |
170 | | |
171 | | impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> GenericListBuilder<OffsetSize, T> |
172 | | where |
173 | | T: 'static, |
174 | | { |
175 | | /// Returns the child array builder as a mutable reference. |
176 | | /// |
177 | | /// This mutable reference can be used to append values into the child array builder, |
178 | | /// but you must call [`append`](#method.append) to delimit each distinct list value. |
179 | 16 | pub fn values(&mut self) -> &mut T { |
180 | 16 | &mut self.values_builder |
181 | 16 | } |
182 | | |
183 | | /// Returns the child array builder as an immutable reference |
184 | | pub fn values_ref(&self) -> &T { |
185 | | &self.values_builder |
186 | | } |
187 | | |
188 | | /// Finish the current variable-length list array slot |
189 | | /// |
190 | | /// # Panics |
191 | | /// |
192 | | /// Panics if the length of [`Self::values`] exceeds `OffsetSize::MAX` |
193 | | #[inline] |
194 | 18 | pub fn append(&mut self, is_valid: bool) { |
195 | 18 | self.offsets_builder.push(self.next_offset()); |
196 | 18 | self.null_buffer_builder.append(is_valid); |
197 | 18 | } |
198 | | |
199 | | /// Returns the next offset |
200 | | /// |
201 | | /// # Panics |
202 | | /// |
203 | | /// Panics if the length of [`Self::values`] exceeds `OffsetSize::MAX` |
204 | | #[inline] |
205 | 18 | fn next_offset(&self) -> OffsetSize { |
206 | 18 | OffsetSize::from_usize(self.values_builder.len()).unwrap() |
207 | 18 | } |
208 | | |
209 | | /// Append a value to this [`GenericListBuilder`] |
210 | | /// |
211 | | /// ``` |
212 | | /// # use arrow_array::builder::{Int32Builder, ListBuilder}; |
213 | | /// # use arrow_array::cast::AsArray; |
214 | | /// # use arrow_array::{Array, Int32Array}; |
215 | | /// # use arrow_array::types::Int32Type; |
216 | | /// let mut builder = ListBuilder::new(Int32Builder::new()); |
217 | | /// |
218 | | /// builder.append_value([Some(1), Some(2), Some(3)]); |
219 | | /// builder.append_value([]); |
220 | | /// builder.append_value([None]); |
221 | | /// |
222 | | /// let array = builder.finish(); |
223 | | /// assert_eq!(array.len(), 3); |
224 | | /// |
225 | | /// assert_eq!(array.value_offsets(), &[0, 3, 3, 4]); |
226 | | /// let values = array.values().as_primitive::<Int32Type>(); |
227 | | /// assert_eq!(values, &Int32Array::from(vec![Some(1), Some(2), Some(3), None])); |
228 | | /// ``` |
229 | | /// |
230 | | /// This is an alternative API to appending directly to [`Self::values`] and |
231 | | /// delimiting the result with [`Self::append`] |
232 | | /// |
233 | | /// ``` |
234 | | /// # use arrow_array::builder::{Int32Builder, ListBuilder}; |
235 | | /// # use arrow_array::cast::AsArray; |
236 | | /// # use arrow_array::{Array, Int32Array}; |
237 | | /// # use arrow_array::types::Int32Type; |
238 | | /// let mut builder = ListBuilder::new(Int32Builder::new()); |
239 | | /// |
240 | | /// builder.values().append_value(1); |
241 | | /// builder.values().append_value(2); |
242 | | /// builder.values().append_value(3); |
243 | | /// builder.append(true); |
244 | | /// builder.append(true); |
245 | | /// builder.values().append_null(); |
246 | | /// builder.append(true); |
247 | | /// |
248 | | /// let array = builder.finish(); |
249 | | /// assert_eq!(array.len(), 3); |
250 | | /// |
251 | | /// assert_eq!(array.value_offsets(), &[0, 3, 3, 4]); |
252 | | /// let values = array.values().as_primitive::<Int32Type>(); |
253 | | /// assert_eq!(values, &Int32Array::from(vec![Some(1), Some(2), Some(3), None])); |
254 | | /// ``` |
255 | | #[inline] |
256 | | pub fn append_value<I, V>(&mut self, i: I) |
257 | | where |
258 | | T: Extend<Option<V>>, |
259 | | I: IntoIterator<Item = Option<V>>, |
260 | | { |
261 | | self.extend(std::iter::once(Some(i))) |
262 | | } |
263 | | |
264 | | /// Append a null to this [`GenericListBuilder`] |
265 | | /// |
266 | | /// See [`Self::append_value`] for an example use. |
267 | | #[inline] |
268 | | pub fn append_null(&mut self) { |
269 | | self.offsets_builder.push(self.next_offset()); |
270 | | self.null_buffer_builder.append_null(); |
271 | | } |
272 | | |
273 | | /// Appends `n` `null`s into the builder. |
274 | | #[inline] |
275 | | pub fn append_nulls(&mut self, n: usize) { |
276 | | let next_offset = self.next_offset(); |
277 | | self.offsets_builder |
278 | | .extend(std::iter::repeat_n(next_offset, n)); |
279 | | self.null_buffer_builder.append_n_nulls(n); |
280 | | } |
281 | | |
282 | | /// Appends an optional value into this [`GenericListBuilder`] |
283 | | /// |
284 | | /// If `Some` calls [`Self::append_value`] otherwise calls [`Self::append_null`] |
285 | | #[inline] |
286 | | pub fn append_option<I, V>(&mut self, i: Option<I>) |
287 | | where |
288 | | T: Extend<Option<V>>, |
289 | | I: IntoIterator<Item = Option<V>>, |
290 | | { |
291 | | match i { |
292 | | Some(i) => self.append_value(i), |
293 | | None => self.append_null(), |
294 | | } |
295 | | } |
296 | | |
297 | | /// Builds the [`GenericListArray`] and reset this builder. |
298 | 12 | pub fn finish(&mut self) -> GenericListArray<OffsetSize> { |
299 | 12 | let values = self.values_builder.finish(); |
300 | 12 | let nulls = self.null_buffer_builder.finish(); |
301 | | |
302 | 12 | let offsets = Buffer::from_vec(std::mem::take(&mut self.offsets_builder)); |
303 | | // Safety: Safe by construction |
304 | 12 | let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) }; |
305 | 12 | self.offsets_builder.push(OffsetSize::zero()); |
306 | | |
307 | 12 | let field = match &self.field { |
308 | 0 | Some(f) => f.clone(), |
309 | 12 | None => Arc::new(Field::new_list_field(values.data_type().clone(), true)), |
310 | | }; |
311 | | |
312 | 12 | GenericListArray::new(field, offsets, values, nulls) |
313 | 12 | } |
314 | | |
315 | | /// Builds the [`GenericListArray`] without resetting the builder. |
316 | 0 | pub fn finish_cloned(&self) -> GenericListArray<OffsetSize> { |
317 | 0 | let values = self.values_builder.finish_cloned(); |
318 | 0 | let nulls = self.null_buffer_builder.finish_cloned(); |
319 | | |
320 | 0 | let offsets = Buffer::from_slice_ref(self.offsets_builder.as_slice()); |
321 | | // Safety: safe by construction |
322 | 0 | let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) }; |
323 | | |
324 | 0 | let field = match &self.field { |
325 | 0 | Some(f) => f.clone(), |
326 | 0 | None => Arc::new(Field::new_list_field(values.data_type().clone(), true)), |
327 | | }; |
328 | | |
329 | 0 | GenericListArray::new(field, offsets, values, nulls) |
330 | 0 | } |
331 | | |
332 | | /// Returns the current offsets buffer as a slice |
333 | | pub fn offsets_slice(&self) -> &[OffsetSize] { |
334 | | self.offsets_builder.as_slice() |
335 | | } |
336 | | |
337 | | /// Returns the current null buffer as a slice |
338 | | pub fn validity_slice(&self) -> Option<&[u8]> { |
339 | | self.null_buffer_builder.as_slice() |
340 | | } |
341 | | } |
342 | | |
343 | | impl<O, B, V, E> Extend<Option<V>> for GenericListBuilder<O, B> |
344 | | where |
345 | | O: OffsetSizeTrait, |
346 | | B: ArrayBuilder + Extend<E>, |
347 | | V: IntoIterator<Item = E>, |
348 | | { |
349 | | #[inline] |
350 | | fn extend<T: IntoIterator<Item = Option<V>>>(&mut self, iter: T) { |
351 | | for v in iter { |
352 | | match v { |
353 | | Some(elements) => { |
354 | | self.values_builder.extend(elements); |
355 | | self.append(true); |
356 | | } |
357 | | None => self.append(false), |
358 | | } |
359 | | } |
360 | | } |
361 | | } |
362 | | |
363 | | #[cfg(test)] |
364 | | mod tests { |
365 | | use super::*; |
366 | | use crate::builder::{make_builder, Int32Builder, ListBuilder}; |
367 | | use crate::cast::AsArray; |
368 | | use crate::types::Int32Type; |
369 | | use crate::Int32Array; |
370 | | use arrow_schema::DataType; |
371 | | |
372 | | fn _test_generic_list_array_builder<O: OffsetSizeTrait>() { |
373 | | let values_builder = Int32Builder::with_capacity(10); |
374 | | let mut builder = GenericListBuilder::<O, _>::new(values_builder); |
375 | | |
376 | | // [[0, 1, 2], [3, 4, 5], [6, 7]] |
377 | | builder.values().append_value(0); |
378 | | builder.values().append_value(1); |
379 | | builder.values().append_value(2); |
380 | | builder.append(true); |
381 | | builder.values().append_value(3); |
382 | | builder.values().append_value(4); |
383 | | builder.values().append_value(5); |
384 | | builder.append(true); |
385 | | builder.values().append_value(6); |
386 | | builder.values().append_value(7); |
387 | | builder.append(true); |
388 | | let list_array = builder.finish(); |
389 | | |
390 | | let list_values = list_array.values().as_primitive::<Int32Type>(); |
391 | | assert_eq!(list_values.values(), &[0, 1, 2, 3, 4, 5, 6, 7]); |
392 | | assert_eq!(list_array.value_offsets(), [0, 3, 6, 8].map(O::usize_as)); |
393 | | assert_eq!(DataType::Int32, list_array.value_type()); |
394 | | assert_eq!(3, list_array.len()); |
395 | | assert_eq!(0, list_array.null_count()); |
396 | | assert_eq!(O::from_usize(6).unwrap(), list_array.value_offsets()[2]); |
397 | | assert_eq!(O::from_usize(2).unwrap(), list_array.value_length(2)); |
398 | | for i in 0..3 { |
399 | | assert!(list_array.is_valid(i)); |
400 | | assert!(!list_array.is_null(i)); |
401 | | } |
402 | | } |
403 | | |
404 | | #[test] |
405 | | fn test_list_array_builder() { |
406 | | _test_generic_list_array_builder::<i32>() |
407 | | } |
408 | | |
409 | | #[test] |
410 | | fn test_large_list_array_builder() { |
411 | | _test_generic_list_array_builder::<i64>() |
412 | | } |
413 | | |
414 | | fn _test_generic_list_array_builder_nulls<O: OffsetSizeTrait>() { |
415 | | let values_builder = Int32Builder::with_capacity(10); |
416 | | let mut builder = GenericListBuilder::<O, _>::new(values_builder); |
417 | | |
418 | | // [[0, 1, 2], null, [3, null, 5], [6, 7], null, null, [8]] |
419 | | builder.values().append_value(0); |
420 | | builder.values().append_value(1); |
421 | | builder.values().append_value(2); |
422 | | builder.append(true); |
423 | | builder.append(false); |
424 | | builder.values().append_value(3); |
425 | | builder.values().append_null(); |
426 | | builder.values().append_value(5); |
427 | | builder.append(true); |
428 | | builder.values().append_value(6); |
429 | | builder.values().append_value(7); |
430 | | builder.append(true); |
431 | | builder.append_nulls(2); |
432 | | builder.values().append_value(8); |
433 | | builder.append(true); |
434 | | |
435 | | let list_array = builder.finish(); |
436 | | |
437 | | assert_eq!(DataType::Int32, list_array.value_type()); |
438 | | assert_eq!(7, list_array.len()); |
439 | | assert_eq!(3, list_array.null_count()); |
440 | | assert_eq!(O::from_usize(3).unwrap(), list_array.value_offsets()[2]); |
441 | | assert_eq!(O::from_usize(9).unwrap(), list_array.value_offsets()[7]); |
442 | | assert_eq!(O::from_usize(3).unwrap(), list_array.value_length(2)); |
443 | | assert!(list_array.is_null(4)); |
444 | | assert!(list_array.is_null(5)); |
445 | | } |
446 | | |
447 | | #[test] |
448 | | fn test_list_array_builder_nulls() { |
449 | | _test_generic_list_array_builder_nulls::<i32>() |
450 | | } |
451 | | |
452 | | #[test] |
453 | | fn test_large_list_array_builder_nulls() { |
454 | | _test_generic_list_array_builder_nulls::<i64>() |
455 | | } |
456 | | |
// `finish` must hand back everything appended so far and leave the builder empty.
#[test]
fn test_list_array_builder_finish() {
    let mut builder = ListBuilder::new(Int32Array::builder(5));

    for chunk in [[1, 2, 3], [4, 5, 6]] {
        builder.values().append_slice(&chunk);
        builder.append(true);
    }

    let first = builder.finish();
    assert_eq!(2, first.len());
    assert!(builder.is_empty());

    // The builder is reusable after `finish`.
    builder.values().append_slice(&[7, 8, 9]);
    builder.append(true);
    let second = builder.finish();
    assert_eq!(1, second.len());
    assert!(builder.is_empty());
}
477 | | |
// `finish_cloned` must snapshot the builder without resetting it.
#[test]
fn test_list_array_builder_finish_cloned() {
    let mut builder = ListBuilder::new(Int32Array::builder(5));

    for chunk in [[1, 2, 3], [4, 5, 6]] {
        builder.values().append_slice(&chunk);
        builder.append(true);
    }

    let snapshot = builder.finish_cloned();
    assert_eq!(2, snapshot.len());
    assert!(!builder.is_empty());

    // Earlier slots are still present, so the final array holds all three lists.
    builder.values().append_slice(&[7, 8, 9]);
    builder.append(true);
    let finished = builder.finish();
    assert_eq!(3, finished.len());
    assert!(builder.is_empty());
}
498 | | |
// Builds a list-of-lists and checks both offset levels and the leaf values.
#[test]
fn test_list_list_array_builder() {
    // Appends one inner list made of `items`, valid or null per `is_valid`.
    fn push_inner(
        outer: &mut ListBuilder<ListBuilder<Int32Builder>>,
        items: &[i32],
        is_valid: bool,
    ) {
        for &item in items {
            outer.values().values().append_value(item);
        }
        outer.values().append(is_valid);
    }

    let inner_builder = ListBuilder::new(Int32Builder::with_capacity(10));
    let mut builder = ListBuilder::new(inner_builder);

    //  [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]]
    push_inner(&mut builder, &[1, 2], true);
    push_inner(&mut builder, &[3, 4], true);
    builder.append(true);

    push_inner(&mut builder, &[5, 6, 7], true);
    push_inner(&mut builder, &[], false);
    push_inner(&mut builder, &[8], true);
    builder.append(true);

    builder.append(false);

    push_inner(&mut builder, &[9, 10], true);
    builder.append(true);

    let l1 = builder.finish();

    assert_eq!(4, l1.len());
    assert_eq!(1, l1.null_count());
    assert_eq!(l1.value_offsets(), &[0, 2, 5, 5, 6]);

    let l2 = l1.values().as_list::<i32>();

    assert_eq!(6, l2.len());
    assert_eq!(1, l2.null_count());
    assert_eq!(l2.value_offsets(), &[0, 2, 4, 7, 7, 8, 10]);

    let i1 = l2.values().as_primitive::<Int32Type>();
    assert_eq!(10, i1.len());
    assert_eq!(0, i1.null_count());
    assert_eq!(i1.values(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
}
547 | | |
// `Extend` must append one slot per item, including empty and null lists.
#[test]
fn test_extend() {
    let lists = [
        Some(vec![Some(1), Some(2), Some(7), None]),
        Some(vec![]),
        Some(vec![Some(4), Some(5)]),
        None,
    ];

    let mut builder = ListBuilder::new(Int32Builder::new());
    builder.extend(lists);
    let array = builder.finish();

    assert_eq!(array.value_offsets(), [0, 4, 4, 6, 6]);
    assert_eq!(array.null_count(), 1);
    assert_eq!(array.logical_null_count(), 1);
    assert!(array.is_null(3));

    let elements = array.values().as_primitive::<Int32Type>();
    assert_eq!(elements.values(), &[1, 2, 7, 0, 4, 5]);
    assert_eq!(elements.null_count(), 1);
    assert_eq!(elements.logical_null_count(), 1);
    assert!(elements.is_null(3));
}
569 | | |
// A list builder over a boxed child builder must work once the child is downcast.
#[test]
fn test_boxed_primitive_array_builder() {
    // Downcasts the boxed child builder to the concrete `Int32Builder`.
    fn int_values(builder: &mut ListBuilder<Box<dyn ArrayBuilder>>) -> &mut Int32Builder {
        builder
            .values()
            .as_any_mut()
            .downcast_mut::<Int32Builder>()
            .expect("should be an Int32Builder")
    }

    let mut builder = ListBuilder::new(make_builder(&DataType::Int32, 5));

    int_values(&mut builder).append_slice(&[1, 2, 3]);
    builder.append(true);

    int_values(&mut builder).append_slice(&[4, 5, 6]);
    builder.append(true);

    let arr = builder.finish();
    assert_eq!(2, arr.len());

    let elements = arr.values().as_primitive::<Int32Type>();
    assert_eq!(elements.values(), &[1, 2, 3, 4, 5, 6]);
}
597 | | |
598 | | #[test] |
599 | | fn test_boxed_list_list_array_builder() { |
600 | | // This test is same as `test_list_list_array_builder` but uses boxed builders. |
601 | | let values_builder = make_builder( |
602 | | &DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))), |
603 | | 10, |
604 | | ); |
605 | | test_boxed_generic_list_generic_list_array_builder::<i32>(values_builder); |
606 | | } |
607 | | |
608 | | #[test] |
609 | | fn test_boxed_large_list_large_list_array_builder() { |
610 | | // This test is same as `test_list_list_array_builder` but uses boxed builders. |
611 | | let values_builder = make_builder( |
612 | | &DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, true))), |
613 | | 10, |
614 | | ); |
615 | | test_boxed_generic_list_generic_list_array_builder::<i64>(values_builder); |
616 | | } |
617 | | |
618 | | fn test_boxed_generic_list_generic_list_array_builder<O: OffsetSizeTrait + PartialEq>( |
619 | | values_builder: Box<dyn ArrayBuilder>, |
620 | | ) { |
621 | | let mut builder: GenericListBuilder<O, Box<dyn ArrayBuilder>> = |
622 | | GenericListBuilder::<O, Box<dyn ArrayBuilder>>::new(values_builder); |
623 | | |
624 | | // [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]] |
625 | | builder |
626 | | .values() |
627 | | .as_any_mut() |
628 | | .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>() |
629 | | .expect("should be an (Large)ListBuilder") |
630 | | .values() |
631 | | .as_any_mut() |
632 | | .downcast_mut::<Int32Builder>() |
633 | | .expect("should be an Int32Builder") |
634 | | .append_value(1); |
635 | | builder |
636 | | .values() |
637 | | .as_any_mut() |
638 | | .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>() |
639 | | .expect("should be an (Large)ListBuilder") |
640 | | .values() |
641 | | .as_any_mut() |
642 | | .downcast_mut::<Int32Builder>() |
643 | | .expect("should be an Int32Builder") |
644 | | .append_value(2); |
645 | | builder |
646 | | .values() |
647 | | .as_any_mut() |
648 | | .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>() |
649 | | .expect("should be an (Large)ListBuilder") |
650 | | .append(true); |
651 | | builder |
652 | | .values() |
653 | | .as_any_mut() |
654 | | .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>() |
655 | | .expect("should be an (Large)ListBuilder") |
656 | | .values() |
657 | | .as_any_mut() |
658 | | .downcast_mut::<Int32Builder>() |
659 | | .expect("should be an Int32Builder") |
660 | | .append_value(3); |
661 | | builder |
662 | | .values() |
663 | | .as_any_mut() |
664 | | .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>() |
665 | | .expect("should be an (Large)ListBuilder") |
666 | | .values() |
667 | | .as_any_mut() |
668 | | .downcast_mut::<Int32Builder>() |
669 | | .expect("should be an Int32Builder") |
670 | | .append_value(4); |
671 | | builder |
672 | | .values() |
673 | | .as_any_mut() |
674 | | .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>() |
675 | | .expect("should be an (Large)ListBuilder") |
676 | | .append(true); |
677 | | builder.append(true); |
678 | | |
679 | | builder |
680 | | .values() |
681 | | .as_any_mut() |
682 | | .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>() |
683 | | .expect("should be an (Large)ListBuilder") |
684 | | .values() |
685 | | .as_any_mut() |
686 | | .downcast_mut::<Int32Builder>() |
687 | | .expect("should be an Int32Builder") |
688 | | .append_value(5); |
689 | | builder |
690 | | .values() |
691 | | .as_any_mut() |
692 | | .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>() |
693 | | .expect("should be an (Large)ListBuilder") |
694 | | .values() |
695 | | .as_any_mut() |
696 | | .downcast_mut::<Int32Builder>() |
697 | | .expect("should be an Int32Builder") |
698 | | .append_value(6); |
699 | | builder |
700 | | .values() |
701 | | .as_any_mut() |
702 | | .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>() |
703 | | .expect("should be an (Large)ListBuilder") |
704 | | .values() |
705 | | .as_any_mut() |
706 | | .downcast_mut::<Int32Builder>() |
707 | | .expect("should be an (Large)ListBuilder") |
708 | | .append_value(7); |
709 | | builder |
710 | | .values() |
711 | | .as_any_mut() |
712 | | .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>() |
713 | | .expect("should be an (Large)ListBuilder") |
714 | | .append(true); |
715 | | builder |
716 | | .values() |
717 | | .as_any_mut() |
718 | | .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>() |
719 | | .expect("should be an (Large)ListBuilder") |
720 | | .append(false); |
721 | | builder |
722 | | .values() |
723 | | .as_any_mut() |
724 | | .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>() |
725 | | .expect("should be an (Large)ListBuilder") |
726 | | .values() |
727 | | .as_any_mut() |
728 | | .downcast_mut::<Int32Builder>() |
729 | | .expect("should be an Int32Builder") |
730 | | .append_value(8); |
731 | | builder |
732 | | .values() |
733 | | .as_any_mut() |
734 | | .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>() |
735 | | .expect("should be an (Large)ListBuilder") |
736 | | .append(true); |
737 | | builder.append(true); |
738 | | |
739 | | builder.append(false); |
740 | | |
741 | | builder |
742 | | .values() |
743 | | .as_any_mut() |
744 | | .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>() |
745 | | .expect("should be an (Large)ListBuilder") |
746 | | .values() |
747 | | .as_any_mut() |
748 | | .downcast_mut::<Int32Builder>() |
749 | | .expect("should be an Int32Builder") |
750 | | .append_value(9); |
751 | | builder |
752 | | .values() |
753 | | .as_any_mut() |
754 | | .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>() |
755 | | .expect("should be an (Large)ListBuilder") |
756 | | .values() |
757 | | .as_any_mut() |
758 | | .downcast_mut::<Int32Builder>() |
759 | | .expect("should be an Int32Builder") |
760 | | .append_value(10); |
761 | | builder |
762 | | .values() |
763 | | .as_any_mut() |
764 | | .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>() |
765 | | .expect("should be an (Large)ListBuilder") |
766 | | .append(true); |
767 | | builder.append(true); |
768 | | |
769 | | let l1 = builder.finish(); |
770 | | |
771 | | assert_eq!(4, l1.len()); |
772 | | assert_eq!(1, l1.null_count()); |
773 | | |
774 | | assert_eq!(l1.value_offsets(), &[0, 2, 5, 5, 6].map(O::usize_as)); |
775 | | let l2 = l1.values().as_list::<O>(); |
776 | | |
777 | | assert_eq!(6, l2.len()); |
778 | | assert_eq!(1, l2.null_count()); |
779 | | assert_eq!(l2.value_offsets(), &[0, 2, 4, 7, 7, 8, 10].map(O::usize_as)); |
780 | | |
781 | | let i1 = l2.values().as_primitive::<Int32Type>(); |
782 | | assert_eq!(10, i1.len()); |
783 | | assert_eq!(0, i1.null_count()); |
784 | | assert_eq!(i1.values(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); |
785 | | } |
786 | | |
// A field set through `with_field` must be used by every `finish`, including
// after the builder has been reset.
#[test]
fn test_with_field() {
    let field = Arc::new(Field::new("bar", DataType::Int32, false));
    let expected_type = DataType::List(Arc::clone(&field));
    let mut builder = ListBuilder::new(Int32Builder::new()).with_field(field);

    builder.append_value([Some(1), Some(2), Some(3)]);
    builder.append_null(); // This is fine as nullability refers to nullability of values
    builder.append_value([Some(4)]);
    let first = builder.finish();
    assert_eq!(first.len(), 3);
    assert_eq!(first.data_type(), &expected_type);

    builder.append_value([Some(4), Some(5)]);
    let second = builder.finish();
    assert_eq!(second.data_type(), &expected_type);
    assert_eq!(second.len(), 1);
}
803 | | |
// Finishing with a null child value must panic when the configured field is non-nullable.
#[test]
#[should_panic(expected = "Non-nullable field of ListArray \\\"item\\\" cannot contain nulls")]
fn test_checks_nullability() {
    let non_nullable_item = Arc::new(Field::new_list_field(DataType::Int32, false));
    let mut builder = ListBuilder::new(Int32Builder::new()).with_field(non_nullable_item);
    builder.append_value([Some(1), None]);
    builder.finish();
}
812 | | |
// Finishing must panic when the configured field's data type differs from the child builder's.
#[test]
#[should_panic(expected = "ListArray expected data type Int64 got Int32")]
fn test_checks_data_type() {
    let mismatched_item = Arc::new(Field::new_list_field(DataType::Int64, false));
    let mut builder = ListBuilder::new(Int32Builder::new()).with_field(mismatched_item);
    builder.append_value([Some(1)]);
    builder.finish();
}
821 | | } |