/Users/andrewlamb/Software/arrow-rs/arrow-array/src/builder/struct_builder.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use crate::builder::*; |
19 | | use crate::StructArray; |
20 | | use arrow_buffer::NullBufferBuilder; |
21 | | use arrow_schema::{Fields, SchemaBuilder}; |
22 | | use std::sync::Arc; |
23 | | |
24 | | /// Builder for [`StructArray`] |
25 | | /// |
26 | | /// Callers must append to every child field builder in step with [`StructBuilder::append`], so that |
27 | | /// each child holds exactly as many elements as the struct itself; [`StructBuilder::finish`] panics otherwise. |
28 | | /// |
29 | | /// |
30 | | /// Building arrays with nested layouts, such as `List<Struct<List<Struct>>>`, takes extra care in Rust because every nested builder has to be recovered with its exact concrete type. |
31 | | /// Reaching a collection builder ([`ListBuilder`], [`LargeListBuilder`], or [`MapBuilder`]) that was created by [`make_builder`] takes more than one call. This complexity arises from the recursive approach used by [`StructBuilder::from_fields`]. |
32 | | /// |
33 | | /// First, [`StructBuilder::from_fields`] invokes [`make_builder`], which returns a `Box<dyn ArrayBuilder>`. To obtain the specific collection builder, one must therefore use [`StructBuilder::field_builder`] to get a `Collection<[Box<dyn ArrayBuilder>]>`. The result of calling `values()` on that collection builder can then be downcast to the desired builder type. |
34 | | /// |
35 | | /// For example, when working with [`ListBuilder`], you would first call [`StructBuilder::field_builder::<ListBuilder<Box<dyn ArrayBuilder>>>`] and then downcast the [`Box<dyn ArrayBuilder>`] returned by `values()` to the specific [`StructBuilder`] you need. |
36 | | /// |
37 | | /// For a complete example, see the code below: |
38 | | /// |
39 | | /// ```rust |
40 | | /// use arrow_array::builder::{ArrayBuilder, ListBuilder, StringBuilder, StructBuilder}; |
41 | | /// use arrow_schema::{DataType, Field, Fields}; |
42 | | /// use std::sync::Arc; |
43 | | /// |
44 | | /// // This is an example column that has a List<Struct<List<Struct>>> layout |
45 | | /// let mut example_col = ListBuilder::new(StructBuilder::from_fields( |
46 | | /// vec![Field::new( |
47 | | /// "value_list", |
48 | | /// DataType::List(Arc::new(Field::new_list_field( |
49 | | /// DataType::Struct(Fields::from(vec![ |
50 | | /// Field::new("key", DataType::Utf8, true), |
51 | | /// Field::new("value", DataType::Utf8, true), |
52 | | /// ])), // In this example we want to reach this builder and insert key/value pairs |
53 | | /// true, |
54 | | /// ))), |
55 | | /// true, |
56 | | /// )], |
57 | | /// 0, |
58 | | /// )); |
59 | | /// |
60 | | /// // We can obtain the StructBuilder without issues, because example_col was created with StructBuilder |
61 | | /// let col_struct_builder: &mut StructBuilder = example_col.values(); |
62 | | /// |
63 | | /// // We can't obtain the ListBuilder<StructBuilder> with the expected generic types, because under the hood |
64 | | /// // the StructBuilder was returned as a Box<dyn ArrayBuilder> and passed as such to the ListBuilder constructor |
65 | | /// |
66 | | /// // This panics at runtime, even though we know that the builder is a ListBuilder<StructBuilder>. |
67 | | /// // let sb = col_struct_builder |
68 | | /// // .field_builder::<ListBuilder<StructBuilder>>(0) |
69 | | /// // .as_mut() |
70 | | /// // .unwrap(); |
71 | | /// |
72 | | /// // To keep in line with Rust's strong typing, we fetch a ListBuilder<Box<dyn ArrayBuilder>> from the column StructBuilder first... |
73 | | /// let mut list_builder_option = |
74 | | /// col_struct_builder.field_builder::<ListBuilder<Box<dyn ArrayBuilder>>>(0); |
75 | | /// |
76 | | /// let list_builder = list_builder_option.as_mut().unwrap(); |
77 | | /// |
78 | | /// // ... and then downcast the key/value pair values to a StructBuilder |
79 | | /// let struct_builder = list_builder |
80 | | /// .values() |
81 | | /// .as_any_mut() |
82 | | /// .downcast_mut::<StructBuilder>() |
83 | | /// .unwrap(); |
84 | | /// |
85 | | /// // We can now append values to the StructBuilder |
86 | | /// let key_builder = struct_builder.field_builder::<StringBuilder>(0).unwrap(); |
87 | | /// key_builder.append_value("my key"); |
88 | | /// |
89 | | /// let value_builder = struct_builder.field_builder::<StringBuilder>(1).unwrap(); |
90 | | /// value_builder.append_value("my value"); |
91 | | /// |
92 | | /// struct_builder.append(true); |
93 | | /// list_builder.append(true); |
94 | | /// col_struct_builder.append(true); |
95 | | /// example_col.append(true); |
96 | | /// |
97 | | /// let array = example_col.finish(); |
98 | | /// |
99 | | /// println!("My array: {:?}", array); |
100 | | /// ``` |
101 | | /// |
102 | | pub struct StructBuilder { |
103 | | fields: Fields, |
104 | | field_builders: Vec<Box<dyn ArrayBuilder>>, |
105 | | null_buffer_builder: NullBufferBuilder, |
106 | | } |
107 | | // Manual `Debug` impl: prints the fields, the null buffer builder (under the label "bitmap_builder") and the current length; the child builders are not printed. |
108 | | impl std::fmt::Debug for StructBuilder { |
109 | 0 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
110 | 0 | f.debug_struct("StructBuilder") |
111 | 0 | .field("fields", &self.fields) |
112 | 0 | .field("bitmap_builder", &self.null_buffer_builder) |
113 | 0 | .field("len", &self.len()) |
114 | 0 | .finish() |
115 | 0 | } |
116 | | } |
117 | | |
118 | | impl ArrayBuilder for StructBuilder { |
119 | | /// Returns the number of array slots in the builder. |
120 | | /// |
121 | | /// Note that this is the length of the struct's own null buffer builder, i.e. the number of |
122 | | /// slots appended to the struct itself; it is the caller's responsibility to make sure that |
123 | | /// every child field builder holds this same number of elements. |
124 | 24 | fn len(&self) -> usize { |
125 | 24 | self.null_buffer_builder.len() |
126 | 24 | } |
127 | | 
128 | | /// Builds the array via the inherent [`StructBuilder::finish`] and returns it as an [`ArrayRef`]. |
129 | 6 | fn finish(&mut self) -> ArrayRef { |
130 | 6 | Arc::new(self.finish()) |
131 | 6 | } |
132 | | 
133 | | /// Builds the array via the inherent [`StructBuilder::finish_cloned`], without resetting the builder. |
134 | 0 | fn finish_cloned(&self) -> ArrayRef { |
135 | 0 | Arc::new(self.finish_cloned()) |
136 | 0 | } |
137 | | 
138 | | /// Returns the builder as an immutable `Any` reference. |
139 | | /// |
140 | | /// This is most useful when one wants to call non-mutating APIs on a specific builder |
141 | | /// type. In this case, one can first cast this into an `Any`, and then use |
142 | | /// `downcast_ref` to get a reference to the specific builder. |
143 | 0 | fn as_any(&self) -> &dyn Any { |
144 | 0 | self |
145 | 0 | } |
146 | | 
147 | | /// Returns the builder as a mutable `Any` reference. |
148 | | /// |
149 | | /// This is most useful when one wants to call mutating APIs on a specific builder |
150 | | /// type. In this case, one can first cast this into an `Any`, and then use |
151 | | /// `downcast_mut` to get a mutable reference to the specific builder. |
152 | 1 | fn as_any_mut(&mut self) -> &mut dyn Any { |
153 | 1 | self |
154 | 1 | } |
155 | | 
156 | | /// Returns the boxed builder as a box of `Any`. |
157 | 0 | fn into_box_any(self: Box<Self>) -> Box<dyn Any> { |
158 | 0 | self |
159 | 0 | } |
160 | | } |
161 | | |
162 | | impl StructBuilder { |
163 | | /// Creates a new `StructBuilder` from `fields` and one child builder per field; their consistency is only checked when the array is built. |
164 | 8 | pub fn new(fields: impl Into<Fields>, field_builders: Vec<Box<dyn ArrayBuilder>>) -> Self { |
165 | 8 | Self { |
166 | 8 | field_builders, |
167 | 8 | fields: fields.into(), |
168 | 8 | null_buffer_builder: NullBufferBuilder::new(0), |
169 | 8 | } |
170 | 8 | } |
171 | | 
172 | | /// Creates a new `StructBuilder` from [`Fields`], creating each child builder with [`make_builder`] and `capacity` |
173 | 0 | pub fn from_fields(fields: impl Into<Fields>, capacity: usize) -> Self { |
174 | 0 | let fields = fields.into(); |
175 | 0 | let mut builders = Vec::with_capacity(fields.len()); |
176 | 0 | for field in &fields { |
177 | 0 | builders.push(make_builder(field.data_type(), capacity)); |
178 | 0 | } |
179 | 0 | Self::new(fields, builders) |
180 | 0 | } |
181 | | 
182 | | /// Returns a mutable reference to the child field builder at index `i`, downcast to `T`. |
183 | | /// Result will be `None` if the input type `T` provided doesn't match the actual |
184 | | /// field builder's type. Panics if `i` is out of bounds. |
185 | 19 | pub fn field_builder<T: ArrayBuilder>(&mut self, i: usize) -> Option<&mut T> { |
186 | 19 | self.field_builders[i].as_any_mut().downcast_mut::<T>() |
187 | 19 | } |
188 | | 
189 | | /// Returns a reference to field builders |
190 | 0 | pub fn field_builders(&self) -> &[Box<dyn ArrayBuilder>] { |
191 | 0 | &self.field_builders |
192 | 0 | } |
193 | | 
194 | | /// Returns a mutable reference to field builders |
195 | 0 | pub fn field_builders_mut(&mut self) -> &mut [Box<dyn ArrayBuilder>] { |
196 | 0 | &mut self.field_builders |
197 | 0 | } |
198 | | 
199 | | /// Returns the number of fields for the struct this builder is building (the number of child field builders). |
200 | 0 | pub fn num_fields(&self) -> usize { |
201 | 0 | self.field_builders.len() |
202 | 0 | } |
203 | | 
204 | | /// Appends an element (either null or non-null) to the struct. The actual elements |
205 | | /// should be appended for each child sub-array in a consistent way. |
206 | | #[inline] |
207 | 12 | pub fn append(&mut self, is_valid: bool) { |
208 | 12 | self.null_buffer_builder.append(is_valid); |
209 | 12 | } |
210 | | 
211 | | /// Appends a null element to the struct. |
212 | | #[inline] |
213 | | pub fn append_null(&mut self) { |
214 | | self.append(false) |
215 | | } |
216 | | 
217 | | /// Appends `n` `null`s into the builder, without allocating a temporary slice of flags. |
218 | | #[inline] |
219 | | pub fn append_nulls(&mut self, n: usize) { |
220 | | self.null_buffer_builder.append_n_nulls(n); |
221 | | } |
222 | | 
223 | | /// Builds the `StructArray` and resets this builder. Panics if the builder contents fail validation. |
224 | 8 | pub fn finish(&mut self) -> StructArray { |
225 | 8 | self.validate_content(); |
226 | 8 | if self.fields.is_empty() { |
227 | 0 | return StructArray::new_empty_fields(self.len(), self.null_buffer_builder.finish()); |
228 | 8 | } |
229 | | 
230 | 13 | let arrays = self.field_builders.iter_mut().map(|f| f.finish()).collect(); |
231 | 8 | let nulls = self.null_buffer_builder.finish(); |
232 | 8 | StructArray::new(self.fields.clone(), arrays, nulls) |
233 | 8 | } |
234 | | 
235 | | /// Builds the `StructArray` without resetting the builder. Panics if the builder contents fail validation. |
236 | 0 | pub fn finish_cloned(&self) -> StructArray { |
237 | 0 | self.validate_content(); |
238 | | 
239 | 0 | if self.fields.is_empty() { |
240 | 0 | return StructArray::new_empty_fields( |
241 | 0 | self.len(), |
242 | 0 | self.null_buffer_builder.finish_cloned(), |
243 | | ); |
244 | 0 | } |
245 | | 
246 | 0 | let arrays = self |
247 | 0 | .field_builders |
248 | 0 | .iter() |
249 | 0 | .map(|f| f.finish_cloned()) |
250 | 0 | .collect(); |
251 | | 
252 | 0 | let nulls = self.null_buffer_builder.finish_cloned(); |
253 | | 
254 | 0 | StructArray::new(self.fields.clone(), arrays, nulls) |
255 | 0 | } |
256 | | 
257 | | /// Validates the contents of the builder, panicking unless |
258 | | /// - fields and field_builders are of equal length |
259 | | /// - every individual field_builder holds exactly self.len() items |
260 | 8 | fn validate_content(&self) { |
261 | 8 | if self.fields.len() != self.field_builders.len() { |
262 | 0 | panic!("Number of fields is not equal to the number of field_builders."); |
263 | 8 | } |
264 | 13 | self.field_builders.iter().enumerate().for_each(|(idx, x)| { |
265 | 13 | if x.len() != self.len() { |
266 | 0 | let builder = SchemaBuilder::from(&self.fields); |
267 | 0 | let schema = builder.finish(); |
268 | | 
269 | 0 | panic!( |
270 | 0 | "StructBuilder ({:?}) and field_builder with index {} ({:?}) are of unequal lengths: ({} != {}).", |
271 | | schema, |
272 | | idx, |
273 | 0 | self.fields[idx].data_type(), |
274 | 0 | self.len(), |
275 | 0 | x.len() |
276 | | ); |
277 | 13 | } |
278 | 13 | }); |
279 | 8 | } |
280 | | 
281 | | /// Returns the current null buffer as a slice, as exposed by the underlying null buffer builder |
282 | 0 | pub fn validity_slice(&self) -> Option<&[u8]> { |
283 | 0 | self.null_buffer_builder.as_slice() |
284 | 0 | } |
285 | | } |
286 | | |
287 | | #[cfg(test)] |
288 | | mod tests { |
289 | | use std::any::type_name; |
290 | | 
291 | | use super::*; |
292 | | use arrow_buffer::Buffer; |
293 | | use arrow_data::ArrayData; |
294 | | use arrow_schema::Field; |
295 | | 
296 | | use crate::{array::Array, types::ArrowDictionaryKeyType}; |
297 | | // Builds a 7-slot struct with Utf8 and Int32 children and checks the struct validity and both children's data. |
298 | | #[test] |
299 | | fn test_struct_array_builder() { |
300 | | let string_builder = StringBuilder::new(); |
301 | | let int_builder = Int32Builder::new(); |
302 | | 
303 | | let fields = vec![ |
304 | | Field::new("f1", DataType::Utf8, true), |
305 | | Field::new("f2", DataType::Int32, true), |
306 | | ]; |
307 | | let field_builders = vec![ |
308 | | Box::new(string_builder) as Box<dyn ArrayBuilder>, |
309 | | Box::new(int_builder) as Box<dyn ArrayBuilder>, |
310 | | ]; |
311 | | 
312 | | let mut builder = StructBuilder::new(fields, field_builders); |
313 | | assert_eq!(2, builder.num_fields()); |
314 | | 
315 | | let string_builder = builder |
316 | | .field_builder::<StringBuilder>(0) |
317 | | .expect("builder at field 0 should be string builder"); |
318 | | string_builder.append_value("joe"); |
319 | | string_builder.append_null(); |
320 | | string_builder.append_null(); |
321 | | string_builder.append_value("mark"); |
322 | | string_builder.append_nulls(2); |
323 | | string_builder.append_value("terry"); |
324 | | 
325 | | let int_builder = builder |
326 | | .field_builder::<Int32Builder>(1) |
327 | | .expect("builder at field 1 should be int builder"); |
328 | | int_builder.append_value(1); |
329 | | int_builder.append_value(2); |
330 | | int_builder.append_null(); |
331 | | int_builder.append_value(4); |
332 | | int_builder.append_nulls(2); |
333 | | int_builder.append_value(3); |
334 | | 
335 | | builder.append(true); |
336 | | builder.append(true); |
337 | | builder.append_null(); |
338 | | builder.append(true); |
339 | | 
340 | | builder.append_nulls(2); |
341 | | builder.append(true); |
342 | | 
343 | | let struct_data = builder.finish().into_data(); |
344 | | 
345 | | assert_eq!(7, struct_data.len()); |
346 | | assert_eq!(3, struct_data.null_count()); |
347 | | assert_eq!(&[75_u8], struct_data.nulls().unwrap().validity()); |
348 | | 
349 | | let expected_string_data = ArrayData::builder(DataType::Utf8) |
350 | | .len(7) |
351 | | .null_bit_buffer(Some(Buffer::from(&[73_u8]))) |
352 | | .add_buffer(Buffer::from_slice_ref([0, 3, 3, 3, 7, 7, 7, 12])) |
353 | | .add_buffer(Buffer::from_slice_ref(b"joemarkterry")) |
354 | | .build() |
355 | | .unwrap(); |
356 | | 
357 | | let expected_int_data = ArrayData::builder(DataType::Int32) |
358 | | .len(7) |
359 | | .null_bit_buffer(Some(Buffer::from_slice_ref([75_u8]))) |
360 | | .add_buffer(Buffer::from_slice_ref([1, 2, 0, 4, 4, 4, 3])) |
361 | | .build() |
362 | | .unwrap(); |
363 | | 
364 | | assert_eq!(expected_string_data, struct_data.child_data()[0]); |
365 | | assert_eq!(expected_int_data, struct_data.child_data()[1]); |
366 | | 
367 | | assert!(struct_data.is_null(4)); |
368 | | assert!(struct_data.is_null(5)); |
369 | | } |
370 | | // `finish` returns the built array and leaves the builder empty, so it can be filled and finished again. |
371 | | #[test] |
372 | | fn test_struct_array_builder_finish() { |
373 | | let int_builder = Int32Builder::new(); |
374 | | let bool_builder = BooleanBuilder::new(); |
375 | | 
376 | | let fields = vec![ |
377 | | Field::new("f1", DataType::Int32, false), |
378 | | Field::new("f2", DataType::Boolean, false), |
379 | | ]; |
380 | | let field_builders = vec![ |
381 | | Box::new(int_builder) as Box<dyn ArrayBuilder>, |
382 | | Box::new(bool_builder) as Box<dyn ArrayBuilder>, |
383 | | ]; |
384 | | 
385 | | let mut builder = StructBuilder::new(fields, field_builders); |
386 | | builder |
387 | | .field_builder::<Int32Builder>(0) |
388 | | .unwrap() |
389 | | .append_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); |
390 | | builder |
391 | | .field_builder::<BooleanBuilder>(1) |
392 | | .unwrap() |
393 | | .append_slice(&[ |
394 | | false, true, false, true, false, true, false, true, false, true, |
395 | | ]); |
396 | | 
397 | | // Mark each of the 10 struct slots as valid, matching the 10 child values appended above. |
398 | | for _ in 0..10 { |
399 | | builder.append(true); |
400 | | } |
401 | | 
402 | | assert_eq!(10, builder.len()); |
403 | | 
404 | | let arr = builder.finish(); |
405 | | 
406 | | assert_eq!(10, arr.len()); |
407 | | assert_eq!(0, builder.len()); |
408 | | 
409 | | builder |
410 | | .field_builder::<Int32Builder>(0) |
411 | | .unwrap() |
412 | | .append_slice(&[1, 3, 5, 7, 9]); |
413 | | builder |
414 | | .field_builder::<BooleanBuilder>(1) |
415 | | .unwrap() |
416 | | .append_slice(&[false, true, false, true, false]); |
417 | | 
418 | | // Mark each of the 5 new struct slots as valid. |
419 | | for _ in 0..5 { |
420 | | builder.append(true); |
421 | | } |
422 | | 
423 | | assert_eq!(5, builder.len()); |
424 | | 
425 | | let arr = builder.finish(); |
426 | | 
427 | | assert_eq!(5, arr.len()); |
428 | | assert_eq!(0, builder.len()); |
429 | | } |
430 | | // `make_builder` for a FixedSizeList type yields a `FixedSizeListBuilder<Box<dyn ArrayBuilder>>` whose values builder is an `Int32Builder`. |
431 | | #[test] |
432 | | fn test_build_fixed_size_list() { |
433 | | const LIST_LENGTH: i32 = 4; |
434 | | let fixed_size_list_dtype = |
435 | | DataType::new_fixed_size_list(DataType::Int32, LIST_LENGTH, false); |
436 | | let mut builder = make_builder(&fixed_size_list_dtype, 10); |
437 | | let builder = builder |
438 | | .as_any_mut() |
439 | | .downcast_mut::<FixedSizeListBuilder<Box<dyn ArrayBuilder>>>(); |
440 | | match builder { |
441 | | Some(builder) => { |
442 | | assert_eq!(builder.value_length(), LIST_LENGTH); |
443 | | assert!(builder |
444 | | .values() |
445 | | .as_any_mut() |
446 | | .downcast_mut::<Int32Builder>() |
447 | | .is_some()); |
448 | | } |
449 | | None => panic!("expected FixedSizeListBuilder, got a different builder type"), |
450 | | } |
451 | | } |
452 | | // `finish_cloned` builds an array without resetting the builder; later appends extend the same contents. |
453 | | #[test] |
454 | | fn test_struct_array_builder_finish_cloned() { |
455 | | let int_builder = Int32Builder::new(); |
456 | | let bool_builder = BooleanBuilder::new(); |
457 | | 
458 | | let fields = vec![ |
459 | | Field::new("f1", DataType::Int32, false), |
460 | | Field::new("f2", DataType::Boolean, false), |
461 | | ]; |
462 | | let field_builders = vec![ |
463 | | Box::new(int_builder) as Box<dyn ArrayBuilder>, |
464 | | Box::new(bool_builder) as Box<dyn ArrayBuilder>, |
465 | | ]; |
466 | | 
467 | | let mut builder = StructBuilder::new(fields, field_builders); |
468 | | builder |
469 | | .field_builder::<Int32Builder>(0) |
470 | | .unwrap() |
471 | | .append_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); |
472 | | builder |
473 | | .field_builder::<BooleanBuilder>(1) |
474 | | .unwrap() |
475 | | .append_slice(&[ |
476 | | false, true, false, true, false, true, false, true, false, true, |
477 | | ]); |
478 | | 
479 | | // Mark each of the 10 struct slots as valid, matching the 10 child values appended above. |
480 | | for _ in 0..10 { |
481 | | builder.append(true); |
482 | | } |
483 | | 
484 | | assert_eq!(10, builder.len()); |
485 | | 
486 | | let mut arr = builder.finish_cloned(); |
487 | | 
488 | | assert_eq!(10, arr.len()); |
489 | | assert_eq!(10, builder.len()); |
490 | | 
491 | | builder |
492 | | .field_builder::<Int32Builder>(0) |
493 | | .unwrap() |
494 | | .append_slice(&[1, 3, 5, 7, 9]); |
495 | | builder |
496 | | .field_builder::<BooleanBuilder>(1) |
497 | | .unwrap() |
498 | | .append_slice(&[false, true, false, true, false]); |
499 | | 
500 | | // Mark each of the 5 new struct slots as valid. |
501 | | for _ in 0..5 { |
502 | | builder.append(true); |
503 | | } |
504 | | 
505 | | assert_eq!(15, builder.len()); |
506 | | 
507 | | arr = builder.finish(); |
508 | | 
509 | | assert_eq!(15, arr.len()); |
510 | | assert_eq!(0, builder.len()); |
511 | | } |
512 | | // `from_fields` creates a child builder of the matching type for every field, including a nested struct. |
513 | | #[test] |
514 | | fn test_struct_array_builder_from_schema() { |
515 | | let mut fields = vec![ |
516 | | Field::new("f1", DataType::Float32, false), |
517 | | Field::new("f2", DataType::Utf8, false), |
518 | | ]; |
519 | | let sub_fields = vec![ |
520 | | Field::new("g1", DataType::Int32, false), |
521 | | Field::new("g2", DataType::Boolean, false), |
522 | | ]; |
523 | | let struct_type = DataType::Struct(sub_fields.into()); |
524 | | fields.push(Field::new("f3", struct_type, false)); |
525 | | 
526 | | let mut builder = StructBuilder::from_fields(fields, 5); |
527 | | assert_eq!(3, builder.num_fields()); |
528 | | assert!(builder.field_builder::<Float32Builder>(0).is_some()); |
529 | | assert!(builder.field_builder::<StringBuilder>(1).is_some()); |
530 | | assert!(builder.field_builder::<StructBuilder>(2).is_some()); |
531 | | } |
532 | | // The finished array and its columns report exactly the data types given in the fields (decimal parameters, timestamp timezone). |
533 | | #[test] |
534 | | fn test_datatype_properties() { |
535 | | let fields = Fields::from(vec![ |
536 | | Field::new("f1", DataType::Decimal128(1, 2), false), |
537 | | Field::new( |
538 | | "f2", |
539 | | DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".into())), |
540 | | false, |
541 | | ), |
542 | | ]); |
543 | | let mut builder = StructBuilder::from_fields(fields.clone(), 1); |
544 | | builder |
545 | | .field_builder::<Decimal128Builder>(0) |
546 | | .unwrap() |
547 | | .append_value(1); |
548 | | builder |
549 | | .field_builder::<TimestampMillisecondBuilder>(1) |
550 | | .unwrap() |
551 | | .append_value(1); |
552 | | builder.append(true); |
553 | | let array = builder.finish(); |
554 | | 
555 | | assert_eq!(array.data_type(), &DataType::Struct(fields.clone())); |
556 | | assert_eq!(array.column(0).data_type(), fields[0].data_type()); |
557 | | assert_eq!(array.column(1).data_type(), fields[1].data_type()); |
558 | | } |
559 | | // Dictionary-typed field with Int8 keys. |
560 | | #[test] |
561 | | fn test_struct_array_builder_from_dictionary_type_int8_key() { |
562 | | test_struct_array_builder_from_dictionary_type_inner::<Int8Type>(DataType::Int8); |
563 | | } |
564 | | // Dictionary-typed field with Int16 keys. |
565 | | #[test] |
566 | | fn test_struct_array_builder_from_dictionary_type_int16_key() { |
567 | | test_struct_array_builder_from_dictionary_type_inner::<Int16Type>(DataType::Int16); |
568 | | } |
569 | | // Dictionary-typed field with Int32 keys. |
570 | | #[test] |
571 | | fn test_struct_array_builder_from_dictionary_type_int32_key() { |
572 | | test_struct_array_builder_from_dictionary_type_inner::<Int32Type>(DataType::Int32); |
573 | | } |
574 | | // Dictionary-typed field with Int64 keys. |
575 | | #[test] |
576 | | fn test_struct_array_builder_from_dictionary_type_int64_key() { |
577 | | test_struct_array_builder_from_dictionary_type_inner::<Int64Type>(DataType::Int64); |
578 | | } |
579 | | // Shared body of the dictionary tests: builds a struct with one Dictionary(key_type, Utf8) field via `from_fields`, appends one value and checks the resulting data types and column length. |
580 | | fn test_struct_array_builder_from_dictionary_type_inner<K: ArrowDictionaryKeyType>( |
581 | | key_type: DataType, |
582 | | ) { |
583 | | let dict_field = Field::new( |
584 | | "f1", |
585 | | DataType::Dictionary(Box::new(key_type), Box::new(DataType::Utf8)), |
586 | | false, |
587 | | ); |
588 | | let fields = vec![dict_field.clone()]; |
589 | | let expected_dtype = DataType::Struct(fields.into()); |
590 | | let cloned_dict_field = dict_field.clone(); |
591 | | let expected_child_dtype = dict_field.data_type(); |
592 | | let mut struct_builder = StructBuilder::from_fields(vec![cloned_dict_field], 5); |
593 | | let Some(dict_builder) = struct_builder.field_builder::<StringDictionaryBuilder<K>>(0) |
594 | | else { |
595 | | panic!( |
596 | | "Builder should be StringDictionaryBuilder<{}>", |
597 | | type_name::<K>() |
598 | | ) |
599 | | }; |
600 | | dict_builder.append_value("dict string"); |
601 | | struct_builder.append(true); |
602 | | let array = struct_builder.finish(); |
603 | | 
604 | | assert_eq!(array.data_type(), &expected_dtype); |
605 | | assert_eq!(array.column(0).data_type(), expected_child_dtype); |
606 | | assert_eq!(array.column(0).len(), 1); |
607 | | } |
608 | | // `from_fields` panics for a dictionary field whose key type (UInt64) is not supported. |
609 | | #[test] |
610 | | #[should_panic( |
611 | | expected = "Data type Dictionary(UInt64, Utf8) with key type UInt64 is not currently supported" |
612 | | )] |
613 | | fn test_struct_array_builder_from_schema_unsupported_type() { |
614 | | let fields = vec![ |
615 | | Field::new("f1", DataType::UInt64, false), |
616 | | Field::new( |
617 | | "f2", |
618 | | DataType::Dictionary(Box::new(DataType::UInt64), Box::new(DataType::Utf8)), |
619 | | false, |
620 | | ), |
621 | | ]; |
622 | | 
623 | | let _ = StructBuilder::from_fields(fields, 5); |
624 | | } |
625 | | // `from_fields` panics for a dictionary field whose value type (Int32) is not supported. |
626 | | #[test] |
627 | | #[should_panic(expected = "Dictionary value type Int32 is not currently supported")] |
628 | | fn test_struct_array_builder_from_dict_with_unsupported_value_type() { |
629 | | let fields = vec![Field::new( |
630 | | "f1", |
631 | | DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Int32)), |
632 | | false, |
633 | | )]; |
634 | | 
635 | | let _ = StructBuilder::from_fields(fields, 5); |
636 | | } |
637 | | // `field_builder` returns `None` when asked for a builder type other than the one stored. |
638 | | #[test] |
639 | | fn test_struct_array_builder_field_builder_type_mismatch() { |
640 | | let int_builder = Int32Builder::with_capacity(10); |
641 | | 
642 | | let fields = vec![Field::new("f1", DataType::Int32, false)]; |
643 | | let field_builders = vec![Box::new(int_builder) as Box<dyn ArrayBuilder>]; |
644 | | 
645 | | let mut builder = StructBuilder::new(fields, field_builders); |
646 | | assert!(builder.field_builder::<BinaryBuilder>(0).is_none()); |
647 | | } |
648 | | // `finish` panics when a child builder's length differs from the struct's length. |
649 | | #[test] |
650 | | #[should_panic( |
651 | | expected = "StructBuilder (Schema { fields: [Field { name: \"f1\", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"f2\", data_type: Boolean, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }], metadata: {} }) and field_builder with index 1 (Boolean) are of unequal lengths: (2 != 1)." |
652 | | )] |
653 | | fn test_struct_array_builder_unequal_field_builders_lengths() { |
654 | | let mut int_builder = Int32Builder::with_capacity(10); |
655 | | let mut bool_builder = BooleanBuilder::new(); |
656 | | 
657 | | int_builder.append_value(1); |
658 | | int_builder.append_value(2); |
659 | | bool_builder.append_value(true); |
660 | | 
661 | | let fields = vec![ |
662 | | Field::new("f1", DataType::Int32, false), |
663 | | Field::new("f2", DataType::Boolean, false), |
664 | | ]; |
665 | | let field_builders = vec![ |
666 | | Box::new(int_builder) as Box<dyn ArrayBuilder>, |
667 | | Box::new(bool_builder) as Box<dyn ArrayBuilder>, |
668 | | ]; |
669 | | 
670 | | let mut builder = StructBuilder::new(fields, field_builders); |
671 | | builder.append(true); |
672 | | builder.append(true); |
673 | | builder.finish(); |
674 | | } |
675 | | // `finish` panics when the number of fields differs from the number of field builders. |
676 | | #[test] |
677 | | #[should_panic(expected = "Number of fields is not equal to the number of field_builders.")] |
678 | | fn test_struct_array_builder_unequal_field_field_builders() { |
679 | | let int_builder = Int32Builder::with_capacity(10); |
680 | | 
681 | | let fields = vec![ |
682 | | Field::new("f1", DataType::Int32, false), |
683 | | Field::new("f2", DataType::Boolean, false), |
684 | | ]; |
685 | | let field_builders = vec![Box::new(int_builder) as Box<dyn ArrayBuilder>]; |
686 | | 
687 | | let mut builder = StructBuilder::new(fields, field_builders); |
688 | | builder.finish(); |
689 | | } |
690 | | // `finish` panics when a child builder's data type does not match its field (here the builder has no timezone). |
691 | | #[test] |
692 | | #[should_panic( |
693 | | expected = "Incorrect datatype for StructArray field \\\"timestamp\\\", expected Timestamp(Nanosecond, Some(\\\"UTC\\\")) got Timestamp(Nanosecond, None)" |
694 | | )] |
695 | | fn test_struct_array_mismatch_builder() { |
696 | | let fields = vec![Field::new( |
697 | | "timestamp", |
698 | | DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".to_owned().into())), |
699 | | false, |
700 | | )]; |
701 | | 
702 | | let field_builders: Vec<Box<dyn ArrayBuilder>> = |
703 | | vec![Box::new(TimestampNanosecondBuilder::new())]; |
704 | | 
705 | | let mut sa = StructBuilder::new(fields, field_builders); |
706 | | sa.finish(); |
707 | | } |
708 | | // A builder with no fields still tracks length and nulls; `finish_cloned` and `finish` produce equal arrays. |
709 | | #[test] |
710 | | fn test_empty() { |
711 | | let mut builder = StructBuilder::new(Fields::empty(), vec![]); |
712 | | builder.append(true); |
713 | | builder.append(false); |
714 | | 
715 | | let a1 = builder.finish_cloned(); |
716 | | let a2 = builder.finish(); |
717 | | assert_eq!(a1, a2); |
718 | | assert_eq!(a1.len(), 2); |
719 | | assert_eq!(a1.null_count(), 1); |
720 | | assert!(a1.is_valid(0)); |
721 | | assert!(a1.is_null(1)); |
722 | | } |
723 | | } |