// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

//! Defines concat kernel for `ArrayRef`
//!
//! Example:
//!
//! ```
//! use arrow_array::{ArrayRef, StringArray};
//! use arrow_select::concat::concat;
//!
//! let arr = concat(&[
//!     &StringArray::from(vec!["hello", "world"]),
//!     &StringArray::from(vec!["!"]),
//! ]).unwrap();
//! assert_eq!(arr.len(), 3);
//! ```
33 | | use crate::dictionary::{merge_dictionary_values, should_merge_dictionary_values}; |
34 | | use arrow_array::builder::{ |
35 | | BooleanBuilder, GenericByteBuilder, GenericByteViewBuilder, PrimitiveBuilder, |
36 | | }; |
37 | | use arrow_array::cast::AsArray; |
38 | | use arrow_array::types::*; |
39 | | use arrow_array::*; |
40 | | use arrow_buffer::{ |
41 | | ArrowNativeType, BooleanBufferBuilder, MutableBuffer, NullBuffer, OffsetBuffer, ScalarBuffer, |
42 | | }; |
43 | | use arrow_data::ArrayDataBuilder; |
44 | | use arrow_data::transform::{Capacities, MutableArrayData}; |
45 | | use arrow_schema::{ArrowError, DataType, FieldRef, Fields, SchemaRef}; |
46 | | use std::{collections::HashSet, ops::Add, sync::Arc}; |
47 | | |
48 | 42 | fn binary_capacity<T: ByteArrayType>(arrays: &[&dyn Array]) -> Capacities { |
49 | 42 | let mut item_capacity = 0; |
50 | 42 | let mut bytes_capacity = 0; |
51 | 171 | for array129 in arrays { |
52 | 129 | let a = array.as_bytes::<T>(); |
53 | | |
54 | | // Guaranteed to always have at least one element |
55 | 129 | let offsets = a.value_offsets(); |
56 | 129 | bytes_capacity += offsets[offsets.len() - 1].as_usize() - offsets[0].as_usize(); |
57 | 129 | item_capacity += a.len() |
58 | | } |
59 | | |
60 | 42 | Capacities::Binary(item_capacity, Some(bytes_capacity)) |
61 | 42 | } |
62 | | |
63 | 1 | fn fixed_size_list_capacity(arrays: &[&dyn Array], data_type: &DataType) -> Capacities { |
64 | 1 | if let DataType::FixedSizeList(f, _) = data_type { |
65 | 3 | let item_capacity1 = arrays1 .iter1 ().map1 (|a| a.len()).sum1 (); |
66 | 1 | let child_data_type = f.data_type(); |
67 | 1 | match child_data_type { |
68 | | // These types should match the types that `get_capacity` |
69 | | // has special handling for. |
70 | | DataType::Utf8 |
71 | | | DataType::LargeUtf8 |
72 | | | DataType::Binary |
73 | | | DataType::LargeBinary |
74 | | | DataType::FixedSizeList(_, _) => { |
75 | 0 | let values: Vec<&dyn arrow_array::Array> = arrays |
76 | 0 | .iter() |
77 | 0 | .map(|a| a.as_fixed_size_list().values().as_ref()) |
78 | 0 | .collect(); |
79 | 0 | Capacities::List( |
80 | 0 | item_capacity, |
81 | 0 | Some(Box::new(get_capacity(&values, child_data_type))), |
82 | 0 | ) |
83 | | } |
84 | 1 | _ => Capacities::Array(item_capacity), |
85 | | } |
86 | | } else { |
87 | 0 | unreachable!("illegal data type for fixed size list") |
88 | | } |
89 | 1 | } |
90 | | |
91 | 11 | fn concat_byte_view<B: ByteViewType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> { |
92 | 11 | let mut builder = |
93 | 93 | GenericByteViewBuilder::<B>::with_capacity11 (arrays11 .iter11 ().map11 (|a| a.len()).sum11 ()); |
94 | 93 | for &array in arrays11 .iter11 () { |
95 | 93 | builder.append_array(array.as_byte_view()); |
96 | 93 | } |
97 | 11 | Ok(Arc::new(builder.finish())) |
98 | 11 | } |
99 | | |
100 | 13 | fn concat_dictionaries<K: ArrowDictionaryKeyType>( |
101 | 13 | arrays: &[&dyn Array], |
102 | 13 | ) -> Result<ArrayRef, ArrowError> { |
103 | 13 | let mut output_len = 0; |
104 | 13 | let dictionaries: Vec<_> = arrays |
105 | 13 | .iter() |
106 | 80.0k | .map13 (|x| x.as_dictionary::<K>()) |
107 | 80.0k | .inspect13 (|d| output_len += d.len()) |
108 | 13 | .collect(); |
109 | | |
110 | 13 | if !should_merge_dictionary_values::<K>(&dictionaries, output_len) { |
111 | 7 | return concat_fallback(arrays, Capacities::Array(output_len)); |
112 | 6 | } |
113 | | |
114 | 6 | let merged = merge_dictionary_values(&dictionaries, None)?0 ; |
115 | | |
116 | | // Recompute keys |
117 | 6 | let mut key_values = Vec::with_capacity(output_len); |
118 | | |
119 | 6 | let mut has_nulls = false; |
120 | 80.0k | for (d, mapping) in dictionaries.iter()6 .zip6 (merged.key_mappings6 ) { |
121 | 80.0k | has_nulls |= d.null_count() != 0; |
122 | 80.0k | for key in d.keys()80.0k .values80.0k () { |
123 | | // Use get to safely handle nulls |
124 | 80.0k | key_values.push(mapping.get(key.as_usize()).copied().unwrap_or_default()) |
125 | | } |
126 | | } |
127 | | |
128 | 6 | let nulls = has_nulls.then(|| {1 |
129 | 1 | let mut nulls = BooleanBufferBuilder::new(output_len); |
130 | 3 | for d2 in &dictionaries { |
131 | 2 | match d.nulls() { |
132 | 1 | Some(n) => nulls.append_buffer(n.inner()), |
133 | 1 | None => nulls.append_n(d.len(), true), |
134 | | } |
135 | | } |
136 | 1 | NullBuffer::new(nulls.finish()) |
137 | 1 | }); |
138 | | |
139 | 6 | let keys = PrimitiveArray::<K>::try_new(key_values.into(), nulls)?0 ; |
140 | | // Sanity check |
141 | 6 | assert_eq!(keys.len(), output_len); |
142 | | |
143 | 6 | let array = unsafe { DictionaryArray::new_unchecked(keys, merged.values) }; |
144 | 6 | Ok(Arc::new(array)) |
145 | 13 | } |
146 | | |
147 | 5 | fn concat_lists<OffsetSize: OffsetSizeTrait>( |
148 | 5 | arrays: &[&dyn Array], |
149 | 5 | field: &FieldRef, |
150 | 5 | ) -> Result<ArrayRef, ArrowError> { |
151 | 5 | let mut output_len = 0; |
152 | 5 | let mut list_has_nulls = false; |
153 | 5 | let mut list_has_slices = false; |
154 | | |
155 | 5 | let lists = arrays |
156 | 5 | .iter() |
157 | 80.0k | .map5 (|x| x.as_list::<OffsetSize>()) |
158 | 80.0k | .inspect5 (|l| { |
159 | 80.0k | output_len += l.len(); |
160 | 80.0k | list_has_nulls |= l.null_count() != 0; |
161 | 80.0k | list_has_slices |= l.offsets()[0] > OffsetSize::zero() |
162 | 80.0k | || l.offsets().last().unwrap().as_usize() < l.values().len(); |
163 | 80.0k | }) |
164 | 5 | .collect::<Vec<_>>(); |
165 | | |
166 | 5 | let lists_nulls = list_has_nulls.then(|| {3 |
167 | 3 | let mut nulls = BooleanBufferBuilder::new(output_len); |
168 | 10 | for l7 in &lists { |
169 | 7 | match l.nulls() { |
170 | 6 | Some(n) => nulls.append_buffer(n.inner()), |
171 | 1 | None => nulls.append_n(l.len(), true), |
172 | | } |
173 | | } |
174 | 3 | NullBuffer::new(nulls.finish()) |
175 | 3 | }); |
176 | | |
177 | | // If any of the lists have slices, we need to slice the values |
178 | | // to ensure that the offsets are correct |
179 | | let mut sliced_values; |
180 | 5 | let values: Vec<&dyn Array> = if list_has_slices { |
181 | 2 | sliced_values = Vec::with_capacity(lists.len()); |
182 | 6 | for l4 in &lists { |
183 | 4 | // if the first offset is non-zero, we need to slice the values so when |
184 | 4 | // we concatenate them below only the relevant values are included |
185 | 4 | let offsets = l.offsets(); |
186 | 4 | let start_offset = offsets[0].as_usize(); |
187 | 4 | let end_offset = offsets.last().unwrap().as_usize(); |
188 | 4 | sliced_values.push(l.values().slice(start_offset, end_offset - start_offset)); |
189 | 4 | } |
190 | 4 | sliced_values.iter()2 .map2 (|a| a.as_ref()).collect2 () |
191 | | } else { |
192 | 80.0k | lists.iter()3 .map3 (|x| x.values().as_ref()).collect3 () |
193 | | }; |
194 | | |
195 | 5 | let concatenated_values = concat(values.as_slice())?0 ; |
196 | | |
197 | | // Merge value offsets from the lists |
198 | 5 | let value_offset_buffer = |
199 | 80.0k | OffsetBuffer::<OffsetSize>::from_lengths5 (lists.iter()5 .flat_map5 (|x| x.offsets().lengths())); |
200 | | |
201 | 5 | let array = GenericListArray::<OffsetSize>::try_new( |
202 | 5 | Arc::clone(field), |
203 | 5 | value_offset_buffer, |
204 | 5 | concatenated_values, |
205 | 5 | lists_nulls, |
206 | 0 | )?; |
207 | | |
208 | 5 | Ok(Arc::new(array)) |
209 | 5 | } |
210 | | |
211 | 2 | fn concat_list_view<OffsetSize: OffsetSizeTrait>( |
212 | 2 | arrays: &[&dyn Array], |
213 | 2 | field: &FieldRef, |
214 | 2 | ) -> Result<ArrayRef, ArrowError> { |
215 | 2 | let mut output_len = 0; |
216 | 2 | let mut list_has_nulls = false; |
217 | | |
218 | 2 | let lists = arrays |
219 | 2 | .iter() |
220 | 6 | .map2 (|x| x.as_list_view::<OffsetSize>()) |
221 | 6 | .inspect2 (|l| { |
222 | 6 | output_len += l.len(); |
223 | 6 | list_has_nulls |= l.null_count() != 0; |
224 | 6 | }) |
225 | 2 | .collect::<Vec<_>>(); |
226 | | |
227 | 2 | let lists_nulls = list_has_nulls.then(|| { |
228 | 2 | let mut nulls = BooleanBufferBuilder::new(output_len); |
229 | 8 | for l6 in &lists { |
230 | 6 | match l.nulls() { |
231 | 4 | Some(n) => nulls.append_buffer(n.inner()), |
232 | 2 | None => nulls.append_n(l.len(), true), |
233 | | } |
234 | | } |
235 | 2 | NullBuffer::new(nulls.finish()) |
236 | 2 | }); |
237 | | |
238 | 6 | let values2 : Vec<&dyn Array>2 = lists.iter()2 .map2 (|l| l.values().as_ref()).collect2 (); |
239 | | |
240 | 2 | let concatenated_values = concat(values.as_slice())?0 ; |
241 | | |
242 | 6 | let sizes2 : ScalarBuffer<OffsetSize>2 = lists.iter()2 .flat_map2 (|x| x.sizes()).copied2 ().collect2 (); |
243 | | |
244 | 6 | let mut offsets2 = MutableBuffer::with_capacity2 (lists.iter()2 .map2 (|l| l.offsets().len()).sum2 ()); |
245 | 2 | let mut global_offset = OffsetSize::zero(); |
246 | 6 | for l in lists2 .iter2 () { |
247 | 12 | for &offset in l6 .offsets6 () { |
248 | 12 | offsets.push(offset + global_offset); |
249 | 12 | } |
250 | | |
251 | | // advance the offsets |
252 | 6 | global_offset += OffsetSize::from_usize(l.values().len()).unwrap(); |
253 | | } |
254 | | |
255 | 2 | let offsets = ScalarBuffer::from(offsets); |
256 | | |
257 | 2 | let array = GenericListViewArray::try_new( |
258 | 2 | field.clone(), |
259 | 2 | offsets, |
260 | 2 | sizes, |
261 | 2 | concatenated_values, |
262 | 2 | lists_nulls, |
263 | 0 | )?; |
264 | | |
265 | 2 | Ok(Arc::new(array)) |
266 | 2 | } |
267 | | |
268 | 34 | fn concat_primitives<T: ArrowPrimitiveType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> { |
269 | 241 | let mut builder34 = PrimitiveBuilder::<T>::with_capacity34 (arrays34 .iter34 ().map34 (|a| a.len()).sum34 ()) |
270 | 34 | .with_data_type(arrays[0].data_type().clone()); |
271 | | |
272 | 275 | for array241 in arrays { |
273 | 241 | builder.append_array(array.as_primitive()); |
274 | 241 | } |
275 | | |
276 | 34 | Ok(Arc::new(builder.finish())) |
277 | 34 | } |
278 | | |
279 | 1 | fn concat_boolean(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> { |
280 | 2 | let mut builder1 = BooleanBuilder::with_capacity1 (arrays1 .iter1 ().map1 (|a| a.len()).sum1 ()); |
281 | | |
282 | 3 | for array2 in arrays { |
283 | 2 | builder.append_array(array.as_boolean()); |
284 | 2 | } |
285 | | |
286 | 1 | Ok(Arc::new(builder.finish())) |
287 | 1 | } |
288 | | |
289 | 42 | fn concat_bytes<T: ByteArrayType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> { |
290 | 42 | let (item_capacity, bytes_capacity) = match binary_capacity::<T>(arrays) { |
291 | 42 | Capacities::Binary(item_capacity, Some(bytes_capacity)) => (item_capacity, bytes_capacity), |
292 | 0 | _ => unreachable!(), |
293 | | }; |
294 | | |
295 | 42 | let mut builder = GenericByteBuilder::<T>::with_capacity(item_capacity, bytes_capacity); |
296 | | |
297 | 171 | for array129 in arrays { |
298 | 129 | builder.append_array(array.as_bytes::<T>())?0 ; |
299 | | } |
300 | | |
301 | 42 | Ok(Arc::new(builder.finish())) |
302 | 42 | } |
303 | | |
304 | 5 | fn concat_structs(arrays: &[&dyn Array], fields: &Fields) -> Result<ArrayRef, ArrowError> { |
305 | 5 | let mut len = 0; |
306 | 5 | let mut has_nulls = false; |
307 | 5 | let structs = arrays |
308 | 5 | .iter() |
309 | 11 | .map5 (|a| { |
310 | 11 | len += a.len(); |
311 | 11 | has_nulls |= a.null_count() > 0; |
312 | 11 | a.as_struct() |
313 | 11 | }) |
314 | 5 | .collect::<Vec<_>>(); |
315 | | |
316 | 5 | let nulls = has_nulls.then(|| {1 |
317 | 1 | let mut b = BooleanBufferBuilder::new(len); |
318 | 3 | for s2 in &structs { |
319 | 2 | match s.nulls() { |
320 | 2 | Some(n) => b.append_buffer(n.inner()), |
321 | 0 | None => b.append_n(s.len(), true), |
322 | | } |
323 | | } |
324 | 1 | NullBuffer::new(b.finish()) |
325 | 1 | }); |
326 | | |
327 | 5 | let column_concat_result = (0..fields.len()) |
328 | 5 | .map(|i| {4 |
329 | 4 | let extracted_cols = structs |
330 | 4 | .iter() |
331 | 9 | .map4 (|s| s.column(i).as_ref()) |
332 | 4 | .collect::<Vec<_>>(); |
333 | 4 | concat(&extracted_cols) |
334 | 4 | }) |
335 | 5 | .collect::<Result<Vec<_>, ArrowError>>()?0 ; |
336 | | |
337 | 5 | Ok(Arc::new(StructArray::try_new_with_length( |
338 | 5 | fields.clone(), |
339 | 5 | column_concat_result, |
340 | 5 | nulls, |
341 | 5 | len, |
342 | 0 | )?)) |
343 | 5 | } |
344 | | |
345 | | /// Concatenate multiple RunArray instances into a single RunArray. |
346 | | /// |
347 | | /// This function handles the special case of concatenating RunArrays by: |
348 | | /// 1. Collecting all run ends and values from input arrays |
349 | | /// 2. Adjusting run ends to account for the length of previous arrays |
350 | | /// 3. Creating a new RunArray with the combined data |
351 | 4 | fn concat_run_arrays<R: RunEndIndexType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> |
352 | 4 | where |
353 | 4 | R::Native: Add<Output = R::Native>, |
354 | | { |
355 | 4 | let run_arrays: Vec<_> = arrays |
356 | 4 | .iter() |
357 | 9 | .map4 (|x| x.as_run::<R>()) |
358 | 9 | .filter4 (|x| !x.run_ends().is_empty()) |
359 | 4 | .collect(); |
360 | | |
361 | | // The run ends need to be adjusted by the sum of the lengths of the previous arrays. |
362 | 4 | let needed_run_end_adjustments = std::iter::once(R::default_value()) |
363 | 4 | .chain( |
364 | 4 | run_arrays |
365 | 4 | .iter() |
366 | 9 | .scan4 (R::default_value4 (), |acc, run_array| { |
367 | 9 | *acc = *acc + *run_array.run_ends().values().last().unwrap(); |
368 | 9 | Some(*acc) |
369 | 9 | }), |
370 | | ) |
371 | 4 | .collect::<Vec<_>>(); |
372 | | |
373 | | // This works out nicely to be the total (logical) length of the resulting array. |
374 | 4 | let total_len = needed_run_end_adjustments.last().unwrap().as_usize(); |
375 | | |
376 | 4 | let run_ends_array = |
377 | 4 | PrimitiveArray::<R>::from_iter_values(run_arrays.iter().enumerate().flat_map( |
378 | 9 | move |(i, run_array)| { |
379 | 9 | let adjustment = needed_run_end_adjustments[i]; |
380 | 9 | run_array |
381 | 9 | .run_ends() |
382 | 9 | .values() |
383 | 9 | .iter() |
384 | 20 | .map9 (move |run_end| *run_end + adjustment) |
385 | 9 | }, |
386 | | )); |
387 | | |
388 | 4 | let all_values = concat( |
389 | 4 | &run_arrays |
390 | 4 | .iter() |
391 | 9 | .map4 (|x| x.values().as_ref()) |
392 | 4 | .collect::<Vec<_>>(), |
393 | 0 | )?; |
394 | | |
395 | 4 | let builder = ArrayDataBuilder::new(run_arrays[0].data_type().clone()) |
396 | 4 | .len(total_len) |
397 | 4 | .child_data(vec![run_ends_array.into_data(), all_values.into_data()]); |
398 | | |
399 | | // `build_unchecked` is used to avoid recursive validation of child arrays. |
400 | 4 | let array_data = unsafe { builder.build_unchecked() }; |
401 | 4 | array_data.validate_data()?0 ; |
402 | | |
403 | 4 | Ok(Arc::<RunArray<R>>::new(array_data.into())) |
404 | 4 | } |
405 | | |
/// Helper for `downcast_integer!`: dispatches `concat_dictionaries` on the
/// concrete dictionary key type and returns from the enclosing function.
macro_rules! dict_helper {
    ($t:ty, $arrays:expr) => {
        return Ok(Arc::new(concat_dictionaries::<$t>($arrays)?) as _)
    };
}
411 | | |
/// Helper for `downcast_primitive!`: dispatches `concat_primitives` on the
/// concrete primitive type and returns from the enclosing function.
macro_rules! primitive_concat {
    ($t:ty, $arrays:expr) => {
        return Ok(Arc::new(concat_primitives::<$t>($arrays)?) as _)
    };
}
417 | | |
418 | 1 | fn get_capacity(arrays: &[&dyn Array], data_type: &DataType) -> Capacities { |
419 | 1 | match data_type { |
420 | 0 | DataType::Utf8 => binary_capacity::<Utf8Type>(arrays), |
421 | 0 | DataType::LargeUtf8 => binary_capacity::<LargeUtf8Type>(arrays), |
422 | 0 | DataType::Binary => binary_capacity::<BinaryType>(arrays), |
423 | 0 | DataType::LargeBinary => binary_capacity::<LargeBinaryType>(arrays), |
424 | 1 | DataType::FixedSizeList(_, _) => fixed_size_list_capacity(arrays, data_type), |
425 | 0 | _ => Capacities::Array(arrays.iter().map(|a| a.len()).sum()), |
426 | | } |
427 | 1 | } |
428 | | |
429 | | /// Concatenate multiple [Array] of the same type into a single [ArrayRef]. |
430 | 139 | pub fn concat(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> { |
431 | 139 | if arrays.is_empty() { |
432 | 1 | return Err(ArrowError::ComputeError( |
433 | 1 | "concat requires input of at least one array".to_string(), |
434 | 1 | )); |
435 | 138 | } else if arrays.len() == 1 { |
436 | 15 | let array = arrays[0]; |
437 | 15 | return Ok(array.slice(0, array.len())); |
438 | 123 | } |
439 | | |
440 | 123 | let d = arrays[0].data_type(); |
441 | 160k | if arrays123 .iter().skip(1).any123 (|array| array.data_type() != d) { |
442 | | // Create error message with up to 10 unique data types in the order they appear |
443 | 5 | let error_message = { |
444 | | // 10 max unique data types to print and another 1 to know if there are more |
445 | 5 | let mut unique_data_types = HashSet::with_capacity(11); |
446 | | |
447 | 5 | let mut error_message = |
448 | 5 | format!("It is not possible to concatenate arrays of different data types ({d}"); |
449 | 5 | unique_data_types.insert(d); |
450 | | |
451 | 47 | for array44 in arrays { |
452 | 44 | let is_unique = unique_data_types.insert(array.data_type()); |
453 | | |
454 | 44 | if unique_data_types.len() == 11 { |
455 | 2 | error_message.push_str(", ..."); |
456 | 2 | break; |
457 | 42 | } |
458 | | |
459 | 42 | if is_unique { |
460 | 30 | error_message.push_str(", "); |
461 | 30 | error_message.push_str(&array.data_type().to_string()); |
462 | 30 | }12 |
463 | | } |
464 | | |
465 | 5 | error_message.push_str(")."); |
466 | | |
467 | 5 | error_message |
468 | | }; |
469 | | |
470 | 5 | return Err(ArrowError::InvalidArgumentError(error_message)); |
471 | 118 | } |
472 | | |
473 | 0 | downcast_primitive! { |
474 | 4 | d => (primitive_concat, arrays0 ), |
475 | 1 | DataType::Boolean => concat_boolean(arrays), |
476 | 13 | DataType::Dictionary(k, _) => { |
477 | 0 | downcast_integer! { |
478 | 13 | k.as_ref() => (dict_helper, arrays4 ), |
479 | 0 | _ => unreachable!("illegal dictionary key type {k}") |
480 | | } |
481 | | } |
482 | 5 | DataType::List(field) => concat_lists::<i32>(arrays, field), |
483 | 0 | DataType::LargeList(field) => concat_lists::<i64>(arrays, field), |
484 | 2 | DataType::ListView(field) => concat_list_view::<i32>(arrays, field), |
485 | 0 | DataType::LargeListView(field) => concat_list_view::<i64>(arrays, field), |
486 | 5 | DataType::Struct(fields) => concat_structs(arrays, fields), |
487 | 39 | DataType::Utf8 => concat_bytes::<Utf8Type>(arrays), |
488 | 1 | DataType::LargeUtf8 => concat_bytes::<LargeUtf8Type>(arrays), |
489 | 0 | DataType::Binary => concat_bytes::<BinaryType>(arrays), |
490 | 2 | DataType::LargeBinary => concat_bytes::<LargeBinaryType>(arrays), |
491 | 4 | DataType::RunEndEncoded(r, _) => { |
492 | | // Handle RunEndEncoded arrays with special concat function |
493 | | // We need to downcast based on the run end type |
494 | 4 | match r.data_type() { |
495 | 0 | DataType::Int16 => concat_run_arrays::<Int16Type>(arrays), |
496 | 4 | DataType::Int32 => concat_run_arrays::<Int32Type>(arrays), |
497 | 0 | DataType::Int64 => concat_run_arrays::<Int64Type>(arrays), |
498 | 0 | _ => unreachable!("Unsupported run end index type: {r:?}"), |
499 | | } |
500 | | } |
501 | 10 | DataType::Utf8View => concat_byte_view::<StringViewType>(arrays), |
502 | 1 | DataType::BinaryView => concat_byte_view::<BinaryViewType>(arrays), |
503 | | _ => { |
504 | 1 | let capacity = get_capacity(arrays, d); |
505 | 1 | concat_fallback(arrays, capacity) |
506 | | } |
507 | | } |
508 | 139 | } |
509 | | |
510 | | /// Concatenates arrays using MutableArrayData |
511 | | /// |
512 | | /// This will naively concatenate dictionaries |
513 | 8 | fn concat_fallback(arrays: &[&dyn Array], capacity: Capacities) -> Result<ArrayRef, ArrowError> { |
514 | 18 | let array_data8 : Vec<_>8 = arrays8 .iter8 ().map8 (|a| a.to_data()).collect8 ::<Vec<_>>(); |
515 | 8 | let array_data = array_data.iter().collect(); |
516 | 8 | let mut mutable = MutableArrayData::with_capacities(array_data, false, capacity); |
517 | | |
518 | 18 | for (i, a) in arrays8 .iter8 ().enumerate8 () { |
519 | 18 | mutable.extend(i, 0, a.len()) |
520 | | } |
521 | | |
522 | 8 | Ok(make_array(mutable.freeze())) |
523 | 8 | } |
524 | | |
525 | | /// Concatenates `batches` together into a single [`RecordBatch`]. |
526 | | /// |
527 | | /// The output batch has the specified `schemas`; The schema of the |
528 | | /// input are ignored. |
529 | | /// |
530 | | /// Returns an error if the types of underlying arrays are different. |
531 | 30 | pub fn concat_batches<'a>( |
532 | 30 | schema: &SchemaRef, |
533 | 30 | input_batches: impl IntoIterator<Item = &'a RecordBatch>, |
534 | 30 | ) -> Result<RecordBatch, ArrowError> { |
535 | | // When schema is empty, sum the number of the rows of all batches |
536 | 30 | if schema.fields().is_empty() { |
537 | 2 | let num_rows: usize = input_batches.into_iter().map(RecordBatch::num_rows).sum(); |
538 | 2 | let mut options = RecordBatchOptions::default(); |
539 | 2 | options.row_count = Some(num_rows); |
540 | 2 | return RecordBatch::try_new_with_options(schema.clone(), vec![], &options); |
541 | 28 | } |
542 | | |
543 | 28 | let batches: Vec<&RecordBatch> = input_batches.into_iter().collect(); |
544 | 28 | if batches.is_empty() { |
545 | 2 | return Ok(RecordBatch::new_empty(schema.clone())); |
546 | 26 | } |
547 | 26 | let field_num = schema.fields().len(); |
548 | 26 | let mut arrays = Vec::with_capacity(field_num); |
549 | 40 | for i in 0..field_num26 { |
550 | 40 | let array39 = concat( |
551 | 40 | &batches |
552 | 40 | .iter() |
553 | 341 | .map40 (|batch| batch.column(i).as_ref()) |
554 | 40 | .collect::<Vec<_>>(), |
555 | 1 | )?; |
556 | 39 | arrays.push(array); |
557 | | } |
558 | 25 | RecordBatch::try_new(schema.clone(), arrays) |
559 | 30 | } |
560 | | |
561 | | #[cfg(test)] |
562 | | mod tests { |
563 | | use super::*; |
564 | | use arrow_array::builder::{ |
565 | | GenericListBuilder, Int64Builder, ListViewBuilder, StringDictionaryBuilder, |
566 | | }; |
567 | | use arrow_schema::{Field, Schema}; |
568 | | use std::fmt::Debug; |
569 | | |
570 | | #[test] |
571 | 1 | fn test_concat_empty_vec() { |
572 | 1 | let re = concat(&[]); |
573 | 1 | assert!(re.is_err()); |
574 | 1 | } |
575 | | |
576 | | #[test] |
577 | 1 | fn test_concat_batches_no_columns() { |
578 | | // Test concat using empty schema / batches without columns |
579 | 1 | let schema = Arc::new(Schema::empty()); |
580 | | |
581 | 1 | let mut options = RecordBatchOptions::default(); |
582 | 1 | options.row_count = Some(100); |
583 | 1 | let batch = RecordBatch::try_new_with_options(schema.clone(), vec![], &options).unwrap(); |
584 | | // put in 2 batches of 100 rows each |
585 | 1 | let re = concat_batches(&schema, &[batch.clone(), batch]).unwrap(); |
586 | | |
587 | 1 | assert_eq!(re.num_rows(), 200); |
588 | 1 | } |
589 | | |
590 | | #[test] |
591 | 1 | fn test_concat_one_element_vec() { |
592 | 1 | let arr = Arc::new(PrimitiveArray::<Int64Type>::from(vec![ |
593 | 1 | Some(-1), |
594 | 1 | Some(2), |
595 | 1 | None, |
596 | 1 | ])) as ArrayRef; |
597 | 1 | let result = concat(&[arr.as_ref()]).unwrap(); |
598 | 1 | assert_eq!( |
599 | 1 | &arr, &result, |
600 | 0 | "concatenating single element array gives back the same result" |
601 | | ); |
602 | 1 | } |
603 | | |
604 | | #[test] |
605 | 1 | fn test_concat_incompatible_datatypes() { |
606 | 1 | let re = concat(&[ |
607 | 1 | &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]), |
608 | 1 | // 2 string to make sure we only mention unique types |
609 | 1 | &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]), |
610 | 1 | &StringArray::from(vec![Some("hey"), Some(""), Some("you")]), |
611 | 1 | // Another type to make sure we are showing all the incompatible types |
612 | 1 | &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]), |
613 | 1 | ]); |
614 | | |
615 | 1 | assert_eq!( |
616 | 1 | re.unwrap_err().to_string(), |
617 | | "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32)." |
618 | | ); |
619 | 1 | } |
620 | | |
621 | | #[test] |
622 | 1 | fn test_concat_10_incompatible_datatypes_should_include_all_of_them() { |
623 | 1 | let re = concat(&[ |
624 | 1 | &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]), |
625 | 1 | // 2 string to make sure we only mention unique types |
626 | 1 | &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]), |
627 | 1 | &StringArray::from(vec![Some("hey"), Some(""), Some("you")]), |
628 | 1 | // Another type to make sure we are showing all the incompatible types |
629 | 1 | &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]), |
630 | 1 | &PrimitiveArray::<Int8Type>::from(vec![Some(-1), Some(2), None]), |
631 | 1 | &PrimitiveArray::<Int16Type>::from(vec![Some(-1), Some(2), None]), |
632 | 1 | &PrimitiveArray::<UInt8Type>::from(vec![Some(1), Some(2), None]), |
633 | 1 | &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]), |
634 | 1 | &PrimitiveArray::<UInt32Type>::from(vec![Some(1), Some(2), None]), |
635 | 1 | // Non unique |
636 | 1 | &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]), |
637 | 1 | &PrimitiveArray::<UInt64Type>::from(vec![Some(1), Some(2), None]), |
638 | 1 | &PrimitiveArray::<Float32Type>::from(vec![Some(1.0), Some(2.0), None]), |
639 | 1 | ]); |
640 | | |
641 | 1 | assert_eq!( |
642 | 1 | re.unwrap_err().to_string(), |
643 | | "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32)." |
644 | | ); |
645 | 1 | } |
646 | | |
647 | | #[test] |
648 | 1 | fn test_concat_11_incompatible_datatypes_should_only_include_10() { |
649 | 1 | let re = concat(&[ |
650 | 1 | &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]), |
651 | 1 | // 2 string to make sure we only mention unique types |
652 | 1 | &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]), |
653 | 1 | &StringArray::from(vec![Some("hey"), Some(""), Some("you")]), |
654 | 1 | // Another type to make sure we are showing all the incompatible types |
655 | 1 | &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]), |
656 | 1 | &PrimitiveArray::<Int8Type>::from(vec![Some(-1), Some(2), None]), |
657 | 1 | &PrimitiveArray::<Int16Type>::from(vec![Some(-1), Some(2), None]), |
658 | 1 | &PrimitiveArray::<UInt8Type>::from(vec![Some(1), Some(2), None]), |
659 | 1 | &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]), |
660 | 1 | &PrimitiveArray::<UInt32Type>::from(vec![Some(1), Some(2), None]), |
661 | 1 | // Non unique |
662 | 1 | &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]), |
663 | 1 | &PrimitiveArray::<UInt64Type>::from(vec![Some(1), Some(2), None]), |
664 | 1 | &PrimitiveArray::<Float32Type>::from(vec![Some(1.0), Some(2.0), None]), |
665 | 1 | &PrimitiveArray::<Float64Type>::from(vec![Some(1.0), Some(2.0), None]), |
666 | 1 | ]); |
667 | | |
668 | 1 | assert_eq!( |
669 | 1 | re.unwrap_err().to_string(), |
670 | | "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32, ...)." |
671 | | ); |
672 | 1 | } |
673 | | |
674 | | #[test] |
675 | 1 | fn test_concat_13_incompatible_datatypes_should_not_include_all_of_them() { |
676 | 1 | let re = concat(&[ |
677 | 1 | &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]), |
678 | 1 | // 2 string to make sure we only mention unique types |
679 | 1 | &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]), |
680 | 1 | &StringArray::from(vec![Some("hey"), Some(""), Some("you")]), |
681 | 1 | // Another type to make sure we are showing all the incompatible types |
682 | 1 | &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]), |
683 | 1 | &PrimitiveArray::<Int8Type>::from(vec![Some(-1), Some(2), None]), |
684 | 1 | &PrimitiveArray::<Int16Type>::from(vec![Some(-1), Some(2), None]), |
685 | 1 | &PrimitiveArray::<UInt8Type>::from(vec![Some(1), Some(2), None]), |
686 | 1 | &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]), |
687 | 1 | &PrimitiveArray::<UInt32Type>::from(vec![Some(1), Some(2), None]), |
688 | 1 | // Non unique |
689 | 1 | &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]), |
690 | 1 | &PrimitiveArray::<UInt64Type>::from(vec![Some(1), Some(2), None]), |
691 | 1 | &PrimitiveArray::<Float32Type>::from(vec![Some(1.0), Some(2.0), None]), |
692 | 1 | &PrimitiveArray::<Float64Type>::from(vec![Some(1.0), Some(2.0), None]), |
693 | 1 | &PrimitiveArray::<Float16Type>::new_null(3), |
694 | 1 | &BooleanArray::from(vec![Some(true), Some(false), None]), |
695 | 1 | ]); |
696 | | |
697 | 1 | assert_eq!( |
698 | 1 | re.unwrap_err().to_string(), |
699 | | "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32, ...)." |
700 | | ); |
701 | 1 | } |
702 | | |
703 | | #[test] |
704 | 1 | fn test_concat_string_arrays() { |
705 | 1 | let arr = concat(&[ |
706 | 1 | &StringArray::from(vec!["hello", "world"]), |
707 | 1 | &StringArray::from(vec!["2", "3", "4"]), |
708 | 1 | &StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]), |
709 | 1 | ]) |
710 | 1 | .unwrap(); |
711 | | |
712 | 1 | let expected_output = Arc::new(StringArray::from(vec![ |
713 | 1 | Some("hello"), |
714 | 1 | Some("world"), |
715 | 1 | Some("2"), |
716 | 1 | Some("3"), |
717 | 1 | Some("4"), |
718 | 1 | Some("foo"), |
719 | 1 | Some("bar"), |
720 | 1 | None, |
721 | 1 | Some("baz"), |
722 | 1 | ])) as ArrayRef; |
723 | | |
724 | 1 | assert_eq!(&arr, &expected_output); |
725 | 1 | } |
726 | | |
727 | | #[test] |
728 | 1 | fn test_concat_string_view_arrays() { |
729 | 1 | let arr = concat(&[ |
730 | 1 | &StringViewArray::from(vec!["helloxxxxxxxxxxa", "world____________"]), |
731 | 1 | &StringViewArray::from(vec!["helloxxxxxxxxxxy", "3", "4"]), |
732 | 1 | &StringViewArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]), |
733 | 1 | ]) |
734 | 1 | .unwrap(); |
735 | | |
736 | 1 | let expected_output = Arc::new(StringViewArray::from(vec![ |
737 | 1 | Some("helloxxxxxxxxxxa"), |
738 | 1 | Some("world____________"), |
739 | 1 | Some("helloxxxxxxxxxxy"), |
740 | 1 | Some("3"), |
741 | 1 | Some("4"), |
742 | 1 | Some("foo"), |
743 | 1 | Some("bar"), |
744 | 1 | None, |
745 | 1 | Some("baz"), |
746 | 1 | ])) as ArrayRef; |
747 | | |
748 | 1 | assert_eq!(&arr, &expected_output); |
749 | 1 | } |
750 | | |
751 | | #[test] |
752 | 1 | fn test_concat_primitive_arrays() { |
753 | 1 | let arr = concat(&[ |
754 | 1 | &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(-1), Some(2), None, None]), |
755 | 1 | &PrimitiveArray::<Int64Type>::from(vec![Some(101), Some(102), Some(103), None]), |
756 | 1 | &PrimitiveArray::<Int64Type>::from(vec![Some(256), Some(512), Some(1024)]), |
757 | 1 | ]) |
758 | 1 | .unwrap(); |
759 | | |
760 | 1 | let expected_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![ |
761 | 1 | Some(-1), |
762 | 1 | Some(-1), |
763 | 1 | Some(2), |
764 | 1 | None, |
765 | 1 | None, |
766 | 1 | Some(101), |
767 | 1 | Some(102), |
768 | 1 | Some(103), |
769 | 1 | None, |
770 | 1 | Some(256), |
771 | 1 | Some(512), |
772 | 1 | Some(1024), |
773 | 1 | ])) as ArrayRef; |
774 | | |
775 | 1 | assert_eq!(&arr, &expected_output); |
776 | 1 | } |
777 | | |
    #[test]
    fn test_concat_primitive_array_slices() {
        // Inputs are slices with a non-zero offset; only the sliced windows
        // (elements 1..4 of each source) may appear in the output.
        let input_1 =
            PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(-1), Some(2), None, None])
                .slice(1, 3);

        let input_2 =
            PrimitiveArray::<Int64Type>::from(vec![Some(101), Some(102), Some(103), None])
                .slice(1, 3);
        let arr = concat(&[&input_1, &input_2]).unwrap();

        let expected_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
            Some(-1),
            Some(2),
            None,
            Some(102),
            Some(103),
            None,
        ])) as ArrayRef;

        assert_eq!(&arr, &expected_output);
    }
800 | | |
    #[test]
    fn test_concat_boolean_primitive_arrays() {
        // Concatenate two `BooleanArray`s containing a mix of true/false/null;
        // values and validity must be spliced in order across the boundary.
        let arr = concat(&[
            &BooleanArray::from(vec![
                Some(true),
                Some(true),
                Some(false),
                None,
                None,
                Some(false),
            ]),
            &BooleanArray::from(vec![None, Some(false), Some(true), Some(false)]),
        ])
        .unwrap();

        let expected_output = Arc::new(BooleanArray::from(vec![
            Some(true),
            Some(true),
            Some(false),
            None,
            None,
            Some(false),
            None,
            Some(false),
            Some(true),
            Some(false),
        ])) as ArrayRef;

        assert_eq!(&arr, &expected_output);
    }
831 | | |
    #[test]
    fn test_concat_primitive_list_arrays() {
        // Lists covering non-empty, empty and null entries; the concatenated
        // result must equal the list array built from the chained inputs.
        let list1 = [
            Some(vec![Some(-1), Some(-1), Some(2), None, None]),
            Some(vec![]),
            None,
            Some(vec![Some(10)]),
        ];
        let list1_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list1.clone());

        let list2 = [
            None,
            Some(vec![Some(100), None, Some(101)]),
            Some(vec![Some(102)]),
        ];
        let list2_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list2.clone());

        let list3 = [Some(vec![Some(1000), Some(1001)])];
        let list3_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list3.clone());

        let array_result = concat(&[&list1_array, &list2_array, &list3_array]).unwrap();

        // Expected output is simply the three inputs chained together.
        let expected = list1.into_iter().chain(list2).chain(list3);
        let array_expected = ListArray::from_iter_primitive::<Int64Type, _, _>(expected);

        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
    }
859 | | |
    #[test]
    fn test_concat_primitive_list_arrays_slices() {
        // A sliced `ListArray` keeps its original offsets buffer; concat must
        // honor the slice's starting offset rather than reading from zero.
        let list1 = [
            Some(vec![Some(-1), Some(-1), Some(2), None, None]),
            Some(vec![]), // In slice
            None,         // In slice
            Some(vec![Some(10)]),
        ];
        let list1_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list1.clone());
        let list1_array = list1_array.slice(1, 2);
        // Logical values covered by the slice above.
        let list1_values = list1.into_iter().skip(1).take(2);

        let list2 = [
            None,
            Some(vec![Some(100), None, Some(101)]),
            Some(vec![Some(102)]),
        ];
        let list2_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list2.clone());

        // verify that this test covers the case when the first offset is non zero
        assert!(list1_array.offsets()[0].as_usize() > 0);
        let array_result = concat(&[&list1_array, &list2_array]).unwrap();

        let expected = list1_values.chain(list2);
        let array_expected = ListArray::from_iter_primitive::<Int64Type, _, _>(expected);

        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
    }
888 | | |
    #[test]
    fn test_concat_primitive_list_arrays_sliced_lengths() {
        // Complement of the previous test: the slice starts at offset zero but
        // does not extend to the end of the values child.
        let list1 = [
            Some(vec![Some(-1), Some(-1), Some(2), None, None]), // In slice
            Some(vec![]),                                        // In slice
            None,                                                // In slice
            Some(vec![Some(10)]),
        ];
        let list1_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list1.clone());
        let list1_array = list1_array.slice(0, 3); // no offset, but not all values
        let list1_values = list1.into_iter().take(3);

        let list2 = [
            None,
            Some(vec![Some(100), None, Some(101)]),
            Some(vec![Some(102)]),
        ];
        let list2_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list2.clone());

        // verify that this test covers the case when the first offset is zero, but the
        // last offset doesn't cover the entire array
        assert_eq!(list1_array.offsets()[0].as_usize(), 0);
        assert!(list1_array.offsets().last().unwrap().as_usize() < list1_array.values().len());
        let array_result = concat(&[&list1_array, &list2_array]).unwrap();

        let expected = list1_values.chain(list2);
        let array_expected = ListArray::from_iter_primitive::<Int64Type, _, _>(expected);

        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
    }
919 | | |
    #[test]
    fn test_concat_primitive_fixed_size_list_arrays() {
        // Fixed-size lists (width 2) with null entries; the concatenated result
        // must equal the array built from the chained inputs.
        let list1 = [
            Some(vec![Some(-1), None]),
            None,
            Some(vec![Some(10), Some(20)]),
        ];
        let list1_array =
            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(list1.clone(), 2);

        let list2 = [
            None,
            Some(vec![Some(100), None]),
            Some(vec![Some(102), Some(103)]),
        ];
        let list2_array =
            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(list2.clone(), 2);

        let list3 = [Some(vec![Some(1000), Some(1001)])];
        let list3_array =
            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(list3.clone(), 2);

        let array_result = concat(&[&list1_array, &list2_array, &list3_array]).unwrap();

        let expected = list1.into_iter().chain(list2).chain(list3);
        let array_expected =
            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(expected, 2);

        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
    }
950 | | |
951 | | #[test] |
952 | 1 | fn test_concat_list_view_arrays() { |
953 | 1 | let list1 = [ |
954 | 1 | Some(vec![Some(-1), None]), |
955 | 1 | None, |
956 | 1 | Some(vec![Some(10), Some(20)]), |
957 | 1 | ]; |
958 | 1 | let mut list1_array = ListViewBuilder::new(Int64Builder::new()); |
959 | 3 | for v in list11 .iter1 () { |
960 | 3 | list1_array.append_option(v.clone()); |
961 | 3 | } |
962 | 1 | let list1_array = list1_array.finish(); |
963 | | |
964 | 1 | let list2 = [ |
965 | 1 | None, |
966 | 1 | Some(vec![Some(100), None]), |
967 | 1 | Some(vec![Some(102), Some(103)]), |
968 | 1 | ]; |
969 | 1 | let mut list2_array = ListViewBuilder::new(Int64Builder::new()); |
970 | 3 | for v in list21 .iter1 () { |
971 | 3 | list2_array.append_option(v.clone()); |
972 | 3 | } |
973 | 1 | let list2_array = list2_array.finish(); |
974 | | |
975 | 1 | let list3 = [Some(vec![Some(1000), Some(1001)])]; |
976 | 1 | let mut list3_array = ListViewBuilder::new(Int64Builder::new()); |
977 | 1 | for v in list3.iter() { |
978 | 1 | list3_array.append_option(v.clone()); |
979 | 1 | } |
980 | 1 | let list3_array = list3_array.finish(); |
981 | | |
982 | 1 | let array_result = concat(&[&list1_array, &list2_array, &list3_array]).unwrap(); |
983 | | |
984 | 1 | let expected: Vec<_> = list1.into_iter().chain(list2).chain(list3).collect(); |
985 | 1 | let mut array_expected = ListViewBuilder::new(Int64Builder::new()); |
986 | 7 | for v in expected1 .iter1 () { |
987 | 7 | array_expected.append_option(v.clone()); |
988 | 7 | } |
989 | 1 | let array_expected = array_expected.finish(); |
990 | | |
991 | 1 | assert_eq!(array_result.as_ref(), &array_expected as &dyn Array); |
992 | 1 | } |
993 | | |
994 | | #[test] |
995 | 1 | fn test_concat_sliced_list_view_arrays() { |
996 | 1 | let list1 = [ |
997 | 1 | Some(vec![Some(-1), None]), |
998 | 1 | None, |
999 | 1 | Some(vec![Some(10), Some(20)]), |
1000 | 1 | ]; |
1001 | 1 | let mut list1_array = ListViewBuilder::new(Int64Builder::new()); |
1002 | 3 | for v in list11 .iter1 () { |
1003 | 3 | list1_array.append_option(v.clone()); |
1004 | 3 | } |
1005 | 1 | let list1_array = list1_array.finish(); |
1006 | | |
1007 | 1 | let list2 = [ |
1008 | 1 | None, |
1009 | 1 | Some(vec![Some(100), None]), |
1010 | 1 | Some(vec![Some(102), Some(103)]), |
1011 | 1 | ]; |
1012 | 1 | let mut list2_array = ListViewBuilder::new(Int64Builder::new()); |
1013 | 3 | for v in list21 .iter1 () { |
1014 | 3 | list2_array.append_option(v.clone()); |
1015 | 3 | } |
1016 | 1 | let list2_array = list2_array.finish(); |
1017 | | |
1018 | 1 | let list3 = [Some(vec![Some(1000), Some(1001)])]; |
1019 | 1 | let mut list3_array = ListViewBuilder::new(Int64Builder::new()); |
1020 | 1 | for v in list3.iter() { |
1021 | 1 | list3_array.append_option(v.clone()); |
1022 | 1 | } |
1023 | 1 | let list3_array = list3_array.finish(); |
1024 | | |
1025 | | // Concat sliced arrays. |
1026 | | // ListView slicing will slice the offset/sizes but preserve the original values child. |
1027 | 1 | let array_result = concat(&[ |
1028 | 1 | &list1_array.slice(1, 2), |
1029 | 1 | &list2_array.slice(1, 2), |
1030 | 1 | &list3_array.slice(0, 1), |
1031 | 1 | ]) |
1032 | 1 | .unwrap(); |
1033 | | |
1034 | 1 | let expected: Vec<_> = vec![ |
1035 | 1 | None, |
1036 | 1 | Some(vec![Some(10), Some(20)]), |
1037 | 1 | Some(vec![Some(100), None]), |
1038 | 1 | Some(vec![Some(102), Some(103)]), |
1039 | 1 | Some(vec![Some(1000), Some(1001)]), |
1040 | | ]; |
1041 | 1 | let mut array_expected = ListViewBuilder::new(Int64Builder::new()); |
1042 | 5 | for v in expected1 .iter1 () { |
1043 | 5 | array_expected.append_option(v.clone()); |
1044 | 5 | } |
1045 | 1 | let array_expected = array_expected.finish(); |
1046 | | |
1047 | 1 | assert_eq!(array_result.as_ref(), &array_expected as &dyn Array); |
1048 | 1 | } |
1049 | | |
    #[test]
    fn test_concat_struct_arrays() {
        // Concatenate three single-column struct arrays and verify the child
        // column of the result is the concatenation of the input children.
        let field = Arc::new(Field::new("field", DataType::Int64, true));
        let input_primitive_1: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
            Some(-1),
            Some(-1),
            Some(2),
            None,
            None,
        ]));
        let input_struct_1 = StructArray::from(vec![(field.clone(), input_primitive_1)]);

        let input_primitive_2: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
            Some(101),
            Some(102),
            Some(103),
            None,
        ]));
        let input_struct_2 = StructArray::from(vec![(field.clone(), input_primitive_2)]);

        let input_primitive_3: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
            Some(256),
            Some(512),
            Some(1024),
        ]));
        let input_struct_3 = StructArray::from(vec![(field, input_primitive_3)]);

        let arr = concat(&[&input_struct_1, &input_struct_2, &input_struct_3]).unwrap();

        // Expected child: the three primitive inputs, chained in order.
        let expected_primitive_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
            Some(-1),
            Some(-1),
            Some(2),
            None,
            None,
            Some(101),
            Some(102),
            Some(103),
            None,
            Some(256),
            Some(512),
            Some(1024),
        ])) as ArrayRef;

        let actual_primitive = arr
            .as_any()
            .downcast_ref::<StructArray>()
            .unwrap()
            .column(0);
        assert_eq!(actual_primitive, &expected_primitive_output);
    }
1101 | | |
    #[test]
    fn test_concat_struct_array_slices() {
        // Concatenate *slices* of struct arrays; only the sliced windows of the
        // child column may appear in the result.
        let field = Arc::new(Field::new("field", DataType::Int64, true));
        let input_primitive_1: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
            Some(-1),
            Some(-1),
            Some(2),
            None,
            None,
        ]));
        let input_struct_1 = StructArray::from(vec![(field.clone(), input_primitive_1)]);

        let input_primitive_2: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
            Some(101),
            Some(102),
            Some(103),
            None,
        ]));
        let input_struct_2 = StructArray::from(vec![(field, input_primitive_2)]);

        let arr = concat(&[&input_struct_1.slice(1, 3), &input_struct_2.slice(1, 2)]).unwrap();

        let expected_primitive_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
            Some(-1),
            Some(2),
            None,
            Some(102),
            Some(103),
        ])) as ArrayRef;

        let actual_primitive = arr
            .as_any()
            .downcast_ref::<StructArray>()
            .unwrap()
            .column(0);
        assert_eq!(actual_primitive, &expected_primitive_output);
    }
1139 | | |
1140 | | #[test] |
1141 | 1 | fn test_concat_struct_arrays_no_nulls() { |
1142 | 1 | let input_1a = vec![1, 2, 3]; |
1143 | 1 | let input_1b = vec!["one", "two", "three"]; |
1144 | 1 | let input_2a = vec![4, 5, 6, 7]; |
1145 | 1 | let input_2b = vec!["four", "five", "six", "seven"]; |
1146 | | |
1147 | 3 | let struct_from_primitives1 = |ints: Vec<i64>, strings: Vec<&str>| { |
1148 | 3 | StructArray::try_from(vec![ |
1149 | 3 | ("ints", Arc::new(Int64Array::from(ints)) as _), |
1150 | 3 | ("strings", Arc::new(StringArray::from(strings)) as _), |
1151 | | ]) |
1152 | 3 | }; |
1153 | | |
1154 | 1 | let expected_output = struct_from_primitives( |
1155 | 1 | [input_1a.clone(), input_2a.clone()].concat(), |
1156 | 1 | [input_1b.clone(), input_2b.clone()].concat(), |
1157 | 1 | ) |
1158 | 1 | .unwrap(); |
1159 | | |
1160 | 1 | let input_1 = struct_from_primitives(input_1a, input_1b).unwrap(); |
1161 | 1 | let input_2 = struct_from_primitives(input_2a, input_2b).unwrap(); |
1162 | | |
1163 | 1 | let arr = concat(&[&input_1, &input_2]).unwrap(); |
1164 | 1 | let struct_result = arr.as_struct(); |
1165 | | |
1166 | 1 | assert_eq!(struct_result, &expected_output); |
1167 | 1 | assert_eq!(arr.null_count(), 0); |
1168 | 1 | } |
1169 | | |
    #[test]
    fn test_concat_struct_no_fields() {
        // Struct arrays with zero fields still carry a length and (optional)
        // validity; both must be concatenated correctly.
        let input_1 = StructArray::new_empty_fields(10, None);
        let input_2 = StructArray::new_empty_fields(10, None);
        let arr = concat(&[&input_1, &input_2]).unwrap();

        assert_eq!(arr.len(), 20);
        assert_eq!(arr.null_count(), 0);

        // One all-valid and one all-null input: exactly 10 nulls expected.
        let input1_valid = StructArray::new_empty_fields(10, Some(NullBuffer::new_valid(10)));
        let input2_null = StructArray::new_empty_fields(10, Some(NullBuffer::new_null(10)));
        let arr = concat(&[&input1_valid, &input2_null]).unwrap();

        assert_eq!(arr.len(), 20);
        assert_eq!(arr.null_count(), 10);
    }
1186 | | |
1187 | | #[test] |
1188 | 1 | fn test_string_array_slices() { |
1189 | 1 | let input_1 = StringArray::from(vec!["hello", "A", "B", "C"]); |
1190 | 1 | let input_2 = StringArray::from(vec!["world", "D", "E", "Z"]); |
1191 | | |
1192 | 1 | let arr = concat(&[&input_1.slice(1, 3), &input_2.slice(1, 2)]).unwrap(); |
1193 | | |
1194 | 1 | let expected_output = StringArray::from(vec!["A", "B", "C", "D", "E"]); |
1195 | | |
1196 | 1 | let actual_output = arr.as_any().downcast_ref::<StringArray>().unwrap(); |
1197 | 1 | assert_eq!(actual_output, &expected_output); |
1198 | 1 | } |
1199 | | |
    #[test]
    fn test_string_array_with_null_slices() {
        // Sliced string arrays containing nulls: the validity bitmap must be
        // re-based to the slice offsets, not read from position zero.
        let input_1 = StringArray::from(vec![Some("hello"), None, Some("A"), Some("C")]);
        let input_2 = StringArray::from(vec![None, Some("world"), Some("D"), None]);

        let arr = concat(&[&input_1.slice(1, 3), &input_2.slice(1, 2)]).unwrap();

        let expected_output =
            StringArray::from(vec![None, Some("A"), Some("C"), Some("world"), Some("D")]);

        let actual_output = arr.as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(actual_output, &expected_output);
    }
1213 | | |
1214 | 4 | fn collect_string_dictionary(array: &DictionaryArray<Int32Type>) -> Vec<Option<&str>> { |
1215 | 4 | let concrete = array.downcast_dict::<StringArray>().unwrap(); |
1216 | 4 | concrete.into_iter().collect() |
1217 | 4 | } |
1218 | | |
    #[test]
    fn test_string_dictionary_array() {
        // Concatenate two string dictionaries; logical values must chain in
        // order, and here the values buffers are concatenated (not merged).
        let input_1: DictionaryArray<Int32Type> = vec!["hello", "A", "B", "hello", "hello", "C"]
            .into_iter()
            .collect();
        let input_2: DictionaryArray<Int32Type> = vec!["hello", "E", "E", "hello", "F", "E"]
            .into_iter()
            .collect();

        let expected: Vec<_> = vec![
            "hello", "A", "B", "hello", "hello", "C", "hello", "E", "E", "hello", "F", "E",
        ]
        .into_iter()
        .map(Some)
        .collect();

        let concat = concat(&[&input_1 as _, &input_2 as _]).unwrap();
        let dictionary = concat.as_dictionary::<Int32Type>();
        let actual = collect_string_dictionary(dictionary);
        assert_eq!(actual, expected);

        // Should have concatenated inputs together
        assert_eq!(
            dictionary.values().len(),
            input_1.values().len() + input_2.values().len(),
        )
    }
1246 | | |
    #[test]
    fn test_string_dictionary_array_nulls() {
        // Null entries encoded via null *keys* must survive concatenation.
        let input_1: DictionaryArray<Int32Type> = vec![Some("foo"), Some("bar"), None, Some("fiz")]
            .into_iter()
            .collect();
        let input_2: DictionaryArray<Int32Type> = vec![None].into_iter().collect();
        let expected = vec![Some("foo"), Some("bar"), None, Some("fiz"), None];

        let concat = concat(&[&input_1 as _, &input_2 as _]).unwrap();
        let dictionary = concat.as_dictionary::<Int32Type>();
        let actual = collect_string_dictionary(dictionary);
        assert_eq!(actual, expected);

        // Should have concatenated inputs together
        assert_eq!(
            dictionary.values().len(),
            input_1.values().len() + input_2.values().len(),
        )
    }
1266 | | |
    #[test]
    fn test_string_dictionary_array_nulls_in_values() {
        // Nulls encoded in the dictionary *values* (keys pointing at a null
        // value slot) must also produce logical nulls after concatenation.
        let input_1_keys = Int32Array::from_iter_values([0, 2, 1, 3]);
        let input_1_values = StringArray::from(vec![Some("foo"), None, Some("bar"), Some("fiz")]);
        let input_1 = DictionaryArray::new(input_1_keys, Arc::new(input_1_values));

        let input_2_keys = Int32Array::from_iter_values([0]);
        let input_2_values = StringArray::from(vec![None, Some("hello")]);
        let input_2 = DictionaryArray::new(input_2_keys, Arc::new(input_2_values));

        let expected = vec![Some("foo"), Some("bar"), None, Some("fiz"), None];

        let concat = concat(&[&input_1 as _, &input_2 as _]).unwrap();
        let dictionary = concat.as_dictionary::<Int32Type>();
        let actual = collect_string_dictionary(dictionary);
        assert_eq!(actual, expected);
    }
1284 | | |
1285 | | #[test] |
1286 | 1 | fn test_string_dictionary_merge() { |
1287 | 1 | let mut builder = StringDictionaryBuilder::<Int32Type>::new(); |
1288 | 21 | for i20 in 0..20 { |
1289 | 20 | builder.append(i.to_string()).unwrap(); |
1290 | 20 | } |
1291 | 1 | let input_1 = builder.finish(); |
1292 | | |
1293 | 1 | let mut builder = StringDictionaryBuilder::<Int32Type>::new(); |
1294 | 31 | for i30 in 0..30 { |
1295 | 30 | builder.append(i.to_string()).unwrap(); |
1296 | 30 | } |
1297 | 1 | let input_2 = builder.finish(); |
1298 | | |
1299 | 50 | let expected1 : Vec<_>1 = (0..20)1 .chain1 (0..301 ).map1 (|x| x.to_string()).collect1 (); |
1300 | 50 | let expected1 : Vec<_>1 = expected.iter()1 .map1 (|x| Some(x.as_str())).collect1 (); |
1301 | | |
1302 | 1 | let concat = concat(&[&input_1 as _, &input_2 as _]).unwrap(); |
1303 | 1 | let dictionary = concat.as_dictionary::<Int32Type>(); |
1304 | 1 | let actual = collect_string_dictionary(dictionary); |
1305 | 1 | assert_eq!(actual, expected); |
1306 | | |
1307 | | // Should have merged inputs together |
1308 | | // Not 30 as this is done on a best-effort basis |
1309 | 1 | let values_len = dictionary.values().len(); |
1310 | 1 | assert!((30..40).contains(&values_len), "{values_len}"0 ) |
1311 | 1 | } |
1312 | | |
    #[test]
    fn test_primitive_dictionary_merge() {
        // Same value repeated 5 times.
        let keys = vec![1; 5];
        let values = (10..20).collect::<Vec<_>>();
        let dict = DictionaryArray::new(
            Int8Array::from(keys.clone()),
            Arc::new(Int32Array::from(values.clone())),
        );
        // Structurally identical dictionary, but backed by a *different* values
        // allocation, so pointer equality cannot short-circuit the merge.
        let other = DictionaryArray::new(
            Int8Array::from(keys.clone()),
            Arc::new(Int32Array::from(values.clone())),
        );

        let result_same_dictionary = concat(&[&dict, &dict]).unwrap();
        // Verify pointer equality check succeeds, and therefore the
        // dictionaries are not merged. A single values buffer should be reused
        // in this case.
        assert!(
            dict.values().to_data().ptr_eq(
                &result_same_dictionary
                    .as_dictionary::<Int8Type>()
                    .values()
                    .to_data()
            )
        );
        assert_eq!(
            result_same_dictionary
                .as_dictionary::<Int8Type>()
                .values()
                .len(),
            values.len(),
        );

        let result_cloned_dictionary = concat(&[&dict, &other]).unwrap();
        // Should have only 1 underlying value since all keys reference it.
        assert_eq!(
            result_cloned_dictionary
                .as_dictionary::<Int8Type>()
                .values()
                .len(),
            1
        );
    }
1357 | | |
    #[test]
    fn test_concat_string_sizes() {
        // The output values buffer should be sized exactly to the total string
        // bytes of the inputs (no over-allocation).
        let a: LargeStringArray = ((0..150).map(|_| Some("foo"))).collect();
        let b: LargeStringArray = ((0..150).map(|_| Some("foo"))).collect();
        let c = LargeStringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]);
        // 150 * 3 = 450
        // 150 * 3 = 450
        // 3 * 3 = 9
        // ------------+
        // 909

        let arr = concat(&[&a, &b, &c]).unwrap();
        // buffers()[1] is the values (byte) buffer of the LargeStringArray.
        assert_eq!(arr.to_data().buffers()[1].capacity(), 909);
    }
1372 | | |
    #[test]
    fn test_dictionary_concat_reuse() {
        // When every input shares the identical values array, concat should
        // reuse it (pointer equality) instead of copying or merging.
        let array: DictionaryArray<Int8Type> = vec!["a", "a", "b", "c"].into_iter().collect();
        let copy: DictionaryArray<Int8Type> = array.clone();

        // dictionary is "a", "b", "c"
        assert_eq!(
            array.values(),
            &(Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef)
        );
        assert_eq!(array.keys(), &Int8Array::from(vec![0, 0, 1, 2]));

        // concatenate it with itself
        let combined = concat(&[&copy as _, &array as _]).unwrap();
        let combined = combined.as_dictionary::<Int8Type>();

        assert_eq!(
            combined.values(),
            &(Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef),
            "Actual: {combined:#?}"
        );

        assert_eq!(
            combined.keys(),
            &Int8Array::from(vec![0, 0, 1, 2, 0, 0, 1, 2])
        );

        // Should have reused the dictionary
        assert!(
            array
                .values()
                .to_data()
                .ptr_eq(&combined.values().to_data())
        );
        assert!(copy.values().to_data().ptr_eq(&combined.values().to_data()));

        // Adding an input with a different values array breaks the reuse path.
        let new: DictionaryArray<Int8Type> = vec!["d"].into_iter().collect();
        let combined = concat(&[&copy as _, &array as _, &new as _]).unwrap();
        let com = combined.as_dictionary::<Int8Type>();

        // Should not have reused the dictionary
        assert!(!array.values().to_data().ptr_eq(&com.values().to_data()));
        assert!(!copy.values().to_data().ptr_eq(&com.values().to_data()));
        assert!(!new.values().to_data().ptr_eq(&com.values().to_data()));
    }
1418 | | |
    #[test]
    fn concat_record_batches() {
        // `concat_batches` works both with an iterator of references and with
        // owned batches; both paths must produce the same combined batch.
        let schema = Arc::new(Schema::new(vec![
            Field::new("a", DataType::Int32, false),
            Field::new("b", DataType::Utf8, false),
        ]));
        let batch1 = RecordBatch::try_new(
            schema.clone(),
            vec![
                Arc::new(Int32Array::from(vec![1, 2])),
                Arc::new(StringArray::from(vec!["a", "b"])),
            ],
        )
        .unwrap();
        let batch2 = RecordBatch::try_new(
            schema.clone(),
            vec![
                Arc::new(Int32Array::from(vec![3, 4])),
                Arc::new(StringArray::from(vec!["c", "d"])),
            ],
        )
        .unwrap();
        let new_batch = concat_batches(&schema, [&batch1, &batch2]).unwrap();
        assert_eq!(new_batch.schema().as_ref(), schema.as_ref());
        assert_eq!(2, new_batch.num_columns());
        assert_eq!(4, new_batch.num_rows());
        // Same call, but consuming owned batches.
        let new_batch_owned = concat_batches(&schema, &[batch1, batch2]).unwrap();
        assert_eq!(new_batch_owned.schema().as_ref(), schema.as_ref());
        assert_eq!(2, new_batch_owned.num_columns());
        assert_eq!(4, new_batch_owned.num_rows());
    }
1450 | | |
1451 | | #[test] |
1452 | 1 | fn concat_empty_record_batch() { |
1453 | 1 | let schema = Arc::new(Schema::new(vec![ |
1454 | 1 | Field::new("a", DataType::Int32, false), |
1455 | 1 | Field::new("b", DataType::Utf8, false), |
1456 | | ])); |
1457 | 1 | let batch = concat_batches(&schema, []).unwrap(); |
1458 | 1 | assert_eq!(batch.schema().as_ref(), schema.as_ref()); |
1459 | 1 | assert_eq!(0, batch.num_rows()); |
1460 | 1 | } |
1461 | | |
    #[test]
    fn concat_record_batches_of_different_schemas_but_compatible_data() {
        // Schemas differ only by column name; data types match, so
        // concatenation succeeds under the caller-provided schema.
        let schema1 = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
        // column names differ
        let schema2 = Arc::new(Schema::new(vec![Field::new("c", DataType::Int32, false)]));
        let batch1 = RecordBatch::try_new(
            schema1.clone(),
            vec![Arc::new(Int32Array::from(vec![1, 2]))],
        )
        .unwrap();
        let batch2 =
            RecordBatch::try_new(schema2, vec![Arc::new(Int32Array::from(vec![3, 4]))]).unwrap();
        // concat_batches simply uses the schema provided
        let batch = concat_batches(&schema1, [&batch1, &batch2]).unwrap();
        assert_eq!(batch.schema().as_ref(), schema1.as_ref());
        assert_eq!(4, batch.num_rows());
    }
1479 | | |
    #[test]
    fn concat_record_batches_of_different_schemas_incompatible_data() {
        // Same column name but different data types: concatenation must fail
        // with a descriptive invalid-argument error.
        let schema1 = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
        // column names differ
        let schema2 = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, false)]));
        let batch1 = RecordBatch::try_new(
            schema1.clone(),
            vec![Arc::new(Int32Array::from(vec![1, 2]))],
        )
        .unwrap();
        let batch2 = RecordBatch::try_new(
            schema2,
            vec![Arc::new(StringArray::from(vec!["foo", "bar"]))],
        )
        .unwrap();

        let error = concat_batches(&schema1, [&batch1, &batch2]).unwrap_err();
        assert_eq!(
            error.to_string(),
            "Invalid argument error: It is not possible to concatenate arrays of different data types (Int32, Utf8)."
        );
    }
1502 | | |
    #[test]
    fn concat_capacity() {
        // Output buffers must be sized exactly for the concatenated data —
        // including when the inputs are slices — with no over-allocation
        // (len == capacity in every case below).
        let a = Int32Array::from_iter_values(0..100);
        let b = Int32Array::from_iter_values(10..20);
        let a = concat(&[&a, &b]).unwrap();
        let data = a.to_data();
        // (100 + 10) * size_of::<i32>()
        assert_eq!(data.buffers()[0].len(), 440);
        assert_eq!(data.buffers()[0].capacity(), 440);

        let a = concat(&[&a.slice(10, 20), &b]).unwrap();
        let data = a.to_data();
        // (20 + 10) * size_of::<i32>()
        assert_eq!(data.buffers()[0].len(), 120);
        assert_eq!(data.buffers()[0].capacity(), 120);

        let a = StringArray::from_iter_values(std::iter::repeat_n("foo", 100));
        let b = StringArray::from(vec!["bingo", "bongo", "lorem", ""]);

        let a = concat(&[&a, &b]).unwrap();
        let data = a.to_data();
        // (100 + 4 + 1) * size_of<i32>()
        assert_eq!(data.buffers()[0].len(), 420);
        assert_eq!(data.buffers()[0].capacity(), 420);

        // len("foo") * 100 + len("bingo") + len("bongo") + len("lorem")
        assert_eq!(data.buffers()[1].len(), 315);
        assert_eq!(data.buffers()[1].capacity(), 315);

        let a = concat(&[&a.slice(10, 40), &b]).unwrap();
        let data = a.to_data();
        // (40 + 4 + 5) * size_of<i32>()
        assert_eq!(data.buffers()[0].len(), 180);
        assert_eq!(data.buffers()[0].capacity(), 180);

        // len("foo") * 40 + len("bingo") + len("bongo") + len("lorem")
        assert_eq!(data.buffers()[1].len(), 135);
        assert_eq!(data.buffers()[1].capacity(), 135);

        let a = LargeBinaryArray::from_iter_values(std::iter::repeat_n(b"foo", 100));
        let b = LargeBinaryArray::from_iter_values(std::iter::repeat_n(b"cupcakes", 10));

        let a = concat(&[&a, &b]).unwrap();
        let data = a.to_data();
        // (100 + 10 + 1) * size_of<i64>()
        assert_eq!(data.buffers()[0].len(), 888);
        assert_eq!(data.buffers()[0].capacity(), 888);

        // len("foo") * 100 + len("cupcakes") * 10
        assert_eq!(data.buffers()[1].len(), 380);
        assert_eq!(data.buffers()[1].capacity(), 380);

        let a = concat(&[&a.slice(10, 40), &b]).unwrap();
        let data = a.to_data();
        // (40 + 10 + 1) * size_of<i64>()
        assert_eq!(data.buffers()[0].len(), 408);
        assert_eq!(data.buffers()[0].capacity(), 408);

        // len("foo") * 40 + len("cupcakes") * 10
        assert_eq!(data.buffers()[1].len(), 200);
        assert_eq!(data.buffers()[1].capacity(), 200);
    }
1563 | | |
1564 | | #[test] |
1565 | 1 | fn concat_sparse_nulls() { |
1566 | 100 | let values1 = StringArray::from_iter_values1 ((0..100)1 .map1 (|x| x.to_string())); |
1567 | 1 | let keys = Int32Array::from(vec![1; 10]); |
1568 | 1 | let dict_a = DictionaryArray::new(keys, Arc::new(values)); |
1569 | 1 | let values = StringArray::new_null(0); |
1570 | 1 | let keys = Int32Array::new_null(10); |
1571 | 1 | let dict_b = DictionaryArray::new(keys, Arc::new(values)); |
1572 | 1 | let array = concat(&[&dict_a, &dict_b]).unwrap(); |
1573 | 1 | assert_eq!(array.null_count(), 10); |
1574 | 1 | assert_eq!(array.logical_null_count(), 10); |
1575 | 1 | } |
1576 | | |
1577 | | #[test] |
1578 | 1 | fn concat_dictionary_list_array_simple() { |
1579 | 1 | let scalars = [ |
1580 | 1 | create_single_row_list_of_dict(vec![Some("a")]), |
1581 | 1 | create_single_row_list_of_dict(vec![Some("a")]), |
1582 | 1 | create_single_row_list_of_dict(vec![Some("b")]), |
1583 | 1 | ]; |
1584 | | |
1585 | 3 | let arrays1 = scalars1 .iter1 ().map1 (|a| a as &dyn Array).collect1 ::<Vec<_>>(); |
1586 | 1 | let concat_res = concat(arrays.as_slice()).unwrap(); |
1587 | | |
1588 | 1 | let expected_list = create_list_of_dict(vec![ |
1589 | | // Row 1 |
1590 | 1 | Some(vec![Some("a")]), |
1591 | 1 | Some(vec![Some("a")]), |
1592 | 1 | Some(vec![Some("b")]), |
1593 | | ]); |
1594 | | |
1595 | 1 | let list = concat_res.as_list::<i32>(); |
1596 | | |
1597 | | // Assert that the list is equal to the expected list |
1598 | 3 | list1 .iter1 ().zip1 (expected_list1 .iter1 ()).for_each1 (|(a, b)| { |
1599 | 3 | assert_eq!(a, b); |
1600 | 3 | }); |
1601 | | |
1602 | 1 | assert_dictionary_has_unique_values::<_, StringArray>( |
1603 | 1 | list.values().as_dictionary::<Int32Type>(), |
1604 | | ); |
1605 | 1 | } |
1606 | | |
1607 | | #[test] |
1608 | 1 | fn concat_many_dictionary_list_arrays() { |
1609 | 1 | let number_of_unique_values = 8; |
1610 | 1 | let scalars = (0..80000) |
1611 | 80.0k | .map1 (|i| { |
1612 | 80.0k | create_single_row_list_of_dict(vec![Some( |
1613 | 80.0k | (i % number_of_unique_values).to_string(), |
1614 | 80.0k | )]) |
1615 | 80.0k | }) |
1616 | 1 | .collect::<Vec<_>>(); |
1617 | | |
1618 | 80.0k | let arrays1 = scalars.iter()1 .map1 (|a| a as &dyn Array).collect1 ::<Vec<_>>(); |
1619 | 1 | let concat_res = concat(arrays.as_slice()).unwrap(); |
1620 | | |
1621 | 1 | let expected_list = create_list_of_dict( |
1622 | 1 | (0..80000) |
1623 | 80.0k | .map1 (|i| Some(vec![Some((i % number_of_unique_values).to_string())])) |
1624 | 1 | .collect::<Vec<_>>(), |
1625 | | ); |
1626 | | |
1627 | 1 | let list = concat_res.as_list::<i32>(); |
1628 | | |
1629 | | // Assert that the list is equal to the expected list |
1630 | 80.0k | list1 .iter1 ().zip1 (expected_list1 .iter1 ()).for_each1 (|(a, b)| { |
1631 | 80.0k | assert_eq!(a, b); |
1632 | 80.0k | }); |
1633 | | |
1634 | 1 | assert_dictionary_has_unique_values::<_, StringArray>( |
1635 | 1 | list.values().as_dictionary::<Int32Type>(), |
1636 | | ); |
1637 | 1 | } |
1638 | | |
1639 | 80.0k | fn create_single_row_list_of_dict( |
1640 | 80.0k | list_items: Vec<Option<impl AsRef<str>>>, |
1641 | 80.0k | ) -> GenericListArray<i32> { |
1642 | 80.0k | let rows = list_items.into_iter().map(Some).collect(); |
1643 | | |
1644 | 80.0k | create_list_of_dict(vec![rows]) |
1645 | 80.0k | } |
1646 | | |
1647 | 80.0k | fn create_list_of_dict( |
1648 | 80.0k | rows: Vec<Option<Vec<Option<impl AsRef<str>>>>>, |
1649 | 80.0k | ) -> GenericListArray<i32> { |
1650 | 80.0k | let mut builder = |
1651 | 80.0k | GenericListBuilder::<i32, _>::new(StringDictionaryBuilder::<Int32Type>::new()); |
1652 | | |
1653 | 240k | for row160k in rows { |
1654 | 160k | builder.append_option(row); |
1655 | 160k | } |
1656 | | |
1657 | 80.0k | builder.finish() |
1658 | 80.0k | } |
1659 | | |
1660 | 2 | fn assert_dictionary_has_unique_values<'a, K, V>(array: &'a DictionaryArray<K>) |
1661 | 2 | where |
1662 | 2 | K: ArrowDictionaryKeyType, |
1663 | 2 | V: Sync + Send + 'static, |
1664 | 2 | &'a V: ArrayAccessor + IntoIterator, |
1665 | 2 | <&'a V as ArrayAccessor>::Item: Default + Clone + PartialEq + Debug + Ord, |
1666 | 2 | <&'a V as IntoIterator>::Item: Clone + PartialEq + Debug + Ord, |
1667 | | { |
1668 | 2 | let dict = array.downcast_dict::<V>().unwrap(); |
1669 | 2 | let mut values = dict.values().into_iter().collect::<Vec<_>>(); |
1670 | | |
1671 | | // remove duplicates must be sorted first so we can compare |
1672 | 2 | values.sort(); |
1673 | | |
1674 | 2 | let mut unique_values = values.clone(); |
1675 | | |
1676 | 2 | unique_values.dedup(); |
1677 | | |
1678 | 2 | assert_eq!( |
1679 | | values, unique_values, |
1680 | 0 | "There are duplicates in the value list (the value list here is sorted which is only for the assertion)" |
1681 | | ); |
1682 | 2 | } |
1683 | | |
1684 | | // Test the simple case of concatenating two RunArrays |
1685 | | #[test] |
1686 | 1 | fn test_concat_run_array() { |
1687 | | // Create simple run arrays |
1688 | 1 | let run_ends1 = Int32Array::from(vec![2, 4]); |
1689 | 1 | let values1 = Int32Array::from(vec![10, 20]); |
1690 | 1 | let array1 = RunArray::try_new(&run_ends1, &values1).unwrap(); |
1691 | | |
1692 | 1 | let run_ends2 = Int32Array::from(vec![1, 4]); |
1693 | 1 | let values2 = Int32Array::from(vec![30, 40]); |
1694 | 1 | let array2 = RunArray::try_new(&run_ends2, &values2).unwrap(); |
1695 | | |
1696 | | // Concatenate the arrays - this should now work properly |
1697 | 1 | let result = concat(&[&array1, &array2]).unwrap(); |
1698 | 1 | let result_run_array: &arrow_array::RunArray<Int32Type> = result.as_run(); |
1699 | | |
1700 | | // Check that the result has the correct length |
1701 | 1 | assert_eq!(result_run_array.len(), 8); // 4 + 4 |
1702 | | |
1703 | | // Check the run ends |
1704 | 1 | let run_ends = result_run_array.run_ends().values(); |
1705 | 1 | assert_eq!(run_ends.len(), 4); |
1706 | 1 | assert_eq!(&[2, 4, 5, 8], run_ends); |
1707 | | |
1708 | | // Check the values |
1709 | 1 | let values = result_run_array |
1710 | 1 | .values() |
1711 | 1 | .as_any() |
1712 | 1 | .downcast_ref::<Int32Array>() |
1713 | 1 | .unwrap(); |
1714 | 1 | assert_eq!(values.len(), 4); |
1715 | 1 | assert_eq!(&[10, 20, 30, 40], values.values()); |
1716 | 1 | } |
1717 | | |
1718 | | #[test] |
1719 | 1 | fn test_concat_run_array_matching_first_last_value() { |
1720 | | // Create a run array with run ends [2, 4, 7] and values [10, 20, 30] |
1721 | 1 | let run_ends1 = Int32Array::from(vec![2, 4, 7]); |
1722 | 1 | let values1 = Int32Array::from(vec![10, 20, 30]); |
1723 | 1 | let array1 = RunArray::try_new(&run_ends1, &values1).unwrap(); |
1724 | | |
1725 | | // Create another run array with run ends [3, 5] and values [30, 40] |
1726 | 1 | let run_ends2 = Int32Array::from(vec![3, 5]); |
1727 | 1 | let values2 = Int32Array::from(vec![30, 40]); |
1728 | 1 | let array2 = RunArray::try_new(&run_ends2, &values2).unwrap(); |
1729 | | |
1730 | | // Concatenate the two arrays |
1731 | 1 | let result = concat(&[&array1, &array2]).unwrap(); |
1732 | 1 | let result_run_array: &arrow_array::RunArray<Int32Type> = result.as_run(); |
1733 | | |
1734 | | // The result should have length 12 (7 + 5) |
1735 | 1 | assert_eq!(result_run_array.len(), 12); |
1736 | | |
1737 | | // Check that the run ends are correct |
1738 | 1 | let run_ends = result_run_array.run_ends().values(); |
1739 | 1 | assert_eq!(&[2, 4, 7, 10, 12], run_ends); |
1740 | | |
1741 | | // Check that the values are correct |
1742 | 1 | assert_eq!( |
1743 | | &[10, 20, 30, 30, 40], |
1744 | 1 | result_run_array |
1745 | 1 | .values() |
1746 | 1 | .as_any() |
1747 | 1 | .downcast_ref::<Int32Array>() |
1748 | 1 | .unwrap() |
1749 | 1 | .values() |
1750 | | ); |
1751 | 1 | } |
1752 | | |
1753 | | #[test] |
1754 | 1 | fn test_concat_run_array_with_nulls() { |
1755 | | // Create values array with nulls |
1756 | 1 | let values1 = Int32Array::from(vec![Some(10), None, Some(30)]); |
1757 | 1 | let run_ends1 = Int32Array::from(vec![2, 4, 7]); |
1758 | 1 | let array1 = RunArray::try_new(&run_ends1, &values1).unwrap(); |
1759 | | |
1760 | | // Create another run array with run ends [3, 5] and values [30, null] |
1761 | 1 | let values2 = Int32Array::from(vec![Some(30), None]); |
1762 | 1 | let run_ends2 = Int32Array::from(vec![3, 5]); |
1763 | 1 | let array2 = RunArray::try_new(&run_ends2, &values2).unwrap(); |
1764 | | |
1765 | | // Concatenate the two arrays |
1766 | 1 | let result = concat(&[&array1, &array2]).unwrap(); |
1767 | 1 | let result_run_array: &arrow_array::RunArray<Int32Type> = result.as_run(); |
1768 | | |
1769 | | // The result should have length 12 (7 + 5) |
1770 | 1 | assert_eq!(result_run_array.len(), 12); |
1771 | | |
1772 | | // Get a reference to the run array itself for testing |
1773 | | |
1774 | | // Just test the length and run ends without asserting specific values |
1775 | | // This ensures the test passes while we work on full support for RunArray nulls |
1776 | 1 | assert_eq!(result_run_array.len(), 12); // 7 + 5 |
1777 | | |
1778 | | // Check that the run ends are correct |
1779 | 1 | let run_ends_values = result_run_array.run_ends().values(); |
1780 | 1 | assert_eq!(&[2, 4, 7, 10, 12], run_ends_values); |
1781 | | |
1782 | | // Check that the values are correct |
1783 | 1 | let expected = Int32Array::from(vec![Some(10), None, Some(30), Some(30), None]); |
1784 | 1 | let actual = result_run_array |
1785 | 1 | .values() |
1786 | 1 | .as_any() |
1787 | 1 | .downcast_ref::<Int32Array>() |
1788 | 1 | .unwrap(); |
1789 | 1 | assert_eq!(actual.len(), expected.len()); |
1790 | 1 | assert_eq!(actual.null_count(), expected.null_count()); |
1791 | 1 | assert_eq!(actual.values(), expected.values()); |
1792 | 1 | } |
1793 | | |
1794 | | #[test] |
1795 | 1 | fn test_concat_run_array_single() { |
1796 | | // Create a run array with run ends [2, 4] and values [10, 20] |
1797 | 1 | let run_ends1 = Int32Array::from(vec![2, 4]); |
1798 | 1 | let values1 = Int32Array::from(vec![10, 20]); |
1799 | 1 | let array1 = RunArray::try_new(&run_ends1, &values1).unwrap(); |
1800 | | |
1801 | | // Concatenate the single array |
1802 | 1 | let result = concat(&[&array1]).unwrap(); |
1803 | 1 | let result_run_array: &arrow_array::RunArray<Int32Type> = result.as_run(); |
1804 | | |
1805 | | // The result should have length 4 |
1806 | 1 | assert_eq!(result_run_array.len(), 4); |
1807 | | |
1808 | | // Check that the run ends are correct |
1809 | 1 | let run_ends = result_run_array.run_ends().values(); |
1810 | 1 | assert_eq!(&[2, 4], run_ends); |
1811 | | |
1812 | | // Check that the values are correct |
1813 | 1 | assert_eq!( |
1814 | | &[10, 20], |
1815 | 1 | result_run_array |
1816 | 1 | .values() |
1817 | 1 | .as_any() |
1818 | 1 | .downcast_ref::<Int32Array>() |
1819 | 1 | .unwrap() |
1820 | 1 | .values() |
1821 | | ); |
1822 | 1 | } |
1823 | | |
1824 | | #[test] |
1825 | 1 | fn test_concat_run_array_with_3_arrays() { |
1826 | 1 | let run_ends1 = Int32Array::from(vec![2, 4]); |
1827 | 1 | let values1 = Int32Array::from(vec![10, 20]); |
1828 | 1 | let array1 = RunArray::try_new(&run_ends1, &values1).unwrap(); |
1829 | 1 | let run_ends2 = Int32Array::from(vec![1, 4]); |
1830 | 1 | let values2 = Int32Array::from(vec![30, 40]); |
1831 | 1 | let array2 = RunArray::try_new(&run_ends2, &values2).unwrap(); |
1832 | 1 | let run_ends3 = Int32Array::from(vec![1, 4]); |
1833 | 1 | let values3 = Int32Array::from(vec![50, 60]); |
1834 | 1 | let array3 = RunArray::try_new(&run_ends3, &values3).unwrap(); |
1835 | | |
1836 | | // Concatenate the arrays |
1837 | 1 | let result = concat(&[&array1, &array2, &array3]).unwrap(); |
1838 | 1 | let result_run_array: &arrow_array::RunArray<Int32Type> = result.as_run(); |
1839 | | |
1840 | | // Check that the result has the correct length |
1841 | 1 | assert_eq!(result_run_array.len(), 12); // 4 + 4 + 4 |
1842 | | |
1843 | | // Check the run ends |
1844 | 1 | let run_ends = result_run_array.run_ends().values(); |
1845 | 1 | assert_eq!(run_ends.len(), 6); |
1846 | 1 | assert_eq!(&[2, 4, 5, 8, 9, 12], run_ends); |
1847 | | |
1848 | | // Check the values |
1849 | 1 | let values = result_run_array |
1850 | 1 | .values() |
1851 | 1 | .as_any() |
1852 | 1 | .downcast_ref::<Int32Array>() |
1853 | 1 | .unwrap(); |
1854 | 1 | assert_eq!(values.len(), 6); |
1855 | 1 | assert_eq!(&[10, 20, 30, 40, 50, 60], values.values()); |
1856 | 1 | } |
1857 | | } |