/Users/andrewlamb/Software/arrow-rs/arrow-select/src/interleave.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! Interleave elements from multiple arrays |
19 | | |
20 | | use crate::dictionary::{merge_dictionary_values, should_merge_dictionary_values}; |
21 | | use arrow_array::builder::{BooleanBufferBuilder, PrimitiveBuilder}; |
22 | | use arrow_array::cast::AsArray; |
23 | | use arrow_array::types::*; |
24 | | use arrow_array::*; |
25 | | use arrow_buffer::{ArrowNativeType, BooleanBuffer, MutableBuffer, NullBuffer, OffsetBuffer}; |
26 | | use arrow_data::ByteView; |
27 | | use arrow_data::transform::MutableArrayData; |
28 | | use arrow_schema::{ArrowError, DataType, Fields}; |
29 | | use std::sync::Arc; |
30 | | |
31 | | macro_rules! primitive_helper { | // Arm handed to `downcast_primitive!` inside `interleave`: forwards to `interleave_primitive` with the matched primitive type `$t` |
32 | | ($t:ty, $values:ident, $indices:ident, $data_type:ident) => { |
33 | | interleave_primitive::<$t>($values, $indices, $data_type) |
34 | | }; |
35 | | } |
36 | | |
37 | | macro_rules! dict_helper { | // Arm handed to `downcast_integer!` inside `interleave`: runs `interleave_dictionaries` for the matched dictionary key type `$t` |
38 | | ($t:ty, $values:expr, $indices:expr) => { |
39 | | Ok(Arc::new(interleave_dictionaries::<$t>($values, $indices)?) as _) | // NOTE(review): `interleave_dictionaries` is declared to return an `ArrayRef` already, so this looks like a second `Arc` layer around it - confirm whether the extra wrap is intended |
40 | | }; |
41 | | } |
42 | | |
43 | | /// |
44 | | /// Takes elements by index from a list of [`Array`], creating a new [`Array`] from those values. |
45 | | /// |
46 | | /// Each element in `indices` is a pair of `usize` with the first identifying the index |
47 | | /// of the [`Array`] in `values`, and the second the index of the value within that [`Array`]. |
48 | | /// |
49 | | /// ```text |
50 | | /// ┌─────────────────┐      ┌─────────┐                                  ┌─────────────────┐ |
51 | | /// │        A        │      │ (0, 0)  │        interleave(               │        A        │ |
52 | | /// ├─────────────────┤      ├─────────┤          [values0, values1],     ├─────────────────┤ |
53 | | /// │        D        │      │ (1, 0)  │          indices                 │        B        │ |
54 | | /// └─────────────────┘      ├─────────┤        )                         ├─────────────────┤ |
55 | | ///   values array 0         │ (1, 1)  │      ─────────────────────────▶  │        C        │ |
56 | | ///                          ├─────────┤                                  ├─────────────────┤ |
57 | | ///                          │ (0, 1)  │                                  │        D        │ |
58 | | ///                          └─────────┘                                  └─────────────────┘ |
59 | | /// ┌─────────────────┐        indices |
60 | | /// │        B        │         array |
61 | | /// ├─────────────────┤                                                         result |
62 | | /// │        C        │ |
63 | | /// ├─────────────────┤ |
64 | | /// │        E        │ |
65 | | /// └─────────────────┘ |
66 | | ///   values array 1 |
67 | | /// ``` |
68 | | /// Returns an error if `values` is empty or if the arrays do not all share one data type, and an empty array of that type if `indices` is empty. Index pairs are not range-checked by this function. |
69 | | /// For selecting values by index from a single array see [`crate::take`] |
70 | 43 | pub fn interleave( |
71 | 43 | values: &[&dyn Array], |
72 | 43 | indices: &[(usize, usize)], |
73 | 43 | ) -> Result<ArrayRef, ArrowError> { |
74 | 43 | if values.is_empty() { |
75 | 0 | return Err(ArrowError::InvalidArgumentError( |
76 | 0 | "interleave requires input of at least one array".to_string(), |
77 | 0 | )); |
78 | 43 | } |
79 | 43 | let data_type = values[0].data_type(); | // the first array fixes the type that every other input must match |
80 | | |
81 | 80.0k | for array in values43 .iter43 ().skip43 (1) { |
82 | 80.0k | if array.data_type() != data_type { |
83 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
84 | 0 | "It is not possible to interleave arrays of different data types ({} and {})", |
85 | 0 | data_type, |
86 | 0 | array.data_type() |
87 | 0 | ))); |
88 | 80.0k | } |
89 | | } |
90 | | |
91 | 43 | if indices.is_empty() { | // checked after the type validation, so mismatched inputs still error even when nothing is selected |
92 | 2 | return Ok(new_empty_array(data_type)); |
93 | 41 | } |
94 | | |
95 | 0 | downcast_primitive! { | // dispatch on the shared data type; every type without a dedicated arm goes to `interleave_fallback` |
96 | 0 | data_type => (primitive_helper, values, indices, data_type), |
97 | 17 | DataType::Utf8 => interleave_bytes::<Utf8Type>(values, indices), |
98 | 0 | DataType::LargeUtf8 => interleave_bytes::<LargeUtf8Type>(values, indices), |
99 | 0 | DataType::Binary => interleave_bytes::<BinaryType>(values, indices), |
100 | 0 | DataType::LargeBinary => interleave_bytes::<LargeBinaryType>(values, indices), |
101 | 0 | DataType::BinaryView => interleave_views::<BinaryViewType>(values, indices), |
102 | 3 | DataType::Utf8View => interleave_views::<StringViewType>(values, indices), |
103 | 4 | DataType::Dictionary(k, _) => downcast_integer!0 { |
104 | 4 | k.as_ref() => (dict_helper, values0 , indices0 ), |
105 | 0 | _ => unreachable!("illegal dictionary key type {k}") |
106 | | }, |
107 | 4 | DataType::Struct(fields) => interleave_struct(fields, values, indices), |
108 | 7 | _ => interleave_fallback(values, indices) |
109 | | } |
110 | 43 | } |
111 | | |
112 | | /// Common functionality for interleaving arrays: the inputs downcast to one concrete array type, plus the validity of the interleaved output |
113 | | /// |
114 | | /// `T` is the concrete [`Array`] type that every input is downcast to |
115 | | struct Interleave<'a, T> { |
116 | | /// The input arrays downcast to `T`, in the same order as the `values` slice given to `Interleave::new` |
117 | | arrays: Vec<&'a T>, |
118 | | /// The null buffer of the interleaved output; `None` when no input array reports any nulls |
119 | | nulls: Option<NullBuffer>, |
120 | | } |
121 | | |
122 | | impl<'a, T: Array + 'static> Interleave<'a, T> { |
123 | 30 | fn new(values: &[&'a dyn Array], indices: &'a [(usize, usize)]) -> Self { | // Downcasts every input to `T` (the `unwrap` panics if an input is some other array type). If any input reports nulls, the output validity is built by evaluating `is_valid` for each `(array, row)` pair of `indices`; otherwise `nulls` is `None` |
124 | 30 | let mut has_nulls = false; |
125 | 30 | let arrays: Vec<&T> = values |
126 | 30 | .iter() |
127 | 80.0k | .map30 (|x| { |
128 | 80.0k | has_nulls = has_nulls || x.null_count() != 080.0k ; |
129 | 80.0k | x.as_any().downcast_ref().unwrap() |
130 | 80.0k | }) |
131 | 30 | .collect(); |
132 | | |
133 | 30 | let nulls = match has_nulls { |
134 | | true => { |
135 | 35 | let nulls8 = BooleanBuffer::collect_bool8 (indices8 .len8 (), |i| { |
136 | 35 | let (a, b) = indices[i]; |
137 | 35 | arrays[a].is_valid(b) | // output slot `i` is valid exactly when the selected source row is valid |
138 | 35 | }); |
139 | 8 | Some(nulls.into()) |
140 | | } |
141 | 22 | false => None, | // no input has nulls, so no validity buffer is built at all |
142 | | }; |
143 | | |
144 | 30 | Self { arrays, nulls } |
145 | 30 | } |
146 | | } |
147 | | |
148 | 6 | fn interleave_primitive<T: ArrowPrimitiveType>( | // Builds a `PrimitiveArray<T>` holding, for each `(array, row)` pair in `indices`, the value read from that input array; validity comes from `Interleave::new` |
149 | 6 | values: &[&dyn Array], |
150 | 6 | indices: &[(usize, usize)], |
151 | 6 | data_type: &DataType, |
152 | 6 | ) -> Result<ArrayRef, ArrowError> { |
153 | 6 | let interleaved = Interleave::<'_, PrimitiveArray<T>>::new(values, indices); |
154 | | |
155 | 6 | let values = indices |
156 | 6 | .iter() |
157 | 26 | .map6 (|(a, b)| interleaved.arrays[*a].value(*b)) | // the stored value is copied for every row, null or not; nullness is carried separately by `interleaved.nulls` |
158 | 6 | .collect::<Vec<_>>(); |
159 | | |
160 | 6 | let array = PrimitiveArray::<T>::try_new(values.into(), interleaved.nulls)?0 ; |
161 | 6 | Ok(Arc::new(array.with_data_type(data_type.clone()))) | // stamps the input data type onto the result (presumably so parameterised types keep their exact type - TODO confirm) |
162 | 6 | } |
163 | | |
164 | 17 | fn interleave_bytes<T: ByteArrayType>( | // Builds a `GenericByteArray<T>` in two passes over `indices`: first the output offsets (which also gives the exact byte capacity), then the concatenated value bytes |
165 | 17 | values: &[&dyn Array], |
166 | 17 | indices: &[(usize, usize)], |
167 | 17 | ) -> Result<ArrayRef, ArrowError> { |
168 | 17 | let interleaved = Interleave::<'_, GenericByteArray<T>>::new(values, indices); |
169 | | |
170 | 17 | let mut capacity = 0; | // running total of output bytes; doubles as the next offset |
171 | 17 | let mut offsets = Vec::with_capacity(indices.len() + 1); |
172 | 17 | offsets.push(T::Offset::from_usize(0).unwrap()); |
173 | 94 | offsets17 .extend17 (indices17 .iter17 ().map17 (|(a, b)| { |
174 | 94 | let o = interleaved.arrays[*a].value_offsets(); |
175 | 94 | let element_len = o[*b + 1].as_usize() - o[*b].as_usize(); |
176 | 94 | capacity += element_len; |
177 | 94 | T::Offset::from_usize(capacity).expect("overflow") | // panics (rather than returning an error) once the total no longer fits in `T::Offset` |
178 | 94 | })); |
179 | | |
180 | 17 | let mut values = Vec::with_capacity(capacity); |
181 | 111 | for (a94 , b94 ) in indices { |
182 | 94 | values.extend_from_slice(interleaved.arrays[*a].value(*b).as_ref()); |
183 | 94 | } |
184 | | |
185 | | // SAFETY: `offsets` starts at 0 and only grows by each selected element length, and `values` is the concatenation of exactly those elements, each read from an existing array of the same type |
186 | 17 | let array = unsafe { |
187 | 17 | let offsets = OffsetBuffer::new_unchecked(offsets.into()); |
188 | 17 | GenericByteArray::<T>::new_unchecked(offsets, values.into(), interleaved.nulls) |
189 | | }; |
190 | 17 | Ok(Arc::new(array)) |
191 | 17 | } |
192 | | |
193 | 4 | fn interleave_dictionaries<K: ArrowDictionaryKeyType>( | // Interleaves dictionary arrays with key type `K`: either defers to `interleave_fallback`, or merges the dictionary values and rewrites every selected key against the merged values |
194 | 4 | arrays: &[&dyn Array], |
195 | 4 | indices: &[(usize, usize)], |
196 | 4 | ) -> Result<ArrayRef, ArrowError> { |
197 | 8 | let dictionaries4 : Vec<_>4 = arrays4 .iter4 ().map4 (|x| x.as_dictionary::<K>()).collect4 (); |
198 | 4 | if !should_merge_dictionary_values::<K>(&dictionaries, indices.len()) { | // the helper decides whether merging is worthwhile; its criteria are defined in `crate::dictionary`, not here |
199 | 1 | return interleave_fallback(arrays, indices); |
200 | 3 | } |
201 | | |
202 | 3 | let masks: Vec<_> = dictionaries | // one mask per input array with one bit per row of that array, set for the rows that `indices` selects |
203 | 3 | .iter() |
204 | 3 | .enumerate() |
205 | 6 | .map3 (|(a_idx, dictionary)| { |
206 | 6 | let mut key_mask = BooleanBufferBuilder::new_from_buffer( |
207 | 6 | MutableBuffer::new_null(dictionary.len()), |
208 | 6 | dictionary.len(), |
209 | | ); |
210 | | |
211 | 22 | for (_, key_idx11 ) in indices6 .iter6 ().filter6 (|(a, _)| *a == a_idx) { | // scans all of `indices` once per input array |
212 | 11 | key_mask.set_bit(*key_idx, true); |
213 | 11 | } |
214 | 6 | key_mask.finish() |
215 | 6 | }) |
216 | 3 | .collect(); |
217 | | |
218 | 3 | let merged = merge_dictionary_values(&dictionaries, Some(&masks))?0 ; |
219 | | |
220 | | // Recompute keys: translate each selected old key through `merged.key_mappings` for its source array; null keys stay null |
221 | 3 | let mut keys = PrimitiveBuilder::<K>::with_capacity(indices.len()); |
222 | 14 | for (a11 , b11 ) in indices { |
223 | 11 | let old_keys: &PrimitiveArray<K> = dictionaries[*a].keys(); |
224 | 11 | match old_keys.is_valid(*b) { |
225 | | true => { |
226 | 9 | let old_key = old_keys.values()[*b]; |
227 | 9 | keys.append_value(merged.key_mappings[*a][old_key.as_usize()]) |
228 | | } |
229 | 2 | false => keys.append_null(), |
230 | | } |
231 | | } |
232 | 3 | let array = unsafe { DictionaryArray::new_unchecked(keys.finish(), merged.values) }; | // NOTE(review): unchecked constructor with no safety comment; it relies on every mapped key being a valid index into `merged.values` - confirm against `merge_dictionary_values` |
233 | 3 | Ok(Arc::new(array)) |
234 | 4 | } |
235 | | |
236 | 3 | fn interleave_views<T: ByteViewType>( | // Interleaves byte-view arrays by copying the selected views and carrying over only the data buffers those views reference, renumbering buffer indices as it goes |
237 | 3 | values: &[&dyn Array], |
238 | 3 | indices: &[(usize, usize)], |
239 | 3 | ) -> Result<ArrayRef, ArrowError> { |
240 | 3 | let interleaved = Interleave::<'_, GenericByteViewArray<T>>::new(values, indices); |
241 | 3 | let mut buffers = Vec::new(); | // data buffers of the output, in order of first use |
242 | | |
243 | | // `offsets[i]` is the position in `buffer_to_new_index` of the first data buffer of input array `i` |
244 | 3 | let mut offsets = Vec::with_capacity(interleaved.arrays.len() + 1); |
245 | 3 | offsets.push(0); |
246 | 3 | let mut total_buffers = 0; |
247 | 6 | for a in interleaved.arrays3 .iter3 () { |
248 | 6 | total_buffers += a.data_buffers().len(); |
249 | 6 | offsets.push(total_buffers); |
250 | 6 | } |
251 | | |
252 | | // maps each input data buffer (flattened across all input arrays) to its index in the output `buffers`; `None` until a selected view first references it |
253 | 3 | let mut buffer_to_new_index = vec![None; total_buffers]; |
254 | | |
255 | 3 | let views: Vec<u128> = indices |
256 | 3 | .iter() |
257 | 16 | .map3 (|(array_idx, value_idx)| { |
258 | 16 | let array = interleaved.arrays[*array_idx]; |
259 | 16 | let view = array.views().get(*value_idx).unwrap(); |
260 | 16 | let view_len = *view as u32; | // the low 32 bits of the view are read as the value length |
261 | 16 | if view_len <= 12 { | // such views are returned unchanged, with no buffer lookup |
262 | 8 | return *view; |
263 | 8 | } |
264 | | // value is big enough to be in a variadic buffer, so its buffer index must be remapped to the output buffer list |
265 | 8 | let view = ByteView::from(*view); |
266 | 8 | let buffer_to_new_idx = offsets[*array_idx] + view.buffer_index as usize; |
267 | 8 | let new_buffer_idx: u32 = |
268 | 8 | *buffer_to_new_index[buffer_to_new_idx].get_or_insert_with(|| {6 |
269 | 6 | buffers.push(array.data_buffers()[view.buffer_index as usize].clone()); |
270 | 6 | (buffers.len() - 1) as u32 |
271 | 6 | }); |
272 | 8 | view.with_buffer_index(new_buffer_idx).as_u128() |
273 | 16 | }) |
274 | 3 | .collect(); |
275 | | |
276 | 3 | let array = unsafe { | // NOTE(review): unchecked constructor with no safety comment; the views come from existing arrays with only their buffer index rewritten to point at the matching entry of `buffers` |
277 | 3 | GenericByteViewArray::<T>::new_unchecked(views.into(), buffers, interleaved.nulls) |
278 | | }; |
279 | 3 | Ok(Arc::new(array)) |
280 | 3 | } |
281 | | |
282 | 4 | fn interleave_struct( | // Interleaves struct arrays column by column: each child column is interleaved with the same `indices` through a recursive call to `interleave`, and the struct-level validity comes from `Interleave::new` |
283 | 4 | fields: &Fields, |
284 | 4 | values: &[&dyn Array], |
285 | 4 | indices: &[(usize, usize)], |
286 | 4 | ) -> Result<ArrayRef, ArrowError> { |
287 | 4 | let interleaved = Interleave::<'_, StructArray>::new(values, indices); |
288 | | |
289 | 4 | if fields.is_empty() { | // with no child columns the row count is passed explicitly as `indices.len()` |
290 | 1 | let array = StructArray::try_new_with_length( |
291 | 1 | fields.clone(), |
292 | 1 | vec![], |
293 | 1 | interleaved.nulls, |
294 | 1 | indices.len(), |
295 | 0 | )?; |
296 | 1 | return Ok(Arc::new(array)); |
297 | 3 | } |
298 | | |
299 | 3 | let struct_fields_array: Result<Vec<_>, _> = (0..fields.len()) |
300 | 6 | .map3 (|i| { |
301 | 6 | let field_values: Vec<&dyn Array> = interleaved |
302 | 6 | .arrays |
303 | 6 | .iter() |
304 | 16 | .map6 (|x| x.column(i).as_ref()) |
305 | 6 | .collect(); |
306 | 6 | interleave(&field_values, indices) | // column `i` of every input, interleaved with the unchanged `indices` |
307 | 6 | }) |
308 | 3 | .collect(); | // collecting into a `Result` keeps the first child error, surfaced by the `?` below |
309 | | |
310 | 3 | let struct_array = |
311 | 3 | StructArray::try_new(fields.clone(), struct_fields_array?0 , interleaved.nulls)?0 ; |
312 | 3 | Ok(Arc::new(struct_array)) |
313 | 4 | } |
314 | | |
315 | | /// Fallback implementation of interleave using [`MutableArrayData`]. Runs of consecutive rows from the same array are copied with one `extend` call. `indices` must be non-empty because `indices[0]` is read unconditionally. |
316 | 10 | fn interleave_fallback( |
317 | 10 | values: &[&dyn Array], |
318 | 10 | indices: &[(usize, usize)], |
319 | 10 | ) -> Result<ArrayRef, ArrowError> { |
320 | 20 | let arrays10 : Vec<_>10 = values10 .iter10 ().map10 (|x| x.to_data()).collect10 (); |
321 | 10 | let arrays: Vec<_> = arrays.iter().collect(); | // second vector of references into the owned data above, in the form handed to `MutableArrayData::new` |
322 | 10 | let mut array_data = MutableArrayData::new(arrays, false, indices.len()); |
323 | | |
324 | 10 | let mut cur_array = indices[0].0; | // panics on empty `indices`; `interleave` returns early in that case before dispatching here |
325 | 10 | let mut start_row_idx = indices[0].1; |
326 | 10 | let mut end_row_idx = start_row_idx + 1; | // exclusive end of the pending run |
327 | | |
328 | 41 | for (array, row) in indices10 .iter10 ().skip10 (1).copied10 () { |
329 | 41 | if array == cur_array && row == end_row_idx7 { |
330 | | // next row of the same array directly follows the pending run, so just grow the run |
331 | 2 | end_row_idx += 1; |
332 | 2 | continue; |
333 | 39 | } |
334 | | |
335 | | // emit the pending run of rows from the current array |
336 | 39 | array_data.extend(cur_array, start_row_idx, end_row_idx); |
337 | | |
338 | | // start a new run at this (array, row) |
339 | 39 | cur_array = array; |
340 | 39 | start_row_idx = row; |
341 | 39 | end_row_idx = start_row_idx + 1; |
342 | | } |
343 | | |
344 | | // emit the final run, which the loop above never flushes |
345 | 10 | array_data.extend(cur_array, start_row_idx, end_row_idx); |
346 | 10 | Ok(make_array(array_data.freeze())) |
347 | 10 | } |
348 | | |
349 | | /// Interleave rows by index from multiple [`RecordBatch`] instances and return a new [`RecordBatch`]. |
350 | | /// |
351 | | /// This function will call [`interleave`] on each array of the [`RecordBatch`] instances and assemble a new [`RecordBatch`]. |
352 | | /// The schema is taken from `record_batches[0]`, so an empty `record_batches` slice panics instead of returning an error. |
353 | | /// # Example |
354 | | /// ``` |
355 | | /// # use std::sync::Arc; |
356 | | /// # use arrow_array::{StringArray, Int32Array, RecordBatch}; |
357 | | /// # use arrow_schema::{DataType, Field, Schema}; |
358 | | /// # use arrow_select::interleave::interleave_record_batch; |
359 | | /// |
360 | | /// let schema = Arc::new(Schema::new(vec![ |
361 | | ///     Field::new("a", DataType::Int32, true), |
362 | | ///     Field::new("b", DataType::Utf8, true), |
363 | | /// ])); |
364 | | /// |
365 | | /// let batch1 = RecordBatch::try_new( |
366 | | ///     schema.clone(), |
367 | | ///     vec![ |
368 | | ///         Arc::new(Int32Array::from(vec![0, 1, 2])), |
369 | | ///         Arc::new(StringArray::from(vec!["a", "b", "c"])), |
370 | | ///     ], |
371 | | /// ).unwrap(); |
372 | | /// |
373 | | /// let batch2 = RecordBatch::try_new( |
374 | | ///     schema.clone(), |
375 | | ///     vec![ |
376 | | ///         Arc::new(Int32Array::from(vec![3, 4, 5])), |
377 | | ///         Arc::new(StringArray::from(vec!["d", "e", "f"])), |
378 | | ///     ], |
379 | | /// ).unwrap(); |
380 | | /// |
381 | | /// let indices = vec![(0, 1), (1, 2), (0, 0), (1, 1)]; |
382 | | /// let interleaved = interleave_record_batch(&[&batch1, &batch2], &indices).unwrap(); |
383 | | /// |
384 | | /// let expected = RecordBatch::try_new( |
385 | | ///     schema, |
386 | | ///     vec![ |
387 | | ///         Arc::new(Int32Array::from(vec![1, 5, 0, 4])), |
388 | | ///         Arc::new(StringArray::from(vec!["b", "f", "a", "e"])), |
389 | | ///     ], |
390 | | /// ).unwrap(); |
391 | | /// assert_eq!(interleaved, expected); |
392 | | /// ``` |
393 | 0 | pub fn interleave_record_batch( | // NOTE(review): every row of this function shows a count of 0 in this report |
394 | 0 | record_batches: &[&RecordBatch], |
395 | 0 | indices: &[(usize, usize)], |
396 | 0 | ) -> Result<RecordBatch, ArrowError> { |
397 | 0 | let schema = record_batches[0].schema(); | // unchecked index: panics when `record_batches` is empty |
398 | 0 | let columns = (0..schema.fields().len()) |
399 | 0 | .map(|i| { |
400 | 0 | let column_values: Vec<&dyn Array> = record_batches |
401 | 0 | .iter() |
402 | 0 | .map(|batch| batch.column(i).as_ref()) |
403 | 0 | .collect(); |
404 | 0 | interleave(&column_values, indices) | // column `i` of every batch, interleaved with the same `indices` |
405 | 0 | }) |
406 | 0 | .collect::<Result<Vec<_>, _>>()?; |
407 | 0 | RecordBatch::try_new(schema, columns) |
408 | 0 | } |
409 | | |
410 | | #[cfg(test)] |
411 | | mod tests { |
412 | | use super::*; |
413 | | use arrow_array::Int32RunArray; |
414 | | use arrow_array::builder::{Int32Builder, ListBuilder, PrimitiveRunBuilder}; |
415 | | use arrow_schema::Field; |
416 | | |
417 | | #[test] | // Three non-null Int32 inputs: the output holds the selected values in index order, including a repeated pick of the same row |
418 | 1 | fn test_primitive() { |
419 | 1 | let a = Int32Array::from_iter_values([1, 2, 3, 4]); |
420 | 1 | let b = Int32Array::from_iter_values([5, 6, 7]); |
421 | 1 | let c = Int32Array::from_iter_values([8, 9, 10]); |
422 | 1 | let values = interleave(&[&a, &b, &c], &[(0, 3), (0, 3), (2, 2), (2, 0), (1, 1)]).unwrap(); |
423 | 1 | let v = values.as_primitive::<Int32Type>(); |
424 | 1 | assert_eq!(v.values(), &[4, 4, 10, 8, 6]); |
425 | 1 | } |
426 | | |
427 | | #[test] | // One input contains a null: selecting that row (twice) must produce nulls at the matching output positions |
428 | 1 | fn test_primitive_nulls() { |
429 | 1 | let a = Int32Array::from_iter_values([1, 2, 3, 4]); |
430 | 1 | let b = Int32Array::from_iter([Some(1), Some(4), None]); |
431 | 1 | let values = interleave(&[&a, &b], &[(0, 1), (1, 2), (1, 2), (0, 3), (0, 2)]).unwrap(); |
432 | 1 | let v: Vec<_> = values.as_primitive::<Int32Type>().into_iter().collect(); |
433 | 1 | assert_eq!(&v, &[Some(2), None, None, Some(4), Some(3)]) |
434 | 1 | } |
435 | | |
436 | | #[test] | // Empty `indices` yields an empty array that still carries the input data type |
437 | 1 | fn test_primitive_empty() { |
438 | 1 | let a = Int32Array::from_iter_values([1, 2, 3, 4]); |
439 | 1 | let v = interleave(&[&a], &[]).unwrap(); |
440 | 1 | assert!(v.is_empty()); |
441 | 1 | assert_eq!(v.data_type(), &DataType::Int32); |
442 | 1 | } |
443 | | |
444 | | #[test] | // Utf8 inputs go through the `interleave_bytes` arm: checks the selected strings come back in index order, including a repeated row |
445 | 1 | fn test_strings() { |
446 | 1 | let a = StringArray::from_iter_values(["a", "b", "c"]); |
447 | 1 | let b = StringArray::from_iter_values(["hello", "world", "foo"]); |
448 | 1 | let values = interleave(&[&a, &b], &[(0, 2), (0, 2), (1, 0), (1, 1), (0, 1)]).unwrap(); |
449 | 1 | let v = values.as_string::<i32>(); |
450 | 1 | let values: Vec<_> = v.into_iter().collect(); |
451 | 1 | assert_eq!( |
452 | 1 | &values, |
453 | | &[ |
454 | | Some("c"), |
455 | | Some("c"), |
456 | | Some("hello"), |
457 | | Some("world"), |
458 | | Some("b") |
459 | | ] |
460 | | ) |
461 | 1 | } |
462 | | |
463 | | #[test] | // Exercises both dictionary outcomes: one selection where the output keeps 5 dictionary values and one where it is reduced to a single value |
464 | 1 | fn test_interleave_dictionary() { |
465 | 1 | let a = DictionaryArray::<Int32Type>::from_iter(["a", "b", "c", "a", "b"]); |
466 | 1 | let b = DictionaryArray::<Int32Type>::from_iter(["a", "c", "a", "c", "a"]); |
467 | | |
468 | | // Should not recompute dictionary: the output is asserted to hold 5 dictionary values |
469 | 1 | let values = |
470 | 1 | interleave(&[&a, &b], &[(0, 2), (0, 2), (0, 2), (1, 0), (1, 1), (0, 1)]).unwrap(); |
471 | 1 | let v = values.as_dictionary::<Int32Type>(); |
472 | 1 | assert_eq!(v.values().len(), 5); |
473 | | |
474 | 1 | let vc = v.downcast_dict::<StringArray>().unwrap(); |
475 | 1 | let collected: Vec<_> = vc.into_iter().map(Option::unwrap).collect(); |
476 | 1 | assert_eq!(&collected, &["c", "c", "c", "a", "c", "b"]); |
477 | | |
478 | | // Should recompute dictionary: every selected row resolves to `c`, so a single value remains |
479 | 1 | let values = interleave(&[&a, &b], &[(0, 2), (0, 2), (1, 1)]).unwrap(); |
480 | 1 | let v = values.as_dictionary::<Int32Type>(); |
481 | 1 | assert_eq!(v.values().len(), 1); |
482 | | |
483 | 1 | let vc = v.downcast_dict::<StringArray>().unwrap(); |
484 | 1 | let collected: Vec<_> = vc.into_iter().map(Option::unwrap).collect(); |
485 | 1 | assert_eq!(&collected, &["c", "c", "c"]); |
486 | 1 | } |
487 | | |
488 | | #[test] | // Covers both ways a dictionary row can be null: a valid key that points at a null dictionary value (input 1, row 2) and a null key (input 2) |
489 | 1 | fn test_interleave_dictionary_nulls() { |
490 | 1 | let input_1_keys = Int32Array::from_iter_values([0, 2, 1, 3]); |
491 | 1 | let input_1_values = StringArray::from(vec![Some("foo"), None, Some("bar"), Some("fiz")]); |
492 | 1 | let input_1 = DictionaryArray::new(input_1_keys, Arc::new(input_1_values)); |
493 | 1 | let input_2: DictionaryArray<Int32Type> = vec![None].into_iter().collect(); |
494 | | |
495 | 1 | let expected = vec![Some("fiz"), None, None, Some("foo")]; |
496 | | |
497 | 1 | let values = interleave( |
498 | 1 | &[&input_1 as _, &input_2 as _], |
499 | 1 | &[(0, 3), (0, 2), (1, 0), (0, 0)], |
500 | | ) |
501 | 1 | .unwrap(); |
502 | 1 | let dictionary = values.as_dictionary::<Int32Type>(); |
503 | 1 | let actual: Vec<Option<&str>> = dictionary |
504 | 1 | .downcast_dict::<StringArray>() |
505 | 1 | .unwrap() |
506 | 1 | .into_iter() |
507 | 1 | .collect(); |
508 | | |
509 | 1 | assert_eq!(actual, expected); |
510 | 1 | } |
511 | | |
512 | | #[test] | // List arrays have no dedicated arm in `interleave`, so this goes through the catch-all `interleave_fallback` path; covers null lists and a null child value |
513 | 1 | fn test_lists() { |
514 | | // a = [[1, 2], null, [3]] |
515 | 1 | let mut a = ListBuilder::new(Int32Builder::new()); |
516 | 1 | a.values().append_value(1); |
517 | 1 | a.values().append_value(2); |
518 | 1 | a.append(true); |
519 | 1 | a.append(false); |
520 | 1 | a.values().append_value(3); |
521 | 1 | a.append(true); |
522 | 1 | let a = a.finish(); |
523 | | |
524 | | // b = [[4], null, [5, 6, null]] |
525 | 1 | let mut b = ListBuilder::new(Int32Builder::new()); |
526 | 1 | b.values().append_value(4); |
527 | 1 | b.append(true); |
528 | 1 | b.append(false); |
529 | 1 | b.values().append_value(5); |
530 | 1 | b.values().append_value(6); |
531 | 1 | b.values().append_null(); |
532 | 1 | b.append(true); |
533 | 1 | let b = b.finish(); |
534 | | |
535 | 1 | let values = interleave(&[&a, &b], &[(0, 2), (0, 1), (1, 0), (1, 2), (1, 1)]).unwrap(); |
536 | 1 | let v = values.as_any().downcast_ref::<ListArray>().unwrap(); |
537 | | |
538 | | // expected = [[3], null, [4], [5, 6, null], null] |
539 | 1 | let mut expected = ListBuilder::new(Int32Builder::new()); |
540 | 1 | expected.values().append_value(3); |
541 | 1 | expected.append(true); |
542 | 1 | expected.append(false); |
543 | 1 | expected.values().append_value(4); |
544 | 1 | expected.append(true); |
545 | 1 | expected.values().append_value(5); |
546 | 1 | expected.values().append_value(6); |
547 | 1 | expected.values().append_null(); |
548 | 1 | expected.append(true); |
549 | 1 | expected.append(false); |
550 | 1 | let expected = expected.finish(); |
551 | | |
552 | 1 | assert_eq!(v, &expected); |
553 | 1 | } |
554 | | |
555 | | #[test] | // Three struct inputs with no nulls anywhere: checks the output data type, a zero null count, and both child columns |
556 | 1 | fn test_struct_without_nulls() { |
557 | 1 | let fields = Fields::from(vec![ |
558 | 1 | Field::new("number_col", DataType::Int32, false), |
559 | 1 | Field::new("string_col", DataType::Utf8, false), |
560 | | ]); |
561 | 1 | let a = { |
562 | 1 | let number_col = Int32Array::from_iter_values([1, 2, 3, 4]); |
563 | 1 | let string_col = StringArray::from_iter_values(["a", "b", "c", "d"]); |
564 | | |
565 | 1 | StructArray::try_new( |
566 | 1 | fields.clone(), |
567 | 1 | vec![Arc::new(number_col), Arc::new(string_col)], |
568 | 1 | None, |
569 | | ) |
570 | 1 | .unwrap() |
571 | | }; |
572 | | |
573 | 1 | let b = { |
574 | 1 | let number_col = Int32Array::from_iter_values([5, 6, 7]); |
575 | 1 | let string_col = StringArray::from_iter_values(["hello", "world", "foo"]); |
576 | | |
577 | 1 | StructArray::try_new( |
578 | 1 | fields.clone(), |
579 | 1 | vec![Arc::new(number_col), Arc::new(string_col)], |
580 | 1 | None, |
581 | | ) |
582 | 1 | .unwrap() |
583 | | }; |
584 | | |
585 | 1 | let c = { |
586 | 1 | let number_col = Int32Array::from_iter_values([8, 9, 10]); |
587 | 1 | let string_col = StringArray::from_iter_values(["x", "y", "z"]); |
588 | | |
589 | 1 | StructArray::try_new( |
590 | 1 | fields.clone(), |
591 | 1 | vec![Arc::new(number_col), Arc::new(string_col)], |
592 | 1 | None, |
593 | | ) |
594 | 1 | .unwrap() |
595 | | }; |
596 | | |
597 | 1 | let values = interleave(&[&a, &b, &c], &[(0, 3), (0, 3), (2, 2), (2, 0), (1, 1)]).unwrap(); |
598 | 1 | let values_struct = values.as_struct(); |
599 | 1 | assert_eq!(values_struct.data_type(), &DataType::Struct(fields)); |
600 | 1 | assert_eq!(values_struct.null_count(), 0); |
601 | | |
602 | 1 | let values_number = values_struct.column(0).as_primitive::<Int32Type>(); |
603 | 1 | assert_eq!(values_number.values(), &[4, 4, 10, 8, 6]); |
604 | 1 | let values_string = values_struct.column(1).as_string::<i32>(); |
605 | 1 | let values_string: Vec<_> = values_string.into_iter().collect(); |
606 | 1 | assert_eq!( |
607 | 1 | &values_string, |
608 | | &[Some("d"), Some("d"), Some("z"), Some("x"), Some("world")] |
609 | | ); |
610 | 1 | } |
611 | | |
612 | | #[test] | // Nulls live only in the child columns, not at struct level: the struct null count stays 0 while the child columns report the nulls |
613 | 1 | fn test_struct_with_nulls_in_values() { |
614 | 1 | let fields = Fields::from(vec![ |
615 | 1 | Field::new("number_col", DataType::Int32, true), |
616 | 1 | Field::new("string_col", DataType::Utf8, true), |
617 | | ]); |
618 | 1 | let a = { |
619 | 1 | let number_col = Int32Array::from_iter_values([1, 2, 3, 4]); |
620 | 1 | let string_col = StringArray::from_iter_values(["a", "b", "c", "d"]); |
621 | | |
622 | 1 | StructArray::try_new( |
623 | 1 | fields.clone(), |
624 | 1 | vec![Arc::new(number_col), Arc::new(string_col)], |
625 | 1 | None, |
626 | | ) |
627 | 1 | .unwrap() |
628 | | }; |
629 | | |
630 | 1 | let b = { |
631 | 1 | let number_col = Int32Array::from_iter([Some(1), Some(4), None]); |
632 | 1 | let string_col = StringArray::from(vec![Some("hello"), None, Some("foo")]); |
633 | | |
634 | 1 | StructArray::try_new( |
635 | 1 | fields.clone(), |
636 | 1 | vec![Arc::new(number_col), Arc::new(string_col)], |
637 | 1 | None, |
638 | | ) |
639 | 1 | .unwrap() |
640 | | }; |
641 | | |
642 | 1 | let values = interleave(&[&a, &b], &[(0, 1), (1, 2), (1, 2), (0, 3), (1, 1)]).unwrap(); |
643 | 1 | let values_struct = values.as_struct(); |
644 | 1 | assert_eq!(values_struct.data_type(), &DataType::Struct(fields)); |
645 | | |
646 | | // The struct itself has no nulls, but the values do |
647 | 1 | assert_eq!(values_struct.null_count(), 0); |
648 | | |
649 | 1 | let values_number: Vec<_> = values_struct |
650 | 1 | .column(0) |
651 | 1 | .as_primitive::<Int32Type>() |
652 | 1 | .into_iter() |
653 | 1 | .collect(); |
654 | 1 | assert_eq!(values_number, &[Some(2), None, None, Some(4), Some(4)]); |
655 | | |
656 | 1 | let values_string = values_struct.column(1).as_string::<i32>(); |
657 | 1 | let values_string: Vec<_> = values_string.into_iter().collect(); |
658 | 1 | assert_eq!( |
659 | 1 | &values_string, |
660 | | &[Some("b"), Some("foo"), Some("foo"), Some("d"), None] |
661 | | ); |
662 | 1 | } |
663 | | |
664 | | #[test] | // One input has a struct-level null (row 1 of `b`): the output validity must mark that position invalid while the child values are still copied |
665 | 1 | fn test_struct_with_nulls() { |
666 | 1 | let fields = Fields::from(vec![ |
667 | 1 | Field::new("number_col", DataType::Int32, false), |
668 | 1 | Field::new("string_col", DataType::Utf8, false), |
669 | | ]); |
670 | 1 | let a = { |
671 | 1 | let number_col = Int32Array::from_iter_values([1, 2, 3, 4]); |
672 | 1 | let string_col = StringArray::from_iter_values(["a", "b", "c", "d"]); |
673 | | |
674 | 1 | StructArray::try_new( |
675 | 1 | fields.clone(), |
676 | 1 | vec![Arc::new(number_col), Arc::new(string_col)], |
677 | 1 | None, |
678 | | ) |
679 | 1 | .unwrap() |
680 | | }; |
681 | | |
682 | 1 | let b = { |
683 | 1 | let number_col = Int32Array::from_iter_values([5, 6, 7]); |
684 | 1 | let string_col = StringArray::from_iter_values(["hello", "world", "foo"]); |
685 | | |
686 | 1 | StructArray::try_new( |
687 | 1 | fields.clone(), |
688 | 1 | vec![Arc::new(number_col), Arc::new(string_col)], |
689 | 1 | Some(NullBuffer::from(&[true, false, true])), |
690 | | ) |
691 | 1 | .unwrap() |
692 | | }; |
693 | | |
694 | 1 | let c = { |
695 | 1 | let number_col = Int32Array::from_iter_values([8, 9, 10]); |
696 | 1 | let string_col = StringArray::from_iter_values(["x", "y", "z"]); |
697 | | |
698 | 1 | StructArray::try_new( |
699 | 1 | fields.clone(), |
700 | 1 | vec![Arc::new(number_col), Arc::new(string_col)], |
701 | 1 | None, |
702 | | ) |
703 | 1 | .unwrap() |
704 | | }; |
705 | | |
706 | 1 | let values = interleave(&[&a, &b, &c], &[(0, 3), (0, 3), (2, 2), (1, 1), (2, 0)]).unwrap(); |
707 | 1 | let values_struct = values.as_struct(); |
708 | 1 | assert_eq!(values_struct.data_type(), &DataType::Struct(fields)); |
709 | | |
710 | 1 | let validity: Vec<bool> = { |
711 | 1 | let null_buffer = values_struct.nulls().expect("should_have_nulls"); |
712 | | |
713 | 1 | null_buffer.iter().collect() |
714 | | }; |
715 | 1 | assert_eq!(validity, &[true, true, true, false, true]); |
716 | 1 | let values_number = values_struct.column(0).as_primitive::<Int32Type>(); |
717 | 1 | assert_eq!(values_number.values(), &[4, 4, 10, 6, 8]); |
718 | 1 | let values_string = values_struct.column(1).as_string::<i32>(); |
719 | 1 | let values_string: Vec<_> = values_string.into_iter().collect(); |
720 | 1 | assert_eq!( |
721 | 1 | &values_string, |
722 | | &[Some("d"), Some("d"), Some("z"), Some("world"), Some("x"),] |
723 | | ); |
724 | 1 | } |
725 | | |
726 | | #[test] | // Empty `indices` on a struct input: hits the early return in `interleave` (not the zero-field branch of `interleave_struct`) and must keep the struct data type |
727 | 1 | fn test_struct_empty() { |
728 | 1 | let fields = Fields::from(vec![ |
729 | 1 | Field::new("number_col", DataType::Int32, false), |
730 | 1 | Field::new("string_col", DataType::Utf8, false), |
731 | | ]); |
732 | 1 | let a = { |
733 | 1 | let number_col = Int32Array::from_iter_values([1, 2, 3, 4]); |
734 | 1 | let string_col = StringArray::from_iter_values(["a", "b", "c", "d"]); |
735 | | |
736 | 1 | StructArray::try_new( |
737 | 1 | fields.clone(), |
738 | 1 | vec![Arc::new(number_col), Arc::new(string_col)], |
739 | 1 | None, |
740 | | ) |
741 | 1 | .unwrap() |
742 | | }; |
743 | 1 | let v = interleave(&[&a], &[]).unwrap(); |
744 | 1 | assert!(v.is_empty()); |
745 | 1 | assert_eq!(v.data_type(), &DataType::Struct(fields)); |
746 | 1 | } |
747 | | |
748 | | #[test] | // Dictionary case mixing a 100-value dictionary, of which only three rows are selected, with an all-null dictionary that has no values at all |
749 | 1 | fn interleave_sparse_nulls() { |
750 | 100 | let values1 = StringArray::from_iter_values1 ((0..100)1 .map1 (|x| x.to_string())); |
751 | 1 | let keys = Int32Array::from_iter_values(0..10); |
752 | 1 | let dict_a = DictionaryArray::new(keys, Arc::new(values)); |
753 | 1 | let values = StringArray::new_null(0); |
754 | 1 | let keys = Int32Array::new_null(10); |
755 | 1 | let dict_b = DictionaryArray::new(keys, Arc::new(values)); |
756 | | |
757 | 1 | let indices = &[(0, 0), (0, 1), (0, 2), (1, 0)]; |
758 | 1 | let array = interleave(&[&dict_a, &dict_b], indices).unwrap(); |
759 | | |
760 | 1 | let expected = |
761 | 1 | DictionaryArray::<Int32Type>::from_iter(vec![Some("0"), Some("1"), Some("2"), None]); |
762 | 1 | assert_eq!(array.as_ref(), &expected) |
763 | 1 | } |
764 | | |
765 | | #[test] |
766 | 1 | fn test_interleave_views() { |
767 | 1 | let values = StringArray::from_iter_values([ |
768 | 1 | "hello", |
769 | 1 | "world_long_string_not_inlined", |
770 | 1 | "foo", |
771 | 1 | "bar", |
772 | 1 | "baz", |
773 | 1 | ]); |
774 | 1 | let view_a = StringViewArray::from(&values); |
775 | | |
776 | 1 | let values = StringArray::from_iter_values([ |
777 | 1 | "test", |
778 | 1 | "data", |
779 | 1 | "more_long_string_not_inlined", |
780 | 1 | "views", |
781 | 1 | "here", |
782 | 1 | ]); |
783 | 1 | let view_b = StringViewArray::from(&values); |
784 | | |
785 | 1 | let indices = &[ |
786 | 1 | (0, 2), // "foo" |
787 | 1 | (1, 0), // "test" |
788 | 1 | (0, 4), // "baz" |
789 | 1 | (1, 3), // "views" |
790 | 1 | (0, 1), // "world_long_string_not_inlined" |
791 | 1 | ]; |
792 | | |
793 | | // Test specialized implementation |
794 | 1 | let values = interleave(&[&view_a, &view_b], indices).unwrap(); |
795 | 1 | let result = values.as_string_view(); |
796 | 1 | assert_eq!(result.data_buffers().len(), 1); |
797 | | |
798 | 1 | let fallback = interleave_fallback(&[&view_a, &view_b], indices).unwrap(); |
799 | 1 | let fallback_result = fallback.as_string_view(); |
800 | | // note that fallback_result has 2 buffers, but only one long enough string to warrant a buffer |
801 | 1 | assert_eq!(fallback_result.data_buffers().len(), 2); |
802 | | |
803 | | // Convert to strings for easier assertion |
804 | 5 | let collected: Vec<_> = result.iter().map(|x| x.map(|s| s.to_string())).collect(); |
805 | | |
806 | 1 | let fallback_collected: Vec<_> = fallback_result |
807 | 1 | .iter() |
808 | 5 | .map(|x| x.map(|s| s.to_string())) |
809 | 1 | .collect(); |
810 | | |
811 | 1 | assert_eq!(&collected, &fallback_collected); |
812 | | |
813 | 1 | assert_eq!( |
814 | 1 | &collected, |
815 | 1 | &[ |
816 | 1 | Some("foo".to_string()), |
817 | 1 | Some("test".to_string()), |
818 | 1 | Some("baz".to_string()), |
819 | 1 | Some("views".to_string()), |
820 | 1 | Some("world_long_string_not_inlined".to_string()), |
821 | 1 | ] |
822 | | ); |
823 | 1 | } |
824 | | |
825 | | #[test] |
826 | 1 | fn test_interleave_views_with_nulls() { |
827 | 1 | let values = StringArray::from_iter([ |
828 | 1 | Some("hello"), |
829 | 1 | None, |
830 | 1 | Some("foo_long_string_not_inlined"), |
831 | 1 | Some("bar"), |
832 | 1 | None, |
833 | 1 | ]); |
834 | 1 | let view_a = StringViewArray::from(&values); |
835 | | |
836 | 1 | let values = StringArray::from_iter([ |
837 | 1 | Some("test"), |
838 | 1 | Some("data_long_string_not_inlined"), |
839 | 1 | None, |
840 | 1 | None, |
841 | 1 | Some("here"), |
842 | 1 | ]); |
843 | 1 | let view_b = StringViewArray::from(&values); |
844 | | |
845 | 1 | let indices = &[ |
846 | 1 | (0, 1), // null |
847 | 1 | (1, 2), // null |
848 | 1 | (0, 2), // "foo_long_string_not_inlined" |
849 | 1 | (1, 3), // null |
850 | 1 | (0, 4), // null |
851 | 1 | ]; |
852 | | |
853 | | // Test specialized implementation |
854 | 1 | let values = interleave(&[&view_a, &view_b], indices).unwrap(); |
855 | 1 | let result = values.as_string_view(); |
856 | 1 | assert_eq!(result.data_buffers().len(), 1); |
857 | | |
858 | 1 | let fallback = interleave_fallback(&[&view_a, &view_b], indices).unwrap(); |
859 | 1 | let fallback_result = fallback.as_string_view(); |
860 | | |
861 | | // Convert to strings for easier assertion |
862 | 5 | let collected: Vec<_> = result.iter().map(|x| x.map(|s| s.to_string())).collect(); |
863 | | |
864 | 1 | let fallback_collected: Vec<_> = fallback_result |
865 | 1 | .iter() |
866 | 5 | .map(|x| x.map(|s| s.to_string())) |
867 | 1 | .collect(); |
868 | | |
869 | 1 | assert_eq!(&collected, &fallback_collected); |
870 | | |
871 | 1 | assert_eq!( |
872 | 1 | &collected, |
873 | 1 | &[ |
874 | 1 | None, |
875 | 1 | None, |
876 | 1 | Some("foo_long_string_not_inlined".to_string()), |
877 | 1 | None, |
878 | 1 | None, |
879 | 1 | ] |
880 | | ); |
881 | 1 | } |
882 | | |
883 | | #[test] |
884 | 1 | fn test_interleave_views_multiple_buffers() { |
885 | 1 | let str1 = "very_long_string_from_first_buffer".as_bytes(); |
886 | 1 | let str2 = "very_long_string_from_second_buffer".as_bytes(); |
887 | 1 | let buffer1 = str1.to_vec().into(); |
888 | 1 | let buffer2 = str2.to_vec().into(); |
889 | | |
890 | 1 | let view1 = ByteView::new(str1.len() as u32, &str1[..4]) |
891 | 1 | .with_buffer_index(0) |
892 | 1 | .with_offset(0) |
893 | 1 | .as_u128(); |
894 | 1 | let view2 = ByteView::new(str2.len() as u32, &str2[..4]) |
895 | 1 | .with_buffer_index(1) |
896 | 1 | .with_offset(0) |
897 | 1 | .as_u128(); |
898 | 1 | let view_a = |
899 | 1 | StringViewArray::try_new(vec![view1, view2].into(), vec![buffer1, buffer2], None) |
900 | 1 | .unwrap(); |
901 | | |
902 | 1 | let str3 = "another_very_long_string_buffer_three".as_bytes(); |
903 | 1 | let str4 = "different_long_string_in_buffer_four".as_bytes(); |
904 | 1 | let buffer3 = str3.to_vec().into(); |
905 | 1 | let buffer4 = str4.to_vec().into(); |
906 | | |
907 | 1 | let view3 = ByteView::new(str3.len() as u32, &str3[..4]) |
908 | 1 | .with_buffer_index(0) |
909 | 1 | .with_offset(0) |
910 | 1 | .as_u128(); |
911 | 1 | let view4 = ByteView::new(str4.len() as u32, &str4[..4]) |
912 | 1 | .with_buffer_index(1) |
913 | 1 | .with_offset(0) |
914 | 1 | .as_u128(); |
915 | 1 | let view_b = |
916 | 1 | StringViewArray::try_new(vec![view3, view4].into(), vec![buffer3, buffer4], None) |
917 | 1 | .unwrap(); |
918 | | |
919 | 1 | let indices = &[ |
920 | 1 | (0, 0), // String from first buffer of array A |
921 | 1 | (1, 0), // String from first buffer of array B |
922 | 1 | (0, 1), // String from second buffer of array A |
923 | 1 | (1, 1), // String from second buffer of array B |
924 | 1 | (0, 0), // String from first buffer of array A again |
925 | 1 | (1, 1), // String from second buffer of array B again |
926 | 1 | ]; |
927 | | |
928 | | // Test interleave |
929 | 1 | let values = interleave(&[&view_a, &view_b], indices).unwrap(); |
930 | 1 | let result = values.as_string_view(); |
931 | | |
932 | 1 | assert_eq!( |
933 | 1 | result.data_buffers().len(), |
934 | | 4, |
935 | 0 | "Expected four buffers (two from each input array)" |
936 | | ); |
937 | | |
938 | 6 | let result_strings: Vec<_> = result.iter().map(|x| x.map(|s| s.to_string())).collect(); |
939 | 1 | assert_eq!( |
940 | | result_strings, |
941 | 1 | vec![ |
942 | 1 | Some("very_long_string_from_first_buffer".to_string()), |
943 | 1 | Some("another_very_long_string_buffer_three".to_string()), |
944 | 1 | Some("very_long_string_from_second_buffer".to_string()), |
945 | 1 | Some("different_long_string_in_buffer_four".to_string()), |
946 | 1 | Some("very_long_string_from_first_buffer".to_string()), |
947 | 1 | Some("different_long_string_in_buffer_four".to_string()), |
948 | | ] |
949 | | ); |
950 | | |
951 | 1 | let views = result.views(); |
952 | 1 | let buffer_indices: Vec<_> = views |
953 | 1 | .iter() |
954 | 6 | .map(|raw_view| ByteView::from(*raw_view).buffer_index) |
955 | 1 | .collect(); |
956 | | |
957 | 1 | assert_eq!( |
958 | | buffer_indices, |
959 | 1 | vec![ |
960 | | 0, // First buffer from array A |
961 | | 1, // First buffer from array B |
962 | | 2, // Second buffer from array A |
963 | | 3, // Second buffer from array B |
964 | | 0, // First buffer from array A (reused) |
965 | | 3, // Second buffer from array B (reused) |
966 | | ] |
967 | | ); |
968 | 1 | } |
969 | | |
970 | | #[test] |
971 | 1 | fn test_interleave_run_end_encoded_primitive() { |
972 | 1 | let mut builder = PrimitiveRunBuilder::<Int32Type, Int32Type>::new(); |
973 | 1 | builder.extend([1, 1, 2, 2, 2, 3].into_iter().map(Some)); |
974 | 1 | let a = builder.finish(); |
975 | | |
976 | 1 | let mut builder = PrimitiveRunBuilder::<Int32Type, Int32Type>::new(); |
977 | 1 | builder.extend([4, 5, 5, 6, 6, 6].into_iter().map(Some)); |
978 | 1 | let b = builder.finish(); |
979 | | |
980 | 1 | let indices = &[(0, 1), (1, 0), (0, 4), (1, 2), (0, 5)]; |
981 | 1 | let result = interleave(&[&a, &b], indices).unwrap(); |
982 | | |
983 | | // The result should be a RunEndEncoded array |
984 | 1 | assert!(matches!(result.data_type(), DataType::RunEndEncoded(_, _))); |
985 | | |
986 | | // Cast to RunArray to access values |
987 | 1 | let result_run_array: &Int32RunArray = result.as_any().downcast_ref().unwrap(); |
988 | | |
989 | | // Verify the logical values by accessing the logical array directly |
990 | 1 | let expected = vec![1, 4, 2, 5, 3]; |
991 | 1 | let mut actual = Vec::new(); |
992 | 5 | for i in 0..result_run_array.len() { |
993 | 5 | let physical_idx = result_run_array.get_physical_index(i); |
994 | 5 | let value = result_run_array |
995 | 5 | .values() |
996 | 5 | .as_primitive::<Int32Type>() |
997 | 5 | .value(physical_idx); |
998 | 5 | actual.push(value); |
999 | 5 | } |
1000 | 1 | assert_eq!(actual, expected); |
1001 | 1 | } |
1002 | | |
1003 | | #[test] |
1004 | 1 | fn test_interleave_run_end_encoded_string() { |
1005 | 1 | let a: Int32RunArray = vec!["hello", "hello", "world", "world", "foo"] |
1006 | 1 | .into_iter() |
1007 | 1 | .collect(); |
1008 | 1 | let b: Int32RunArray = vec!["bar", "baz", "baz", "qux"].into_iter().collect(); |
1009 | | |
1010 | 1 | let indices = &[(0, 0), (1, 1), (0, 3), (1, 3), (0, 4)]; |
1011 | 1 | let result = interleave(&[&a, &b], indices).unwrap(); |
1012 | | |
1013 | | // The result should be a RunEndEncoded array |
1014 | 1 | assert!(matches!(result.data_type(), DataType::RunEndEncoded(_, _))); |
1015 | | |
1016 | | // Cast to RunArray to access values |
1017 | 1 | let result_run_array: &Int32RunArray = result.as_any().downcast_ref().unwrap(); |
1018 | | |
1019 | | // Verify the logical values by accessing the logical array directly |
1020 | 1 | let expected = vec!["hello", "baz", "world", "qux", "foo"]; |
1021 | 1 | let mut actual = Vec::new(); |
1022 | 5 | for i in 0..result_run_array.len() { |
1023 | 5 | let physical_idx = result_run_array.get_physical_index(i); |
1024 | 5 | let value = result_run_array |
1025 | 5 | .values() |
1026 | 5 | .as_string::<i32>() |
1027 | 5 | .value(physical_idx); |
1028 | 5 | actual.push(value); |
1029 | 5 | } |
1030 | 1 | assert_eq!(actual, expected); |
1031 | 1 | } |
1032 | | |
1033 | | #[test] |
1034 | 1 | fn test_interleave_run_end_encoded_with_nulls() { |
1035 | 1 | let a: Int32RunArray = vec![Some("a"), Some("a"), None, None, Some("b")] |
1036 | 1 | .into_iter() |
1037 | 1 | .collect(); |
1038 | 1 | let b: Int32RunArray = vec![None, Some("c"), Some("c"), Some("d")] |
1039 | 1 | .into_iter() |
1040 | 1 | .collect(); |
1041 | | |
1042 | 1 | let indices = &[(0, 1), (1, 0), (0, 2), (1, 3), (0, 4)]; |
1043 | 1 | let result = interleave(&[&a, &b], indices).unwrap(); |
1044 | | |
1045 | | // The result should be a RunEndEncoded array |
1046 | 1 | assert!(matches!(result.data_type(), DataType::RunEndEncoded(_, _))); |
1047 | | |
1048 | | // Cast to RunArray to access values |
1049 | 1 | let result_run_array: &Int32RunArray = result.as_any().downcast_ref().unwrap(); |
1050 | | |
1051 | | // Verify the logical values by accessing the logical array directly |
1052 | 1 | let expected = vec![Some("a"), None, None, Some("d"), Some("b")]; |
1053 | 1 | let mut actual = Vec::new(); |
1054 | 5 | for i in 0..result_run_array.len() { |
1055 | 5 | let physical_idx = result_run_array.get_physical_index(i); |
1056 | 5 | if result_run_array.values().is_null(physical_idx) { |
1057 | 2 | actual.push(None); |
1058 | 3 | } else { |
1059 | 3 | let value = result_run_array |
1060 | 3 | .values() |
1061 | 3 | .as_string::<i32>() |
1062 | 3 | .value(physical_idx); |
1063 | 3 | actual.push(Some(value)); |
1064 | 3 | } |
1065 | | } |
1066 | 1 | assert_eq!(actual, expected); |
1067 | 1 | } |
1068 | | |
1069 | | #[test] |
1070 | 1 | fn test_interleave_run_end_encoded_different_run_types() { |
1071 | 1 | let mut builder = PrimitiveRunBuilder::<Int16Type, Int32Type>::new(); |
1072 | 1 | builder.extend([1, 1, 2, 3, 3].into_iter().map(Some)); |
1073 | 1 | let a = builder.finish(); |
1074 | | |
1075 | 1 | let mut builder = PrimitiveRunBuilder::<Int16Type, Int32Type>::new(); |
1076 | 1 | builder.extend([4, 5, 5, 6].into_iter().map(Some)); |
1077 | 1 | let b = builder.finish(); |
1078 | | |
1079 | 1 | let indices = &[(0, 0), (1, 1), (0, 3), (1, 3)]; |
1080 | 1 | let result = interleave(&[&a, &b], indices).unwrap(); |
1081 | | |
1082 | | // The result should be a RunEndEncoded array |
1083 | 1 | assert!(matches!(result.data_type(), DataType::RunEndEncoded(_, _))); |
1084 | | |
1085 | | // Cast to RunArray to access values |
1086 | 1 | let result_run_array: &RunArray<Int16Type> = result.as_any().downcast_ref().unwrap(); |
1087 | | |
1088 | | // Verify the logical values by accessing the logical array directly |
1089 | 1 | let expected = vec![1, 5, 3, 6]; |
1090 | 1 | let mut actual = Vec::new(); |
1091 | 4 | for i in 0..result_run_array.len() { |
1092 | 4 | let physical_idx = result_run_array.get_physical_index(i); |
1093 | 4 | let value = result_run_array |
1094 | 4 | .values() |
1095 | 4 | .as_primitive::<Int32Type>() |
1096 | 4 | .value(physical_idx); |
1097 | 4 | actual.push(value); |
1098 | 4 | } |
1099 | 1 | assert_eq!(actual, expected); |
1100 | 1 | } |
1101 | | |
1102 | | #[test] |
1103 | 1 | fn test_interleave_run_end_encoded_mixed_run_lengths() { |
1104 | 1 | let mut builder = PrimitiveRunBuilder::<Int64Type, Int32Type>::new(); |
1105 | 1 | builder.extend([1, 2, 2, 2, 2, 3, 3, 4].into_iter().map(Some)); |
1106 | 1 | let a = builder.finish(); |
1107 | | |
1108 | 1 | let mut builder = PrimitiveRunBuilder::<Int64Type, Int32Type>::new(); |
1109 | 1 | builder.extend([5, 5, 5, 6, 7, 7, 8, 8].into_iter().map(Some)); |
1110 | 1 | let b = builder.finish(); |
1111 | | |
1112 | 1 | let indices = &[ |
1113 | 1 | (0, 0), // 1 |
1114 | 1 | (1, 2), // 5 |
1115 | 1 | (0, 3), // 2 |
1116 | 1 | (1, 3), // 6 |
1117 | 1 | (0, 6), // 3 |
1118 | 1 | (1, 6), // 8 |
1119 | 1 | (0, 7), // 4 |
1120 | 1 | (1, 4), // 7 |
1121 | 1 | ]; |
1122 | 1 | let result = interleave(&[&a, &b], indices).unwrap(); |
1123 | | |
1124 | | // The result should be a RunEndEncoded array |
1125 | 1 | assert!(matches!(result.data_type(), DataType::RunEndEncoded(_, _))); |
1126 | | |
1127 | | // Cast to RunArray to access values |
1128 | 1 | let result_run_array: &RunArray<Int64Type> = result.as_any().downcast_ref().unwrap(); |
1129 | | |
1130 | | // Verify the logical values by accessing the logical array directly |
1131 | 1 | let expected = vec![1, 5, 2, 6, 3, 8, 4, 7]; |
1132 | 1 | let mut actual = Vec::new(); |
1133 | 8 | for i in 0..result_run_array.len() { |
1134 | 8 | let physical_idx = result_run_array.get_physical_index(i); |
1135 | 8 | let value = result_run_array |
1136 | 8 | .values() |
1137 | 8 | .as_primitive::<Int32Type>() |
1138 | 8 | .value(physical_idx); |
1139 | 8 | actual.push(value); |
1140 | 8 | } |
1141 | 1 | assert_eq!(actual, expected); |
1142 | 1 | } |
1143 | | |
1144 | | #[test] |
1145 | 1 | fn test_interleave_run_end_encoded_empty_runs() { |
1146 | 1 | let mut builder = PrimitiveRunBuilder::<Int32Type, Int32Type>::new(); |
1147 | 1 | builder.extend([1].into_iter().map(Some)); |
1148 | 1 | let a = builder.finish(); |
1149 | | |
1150 | 1 | let mut builder = PrimitiveRunBuilder::<Int32Type, Int32Type>::new(); |
1151 | 1 | builder.extend([2, 2, 2].into_iter().map(Some)); |
1152 | 1 | let b = builder.finish(); |
1153 | | |
1154 | 1 | let indices = &[(0, 0), (1, 1), (1, 2)]; |
1155 | 1 | let result = interleave(&[&a, &b], indices).unwrap(); |
1156 | | |
1157 | | // The result should be a RunEndEncoded array |
1158 | 1 | assert!(matches!(result.data_type(), DataType::RunEndEncoded(_, _))); |
1159 | | |
1160 | | // Cast to RunArray to access values |
1161 | 1 | let result_run_array: &Int32RunArray = result.as_any().downcast_ref().unwrap(); |
1162 | | |
1163 | | // Verify the logical values by accessing the logical array directly |
1164 | 1 | let expected = vec![1, 2, 2]; |
1165 | 1 | let mut actual = Vec::new(); |
1166 | 3 | for i in 0..result_run_array.len() { |
1167 | 3 | let physical_idx = result_run_array.get_physical_index(i); |
1168 | 3 | let value = result_run_array |
1169 | 3 | .values() |
1170 | 3 | .as_primitive::<Int32Type>() |
1171 | 3 | .value(physical_idx); |
1172 | 3 | actual.push(value); |
1173 | 3 | } |
1174 | 1 | assert_eq!(actual, expected); |
1175 | 1 | } |
1176 | | |
1177 | | #[test] |
1178 | 1 | fn test_struct_no_fields() { |
1179 | 1 | let fields = Fields::empty(); |
1180 | 1 | let a = StructArray::try_new_with_length(fields.clone(), vec![], None, 10).unwrap(); |
1181 | 1 | let v = interleave(&[&a], &[(0, 0)]).unwrap(); |
1182 | 1 | assert_eq!(v.len(), 1); |
1183 | 1 | assert_eq!(v.data_type(), &DataType::Struct(fields)); |
1184 | 1 | } |
1185 | | } |