Skip to content

Commit fa1d079

Browse files
authored
Add interleave kernel (#1523) (#2838)
* Add interleave kernel (#1523) * RAT * Review feedback
1 parent 8adebca commit fa1d079

File tree

4 files changed

+223
-2
lines changed

4 files changed

+223
-2
lines changed
Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use arrow_array::{make_array, new_empty_array, Array, ArrayRef};
19+
use arrow_data::transform::MutableArrayData;
20+
use arrow_schema::ArrowError;
21+
22+
///
23+
/// Takes elements by index from a list of [`Array`], creating a new [`Array`] from those values.
24+
///
25+
/// Each element in `indices` is a pair of `usize` with the first identifying the index
26+
/// of the [`Array`] in `values`, and the second the index of the value within that [`Array`]
27+
///
28+
/// ```text
29+
/// ┌─────────────────┐ ┌─────────┐ ┌─────────────────┐
30+
/// │ A │ │ (0, 0) │ interleave( │ A │
31+
/// ├─────────────────┤ ├─────────┤ [values0, values1], ├─────────────────┤
32+
/// │ D │ │ (1, 0) │ indices │ B │
33+
/// └─────────────────┘ ├─────────┤ ) ├─────────────────┤
34+
/// values array 0 │ (1, 1) │ ─────────────────────────▶ │ C │
35+
/// ├─────────┤ ├─────────────────┤
36+
/// │ (0, 1) │ │ D │
37+
/// └─────────┘ └─────────────────┘
38+
/// ┌─────────────────┐ indices
39+
/// │ B │ array
40+
/// ├─────────────────┤ result
41+
/// │ C │
42+
/// ├─────────────────┤
43+
/// │ E │
44+
/// └─────────────────┘
45+
/// values array 1
46+
/// ```
47+
///
48+
/// For selecting values by index from a single array see [compute::take](crate::compute::take)
49+
pub fn interleave(
50+
values: &[&dyn Array],
51+
indices: &[(usize, usize)],
52+
) -> Result<ArrayRef, ArrowError> {
53+
if values.is_empty() {
54+
return Err(ArrowError::InvalidArgumentError(
55+
"interleave requires input of at least one array".to_string(),
56+
));
57+
}
58+
let data_type = values[0].data_type();
59+
60+
for array in values.iter().skip(1) {
61+
if array.data_type() != data_type {
62+
return Err(ArrowError::InvalidArgumentError(
63+
format!("It is not possible to interleave arrays of different data types ({} and {})",
64+
data_type, array.data_type()),
65+
));
66+
}
67+
}
68+
69+
if indices.is_empty() {
70+
return Ok(new_empty_array(data_type));
71+
}
72+
73+
// TODO: Add specialized implementations (#2864)
74+
75+
interleave_fallback(values, indices)
76+
}
77+
78+
/// Fallback implementation of interleave using [`MutableArrayData`]
79+
fn interleave_fallback(
80+
values: &[&dyn Array],
81+
indices: &[(usize, usize)],
82+
) -> Result<ArrayRef, ArrowError> {
83+
let arrays: Vec<_> = values.iter().map(|x| x.data()).collect();
84+
let mut array_data = MutableArrayData::new(arrays, false, indices.len());
85+
86+
let mut cur_array = indices[0].0;
87+
let mut start_row_idx = indices[0].1;
88+
let mut end_row_idx = start_row_idx + 1;
89+
90+
for (array, row) in indices.iter().skip(1).copied() {
91+
if array == cur_array && row == end_row_idx {
92+
// subsequent row in same batch
93+
end_row_idx += 1;
94+
continue;
95+
}
96+
97+
// emit current batch of rows for current buffer
98+
array_data.extend(cur_array, start_row_idx, end_row_idx);
99+
100+
// start new batch of rows
101+
cur_array = array;
102+
start_row_idx = row;
103+
end_row_idx = start_row_idx + 1;
104+
}
105+
106+
// emit final batch of rows
107+
array_data.extend(cur_array, start_row_idx, end_row_idx);
108+
Ok(make_array(array_data.freeze()))
109+
}
110+
111+
#[cfg(test)]
112+
mod tests {
113+
use super::*;
114+
use arrow_array::builder::{Int32Builder, ListBuilder};
115+
use arrow_array::cast::{as_primitive_array, as_string_array};
116+
use arrow_array::types::Int32Type;
117+
use arrow_array::{Int32Array, ListArray, StringArray};
118+
use arrow_schema::DataType;
119+
120+
#[test]
121+
fn test_primitive() {
122+
let a = Int32Array::from_iter_values([1, 2, 3, 4]);
123+
let b = Int32Array::from_iter_values([5, 6, 7]);
124+
let c = Int32Array::from_iter_values([8, 9, 10]);
125+
let values =
126+
interleave(&[&a, &b, &c], &[(0, 3), (0, 3), (2, 2), (2, 0), (1, 1)]).unwrap();
127+
let v = as_primitive_array::<Int32Type>(&values);
128+
assert_eq!(v.values(), &[4, 4, 10, 8, 6]);
129+
}
130+
131+
#[test]
132+
fn test_primitive_nulls() {
133+
let a = Int32Array::from_iter_values([1, 2, 3, 4]);
134+
let b = Int32Array::from_iter([Some(1), Some(4), None]);
135+
let values =
136+
interleave(&[&a, &b], &[(0, 1), (1, 2), (1, 2), (0, 3), (0, 2)]).unwrap();
137+
let v: Vec<_> = as_primitive_array::<Int32Type>(&values)
138+
.into_iter()
139+
.collect();
140+
assert_eq!(&v, &[Some(2), None, None, Some(4), Some(3)])
141+
}
142+
143+
#[test]
144+
fn test_primitive_empty() {
145+
let a = Int32Array::from_iter_values([1, 2, 3, 4]);
146+
let v = interleave(&[&a], &[]).unwrap();
147+
assert!(v.is_empty());
148+
assert_eq!(v.data_type(), &DataType::Int32);
149+
}
150+
151+
#[test]
152+
fn test_strings() {
153+
let a = StringArray::from_iter_values(["a", "b", "c"]);
154+
let b = StringArray::from_iter_values(["hello", "world", "foo"]);
155+
let values =
156+
interleave(&[&a, &b], &[(0, 2), (0, 2), (1, 0), (1, 1), (0, 1)]).unwrap();
157+
let v = as_string_array(&values);
158+
let values: Vec<_> = v.into_iter().collect();
159+
assert_eq!(
160+
&values,
161+
&[
162+
Some("c"),
163+
Some("c"),
164+
Some("hello"),
165+
Some("world"),
166+
Some("b")
167+
]
168+
)
169+
}
170+
171+
#[test]
172+
fn test_lists() {
173+
// [[1, 2], null, [3]]
174+
let mut a = ListBuilder::new(Int32Builder::new());
175+
a.values().append_value(1);
176+
a.values().append_value(2);
177+
a.append(true);
178+
a.append(false);
179+
a.values().append_value(3);
180+
a.append(true);
181+
let a = a.finish();
182+
183+
// [[4], null, [5, 6, null]]
184+
let mut b = ListBuilder::new(Int32Builder::new());
185+
b.values().append_value(4);
186+
b.append(true);
187+
b.append(false);
188+
b.values().append_value(5);
189+
b.values().append_value(6);
190+
b.values().append_null();
191+
b.append(true);
192+
let b = b.finish();
193+
194+
let values =
195+
interleave(&[&a, &b], &[(0, 2), (0, 1), (1, 0), (1, 2), (1, 1)]).unwrap();
196+
let v = values.as_any().downcast_ref::<ListArray>().unwrap();
197+
198+
// [[3], null, [4], [5, 6, null], null]
199+
let mut expected = ListBuilder::new(Int32Builder::new());
200+
expected.values().append_value(3);
201+
expected.append(true);
202+
expected.append(false);
203+
expected.values().append_value(4);
204+
expected.append(true);
205+
expected.values().append_value(5);
206+
expected.values().append_value(6);
207+
expected.values().append_null();
208+
expected.append(true);
209+
expected.append(false);
210+
let expected = expected.finish();
211+
212+
assert_eq!(v, &expected);
213+
}
214+
}

arrow/src/compute/kernels/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ pub mod comparison;
2828
pub mod concat;
2929
pub mod concat_elements;
3030
pub mod filter;
31+
pub mod interleave;
3132
pub mod length;
3233
pub mod limit;
3334
pub mod partition;

arrow/src/compute/kernels/take.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,20 @@ use num::{ToPrimitive, Zero};
4646
/// ├─────────────────┤ └─────────┘ └─────────────────┘
4747
/// │ E │
4848
/// └─────────────────┘
49-
/// values array indicies array result
49+
/// values array indices array result
5050
/// ```
5151
///
52+
/// For selecting values by index from multiple arrays see [compute::interleave](crate::compute::interleave)
53+
///
5254
/// # Errors
5355
/// This function errors whenever:
5456
/// * An index cannot be casted to `usize` (typically 32 bit architectures)
5557
/// * An index is out of bounds and `options` is set to check bounds.
58+
///
5659
/// # Safety
57-
/// When `options` is not set to check bounds (default), taking indexes after `len` is undefined behavior.
60+
///
61+
/// When `options` is not set to check bounds, taking indexes after `len` will panic.
62+
///
5863
/// # Examples
5964
/// ```
6065
/// use arrow::array::{StringArray, UInt32Array};

arrow/src/compute/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ pub use self::kernels::cast::*;
2929
pub use self::kernels::comparison::*;
3030
pub use self::kernels::concat::*;
3131
pub use self::kernels::filter::*;
32+
pub use self::kernels::interleave::*;
3233
pub use self::kernels::limit::*;
3334
pub use self::kernels::partition::*;
3435
pub use self::kernels::regexp::*;

0 commit comments

Comments
 (0)