Skip to content

Commit 6227419

Browse files
authored
Speedup interleave_views (4-7x faster) (#7695)
# Which issue does this PR close? We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. Closes ##7688 # Rationale for this change interleave_views is *really* slow - taking up ~25% of the samples in `SortPreservingMergeExec` We can make it faster. <details> ``` interleave str_view(0.0) 100 [0..100, 100..230, 450..1000] time: [369.33 ns 371.42 ns 374.48 ns] change: [−77.355% −77.199% −77.051%] (p = 0.00 < 0.05) Performance has improved. Found 4 outliers among 100 measurements (4.00%) 1 (1.00%) low mild 3 (3.00%) high severe interleave str_view(0.0) 400 [0..100, 100..230, 450..1000] time: [932.11 ns 937.68 ns 945.43 ns] change: [−84.672% −84.528% −84.382%] (p = 0.00 < 0.05) Performance has improved. Found 7 outliers among 100 measurements (7.00%) 3 (3.00%) high mild 4 (4.00%) high severe interleave str_view(0.0) 1024 [0..100, 100..230, 450..1000] time: [2.0938 µs 2.1058 µs 2.1235 µs] change: [−86.449% −86.310% −86.167%] (p = 0.00 < 0.05) Performance has improved. Found 3 outliers among 100 measurements (3.00%) 1 (1.00%) high mild 2 (2.00%) high severe interleave str_view(0.0) 1024 [0..100, 100..230, 450..1000, 0..1000] time: [2.2045 µs 2.2098 µs 2.2170 µs] change: [−84.595% −84.493% −84.401%] (p = 0.00 < 0.05) Performance has improved. Found 3 outliers among 100 measurements (3.00%) 1 (1.00%) low mild 1 (1.00%) high mild 1 (1.00%) high severe ``` </details> # What changes are included in this PR? # Are there any user-facing changes?
1 parent 56ac4dc commit 6227419

File tree

2 files changed

+36
-23
lines changed

2 files changed

+36
-23
lines changed

arrow-select/src/interleave.rs

Lines changed: 33 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,14 @@
1818
//! Interleave elements from multiple arrays
1919
2020
use crate::dictionary::{merge_dictionary_values, should_merge_dictionary_values};
21-
use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder, PrimitiveBuilder};
21+
use arrow_array::builder::{BooleanBufferBuilder, PrimitiveBuilder};
2222
use arrow_array::cast::AsArray;
2323
use arrow_array::types::*;
2424
use arrow_array::*;
2525
use arrow_buffer::{ArrowNativeType, BooleanBuffer, MutableBuffer, NullBuffer, OffsetBuffer};
2626
use arrow_data::transform::MutableArrayData;
2727
use arrow_data::ByteView;
2828
use arrow_schema::{ArrowError, DataType};
29-
use std::collections::HashMap;
3029
use std::sync::Arc;
3130

3231
macro_rules! primitive_helper {
@@ -238,32 +237,43 @@ fn interleave_views<T: ByteViewType>(
238237
indices: &[(usize, usize)],
239238
) -> Result<ArrayRef, ArrowError> {
240239
let interleaved = Interleave::<'_, GenericByteViewArray<T>>::new(values, indices);
241-
let mut views_builder = BufferBuilder::new(indices.len());
242240
let mut buffers = Vec::new();
243241

244-
// (input array_index, input buffer_index) -> output buffer_index
245-
let mut buffer_lookup: HashMap<(usize, u32), u32> = HashMap::new();
246-
for (array_idx, value_idx) in indices {
247-
let array = interleaved.arrays[*array_idx];
248-
let raw_view = array.views().get(*value_idx).unwrap();
249-
let view_len = *raw_view as u32;
250-
if view_len <= 12 {
251-
views_builder.append(*raw_view);
252-
continue;
253-
}
254-
// value is big enough to be in a variadic buffer
255-
let view = ByteView::from(*raw_view);
256-
let new_buffer_idx: &mut u32 = buffer_lookup
257-
.entry((*array_idx, view.buffer_index))
258-
.or_insert_with(|| {
259-
buffers.push(array.data_buffers()[view.buffer_index as usize].clone());
260-
(buffers.len() - 1) as u32
261-
});
262-
views_builder.append(view.with_buffer_index(*new_buffer_idx).into());
242+
// Contains the offsets of start buffer in `buffer_to_new_index`
243+
let mut offsets = Vec::with_capacity(interleaved.arrays.len() + 1);
244+
offsets.push(0);
245+
let mut total_buffers = 0;
246+
for a in interleaved.arrays.iter() {
247+
total_buffers += a.data_buffers().len();
248+
offsets.push(total_buffers);
263249
}
264250

251+
// contains the mapping from old buffer index to new buffer index
252+
let mut buffer_to_new_index = vec![None; total_buffers];
253+
254+
let views: Vec<u128> = indices
255+
.iter()
256+
.map(|(array_idx, value_idx)| {
257+
let array = interleaved.arrays[*array_idx];
258+
let view = array.views().get(*value_idx).unwrap();
259+
let view_len = *view as u32;
260+
if view_len <= 12 {
261+
return *view;
262+
}
263+
// value is big enough to be in a variadic buffer
264+
let view = ByteView::from(*view);
265+
let buffer_to_new_idx = offsets[*array_idx] + view.buffer_index as usize;
266+
let new_buffer_idx: u32 =
267+
*buffer_to_new_index[buffer_to_new_idx].get_or_insert_with(|| {
268+
buffers.push(array.data_buffers()[view.buffer_index as usize].clone());
269+
(buffers.len() - 1) as u32
270+
});
271+
view.with_buffer_index(new_buffer_idx).as_u128()
272+
})
273+
.collect();
274+
265275
let array = unsafe {
266-
GenericByteViewArray::<T>::new_unchecked(views_builder.into(), buffers, interleaved.nulls)
276+
GenericByteViewArray::<T>::new_unchecked(views.into(), buffers, interleaved.nulls)
267277
};
268278
Ok(Arc::new(array))
269279
}

arrow/benches/interleave_kernels.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,13 +77,16 @@ fn add_benchmark(c: &mut Criterion) {
7777
let values = create_string_array_with_len::<i32>(1024, 0.0, 20);
7878
let sparse_dict = create_sparse_dict_from_values::<Int32Type>(1024, 0.0, &values, 10..20);
7979

80+
let string_view = create_string_view_array(1024, 0.0);
81+
8082
let cases: &[(&str, &dyn Array)] = &[
8183
("i32(0.0)", &i32),
8284
("i32(0.5)", &i32_opt),
8385
("str(20, 0.0)", &string),
8486
("str(20, 0.5)", &string_opt),
8587
("dict(20, 0.0)", &dict),
8688
("dict_sparse(20, 0.0)", &sparse_dict),
89+
("str_view(0.0)", &string_view),
8790
];
8891

8992
for (prefix, base) in cases {

0 commit comments

Comments
 (0)