Skip to content

Commit 4b7d771

Browse files
committed
Use more cache-friendly datastructure
1 parent 19bfcbb commit 4b7d771

File tree

1 file changed

+21
-23
lines changed

1 file changed

+21
-23
lines changed

arrow-select/src/interleave.rs

Lines changed: 21 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -239,37 +239,35 @@ fn interleave_views<T: ByteViewType>(
239239
let interleaved = Interleave::<'_, GenericByteViewArray<T>>::new(values, indices);
240240
let mut buffers = Vec::new();
241241

242-
// (input array_index, input buffer_index) -> output buffer_index
243-
// A mapping from (input array_index, input buffer_index) -> output buffer_index
244-
// The outer vec corresponds to the input array index.
245-
// The inner vec corresponds to the buffer index within that input array.
246-
// The value is the index of the buffer in the output array.
247-
let mut buffer_remap: Vec<Vec<Option<u32>>> = interleaved
248-
.arrays
249-
.iter()
250-
.map(|a| vec![None; a.data_buffers().len()])
251-
.collect();
242+
// `offsets[i]` is the position in `buffer_to_new_index` where the buffers of input array `i` start
243+
let mut offsets = Vec::with_capacity(interleaved.arrays.len() + 1);
244+
offsets.push(0);
245+
let mut total_buffers = 0;
246+
for a in interleaved.arrays.iter() {
247+
total_buffers += a.data_buffers().len();
248+
offsets.push(total_buffers);
249+
}
250+
251+
// Maps each input buffer (at `offsets[array_idx] + buffer_index`) to its output buffer index; `None` until that buffer is first used
252+
let mut buffer_to_new_index = vec![None; total_buffers];
252253

253254
let views: Vec<u128> = indices
254255
.iter()
255256
.map(|(array_idx, value_idx)| {
256257
let array = interleaved.arrays[*array_idx];
257-
let raw_view = array.views().get(*value_idx).unwrap();
258-
let view_len = *raw_view as u32;
258+
let view = array.views().get(*value_idx).unwrap();
259+
let view_len = *view as u32;
259260
if view_len <= 12 {
260-
return *raw_view;
261+
return *view;
261262
}
262263
// value is big enough to be in a variadic buffer
263-
let view = ByteView::from(*raw_view);
264-
let new_buffer_idx = match &mut buffer_remap[*array_idx][view.buffer_index as usize] {
265-
Some(idx) => *idx,
266-
opt => {
267-
buffers.push(array.data_buffers()[view.buffer_index as usize].clone());
268-
let new_idx = (buffers.len() - 1) as u32;
269-
*opt = Some(new_idx);
270-
new_idx
271-
}
272-
};
264+
let view = ByteView::from(*view);
265+
let remap_idx = offsets[*array_idx] + view.buffer_index as usize;
266+
let new_buffer_idx: u32 = *buffer_to_new_index[remap_idx].get_or_insert_with(|| {
267+
buffers.push(array.data_buffers()[view.buffer_index as usize].clone());
268+
let new_idx = (buffers.len() - 1) as u32;
269+
new_idx
270+
});
273271
view.with_buffer_index(new_buffer_idx).as_u128()
274272
})
275273
.collect();

0 commit comments

Comments
 (0)