/Users/andrewlamb/Software/arrow-rs/arrow-data/src/transform/utils.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use arrow_buffer::{ArrowNativeType, MutableBuffer, bit_util}; |
19 | | use num_integer::Integer; |
20 | | use num_traits::CheckedAdd; |
21 | | |
22 | | /// extends the `buffer` to be able to hold `len` bits, setting all bits of the new size to zero. |
23 | | #[inline] |
24 | 110 | pub(super) fn resize_for_bits(buffer: &mut MutableBuffer, len: usize) { |
25 | 110 | let needed_bytes = bit_util::ceil(len, 8); |
26 | 110 | if buffer.len() < needed_bytes { |
27 | 2 | buffer.resize(needed_bytes, 0); |
28 | 108 | } |
29 | 110 | } |
30 | | |
31 | 54 | pub(super) fn extend_offsets<T: ArrowNativeType + Integer + CheckedAdd>( |
32 | 54 | buffer: &mut MutableBuffer, |
33 | 54 | mut last_offset: T, |
34 | 54 | offsets: &[T], |
35 | 54 | ) { |
36 | 54 | buffer.reserve(std::mem::size_of_val(offsets)); |
37 | 74 | offsets54 .windows54 (2).for_each54 (|offsets| { |
38 | | // compute the new offset |
39 | 74 | let length = offsets[1] - offsets[0]; |
40 | | // if you hit this appending to a StringArray / BinaryArray it is because you |
41 | | // are trying to add more data than can fit into that type. Try breaking your data into |
42 | | // smaller batches or using LargeStringArray / LargeBinaryArray |
43 | 74 | last_offset = last_offset.checked_add(&length).expect("offset overflow"); |
44 | 74 | buffer.push(last_offset); |
45 | 74 | }); |
46 | 54 | } |
47 | | |
48 | | #[inline] |
49 | 56 | pub(super) unsafe fn get_last_offset<T: ArrowNativeType>(offset_buffer: &MutableBuffer) -> T { |
50 | | // JUSTIFICATION |
51 | | // Benefit |
52 | | // 20% performance improvement extend of variable sized arrays (see bench `mutable_array`) |
53 | | // Soundness |
54 | | // * offset buffer is always extended in slices of T and aligned accordingly. |
55 | | // * Buffer[0] is initialized with one element, 0, and thus `mutable_offsets.len() - 1` is always valid. |
56 | 56 | let (prefix, offsets, suffix) = unsafe { offset_buffer.as_slice().align_to::<T>() }; |
57 | 56 | debug_assert!(prefix.is_empty() && suffix.is_empty()); |
58 | 56 | *unsafe { offsets.get_unchecked(offsets.len() - 1) } |
59 | 56 | } |
60 | | |
#[cfg(test)]
mod tests {
    use crate::transform::utils::extend_offsets;
    use arrow_buffer::MutableBuffer;

    /// Rebasing an element of length 5 onto `i32::MAX - 4` cannot be represented in `i32`,
    /// so `extend_offsets` must panic rather than wrap around.
    #[test]
    #[should_panic(expected = "offset overflow")]
    fn test_overflow() {
        let start: i32 = i32::MAX - 4;
        let source_offsets: [i32; 2] = [0, 5];
        let mut target = MutableBuffer::new(10);
        extend_offsets(&mut target, start, &source_offsets);
    }
}