/Users/andrewlamb/Software/arrow-rs/arrow-data/src/transform/utils.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use arrow_buffer::{bit_util, ArrowNativeType, MutableBuffer}; |
19 | | use num::{CheckedAdd, Integer}; |
20 | | |
21 | | /// extends the `buffer` to be able to hold `len` bits, setting all bits of the new size to zero. |
22 | | #[inline] |
23 | 47 | pub(super) fn resize_for_bits(buffer: &mut MutableBuffer, len: usize) { |
24 | 47 | let needed_bytes = bit_util::ceil(len, 8); |
25 | 47 | if buffer.len() < needed_bytes { |
26 | 0 | buffer.resize(needed_bytes, 0); |
27 | 47 | } |
28 | 47 | } |
29 | | |
30 | 21 | pub(super) fn extend_offsets<T: ArrowNativeType + Integer + CheckedAdd>( |
31 | 21 | buffer: &mut MutableBuffer, |
32 | 21 | mut last_offset: T, |
33 | 21 | offsets: &[T], |
34 | 21 | ) { |
35 | 21 | buffer.reserve(std::mem::size_of_val(offsets)); |
36 | 43 | offsets21 .windows21 (2).for_each21 (|offsets| { |
37 | | // compute the new offset |
38 | 43 | let length = offsets[1] - offsets[0]; |
39 | | // if you hit this appending to a StringArray / BinaryArray it is because you |
40 | | // are trying to add more data than can fit into that type. Try breaking your data into |
41 | | // smaller batches or using LargeStringArray / LargeBinaryArray |
42 | 43 | last_offset = last_offset.checked_add(&length).expect("offset overflow"); |
43 | 43 | buffer.push(last_offset); |
44 | 43 | }); |
45 | 21 | } |
46 | | |
47 | | #[inline] |
48 | 21 | pub(super) unsafe fn get_last_offset<T: ArrowNativeType>(offset_buffer: &MutableBuffer) -> T { |
49 | | // JUSTIFICATION |
50 | | // Benefit |
51 | | // 20% performance improvement extend of variable sized arrays (see bench `mutable_array`) |
52 | | // Soundness |
53 | | // * offset buffer is always extended in slices of T and aligned accordingly. |
54 | | // * Buffer[0] is initialized with one element, 0, and thus `mutable_offsets.len() - 1` is always valid. |
55 | 21 | let (prefix, offsets, suffix) = offset_buffer.as_slice().align_to::<T>(); |
56 | 21 | debug_assert!(prefix.is_empty() && suffix.is_empty()); |
57 | 21 | *offsets.get_unchecked(offsets.len() - 1) |
58 | 21 | } |
59 | | |
#[cfg(test)]
mod tests {
    use super::extend_offsets;
    use arrow_buffer::MutableBuffer;

    /// Starting at `i32::MAX - 4` and appending an element of length 5 cannot be represented
    /// in `i32`, so `extend_offsets` must panic instead of wrapping around.
    #[test]
    #[should_panic(expected = "offset overflow")]
    fn test_overflow() {
        let start = i32::MAX - 4;
        let mut buffer = MutableBuffer::new(10);
        extend_offsets(&mut buffer, start, &[0, 5]);
    }
}