/Users/andrewlamb/Software/arrow-rs/arrow-buffer/src/buffer/scalar.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use crate::alloc::Deallocation; |
19 | | use crate::buffer::Buffer; |
20 | | use crate::native::ArrowNativeType; |
21 | | use crate::{BufferBuilder, MutableBuffer, OffsetBuffer}; |
22 | | use std::fmt::Formatter; |
23 | | use std::marker::PhantomData; |
24 | | use std::ops::Deref; |
25 | | |
26 | | /// A strongly-typed [`Buffer`] supporting zero-copy cloning and slicing |
27 | | /// |
28 | | /// The easiest way to think about `ScalarBuffer<T>` is being equivalent to a `Arc<Vec<T>>`, |
29 | | /// with the following differences: |
30 | | /// |
31 | | /// - slicing and cloning is O(1). |
32 | | /// - support for external allocated memory (e.g. via FFI). |
33 | | /// |
34 | | /// See [`Buffer`] for more low-level memory management details. |
35 | | /// |
36 | | /// # Example: Convert to/from Vec (without copies) |
37 | | /// |
38 | | /// (See [`Buffer::from_vec`] and [`Buffer::into_vec`] for a lower level API) |
39 | | /// ``` |
40 | | /// # use arrow_buffer::ScalarBuffer; |
41 | | /// // Zero-copy conversion from Vec |
42 | | /// let buffer = ScalarBuffer::from(vec![1, 2, 3]); |
43 | | /// assert_eq!(&buffer, &[1, 2, 3]); |
44 | | /// // convert the buffer back to Vec without copy assuming: |
45 | | /// // 1. the inner buffer is not sliced |
46 | | /// // 2. the inner buffer uses standard allocation |
47 | | /// // 3. there are no other references to the inner buffer |
48 | | /// let vec: Vec<i32> = buffer.into(); |
49 | | /// assert_eq!(&vec, &[1, 2, 3]); |
50 | | /// ``` |
51 | | /// |
52 | | /// # Example: Zero copy slicing |
53 | | /// ``` |
54 | | /// # use arrow_buffer::ScalarBuffer; |
55 | | /// let buffer = ScalarBuffer::from(vec![1, 2, 3]); |
56 | | /// assert_eq!(&buffer, &[1, 2, 3]); |
57 | | /// // Zero-copy slicing |
58 | | /// let sliced = buffer.slice(1, 2); |
59 | | /// assert_eq!(&sliced, &[2, 3]); |
60 | | /// // Original buffer is unchanged |
61 | | /// assert_eq!(&buffer, &[1, 2, 3]); |
62 | | /// // converting the sliced buffer back to Vec incurs a copy |
63 | | /// let vec: Vec<i32> = sliced.into(); |
64 | | /// ``` |
65 | | #[derive(Clone, Default)] |
66 | | pub struct ScalarBuffer<T: ArrowNativeType> { |
67 | | /// Underlying data buffer |
68 | | buffer: Buffer, |
69 | | phantom: PhantomData<T>, |
70 | | } |
71 | | |
72 | | impl<T: ArrowNativeType> std::fmt::Debug for ScalarBuffer<T> { |
73 | 0 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { |
74 | 0 | f.debug_tuple("ScalarBuffer").field(&self.as_ref()).finish() |
75 | 0 | } |
76 | | } |
77 | | |
78 | | impl<T: ArrowNativeType> ScalarBuffer<T> { |
79 | | /// Create a new [`ScalarBuffer`] from a [`Buffer`], and an `offset` |
80 | | /// and `length` in units of `T` |
81 | | /// |
82 | | /// # Panics |
83 | | /// |
84 | | /// This method will panic if |
85 | | /// |
86 | | /// * `offset` or `len` would result in overflow |
87 | | /// * `buffer` is not aligned to a multiple of `std::mem::align_of::<T>` |
88 | | /// * `buffer` is not large enough for the requested slice |
89 | 485k | pub fn new(buffer: Buffer, offset: usize, len: usize) -> Self { |
90 | 485k | let size = std::mem::size_of::<T>(); |
91 | 485k | let byte_offset = offset.checked_mul(size).expect("offset overflow"); |
92 | 485k | let byte_len = len.checked_mul(size).expect("length overflow"); |
93 | 485k | buffer.slice_with_length(byte_offset, byte_len).into() |
94 | 485k | } |
95 | | |
96 | | /// Unsafe function to create a new [`ScalarBuffer`] from a [`Buffer`]. |
97 | | /// Only use for testing purposes. |
98 | | /// |
99 | | /// # Safety |
100 | | /// |
101 | | /// This function is unsafe because it does not check if the `buffer` is aligned |
102 | | pub unsafe fn new_unchecked(buffer: Buffer) -> Self { |
103 | | Self { |
104 | | buffer, |
105 | | phantom: Default::default(), |
106 | | } |
107 | | } |
108 | | |
109 | | /// Free up unused memory. |
110 | 0 | pub fn shrink_to_fit(&mut self) { |
111 | 0 | self.buffer.shrink_to_fit(); |
112 | 0 | } |
113 | | |
114 | | /// Returns a zero-copy slice of this buffer with length `len` and starting at `offset` |
115 | 160k | pub fn slice(&self, offset: usize, len: usize) -> Self { |
116 | 160k | Self::new(self.buffer.clone(), offset, len) |
117 | 160k | } |
118 | | |
119 | | /// Returns the inner [`Buffer`] |
120 | 17 | pub fn inner(&self) -> &Buffer { |
121 | 17 | &self.buffer |
122 | 17 | } |
123 | | |
124 | | /// Returns the inner [`Buffer`], consuming self |
125 | 484k | pub fn into_inner(self) -> Buffer { |
126 | 484k | self.buffer |
127 | 484k | } |
128 | | |
129 | | /// Returns true if this [`ScalarBuffer`] is equal to `other`, using pointer comparisons |
130 | | /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may |
131 | | /// return false when the buffers are logically equal |
132 | | #[inline] |
133 | 2 | pub fn ptr_eq(&self, other: &Self) -> bool { |
134 | 2 | self.buffer.ptr_eq(&other.buffer) |
135 | 2 | } |
136 | | |
137 | | /// Returns the number of elements in the buffer |
138 | 488k | pub fn len(&self) -> usize { |
139 | 488k | self.buffer.len() / std::mem::size_of::<T>() |
140 | 488k | } |
141 | | |
142 | | /// Returns if the buffer is empty |
143 | 41 | pub fn is_empty(&self) -> bool { |
144 | 41 | self.len() == 0 |
145 | 41 | } |
146 | | } |
147 | | |
148 | | impl<T: ArrowNativeType> Deref for ScalarBuffer<T> { |
149 | | type Target = [T]; |
150 | | |
151 | | #[inline] |
152 | 1.78M | fn deref(&self) -> &Self::Target { |
153 | | // SAFETY: Verified alignment in From<Buffer> |
154 | | unsafe { |
155 | 1.78M | std::slice::from_raw_parts( |
156 | 1.78M | self.buffer.as_ptr() as *const T, |
157 | 1.78M | self.buffer.len() / std::mem::size_of::<T>(), |
158 | 1.78M | ) |
159 | | } |
160 | 1.78M | } |
161 | | } |
162 | | |
163 | | impl<T: ArrowNativeType> AsRef<[T]> for ScalarBuffer<T> { |
164 | | #[inline] |
165 | 80.2k | fn as_ref(&self) -> &[T] { |
166 | 80.2k | self |
167 | 80.2k | } |
168 | | } |
169 | | |
170 | | impl<T: ArrowNativeType> From<MutableBuffer> for ScalarBuffer<T> { |
171 | 2 | fn from(value: MutableBuffer) -> Self { |
172 | 2 | Buffer::from(value).into() |
173 | 2 | } |
174 | | } |
175 | | |
176 | | impl<T: ArrowNativeType> From<Buffer> for ScalarBuffer<T> { |
177 | 565k | fn from(buffer: Buffer) -> Self { |
178 | 565k | let align = std::mem::align_of::<T>(); |
179 | 565k | let is_aligned = buffer.as_ptr().align_offset(align) == 0; |
180 | | |
181 | 565k | match buffer.deallocation() { |
182 | 565k | Deallocation::Standard(_) => assert!( |
183 | 565k | is_aligned, |
184 | 0 | "Memory pointer is not aligned with the specified scalar type" |
185 | | ), |
186 | 0 | Deallocation::Custom(_, _) => assert!( |
187 | 0 | is_aligned, |
188 | 0 | "Memory pointer from external source (e.g, FFI) is not aligned with the specified scalar type. Before importing buffer through FFI, please make sure the allocation is aligned." |
189 | | ), |
190 | | } |
191 | | |
192 | 565k | Self { |
193 | 565k | buffer, |
194 | 565k | phantom: Default::default(), |
195 | 565k | } |
196 | 565k | } |
197 | | } |
198 | | |
199 | | impl<T: ArrowNativeType> From<OffsetBuffer<T>> for ScalarBuffer<T> { |
200 | | fn from(value: OffsetBuffer<T>) -> Self { |
201 | | value.into_inner() |
202 | | } |
203 | | } |
204 | | |
205 | | impl<T: ArrowNativeType> From<Vec<T>> for ScalarBuffer<T> { |
206 | 606 | fn from(value: Vec<T>) -> Self { |
207 | 606 | Self { |
208 | 606 | buffer: Buffer::from_vec(value), |
209 | 606 | phantom: Default::default(), |
210 | 606 | } |
211 | 606 | } |
212 | | } |
213 | | |
214 | | impl<T: ArrowNativeType> From<ScalarBuffer<T>> for Vec<T> { |
215 | | fn from(value: ScalarBuffer<T>) -> Self { |
216 | | value |
217 | | .buffer |
218 | | .into_vec() |
219 | | .unwrap_or_else(|buffer| buffer.typed_data::<T>().into()) |
220 | | } |
221 | | } |
222 | | |
223 | | impl<T: ArrowNativeType> From<BufferBuilder<T>> for ScalarBuffer<T> { |
224 | 31 | fn from(mut value: BufferBuilder<T>) -> Self { |
225 | 31 | let len = value.len(); |
226 | 31 | Self::new(value.finish(), 0, len) |
227 | 31 | } |
228 | | } |
229 | | |
230 | | impl<T: ArrowNativeType> FromIterator<T> for ScalarBuffer<T> { |
231 | | #[inline] |
232 | 91 | fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self { |
233 | 91 | iter.into_iter().collect::<Vec<_>>().into() |
234 | 91 | } |
235 | | } |
236 | | |
237 | | impl<'a, T: ArrowNativeType> IntoIterator for &'a ScalarBuffer<T> { |
238 | | type Item = &'a T; |
239 | | type IntoIter = std::slice::Iter<'a, T>; |
240 | | |
241 | 80.1k | fn into_iter(self) -> Self::IntoIter { |
242 | 80.1k | self.as_ref().iter() |
243 | 80.1k | } |
244 | | } |
245 | | |
246 | | impl<T: ArrowNativeType, S: AsRef<[T]> + ?Sized> PartialEq<S> for ScalarBuffer<T> { |
247 | 7 | fn eq(&self, other: &S) -> bool { |
248 | 7 | self.as_ref().eq(other.as_ref()) |
249 | 7 | } |
250 | | } |
251 | | |
252 | | impl<T: ArrowNativeType, const N: usize> PartialEq<ScalarBuffer<T>> for [T; N] { |
253 | 9 | fn eq(&self, other: &ScalarBuffer<T>) -> bool { |
254 | 9 | self.as_ref().eq(other.as_ref()) |
255 | 9 | } |
256 | | } |
257 | | |
258 | | impl<T: ArrowNativeType> PartialEq<ScalarBuffer<T>> for [T] { |
259 | | fn eq(&self, other: &ScalarBuffer<T>) -> bool { |
260 | | self.as_ref().eq(other.as_ref()) |
261 | | } |
262 | | } |
263 | | |
264 | | impl<T: ArrowNativeType> PartialEq<ScalarBuffer<T>> for Vec<T> { |
265 | | fn eq(&self, other: &ScalarBuffer<T>) -> bool { |
266 | | self.as_slice().eq(other.as_ref()) |
267 | | } |
268 | | } |
269 | | |
270 | | /// If T implements Eq, then so does ScalarBuffer. |
271 | | impl<T: ArrowNativeType + Eq> Eq for ScalarBuffer<T> {} |
272 | | |
273 | | #[cfg(test)] |
274 | | mod tests { |
275 | | use std::{ptr::NonNull, sync::Arc}; |
276 | | |
277 | | use super::*; |
278 | | |
279 | | #[test] |
280 | | fn test_basic() { |
281 | | let expected = [0_i32, 1, 2]; |
282 | | let buffer = Buffer::from_iter(expected.iter().cloned()); |
283 | | let typed = ScalarBuffer::<i32>::new(buffer.clone(), 0, 3); |
284 | | assert_eq!(*typed, expected); |
285 | | |
286 | | let typed = ScalarBuffer::<i32>::new(buffer.clone(), 1, 2); |
287 | | assert_eq!(*typed, expected[1..]); |
288 | | |
289 | | let typed = ScalarBuffer::<i32>::new(buffer.clone(), 1, 0); |
290 | | assert!(typed.is_empty()); |
291 | | |
292 | | let typed = ScalarBuffer::<i32>::new(buffer, 3, 0); |
293 | | assert!(typed.is_empty()); |
294 | | } |
295 | | |
296 | | #[test] |
297 | | fn test_debug() { |
298 | | let buffer = ScalarBuffer::from(vec![1, 2, 3]); |
299 | | assert_eq!(format!("{buffer:?}"), "ScalarBuffer([1, 2, 3])"); |
300 | | } |
301 | | |
302 | | #[test] |
303 | | #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")] |
304 | | fn test_unaligned() { |
305 | | let expected = [0_i32, 1, 2]; |
306 | | let buffer = Buffer::from_iter(expected.iter().cloned()); |
307 | | let buffer = buffer.slice(1); |
308 | | ScalarBuffer::<i32>::new(buffer, 0, 2); |
309 | | } |
310 | | |
311 | | #[test] |
312 | | #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")] |
313 | | fn test_length_out_of_bounds() { |
314 | | let buffer = Buffer::from_iter([0_i32, 1, 2]); |
315 | | ScalarBuffer::<i32>::new(buffer, 1, 3); |
316 | | } |
317 | | |
318 | | #[test] |
319 | | #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")] |
320 | | fn test_offset_out_of_bounds() { |
321 | | let buffer = Buffer::from_iter([0_i32, 1, 2]); |
322 | | ScalarBuffer::<i32>::new(buffer, 4, 0); |
323 | | } |
324 | | |
325 | | #[test] |
326 | | #[should_panic(expected = "offset overflow")] |
327 | | fn test_length_overflow() { |
328 | | let buffer = Buffer::from_iter([0_i32, 1, 2]); |
329 | | ScalarBuffer::<i32>::new(buffer, usize::MAX, 1); |
330 | | } |
331 | | |
332 | | #[test] |
333 | | #[should_panic(expected = "offset overflow")] |
334 | | fn test_start_overflow() { |
335 | | let buffer = Buffer::from_iter([0_i32, 1, 2]); |
336 | | ScalarBuffer::<i32>::new(buffer, usize::MAX / 4 + 1, 0); |
337 | | } |
338 | | |
339 | | #[test] |
340 | | #[should_panic(expected = "length overflow")] |
341 | | fn test_end_overflow() { |
342 | | let buffer = Buffer::from_iter([0_i32, 1, 2]); |
343 | | ScalarBuffer::<i32>::new(buffer, 0, usize::MAX / 4 + 1); |
344 | | } |
345 | | |
346 | | #[test] |
347 | | fn convert_from_buffer_builder() { |
348 | | let input = vec![1, 2, 3, 4]; |
349 | | let buffer_builder = BufferBuilder::from(input.clone()); |
350 | | let scalar_buffer = ScalarBuffer::from(buffer_builder); |
351 | | assert_eq!(scalar_buffer.as_ref(), input); |
352 | | } |
353 | | |
354 | | #[test] |
355 | | fn into_vec() { |
356 | | let input = vec![1u8, 2, 3, 4]; |
357 | | |
358 | | // No copy |
359 | | let input_buffer = Buffer::from_vec(input.clone()); |
360 | | let input_ptr = input_buffer.as_ptr(); |
361 | | let input_len = input_buffer.len(); |
362 | | let scalar_buffer = ScalarBuffer::<u8>::new(input_buffer, 0, input_len); |
363 | | let vec = Vec::from(scalar_buffer); |
364 | | assert_eq!(vec.as_slice(), input.as_slice()); |
365 | | assert_eq!(vec.as_ptr(), input_ptr); |
366 | | |
367 | | // Custom allocation - makes a copy |
368 | | let mut input_clone = input.clone(); |
369 | | let input_ptr = NonNull::new(input_clone.as_mut_ptr()).unwrap(); |
370 | | let dealloc = Arc::new(()); |
371 | | let buffer = |
372 | | unsafe { Buffer::from_custom_allocation(input_ptr, input_clone.len(), dealloc as _) }; |
373 | | let scalar_buffer = ScalarBuffer::<u8>::new(buffer, 0, input.len()); |
374 | | let vec = Vec::from(scalar_buffer); |
375 | | assert_eq!(vec, input.as_slice()); |
376 | | assert_ne!(vec.as_ptr(), input_ptr.as_ptr()); |
377 | | |
378 | | // Offset - makes a copy |
379 | | let input_buffer = Buffer::from_vec(input.clone()); |
380 | | let input_ptr = input_buffer.as_ptr(); |
381 | | let input_len = input_buffer.len(); |
382 | | let scalar_buffer = ScalarBuffer::<u8>::new(input_buffer, 1, input_len - 1); |
383 | | let vec = Vec::from(scalar_buffer); |
384 | | assert_eq!(vec.as_slice(), &input[1..]); |
385 | | assert_ne!(vec.as_ptr(), input_ptr); |
386 | | |
387 | | // Inner buffer Arc ref count != 0 - makes a copy |
388 | | let buffer = Buffer::from_slice_ref(input.as_slice()); |
389 | | let scalar_buffer = ScalarBuffer::<u8>::new(buffer, 0, input.len()); |
390 | | let vec = Vec::from(scalar_buffer); |
391 | | assert_eq!(vec, input.as_slice()); |
392 | | assert_ne!(vec.as_ptr(), input.as_ptr()); |
393 | | } |
394 | | |
395 | | #[test] |
396 | | fn scalar_buffer_impl_eq() { |
397 | | fn are_equal<T: Eq>(a: &T, b: &T) -> bool { |
398 | | a.eq(b) |
399 | | } |
400 | | |
401 | | assert!( |
402 | | are_equal( |
403 | | &ScalarBuffer::<i16>::from(vec![23]), |
404 | | &ScalarBuffer::<i16>::from(vec![23]) |
405 | | ), |
406 | | "ScalarBuffer should implement Eq if the inner type does" |
407 | | ); |
408 | | } |
409 | | } |