/Users/andrewlamb/Software/arrow-rs/arrow-buffer/src/buffer/scalar.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use crate::alloc::Deallocation; |
19 | | use crate::buffer::Buffer; |
20 | | use crate::native::ArrowNativeType; |
21 | | use crate::{BufferBuilder, MutableBuffer, OffsetBuffer}; |
22 | | use std::fmt::Formatter; |
23 | | use std::marker::PhantomData; |
24 | | use std::ops::Deref; |
25 | | |
/// A strongly-typed [`Buffer`] supporting zero-copy cloning and slicing
///
/// The easiest way to think about `ScalarBuffer<T>` is being equivalent to an `Arc<Vec<T>>`,
/// with the following differences:
///
/// - slicing and cloning is O(1).
/// - it supports external allocated memory
///
/// ```
/// # use arrow_buffer::ScalarBuffer;
/// // Zero-copy conversion from Vec
/// let buffer = ScalarBuffer::from(vec![1, 2, 3]);
/// assert_eq!(&buffer, &[1, 2, 3]);
///
/// // Zero-copy slicing
/// let sliced = buffer.slice(1, 2);
/// assert_eq!(&sliced, &[2, 3]);
/// ```
#[derive(Clone, Default)]
pub struct ScalarBuffer<T: ArrowNativeType> {
    /// Underlying data buffer
    buffer: Buffer,
    /// Zero-sized marker binding the element type `T`; the alignment
    /// invariant for `T` is enforced in `From<Buffer>` below
    phantom: PhantomData<T>,
}
50 | | |
51 | | impl<T: ArrowNativeType> std::fmt::Debug for ScalarBuffer<T> { |
52 | 0 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { |
53 | 0 | f.debug_tuple("ScalarBuffer").field(&self.as_ref()).finish() |
54 | 0 | } |
55 | | } |
56 | | |
57 | | impl<T: ArrowNativeType> ScalarBuffer<T> { |
58 | | /// Create a new [`ScalarBuffer`] from a [`Buffer`], and an `offset` |
59 | | /// and `length` in units of `T` |
60 | | /// |
61 | | /// # Panics |
62 | | /// |
63 | | /// This method will panic if |
64 | | /// |
65 | | /// * `offset` or `len` would result in overflow |
66 | | /// * `buffer` is not aligned to a multiple of `std::mem::align_of::<T>` |
67 | | /// * `bytes` is not large enough for the requested slice |
68 | 754 | pub fn new(buffer: Buffer, offset: usize, len: usize) -> Self { |
69 | 754 | let size = std::mem::size_of::<T>(); |
70 | 754 | let byte_offset = offset.checked_mul(size).expect("offset overflow"); |
71 | 754 | let byte_len = len.checked_mul(size).expect("length overflow"); |
72 | 754 | buffer.slice_with_length(byte_offset, byte_len).into() |
73 | 754 | } |
74 | | |
75 | | /// Unsafe function to create a new [`ScalarBuffer`] from a [`Buffer`]. |
76 | | /// Only use for testing purpose. |
77 | | /// |
78 | | /// # Safety |
79 | | /// |
80 | | /// This function is unsafe because it does not check if the `buffer` is aligned |
81 | | pub unsafe fn new_unchecked(buffer: Buffer) -> Self { |
82 | | Self { |
83 | | buffer, |
84 | | phantom: Default::default(), |
85 | | } |
86 | | } |
87 | | |
88 | | /// Free up unused memory. |
89 | 0 | pub fn shrink_to_fit(&mut self) { |
90 | 0 | self.buffer.shrink_to_fit(); |
91 | 0 | } |
92 | | |
93 | | /// Returns a zero-copy slice of this buffer with length `len` and starting at `offset` |
94 | 404 | pub fn slice(&self, offset: usize, len: usize) -> Self { |
95 | 404 | Self::new(self.buffer.clone(), offset, len) |
96 | 404 | } |
97 | | |
98 | | /// Returns the inner [`Buffer`] |
99 | 0 | pub fn inner(&self) -> &Buffer { |
100 | 0 | &self.buffer |
101 | 0 | } |
102 | | |
103 | | /// Returns the inner [`Buffer`], consuming self |
104 | 1.00k | pub fn into_inner(self) -> Buffer { |
105 | 1.00k | self.buffer |
106 | 1.00k | } |
107 | | |
108 | | /// Returns true if this [`ScalarBuffer`] is equal to `other`, using pointer comparisons |
109 | | /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may |
110 | | /// return false when the arrays are logically equal |
111 | | #[inline] |
112 | 0 | pub fn ptr_eq(&self, other: &Self) -> bool { |
113 | 0 | self.buffer.ptr_eq(&other.buffer) |
114 | 0 | } |
115 | | |
116 | | /// Returns the number of elements in the buffer |
117 | 3.12k | pub fn len(&self) -> usize { |
118 | 3.12k | self.buffer.len() / std::mem::size_of::<T>() |
119 | 3.12k | } |
120 | | |
121 | | /// Returns if the buffer is empty |
122 | 2 | pub fn is_empty(&self) -> bool { |
123 | 2 | self.len() == 0 |
124 | 2 | } |
125 | | } |
126 | | |
127 | | impl<T: ArrowNativeType> Deref for ScalarBuffer<T> { |
128 | | type Target = [T]; |
129 | | |
130 | | #[inline] |
131 | 3.04k | fn deref(&self) -> &Self::Target { |
132 | | // SAFETY: Verified alignment in From<Buffer> |
133 | | unsafe { |
134 | 3.04k | std::slice::from_raw_parts( |
135 | 3.04k | self.buffer.as_ptr() as *const T, |
136 | 3.04k | self.buffer.len() / std::mem::size_of::<T>(), |
137 | 3.04k | ) |
138 | | } |
139 | 3.04k | } |
140 | | } |
141 | | |
142 | | impl<T: ArrowNativeType> AsRef<[T]> for ScalarBuffer<T> { |
143 | | #[inline] |
144 | 8 | fn as_ref(&self) -> &[T] { |
145 | 8 | self |
146 | 8 | } |
147 | | } |
148 | | |
149 | | impl<T: ArrowNativeType> From<MutableBuffer> for ScalarBuffer<T> { |
150 | 0 | fn from(value: MutableBuffer) -> Self { |
151 | 0 | Buffer::from(value).into() |
152 | 0 | } |
153 | | } |
154 | | |
155 | | impl<T: ArrowNativeType> From<Buffer> for ScalarBuffer<T> { |
156 | 824 | fn from(buffer: Buffer) -> Self { |
157 | 824 | let align = std::mem::align_of::<T>(); |
158 | 824 | let is_aligned = buffer.as_ptr().align_offset(align) == 0; |
159 | | |
160 | 824 | match buffer.deallocation() { |
161 | 824 | Deallocation::Standard(_) => assert!( |
162 | 824 | is_aligned, |
163 | 0 | "Memory pointer is not aligned with the specified scalar type" |
164 | | ), |
165 | | Deallocation::Custom(_, _) => |
166 | 0 | assert!(is_aligned, "Memory pointer from external source (e.g, FFI) is not aligned with the specified scalar type. Before importing buffer through FFI, please make sure the allocation is aligned."), |
167 | | } |
168 | | |
169 | 824 | Self { |
170 | 824 | buffer, |
171 | 824 | phantom: Default::default(), |
172 | 824 | } |
173 | 824 | } |
174 | | } |
175 | | |
176 | | impl<T: ArrowNativeType> From<OffsetBuffer<T>> for ScalarBuffer<T> { |
177 | | fn from(value: OffsetBuffer<T>) -> Self { |
178 | | value.into_inner() |
179 | | } |
180 | | } |
181 | | |
182 | | impl<T: ArrowNativeType> From<Vec<T>> for ScalarBuffer<T> { |
183 | 757 | fn from(value: Vec<T>) -> Self { |
184 | 757 | Self { |
185 | 757 | buffer: Buffer::from_vec(value), |
186 | 757 | phantom: Default::default(), |
187 | 757 | } |
188 | 757 | } |
189 | | } |
190 | | |
191 | | impl<T: ArrowNativeType> From<ScalarBuffer<T>> for Vec<T> { |
192 | | fn from(value: ScalarBuffer<T>) -> Self { |
193 | | value |
194 | | .buffer |
195 | | .into_vec() |
196 | | .unwrap_or_else(|buffer| buffer.typed_data::<T>().into()) |
197 | | } |
198 | | } |
199 | | |
200 | | impl<T: ArrowNativeType> From<BufferBuilder<T>> for ScalarBuffer<T> { |
201 | 0 | fn from(mut value: BufferBuilder<T>) -> Self { |
202 | 0 | let len = value.len(); |
203 | 0 | Self::new(value.finish(), 0, len) |
204 | 0 | } |
205 | | } |
206 | | |
207 | | impl<T: ArrowNativeType> FromIterator<T> for ScalarBuffer<T> { |
208 | | #[inline] |
209 | 0 | fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self { |
210 | 0 | iter.into_iter().collect::<Vec<_>>().into() |
211 | 0 | } |
212 | | } |
213 | | |
214 | | impl<'a, T: ArrowNativeType> IntoIterator for &'a ScalarBuffer<T> { |
215 | | type Item = &'a T; |
216 | | type IntoIter = std::slice::Iter<'a, T>; |
217 | | |
218 | 6 | fn into_iter(self) -> Self::IntoIter { |
219 | 6 | self.as_ref().iter() |
220 | 6 | } |
221 | | } |
222 | | |
223 | | impl<T: ArrowNativeType, S: AsRef<[T]> + ?Sized> PartialEq<S> for ScalarBuffer<T> { |
224 | 2 | fn eq(&self, other: &S) -> bool { |
225 | 2 | self.as_ref().eq(other.as_ref()) |
226 | 2 | } |
227 | | } |
228 | | |
229 | | impl<T: ArrowNativeType, const N: usize> PartialEq<ScalarBuffer<T>> for [T; N] { |
230 | | fn eq(&self, other: &ScalarBuffer<T>) -> bool { |
231 | | self.as_ref().eq(other.as_ref()) |
232 | | } |
233 | | } |
234 | | |
235 | | impl<T: ArrowNativeType> PartialEq<ScalarBuffer<T>> for [T] { |
236 | | fn eq(&self, other: &ScalarBuffer<T>) -> bool { |
237 | | self.as_ref().eq(other.as_ref()) |
238 | | } |
239 | | } |
240 | | |
241 | | impl<T: ArrowNativeType> PartialEq<ScalarBuffer<T>> for Vec<T> { |
242 | | fn eq(&self, other: &ScalarBuffer<T>) -> bool { |
243 | | self.as_slice().eq(other.as_ref()) |
244 | | } |
245 | | } |
246 | | |
/// If T implements Eq, then so does ScalarBuffer.
///
/// Equality is element-wise over the logical `[T]` contents (see the
/// `PartialEq` impl above), so the reflexivity required by `Eq` follows
/// from `T: Eq`.
impl<T: ArrowNativeType + Eq> Eq for ScalarBuffer<T> {}
249 | | |
#[cfg(test)]
mod tests {
    use std::{ptr::NonNull, sync::Arc};

    use super::*;

    // Typed views over the same buffer at various offsets/lengths,
    // including zero-length slices at interior and end positions
    #[test]
    fn test_basic() {
        let expected = [0_i32, 1, 2];
        let buffer = Buffer::from_iter(expected.iter().cloned());
        let typed = ScalarBuffer::<i32>::new(buffer.clone(), 0, 3);
        assert_eq!(*typed, expected);

        let typed = ScalarBuffer::<i32>::new(buffer.clone(), 1, 2);
        assert_eq!(*typed, expected[1..]);

        let typed = ScalarBuffer::<i32>::new(buffer.clone(), 1, 0);
        assert!(typed.is_empty());

        let typed = ScalarBuffer::<i32>::new(buffer, 3, 0);
        assert!(typed.is_empty());
    }

    // Debug output renders the typed contents, not the raw bytes
    #[test]
    fn test_debug() {
        let buffer = ScalarBuffer::from(vec![1, 2, 3]);
        assert_eq!(format!("{buffer:?}"), "ScalarBuffer([1, 2, 3])");
    }

    // Slicing a byte off the front misaligns the buffer for i32;
    // the panic message is the Deallocation::Standard variant
    #[test]
    #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
    fn test_unaligned() {
        let expected = [0_i32, 1, 2];
        let buffer = Buffer::from_iter(expected.iter().cloned());
        let buffer = buffer.slice(1);
        ScalarBuffer::<i32>::new(buffer, 0, 2);
    }

    // offset + len exceeding the buffer is caught by slice_with_length
    #[test]
    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
    fn test_length_out_of_bounds() {
        let buffer = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(buffer, 1, 3);
    }

    // offset alone exceeding the buffer is also caught
    #[test]
    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
    fn test_offset_out_of_bounds() {
        let buffer = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(buffer, 4, 0);
    }

    // usize::MAX * size_of::<i32>() overflows the byte offset computation
    #[test]
    #[should_panic(expected = "offset overflow")]
    fn test_length_overflow() {
        let buffer = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(buffer, usize::MAX, 1);
    }

    // Smallest offset whose byte conversion overflows for a 4-byte type
    #[test]
    #[should_panic(expected = "offset overflow")]
    fn test_start_overflow() {
        let buffer = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(buffer, usize::MAX / 4 + 1, 0);
    }

    // Same overflow boundary, but on the length rather than the offset
    #[test]
    #[should_panic(expected = "length overflow")]
    fn test_end_overflow() {
        let buffer = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(buffer, 0, usize::MAX / 4 + 1);
    }

    #[test]
    fn convert_from_buffer_builder() {
        let input = vec![1, 2, 3, 4];
        let buffer_builder = BufferBuilder::from(input.clone());
        let scalar_buffer = ScalarBuffer::from(buffer_builder);
        assert_eq!(scalar_buffer.as_ref(), input);
    }

    // Exercises each branch of Vec::from(ScalarBuffer): pointer identity
    // proves when the allocation was reused vs copied
    #[test]
    fn into_vec() {
        let input = vec![1u8, 2, 3, 4];

        // No copy
        let input_buffer = Buffer::from_vec(input.clone());
        let input_ptr = input_buffer.as_ptr();
        let input_len = input_buffer.len();
        let scalar_buffer = ScalarBuffer::<u8>::new(input_buffer, 0, input_len);
        let vec = Vec::from(scalar_buffer);
        assert_eq!(vec.as_slice(), input.as_slice());
        assert_eq!(vec.as_ptr(), input_ptr);

        // Custom allocation - makes a copy
        let mut input_clone = input.clone();
        let input_ptr = NonNull::new(input_clone.as_mut_ptr()).unwrap();
        let dealloc = Arc::new(());
        let buffer =
            unsafe { Buffer::from_custom_allocation(input_ptr, input_clone.len(), dealloc as _) };
        let scalar_buffer = ScalarBuffer::<u8>::new(buffer, 0, input.len());
        let vec = Vec::from(scalar_buffer);
        assert_eq!(vec, input.as_slice());
        assert_ne!(vec.as_ptr(), input_ptr.as_ptr());

        // Offset - makes a copy
        let input_buffer = Buffer::from_vec(input.clone());
        let input_ptr = input_buffer.as_ptr();
        let input_len = input_buffer.len();
        let scalar_buffer = ScalarBuffer::<u8>::new(input_buffer, 1, input_len - 1);
        let vec = Vec::from(scalar_buffer);
        assert_eq!(vec.as_slice(), &input[1..]);
        assert_ne!(vec.as_ptr(), input_ptr);

        // Inner buffer Arc ref count != 0 - makes a copy
        let buffer = Buffer::from_slice_ref(input.as_slice());
        let scalar_buffer = ScalarBuffer::<u8>::new(buffer, 0, input.len());
        let vec = Vec::from(scalar_buffer);
        assert_eq!(vec, input.as_slice());
        assert_ne!(vec.as_ptr(), input.as_ptr());
    }

    // Compile-time + runtime check that ScalarBuffer<T: Eq> satisfies Eq
    #[test]
    fn scalar_buffer_impl_eq() {
        fn are_equal<T: Eq>(a: &T, b: &T) -> bool {
            a.eq(b)
        }

        assert!(
            are_equal(
                &ScalarBuffer::<i16>::from(vec![23]),
                &ScalarBuffer::<i16>::from(vec![23])
            ),
            "ScalarBuffer should implement Eq if the inner type does"
        );
    }
}