Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-buffer/src/buffer/immutable.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use std::alloc::Layout;
19
use std::fmt::Debug;
20
use std::ptr::NonNull;
21
use std::sync::Arc;
22
23
use crate::alloc::{Allocation, Deallocation};
24
use crate::util::bit_chunk_iterator::{BitChunks, UnalignedBitChunk};
25
use crate::BufferBuilder;
26
use crate::{bit_util, bytes::Bytes, native::ArrowNativeType};
27
28
#[cfg(feature = "pool")]
29
use crate::pool::MemoryPool;
30
31
use super::ops::bitwise_unary_op_helper;
32
use super::{MutableBuffer, ScalarBuffer};
33
34
/// A contiguous memory region that can be shared with other buffers and across
35
/// thread boundaries that stores Arrow data.
36
///
37
/// `Buffer`s can be sliced and cloned without copying the underlying data and can
38
/// be created from memory allocated by non-Rust sources such as C/C++.
39
///
40
/// # Example: Create a `Buffer` from a `Vec` (without copying)
41
/// ```
42
/// # use arrow_buffer::Buffer;
43
/// let vec: Vec<u32> = vec![1, 2, 3];
44
/// let buffer = Buffer::from(vec);
45
/// ```
46
///
47
/// # Example: Convert a `Buffer` to a `Vec` (without copying)
48
///
49
/// Use [`Self::into_vec`] to convert a `Buffer` back into a `Vec` if there are
50
/// no other references and the types are aligned correctly.
51
/// ```
52
/// # use arrow_buffer::Buffer;
53
/// # let vec: Vec<u32> = vec![1, 2, 3];
54
/// # let buffer = Buffer::from(vec);
55
/// // convert the buffer back into a Vec of u32
56
/// // note this will fail if the buffer is shared or not aligned correctly
57
/// let vec: Vec<u32> = buffer.into_vec().unwrap();
58
/// ```
59
///
60
/// # Example: Create a `Buffer` from a [`bytes::Bytes`] (without copying)
61
///
62
/// [`bytes::Bytes`] is a common type in the Rust ecosystem for shared memory
63
/// regions. You can create a buffer from a `Bytes` instance using the `From`
64
/// implementation, also without copying.
65
///
66
/// ```
67
/// # use arrow_buffer::Buffer;
68
/// let bytes = bytes::Bytes::from("hello");
69
/// let buffer = Buffer::from(bytes);
70
///```
71
#[derive(Clone, Debug)]
72
pub struct Buffer {
73
    /// the internal byte buffer.
74
    data: Arc<Bytes>,
75
76
    /// Pointer into `data` valid
77
    ///
78
    /// We store a pointer instead of an offset to avoid pointer arithmetic
79
    /// which causes LLVM to fail to vectorise code correctly
80
    ptr: *const u8,
81
82
    /// Byte length of the buffer.
83
    ///
84
    /// Must be less than or equal to `data.len()`
85
    length: usize,
86
}
87
88
impl Default for Buffer {
89
    #[inline]
90
    fn default() -> Self {
91
        MutableBuffer::default().into()
92
    }
93
}
94
95
impl PartialEq for Buffer {
96
0
    fn eq(&self, other: &Self) -> bool {
97
0
        self.as_slice().eq(other.as_slice())
98
0
    }
99
}
100
101
impl Eq for Buffer {}
102
103
unsafe impl Send for Buffer where Bytes: Send {}
104
unsafe impl Sync for Buffer where Bytes: Sync {}
105
106
impl Buffer {
107
    /// Create a new Buffer from a (internal) `Bytes`
108
    ///
109
    /// NOTE despite the same name, `Bytes` is an internal struct in arrow-rs
110
    /// and is different than [`bytes::Bytes`].
111
    ///
112
    /// See examples on [`Buffer`] for ways to create a buffer from a [`bytes::Bytes`].
113
    #[deprecated(since = "54.1.0", note = "Use Buffer::from instead")]
114
0
    pub fn from_bytes(bytes: Bytes) -> Self {
115
0
        Self::from(bytes)
116
0
    }
117
118
    /// Returns the offset, in bytes, of `Self::ptr` to `Self::data`
119
    ///
120
    /// self.ptr and self.data can be different after slicing or advancing the buffer.
121
0
    pub fn ptr_offset(&self) -> usize {
122
        // Safety: `ptr` is always in bounds of `data`.
123
0
        unsafe { self.ptr.offset_from(self.data.ptr().as_ptr()) as usize }
124
0
    }
125
126
    /// Returns the pointer to the start of the buffer without the offset.
127
0
    pub fn data_ptr(&self) -> NonNull<u8> {
128
0
        self.data.ptr()
129
0
    }
130
131
    /// Returns the number of strong references to the buffer.
132
    ///
133
    /// This method is safe but if the buffer is shared across multiple threads
134
    /// the underlying value could change between calling this method and using
135
    /// the result.
136
0
    pub fn strong_count(&self) -> usize {
137
0
        Arc::strong_count(&self.data)
138
0
    }
139
140
    /// Create a [`Buffer`] from the provided [`Vec`] without copying
141
    #[inline]
142
1.26k
    pub fn from_vec<T: ArrowNativeType>(vec: Vec<T>) -> Self {
143
1.26k
        MutableBuffer::from(vec).into()
144
1.26k
    }
145
146
    /// Initializes a [Buffer] from a slice of items.
147
5
    pub fn from_slice_ref<U: ArrowNativeType, T: AsRef<[U]>>(items: T) -> Self {
148
5
        let slice = items.as_ref();
149
5
        let capacity = std::mem::size_of_val(slice);
150
5
        let mut buffer = MutableBuffer::with_capacity(capacity);
151
5
        buffer.extend_from_slice(slice);
152
5
        buffer.into()
153
5
    }
154
155
    /// Creates a buffer from an existing memory region.
156
    ///
157
    /// Ownership of the memory is tracked via reference counting
158
    /// and the memory will be freed using the `drop` method of
159
    /// [crate::alloc::Allocation] when the reference count reaches zero.
160
    ///
161
    /// # Arguments
162
    ///
163
    /// * `ptr` - Pointer to raw parts
164
    /// * `len` - Length of raw parts in **bytes**
165
    /// * `owner` - A [crate::alloc::Allocation] which is responsible for freeing that data
166
    ///
167
    /// # Safety
168
    ///
169
    /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` bytes
170
0
    pub unsafe fn from_custom_allocation(
171
0
        ptr: NonNull<u8>,
172
0
        len: usize,
173
0
        owner: Arc<dyn Allocation>,
174
0
    ) -> Self {
175
0
        Buffer::build_with_arguments(ptr, len, Deallocation::Custom(owner, len))
176
0
    }
177
178
    /// Auxiliary method to create a new Buffer
179
0
    unsafe fn build_with_arguments(
180
0
        ptr: NonNull<u8>,
181
0
        len: usize,
182
0
        deallocation: Deallocation,
183
0
    ) -> Self {
184
0
        let bytes = Bytes::new(ptr, len, deallocation);
185
0
        let ptr = bytes.as_ptr();
186
0
        Buffer {
187
0
            ptr,
188
0
            data: Arc::new(bytes),
189
0
            length: len,
190
0
        }
191
0
    }
192
193
    /// Returns the number of bytes in the buffer
194
    #[inline]
195
7.27k
    pub fn len(&self) -> usize {
196
7.27k
        self.length
197
7.27k
    }
198
199
    /// Returns the capacity of this buffer.
200
    /// For externally owned buffers, this returns zero
201
    #[inline]
202
0
    pub fn capacity(&self) -> usize {
203
0
        self.data.capacity()
204
0
    }
205
206
    /// Tries to shrink the capacity of the buffer as much as possible, freeing unused memory.
207
    ///
208
    /// If the buffer is shared, this is a no-op.
209
    ///
210
    /// If the memory was allocated with a custom allocator, this is a no-op.
211
    ///
212
    /// If the capacity is already less than or equal to the desired capacity, this is a no-op.
213
    ///
214
    /// The memory region will be reallocated using `std::alloc::realloc`.
215
0
    pub fn shrink_to_fit(&mut self) {
216
0
        let offset = self.ptr_offset();
217
0
        let is_empty = self.is_empty();
218
0
        let desired_capacity = if is_empty {
219
0
            0
220
        } else {
221
            // For realloc to work, we cannot free the elements before the offset
222
0
            offset + self.len()
223
        };
224
0
        if desired_capacity < self.capacity() {
225
0
            if let Some(bytes) = Arc::get_mut(&mut self.data) {
226
0
                if bytes.try_realloc(desired_capacity).is_ok() {
227
                    // Realloc complete - update our pointer into `bytes`:
228
0
                    self.ptr = if is_empty {
229
0
                        bytes.as_ptr()
230
                    } else {
231
                        // SAFETY: we kept all elements leading up to the offset
232
0
                        unsafe { bytes.as_ptr().add(offset) }
233
                    }
234
0
                } else {
235
0
                    // Failure to reallocate is fine; we just failed to free up memory.
236
0
                }
237
0
            }
238
0
        }
239
0
    }
240
241
    /// Returns true if the buffer is empty.
242
    #[inline]
243
3
    pub fn is_empty(&self) -> bool {
244
3
        self.length == 0
245
3
    }
246
247
    /// Returns the byte slice stored in this buffer
248
1.95k
    pub fn as_slice(&self) -> &[u8] {
249
1.95k
        unsafe { std::slice::from_raw_parts(self.ptr, self.length) }
250
1.95k
    }
251
252
824
    pub(crate) fn deallocation(&self) -> &Deallocation {
253
824
        self.data.deallocation()
254
824
    }
255
256
    /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`.
257
    ///
258
    /// This function is `O(1)` and does not copy any data, allowing the
259
    /// same memory region to be shared between buffers.
260
    ///
261
    /// # Panics
262
    ///
263
    /// Panics iff `offset` is larger than `len`.
264
0
    pub fn slice(&self, offset: usize) -> Self {
265
0
        let mut s = self.clone();
266
0
        s.advance(offset);
267
0
        s
268
0
    }
269
270
    /// Increases the offset of this buffer by `offset`
271
    ///
272
    /// # Panics
273
    ///
274
    /// Panics iff `offset` is larger than `len`.
275
    #[inline]
276
0
    pub fn advance(&mut self, offset: usize) {
277
0
        assert!(
278
0
            offset <= self.length,
279
0
            "the offset of the new Buffer cannot exceed the existing length: offset={} length={}",
280
            offset,
281
            self.length
282
        );
283
0
        self.length -= offset;
284
        // Safety:
285
        // This cannot overflow as
286
        // `self.offset + self.length < self.data.len()`
287
        // `offset < self.length`
288
0
        self.ptr = unsafe { self.ptr.add(offset) };
289
0
    }
290
291
    /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`,
292
    /// with `length` bytes.
293
    ///
294
    /// This function is `O(1)` and does not copy any data, allowing the same
295
    /// memory region to be shared between buffers.
296
    ///
297
    /// # Panics
298
    /// Panics iff `(offset + length)` is larger than the existing length.
299
761
    pub fn slice_with_length(&self, offset: usize, length: usize) -> Self {
300
761
        assert!(
301
761
            offset.saturating_add(length) <= self.length,
302
0
            "the offset of the new Buffer cannot exceed the existing length: slice offset={offset} length={length} selflen={}",
303
            self.length
304
        );
305
        // Safety:
306
        // offset + length <= self.length
307
761
        let ptr = unsafe { self.ptr.add(offset) };
308
761
        Self {
309
761
            data: self.data.clone(),
310
761
            ptr,
311
761
            length,
312
761
        }
313
761
    }
314
315
    /// Returns a pointer to the start of this buffer.
316
    ///
317
    /// Note that this should be used cautiously, and the returned pointer should not be
318
    /// stored anywhere, to avoid dangling pointers.
319
    #[inline]
320
5.01k
    pub fn as_ptr(&self) -> *const u8 {
321
5.01k
        self.ptr
322
5.01k
    }
323
324
    /// View buffer as a slice of a specific type.
325
    ///
326
    /// # Panics
327
    ///
328
    /// This function panics if the underlying buffer is not aligned
329
    /// correctly for type `T`.
330
516
    pub fn typed_data<T: ArrowNativeType>(&self) -> &[T] {
331
        // SAFETY
332
        // ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect
333
        // implementation outside this crate, and this method checks alignment
334
516
        let (prefix, offsets, suffix) = unsafe { self.as_slice().align_to::<T>() };
335
516
        assert!(prefix.is_empty() && suffix.is_empty());
336
516
        offsets
337
516
    }
338
339
    /// Returns a slice of this buffer starting at a certain bit offset.
340
    /// If the offset is byte-aligned the returned buffer is a shallow clone,
341
    /// otherwise a new buffer is allocated and filled with a copy of the bits in the range.
342
0
    pub fn bit_slice(&self, offset: usize, len: usize) -> Self {
343
0
        if offset % 8 == 0 {
344
0
            return self.slice_with_length(offset / 8, bit_util::ceil(len, 8));
345
0
        }
346
347
0
        bitwise_unary_op_helper(self, offset, len, |a| a)
348
0
    }
349
350
    /// Returns a `BitChunks` instance which can be used to iterate over this buffers bits
351
    /// in larger chunks and starting at arbitrary bit offsets.
352
    /// Note that both `offset` and `length` are measured in bits.
353
0
    pub fn bit_chunks(&self, offset: usize, len: usize) -> BitChunks<'_> {
354
0
        BitChunks::new(self.as_slice(), offset, len)
355
0
    }
356
357
    /// Returns the number of 1-bits in this buffer, starting from `offset` with `length` bits
358
    /// inspected. Note that both `offset` and `length` are measured in bits.
359
292
    pub fn count_set_bits_offset(&self, offset: usize, len: usize) -> usize {
360
292
        UnalignedBitChunk::new(self.as_slice(), offset, len).count_ones()
361
292
    }
362
363
    /// Returns `MutableBuffer` for mutating the buffer if this buffer is not shared.
364
    /// Returns `Err` if this is shared or its allocation is from an external source or
365
    /// it is not allocated with alignment [`ALIGNMENT`]
366
    ///
367
    /// [`ALIGNMENT`]: crate::alloc::ALIGNMENT
368
0
    pub fn into_mutable(self) -> Result<MutableBuffer, Self> {
369
0
        let ptr = self.ptr;
370
0
        let length = self.length;
371
0
        Arc::try_unwrap(self.data)
372
0
            .and_then(|bytes| {
373
                // The pointer of underlying buffer should not be offset.
374
0
                assert_eq!(ptr, bytes.ptr().as_ptr());
375
0
                MutableBuffer::from_bytes(bytes).map_err(Arc::new)
376
0
            })
377
0
            .map_err(|bytes| Buffer {
378
0
                data: bytes,
379
0
                ptr,
380
0
                length,
381
0
            })
382
0
    }
383
384
    /// Converts self into a `Vec`, if possible.
385
    ///
386
    /// This can be used to reuse / mutate the underlying data.
387
    ///
388
    /// # Errors
389
    ///
390
    /// Returns `Err(self)` if
391
    /// 1. this buffer does not have the same [`Layout`] as the destination Vec
392
    /// 2. contains a non-zero offset
393
    /// 3. The buffer is shared
394
    pub fn into_vec<T: ArrowNativeType>(self) -> Result<Vec<T>, Self> {
395
        let layout = match self.data.deallocation() {
396
            Deallocation::Standard(l) => l,
397
            _ => return Err(self), // Custom allocation
398
        };
399
400
        if self.ptr != self.data.as_ptr() {
401
            return Err(self); // Data is offset
402
        }
403
404
        let v_capacity = layout.size() / std::mem::size_of::<T>();
405
        match Layout::array::<T>(v_capacity) {
406
            Ok(expected) if layout == &expected => {}
407
            _ => return Err(self), // Incorrect layout
408
        }
409
410
        let length = self.length;
411
        let ptr = self.ptr;
412
        let v_len = self.length / std::mem::size_of::<T>();
413
414
        Arc::try_unwrap(self.data)
415
            .map(|bytes| unsafe {
416
                let ptr = bytes.ptr().as_ptr() as _;
417
                std::mem::forget(bytes);
418
                // Safety
419
                // Verified that bytes layout matches that of Vec
420
                Vec::from_raw_parts(ptr, v_len, v_capacity)
421
            })
422
            .map_err(|bytes| Buffer {
423
                data: bytes,
424
                ptr,
425
                length,
426
            })
427
    }
428
429
    /// Returns true if this [`Buffer`] is equal to `other`, using pointer comparisons
430
    /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
431
    /// return false when the arrays are logically equal
432
    #[inline]
433
3
    pub fn ptr_eq(&self, other: &Self) -> bool {
434
3
        self.ptr == other.ptr && self.length == other.length
435
3
    }
436
437
    /// Register this [`Buffer`] with the provided [`MemoryPool`]
438
    ///
439
    /// This claims the memory used by this buffer in the pool, allowing for
440
    /// accurate accounting of memory usage. Any prior reservation will be
441
    /// released so this works well when the buffer is being shared among
442
    /// multiple arrays.
443
    #[cfg(feature = "pool")]
444
    pub fn claim(&self, pool: &dyn MemoryPool) {
445
        self.data.claim(pool)
446
    }
447
}
448
449
/// Note that here we deliberately do not implement
450
/// `impl<T: AsRef<[u8]>> From<T> for Buffer`
451
/// As it would accept `Buffer::from(vec![...])` that would cause an unexpected copy.
452
/// Instead, we ask user to be explicit when copying is occurring, e.g., `Buffer::from(vec![...].to_byte_slice())`.
453
/// For zero-copy conversion, user should use `Buffer::from_vec(vec![...])`.
454
///
455
/// Since we removed impl for `AsRef<u8>`, we added the following three specific implementations to reduce API breakage.
456
/// See <https://github.com/apache/arrow-rs/issues/6033> for more discussion on this.
457
impl From<&[u8]> for Buffer {
458
0
    fn from(p: &[u8]) -> Self {
459
0
        Self::from_slice_ref(p)
460
0
    }
461
}
462
463
impl<const N: usize> From<[u8; N]> for Buffer {
464
    fn from(p: [u8; N]) -> Self {
465
        Self::from_slice_ref(p)
466
    }
467
}
468
469
impl<const N: usize> From<&[u8; N]> for Buffer {
470
    fn from(p: &[u8; N]) -> Self {
471
        Self::from_slice_ref(p)
472
    }
473
}
474
475
impl<T: ArrowNativeType> From<Vec<T>> for Buffer {
476
328
    fn from(value: Vec<T>) -> Self {
477
328
        Self::from_vec(value)
478
328
    }
479
}
480
481
impl<T: ArrowNativeType> From<ScalarBuffer<T>> for Buffer {
482
    fn from(value: ScalarBuffer<T>) -> Self {
483
        value.into_inner()
484
    }
485
}
486
487
/// Convert from internal `Bytes` (not [`bytes::Bytes`]) to `Buffer`
488
impl From<Bytes> for Buffer {
489
    #[inline]
490
1.68k
    fn from(bytes: Bytes) -> Self {
491
1.68k
        let length = bytes.len();
492
1.68k
        let ptr = bytes.as_ptr();
493
1.68k
        Self {
494
1.68k
            data: Arc::new(bytes),
495
1.68k
            ptr,
496
1.68k
            length,
497
1.68k
        }
498
1.68k
    }
499
}
500
501
/// Convert from [`bytes::Bytes`], not internal `Bytes` to `Buffer`
502
impl From<bytes::Bytes> for Buffer {
503
0
    fn from(bytes: bytes::Bytes) -> Self {
504
0
        let bytes: Bytes = bytes.into();
505
0
        Self::from(bytes)
506
0
    }
507
}
508
509
/// Create a `Buffer` instance by storing the boolean values into the buffer
510
impl FromIterator<bool> for Buffer {
511
4
    fn from_iter<I>(iter: I) -> Self
512
4
    where
513
4
        I: IntoIterator<Item = bool>,
514
    {
515
4
        MutableBuffer::from_iter(iter).into()
516
4
    }
517
}
518
519
impl std::ops::Deref for Buffer {
520
    type Target = [u8];
521
522
563
    fn deref(&self) -> &[u8] {
523
563
        unsafe { std::slice::from_raw_parts(self.as_ptr(), self.len()) }
524
563
    }
525
}
526
527
impl From<MutableBuffer> for Buffer {
528
    #[inline]
529
1.68k
    fn from(buffer: MutableBuffer) -> Self {
530
1.68k
        buffer.into_buffer()
531
1.68k
    }
532
}
533
534
impl<T: ArrowNativeType> From<BufferBuilder<T>> for Buffer {
535
    fn from(mut value: BufferBuilder<T>) -> Self {
536
        value.finish()
537
    }
538
}
539
540
impl Buffer {
541
    /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length.
542
    ///
543
    /// Prefer this to `collect` whenever possible, as it is ~60% faster.
544
    ///
545
    /// # Example
546
    /// ```
547
    /// # use arrow_buffer::buffer::Buffer;
548
    /// let v = vec![1u32];
549
    /// let iter = v.iter().map(|x| x * 2);
550
    /// let buffer = unsafe { Buffer::from_trusted_len_iter(iter) };
551
    /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
552
    /// ```
553
    /// # Safety
554
    /// This method assumes that the iterator's size is correct and is undefined behavior
555
    /// to use it on an iterator that reports an incorrect length.
556
    // This implementation is required for two reasons:
557
    // 1. there is no trait `TrustedLen` in stable rust and therefore
558
    //    we can't specialize `extend` for `TrustedLen` like `Vec` does.
559
    // 2. `from_trusted_len_iter` is faster.
560
    #[inline]
561
    pub unsafe fn from_trusted_len_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
562
        iterator: I,
563
    ) -> Self {
564
        MutableBuffer::from_trusted_len_iter(iterator).into()
565
    }
566
567
    /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length or errors
568
    /// if any of the items of the iterator is an error.
569
    /// Prefer this to `collect` whenever possible, as it is ~60% faster.
570
    /// # Safety
571
    /// This method assumes that the iterator's size is correct and is undefined behavior
572
    /// to use it on an iterator that reports an incorrect length.
573
    #[inline]
574
    pub unsafe fn try_from_trusted_len_iter<
575
        E,
576
        T: ArrowNativeType,
577
        I: Iterator<Item = Result<T, E>>,
578
    >(
579
        iterator: I,
580
    ) -> Result<Self, E> {
581
        Ok(MutableBuffer::try_from_trusted_len_iter(iterator)?.into())
582
    }
583
}
584
585
impl<T: ArrowNativeType> FromIterator<T> for Buffer {
586
134
    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
587
134
        let vec = Vec::from_iter(iter);
588
134
        Buffer::from_vec(vec)
589
134
    }
590
}
591
592
#[cfg(test)]
593
mod tests {
594
    use crate::i256;
595
    use std::panic::{RefUnwindSafe, UnwindSafe};
596
    use std::thread;
597
598
    use super::*;
599
600
    #[test]
601
    fn test_buffer_data_equality() {
602
        let buf1 = Buffer::from(&[0, 1, 2, 3, 4]);
603
        let buf2 = Buffer::from(&[0, 1, 2, 3, 4]);
604
        assert_eq!(buf1, buf2);
605
606
        // slice with same offset and same length should still preserve equality
607
        let buf3 = buf1.slice(2);
608
        assert_ne!(buf1, buf3);
609
        let buf4 = buf2.slice_with_length(2, 3);
610
        assert_eq!(buf3, buf4);
611
612
        // Different capacities should still preserve equality
613
        let mut buf2 = MutableBuffer::new(65);
614
        buf2.extend_from_slice(&[0u8, 1, 2, 3, 4]);
615
616
        let buf2 = buf2.into();
617
        assert_eq!(buf1, buf2);
618
619
        // unequal because of different elements
620
        let buf2 = Buffer::from(&[0, 0, 2, 3, 4]);
621
        assert_ne!(buf1, buf2);
622
623
        // unequal because of different length
624
        let buf2 = Buffer::from(&[0, 1, 2, 3]);
625
        assert_ne!(buf1, buf2);
626
    }
627
628
    #[test]
629
    fn test_from_raw_parts() {
630
        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
631
        assert_eq!(5, buf.len());
632
        assert!(!buf.as_ptr().is_null());
633
        assert_eq!([0, 1, 2, 3, 4], buf.as_slice());
634
    }
635
636
    #[test]
637
    fn test_from_vec() {
638
        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
639
        assert_eq!(5, buf.len());
640
        assert!(!buf.as_ptr().is_null());
641
        assert_eq!([0, 1, 2, 3, 4], buf.as_slice());
642
    }
643
644
    #[test]
645
    fn test_copy() {
646
        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
647
        let buf2 = buf;
648
        assert_eq!(5, buf2.len());
649
        assert_eq!(64, buf2.capacity());
650
        assert!(!buf2.as_ptr().is_null());
651
        assert_eq!([0, 1, 2, 3, 4], buf2.as_slice());
652
    }
653
654
    #[test]
655
    fn test_slice() {
656
        let buf = Buffer::from(&[2, 4, 6, 8, 10]);
657
        let buf2 = buf.slice(2);
658
659
        assert_eq!([6, 8, 10], buf2.as_slice());
660
        assert_eq!(3, buf2.len());
661
        assert_eq!(unsafe { buf.as_ptr().offset(2) }, buf2.as_ptr());
662
663
        let buf3 = buf2.slice_with_length(1, 2);
664
        assert_eq!([8, 10], buf3.as_slice());
665
        assert_eq!(2, buf3.len());
666
        assert_eq!(unsafe { buf.as_ptr().offset(3) }, buf3.as_ptr());
667
668
        let buf4 = buf.slice(5);
669
        let empty_slice: [u8; 0] = [];
670
        assert_eq!(empty_slice, buf4.as_slice());
671
        assert_eq!(0, buf4.len());
672
        assert!(buf4.is_empty());
673
        assert_eq!(buf2.slice_with_length(2, 1).as_slice(), &[10]);
674
    }
675
676
    #[test]
677
    fn test_shrink_to_fit() {
678
        let original = Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7]);
679
        assert_eq!(original.as_slice(), &[0, 1, 2, 3, 4, 5, 6, 7]);
680
        assert_eq!(original.capacity(), 64);
681
682
        let slice = original.slice_with_length(2, 3);
683
        drop(original); // Make sure the buffer isn't shared (or shrink_to_fit won't work)
684
        assert_eq!(slice.as_slice(), &[2, 3, 4]);
685
        assert_eq!(slice.capacity(), 64);
686
687
        let mut shrunk = slice;
688
        shrunk.shrink_to_fit();
689
        assert_eq!(shrunk.as_slice(), &[2, 3, 4]);
690
        assert_eq!(shrunk.capacity(), 5); // shrink_to_fit is allowed to keep the elements before the offset
691
692
        // Test that we can handle empty slices:
693
        let empty_slice = shrunk.slice_with_length(1, 0);
694
        drop(shrunk); // Make sure the buffer isn't shared (or shrink_to_fit won't work)
695
        assert_eq!(empty_slice.as_slice(), &[]);
696
        assert_eq!(empty_slice.capacity(), 5);
697
698
        let mut shrunk_empty = empty_slice;
699
        shrunk_empty.shrink_to_fit();
700
        assert_eq!(shrunk_empty.as_slice(), &[]);
701
        assert_eq!(shrunk_empty.capacity(), 0);
702
    }
703
704
    #[test]
705
    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
706
    fn test_slice_offset_out_of_bound() {
707
        let buf = Buffer::from(&[2, 4, 6, 8, 10]);
708
        buf.slice(6);
709
    }
710
711
    #[test]
712
    fn test_access_concurrently() {
713
        let buffer = Buffer::from([1, 2, 3, 4, 5]);
714
        let buffer2 = buffer.clone();
715
        assert_eq!([1, 2, 3, 4, 5], buffer.as_slice());
716
717
        let buffer_copy = thread::spawn(move || {
718
            // access buffer in another thread.
719
            buffer
720
        })
721
        .join();
722
723
        assert!(buffer_copy.is_ok());
724
        assert_eq!(buffer2, buffer_copy.ok().unwrap());
725
    }
726
727
    macro_rules! check_as_typed_data {
728
        ($input: expr, $native_t: ty) => {{
729
            let buffer = Buffer::from_slice_ref($input);
730
            let slice: &[$native_t] = buffer.typed_data::<$native_t>();
731
            assert_eq!($input, slice);
732
        }};
733
    }
734
735
    #[test]
736
    #[allow(clippy::float_cmp)]
737
    fn test_as_typed_data() {
738
        check_as_typed_data!(&[1i8, 3i8, 6i8], i8);
739
        check_as_typed_data!(&[1u8, 3u8, 6u8], u8);
740
        check_as_typed_data!(&[1i16, 3i16, 6i16], i16);
741
        check_as_typed_data!(&[1i32, 3i32, 6i32], i32);
742
        check_as_typed_data!(&[1i64, 3i64, 6i64], i64);
743
        check_as_typed_data!(&[1u16, 3u16, 6u16], u16);
744
        check_as_typed_data!(&[1u32, 3u32, 6u32], u32);
745
        check_as_typed_data!(&[1u64, 3u64, 6u64], u64);
746
        check_as_typed_data!(&[1f32, 3f32, 6f32], f32);
747
        check_as_typed_data!(&[1f64, 3f64, 6f64], f64);
748
    }
749
750
    #[test]
751
    fn test_count_bits() {
752
        assert_eq!(0, Buffer::from(&[0b00000000]).count_set_bits_offset(0, 8));
753
        assert_eq!(8, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 8));
754
        assert_eq!(3, Buffer::from(&[0b00001101]).count_set_bits_offset(0, 8));
755
        assert_eq!(
756
            6,
757
            Buffer::from(&[0b01001001, 0b01010010]).count_set_bits_offset(0, 16)
758
        );
759
        assert_eq!(
760
            16,
761
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 16)
762
        );
763
    }
764
765
    #[test]
766
    fn test_count_bits_slice() {
767
        assert_eq!(
768
            0,
769
            Buffer::from(&[0b11111111, 0b00000000])
770
                .slice(1)
771
                .count_set_bits_offset(0, 8)
772
        );
773
        assert_eq!(
774
            8,
775
            Buffer::from(&[0b11111111, 0b11111111])
776
                .slice_with_length(1, 1)
777
                .count_set_bits_offset(0, 8)
778
        );
779
        assert_eq!(
780
            3,
781
            Buffer::from(&[0b11111111, 0b11111111, 0b00001101])
782
                .slice(2)
783
                .count_set_bits_offset(0, 8)
784
        );
785
        assert_eq!(
786
            6,
787
            Buffer::from(&[0b11111111, 0b01001001, 0b01010010])
788
                .slice_with_length(1, 2)
789
                .count_set_bits_offset(0, 16)
790
        );
791
        assert_eq!(
792
            16,
793
            Buffer::from(&[0b11111111, 0b11111111, 0b11111111, 0b11111111])
794
                .slice(2)
795
                .count_set_bits_offset(0, 16)
796
        );
797
    }
798
799
    #[test]
800
    fn test_count_bits_offset_slice() {
801
        assert_eq!(8, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 8));
802
        assert_eq!(3, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 3));
803
        assert_eq!(5, Buffer::from(&[0b11111111]).count_set_bits_offset(3, 5));
804
        assert_eq!(1, Buffer::from(&[0b11111111]).count_set_bits_offset(3, 1));
805
        assert_eq!(0, Buffer::from(&[0b11111111]).count_set_bits_offset(8, 0));
806
        assert_eq!(2, Buffer::from(&[0b01010101]).count_set_bits_offset(0, 3));
807
        assert_eq!(
808
            16,
809
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 16)
810
        );
811
        assert_eq!(
812
            10,
813
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 10)
814
        );
815
        assert_eq!(
816
            10,
817
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(3, 10)
818
        );
819
        assert_eq!(
820
            8,
821
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(8, 8)
822
        );
823
        assert_eq!(
824
            5,
825
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(11, 5)
826
        );
827
        assert_eq!(
828
            0,
829
            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(16, 0)
830
        );
831
        assert_eq!(
832
            2,
833
            Buffer::from(&[0b01101101, 0b10101010]).count_set_bits_offset(7, 5)
834
        );
835
        assert_eq!(
836
            4,
837
            Buffer::from(&[0b01101101, 0b10101010]).count_set_bits_offset(7, 9)
838
        );
839
    }
840
841
    #[test]
842
    fn test_unwind_safe() {
843
        fn assert_unwind_safe<T: RefUnwindSafe + UnwindSafe>() {}
844
        assert_unwind_safe::<Buffer>()
845
    }
846
847
    #[test]
848
    fn test_from_foreign_vec() {
849
        let mut vector = vec![1_i32, 2, 3, 4, 5];
850
        let buffer = unsafe {
851
            Buffer::from_custom_allocation(
852
                NonNull::new_unchecked(vector.as_mut_ptr() as *mut u8),
853
                vector.len() * std::mem::size_of::<i32>(),
854
                Arc::new(vector),
855
            )
856
        };
857
858
        let slice = buffer.typed_data::<i32>();
859
        assert_eq!(slice, &[1, 2, 3, 4, 5]);
860
861
        let buffer = buffer.slice(std::mem::size_of::<i32>());
862
863
        let slice = buffer.typed_data::<i32>();
864
        assert_eq!(slice, &[2, 3, 4, 5]);
865
    }
866
867
    #[test]
868
    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
869
    fn slice_overflow() {
870
        let buffer = Buffer::from(MutableBuffer::from_len_zeroed(12));
871
        buffer.slice_with_length(2, usize::MAX);
872
    }
873
874
    #[test]
875
    fn test_vec_interop() {
876
        // Test empty vec
877
        let a: Vec<i128> = Vec::new();
878
        let b = Buffer::from_vec(a);
879
        b.into_vec::<i128>().unwrap();
880
881
        // Test vec with capacity
882
        let a: Vec<i128> = Vec::with_capacity(20);
883
        let b = Buffer::from_vec(a);
884
        let back = b.into_vec::<i128>().unwrap();
885
        assert_eq!(back.len(), 0);
886
        assert_eq!(back.capacity(), 20);
887
888
        // Test vec with values
889
        let mut a: Vec<i128> = Vec::with_capacity(3);
890
        a.extend_from_slice(&[1, 2, 3]);
891
        let b = Buffer::from_vec(a);
892
        let back = b.into_vec::<i128>().unwrap();
893
        assert_eq!(back.len(), 3);
894
        assert_eq!(back.capacity(), 3);
895
896
        // Test vec with values and spare capacity
897
        let mut a: Vec<i128> = Vec::with_capacity(20);
898
        a.extend_from_slice(&[1, 4, 7, 8, 9, 3, 6]);
899
        let b = Buffer::from_vec(a);
900
        let back = b.into_vec::<i128>().unwrap();
901
        assert_eq!(back.len(), 7);
902
        assert_eq!(back.capacity(), 20);
903
904
        // Test incorrect alignment
905
        let a: Vec<i128> = Vec::new();
906
        let b = Buffer::from_vec(a);
907
        let b = b.into_vec::<i32>().unwrap_err();
908
        b.into_vec::<i8>().unwrap_err();
909
910
        // Test convert between types with same alignment
911
        // This is an implementation quirk, but isn't harmful
912
        // as ArrowNativeType are trivially transmutable
913
        let a: Vec<i64> = vec![1, 2, 3, 4];
914
        let b = Buffer::from_vec(a);
915
        let back = b.into_vec::<u64>().unwrap();
916
        assert_eq!(back.len(), 4);
917
        assert_eq!(back.capacity(), 4);
918
919
        // i256 has the same layout as i128 so this is valid
920
        let mut b: Vec<i128> = Vec::with_capacity(4);
921
        b.extend_from_slice(&[1, 2, 3, 4]);
922
        let b = Buffer::from_vec(b);
923
        let back = b.into_vec::<i256>().unwrap();
924
        assert_eq!(back.len(), 2);
925
        assert_eq!(back.capacity(), 2);
926
927
        // Invalid layout
928
        let b: Vec<i128> = vec![1, 2, 3];
929
        let b = Buffer::from_vec(b);
930
        b.into_vec::<i256>().unwrap_err();
931
932
        // Invalid layout
933
        let mut b: Vec<i128> = Vec::with_capacity(5);
934
        b.extend_from_slice(&[1, 2, 3, 4]);
935
        let b = Buffer::from_vec(b);
936
        b.into_vec::<i256>().unwrap_err();
937
938
        // Truncates length
939
        // This is an implementation quirk, but isn't harmful
940
        let mut b: Vec<i128> = Vec::with_capacity(4);
941
        b.extend_from_slice(&[1, 2, 3]);
942
        let b = Buffer::from_vec(b);
943
        let back = b.into_vec::<i256>().unwrap();
944
        assert_eq!(back.len(), 1);
945
        assert_eq!(back.capacity(), 2);
946
947
        // Cannot use aligned allocation
948
        let b = Buffer::from(MutableBuffer::new(10));
949
        let b = b.into_vec::<u8>().unwrap_err();
950
        b.into_vec::<u64>().unwrap_err();
951
952
        // Test slicing
953
        let mut a: Vec<i128> = Vec::with_capacity(20);
954
        a.extend_from_slice(&[1, 4, 7, 8, 9, 3, 6]);
955
        let b = Buffer::from_vec(a);
956
        let slice = b.slice_with_length(0, 64);
957
958
        // Shared reference fails
959
        let slice = slice.into_vec::<i128>().unwrap_err();
960
        drop(b);
961
962
        // Succeeds as no outstanding shared reference
963
        let back = slice.into_vec::<i128>().unwrap();
964
        assert_eq!(&back, &[1, 4, 7, 8]);
965
        assert_eq!(back.capacity(), 20);
966
967
        // Slicing by non-multiple length truncates
968
        let mut a: Vec<i128> = Vec::with_capacity(8);
969
        a.extend_from_slice(&[1, 4, 7, 3]);
970
971
        let b = Buffer::from_vec(a);
972
        let slice = b.slice_with_length(0, 34);
973
        drop(b);
974
975
        let back = slice.into_vec::<i128>().unwrap();
976
        assert_eq!(&back, &[1, 4]);
977
        assert_eq!(back.capacity(), 8);
978
979
        // Offset prevents conversion
980
        let a: Vec<u32> = vec![1, 3, 4, 6];
981
        let b = Buffer::from_vec(a).slice(2);
982
        b.into_vec::<u32>().unwrap_err();
983
984
        let b = MutableBuffer::new(16).into_buffer();
985
        let b = b.into_vec::<u8>().unwrap_err(); // Invalid layout
986
        let b = b.into_vec::<u32>().unwrap_err(); // Invalid layout
987
        b.into_mutable().unwrap();
988
989
        let b = Buffer::from_vec(vec![1_u32, 3, 5]);
990
        let b = b.into_mutable().unwrap();
991
        let b = Buffer::from(b);
992
        let b = b.into_vec::<u32>().unwrap();
993
        assert_eq!(b, &[1, 3, 5]);
994
    }
995
996
    #[test]
997
    #[should_panic(expected = "capacity overflow")]
998
    fn test_from_iter_overflow() {
999
        let iter_len = usize::MAX / std::mem::size_of::<u64>() + 1;
1000
        let _ = Buffer::from_iter(std::iter::repeat_n(0_u64, iter_len));
1001
    }
1002
1003
    #[test]
1004
    fn bit_slice_length_preserved() {
1005
        // Create a boring buffer
1006
        let buf = Buffer::from_iter(std::iter::repeat_n(true, 64));
1007
1008
        let assert_preserved = |offset: usize, len: usize| {
1009
            let new_buf = buf.bit_slice(offset, len);
1010
            assert_eq!(new_buf.len(), bit_util::ceil(len, 8));
1011
1012
            // if the offset is not byte-aligned, we have to create a deep copy to a new buffer
1013
            // (since the `offset` value inside a Buffer is byte-granular, not bit-granular), so
1014
            // checking the offset should always return 0 if so. If the offset IS byte-aligned, we
1015
            // want to make sure it doesn't unnecessarily create a deep copy.
1016
            if offset % 8 == 0 {
1017
                assert_eq!(new_buf.ptr_offset(), offset / 8);
1018
            } else {
1019
                assert_eq!(new_buf.ptr_offset(), 0);
1020
            }
1021
        };
1022
1023
        // go through every available value for offset
1024
        for o in 0..=64 {
1025
            // and go through every length that could accompany that offset - we can't have a
1026
            // situation where offset + len > 64, because that would go past the end of the buffer,
1027
            // so we use the map to ensure it's in range.
1028
            for l in (o..=64).map(|l| l - o) {
1029
                // and we just want to make sure every one of these keeps its offset and length
1030
                // when neeeded
1031
                assert_preserved(o, l);
1032
            }
1033
        }
1034
    }
1035
1036
    #[test]
1037
    fn test_strong_count() {
1038
        let buffer = Buffer::from_iter(std::iter::repeat_n(0_u8, 100));
1039
        assert_eq!(buffer.strong_count(), 1);
1040
1041
        let buffer2 = buffer.clone();
1042
        assert_eq!(buffer.strong_count(), 2);
1043
1044
        let buffer3 = buffer2.clone();
1045
        assert_eq!(buffer.strong_count(), 3);
1046
1047
        drop(buffer);
1048
        assert_eq!(buffer2.strong_count(), 2);
1049
        assert_eq!(buffer3.strong_count(), 2);
1050
1051
        // Strong count does not increase on move
1052
        let capture = move || {
1053
            assert_eq!(buffer3.strong_count(), 2);
1054
        };
1055
1056
        capture();
1057
        assert_eq!(buffer2.strong_count(), 2);
1058
1059
        drop(capture);
1060
        assert_eq!(buffer2.strong_count(), 1);
1061
    }
1062
}