Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-buffer/src/buffer/offset.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use crate::buffer::ScalarBuffer;
19
use crate::{ArrowNativeType, MutableBuffer, OffsetBufferBuilder};
20
use std::ops::Deref;
21
22
/// A non-empty buffer of monotonically increasing (non-strict), non-negative integers.
23
///
24
/// An [`OffsetBuffer`] is used to represent ranges of offsets. An
25
/// `OffsetBuffer` of `N+1` items contains `N` such ranges. The start
26
/// offset for element `i` is `offsets[i]` and the end offset is
27
/// `offsets[i+1]`. Equal offsets represent an empty range.
28
///
29
/// # Example
30
///
31
/// This example shows how 5 distinct ranges are represented using a
32
/// 6-entry `OffsetBuffer`. The first entry `(0, 3)` represents the
33
/// three offsets `0, 1, 2`. The entry `(3,3)` represents no offsets
34
/// (e.g. an empty list).
35
///
36
/// ```text
37
///   ┌───────┐                ┌───┐
38
///   │ (0,3) │                │ 0 │
39
///   ├───────┤                ├───┤
40
///   │ (3,3) │                │ 3 │
41
///   ├───────┤                ├───┤
42
///   │ (3,4) │                │ 3 │
43
///   ├───────┤                ├───┤
44
///   │ (4,5) │                │ 4 │
45
///   ├───────┤                ├───┤
46
///   │ (5,7) │                │ 5 │
47
///   └───────┘                ├───┤
48
///                            │ 7 │
49
///                            └───┘
50
///
51
///                        Offsets Buffer
52
///    Logical
53
///    Offsets
54
///
55
///  (offsets[i],
56
///   offsets[i+1])
57
/// ```
58
#[derive(Debug, Clone, PartialEq, Eq)]
59
pub struct OffsetBuffer<O: ArrowNativeType>(ScalarBuffer<O>);
60
61
impl<O: ArrowNativeType> OffsetBuffer<O> {
62
    /// Create a new [`OffsetBuffer`] from the provided [`ScalarBuffer`], validating its contents
63
    ///
64
    /// # Panics
65
    ///
66
    /// Panics if `buffer` is empty, if its first value is less than zero, or if
67
    /// any value is smaller than its predecessor (equal neighbouring values are accepted)
68
1
    pub fn new(buffer: ScalarBuffer<O>) -> Self {
69
1
        assert!(!buffer.is_empty(), 
"offsets cannot be empty"0
);
70
1
        assert!(
71
1
            buffer[0] >= O::usize_as(0),
72
0
            "offsets must be greater than 0"
73
        );
74
1
        assert!(
75
5
            
buffer.windows(2)1
.
all1
(|w| w[0] <= w[1]),
76
0
            "offsets must be monotonically increasing"
77
        );
78
1
        Self(buffer)
79
1
    }
80
81
    /// Create a new [`OffsetBuffer`] from the provided [`ScalarBuffer`] without validating it
82
    ///
83
    /// # Safety
84
    ///
85
    /// `buffer` must be a non-empty buffer containing monotonically increasing
86
    /// values greater than or equal to zero; none of this is checked here
87
362
    pub unsafe fn new_unchecked(buffer: ScalarBuffer<O>) -> Self {
88
362
        Self(buffer)
89
362
    }
90
91
    /// Create a new [`OffsetBuffer`] containing a single 0 value (zeroed storage sized for one `O`)
92
0
    pub fn new_empty() -> Self {
93
0
        let buffer = MutableBuffer::from_len_zeroed(std::mem::size_of::<O>());
94
0
        Self(buffer.into_buffer().into())
95
0
    }
96
97
    /// Create a new [`OffsetBuffer`] containing `len + 1` `0` values
    ///
    /// # Panics
    ///
    /// Panics with "overflow" if `(len + 1) * size_of::<O>()` does not fit in a `usize`
98
    pub fn new_zeroed(len: usize) -> Self {
99
        let len_bytes = len
100
            .checked_add(1)
101
            .and_then(|o| o.checked_mul(std::mem::size_of::<O>()))
102
            .expect("overflow");
103
        let buffer = MutableBuffer::from_len_zeroed(len_bytes);
104
        Self(buffer.into_buffer().into())
105
    }
106
107
    /// Create a new [`OffsetBuffer`] from an iterator of slice lengths, as the running sum starting at 0
108
    ///
109
    /// ```
110
    /// # use arrow_buffer::OffsetBuffer;
111
    /// let offsets = OffsetBuffer::<i32>::from_lengths([1, 3, 5]);
112
    /// assert_eq!(offsets.as_ref(), &[0, 1, 4, 9]);
113
    /// ```
114
    ///
115
    /// # Panics
116
    ///
117
    /// Panics if the running total overflows `usize`, or if the final total cannot be represented as `O`
118
8
    pub fn from_lengths<I>(lengths: I) -> Self
119
8
    where
120
8
        I: IntoIterator<Item = usize>,
121
    {
122
8
        let iter = lengths.into_iter();
123
8
        let mut out = Vec::with_capacity(iter.size_hint().0 + 1);
124
8
        out.push(O::usize_as(0));
125
126
8
        let mut acc = 0_usize;
127
67
        for 
length59
in iter {
128
59
            acc = acc.checked_add(length).expect("usize overflow");
129
59
            out.push(O::usize_as(acc))
130
        }
131
        // `acc` never decreases, so only the final (largest) total is checked against `O`'s range
132
8
        O::from_usize(acc).expect("offset overflow");
133
8
        Self(out.into())
134
8
    }
135
136
    /// Get an iterator over the lengths (differences of adjacent offsets) of this [`OffsetBuffer`]
137
    ///
138
    /// ```
139
    /// # use arrow_buffer::{OffsetBuffer, ScalarBuffer};
140
    /// let offsets = OffsetBuffer::<_>::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9]));
141
    /// assert_eq!(offsets.lengths().collect::<Vec<usize>>(), vec![1, 3, 5]);
142
    /// ```
143
    ///
144
    /// An empty [`OffsetBuffer`] (a single offset) returns an empty iterator
145
    /// ```
146
    /// # use arrow_buffer::OffsetBuffer;
147
    /// let offsets = OffsetBuffer::<i32>::new_empty();
148
    /// assert_eq!(offsets.lengths().count(), 0);
149
    /// ```
150
    ///
151
    /// This can be used to merge multiple [`OffsetBuffer`]s into one
152
    /// ```
153
    /// # use arrow_buffer::{OffsetBuffer, ScalarBuffer};
154
    ///
155
    /// let buffer1 = OffsetBuffer::<i32>::from_lengths([2, 6, 3, 7, 2]);
156
    /// let buffer2 = OffsetBuffer::<i32>::from_lengths([1, 3, 5, 7, 9]);
157
    ///
158
    /// let merged = OffsetBuffer::<i32>::from_lengths(
159
    ///     vec![buffer1, buffer2].iter().flat_map(|x| x.lengths())
160
    /// );
161
    ///
162
    /// assert_eq!(merged.lengths().collect::<Vec<_>>(), &[2, 6, 3, 7, 2, 1, 3, 5, 7, 9]);
163
    /// ```
164
23
    pub fn lengths(&self) -> impl ExactSizeIterator<Item = usize> + '_ {
165
59
        
self.0.windows(2)23
.
map23
(|x| x[1].as_usize() - x[0].as_usize())
166
23
    }
167
168
    /// Free up unused memory by calling `shrink_to_fit` on the inner [`ScalarBuffer`].
169
0
    pub fn shrink_to_fit(&mut self) {
170
0
        self.0.shrink_to_fit();
171
0
    }
172
173
    /// Returns a shared reference to the inner [`ScalarBuffer`]
174
0
    pub fn inner(&self) -> &ScalarBuffer<O> {
175
0
        &self.0
176
0
    }
177
178
    /// Consumes `self` and returns the inner [`ScalarBuffer`] by value
179
328
    pub fn into_inner(self) -> ScalarBuffer<O> {
180
328
        self.0
181
328
    }
182
183
    /// Returns a zero-copy slice of this buffer with length `len` and starting at `offset`
    ///
    /// `len` counts ranges, so the inner buffer is sliced to `len + 1` offsets
    /// (saturating at `usize::MAX`).
184
110
    pub fn slice(&self, offset: usize, len: usize) -> Self {
185
110
        Self(self.0.slice(offset, len.saturating_add(1)))
186
110
    }
187
188
    /// Returns true if this [`OffsetBuffer`] is equal to `other`, using pointer comparisons
189
    /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
190
    /// return false when the buffers are logically equal
191
    #[inline]
192
0
    pub fn ptr_eq(&self, other: &Self) -> bool {
193
0
        self.0.ptr_eq(&other.0)
194
0
    }
195
}
196
197
// Lets an `OffsetBuffer<T>` be used as a slice of its offsets (`[T]`).
impl<T: ArrowNativeType> Deref for OffsetBuffer<T> {
198
    type Target = [T];
199
200
    #[inline]
201
2.40k
    fn deref(&self) -> &Self::Target {
202
2.40k
        &self.0
203
2.40k
    }
204
}
205
206
// Borrows the offsets as a plain slice.
impl<T: ArrowNativeType> AsRef<[T]> for OffsetBuffer<T> {
207
    #[inline]
208
    fn as_ref(&self) -> &[T] {
209
        // `self` is coerced to `&[T]` through the `Deref` impl for `OffsetBuffer`
        self
210
    }
211
}
212
213
// Converts a builder into an `OffsetBuffer` by calling its `finish` method.
impl<O: ArrowNativeType> From<OffsetBufferBuilder<O>> for OffsetBuffer<O> {
214
    fn from(value: OffsetBufferBuilder<O>) -> Self {
215
        value.finish()
216
    }
217
}
218
219
// The default `OffsetBuffer` is the one produced by `new_empty` (a single 0 offset).
impl<O: ArrowNativeType> Default for OffsetBuffer<O> {
220
    fn default() -> Self {
221
        Self::new_empty()
222
    }
223
}
224
225
// Unit tests for `OffsetBuffer` construction, validation panics, and `lengths`.
#[cfg(test)]
226
mod tests {
227
    use super::*;
228
229
    // `new` rejects a buffer with no offsets at all.
    #[test]
230
    #[should_panic(expected = "offsets cannot be empty")]
231
    fn empty_offsets() {
232
        OffsetBuffer::new(Vec::<i32>::new().into());
233
    }
234
235
    // `new` rejects a negative first offset. The message says "greater than 0",
    // although the check in `new` is `>=` and so accepts a first offset of 0.
    #[test]
236
    #[should_panic(expected = "offsets must be greater than 0")]
237
    fn negative_offsets() {
238
        OffsetBuffer::new(vec![-1, 0, 1].into());
239
    }
240
241
    // Valid offsets are accepted, and `new_zeroed(n)` yields `n + 1` zeros.
    #[test]
242
    fn offsets() {
243
        OffsetBuffer::new(vec![0, 1, 2, 3].into());
244
245
        let offsets = OffsetBuffer::<i32>::new_zeroed(3);
246
        assert_eq!(offsets.as_ref(), &[0; 4]);
247
248
        let offsets = OffsetBuffer::<i32>::new_zeroed(0);
249
        assert_eq!(offsets.as_ref(), &[0; 1]);
250
    }
251
252
    // `usize::MAX + 1` overflows while computing the element count in `new_zeroed`.
    #[test]
253
    #[should_panic(expected = "overflow")]
254
    fn offsets_new_zeroed_overflow() {
255
        OffsetBuffer::<i32>::new_zeroed(usize::MAX);
256
    }
257
258
    // `new` rejects an offset that is smaller than its predecessor.
    #[test]
259
    #[should_panic(expected = "offsets must be monotonically increasing")]
260
    fn non_monotonic_offsets() {
261
        OffsetBuffer::new(vec![1, 2, 0].into());
262
    }
263
264
    // `from_lengths` produces the running sum of the lengths, starting at 0.
    #[test]
265
    fn from_lengths() {
266
        let buffer = OffsetBuffer::<i32>::from_lengths([2, 6, 3, 7, 2]);
267
        assert_eq!(buffer.as_ref(), &[0, 2, 8, 11, 18, 20]);
268
269
        let half_max = i32::MAX / 2;
270
        let buffer = OffsetBuffer::<i32>::from_lengths([half_max as usize, half_max as usize]);
271
        assert_eq!(buffer.as_ref(), &[0, half_max, half_max * 2]);
272
    }
273
274
    // The total `i32::MAX + 1` fits in `usize` but not in the `i32` offset type.
    #[test]
275
    #[should_panic(expected = "offset overflow")]
276
    fn from_lengths_offset_overflow() {
277
        OffsetBuffer::<i32>::from_lengths([i32::MAX as usize, 1]);
278
    }
279
280
    // The running total itself overflows `usize`.
    #[test]
281
    #[should_panic(expected = "usize overflow")]
282
    fn from_lengths_usize_overflow() {
283
        OffsetBuffer::<i32>::from_lengths([usize::MAX, 1]);
284
    }
285
286
    // `lengths` yields the difference between each pair of adjacent offsets.
    #[test]
287
    fn get_lengths() {
288
        let offsets = OffsetBuffer::<i32>::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9]));
289
        assert_eq!(offsets.lengths().collect::<Vec<usize>>(), vec![1, 3, 5]);
290
    }
291
292
    // The iterator returned by `lengths` reports an exact size.
    #[test]
293
    fn get_lengths_should_be_with_fixed_size() {
294
        let offsets = OffsetBuffer::<i32>::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9]));
295
        let iter = offsets.lengths();
296
        assert_eq!(iter.size_hint(), (3, Some(3)));
297
        assert_eq!(iter.len(), 3);
298
    }
299
300
    // A buffer holding only the single 0 offset has no lengths.
    #[test]
301
    fn get_lengths_from_empty_offset_buffer_should_be_empty_iterator() {
302
        let offsets = OffsetBuffer::<i32>::new_empty();
303
        assert_eq!(offsets.lengths().collect::<Vec<usize>>(), vec![]);
304
    }
305
306
    // Compile-time check that `OffsetBuffer` implements `Eq`, plus a value comparison.
    #[test]
307
    fn impl_eq() {
308
        fn are_equal<T: Eq>(a: &T, b: &T) -> bool {
309
            a.eq(b)
310
        }
311
312
        assert!(
313
            are_equal(
314
                &OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9])),
315
                &OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9]))
316
            ),
317
            "OffsetBuffer should implement Eq."
318
        );
319
    }
320
321
    // `Default` yields a buffer containing the single offset 0.
    #[test]
322
    fn impl_default() {
323
        let default = OffsetBuffer::<i32>::default();
324
        assert_eq!(default.as_ref(), &[0]);
325
    }
326
}