Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-buffer/src/builder/mod.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! Buffer builders
19
20
mod boolean;
21
mod null;
22
mod offset;
23
24
pub use boolean::*;
25
pub use null::*;
26
pub use offset::*;
27
28
use crate::{ArrowNativeType, Buffer, MutableBuffer};
29
use std::marker::PhantomData;
30
31
/// Builder for creating a [Buffer] object.
32
///
33
/// A [Buffer] is the underlying data structure of Arrow's Arrays.
34
///
35
/// For all supported types, there are type definitions for the
36
/// generic version of `BufferBuilder<T>`, e.g. `BufferBuilder`.
37
///
38
/// # Example:
39
///
40
/// ```
41
/// # use arrow_buffer::builder::BufferBuilder;
42
///
43
/// let mut builder = BufferBuilder::<u8>::new(100);
44
/// builder.append_slice(&[42, 43, 44]);
45
/// builder.append(45);
46
/// let buffer = builder.finish();
47
///
48
/// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 43, 44, 45]);
49
/// ```
50
#[derive(Debug)]
51
pub struct BufferBuilder<T: ArrowNativeType> {
52
    buffer: MutableBuffer,
53
    len: usize,
54
    _marker: PhantomData<T>,
55
}
56
57
impl<T: ArrowNativeType> BufferBuilder<T> {
58
    /// Creates a new builder with initial capacity for _at least_ `capacity`
59
    /// elements of type `T`.
60
    ///
61
    /// The capacity can later be manually adjusted with the
62
    /// [`reserve()`](BufferBuilder::reserve) method.
63
    /// Also the
64
    /// [`append()`](BufferBuilder::append),
65
    /// [`append_slice()`](BufferBuilder::append_slice) and
66
    /// [`advance()`](BufferBuilder::advance)
67
    /// methods automatically increase the capacity if needed.
68
    ///
69
    /// # Example:
70
    ///
71
    /// ```
72
    /// # use arrow_buffer::builder::BufferBuilder;
73
    ///
74
    /// let mut builder = BufferBuilder::<u8>::new(10);
75
    ///
76
    /// assert!(builder.capacity() >= 10);
77
    /// ```
78
    #[inline]
79
0
    pub fn new(capacity: usize) -> Self {
80
0
        let buffer = MutableBuffer::new(capacity * std::mem::size_of::<T>());
81
82
0
        Self {
83
0
            buffer,
84
0
            len: 0,
85
0
            _marker: PhantomData,
86
0
        }
87
0
    }
88
89
    /// Creates a new builder from a [`MutableBuffer`]
90
    pub fn new_from_buffer(buffer: MutableBuffer) -> Self {
91
        let buffer_len = buffer.len();
92
        Self {
93
            buffer,
94
            len: buffer_len / std::mem::size_of::<T>(),
95
            _marker: PhantomData,
96
        }
97
    }
98
99
    /// Returns the current number of array elements in the internal buffer.
100
    ///
101
    /// # Example:
102
    ///
103
    /// ```
104
    /// # use arrow_buffer::builder::BufferBuilder;
105
    ///
106
    /// let mut builder = BufferBuilder::<u8>::new(10);
107
    /// builder.append(42);
108
    ///
109
    /// assert_eq!(builder.len(), 1);
110
    /// ```
111
0
    pub fn len(&self) -> usize {
112
0
        self.len
113
0
    }
114
115
    /// Returns whether the internal buffer is empty.
116
    ///
117
    /// # Example:
118
    ///
119
    /// ```
120
    /// # use arrow_buffer::builder::BufferBuilder;
121
    ///
122
    /// let mut builder = BufferBuilder::<u8>::new(10);
123
    /// builder.append(42);
124
    ///
125
    /// assert_eq!(builder.is_empty(), false);
126
    /// ```
127
    pub fn is_empty(&self) -> bool {
128
        self.len == 0
129
    }
130
131
    /// Returns the actual capacity (number of elements) of the internal buffer.
132
    ///
133
    /// Note: the internal capacity returned by this method might be larger than
134
    /// what you'd expect after setting the capacity in the `new()` or `reserve()`
135
    /// functions.
136
    pub fn capacity(&self) -> usize {
137
        let byte_capacity = self.buffer.capacity();
138
        byte_capacity / std::mem::size_of::<T>()
139
    }
140
141
    /// Increases the number of elements in the internal buffer by `n`
142
    /// and resizes the buffer as needed.
143
    ///
144
    /// The values of the newly added elements are 0.
145
    /// This method is usually used when appending `NULL` values to the buffer
146
    /// as they still require physical memory space.
147
    ///
148
    /// # Example:
149
    ///
150
    /// ```
151
    /// # use arrow_buffer::builder::BufferBuilder;
152
    ///
153
    /// let mut builder = BufferBuilder::<u8>::new(10);
154
    /// builder.advance(2);
155
    ///
156
    /// assert_eq!(builder.len(), 2);
157
    /// ```
158
    #[inline]
159
    pub fn advance(&mut self, i: usize) {
160
        self.buffer.extend_zeros(i * std::mem::size_of::<T>());
161
        self.len += i;
162
    }
163
164
    /// Reserves memory for _at least_ `n` more elements of type `T`.
165
    ///
166
    /// # Example:
167
    ///
168
    /// ```
169
    /// # use arrow_buffer::builder::BufferBuilder;
170
    ///
171
    /// let mut builder = BufferBuilder::<u8>::new(10);
172
    /// builder.reserve(10);
173
    ///
174
    /// assert!(builder.capacity() >= 20);
175
    /// ```
176
    #[inline]
177
0
    pub fn reserve(&mut self, n: usize) {
178
0
        self.buffer.reserve(n * std::mem::size_of::<T>());
179
0
    }
180
181
    /// Appends a value of type `T` into the builder,
182
    /// growing the internal buffer as needed.
183
    ///
184
    /// # Example:
185
    ///
186
    /// ```
187
    /// # use arrow_buffer::builder::BufferBuilder;
188
    ///
189
    /// let mut builder = BufferBuilder::<u8>::new(10);
190
    /// builder.append(42);
191
    ///
192
    /// assert_eq!(builder.len(), 1);
193
    /// ```
194
    #[inline]
195
0
    pub fn append(&mut self, v: T) {
196
0
        self.reserve(1);
197
0
        self.buffer.push(v);
198
0
        self.len += 1;
199
0
    }
200
201
    /// Appends a value of type `T` into the builder N times,
202
    /// growing the internal buffer as needed.
203
    ///
204
    /// # Example:
205
    ///
206
    /// ```
207
    /// # use arrow_buffer::builder::BufferBuilder;
208
    ///
209
    /// let mut builder = BufferBuilder::<u8>::new(10);
210
    /// builder.append_n(10, 42);
211
    ///
212
    /// assert_eq!(builder.len(), 10);
213
    /// ```
214
    #[inline]
215
    pub fn append_n(&mut self, n: usize, v: T) {
216
        self.reserve(n);
217
        self.extend(std::iter::repeat_n(v, n))
218
    }
219
220
    /// Appends `n`, zero-initialized values
221
    ///
222
    /// # Example:
223
    ///
224
    /// ```
225
    /// # use arrow_buffer::builder::BufferBuilder;
226
    ///
227
    /// let mut builder = BufferBuilder::<u32>::new(10);
228
    /// builder.append_n_zeroed(3);
229
    ///
230
    /// assert_eq!(builder.len(), 3);
231
    /// assert_eq!(builder.as_slice(), &[0, 0, 0])
232
    #[inline]
233
0
    pub fn append_n_zeroed(&mut self, n: usize) {
234
0
        self.buffer.extend_zeros(n * std::mem::size_of::<T>());
235
0
        self.len += n;
236
0
    }
237
238
    /// Appends a slice of type `T`, growing the internal buffer as needed.
239
    ///
240
    /// # Example:
241
    ///
242
    /// ```
243
    /// # use arrow_buffer::builder::BufferBuilder;
244
    ///
245
    /// let mut builder = BufferBuilder::<u8>::new(10);
246
    /// builder.append_slice(&[42, 44, 46]);
247
    ///
248
    /// assert_eq!(builder.len(), 3);
249
    /// ```
250
    #[inline]
251
    pub fn append_slice(&mut self, slice: &[T]) {
252
        self.buffer.extend_from_slice(slice);
253
        self.len += slice.len();
254
    }
255
256
    /// View the contents of this buffer as a slice
257
    ///
258
    /// ```
259
    /// # use arrow_buffer::builder::BufferBuilder;
260
    ///
261
    /// let mut builder = BufferBuilder::<f64>::new(10);
262
    /// builder.append(1.3);
263
    /// builder.append_n(2, 2.3);
264
    ///
265
    /// assert_eq!(builder.as_slice(), &[1.3, 2.3, 2.3]);
266
    /// ```
267
    #[inline]
268
0
    pub fn as_slice(&self) -> &[T] {
269
        // SAFETY
270
        //
271
        // - MutableBuffer is aligned and initialized for len elements of T
272
        // - MutableBuffer corresponds to a single allocation
273
        // - MutableBuffer does not support modification whilst active immutable borrows
274
0
        unsafe { std::slice::from_raw_parts(self.buffer.as_ptr() as _, self.len) }
275
0
    }
276
277
    /// View the contents of this buffer as a mutable slice
278
    ///
279
    /// # Example:
280
    ///
281
    /// ```
282
    /// # use arrow_buffer::builder::BufferBuilder;
283
    ///
284
    /// let mut builder = BufferBuilder::<f32>::new(10);
285
    ///
286
    /// builder.append_slice(&[1., 2., 3.4]);
287
    /// assert_eq!(builder.as_slice(), &[1., 2., 3.4]);
288
    ///
289
    /// builder.as_slice_mut()[1] = 4.2;
290
    /// assert_eq!(builder.as_slice(), &[1., 4.2, 3.4]);
291
    /// ```
292
    #[inline]
293
0
    pub fn as_slice_mut(&mut self) -> &mut [T] {
294
        // SAFETY
295
        //
296
        // - MutableBuffer is aligned and initialized for len elements of T
297
        // - MutableBuffer corresponds to a single allocation
298
        // - MutableBuffer does not support modification whilst active immutable borrows
299
0
        unsafe { std::slice::from_raw_parts_mut(self.buffer.as_mut_ptr() as _, self.len) }
300
0
    }
301
302
    /// Shorten this BufferBuilder to `len` items
303
    ///
304
    /// If `len` is greater than the builder's current length, this has no effect
305
    ///
306
    /// # Example:
307
    ///
308
    /// ```
309
    /// # use arrow_buffer::builder::BufferBuilder;
310
    ///
311
    /// let mut builder = BufferBuilder::<u16>::new(10);
312
    ///
313
    /// builder.append_slice(&[42, 44, 46]);
314
    /// assert_eq!(builder.as_slice(), &[42, 44, 46]);
315
    ///
316
    /// builder.truncate(2);
317
    /// assert_eq!(builder.as_slice(), &[42, 44]);
318
    ///
319
    /// builder.append(12);
320
    /// assert_eq!(builder.as_slice(), &[42, 44, 12]);
321
    /// ```
322
    #[inline]
323
    pub fn truncate(&mut self, len: usize) {
324
        self.buffer.truncate(len * std::mem::size_of::<T>());
325
        self.len = len;
326
    }
327
328
    /// # Safety
329
    /// This requires the iterator be a trusted length. This could instead require
330
    /// the iterator implement `TrustedLen` once that is stabilized.
331
    #[inline]
332
    pub unsafe fn append_trusted_len_iter(&mut self, iter: impl IntoIterator<Item = T>) {
333
        let iter = iter.into_iter();
334
        let len = iter
335
            .size_hint()
336
            .1
337
            .expect("append_trusted_len_iter expects upper bound");
338
        self.reserve(len);
339
        self.extend(iter);
340
    }
341
342
    /// Resets this builder and returns an immutable [Buffer].
343
    ///
344
    /// # Example:
345
    ///
346
    /// ```
347
    /// # use arrow_buffer::builder::BufferBuilder;
348
    ///
349
    /// let mut builder = BufferBuilder::<u8>::new(10);
350
    /// builder.append_slice(&[42, 44, 46]);
351
    ///
352
    /// let buffer = builder.finish();
353
    ///
354
    /// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]);
355
    /// ```
356
    #[inline]
357
0
    pub fn finish(&mut self) -> Buffer {
358
0
        let buf = std::mem::take(&mut self.buffer);
359
0
        self.len = 0;
360
0
        buf.into()
361
0
    }
362
}
363
364
impl<T: ArrowNativeType> Default for BufferBuilder<T> {
    /// Creates an empty builder, equivalent to `BufferBuilder::new(0)`.
    fn default() -> Self {
        Self::new(0)
    }
}
369
370
impl<T: ArrowNativeType> Extend<T> for BufferBuilder<T> {
    /// Appends every item of `iter`, bumping the element count once per item
    /// as it is pulled from the iterator.
    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
        // Borrow the counter separately so the closure does not capture the
        // buffer, which is mutably borrowed by `extend` below.
        let len = &mut self.len;
        let counted = iter.into_iter().inspect(|_| *len += 1);
        self.buffer.extend(counted);
    }
}
377
378
impl<T: ArrowNativeType> From<Vec<T>> for BufferBuilder<T> {
379
    fn from(value: Vec<T>) -> Self {
380
        Self::new_from_buffer(MutableBuffer::from(value))
381
    }
382
}
383
384
impl<T: ArrowNativeType> FromIterator<T> for BufferBuilder<T> {
    /// Collects an iterator into a builder by extending a default builder.
    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
        let mut collected = Self::default();
        collected.extend(iter);
        collected
    }
}
391
392
#[cfg(test)]
mod tests {
    use super::*;
    use std::mem;

    /// A default builder tracks no elements and has zero byte capacity.
    #[test]
    fn default() {
        let empty = BufferBuilder::<u32>::default();
        assert!(empty.is_empty());
        assert!(empty.buffer.is_empty());
        assert_eq!(empty.buffer.capacity(), 0);
    }

    /// Collecting an iterator records both the element count and byte length.
    #[test]
    fn from_iter() {
        let values = [1u16, 2, 3, 4];
        let collected = values.into_iter().collect::<BufferBuilder<_>>();
        assert_eq!(collected.len(), 4);
        assert_eq!(collected.buffer.len(), 4 * mem::size_of::<u16>());
    }

    /// Extending an existing builder adds to its element count.
    #[test]
    fn extend() {
        let initial = [1, 2];
        let mut builder = initial.into_iter().collect::<BufferBuilder<_>>();
        assert_eq!(builder.len(), 2);
        builder.extend([3, 4]);
        assert_eq!(builder.len(), 4);
    }
}