Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-buffer/src/builder/null.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use crate::{BooleanBufferBuilder, MutableBuffer, NullBuffer};
19
20
/// Builder for creating [`NullBuffer`]
21
///
22
/// # Performance
23
///
24
/// This builder only materializes the buffer when we append `false`.
25
/// If you only append `true`s to the builder, what you get will be
26
/// `None` when calling [`finish`](#method.finish).
27
///
28
/// This optimization is **very** important for the performance as it avoids
29
/// allocating memory for the null buffer when there are no nulls.
30
///
31
/// See [`Self::allocated_size`] to get the current memory allocated by the builder.
32
///
33
/// # Example
34
/// ```
35
/// # use arrow_buffer::NullBufferBuilder;
36
/// let mut builder = NullBufferBuilder::new(8);
37
/// builder.append_n_non_nulls(8);
38
/// // If no non null values are appended, the null buffer is not created
39
/// let buffer = builder.finish();
40
/// assert!(buffer.is_none());
41
/// // however, if a null value is appended, the null buffer is created
42
/// let mut builder = NullBufferBuilder::new(8);
43
/// builder.append_n_non_nulls(7);
44
/// builder.append_null();
45
/// let buffer = builder.finish().unwrap();
46
/// assert_eq!(buffer.len(), 8);
47
/// assert_eq!(buffer.iter().collect::<Vec<_>>(), vec![true, true, true, true, true, true, true, false]);
48
/// ```
49
#[derive(Debug)]
50
pub struct NullBufferBuilder {
51
    /// The bitmap builder to store the null buffer:
52
    /// * `Some` if any nulls have been appended ("materialized")
53
    /// * `None` if no nulls have been appended.
54
    bitmap_builder: Option<BooleanBufferBuilder>,
55
    /// Length of the buffer before materializing.
56
    ///
57
    /// if `bitmap_buffer` buffer is `Some`, this value is not used.
58
    len: usize,
59
    /// Initial capacity of the `bitmap_builder`, when it is materialized.
60
    capacity: usize,
61
}
62
63
impl NullBufferBuilder {
64
    /// Creates a new empty builder.
65
    ///
66
    /// Note that this method does not allocate any memory, regardless of the
67
    /// `capacity` parameter. If an allocation is required, `capacity` is the
68
    /// size in bits (not bytes) that will be allocated at minimum.
69
723
    pub fn new(capacity: usize) -> Self {
70
723
        Self {
71
723
            bitmap_builder: None,
72
723
            len: 0,
73
723
            capacity,
74
723
        }
75
723
    }
76
77
    /// Creates a new builder with given length.
78
0
    pub fn new_with_len(len: usize) -> Self {
79
0
        Self {
80
0
            bitmap_builder: None,
81
0
            len,
82
0
            capacity: len,
83
0
        }
84
0
    }
85
86
    /// Creates a new builder from a `MutableBuffer`.
87
0
    pub fn new_from_buffer(buffer: MutableBuffer, len: usize) -> Self {
88
0
        let capacity = buffer.len() * 8;
89
0
        assert!(len <= capacity);
90
91
0
        let bitmap_builder = Some(BooleanBufferBuilder::new_from_buffer(buffer, len));
92
0
        Self {
93
0
            bitmap_builder,
94
0
            len,
95
0
            capacity,
96
0
        }
97
0
    }
98
99
    /// Appends `n` `true`s into the builder
100
    /// to indicate that these `n` items are not nulls.
101
    #[inline]
102
227
    pub fn append_n_non_nulls(&mut self, n: usize) {
103
227
        if let Some(
buf7
) = self.bitmap_builder.as_mut() {
104
7
            buf.append_n(n, true)
105
220
        } else {
106
220
            self.len += n;
107
220
        }
108
227
    }
109
110
    /// Appends a `true` into the builder
111
    /// to indicate that this item is not null.
112
    #[inline]
113
4.19k
    pub fn append_non_null(&mut self) {
114
4.19k
        if let Some(
buf121
) = self.bitmap_builder.as_mut() {
115
121
            buf.append(true)
116
4.07k
        } else {
117
4.07k
            self.len += 1;
118
4.07k
        }
119
4.19k
    }
120
121
    /// Appends `n` `false`s into the builder
122
    /// to indicate that these `n` items are nulls.
123
    #[inline]
124
0
    pub fn append_n_nulls(&mut self, n: usize) {
125
0
        self.materialize_if_needed();
126
0
        self.bitmap_builder.as_mut().unwrap().append_n(n, false);
127
0
    }
128
129
    /// Appends a `false` into the builder
130
    /// to indicate that this item is null.
131
    #[inline]
132
205
    pub fn append_null(&mut self) {
133
205
        self.materialize_if_needed();
134
205
        self.bitmap_builder.as_mut().unwrap().append(false);
135
205
    }
136
137
    /// Appends a boolean value into the builder.
138
    #[inline]
139
4.16k
    pub fn append(&mut self, not_null: bool) {
140
4.16k
        if not_null {
141
3.96k
            self.append_non_null()
142
        } else {
143
201
            self.append_null()
144
        }
145
4.16k
    }
146
147
    /// Gets a bit in the buffer at `index`
148
    #[inline]
149
    pub fn is_valid(&self, index: usize) -> bool {
150
        if let Some(ref buf) = self.bitmap_builder {
151
            buf.get_bit(index)
152
        } else {
153
            true
154
        }
155
    }
156
157
    /// Truncates the builder to the given length
158
    ///
159
    /// If `len` is greater than the buffer's current length, this has no effect
160
    #[inline]
161
    pub fn truncate(&mut self, len: usize) {
162
        if let Some(buf) = self.bitmap_builder.as_mut() {
163
            buf.truncate(len);
164
        } else if len <= self.len {
165
            self.len = len
166
        }
167
    }
168
169
    /// Appends a boolean slice into the builder
170
    /// to indicate the validations of these items.
171
0
    pub fn append_slice(&mut self, slice: &[bool]) {
172
0
        if slice.iter().any(|v| !v) {
173
0
            self.materialize_if_needed()
174
0
        }
175
0
        if let Some(buf) = self.bitmap_builder.as_mut() {
176
0
            buf.append_slice(slice)
177
0
        } else {
178
0
            self.len += slice.len();
179
0
        }
180
0
    }
181
182
    /// Append [`NullBuffer`] to this [`NullBufferBuilder`]
183
    ///
184
    /// This is useful when you want to concatenate two null buffers.
185
11
    pub fn append_buffer(&mut self, buffer: &NullBuffer) {
186
11
        if buffer.null_count() > 0 {
187
11
            self.materialize_if_needed();
188
11
        
}0
189
11
        if let Some(buf) = self.bitmap_builder.as_mut() {
190
11
            buf.append_buffer(buffer.inner())
191
0
        } else {
192
0
            self.len += buffer.len();
193
0
        }
194
11
    }
195
196
    /// Builds the null buffer and resets the builder.
197
    /// Returns `None` if the builder only contains `true`s.
198
967
    pub fn finish(&mut self) -> Option<NullBuffer> {
199
967
        self.len = 0;
200
967
        Some(
NullBuffer::new124
(self.bitmap_builder.take()
?843
.
finish124
()))
201
967
    }
202
203
    /// Builds the [NullBuffer] without resetting the builder.
204
0
    pub fn finish_cloned(&self) -> Option<NullBuffer> {
205
0
        let buffer = self.bitmap_builder.as_ref()?.finish_cloned();
206
0
        Some(NullBuffer::new(buffer))
207
0
    }
208
209
    /// Returns the inner bitmap builder as slice
210
0
    pub fn as_slice(&self) -> Option<&[u8]> {
211
0
        Some(self.bitmap_builder.as_ref()?.as_slice())
212
0
    }
213
214
216
    fn materialize_if_needed(&mut self) {
215
216
        if self.bitmap_builder.is_none() {
216
124
            self.materialize()
217
92
        }
218
216
    }
219
220
    #[cold]
221
124
    fn materialize(&mut self) {
222
124
        if self.bitmap_builder.is_none() {
223
124
            let mut b = BooleanBufferBuilder::new(self.len.max(self.capacity));
224
124
            b.append_n(self.len, true);
225
124
            self.bitmap_builder = Some(b);
226
124
        
}0
227
124
    }
228
229
    /// Return a mutable reference to the inner bitmap slice.
230
0
    pub fn as_slice_mut(&mut self) -> Option<&mut [u8]> {
231
0
        self.bitmap_builder.as_mut().map(|b| b.as_slice_mut())
232
0
    }
233
234
    /// Return the allocated size of this builder, in bytes, useful for memory accounting.
235
0
    pub fn allocated_size(&self) -> usize {
236
0
        self.bitmap_builder
237
0
            .as_ref()
238
0
            .map(|b| b.capacity() / 8)
239
0
            .unwrap_or(0)
240
0
    }
241
}
242
243
impl NullBufferBuilder {
244
    /// Return the number of bits in the buffer.
245
82
    pub fn len(&self) -> usize {
246
82
        self.bitmap_builder.as_ref().map_or(self.len, |b| 
b14
.
len14
())
247
82
    }
248
249
    /// Check if the builder is empty.
250
0
    pub fn is_empty(&self) -> bool {
251
0
        self.len() == 0
252
0
    }
253
}
254
255
#[cfg(test)]
256
mod tests {
257
    use super::*;
258
259
    #[test]
260
    fn test_null_buffer_builder() {
261
        let mut builder = NullBufferBuilder::new(0);
262
        builder.append_null();
263
        builder.append_non_null();
264
        builder.append_n_nulls(2);
265
        builder.append_n_non_nulls(2);
266
        assert_eq!(6, builder.len());
267
        assert_eq!(64, builder.allocated_size());
268
269
        let buf = builder.finish().unwrap();
270
        assert_eq!(&[0b110010_u8], buf.validity());
271
    }
272
273
    #[test]
274
    fn test_null_buffer_builder_all_nulls() {
275
        let mut builder = NullBufferBuilder::new(0);
276
        builder.append_null();
277
        builder.append_n_nulls(2);
278
        builder.append_slice(&[false, false, false]);
279
        assert_eq!(6, builder.len());
280
        assert_eq!(64, builder.allocated_size());
281
282
        let buf = builder.finish().unwrap();
283
        assert_eq!(&[0b0_u8], buf.validity());
284
    }
285
286
    #[test]
287
    fn test_null_buffer_builder_no_null() {
288
        let mut builder = NullBufferBuilder::new(0);
289
        builder.append_non_null();
290
        builder.append_n_non_nulls(2);
291
        builder.append_slice(&[true, true, true]);
292
        assert_eq!(6, builder.len());
293
        assert_eq!(0, builder.allocated_size());
294
295
        let buf = builder.finish();
296
        assert!(buf.is_none());
297
    }
298
299
    #[test]
300
    fn test_null_buffer_builder_reset() {
301
        let mut builder = NullBufferBuilder::new(0);
302
        builder.append_slice(&[true, false, true]);
303
        builder.finish();
304
        assert!(builder.is_empty());
305
306
        builder.append_slice(&[true, true, true]);
307
        assert!(builder.finish().is_none());
308
        assert!(builder.is_empty());
309
310
        builder.append_slice(&[true, true, false, true]);
311
312
        let buf = builder.finish().unwrap();
313
        assert_eq!(&[0b1011_u8], buf.validity());
314
    }
315
316
    #[test]
317
    fn test_null_buffer_builder_is_valid() {
318
        let mut builder = NullBufferBuilder::new(0);
319
        builder.append_n_non_nulls(6);
320
        assert!(builder.is_valid(0));
321
322
        builder.append_null();
323
        assert!(!builder.is_valid(6));
324
325
        builder.append_non_null();
326
        assert!(builder.is_valid(7));
327
    }
328
329
    #[test]
330
    fn test_null_buffer_builder_truncate() {
331
        let mut builder = NullBufferBuilder::new(10);
332
        builder.append_n_non_nulls(16);
333
        assert_eq!(builder.as_slice(), None);
334
        builder.truncate(20);
335
        assert_eq!(builder.as_slice(), None);
336
        assert_eq!(builder.len(), 16);
337
        assert_eq!(builder.allocated_size(), 0);
338
        builder.truncate(14);
339
        assert_eq!(builder.as_slice(), None);
340
        assert_eq!(builder.len(), 14);
341
        builder.append_null();
342
        builder.append_non_null();
343
        assert_eq!(builder.as_slice().unwrap(), &[0xFF, 0b10111111]);
344
        assert_eq!(builder.allocated_size(), 64);
345
    }
346
347
    #[test]
348
    fn test_null_buffer_builder_truncate_never_materialized() {
349
        let mut builder = NullBufferBuilder::new(0);
350
        assert_eq!(builder.len(), 0);
351
        builder.append_n_nulls(2); // doesn't materialize
352
        assert_eq!(builder.len(), 2);
353
        builder.truncate(1);
354
        assert_eq!(builder.len(), 1);
355
    }
356
357
    #[test]
358
    fn test_append_buffers() {
359
        let mut builder = NullBufferBuilder::new(0);
360
        let buffer1 = NullBuffer::from(&[true, true]);
361
        let buffer2 = NullBuffer::from(&[true, true, false]);
362
363
        builder.append_buffer(&buffer1);
364
        builder.append_buffer(&buffer2);
365
366
        assert_eq!(builder.as_slice().unwrap(), &[0b01111_u8]);
367
    }
368
369
    #[test]
370
    fn test_append_buffers_with_unaligned_length() {
371
        let mut builder = NullBufferBuilder::new(0);
372
        let buffer = NullBuffer::from(&[true, true, false, true, false]);
373
        builder.append_buffer(&buffer);
374
        assert_eq!(builder.as_slice().unwrap(), &[0b01011_u8]);
375
376
        let buffer = NullBuffer::from(&[false, false, true, true, true, false, false]);
377
        builder.append_buffer(&buffer);
378
        assert_eq!(builder.as_slice().unwrap(), &[0b10001011_u8, 0b0011_u8]);
379
    }
380
381
    #[test]
382
    fn test_append_empty_buffer() {
383
        let mut builder = NullBufferBuilder::new(0);
384
        let buffer = NullBuffer::from(&[true, true, false, true]);
385
        builder.append_buffer(&buffer);
386
        assert_eq!(builder.as_slice().unwrap(), &[0b1011_u8]);
387
388
        let buffer = NullBuffer::from(&[]);
389
        builder.append_buffer(&buffer);
390
391
        assert_eq!(builder.as_slice().unwrap(), &[0b1011_u8]);
392
    }
393
394
    #[test]
395
    fn test_should_not_materialize_when_appending_all_valid_buffers() {
396
        let mut builder = NullBufferBuilder::new(0);
397
        let buffer = NullBuffer::from(&[true; 10]);
398
        builder.append_buffer(&buffer);
399
400
        let buffer = NullBuffer::from(&[true; 2]);
401
        builder.append_buffer(&buffer);
402
403
        assert_eq!(builder.finish(), None);
404
    }
405
}