Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-buffer/src/builder/boolean.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use crate::{bit_mask, bit_util, BooleanBuffer, Buffer, MutableBuffer};
19
use std::ops::Range;
20
21
/// Builder for [`BooleanBuffer`]
///
/// Bits are accumulated in a [`MutableBuffer`] and the number of bits
/// written so far is tracked separately, since the byte buffer can only
/// record its length in whole bytes.
///
/// # See Also
///
/// * [`NullBuffer`] for building [`BooleanBuffer`]s for representing nulls
///
/// [`NullBuffer`]: crate::NullBuffer
#[derive(Debug)]
pub struct BooleanBufferBuilder {
    /// Backing bytes holding the packed bits
    buffer: MutableBuffer,
    /// Number of bits (not bytes) currently held by the builder
    len: usize,
}
33
34
impl BooleanBufferBuilder {
35
    /// Creates a new `BooleanBufferBuilder` with sufficient space for
36
    /// `capacity` bits (not bytes).
37
    ///
38
    /// The capacity is rounded up to the nearest multiple of 8 for the
39
    /// allocation.
40
    #[inline]
41
207
    pub fn new(capacity: usize) -> Self {
42
207
        let byte_capacity = bit_util::ceil(capacity, 8);
43
207
        let buffer = MutableBuffer::new(byte_capacity);
44
207
        Self { buffer, len: 0 }
45
207
    }
46
47
    /// Creates a new `BooleanBufferBuilder` from [`MutableBuffer`] of `len`
48
0
    pub fn new_from_buffer(buffer: MutableBuffer, len: usize) -> Self {
49
0
        assert!(len <= buffer.len() * 8);
50
0
        let mut s = Self {
51
0
            len: buffer.len() * 8,
52
0
            buffer,
53
0
        };
54
0
        s.truncate(len);
55
0
        s
56
0
    }
57
58
    /// Returns the length of the buffer
59
    #[inline]
60
37
    pub fn len(&self) -> usize {
61
37
        self.len
62
37
    }
63
64
    /// Sets a bit in the buffer at `index`
65
    #[inline]
66
11
    pub fn set_bit(&mut self, index: usize, v: bool) {
67
11
        if v {
68
11
            bit_util::set_bit(self.buffer.as_mut(), index);
69
11
        } else {
70
0
            bit_util::unset_bit(self.buffer.as_mut(), index);
71
0
        }
72
11
    }
73
74
    /// Gets a bit in the buffer at `index`
75
    #[inline]
76
    pub fn get_bit(&self, index: usize) -> bool {
77
        bit_util::get_bit(self.buffer.as_slice(), index)
78
    }
79
80
    /// Returns true if empty
81
    #[inline]
82
    pub fn is_empty(&self) -> bool {
83
        self.len == 0
84
    }
85
86
    /// Returns the capacity of the buffer, in bits (not bytes)
87
    ///
88
    /// Note this
89
    ///
90
    /// # Example
91
    /// ```
92
    /// # use arrow_buffer::builder::BooleanBufferBuilder;
93
    /// // empty requires 0 bytes
94
    /// let b = BooleanBufferBuilder::new(0);
95
    /// assert_eq!(0, b.capacity());
96
    /// // Creating space for 1 bit results in 64 bytes (space for 512 bits)
97
    /// // (64 is the minimum allocation size for 64 bit architectures)
98
    /// let mut b = BooleanBufferBuilder::new(1);
99
    /// assert_eq!(512, b.capacity());
100
    /// // 1000 bits requires 128 bytes (space for 1024 bits)
101
    /// b.append_n(1000, true);
102
    /// assert_eq!(1024, b.capacity());
103
    /// ```
104
    #[inline]
105
0
    pub fn capacity(&self) -> usize {
106
0
        self.buffer.capacity() * 8
107
0
    }
108
109
    /// Advances the buffer by `additional` bits
110
    #[inline]
111
760
    pub fn advance(&mut self, additional: usize) {
112
760
        let new_len = self.len + additional;
113
760
        let new_len_bytes = bit_util::ceil(new_len, 8);
114
760
        if new_len_bytes > self.buffer.len() {
115
153
            self.buffer.resize(new_len_bytes, 0);
116
607
        }
117
760
        self.len = new_len;
118
760
    }
119
120
    /// Truncates the builder to the given length
121
    ///
122
    /// If `len` is greater than the buffer's current length, this has no effect
123
    #[inline]
124
0
    pub fn truncate(&mut self, len: usize) {
125
0
        if len > self.len {
126
0
            return;
127
0
        }
128
129
0
        let new_len_bytes = bit_util::ceil(len, 8);
130
0
        self.buffer.truncate(new_len_bytes);
131
0
        self.len = len;
132
133
0
        let remainder = self.len % 8;
134
0
        if remainder != 0 {
135
0
            let mask = (1_u8 << remainder).wrapping_sub(1);
136
0
            *self.buffer.as_mut().last_mut().unwrap() &= mask;
137
0
        }
138
0
    }
139
140
    /// Reserve space to at least `additional` new bits.
141
    /// Capacity will be `>= self.len() + additional`.
142
    /// New bytes are uninitialized and reading them is undefined behavior.
143
    #[inline]
144
    pub fn reserve(&mut self, additional: usize) {
145
        let capacity = self.len + additional;
146
        if capacity > self.capacity() {
147
            // convert differential to bytes
148
            let additional = bit_util::ceil(capacity, 8) - self.buffer.len();
149
            self.buffer.reserve(additional);
150
        }
151
    }
152
153
    /// Resizes the buffer, either truncating its contents (with no change in capacity), or
154
    /// growing it (potentially reallocating it) and writing `false` in the newly available bits.
155
    #[inline]
156
0
    pub fn resize(&mut self, len: usize) {
157
0
        match len.checked_sub(self.len) {
158
0
            Some(delta) => self.advance(delta),
159
0
            None => self.truncate(len),
160
        }
161
0
    }
162
163
    /// Appends a boolean `v` into the buffer
164
    #[inline]
165
708
    pub fn append(&mut self, v: bool) {
166
708
        self.advance(1);
167
708
        if v {
168
347
            unsafe { bit_util::set_bit_raw(self.buffer.as_mut_ptr(), self.len - 1) };
169
361
        }
170
708
    }
171
172
    /// Appends n `additional` bits of value `v` into the buffer
173
    #[inline]
174
148
    pub fn append_n(&mut self, additional: usize, v: bool) {
175
148
        match v {
176
            true => {
177
148
                let new_len = self.len + additional;
178
148
                let new_len_bytes = bit_util::ceil(new_len, 8);
179
148
                let cur_remainder = self.len % 8;
180
148
                let new_remainder = new_len % 8;
181
182
148
                if cur_remainder != 0 {
183
                    // Pad last byte with 1s
184
17
                    *self.buffer.as_slice_mut().last_mut().unwrap() |= !((1 << cur_remainder) - 1)
185
131
                }
186
148
                self.buffer.resize(new_len_bytes, 0xFF);
187
148
                if new_remainder != 0 {
188
                    // Clear remaining bits
189
99
                    *self.buffer.as_slice_mut().last_mut().unwrap() &= (1 << new_remainder) - 1
190
49
                }
191
148
                self.len = new_len;
192
            }
193
0
            false => self.advance(additional),
194
        }
195
148
    }
196
197
    /// Appends a slice of booleans into the buffer
198
    #[inline]
199
1
    pub fn append_slice(&mut self, slice: &[bool]) {
200
1
        let additional = slice.len();
201
1
        self.advance(additional);
202
203
1
        let offset = self.len() - additional;
204
5
        for (i, v) in 
slice1
.
iter1
().
enumerate1
() {
205
5
            if *v {
206
3
                unsafe { bit_util::set_bit_raw(self.buffer.as_mut_ptr(), offset + i) }
207
2
            }
208
        }
209
1
    }
210
211
    /// Append `range` bits from `to_set`
212
    ///
213
    /// `to_set` is a slice of bits packed LSB-first into `[u8]`
214
    ///
215
    /// # Panics
216
    ///
217
    /// Panics if `to_set` does not contain `ceil(range.end / 8)` bytes
218
45
    pub fn append_packed_range(&mut self, range: Range<usize>, to_set: &[u8]) {
219
45
        let offset_write = self.len;
220
45
        let len = range.end - range.start;
221
45
        self.advance(len);
222
45
        bit_mask::set_bits(
223
45
            self.buffer.as_slice_mut(),
224
45
            to_set,
225
45
            offset_write,
226
45
            range.start,
227
45
            len,
228
        );
229
45
    }
230
231
    /// Append [`BooleanBuffer`] to this [`BooleanBufferBuilder`]
232
45
    pub fn append_buffer(&mut self, buffer: &BooleanBuffer) {
233
45
        let range = buffer.offset()..buffer.offset() + buffer.len();
234
45
        self.append_packed_range(range, buffer.values())
235
45
    }
236
237
    /// Returns the packed bits
238
0
    pub fn as_slice(&self) -> &[u8] {
239
0
        self.buffer.as_slice()
240
0
    }
241
242
    /// Returns the packed bits
243
0
    pub fn as_slice_mut(&mut self) -> &mut [u8] {
244
0
        self.buffer.as_slice_mut()
245
0
    }
246
247
    /// Creates a [`BooleanBuffer`]
248
    #[inline]
249
202
    pub fn finish(&mut self) -> BooleanBuffer {
250
202
        let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));
251
202
        let len = std::mem::replace(&mut self.len, 0);
252
202
        BooleanBuffer::new(buf.into(), 0, len)
253
202
    }
254
255
    /// Builds the [BooleanBuffer] without resetting the builder.
256
0
    pub fn finish_cloned(&self) -> BooleanBuffer {
257
0
        BooleanBuffer::new(Buffer::from_slice_ref(self.as_slice()), 0, self.len)
258
0
    }
259
}
260
261
impl From<BooleanBufferBuilder> for Buffer {
262
    #[inline]
263
15
    fn from(builder: BooleanBufferBuilder) -> Self {
264
15
        builder.buffer.into()
265
15
    }
266
}
267
268
impl From<BooleanBufferBuilder> for BooleanBuffer {
269
    #[inline]
270
    fn from(builder: BooleanBufferBuilder) -> Self {
271
        BooleanBuffer::new(builder.buffer.into(), 0, builder.len)
272
    }
273
}
274
275
#[cfg(test)]
mod tests {
    use super::*;

    // Appending four bits, one at a time or as a slice, yields a length of 4
    // and a reported capacity of 512 bits.
    #[test]
    fn test_boolean_buffer_builder_write_bytes() {
        let mut b = BooleanBufferBuilder::new(4);
        b.append(false);
        b.append(true);
        b.append(false);
        b.append(true);
        assert_eq!(4, b.len());
        assert_eq!(512, b.capacity());
        let buffer = b.finish();
        assert_eq!(4, buffer.len());

        // Overallocate capacity
        let mut b = BooleanBufferBuilder::new(8);
        b.append_slice(&[false, true, false, true]);
        assert_eq!(4, b.len());
        assert_eq!(512, b.capacity());
        let buffer = b.finish();
        assert_eq!(4, buffer.len());
    }

    // `set_bit(0, false)` clears only the lowest bit.
    #[test]
    fn test_boolean_buffer_builder_unset_first_bit() {
        let mut buffer = BooleanBufferBuilder::new(4);
        buffer.append(true);
        buffer.append(true);
        buffer.append(false);
        buffer.append(true);
        buffer.set_bit(0, false);
        assert_eq!(buffer.len(), 4);
        assert_eq!(buffer.finish().values(), &[0b1010_u8]);
    }

    // `set_bit` on the last appended bit clears it without changing the length.
    #[test]
    fn test_boolean_buffer_builder_unset_last_bit() {
        let mut buffer = BooleanBufferBuilder::new(4);
        buffer.append(true);
        buffer.append(true);
        buffer.append(false);
        buffer.append(true);
        buffer.set_bit(3, false);
        assert_eq!(buffer.len(), 4);
        assert_eq!(buffer.finish().values(), &[0b0011_u8]);
    }

    // Clearing a bit in the middle leaves its neighbours untouched.
    #[test]
    fn test_boolean_buffer_builder_unset_an_inner_bit() {
        let mut buffer = BooleanBufferBuilder::new(5);
        buffer.append(true);
        buffer.append(true);
        buffer.append(false);
        buffer.append(true);
        buffer.set_bit(1, false);
        assert_eq!(buffer.len(), 4);
        assert_eq!(buffer.finish().values(), &[0b1001_u8]);
    }

    // Several `set_bit` calls within one byte compose as expected.
    #[test]
    fn test_boolean_buffer_builder_unset_several_bits() {
        let mut buffer = BooleanBufferBuilder::new(5);
        buffer.append(true);
        buffer.append(true);
        buffer.append(true);
        buffer.append(false);
        buffer.append(true);
        buffer.set_bit(1, false);
        buffer.set_bit(2, false);
        assert_eq!(buffer.len(), 5);
        assert_eq!(buffer.finish().values(), &[0b10001_u8]);
    }

    // `set_bit` addresses bits in the second byte (index 9) as well as the first.
    #[test]
    fn test_boolean_buffer_builder_unset_several_bits_bigger_than_one_byte() {
        let mut buffer = BooleanBufferBuilder::new(16);
        buffer.append_n(10, true);
        buffer.set_bit(0, false);
        buffer.set_bit(3, false);
        buffer.set_bit(9, false);
        assert_eq!(buffer.len(), 10);
        assert_eq!(buffer.finish().values(), &[0b11110110_u8, 0b01_u8]);
    }

    // Mixes runs of `append_n(true)` / `append_n(false)` that straddle a byte
    // boundary with both setting and clearing individual bits.
    #[test]
    fn test_boolean_buffer_builder_flip_several_bits_bigger_than_one_byte() {
        let mut buffer = BooleanBufferBuilder::new(16);
        buffer.append_n(5, true);
        buffer.append_n(5, false);
        buffer.append_n(5, true);
        buffer.set_bit(0, false);
        buffer.set_bit(3, false);
        buffer.set_bit(9, false);
        buffer.set_bit(6, true);
        buffer.set_bit(14, true);
        buffer.set_bit(13, false);
        assert_eq!(buffer.len(), 15);
        assert_eq!(buffer.finish().values(), &[0b01010110_u8, 0b1011100_u8]);
    }

    // `get_bit` reads the first bit of the builder.
    #[test]
    fn test_bool_buffer_builder_get_first_bit() {
        let mut buffer = BooleanBufferBuilder::new(16);
        buffer.append_n(8, true);
        buffer.append_n(8, false);
        assert!(buffer.get_bit(0));
    }

    // `get_bit` takes `&self`, so it works on a builder bound immutably.
    #[test]
    fn test_bool_buffer_builder_get_first_bit_not_requires_mutability() {
        let buffer = {
            let mut buffer = BooleanBufferBuilder::new(16);
            buffer.append_n(8, true);
            buffer
        };

        assert!(buffer.get_bit(0));
    }

    // `get_bit` reads the last bit of the builder.
    #[test]
    fn test_bool_buffer_builder_get_last_bit() {
        let mut buffer = BooleanBufferBuilder::new(16);
        buffer.append_n(8, true);
        buffer.append_n(8, false);
        assert!(!buffer.get_bit(15));
    }

    // `get_bit` reads a bit from a `true` run that spans two bytes.
    #[test]
    fn test_bool_buffer_builder_get_an_inner_bit() {
        let mut buffer = BooleanBufferBuilder::new(16);
        buffer.append_n(4, false);
        buffer.append_n(8, true);
        buffer.append_n(4, false);
        assert!(buffer.get_bit(11));
    }

    // Appends 100 random sub-ranges of a packed source with
    // `append_packed_range` and checks the result equals the same bools
    // appended with `append_slice`.
    #[test]
    fn test_bool_buffer_fuzz() {
        use rand::prelude::*;

        let mut buffer = BooleanBufferBuilder::new(12);
        let mut all_bools = vec![];
        let mut rng = rand::rng();

        let src_len = 32;
        let (src, compacted_src) = {
            let src: Vec<_> = std::iter::from_fn(|| Some(rng.next_u32() & 1 == 0))
                .take(src_len)
                .collect();

            let mut compacted_src = BooleanBufferBuilder::new(src_len);
            compacted_src.append_slice(&src);
            (src, compacted_src.finish())
        };

        for _ in 0..100 {
            let a = rng.next_u32() as usize % src_len;
            let b = rng.next_u32() as usize % src_len;

            let start = a.min(b);
            let end = a.max(b);

            buffer.append_packed_range(start..end, compacted_src.values());
            all_bools.extend_from_slice(&src[start..end]);
        }

        let mut compacted = BooleanBufferBuilder::new(all_bools.len());
        compacted.append_slice(&all_bools);

        assert_eq!(buffer.finish(), compacted.finish())
    }

    // `resize` grows with `false` bits, shrinks by truncating, and leaves the
    // builder in a state where a following `append_n` lands on the right bits.
    #[test]
    fn test_boolean_array_builder_resize() {
        let mut builder = BooleanBufferBuilder::new(20);
        builder.append_n(4, true);
        builder.append_n(7, false);
        builder.append_n(2, true);
        builder.resize(20);

        assert_eq!(builder.len(), 20);
        assert_eq!(builder.as_slice(), &[0b00001111, 0b00011000, 0b00000000]);

        builder.resize(5);
        assert_eq!(builder.len(), 5);
        assert_eq!(builder.as_slice(), &[0b00001111]);

        builder.append_n(4, true);
        assert_eq!(builder.len(), 9);
        assert_eq!(builder.as_slice(), &[0b11101111, 0b00000001]);
    }

    // Truncation must zero the bits past the new length, so that a later
    // `advance` / `append(false)` does not expose stale `true` bits.
    #[test]
    fn test_truncate() {
        let b = MutableBuffer::from_iter([true, true, true, true]);
        let mut builder = BooleanBufferBuilder::new_from_buffer(b, 2);
        builder.advance(2);
        let finished = builder.finish();
        assert_eq!(finished.values(), &[0b00000011]);

        let mut builder = BooleanBufferBuilder::new(10);
        builder.append_n(5, true);
        builder.resize(3);
        builder.advance(2);
        let finished = builder.finish();
        assert_eq!(finished.values(), &[0b00000111]);

        let mut builder = BooleanBufferBuilder::new(10);
        builder.append_n(16, true);
        assert_eq!(builder.as_slice(), &[0xFF, 0xFF]);
        // Truncating to more than the current length is a no-op
        builder.truncate(20);
        assert_eq!(builder.as_slice(), &[0xFF, 0xFF]);
        builder.truncate(14);
        assert_eq!(builder.as_slice(), &[0xFF, 0b00111111]);
        builder.append(false);
        builder.append(true);
        assert_eq!(builder.as_slice(), &[0xFF, 0b10111111]);
        builder.append_packed_range(0..3, &[0xFF]);
        assert_eq!(builder.as_slice(), &[0xFF, 0b10111111, 0b00000111]);
        builder.truncate(17);
        assert_eq!(builder.as_slice(), &[0xFF, 0b10111111, 0b00000001]);
        builder.append_packed_range(0..2, &[2]);
        assert_eq!(builder.as_slice(), &[0xFF, 0b10111111, 0b0000101]);
        builder.truncate(8);
        assert_eq!(builder.as_slice(), &[0xFF]);
        builder.resize(14);
        assert_eq!(builder.as_slice(), &[0xFF, 0x00]);
        builder.truncate(0);
        assert_eq!(builder.as_slice(), &[]);
    }

    // Appending 16 bits to a builder created for 8 grows the byte length to
    // match a two-byte reference buffer.
    #[test]
    fn test_boolean_builder_increases_buffer_len() {
        // 00000010 01001000
        let buf = Buffer::from([72_u8, 2_u8]);
        let mut builder = BooleanBufferBuilder::new(8);

        for i in 0..16 {
            if i == 3 || i == 6 || i == 9 {
                builder.append(true);
            } else {
                builder.append(false);
            }
        }
        let buf2 = builder.finish();

        assert_eq!(buf.len(), buf2.inner().len());
        assert_eq!(buf.as_slice(), buf2.values());
    }
}