Coverage Report

Created: 2025-11-17 14:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-buffer/src/builder/boolean.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use crate::bit_util::apply_bitwise_binary_op;
19
use crate::{BooleanBuffer, Buffer, MutableBuffer, bit_util};
20
use std::ops::Range;
21
22
/// Builder for [`BooleanBuffer`]
23
///
24
/// # See Also
25
///
26
/// * [`NullBuffer`] for building [`BooleanBuffer`]s for representing nulls
27
///
28
/// [`NullBuffer`]: crate::NullBuffer
29
#[derive(Debug)]
30
pub struct BooleanBufferBuilder {
31
    buffer: MutableBuffer,
32
    len: usize,
33
}
34
35
impl BooleanBufferBuilder {
36
    /// Creates a new `BooleanBufferBuilder` with sufficient space for
37
    /// `capacity` bits (not bytes).
38
    ///
39
    /// The capacity is rounded up to the nearest multiple of 8 for the
40
    /// allocation.
41
    #[inline]
42
83.7k
    pub fn new(capacity: usize) -> Self {
43
83.7k
        let byte_capacity = bit_util::ceil(capacity, 8);
44
83.7k
        let buffer = MutableBuffer::new(byte_capacity);
45
83.7k
        Self { buffer, len: 0 }
46
83.7k
    }
47
48
    /// Creates a new `BooleanBufferBuilder` from [`MutableBuffer`] of `len`
49
6
    pub fn new_from_buffer(buffer: MutableBuffer, len: usize) -> Self {
50
6
        assert!(len <= buffer.len() * 8);
51
6
        let mut s = Self {
52
6
            len: buffer.len() * 8,
53
6
            buffer,
54
6
        };
55
6
        s.truncate(len);
56
6
        s
57
6
    }
58
59
    /// Returns the length of the buffer
60
    #[inline]
61
2.39k
    pub fn len(&self) -> usize {
62
2.39k
        self.len
63
2.39k
    }
64
65
    /// Sets a bit in the buffer at `index`
66
    #[inline]
67
80.1k
    pub fn set_bit(&mut self, index: usize, v: bool) {
68
80.1k
        if v {
69
80.1k
            bit_util::set_bit(self.buffer.as_mut(), index);
70
80.1k
        } else {
71
3
            bit_util::unset_bit(self.buffer.as_mut(), index);
72
3
        }
73
80.1k
    }
74
75
    /// Gets a bit in the buffer at `index`
76
    #[inline]
77
    pub fn get_bit(&self, index: usize) -> bool {
78
        bit_util::get_bit(self.buffer.as_slice(), index)
79
    }
80
81
    /// Returns true if empty
82
    #[inline]
83
    pub fn is_empty(&self) -> bool {
84
        self.len == 0
85
    }
86
87
    /// Returns the capacity of the buffer, in bits (not bytes)
88
    ///
89
    /// Note this
90
    ///
91
    /// # Example
92
    /// ```
93
    /// # use arrow_buffer::builder::BooleanBufferBuilder;
94
    /// // empty requires 0 bytes
95
    /// let b = BooleanBufferBuilder::new(0);
96
    /// assert_eq!(0, b.capacity());
97
    /// // Creating space for 1 bit results in 64 bytes (space for 512 bits)
98
    /// // (64 is the minimum allocation size for 64 bit architectures)
99
    /// let mut b = BooleanBufferBuilder::new(1);
100
    /// assert_eq!(512, b.capacity());
101
    /// // 1000 bits requires 128 bytes (space for 1024 bits)
102
    /// b.append_n(1000, true);
103
    /// assert_eq!(1024, b.capacity());
104
    /// ```
105
    #[inline]
106
0
    pub fn capacity(&self) -> usize {
107
0
        self.buffer.capacity() * 8
108
0
    }
109
110
    /// Advances the buffer by `additional` bits
111
    #[inline]
112
1.22M
    pub fn advance(&mut self, additional: usize) {
113
1.22M
        let new_len = self.len + additional;
114
1.22M
        let new_len_bytes = bit_util::ceil(new_len, 8);
115
1.22M
        if new_len_bytes > self.buffer.len() {
116
228k
            self.buffer.resize(new_len_bytes, 0);
117
992k
        }
118
1.22M
        self.len = new_len;
119
1.22M
    }
120
121
    /// Truncates the builder to the given length
122
    ///
123
    /// If `len` is greater than the buffer's current length, this has no effect
124
    #[inline]
125
6
    pub fn truncate(&mut self, len: usize) {
126
6
        if len > self.len {
127
0
            return;
128
6
        }
129
130
6
        let new_len_bytes = bit_util::ceil(len, 8);
131
6
        self.buffer.truncate(new_len_bytes);
132
6
        self.len = len;
133
134
6
        let remainder = self.len % 8;
135
6
        if remainder != 0 {
136
6
            let mask = (1_u8 << remainder).wrapping_sub(1);
137
6
            *self.buffer.as_mut().last_mut().unwrap() &= mask;
138
6
        
}0
139
6
    }
140
141
    /// Reserve space to at least `additional` new bits.
142
    /// Capacity will be `>= self.len() + additional`.
143
    /// New bytes are uninitialized and reading them is undefined behavior.
144
    #[inline]
145
    pub fn reserve(&mut self, additional: usize) {
146
        let capacity = self.len + additional;
147
        if capacity > self.capacity() {
148
            // convert differential to bytes
149
            let additional = bit_util::ceil(capacity, 8) - self.buffer.len();
150
            self.buffer.reserve(additional);
151
        }
152
    }
153
154
    /// Resizes the buffer, either truncating its contents (with no change in capacity), or
155
    /// growing it (potentially reallocating it) and writing `false` in the newly available bits.
156
    #[inline]
157
5
    pub fn resize(&mut self, len: usize) {
158
5
        match len.checked_sub(self.len) {
159
5
            Some(delta) => self.advance(delta),
160
0
            None => self.truncate(len),
161
        }
162
5
    }
163
164
    /// Appends a boolean `v` into the buffer
165
    #[inline]
166
1.13M
    pub fn append(&mut self, v: bool) {
167
1.13M
        self.advance(1);
168
1.13M
        if v {
169
874k
            unsafe { bit_util::set_bit_raw(self.buffer.as_mut_ptr(), self.len - 1) };
170
874k
        
}257k
171
1.13M
    }
172
173
    /// Appends n `additional` bits of value `v` into the buffer
174
    #[inline]
175
1.51k
    pub fn append_n(&mut self, additional: usize, v: bool) {
176
1.51k
        match v {
177
            true => {
178
1.50k
                let new_len = self.len + additional;
179
1.50k
                let new_len_bytes = bit_util::ceil(new_len, 8);
180
1.50k
                let cur_remainder = self.len % 8;
181
1.50k
                let new_remainder = new_len % 8;
182
183
1.50k
                if cur_remainder != 0 {
184
                    // Pad last byte with 1s
185
19
                    *self.buffer.as_slice_mut().last_mut().unwrap() |= !((1 << cur_remainder) - 1)
186
1.48k
                }
187
1.50k
                self.buffer.resize(new_len_bytes, 0xFF);
188
1.50k
                if new_remainder != 0 {
189
                    // Clear remaining bits
190
332
                    *self.buffer.as_slice_mut().last_mut().unwrap() &= (1 << new_remainder) - 1
191
1.17k
                }
192
1.50k
                self.len = new_len;
193
            }
194
9
            false => self.advance(additional),
195
        }
196
1.51k
    }
197
198
    /// Appends a slice of booleans into the buffer
199
    #[inline]
200
18
    pub fn append_slice(&mut self, slice: &[bool]) {
201
18
        let additional = slice.len();
202
18
        self.advance(additional);
203
204
18
        let offset = self.len() - additional;
205
89
        for (i, v) in 
slice18
.
iter18
().
enumerate18
() {
206
89
            if *v {
207
58
                unsafe { bit_util::set_bit_raw(self.buffer.as_mut_ptr(), offset + i) }
208
31
            }
209
        }
210
18
    }
211
212
    /// Append `range` bits from `to_set`
213
    ///
214
    /// `to_set` is a slice of bits packed LSB-first into `[u8]`
215
    ///
216
    /// # Panics
217
    ///
218
    /// Panics if `to_set` does not contain `ceil(range.end / 8)` bytes
219
9.01k
    pub fn append_packed_range(&mut self, range: Range<usize>, to_set: &[u8]) {
220
9.01k
        let offset_write = self.len;
221
9.01k
        let len = range.end - range.start;
222
        // allocate new bits as 0
223
9.01k
        self.advance(len);
224
        // copy bits from to_set into self.buffer a word at a time
225
9.01k
        apply_bitwise_binary_op(
226
9.01k
            self.buffer.as_slice_mut(),
227
9.01k
            offset_write,
228
9.01k
            to_set,
229
9.01k
            range.start,
230
9.01k
            len,
231
            |_a, b| b, // copy bits from to_set
232
        );
233
9.01k
    }
234
235
    /// Append [`BooleanBuffer`] to this [`BooleanBufferBuilder`]
236
2.38k
    pub fn append_buffer(&mut self, buffer: &BooleanBuffer) {
237
2.38k
        let range = buffer.offset()..buffer.offset() + buffer.len();
238
2.38k
        self.append_packed_range(range, buffer.values())
239
2.38k
    }
240
241
    /// Returns the packed bits
242
0
    pub fn as_slice(&self) -> &[u8] {
243
0
        self.buffer.as_slice()
244
0
    }
245
246
    /// Returns the packed bits
247
5
    pub fn as_slice_mut(&mut self) -> &mut [u8] {
248
5
        self.buffer.as_slice_mut()
249
5
    }
250
251
    /// Creates a [`BooleanBuffer`]
252
    #[inline]
253
82.4k
    pub fn finish(&mut self) -> BooleanBuffer {
254
82.4k
        let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));
255
82.4k
        let len = std::mem::replace(&mut self.len, 0);
256
82.4k
        BooleanBuffer::new(buf.into(), 0, len)
257
82.4k
    }
258
259
    /// Builds the [BooleanBuffer] without resetting the builder.
260
0
    pub fn finish_cloned(&self) -> BooleanBuffer {
261
0
        BooleanBuffer::new(Buffer::from_slice_ref(self.as_slice()), 0, self.len)
262
0
    }
263
}
264
265
impl From<BooleanBufferBuilder> for Buffer {
266
    #[inline]
267
1.30k
    fn from(builder: BooleanBufferBuilder) -> Self {
268
1.30k
        builder.buffer.into()
269
1.30k
    }
270
}
271
272
impl From<BooleanBufferBuilder> for BooleanBuffer {
273
    #[inline]
274
    fn from(builder: BooleanBufferBuilder) -> Self {
275
        BooleanBuffer::new(builder.buffer.into(), 0, builder.len)
276
    }
277
}
278
279
#[cfg(test)]
280
mod tests {
281
    use super::*;
282
283
    #[test]
284
    fn test_boolean_buffer_builder_write_bytes() {
285
        let mut b = BooleanBufferBuilder::new(4);
286
        b.append(false);
287
        b.append(true);
288
        b.append(false);
289
        b.append(true);
290
        assert_eq!(4, b.len());
291
        assert_eq!(512, b.capacity());
292
        let buffer = b.finish();
293
        assert_eq!(4, buffer.len());
294
295
        // Overallocate capacity
296
        let mut b = BooleanBufferBuilder::new(8);
297
        b.append_slice(&[false, true, false, true]);
298
        assert_eq!(4, b.len());
299
        assert_eq!(512, b.capacity());
300
        let buffer = b.finish();
301
        assert_eq!(4, buffer.len());
302
    }
303
304
    #[test]
305
    fn test_boolean_buffer_builder_unset_first_bit() {
306
        let mut buffer = BooleanBufferBuilder::new(4);
307
        buffer.append(true);
308
        buffer.append(true);
309
        buffer.append(false);
310
        buffer.append(true);
311
        buffer.set_bit(0, false);
312
        assert_eq!(buffer.len(), 4);
313
        assert_eq!(buffer.finish().values(), &[0b1010_u8]);
314
    }
315
316
    #[test]
317
    fn test_boolean_buffer_builder_unset_last_bit() {
318
        let mut buffer = BooleanBufferBuilder::new(4);
319
        buffer.append(true);
320
        buffer.append(true);
321
        buffer.append(false);
322
        buffer.append(true);
323
        buffer.set_bit(3, false);
324
        assert_eq!(buffer.len(), 4);
325
        assert_eq!(buffer.finish().values(), &[0b0011_u8]);
326
    }
327
328
    #[test]
329
    fn test_boolean_buffer_builder_unset_an_inner_bit() {
330
        let mut buffer = BooleanBufferBuilder::new(5);
331
        buffer.append(true);
332
        buffer.append(true);
333
        buffer.append(false);
334
        buffer.append(true);
335
        buffer.set_bit(1, false);
336
        assert_eq!(buffer.len(), 4);
337
        assert_eq!(buffer.finish().values(), &[0b1001_u8]);
338
    }
339
340
    #[test]
341
    fn test_boolean_buffer_builder_unset_several_bits() {
342
        let mut buffer = BooleanBufferBuilder::new(5);
343
        buffer.append(true);
344
        buffer.append(true);
345
        buffer.append(true);
346
        buffer.append(false);
347
        buffer.append(true);
348
        buffer.set_bit(1, false);
349
        buffer.set_bit(2, false);
350
        assert_eq!(buffer.len(), 5);
351
        assert_eq!(buffer.finish().values(), &[0b10001_u8]);
352
    }
353
354
    #[test]
355
    fn test_boolean_buffer_builder_unset_several_bits_bigger_than_one_byte() {
356
        let mut buffer = BooleanBufferBuilder::new(16);
357
        buffer.append_n(10, true);
358
        buffer.set_bit(0, false);
359
        buffer.set_bit(3, false);
360
        buffer.set_bit(9, false);
361
        assert_eq!(buffer.len(), 10);
362
        assert_eq!(buffer.finish().values(), &[0b11110110_u8, 0b01_u8]);
363
    }
364
365
    #[test]
366
    fn test_boolean_buffer_builder_flip_several_bits_bigger_than_one_byte() {
367
        let mut buffer = BooleanBufferBuilder::new(16);
368
        buffer.append_n(5, true);
369
        buffer.append_n(5, false);
370
        buffer.append_n(5, true);
371
        buffer.set_bit(0, false);
372
        buffer.set_bit(3, false);
373
        buffer.set_bit(9, false);
374
        buffer.set_bit(6, true);
375
        buffer.set_bit(14, true);
376
        buffer.set_bit(13, false);
377
        assert_eq!(buffer.len(), 15);
378
        assert_eq!(buffer.finish().values(), &[0b01010110_u8, 0b1011100_u8]);
379
    }
380
381
    #[test]
382
    fn test_bool_buffer_builder_get_first_bit() {
383
        let mut buffer = BooleanBufferBuilder::new(16);
384
        buffer.append_n(8, true);
385
        buffer.append_n(8, false);
386
        assert!(buffer.get_bit(0));
387
    }
388
389
    #[test]
390
    fn test_bool_buffer_builder_get_first_bit_not_requires_mutability() {
391
        let buffer = {
392
            let mut buffer = BooleanBufferBuilder::new(16);
393
            buffer.append_n(8, true);
394
            buffer
395
        };
396
397
        assert!(buffer.get_bit(0));
398
    }
399
400
    #[test]
401
    fn test_bool_buffer_builder_get_last_bit() {
402
        let mut buffer = BooleanBufferBuilder::new(16);
403
        buffer.append_n(8, true);
404
        buffer.append_n(8, false);
405
        assert!(!buffer.get_bit(15));
406
    }
407
408
    #[test]
409
    fn test_bool_buffer_builder_get_an_inner_bit() {
410
        let mut buffer = BooleanBufferBuilder::new(16);
411
        buffer.append_n(4, false);
412
        buffer.append_n(8, true);
413
        buffer.append_n(4, false);
414
        assert!(buffer.get_bit(11));
415
    }
416
417
    #[test]
418
    fn test_bool_buffer_fuzz() {
419
        use rand::prelude::*;
420
421
        let mut buffer = BooleanBufferBuilder::new(12);
422
        let mut all_bools = vec![];
423
        let mut rng = rand::rng();
424
425
        let src_len = 32;
426
        let (src, compacted_src) = {
427
            let src: Vec<_> = std::iter::from_fn(|| Some(rng.next_u32() & 1 == 0))
428
                .take(src_len)
429
                .collect();
430
431
            let mut compacted_src = BooleanBufferBuilder::new(src_len);
432
            compacted_src.append_slice(&src);
433
            (src, compacted_src.finish())
434
        };
435
436
        for _ in 0..100 {
437
            let a = rng.next_u32() as usize % src_len;
438
            let b = rng.next_u32() as usize % src_len;
439
440
            let start = a.min(b);
441
            let end = a.max(b);
442
443
            buffer.append_packed_range(start..end, compacted_src.values());
444
            all_bools.extend_from_slice(&src[start..end]);
445
        }
446
447
        let mut compacted = BooleanBufferBuilder::new(all_bools.len());
448
        compacted.append_slice(&all_bools);
449
450
        assert_eq!(buffer.finish(), compacted.finish())
451
    }
452
453
    #[test]
454
    fn test_boolean_array_builder_resize() {
455
        let mut builder = BooleanBufferBuilder::new(20);
456
        builder.append_n(4, true);
457
        builder.append_n(7, false);
458
        builder.append_n(2, true);
459
        builder.resize(20);
460
461
        assert_eq!(builder.len(), 20);
462
        assert_eq!(builder.as_slice(), &[0b00001111, 0b00011000, 0b00000000]);
463
464
        builder.resize(5);
465
        assert_eq!(builder.len(), 5);
466
        assert_eq!(builder.as_slice(), &[0b00001111]);
467
468
        builder.append_n(4, true);
469
        assert_eq!(builder.len(), 9);
470
        assert_eq!(builder.as_slice(), &[0b11101111, 0b00000001]);
471
    }
472
473
    #[test]
474
    fn test_truncate() {
475
        let b = MutableBuffer::from_iter([true, true, true, true]);
476
        let mut builder = BooleanBufferBuilder::new_from_buffer(b, 2);
477
        builder.advance(2);
478
        let finished = builder.finish();
479
        assert_eq!(finished.values(), &[0b00000011]);
480
481
        let mut builder = BooleanBufferBuilder::new(10);
482
        builder.append_n(5, true);
483
        builder.resize(3);
484
        builder.advance(2);
485
        let finished = builder.finish();
486
        assert_eq!(finished.values(), &[0b00000111]);
487
488
        let mut builder = BooleanBufferBuilder::new(10);
489
        builder.append_n(16, true);
490
        assert_eq!(builder.as_slice(), &[0xFF, 0xFF]);
491
        builder.truncate(20);
492
        assert_eq!(builder.as_slice(), &[0xFF, 0xFF]);
493
        builder.truncate(14);
494
        assert_eq!(builder.as_slice(), &[0xFF, 0b00111111]);
495
        builder.append(false);
496
        builder.append(true);
497
        assert_eq!(builder.as_slice(), &[0xFF, 0b10111111]);
498
        builder.append_packed_range(0..3, &[0xFF]);
499
        assert_eq!(builder.as_slice(), &[0xFF, 0b10111111, 0b00000111]);
500
        builder.truncate(17);
501
        assert_eq!(builder.as_slice(), &[0xFF, 0b10111111, 0b00000001]);
502
        builder.append_packed_range(0..2, &[2]);
503
        assert_eq!(builder.as_slice(), &[0xFF, 0b10111111, 0b0000101]);
504
        builder.truncate(8);
505
        assert_eq!(builder.as_slice(), &[0xFF]);
506
        builder.resize(14);
507
        assert_eq!(builder.as_slice(), &[0xFF, 0x00]);
508
        builder.truncate(0);
509
        assert_eq!(builder.as_slice(), &[]);
510
    }
511
512
    #[test]
513
    fn test_boolean_builder_increases_buffer_len() {
514
        // 00000010 01001000
515
        let buf = Buffer::from([72_u8, 2_u8]);
516
        let mut builder = BooleanBufferBuilder::new(8);
517
518
        for i in 0..16 {
519
            if i == 3 || i == 6 || i == 9 {
520
                builder.append(true);
521
            } else {
522
                builder.append(false);
523
            }
524
        }
525
        let buf2 = builder.finish();
526
527
        assert_eq!(buf.len(), buf2.inner().len());
528
        assert_eq!(buf.as_slice(), buf2.values());
529
    }
530
}