Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-buffer/src/buffer/boolean.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use crate::bit_chunk_iterator::BitChunks;
19
use crate::bit_iterator::{BitIndexIterator, BitIndexU32Iterator, BitIterator, BitSliceIterator};
20
use crate::{
21
    bit_util, buffer_bin_and, buffer_bin_or, buffer_bin_xor, buffer_unary_not,
22
    BooleanBufferBuilder, Buffer, MutableBuffer,
23
};
24
25
use std::ops::{BitAnd, BitOr, BitXor, Not};
26
27
/// A slice-able [`Buffer`] containing bit-packed booleans
28
///
29
/// `BooleanBuffer`s can be created using [`BooleanBufferBuilder`]
30
///
31
/// # See Also
32
///
33
/// * [`NullBuffer`] for representing null values in Arrow arrays
34
///
35
/// [`NullBuffer`]: crate::NullBuffer
36
#[derive(Debug, Clone, Eq)]
37
pub struct BooleanBuffer {
38
    buffer: Buffer, // bit-packed storage backing this view
39
    offset: usize, // position, in bits, of the first value of this view within `buffer`
40
    len: usize, // number of values (bits) in this view
41
}
42
43
impl PartialEq for BooleanBuffer {
44
0
    fn eq(&self, other: &Self) -> bool {
45
0
        if self.len != other.len {
46
0
            return false;
47
0
        }
48
49
0
        let lhs = self.bit_chunks().iter_padded();
50
0
        let rhs = other.bit_chunks().iter_padded();
51
0
        lhs.zip(rhs).all(|(a, b)| a == b)
52
0
    }
53
}
54
55
impl BooleanBuffer {
56
    /// Create a new [`BooleanBuffer`] from a [`Buffer`], an `offset` and `length` in bits
57
    ///
58
    /// # Panics
59
    ///
60
    /// This method will panic if `buffer` is not large enough
61
285
    pub fn new(buffer: Buffer, offset: usize, len: usize) -> Self {
62
285
        let total_len = offset.saturating_add(len);
63
285
        let buffer_len = buffer.len();
64
285
        let bit_len = buffer_len.saturating_mul(8);
65
285
        assert!(
66
285
            total_len <= bit_len,
67
0
            "buffer not large enough (offset: {offset}, len: {len}, buffer_len: {buffer_len})"
68
        );
69
285
        Self {
70
285
            buffer,
71
285
            offset,
72
285
            len,
73
285
        }
74
285
    }
75
76
    /// Create a new [`BooleanBuffer`] of `length` where all values are `true`
77
0
    pub fn new_set(length: usize) -> Self {
78
0
        let mut builder = BooleanBufferBuilder::new(length);
79
0
        builder.append_n(length, true);
80
0
        builder.finish()
81
0
    }
82
83
    /// Create a new [`BooleanBuffer`] of `length` where all values are `false`
84
0
    pub fn new_unset(length: usize) -> Self {
85
0
        let buffer = MutableBuffer::new_null(length).into_buffer();
86
0
        Self {
87
0
            buffer,
88
0
            offset: 0,
89
0
            len: length,
90
0
        }
91
0
    }
92
93
    /// Invokes `f` with indexes `0..len` collecting the boolean results into a new `BooleanBuffer`
94
0
    pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, f: F) -> Self {
95
0
        let buffer = MutableBuffer::collect_bool(len, f);
96
0
        Self::new(buffer.into(), 0, len)
97
0
    }
98
99
    /// Returns the number of set bits in this buffer
100
224
    pub fn count_set_bits(&self) -> usize {
101
224
        self.buffer.count_set_bits_offset(self.offset, self.len)
102
224
    }
103
104
    /// Returns a `BitChunks` instance which can be used to iterate over
105
    /// this buffer's bits in `u64` chunks
106
    #[inline]
107
4
    pub fn bit_chunks(&self) -> BitChunks<'_> {
108
4
        BitChunks::new(self.values(), self.offset, self.len)
109
4
    }
110
111
    /// Returns the offset of this [`BooleanBuffer`] in bits
112
    #[inline]
113
666
    pub fn offset(&self) -> usize {
114
666
        self.offset
115
666
    }
116
117
    /// Returns the length of this [`BooleanBuffer`] in bits
118
    #[inline]
119
698
    pub fn len(&self) -> usize {
120
698
        self.len
121
698
    }
122
123
    /// Returns true if this [`BooleanBuffer`] is empty
124
    #[inline]
125
0
    pub fn is_empty(&self) -> bool {
126
0
        self.len == 0
127
0
    }
128
129
    /// Free up unused memory.
130
0
    pub fn shrink_to_fit(&mut self) {
131
        // TODO(emilk): we could shrink even more in the case where we are a small sub-slice of the full buffer
132
0
        self.buffer.shrink_to_fit();
133
0
    }
134
135
    /// Returns the boolean value at index `i`.
136
    ///
137
    /// # Panics
138
    ///
139
    /// Panics if `i >= self.len()`
140
    #[inline]
141
386
    pub fn value(&self, idx: usize) -> bool {
142
386
        assert!(idx < self.len);
143
386
        unsafe { self.value_unchecked(idx) }
144
386
    }
145
146
    /// Returns the boolean value at index `i`.
147
    ///
148
    /// # Safety
149
    /// This doesn't check bounds, the caller must ensure that index < self.len()
150
    #[inline]
151
426
    pub unsafe fn value_unchecked(&self, i: usize) -> bool {
152
426
        unsafe { bit_util::get_bit_raw(self.buffer.as_ptr(), i + self.offset) }
153
426
    }
154
155
    /// Returns the packed values of this [`BooleanBuffer`] not including any offset
156
    #[inline]
157
498
    pub fn values(&self) -> &[u8] {
158
498
        &self.buffer
159
498
    }
160
161
    /// Slices this [`BooleanBuffer`] by the provided `offset` and `length`
162
71
    pub fn slice(&self, offset: usize, len: usize) -> Self {
163
71
        assert!(
164
71
            offset.saturating_add(len) <= self.len,
165
0
            "the length + offset of the sliced BooleanBuffer cannot exceed the existing length"
166
        );
167
71
        Self {
168
71
            buffer: self.buffer.clone(),
169
71
            offset: self.offset + offset,
170
71
            len,
171
71
        }
172
71
    }
173
174
    /// Returns a [`Buffer`] containing the sliced contents of this [`BooleanBuffer`]
175
    ///
176
    /// Equivalent to `self.buffer.bit_slice(self.offset, self.len)`
177
0
    pub fn sliced(&self) -> Buffer {
178
0
        self.buffer.bit_slice(self.offset, self.len)
179
0
    }
180
181
    /// Returns true if this [`BooleanBuffer`] is equal to `other`, using pointer comparisons
182
    /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
183
    /// return false when the arrays are logically equal
184
0
    pub fn ptr_eq(&self, other: &Self) -> bool {
185
0
        self.buffer.as_ptr() == other.buffer.as_ptr()
186
0
            && self.offset == other.offset
187
0
            && self.len == other.len
188
0
    }
189
190
    /// Returns the inner [`Buffer`]
191
    #[inline]
192
68
    pub fn inner(&self) -> &Buffer {
193
68
        &self.buffer
194
68
    }
195
196
    /// Returns the inner [`Buffer`], consuming self
197
81
    pub fn into_inner(self) -> Buffer {
198
81
        self.buffer
199
81
    }
200
201
    /// Returns an iterator over the bits in this [`BooleanBuffer`]
202
0
    pub fn iter(&self) -> BitIterator<'_> {
203
0
        self.into_iter()
204
0
    }
205
206
    /// Returns an iterator over the set bit positions in this [`BooleanBuffer`]
207
7
    pub fn set_indices(&self) -> BitIndexIterator<'_> {
208
7
        BitIndexIterator::new(self.values(), self.offset, self.len)
209
7
    }
210
211
    /// Returns a `u32` iterator over set bit positions without any usize->u32 conversion
212
0
    pub fn set_indices_u32(&self) -> BitIndexU32Iterator<'_> {
213
0
        BitIndexU32Iterator::new(self.values(), self.offset, self.len)
214
0
    }
215
216
    /// Returns a [`BitSliceIterator`] yielding contiguous ranges of set bits
217
0
    pub fn set_slices(&self) -> BitSliceIterator<'_> {
218
0
        BitSliceIterator::new(self.values(), self.offset, self.len)
219
0
    }
220
}
221
222
impl Not for &BooleanBuffer {
223
    type Output = BooleanBuffer;
224
225
0
    fn not(self) -> Self::Output {
226
0
        BooleanBuffer {
227
0
            buffer: buffer_unary_not(&self.buffer, self.offset, self.len),
228
0
            offset: 0,
229
0
            len: self.len,
230
0
        }
231
0
    }
232
}
233
234
impl BitAnd<&BooleanBuffer> for &BooleanBuffer {
235
    type Output = BooleanBuffer;
236
237
0
    fn bitand(self, rhs: &BooleanBuffer) -> Self::Output {
238
0
        assert_eq!(self.len, rhs.len);
239
0
        BooleanBuffer {
240
0
            buffer: buffer_bin_and(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len),
241
0
            offset: 0,
242
0
            len: self.len,
243
0
        }
244
0
    }
245
}
246
247
impl BitOr<&BooleanBuffer> for &BooleanBuffer {
248
    type Output = BooleanBuffer;
249
250
0
    fn bitor(self, rhs: &BooleanBuffer) -> Self::Output {
251
0
        assert_eq!(self.len, rhs.len);
252
0
        BooleanBuffer {
253
0
            buffer: buffer_bin_or(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len),
254
0
            offset: 0,
255
0
            len: self.len,
256
0
        }
257
0
    }
258
}
259
260
impl BitXor<&BooleanBuffer> for &BooleanBuffer {
261
    type Output = BooleanBuffer;
262
263
0
    fn bitxor(self, rhs: &BooleanBuffer) -> Self::Output {
264
0
        assert_eq!(self.len, rhs.len);
265
0
        BooleanBuffer {
266
0
            buffer: buffer_bin_xor(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len),
267
0
            offset: 0,
268
0
            len: self.len,
269
0
        }
270
0
    }
271
}
272
273
impl<'a> IntoIterator for &'a BooleanBuffer {
274
    type Item = bool;
275
    type IntoIter = BitIterator<'a>;
276
277
0
    fn into_iter(self) -> Self::IntoIter {
278
0
        BitIterator::new(self.values(), self.offset, self.len)
279
0
    }
280
}
281
282
impl From<&[bool]> for BooleanBuffer {
283
1
    fn from(value: &[bool]) -> Self {
284
1
        let mut builder = BooleanBufferBuilder::new(value.len());
285
1
        builder.append_slice(value);
286
1
        builder.finish()
287
1
    }
288
}
289
290
impl From<Vec<bool>> for BooleanBuffer {
291
1
    fn from(value: Vec<bool>) -> Self {
292
1
        value.as_slice().into()
293
1
    }
294
}
295
296
impl FromIterator<bool> for BooleanBuffer {
297
0
    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
298
0
        let iter = iter.into_iter();
299
0
        let (hint, _) = iter.size_hint();
300
0
        let mut builder = BooleanBufferBuilder::new(hint);
301
0
        iter.for_each(|b| builder.append(b));
302
0
        builder.finish()
303
0
    }
304
}
305
306
#[cfg(test)]
mod tests {
    //! Unit tests for [`BooleanBuffer`]: construction, equality, slicing,
    //! the bitwise operators, conversions and the documented panics.

    use super::*;

    #[test]
    fn test_boolean_new() {
        let bytes = &[0, 1, 2, 3, 4];
        let buf = Buffer::from(bytes);
        let offset = 0;
        let len = 24;

        let boolean_buf = BooleanBuffer::new(buf.clone(), offset, len);
        assert_eq!(bytes, boolean_buf.values());
        assert_eq!(offset, boolean_buf.offset());
        assert_eq!(len, boolean_buf.len());

        assert_eq!(2, boolean_buf.count_set_bits());
        assert_eq!(&buf, boolean_buf.inner());
        assert_eq!(buf, boolean_buf.clone().into_inner());

        assert!(!boolean_buf.is_empty())
    }

    #[test]
    #[should_panic(expected = "buffer not large enough")]
    fn test_boolean_new_buffer_too_small() {
        // Two bytes hold 16 bits, so a view of 17 bits cannot fit
        let _ = BooleanBuffer::new(Buffer::from(&[0u8, 1]), 0, 17);
    }

    #[test]
    fn test_boolean_new_unset() {
        let buf = BooleanBuffer::new_unset(10);
        assert_eq!(buf.offset(), 0);
        assert_eq!(buf.len(), 10);
        assert!(!buf.is_empty());
        assert_eq!(buf.count_set_bits(), 0);
    }

    #[test]
    fn test_boolean_is_empty() {
        // A zero-length view is empty even when the backing buffer is not
        let buf = BooleanBuffer::new(Buffer::from(&[0u8]), 0, 0);
        assert_eq!(buf.len(), 0);
        assert!(buf.is_empty());
    }

    #[test]
    fn test_boolean_data_equality() {
        let boolean_buf1 = BooleanBuffer::new(Buffer::from(&[0, 1, 4, 3, 5]), 0, 32);
        let boolean_buf2 = BooleanBuffer::new(Buffer::from(&[0, 1, 4, 3, 5]), 0, 32);
        assert_eq!(boolean_buf1, boolean_buf2);

        // a slice covering only part of the buffer is not equal to the whole buffer
        let boolean_buf3 = boolean_buf1.slice(8, 16);
        assert_ne!(boolean_buf1, boolean_buf3);
        // a slice with the same offset and the same length preserves equality
        let boolean_buf4 = boolean_buf1.slice(0, 32);
        assert_eq!(boolean_buf1, boolean_buf4);

        // unequal because of different elements
        let boolean_buf2 = BooleanBuffer::new(Buffer::from(&[0, 0, 2, 3, 4]), 0, 32);
        assert_ne!(boolean_buf1, boolean_buf2);

        // unequal because of different length
        let boolean_buf2 = BooleanBuffer::new(Buffer::from(&[0, 1, 4, 3, 5]), 0, 24);
        assert_ne!(boolean_buf1, boolean_buf2);

        // ptr_eq
        assert!(boolean_buf1.ptr_eq(&boolean_buf1));
        assert!(boolean_buf2.ptr_eq(&boolean_buf2));
        assert!(!boolean_buf1.ptr_eq(&boolean_buf2));
    }

    #[test]
    fn test_boolean_slice() {
        let bytes = &[0, 3, 2, 6, 2];
        let boolean_buf1 = BooleanBuffer::new(Buffer::from(bytes), 0, 32);
        let boolean_buf2 = BooleanBuffer::new(Buffer::from(bytes), 0, 32);

        let boolean_slice1 = boolean_buf1.slice(16, 16);
        let boolean_slice2 = boolean_buf2.slice(0, 16);
        assert_eq!(boolean_slice1.values(), boolean_slice2.values());

        assert_eq!(bytes, boolean_slice1.values());
        assert_eq!(16, boolean_slice1.offset);
        assert_eq!(16, boolean_slice1.len);

        assert_eq!(bytes, boolean_slice2.values());
        assert_eq!(0, boolean_slice2.offset);
        assert_eq!(16, boolean_slice2.len);
    }

    #[test]
    #[should_panic(
        expected = "the length + offset of the sliced BooleanBuffer cannot exceed the existing length"
    )]
    fn test_boolean_slice_out_of_bounds() {
        let buf = BooleanBuffer::new(Buffer::from(&[0u8, 1]), 0, 16);
        // 8 + 9 = 17 bits, one more than the buffer holds
        let _ = buf.slice(8, 9);
    }

    #[test]
    fn test_boolean_value_with_offset() {
        // bit 0 and bit 9 are set
        let buf = BooleanBuffer::new(Buffer::from(&[0b0000_0001u8, 0b0000_0010]), 0, 16);
        assert!(buf.value(0));
        assert!(!buf.value(1));

        // indices of a slice are relative to the start of the slice
        let tail = buf.slice(8, 8);
        assert_eq!(tail.offset(), 8);
        assert_eq!(tail.len(), 8);
        assert!(!tail.value(0));
        assert!(tail.value(1));
        assert_eq!(tail.count_set_bits(), 1);
    }

    #[test]
    #[should_panic]
    fn test_boolean_value_out_of_bounds() {
        let buf = BooleanBuffer::new(Buffer::from(&[1u8]), 0, 4);
        // index 4 is inside the backing byte but outside the 4 bit view
        let _ = buf.value(4);
    }

    #[test]
    fn test_boolean_bitand() {
        let offset = 0;
        let len = 40;

        let buf1 = Buffer::from(&[0, 1, 1, 0, 0]);
        let boolean_buf1 = &BooleanBuffer::new(buf1, offset, len);

        let buf2 = Buffer::from(&[0, 1, 1, 1, 0]);
        let boolean_buf2 = &BooleanBuffer::new(buf2, offset, len);

        let expected = BooleanBuffer::new(Buffer::from(&[0, 1, 1, 0, 0]), offset, len);
        assert_eq!(boolean_buf1 & boolean_buf2, expected);
    }

    #[test]
    fn test_boolean_bitor() {
        let offset = 0;
        let len = 40;

        let buf1 = Buffer::from(&[0, 1, 1, 0, 0]);
        let boolean_buf1 = &BooleanBuffer::new(buf1, offset, len);

        let buf2 = Buffer::from(&[0, 1, 1, 1, 0]);
        let boolean_buf2 = &BooleanBuffer::new(buf2, offset, len);

        let expected = BooleanBuffer::new(Buffer::from(&[0, 1, 1, 1, 0]), offset, len);
        assert_eq!(boolean_buf1 | boolean_buf2, expected);
    }

    #[test]
    fn test_boolean_bitxor() {
        let offset = 0;
        let len = 40;

        let buf1 = Buffer::from(&[0, 1, 1, 0, 0]);
        let boolean_buf1 = &BooleanBuffer::new(buf1, offset, len);

        let buf2 = Buffer::from(&[0, 1, 1, 1, 0]);
        let boolean_buf2 = &BooleanBuffer::new(buf2, offset, len);

        let expected = BooleanBuffer::new(Buffer::from(&[0, 0, 0, 1, 0]), offset, len);
        assert_eq!(boolean_buf1 ^ boolean_buf2, expected);
    }

    #[test]
    fn test_boolean_not() {
        let offset = 0;
        let len = 40;

        let buf = Buffer::from(&[0, 1, 1, 0, 0]);
        let boolean_buf = &BooleanBuffer::new(buf, offset, len);

        let expected = BooleanBuffer::new(Buffer::from(&[255, 254, 254, 255, 255]), offset, len);
        assert_eq!(!boolean_buf, expected);
    }

    #[test]
    fn test_boolean_from_slice_bool() {
        let v = [true, false, false];
        let buf = BooleanBuffer::from(&v[..]);
        assert_eq!(buf.offset(), 0);
        assert_eq!(buf.len(), 3);
        assert_eq!(buf.values().len(), 1);
        assert!(buf.value(0));
    }

    #[test]
    fn test_boolean_from_iter_round_trip() {
        let bits = vec![true, false, true, true];
        let buf: BooleanBuffer = bits.iter().copied().collect();
        assert_eq!(buf.len(), bits.len());
        assert_eq!(buf.count_set_bits(), 3);

        // iterating yields the values in their original order
        assert_eq!(buf.iter().collect::<Vec<_>>(), bits);
        assert_eq!(buf.set_indices().collect::<Vec<_>>(), vec![0, 2, 3]);
    }
}