/Users/andrewlamb/Software/arrow-rs/arrow-buffer/src/buffer/boolean.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use crate::bit_chunk_iterator::BitChunks; |
19 | | use crate::bit_iterator::{BitIndexIterator, BitIndexU32Iterator, BitIterator, BitSliceIterator}; |
20 | | use crate::{ |
21 | | bit_util, buffer_bin_and, buffer_bin_or, buffer_bin_xor, buffer_unary_not, |
22 | | BooleanBufferBuilder, Buffer, MutableBuffer, |
23 | | }; |
24 | | |
25 | | use std::ops::{BitAnd, BitOr, BitXor, Not}; |
26 | | |
27 | | /// A slice-able [`Buffer`] containing bit-packed booleans |
28 | | /// |
29 | | /// `BooleanBuffer`s can be created using [`BooleanBufferBuilder`] |
30 | | /// |
31 | | /// # See Also |
32 | | /// |
33 | | /// * [`NullBuffer`] for representing null values in Arrow arrays |
34 | | /// |
35 | | /// [`NullBuffer`]: crate::NullBuffer |
36 | | #[derive(Debug, Clone, Eq)] |
37 | | pub struct BooleanBuffer { |
38 | | buffer: Buffer, |
39 | | offset: usize, |
40 | | len: usize, |
41 | | } |
42 | | |
43 | | impl PartialEq for BooleanBuffer { |
44 | 0 | fn eq(&self, other: &Self) -> bool { |
45 | 0 | if self.len != other.len { |
46 | 0 | return false; |
47 | 0 | } |
48 | | |
49 | 0 | let lhs = self.bit_chunks().iter_padded(); |
50 | 0 | let rhs = other.bit_chunks().iter_padded(); |
51 | 0 | lhs.zip(rhs).all(|(a, b)| a == b) |
52 | 0 | } |
53 | | } |
54 | | |
55 | | impl BooleanBuffer { |
56 | | /// Create a new [`BooleanBuffer`] from a [`Buffer`], an `offset` and `len` in bits |
57 | | /// |
58 | | /// # Panics |
59 | | /// |
60 | | /// This method will panic if `buffer` is not large enough to hold `offset + len` bits |
61 | 285 | pub fn new(buffer: Buffer, offset: usize, len: usize) -> Self { |
62 | 285 | let total_len = offset.saturating_add(len); |
63 | 285 | let buffer_len = buffer.len(); |
64 | 285 | let bit_len = buffer_len.saturating_mul(8); |
65 | 285 | assert!( |
66 | 285 | total_len <= bit_len, |
67 | 0 | "buffer not large enough (offset: {offset}, len: {len}, buffer_len: {buffer_len})" |
68 | | ); |
69 | 285 | Self { |
70 | 285 | buffer, |
71 | 285 | offset, |
72 | 285 | len, |
73 | 285 | } |
74 | 285 | } |
75 | | |
76 | | /// Create a new [`BooleanBuffer`] of `length` where all values are `true` |
77 | 0 | pub fn new_set(length: usize) -> Self { |
78 | 0 | let mut builder = BooleanBufferBuilder::new(length); |
79 | 0 | builder.append_n(length, true); |
80 | 0 | builder.finish() |
81 | 0 | } |
82 | | |
83 | | /// Create a new [`BooleanBuffer`] of `length` where all values are `false` |
84 | 0 | pub fn new_unset(length: usize) -> Self { |
85 | 0 | let buffer = MutableBuffer::new_null(length).into_buffer(); |
86 | 0 | Self { |
87 | 0 | buffer, |
88 | 0 | offset: 0, |
89 | 0 | len: length, |
90 | 0 | } |
91 | 0 | } |
92 | | |
93 | | /// Invokes `f` with indexes `0..len` collecting the boolean results into a new `BooleanBuffer` |
94 | 0 | pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, f: F) -> Self { |
95 | 0 | let buffer = MutableBuffer::collect_bool(len, f); |
96 | 0 | Self::new(buffer.into(), 0, len) |
97 | 0 | } |
98 | | |
99 | | /// Returns the number of set bits in this buffer |
100 | 224 | pub fn count_set_bits(&self) -> usize { |
101 | 224 | self.buffer.count_set_bits_offset(self.offset, self.len) |
102 | 224 | } |
103 | | |
104 | | /// Returns a `BitChunks` instance which can be used to iterate over |
105 | | /// this buffer's bits in `u64` chunks |
106 | | #[inline] |
107 | 4 | pub fn bit_chunks(&self) -> BitChunks<'_> { |
108 | 4 | BitChunks::new(self.values(), self.offset, self.len) |
109 | 4 | } |
110 | | |
111 | | /// Returns the offset of this [`BooleanBuffer`] in bits |
112 | | #[inline] |
113 | 666 | pub fn offset(&self) -> usize { |
114 | 666 | self.offset |
115 | 666 | } |
116 | | |
117 | | /// Returns the length of this [`BooleanBuffer`] in bits |
118 | | #[inline] |
119 | 698 | pub fn len(&self) -> usize { |
120 | 698 | self.len |
121 | 698 | } |
122 | | |
123 | | /// Returns true if this [`BooleanBuffer`] is empty |
124 | | #[inline] |
125 | 0 | pub fn is_empty(&self) -> bool { |
126 | 0 | self.len == 0 |
127 | 0 | } |
128 | | |
129 | | /// Free up unused memory. |
130 | 0 | pub fn shrink_to_fit(&mut self) { |
131 | | // TODO(emilk): we could shrink even more in the case where we are a small sub-slice of the full buffer |
132 | 0 | self.buffer.shrink_to_fit(); |
133 | 0 | } |
134 | | |
135 | | /// Returns the boolean value at index `idx`. |
136 | | /// |
137 | | /// # Panics |
138 | | /// |
139 | | /// Panics if `idx >= self.len()` |
140 | | #[inline] |
141 | 386 | pub fn value(&self, idx: usize) -> bool { |
142 | 386 | assert!(idx < self.len); |
143 | 386 | unsafe { self.value_unchecked(idx) } |
144 | 386 | } |
145 | | |
146 | | /// Returns the boolean value at index `i`. |
147 | | /// |
148 | | /// # Safety |
149 | | /// This doesn't check bounds; the caller must ensure that `i < self.len()` |
150 | | #[inline] |
151 | 426 | pub unsafe fn value_unchecked(&self, i: usize) -> bool { |
152 | 426 | unsafe { bit_util::get_bit_raw(self.buffer.as_ptr(), i + self.offset) } |
153 | 426 | } |
154 | | |
155 | | /// Returns the packed values of this [`BooleanBuffer`] not including any offset |
156 | | #[inline] |
157 | 498 | pub fn values(&self) -> &[u8] { |
158 | 498 | &self.buffer |
159 | 498 | } |
160 | | |
161 | | /// Slices this [`BooleanBuffer`] by the provided `offset` and `length` |
162 | 71 | pub fn slice(&self, offset: usize, len: usize) -> Self { |
163 | 71 | assert!( |
164 | 71 | offset.saturating_add(len) <= self.len, |
165 | 0 | "the length + offset of the sliced BooleanBuffer cannot exceed the existing length" |
166 | | ); |
167 | 71 | Self { |
168 | 71 | buffer: self.buffer.clone(), |
169 | 71 | offset: self.offset + offset, |
170 | 71 | len, |
171 | 71 | } |
172 | 71 | } |
173 | | |
174 | | /// Returns a [`Buffer`] containing the sliced contents of this [`BooleanBuffer`] |
175 | | /// |
176 | | /// Equivalent to `self.buffer.bit_slice(self.offset, self.len)` |
177 | 0 | pub fn sliced(&self) -> Buffer { |
178 | 0 | self.buffer.bit_slice(self.offset, self.len) |
179 | 0 | } |
180 | | |
181 | | /// Returns true if this [`BooleanBuffer`] is equal to `other`, using pointer comparisons |
182 | | /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may |
183 | | /// return false when the buffers are logically equal |
184 | 0 | pub fn ptr_eq(&self, other: &Self) -> bool { |
185 | 0 | self.buffer.as_ptr() == other.buffer.as_ptr() |
186 | 0 | && self.offset == other.offset |
187 | 0 | && self.len == other.len |
188 | 0 | } |
189 | | |
190 | | /// Returns the inner [`Buffer`] |
191 | | #[inline] |
192 | 68 | pub fn inner(&self) -> &Buffer { |
193 | 68 | &self.buffer |
194 | 68 | } |
195 | | |
196 | | /// Returns the inner [`Buffer`], consuming self |
197 | 81 | pub fn into_inner(self) -> Buffer { |
198 | 81 | self.buffer |
199 | 81 | } |
200 | | |
201 | | /// Returns an iterator over the bits in this [`BooleanBuffer`] |
202 | 0 | pub fn iter(&self) -> BitIterator<'_> { |
203 | 0 | self.into_iter() |
204 | 0 | } |
205 | | |
206 | | /// Returns an iterator over the set bit positions in this [`BooleanBuffer`] |
207 | 7 | pub fn set_indices(&self) -> BitIndexIterator<'_> { |
208 | 7 | BitIndexIterator::new(self.values(), self.offset, self.len) |
209 | 7 | } |
210 | | |
211 | | /// Returns a `u32` iterator over set bit positions without any usize->u32 conversion |
212 | 0 | pub fn set_indices_u32(&self) -> BitIndexU32Iterator<'_> { |
213 | 0 | BitIndexU32Iterator::new(self.values(), self.offset, self.len) |
214 | 0 | } |
215 | | |
216 | | /// Returns a [`BitSliceIterator`] yielding contiguous ranges of set bits |
217 | 0 | pub fn set_slices(&self) -> BitSliceIterator<'_> { |
218 | 0 | BitSliceIterator::new(self.values(), self.offset, self.len) |
219 | 0 | } |
220 | | } |
221 | | |
222 | | impl Not for &BooleanBuffer { |
223 | | type Output = BooleanBuffer; |
224 | | |
225 | 0 | fn not(self) -> Self::Output { |
226 | 0 | BooleanBuffer { |
227 | 0 | buffer: buffer_unary_not(&self.buffer, self.offset, self.len), |
228 | 0 | offset: 0, |
229 | 0 | len: self.len, |
230 | 0 | } |
231 | 0 | } |
232 | | } |
233 | | |
234 | | impl BitAnd<&BooleanBuffer> for &BooleanBuffer { |
235 | | type Output = BooleanBuffer; |
236 | | |
237 | 0 | fn bitand(self, rhs: &BooleanBuffer) -> Self::Output { |
238 | 0 | assert_eq!(self.len, rhs.len); |
239 | 0 | BooleanBuffer { |
240 | 0 | buffer: buffer_bin_and(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len), |
241 | 0 | offset: 0, |
242 | 0 | len: self.len, |
243 | 0 | } |
244 | 0 | } |
245 | | } |
246 | | |
247 | | impl BitOr<&BooleanBuffer> for &BooleanBuffer { |
248 | | type Output = BooleanBuffer; |
249 | | |
250 | 0 | fn bitor(self, rhs: &BooleanBuffer) -> Self::Output { |
251 | 0 | assert_eq!(self.len, rhs.len); |
252 | 0 | BooleanBuffer { |
253 | 0 | buffer: buffer_bin_or(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len), |
254 | 0 | offset: 0, |
255 | 0 | len: self.len, |
256 | 0 | } |
257 | 0 | } |
258 | | } |
259 | | |
260 | | impl BitXor<&BooleanBuffer> for &BooleanBuffer { |
261 | | type Output = BooleanBuffer; |
262 | | |
263 | 0 | fn bitxor(self, rhs: &BooleanBuffer) -> Self::Output { |
264 | 0 | assert_eq!(self.len, rhs.len); |
265 | 0 | BooleanBuffer { |
266 | 0 | buffer: buffer_bin_xor(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len), |
267 | 0 | offset: 0, |
268 | 0 | len: self.len, |
269 | 0 | } |
270 | 0 | } |
271 | | } |
272 | | |
273 | | impl<'a> IntoIterator for &'a BooleanBuffer { |
274 | | type Item = bool; |
275 | | type IntoIter = BitIterator<'a>; |
276 | | |
277 | 0 | fn into_iter(self) -> Self::IntoIter { |
278 | 0 | BitIterator::new(self.values(), self.offset, self.len) |
279 | 0 | } |
280 | | } |
281 | | |
282 | | impl From<&[bool]> for BooleanBuffer { |
283 | 1 | fn from(value: &[bool]) -> Self { |
284 | 1 | let mut builder = BooleanBufferBuilder::new(value.len()); |
285 | 1 | builder.append_slice(value); |
286 | 1 | builder.finish() |
287 | 1 | } |
288 | | } |
289 | | |
290 | | impl From<Vec<bool>> for BooleanBuffer { |
291 | 1 | fn from(value: Vec<bool>) -> Self { |
292 | 1 | value.as_slice().into() |
293 | 1 | } |
294 | | } |
295 | | |
296 | | impl FromIterator<bool> for BooleanBuffer { |
297 | 0 | fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self { |
298 | 0 | let iter = iter.into_iter(); |
299 | 0 | let (hint, _) = iter.size_hint(); |
300 | 0 | let mut builder = BooleanBufferBuilder::new(hint); |
301 | 0 | iter.for_each(|b| builder.append(b)); |
302 | 0 | builder.finish() |
303 | 0 | } |
304 | | } |
305 | | |
306 | | #[cfg(test)] |
307 | | mod tests { |
308 | | use super::*; |
309 | | |
310 | | #[test] |
311 | | fn test_boolean_new() { |
312 | | let bytes = &[0, 1, 2, 3, 4]; |
313 | | let buf = Buffer::from(bytes); |
314 | | let offset = 0; |
315 | | let len = 24; |
316 | | |
317 | | let boolean_buf = BooleanBuffer::new(buf.clone(), offset, len); |
318 | | assert_eq!(bytes, boolean_buf.values()); |
319 | | assert_eq!(offset, boolean_buf.offset()); |
320 | | assert_eq!(len, boolean_buf.len()); |
321 | | |
322 | | assert_eq!(2, boolean_buf.count_set_bits()); |
323 | | assert_eq!(&buf, boolean_buf.inner()); |
324 | | assert_eq!(buf, boolean_buf.clone().into_inner()); |
325 | | |
326 | | assert!(!boolean_buf.is_empty()) |
327 | | } |
328 | | |
329 | | #[test] |
330 | | fn test_boolean_data_equality() { |
331 | | let boolean_buf1 = BooleanBuffer::new(Buffer::from(&[0, 1, 4, 3, 5]), 0, 32); |
332 | | let boolean_buf2 = BooleanBuffer::new(Buffer::from(&[0, 1, 4, 3, 5]), 0, 32); |
333 | | assert_eq!(boolean_buf1, boolean_buf2); |
334 | | |
335 | | // a narrower slice is not equal to its parent; a slice covering the full range is |
336 | | let boolean_buf3 = boolean_buf1.slice(8, 16); |
337 | | assert_ne!(boolean_buf1, boolean_buf3); |
338 | | let boolean_buf4 = boolean_buf1.slice(0, 32); |
339 | | assert_eq!(boolean_buf1, boolean_buf4); |
340 | | |
341 | | // unequal because of different elements |
342 | | let boolean_buf2 = BooleanBuffer::new(Buffer::from(&[0, 0, 2, 3, 4]), 0, 32); |
343 | | assert_ne!(boolean_buf1, boolean_buf2); |
344 | | |
345 | | // unequal because of different length |
346 | | let boolean_buf2 = BooleanBuffer::new(Buffer::from(&[0, 1, 4, 3, 5]), 0, 24); |
347 | | assert_ne!(boolean_buf1, boolean_buf2); |
348 | | |
349 | | // ptr_eq |
350 | | assert!(boolean_buf1.ptr_eq(&boolean_buf1)); |
351 | | assert!(boolean_buf2.ptr_eq(&boolean_buf2)); |
352 | | assert!(!boolean_buf1.ptr_eq(&boolean_buf2)); |
353 | | } |
354 | | |
355 | | #[test] |
356 | | fn test_boolean_slice() { |
357 | | let bytes = &[0, 3, 2, 6, 2]; |
358 | | let boolean_buf1 = BooleanBuffer::new(Buffer::from(bytes), 0, 32); |
359 | | let boolean_buf2 = BooleanBuffer::new(Buffer::from(bytes), 0, 32); |
360 | | |
361 | | let boolean_slice1 = boolean_buf1.slice(16, 16); |
362 | | let boolean_slice2 = boolean_buf2.slice(0, 16); |
363 | | assert_eq!(boolean_slice1.values(), boolean_slice2.values()); |
364 | | |
365 | | assert_eq!(bytes, boolean_slice1.values()); |
366 | | assert_eq!(16, boolean_slice1.offset); |
367 | | assert_eq!(16, boolean_slice1.len); |
368 | | |
369 | | assert_eq!(bytes, boolean_slice2.values()); |
370 | | assert_eq!(0, boolean_slice2.offset); |
371 | | assert_eq!(16, boolean_slice2.len); |
372 | | } |
373 | | |
374 | | #[test] |
375 | | fn test_boolean_bitand() { |
376 | | let offset = 0; |
377 | | let len = 40; |
378 | | |
379 | | let buf1 = Buffer::from(&[0, 1, 1, 0, 0]); |
380 | | let boolean_buf1 = &BooleanBuffer::new(buf1, offset, len); |
381 | | |
382 | | let buf2 = Buffer::from(&[0, 1, 1, 1, 0]); |
383 | | let boolean_buf2 = &BooleanBuffer::new(buf2, offset, len); |
384 | | |
385 | | let expected = BooleanBuffer::new(Buffer::from(&[0, 1, 1, 0, 0]), offset, len); |
386 | | assert_eq!(boolean_buf1 & boolean_buf2, expected); |
387 | | } |
388 | | |
389 | | #[test] |
390 | | fn test_boolean_bitor() { |
391 | | let offset = 0; |
392 | | let len = 40; |
393 | | |
394 | | let buf1 = Buffer::from(&[0, 1, 1, 0, 0]); |
395 | | let boolean_buf1 = &BooleanBuffer::new(buf1, offset, len); |
396 | | |
397 | | let buf2 = Buffer::from(&[0, 1, 1, 1, 0]); |
398 | | let boolean_buf2 = &BooleanBuffer::new(buf2, offset, len); |
399 | | |
400 | | let expected = BooleanBuffer::new(Buffer::from(&[0, 1, 1, 1, 0]), offset, len); |
401 | | assert_eq!(boolean_buf1 | boolean_buf2, expected); |
402 | | } |
403 | | |
404 | | #[test] |
405 | | fn test_boolean_bitxor() { |
406 | | let offset = 0; |
407 | | let len = 40; |
408 | | |
409 | | let buf1 = Buffer::from(&[0, 1, 1, 0, 0]); |
410 | | let boolean_buf1 = &BooleanBuffer::new(buf1, offset, len); |
411 | | |
412 | | let buf2 = Buffer::from(&[0, 1, 1, 1, 0]); |
413 | | let boolean_buf2 = &BooleanBuffer::new(buf2, offset, len); |
414 | | |
415 | | let expected = BooleanBuffer::new(Buffer::from(&[0, 0, 0, 1, 0]), offset, len); |
416 | | assert_eq!(boolean_buf1 ^ boolean_buf2, expected); |
417 | | } |
418 | | |
419 | | #[test] |
420 | | fn test_boolean_not() { |
421 | | let offset = 0; |
422 | | let len = 40; |
423 | | |
424 | | let buf = Buffer::from(&[0, 1, 1, 0, 0]); |
425 | | let boolean_buf = &BooleanBuffer::new(buf, offset, len); |
426 | | |
427 | | let expected = BooleanBuffer::new(Buffer::from(&[255, 254, 254, 255, 255]), offset, len); |
428 | | assert_eq!(!boolean_buf, expected); |
429 | | } |
430 | | |
431 | | #[test] |
432 | | fn test_boolean_from_slice_bool() { |
433 | | let v = [true, false, false]; |
434 | | let buf = BooleanBuffer::from(&v[..]); |
435 | | assert_eq!(buf.offset(), 0); |
436 | | assert_eq!(buf.len(), 3); |
437 | | assert_eq!(buf.values().len(), 1); |
438 | | assert!(buf.value(0)); |
439 | | } |
440 | | } |