/Users/andrewlamb/Software/arrow-rs/arrow-array/src/array/boolean_array.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use crate::array::print_long_array; |
19 | | use crate::builder::BooleanBuilder; |
20 | | use crate::iterator::BooleanIter; |
21 | | use crate::{Array, ArrayAccessor, ArrayRef, Scalar}; |
22 | | use arrow_buffer::{bit_util, BooleanBuffer, Buffer, MutableBuffer, NullBuffer}; |
23 | | use arrow_data::{ArrayData, ArrayDataBuilder}; |
24 | | use arrow_schema::DataType; |
25 | | use std::any::Any; |
26 | | use std::sync::Arc; |
27 | | |
28 | | /// An array of [boolean values](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout) |
29 | | /// |
30 | | /// # Example: From a Vec |
31 | | /// |
32 | | /// ``` |
33 | | /// # use arrow_array::{Array, BooleanArray}; |
34 | | /// let arr: BooleanArray = vec![true, true, false].into(); |
35 | | /// ``` |
36 | | /// |
37 | | /// # Example: From an optional Vec |
38 | | /// |
39 | | /// ``` |
40 | | /// # use arrow_array::{Array, BooleanArray}; |
41 | | /// let arr: BooleanArray = vec![Some(true), None, Some(false)].into(); |
42 | | /// ``` |
43 | | /// |
44 | | /// # Example: From an iterator |
45 | | /// |
46 | | /// ``` |
47 | | /// # use arrow_array::{Array, BooleanArray}; |
48 | | /// let arr: BooleanArray = (0..5).map(|x| (x % 2 == 0).then(|| x % 3 == 0)).collect(); |
49 | | /// let values: Vec<_> = arr.iter().collect(); |
50 | | /// assert_eq!(&values, &[Some(true), None, Some(false), None, Some(false)]) |
51 | | /// ``` |
52 | | /// |
53 | | /// # Example: Using Builder |
54 | | /// |
55 | | /// ``` |
56 | | /// # use arrow_array::Array; |
57 | | /// # use arrow_array::builder::BooleanBuilder; |
58 | | /// let mut builder = BooleanBuilder::new(); |
59 | | /// builder.append_value(true); |
60 | | /// builder.append_null(); |
61 | | /// builder.append_value(false); |
62 | | /// let array = builder.finish(); |
63 | | /// let values: Vec<_> = array.iter().collect(); |
64 | | /// assert_eq!(&values, &[Some(true), None, Some(false)]) |
65 | | /// ``` |
66 | | /// |
67 | | #[derive(Clone)] |
68 | | pub struct BooleanArray { |
69 | | values: BooleanBuffer, |
70 | | nulls: Option<NullBuffer>, |
71 | | } |
72 | | |
73 | | impl std::fmt::Debug for BooleanArray { |
74 | 0 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { |
75 | 0 | write!(f, "BooleanArray\n[\n")?; |
76 | 0 | print_long_array(self, f, |array, index, f| { |
77 | 0 | std::fmt::Debug::fmt(&array.value(index), f) |
78 | 0 | })?; |
79 | 0 | write!(f, "]") |
80 | 0 | } |
81 | | } |
82 | | |
83 | | impl BooleanArray { |
84 | | /// Create a new [`BooleanArray`] from the provided values and nulls |
85 | | /// |
86 | | /// # Panics |
87 | | /// |
88 | | /// Panics if `values.len() != nulls.len()` |
89 | 52 | pub fn new(values: BooleanBuffer, nulls: Option<NullBuffer>) -> Self { |
90 | 52 | if let Some(n) = nulls.as_ref() { |
91 | 2 | assert_eq!(values.len(), n.len()); |
92 | 50 | } |
93 | 52 | Self { values, nulls } |
94 | 52 | } |
95 | | |
96 | | /// Create a new [`BooleanArray`] with length `len` consisting only of nulls |
97 | 0 | pub fn new_null(len: usize) -> Self { |
98 | 0 | Self { |
99 | 0 | values: BooleanBuffer::new_unset(len), |
100 | 0 | nulls: Some(NullBuffer::new_null(len)), |
101 | 0 | } |
102 | 0 | } |
103 | | |
104 | | /// Create a new [`Scalar`] from `value` |
105 | 0 | pub fn new_scalar(value: bool) -> Scalar<Self> { |
106 | 0 | let values = match value { |
107 | 0 | true => BooleanBuffer::new_set(1), |
108 | 0 | false => BooleanBuffer::new_unset(1), |
109 | | }; |
110 | 0 | Scalar::new(Self::new(values, None)) |
111 | 0 | } |
112 | | |
113 | | /// Create a new [`BooleanArray`] from a [`Buffer`], where `offset` and `len` are both measured in bits. |
114 | | /// Each bit in the [`Buffer`] is interpreted as one boolean value of the resulting [`BooleanArray`]. |
115 | | /// Arrays constructed with this method have the following properties: |
116 | | /// * the constructed [`BooleanArray`] contains no nulls; |
117 | | /// * apart from the cost of `buffer.into()`, construction is efficient because the booleans never need to be packed or unpacked; |
118 | | pub fn new_from_packed(buffer: impl Into<Buffer>, offset: usize, len: usize) -> Self { |
119 | | BooleanBuffer::new(buffer.into(), offset, len).into() |
120 | | } |
121 | | |
122 | | /// Create a new [`BooleanArray`] from `&[u8]` |
123 | | /// This is equivalent to calling `new_from_packed` with a [`Buffer`] built from `value`, an offset of 0 and a len of `value.len() * 8`. |
124 | | /// Arrays constructed with this method have the following properties: |
125 | | /// * the constructed [`BooleanArray`] contains no nulls; |
126 | | /// * the length of the constructed [`BooleanArray`] is always a multiple of 8; |
127 | 0 | pub fn new_from_u8(value: &[u8]) -> Self { |
128 | 0 | BooleanBuffer::new(Buffer::from(value), 0, value.len() * 8).into() |
129 | 0 | } |
130 | | |
131 | | /// Returns the length of this array. |
132 | 55 | pub fn len(&self) -> usize { |
133 | 55 | self.values.len() |
134 | 55 | } |
135 | | |
136 | | /// Returns whether this array is empty. |
137 | 0 | pub fn is_empty(&self) -> bool { |
138 | 0 | self.values.is_empty() |
139 | 0 | } |
140 | | |
141 | | /// Returns a zero-copy slice of this array with the indicated offset and length. |
142 | 35 | pub fn slice(&self, offset: usize, length: usize) -> Self { |
143 | | Self { |
144 | 35 | values: self.values.slice(offset, length), |
145 | 35 | nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)), |
146 | | } |
147 | 35 | } |
148 | | |
149 | | /// Returns a new boolean array builder |
150 | 0 | pub fn builder(capacity: usize) -> BooleanBuilder { |
151 | 0 | BooleanBuilder::with_capacity(capacity) |
152 | 0 | } |
153 | | |
154 | | /// Returns the underlying [`BooleanBuffer`] holding all the values of this array |
155 | 15 | pub fn values(&self) -> &BooleanBuffer { |
156 | 15 | &self.values |
157 | 15 | } |
158 | | |
159 | | /// Returns the number of non-null `true` values within this array |
160 | 0 | pub fn true_count(&self) -> usize { |
161 | 0 | match self.nulls() { |
162 | 0 | Some(nulls) => { |
163 | 0 | let null_chunks = nulls.inner().bit_chunks().iter_padded(); |
164 | 0 | let value_chunks = self.values().bit_chunks().iter_padded(); |
165 | 0 | null_chunks |
166 | 0 | .zip(value_chunks) |
167 | 0 | .map(|(a, b)| (a & b).count_ones() as usize) |
168 | 0 | .sum() |
169 | | } |
170 | 0 | None => self.values().count_set_bits(), |
171 | | } |
172 | 0 | } |
173 | | |
174 | | /// Returns the number of non-null `false` values within this array |
175 | 0 | pub fn false_count(&self) -> usize { |
176 | 0 | self.len() - self.null_count() - self.true_count() |
177 | 0 | } |
178 | | |
179 | | /// Returns the boolean value at index `i`. |
180 | | /// |
181 | | /// Note: This method does not check for nulls and the value is arbitrary |
182 | | /// if [`is_null`](Self::is_null) returns true for the index. |
183 | | /// |
184 | | /// # Safety |
185 | | /// This doesn't check bounds; the caller must ensure that `i < self.len()` |
186 | 40 | pub unsafe fn value_unchecked(&self, i: usize) -> bool { |
187 | 40 | self.values.value_unchecked(i) |
188 | 40 | } |
189 | | |
190 | | /// Returns the boolean value at index `i`. |
191 | | /// |
192 | | /// Note: This method does not check for nulls and the value is arbitrary |
193 | | /// if [`is_null`](Self::is_null) returns true for the index. |
194 | | /// |
195 | | /// # Panics |
196 | | /// Panics if index `i` is out of bounds |
197 | 40 | pub fn value(&self, i: usize) -> bool { |
198 | 40 | assert!( |
199 | 40 | i < self.len(), |
200 | 0 | "Trying to access an element at index {} from a BooleanArray of length {}", |
201 | | i, |
202 | 0 | self.len() |
203 | | ); |
204 | | // Safety: |
205 | | // `i < self.len()` is guaranteed by the assertion above |
206 | 40 | unsafe { self.value_unchecked(i) } |
207 | 40 | } |
208 | | |
209 | | /// Returns an iterator yielding `array.value(i)` for each index `i` produced by `indexes`, and `None` for each `None` index |
210 | | pub fn take_iter<'a>( |
211 | | &'a self, |
212 | | indexes: impl Iterator<Item = Option<usize>> + 'a, |
213 | | ) -> impl Iterator<Item = Option<bool>> + 'a { |
214 | | indexes.map(|opt_index| opt_index.map(|index| self.value(index))) |
215 | | } |
216 | | |
217 | | /// Returns an iterator yielding `array.value_unchecked(i)` for each index `i` produced by `indexes`, and `None` for each `None` index |
218 | | /// # Safety |
219 | | /// |
220 | | /// The caller must ensure that every index produced by the iterator is less than the array's `len()` |
221 | | pub unsafe fn take_iter_unchecked<'a>( |
222 | | &'a self, |
223 | | indexes: impl Iterator<Item = Option<usize>> + 'a, |
224 | | ) -> impl Iterator<Item = Option<bool>> + 'a { |
225 | | indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index))) |
226 | | } |
227 | | |
228 | | /// Create a [`BooleanArray`] by evaluating the operation for |
229 | | /// each element of the provided array |
230 | | /// |
231 | | /// ``` |
232 | | /// # use arrow_array::{BooleanArray, Int32Array}; |
233 | | /// |
234 | | /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]); |
235 | | /// let r = BooleanArray::from_unary(&array, |x| x > 2); |
236 | | /// assert_eq!(&r, &BooleanArray::from(vec![false, false, true, true, true])); |
237 | | /// ``` |
238 | 0 | pub fn from_unary<T: ArrayAccessor, F>(left: T, mut op: F) -> Self |
239 | 0 | where |
240 | 0 | F: FnMut(T::Item) -> bool, |
241 | | { |
242 | 0 | let nulls = left.logical_nulls(); |
243 | 0 | let values = BooleanBuffer::collect_bool(left.len(), |i| unsafe { |
244 | | // SAFETY: i in range 0..len |
245 | 0 | op(left.value_unchecked(i)) |
246 | 0 | }); |
247 | 0 | Self::new(values, nulls) |
248 | 0 | } |
249 | | |
250 | | /// Create a [`BooleanArray`] by evaluating the binary operation for |
251 | | /// each element of the provided arrays |
252 | | /// |
253 | | /// ``` |
254 | | /// # use arrow_array::{BooleanArray, Int32Array}; |
255 | | /// |
256 | | /// let a = Int32Array::from(vec![1, 2, 3, 4, 5]); |
257 | | /// let b = Int32Array::from(vec![1, 2, 0, 2, 5]); |
258 | | /// let r = BooleanArray::from_binary(&a, &b, |a, b| a == b); |
259 | | /// assert_eq!(&r, &BooleanArray::from(vec![true, true, false, false, true])); |
260 | | /// ``` |
261 | | /// |
262 | | /// # Panics |
263 | | /// |
264 | | /// This function panics if left and right are not the same length |
265 | | /// |
266 | | pub fn from_binary<T: ArrayAccessor, S: ArrayAccessor, F>(left: T, right: S, mut op: F) -> Self |
267 | | where |
268 | | F: FnMut(T::Item, S::Item) -> bool, |
269 | | { |
270 | | assert_eq!(left.len(), right.len()); |
271 | | |
272 | | let nulls = NullBuffer::union( |
273 | | left.logical_nulls().as_ref(), |
274 | | right.logical_nulls().as_ref(), |
275 | | ); |
276 | | let values = BooleanBuffer::collect_bool(left.len(), |i| unsafe { |
277 | | // SAFETY: i in range 0..len |
278 | | op(left.value_unchecked(i), right.value_unchecked(i)) |
279 | | }); |
280 | | Self::new(values, nulls) |
281 | | } |
282 | | |
283 | | /// Deconstruct this array into its constituent parts |
284 | 0 | pub fn into_parts(self) -> (BooleanBuffer, Option<NullBuffer>) { |
285 | 0 | (self.values, self.nulls) |
286 | 0 | } |
287 | | } |
288 | | |
289 | | impl Array for BooleanArray { |
290 | 20 | fn as_any(&self) -> &dyn Any { |
291 | 20 | self |
292 | 20 | } |
293 | | |
294 | 75 | fn to_data(&self) -> ArrayData { |
295 | 75 | self.clone().into() |
296 | 75 | } |
297 | | |
298 | 0 | fn into_data(self) -> ArrayData { |
299 | 0 | self.into() |
300 | 0 | } |
301 | | |
302 | 151 | fn data_type(&self) -> &DataType { |
303 | 151 | &DataType::Boolean |
304 | 151 | } |
305 | | |
306 | 35 | fn slice(&self, offset: usize, length: usize) -> ArrayRef { |
307 | 35 | Arc::new(self.slice(offset, length)) |
308 | 35 | } |
309 | | |
310 | 165 | fn len(&self) -> usize { |
311 | 165 | self.values.len() |
312 | 165 | } |
313 | | |
314 | 0 | fn is_empty(&self) -> bool { |
315 | 0 | self.values.is_empty() |
316 | 0 | } |
317 | | |
318 | 0 | fn shrink_to_fit(&mut self) { |
319 | 0 | self.values.shrink_to_fit(); |
320 | 0 | if let Some(nulls) = &mut self.nulls { |
321 | 0 | nulls.shrink_to_fit(); |
322 | 0 | } |
323 | 0 | } |
324 | | |
325 | 0 | fn offset(&self) -> usize { |
326 | 0 | self.values.offset() |
327 | 0 | } |
328 | | |
329 | 23 | fn nulls(&self) -> Option<&NullBuffer> { |
330 | 23 | self.nulls.as_ref() |
331 | 23 | } |
332 | | |
333 | 0 | fn logical_null_count(&self) -> usize { |
334 | 0 | self.null_count() |
335 | 0 | } |
336 | | |
337 | 0 | fn get_buffer_memory_size(&self) -> usize { |
338 | 0 | let mut sum = self.values.inner().capacity(); |
339 | 0 | if let Some(x) = &self.nulls { |
340 | 0 | sum += x.buffer().capacity() |
341 | 0 | } |
342 | 0 | sum |
343 | 0 | } |
344 | | |
345 | 0 | fn get_array_memory_size(&self) -> usize { |
346 | 0 | std::mem::size_of::<Self>() + self.get_buffer_memory_size() |
347 | 0 | } |
348 | | } |
349 | | |
350 | | impl ArrayAccessor for &BooleanArray { |
351 | | type Item = bool; |
352 | | |
353 | 0 | fn value(&self, index: usize) -> Self::Item { |
354 | 0 | BooleanArray::value(self, index) |
355 | 0 | } |
356 | | |
357 | 0 | unsafe fn value_unchecked(&self, index: usize) -> Self::Item { |
358 | 0 | BooleanArray::value_unchecked(self, index) |
359 | 0 | } |
360 | | } |
361 | | |
362 | | impl From<Vec<bool>> for BooleanArray { |
363 | 0 | fn from(data: Vec<bool>) -> Self { |
364 | 0 | let mut mut_buf = MutableBuffer::new_null(data.len()); |
365 | | { |
366 | 0 | let mut_slice = mut_buf.as_slice_mut(); |
367 | 0 | for (i, b) in data.iter().enumerate() { |
368 | 0 | if *b { |
369 | 0 | bit_util::set_bit(mut_slice, i); |
370 | 0 | } |
371 | | } |
372 | | } |
373 | 0 | let array_data = ArrayData::builder(DataType::Boolean) |
374 | 0 | .len(data.len()) |
375 | 0 | .add_buffer(mut_buf.into()); |
376 | | |
377 | 0 | let array_data = unsafe { array_data.build_unchecked() }; |
378 | 0 | BooleanArray::from(array_data) |
379 | 0 | } |
380 | | } |
381 | | |
382 | | impl From<Vec<Option<bool>>> for BooleanArray { |
383 | 3 | fn from(data: Vec<Option<bool>>) -> Self { |
384 | 3 | data.iter().collect() |
385 | 3 | } |
386 | | } |
387 | | |
388 | | impl From<ArrayData> for BooleanArray { |
389 | 26 | fn from(data: ArrayData) -> Self { |
390 | 26 | assert_eq!( |
391 | 26 | data.data_type(), |
392 | | &DataType::Boolean, |
393 | 0 | "BooleanArray expected ArrayData with type {} got {}", |
394 | | DataType::Boolean, |
395 | 0 | data.data_type() |
396 | | ); |
397 | 26 | assert_eq!( |
398 | 26 | data.buffers().len(), |
399 | | 1, |
400 | 0 | "BooleanArray data should contain a single buffer only (values buffer)" |
401 | | ); |
402 | 26 | let values = BooleanBuffer::new(data.buffers()[0].clone(), data.offset(), data.len()); |
403 | | |
404 | 26 | Self { |
405 | 26 | values, |
406 | 26 | nulls: data.nulls().cloned(), |
407 | 26 | } |
408 | 26 | } |
409 | | } |
410 | | |
411 | | impl From<BooleanArray> for ArrayData { |
412 | 75 | fn from(array: BooleanArray) -> Self { |
413 | 75 | let builder = ArrayDataBuilder::new(DataType::Boolean) |
414 | 75 | .len(array.values.len()) |
415 | 75 | .offset(array.values.offset()) |
416 | 75 | .nulls(array.nulls) |
417 | 75 | .buffers(vec![array.values.into_inner()]); |
418 | | |
419 | 75 | unsafe { builder.build_unchecked() } |
420 | 75 | } |
421 | | } |
422 | | |
423 | | impl<'a> IntoIterator for &'a BooleanArray { |
424 | | type Item = Option<bool>; |
425 | | type IntoIter = BooleanIter<'a>; |
426 | | |
427 | 0 | fn into_iter(self) -> Self::IntoIter { |
428 | 0 | BooleanIter::<'a>::new(self) |
429 | 0 | } |
430 | | } |
431 | | |
432 | | impl<'a> BooleanArray { |
433 | | /// Constructs a new iterator over this array |
434 | 0 | pub fn iter(&'a self) -> BooleanIter<'a> { |
435 | 0 | BooleanIter::<'a>::new(self) |
436 | 0 | } |
437 | | } |
438 | | |
439 | | impl<Ptr: std::borrow::Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray { |
440 | 19 | fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self { |
441 | 19 | let iter = iter.into_iter(); |
442 | 19 | let (_, data_len) = iter.size_hint(); |
443 | 19 | let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound. |
444 | | |
445 | 19 | let num_bytes = bit_util::ceil(data_len, 8); |
446 | 19 | let mut null_builder = MutableBuffer::from_len_zeroed(num_bytes); |
447 | 19 | let mut val_builder = MutableBuffer::from_len_zeroed(num_bytes); |
448 | | |
449 | 19 | let data = val_builder.as_slice_mut(); |
450 | | |
451 | 19 | let null_slice = null_builder.as_slice_mut(); |
452 | 136 | iter.enumerate().for_each(|(i, item)| { |
453 | 136 | if let Some(a) = item.borrow() { |
454 | 135 | bit_util::set_bit(null_slice, i); |
455 | 135 | if *a { |
456 | 69 | bit_util::set_bit(data, i); |
457 | 69 | } |
458 | 1 | } |
459 | 136 | }); |
460 | | |
461 | 19 | let data = unsafe { |
462 | 19 | ArrayData::new_unchecked( |
463 | 19 | DataType::Boolean, |
464 | 19 | data_len, |
465 | 19 | None, |
466 | 19 | Some(null_builder.into()), |
467 | | 0, |
468 | 19 | vec![val_builder.into()], |
469 | 19 | vec![], |
470 | | ) |
471 | | }; |
472 | 19 | BooleanArray::from(data) |
473 | 19 | } |
474 | | } |
475 | | |
476 | | impl From<BooleanBuffer> for BooleanArray { |
477 | 0 | fn from(values: BooleanBuffer) -> Self { |
478 | 0 | Self { |
479 | 0 | values, |
480 | 0 | nulls: None, |
481 | 0 | } |
482 | 0 | } |
483 | | } |
484 | | |
485 | | #[cfg(test)] |
486 | | mod tests { |
487 | | use super::*; |
488 | | use arrow_buffer::Buffer; |
489 | | use rand::{rng, Rng}; |
490 | | |
491 | | #[test] |
492 | | fn test_boolean_fmt_debug() { |
493 | | let arr = BooleanArray::from(vec![true, false, false]); |
494 | | assert_eq!( |
495 | | "BooleanArray\n[\n true,\n false,\n false,\n]", |
496 | | format!("{arr:?}") |
497 | | ); |
498 | | } |
499 | | |
500 | | #[test] |
501 | | fn test_boolean_with_null_fmt_debug() { |
502 | | let mut builder = BooleanArray::builder(3); |
503 | | builder.append_value(true); |
504 | | builder.append_null(); |
505 | | builder.append_value(false); |
506 | | let arr = builder.finish(); |
507 | | assert_eq!( |
508 | | "BooleanArray\n[\n true,\n null,\n false,\n]", |
509 | | format!("{arr:?}") |
510 | | ); |
511 | | } |
512 | | |
513 | | #[test] |
514 | | fn test_boolean_array_from_vec() { |
515 | | let buf = Buffer::from([10_u8]); |
516 | | let arr = BooleanArray::from(vec![false, true, false, true]); |
517 | | assert_eq!(&buf, arr.values().inner()); |
518 | | assert_eq!(4, arr.len()); |
519 | | assert_eq!(0, arr.offset()); |
520 | | assert_eq!(0, arr.null_count()); |
521 | | for i in 0..4 { |
522 | | assert!(!arr.is_null(i)); |
523 | | assert!(arr.is_valid(i)); |
524 | | assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {i}") |
525 | | } |
526 | | } |
527 | | |
528 | | #[test] |
529 | | fn test_boolean_array_from_vec_option() { |
530 | | let buf = Buffer::from([10_u8]); |
531 | | let arr = BooleanArray::from(vec![Some(false), Some(true), None, Some(true)]); |
532 | | assert_eq!(&buf, arr.values().inner()); |
533 | | assert_eq!(4, arr.len()); |
534 | | assert_eq!(0, arr.offset()); |
535 | | assert_eq!(1, arr.null_count()); |
536 | | for i in 0..4 { |
537 | | if i == 2 { |
538 | | assert!(arr.is_null(i)); |
539 | | assert!(!arr.is_valid(i)); |
540 | | } else { |
541 | | assert!(!arr.is_null(i)); |
542 | | assert!(arr.is_valid(i)); |
543 | | assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {i}") |
544 | | } |
545 | | } |
546 | | } |
547 | | |
548 | | #[test] |
549 | | fn test_boolean_array_from_packed() { |
550 | | let v = [1_u8, 2_u8, 3_u8]; |
551 | | let arr = BooleanArray::new_from_packed(v, 0, 24); |
552 | | assert_eq!(24, arr.len()); |
553 | | assert_eq!(0, arr.offset()); |
554 | | assert_eq!(0, arr.null_count()); |
555 | | assert!(arr.nulls.is_none()); |
556 | | for i in 0..24 { |
557 | | assert!(!arr.is_null(i)); |
558 | | assert!(arr.is_valid(i)); |
559 | | assert_eq!( |
560 | | i == 0 || i == 9 || i == 16 || i == 17, |
561 | | arr.value(i), |
562 | | "failed t {i}" |
563 | | ) |
564 | | } |
565 | | } |
566 | | |
567 | | #[test] |
568 | | fn test_boolean_array_from_slice_u8() { |
569 | | let v: Vec<u8> = vec![1, 2, 3]; |
570 | | let slice = &v[..]; |
571 | | let arr = BooleanArray::new_from_u8(slice); |
572 | | assert_eq!(24, arr.len()); |
573 | | assert_eq!(0, arr.offset()); |
574 | | assert_eq!(0, arr.null_count()); |
575 | | assert!(arr.nulls().is_none()); |
576 | | for i in 0..24 { |
577 | | assert!(!arr.is_null(i)); |
578 | | assert!(arr.is_valid(i)); |
579 | | assert_eq!( |
580 | | i == 0 || i == 9 || i == 16 || i == 17, |
581 | | arr.value(i), |
582 | | "failed t {i}" |
583 | | ) |
584 | | } |
585 | | } |
586 | | |
587 | | #[test] |
588 | | fn test_boolean_array_from_iter() { |
589 | | let v = vec![Some(false), Some(true), Some(false), Some(true)]; |
590 | | let arr = v.into_iter().collect::<BooleanArray>(); |
591 | | assert_eq!(4, arr.len()); |
592 | | assert_eq!(0, arr.offset()); |
593 | | assert_eq!(0, arr.null_count()); |
594 | | assert!(arr.nulls().is_none()); |
595 | | for i in 0..3 { |
596 | | assert!(!arr.is_null(i)); |
597 | | assert!(arr.is_valid(i)); |
598 | | assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {i}") |
599 | | } |
600 | | } |
601 | | |
602 | | #[test] |
603 | | fn test_boolean_array_from_nullable_iter() { |
604 | | let v = vec![Some(true), None, Some(false), None]; |
605 | | let arr = v.into_iter().collect::<BooleanArray>(); |
606 | | assert_eq!(4, arr.len()); |
607 | | assert_eq!(0, arr.offset()); |
608 | | assert_eq!(2, arr.null_count()); |
609 | | assert!(arr.nulls().is_some()); |
610 | | |
611 | | assert!(arr.is_valid(0)); |
612 | | assert!(arr.is_null(1)); |
613 | | assert!(arr.is_valid(2)); |
614 | | assert!(arr.is_null(3)); |
615 | | |
616 | | assert!(arr.value(0)); |
617 | | assert!(!arr.value(2)); |
618 | | } |
619 | | |
620 | | #[test] |
621 | | fn test_boolean_array_builder() { |
622 | | // Test building a boolean array with ArrayData builder and offset |
623 | | // 00011011 |
624 | | let buf = Buffer::from([27_u8]); |
625 | | let buf2 = buf.clone(); |
626 | | let data = ArrayData::builder(DataType::Boolean) |
627 | | .len(5) |
628 | | .offset(2) |
629 | | .add_buffer(buf) |
630 | | .build() |
631 | | .unwrap(); |
632 | | let arr = BooleanArray::from(data); |
633 | | assert_eq!(&buf2, arr.values().inner()); |
634 | | assert_eq!(5, arr.len()); |
635 | | assert_eq!(2, arr.offset()); |
636 | | assert_eq!(0, arr.null_count()); |
637 | | for i in 0..3 { |
638 | | assert_eq!(i != 0, arr.value(i), "failed at {i}"); |
639 | | } |
640 | | } |
641 | | |
642 | | #[test] |
643 | | #[should_panic( |
644 | | expected = "Trying to access an element at index 4 from a BooleanArray of length 3" |
645 | | )] |
646 | | fn test_fixed_size_binary_array_get_value_index_out_of_bound() { |
647 | | let v = vec![Some(true), None, Some(false)]; |
648 | | let array = v.into_iter().collect::<BooleanArray>(); |
649 | | |
650 | | array.value(4); |
651 | | } |
652 | | |
653 | | #[test] |
654 | | #[should_panic(expected = "BooleanArray data should contain a single buffer only \ |
655 | | (values buffer)")] |
656 | | // Different error messages, so skip for now |
657 | | // https://github.com/apache/arrow-rs/issues/1545 |
658 | | #[cfg(not(feature = "force_validate"))] |
659 | | fn test_boolean_array_invalid_buffer_len() { |
660 | | let data = unsafe { |
661 | | ArrayData::builder(DataType::Boolean) |
662 | | .len(5) |
663 | | .build_unchecked() |
664 | | }; |
665 | | drop(BooleanArray::from(data)); |
666 | | } |
667 | | |
668 | | #[test] |
669 | | #[should_panic(expected = "BooleanArray expected ArrayData with type Boolean got Int32")] |
670 | | fn test_from_array_data_validation() { |
671 | | let _ = BooleanArray::from(ArrayData::new_empty(&DataType::Int32)); |
672 | | } |
673 | | |
674 | | #[test] |
675 | | #[cfg_attr(miri, ignore)] // Takes too long |
676 | | fn test_true_false_count() { |
677 | | let mut rng = rng(); |
678 | | |
679 | | for _ in 0..10 { |
680 | | // No nulls |
681 | | let d: Vec<_> = (0..2000).map(|_| rng.random_bool(0.5)).collect(); |
682 | | let b = BooleanArray::from(d.clone()); |
683 | | |
684 | | let expected_true = d.iter().filter(|x| **x).count(); |
685 | | assert_eq!(b.true_count(), expected_true); |
686 | | assert_eq!(b.false_count(), d.len() - expected_true); |
687 | | |
688 | | // With nulls |
689 | | let d: Vec<_> = (0..2000) |
690 | | .map(|_| rng.random_bool(0.5).then(|| rng.random_bool(0.5))) |
691 | | .collect(); |
692 | | let b = BooleanArray::from(d.clone()); |
693 | | |
694 | | let expected_true = d.iter().filter(|x| matches!(x, Some(true))).count(); |
695 | | assert_eq!(b.true_count(), expected_true); |
696 | | |
697 | | let expected_false = d.iter().filter(|x| matches!(x, Some(false))).count(); |
698 | | assert_eq!(b.false_count(), expected_false); |
699 | | } |
700 | | } |
701 | | |
702 | | #[test] |
703 | | fn test_into_parts() { |
704 | | let boolean_array = [Some(true), None, Some(false)] |
705 | | .into_iter() |
706 | | .collect::<BooleanArray>(); |
707 | | let (values, nulls) = boolean_array.into_parts(); |
708 | | assert_eq!(values.values(), &[0b0000_0001]); |
709 | | assert!(nulls.is_some()); |
710 | | assert_eq!(nulls.unwrap().buffer().as_slice(), &[0b0000_0101]); |
711 | | |
712 | | let boolean_array = |
713 | | BooleanArray::from(vec![false, false, false, false, false, false, false, true]); |
714 | | let (values, nulls) = boolean_array.into_parts(); |
715 | | assert_eq!(values.values(), &[0b1000_0000]); |
716 | | assert!(nulls.is_none()); |
717 | | } |
718 | | } |