/Users/andrewlamb/Software/arrow-rs/arrow-array/src/builder/boolean_builder.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use crate::builder::{ArrayBuilder, BooleanBufferBuilder}; |
19 | | use crate::{Array, ArrayRef, BooleanArray}; |
20 | | use arrow_buffer::Buffer; |
21 | | use arrow_buffer::NullBufferBuilder; |
22 | | use arrow_data::ArrayData; |
23 | | use arrow_schema::{ArrowError, DataType}; |
24 | | use std::any::Any; |
25 | | use std::sync::Arc; |
26 | | |
27 | | /// Builder for [`BooleanArray`] |
28 | | /// |
29 | | /// # Example |
30 | | /// |
31 | | /// Create a `BooleanArray` from a `BooleanBuilder` |
32 | | /// |
33 | | /// ``` |
34 | | /// |
35 | | /// # use arrow_array::{Array, BooleanArray, builder::BooleanBuilder}; |
36 | | /// |
37 | | /// let mut b = BooleanBuilder::new(); |
38 | | /// b.append_value(true); |
39 | | /// b.append_null(); |
40 | | /// b.append_value(false); |
41 | | /// b.append_value(true); |
42 | | /// let arr = b.finish(); |
43 | | /// |
44 | | /// assert_eq!(4, arr.len()); |
45 | | /// assert_eq!(1, arr.null_count()); |
46 | | /// assert_eq!(true, arr.value(0)); |
47 | | /// assert!(arr.is_valid(0)); |
48 | | /// assert!(!arr.is_null(0)); |
49 | | /// assert!(!arr.is_valid(1)); |
50 | | /// assert!(arr.is_null(1)); |
51 | | /// assert_eq!(false, arr.value(2)); |
52 | | /// assert!(arr.is_valid(2)); |
53 | | /// assert!(!arr.is_null(2)); |
54 | | /// assert_eq!(true, arr.value(3)); |
55 | | /// assert!(arr.is_valid(3)); |
56 | | /// assert!(!arr.is_null(3)); |
57 | | /// ``` |
58 | | #[derive(Debug)] |
59 | | pub struct BooleanBuilder { |
60 | | values_builder: BooleanBufferBuilder, |
61 | | null_buffer_builder: NullBufferBuilder, |
62 | | } |
63 | | |
64 | | impl Default for BooleanBuilder { |
65 | 0 | fn default() -> Self { |
66 | 0 | Self::new() |
67 | 0 | } |
68 | | } |
69 | | |
70 | | impl BooleanBuilder { |
71 | | /// Creates a new boolean builder |
72 | 1 | pub fn new() -> Self { |
73 | 1 | Self::with_capacity(1024) |
74 | 1 | } |
75 | | |
76 | | /// Creates a new boolean builder with space for `capacity` elements without re-allocating |
77 | 6 | pub fn with_capacity(capacity: usize) -> Self { |
78 | 6 | Self { |
79 | 6 | values_builder: BooleanBufferBuilder::new(capacity), |
80 | 6 | null_buffer_builder: NullBufferBuilder::new(capacity), |
81 | 6 | } |
82 | 6 | } |
83 | | |
84 | | /// Returns the capacity of this builder, measured in `bool` slots |
85 | 0 | pub fn capacity(&self) -> usize { |
86 | 0 | self.values_builder.capacity() |
87 | 0 | } |
88 | | |
89 | | /// Appends a non-null `bool` value into the builder |
90 | | #[inline] |
91 | 3 | pub fn append_value(&mut self, v: bool) { |
92 | 3 | self.values_builder.append(v); |
93 | 3 | self.null_buffer_builder.append_non_null(); |
94 | 3 | } |
95 | | |
96 | | /// Appends a null slot into the builder |
97 | | #[inline] |
98 | 0 | pub fn append_null(&mut self) { |
99 | 0 | self.null_buffer_builder.append_null(); |
100 | 0 | self.values_builder.advance(1); |
101 | 0 | } |
102 | | |
103 | | /// Appends `n` `null`s into the builder. |
104 | | #[inline] |
105 | | pub fn append_nulls(&mut self, n: usize) { |
106 | | self.null_buffer_builder.append_n_nulls(n); |
107 | | self.values_builder.advance(n); |
108 | | } |
109 | | |
110 | | /// Appends an `Option<bool>` into the builder; `None` is appended as a null slot |
111 | | #[inline] |
112 | | pub fn append_option(&mut self, v: Option<bool>) { |
113 | | match v { |
114 | | None => self.append_null(), |
115 | | Some(v) => self.append_value(v), |
116 | | }; |
117 | | } |
118 | | |
119 | | /// Appends a slice of `bool` values into the builder, all marked as non-null |
120 | | #[inline] |
121 | | pub fn append_slice(&mut self, v: &[bool]) { |
122 | | self.values_builder.append_slice(v); |
123 | | self.null_buffer_builder.append_n_non_nulls(v.len()); |
124 | | } |
125 | | |
126 | | /// Appends `additional` non-null values, each equal to `v`, into the builder |
127 | | #[inline] |
128 | | pub fn append_n(&mut self, additional: usize, v: bool) { |
129 | | self.values_builder.append_n(additional, v); |
130 | | self.null_buffer_builder.append_n_non_nulls(additional); |
131 | | } |
132 | | |
133 | | /// Appends values from a `bool` slice together with a validity boolean slice. |
134 | | /// |
135 | | /// Returns an error if the slices are of different lengths |
136 | | #[inline] |
137 | | pub fn append_values(&mut self, values: &[bool], is_valid: &[bool]) -> Result<(), ArrowError> { |
138 | | if values.len() != is_valid.len() { |
139 | | Err(ArrowError::InvalidArgumentError( |
140 | | "Value and validity lengths must be equal".to_string(), |
141 | | )) |
142 | | } else { |
143 | | self.null_buffer_builder.append_slice(is_valid); |
144 | | self.values_builder.append_slice(values); |
145 | | Ok(()) |
146 | | } |
147 | | } |
148 | | |
149 | | /// Appends the values and nulls of `array` to this builder as is |
150 | | /// (this means that underlying null values are copied as is). |
151 | | #[inline] |
152 | 15 | pub fn append_array(&mut self, array: &BooleanArray) { |
153 | 15 | self.values_builder.append_buffer(array.values()); |
154 | 15 | if let Some(null_buffer) = array.nulls() { |
155 | 0 | self.null_buffer_builder.append_buffer(null_buffer); |
156 | 15 | } else { |
157 | 15 | self.null_buffer_builder.append_n_non_nulls(array.len()); |
158 | 15 | } |
159 | 15 | } |
160 | | |
161 | | /// Builds the [BooleanArray] and resets this builder. |
162 | 6 | pub fn finish(&mut self) -> BooleanArray { |
163 | 6 | let len = self.len(); |
164 | 6 | let null_bit_buffer = self.null_buffer_builder.finish(); |
165 | 6 | let builder = ArrayData::builder(DataType::Boolean) |
166 | 6 | .len(len) |
167 | 6 | .add_buffer(self.values_builder.finish().into_inner()) |
168 | 6 | .nulls(null_bit_buffer); |
169 | | |
170 | 6 | let array_data = unsafe { builder.build_unchecked() }; |
171 | 6 | BooleanArray::from(array_data) |
172 | 6 | } |
173 | | |
174 | | /// Builds the [BooleanArray] without resetting the builder. |
175 | 0 | pub fn finish_cloned(&self) -> BooleanArray { |
176 | 0 | let len = self.len(); |
177 | 0 | let nulls = self.null_buffer_builder.finish_cloned(); |
178 | 0 | let value_buffer = Buffer::from_slice_ref(self.values_builder.as_slice()); |
179 | 0 | let builder = ArrayData::builder(DataType::Boolean) |
180 | 0 | .len(len) |
181 | 0 | .add_buffer(value_buffer) |
182 | 0 | .nulls(nulls); |
183 | | |
184 | 0 | let array_data = unsafe { builder.build_unchecked() }; |
185 | 0 | BooleanArray::from(array_data) |
186 | 0 | } |
187 | | |
188 | | /// Returns the current values buffer as a slice |
189 | | /// |
190 | | /// Boolean values are bit-packed into bytes. To extract the i-th boolean |
191 | | /// from the bytes, you can use `arrow_buffer::bit_util::get_bit()`. |
192 | 0 | pub fn values_slice(&self) -> &[u8] { |
193 | 0 | self.values_builder.as_slice() |
194 | 0 | } |
195 | | |
196 | | /// Returns the current null buffer as a slice |
197 | 0 | pub fn validity_slice(&self) -> Option<&[u8]> { |
198 | 0 | self.null_buffer_builder.as_slice() |
199 | 0 | } |
200 | | } |
201 | | |
202 | | impl ArrayBuilder for BooleanBuilder { |
203 | | /// Returns the builder as a non-mutable `Any` reference. |
204 | 0 | fn as_any(&self) -> &dyn Any { |
205 | 0 | self |
206 | 0 | } |
207 | | |
208 | | /// Returns the builder as a mutable `Any` reference. |
209 | 3 | fn as_any_mut(&mut self) -> &mut dyn Any { |
210 | 3 | self |
211 | 3 | } |
212 | | |
213 | | /// Returns the boxed builder as a box of `Any`. |
214 | 0 | fn into_box_any(self: Box<Self>) -> Box<dyn Any> { |
215 | 0 | self |
216 | 0 | } |
217 | | |
218 | | /// Returns the number of array slots in the builder |
219 | 7 | fn len(&self) -> usize { |
220 | 7 | self.values_builder.len() |
221 | 7 | } |
222 | | |
223 | | /// Builds the array and resets this builder. |
224 | 1 | fn finish(&mut self) -> ArrayRef { |
225 | 1 | Arc::new(self.finish()) |
226 | 1 | } |
227 | | |
228 | | /// Builds the array without resetting the builder. |
229 | 0 | fn finish_cloned(&self) -> ArrayRef { |
230 | 0 | Arc::new(self.finish_cloned()) |
231 | 0 | } |
232 | | } |
233 | | |
234 | | impl Extend<Option<bool>> for BooleanBuilder { |
235 | | #[inline] |
236 | | fn extend<T: IntoIterator<Item = Option<bool>>>(&mut self, iter: T) { |
237 | | for v in iter { |
238 | | self.append_option(v) |
239 | | } |
240 | | } |
241 | | } |
242 | | |
243 | | #[cfg(test)] |
244 | | mod tests { |
245 | | use super::*; |
246 | | use crate::Array; |
247 | | use arrow_buffer::{BooleanBuffer, NullBuffer}; |
248 | | |
249 | | #[test] |
250 | | fn test_boolean_array_builder() { |
251 | | // 00000010 01001000 |
252 | | let buf = Buffer::from([72_u8, 2_u8]); |
253 | | let mut builder = BooleanArray::builder(10); |
254 | | for i in 0..10 { |
255 | | if i == 3 || i == 6 || i == 9 { |
256 | | builder.append_value(true); |
257 | | } else { |
258 | | builder.append_value(false); |
259 | | } |
260 | | } |
261 | | |
262 | | let arr = builder.finish(); |
263 | | assert_eq!(&buf, arr.values().inner()); |
264 | | assert_eq!(10, arr.len()); |
265 | | assert_eq!(0, arr.offset()); |
266 | | assert_eq!(0, arr.null_count()); |
267 | | for i in 0..10 { |
268 | | assert!(!arr.is_null(i)); |
269 | | assert!(arr.is_valid(i)); |
270 | | assert_eq!(i == 3 || i == 6 || i == 9, arr.value(i), "failed at {i}") |
271 | | } |
272 | | } |
273 | | |
274 | | #[test] |
275 | | fn test_boolean_array_builder_append_slice() { |
276 | | let arr1 = BooleanArray::from(vec![Some(true), Some(false), None, None, Some(false)]); |
277 | | |
278 | | let mut builder = BooleanArray::builder(0); |
279 | | builder.append_slice(&[true, false]); |
280 | | builder.append_null(); |
281 | | builder.append_null(); |
282 | | builder.append_value(false); |
283 | | let arr2 = builder.finish(); |
284 | | |
285 | | assert_eq!(arr1, arr2); |
286 | | } |
287 | | |
288 | | #[test] |
289 | | fn test_boolean_array_builder_append_slice_large() { |
290 | | let arr1 = BooleanArray::from(vec![true; 513]); |
291 | | |
292 | | let mut builder = BooleanArray::builder(512); |
293 | | builder.append_slice(&[true; 513]); |
294 | | let arr2 = builder.finish(); |
295 | | |
296 | | assert_eq!(arr1, arr2); |
297 | | } |
298 | | |
299 | | #[test] |
300 | | fn test_boolean_array_builder_no_null() { |
301 | | let mut builder = BooleanArray::builder(0); |
302 | | builder.append_option(Some(true)); |
303 | | builder.append_value(false); |
304 | | builder.append_slice(&[true, false, true]); |
305 | | builder |
306 | | .append_values(&[false, false, true], &[true, true, true]) |
307 | | .unwrap(); |
308 | | |
309 | | let array = builder.finish(); |
310 | | assert_eq!(0, array.null_count()); |
311 | | assert!(array.nulls().is_none()); |
312 | | } |
313 | | |
314 | | #[test] |
315 | | fn test_boolean_array_builder_finish_cloned() { |
316 | | let mut builder = BooleanArray::builder(16); |
317 | | builder.append_option(Some(true)); |
318 | | builder.append_value(false); |
319 | | builder.append_slice(&[true, false, true]); |
320 | | let mut array = builder.finish_cloned(); |
321 | | assert_eq!(3, array.true_count()); |
322 | | assert_eq!(2, array.false_count()); |
323 | | |
324 | | builder |
325 | | .append_values(&[false, false, true], &[true, true, true]) |
326 | | .unwrap(); |
327 | | |
328 | | array = builder.finish(); |
329 | | assert_eq!(4, array.true_count()); |
330 | | assert_eq!(4, array.false_count()); |
331 | | |
332 | | assert_eq!(0, array.null_count()); |
333 | | assert!(array.nulls().is_none()); |
334 | | } |
335 | | |
336 | | #[test] |
337 | | fn test_extend() { |
338 | | let mut builder = BooleanBuilder::new(); |
339 | | builder.extend([false, false, true, false, false].into_iter().map(Some)); |
340 | | builder.extend([true, true, false].into_iter().map(Some)); |
341 | | let array = builder.finish(); |
342 | | let values = array.iter().map(|x| x.unwrap()).collect::<Vec<_>>(); |
343 | | assert_eq!( |
344 | | &values, |
345 | | &[false, false, true, false, false, true, true, false] |
346 | | ) |
347 | | } |
348 | | |
349 | | #[test] |
350 | | fn test_boolean_array_builder_append_n() { |
351 | | let mut builder = BooleanBuilder::new(); |
352 | | builder.append_n(3, true); |
353 | | builder.append_n(2, false); |
354 | | let array = builder.finish(); |
355 | | assert_eq!(3, array.true_count()); |
356 | | assert_eq!(2, array.false_count()); |
357 | | assert_eq!(0, array.null_count()); |
358 | | |
359 | | let values = array.iter().map(|x| x.unwrap()).collect::<Vec<_>>(); |
360 | | assert_eq!(&values, &[true, true, true, false, false]) |
361 | | } |
362 | | |
363 | | #[test] |
364 | | fn test_append_array() { |
365 | | let input = vec![ |
366 | | Some(true), |
367 | | None, |
368 | | Some(true), |
369 | | None, |
370 | | Some(false), |
371 | | None, |
372 | | None, |
373 | | None, |
374 | | Some(false), |
375 | | Some(false), |
376 | | Some(false), |
377 | | Some(true), |
378 | | Some(false), |
379 | | ]; |
380 | | let arr1 = BooleanArray::from(input[..5].to_vec()); |
381 | | let arr2 = BooleanArray::from(input[5..8].to_vec()); |
382 | | let arr3 = BooleanArray::from(input[8..].to_vec()); |
383 | | |
384 | | let mut builder = BooleanBuilder::new(); |
385 | | builder.append_array(&arr1); |
386 | | builder.append_array(&arr2); |
387 | | builder.append_array(&arr3); |
388 | | let actual = builder.finish(); |
389 | | let expected = BooleanArray::from(input); |
390 | | |
391 | | assert_eq!(actual, expected); |
392 | | } |
393 | | |
394 | | #[test] |
395 | | fn test_append_array_add_underlying_null_values() { |
396 | | let array = BooleanArray::new( |
397 | | BooleanBuffer::from(vec![true, false, true, false]), |
398 | | Some(NullBuffer::from(&[true, true, false, false])), |
399 | | ); |
400 | | |
401 | | let mut builder = BooleanBuilder::new(); |
402 | | builder.append_array(&array); |
403 | | let actual = builder.finish(); |
404 | | |
405 | | assert_eq!(actual, array); |
406 | | assert_eq!(actual.values(), array.values()) |
407 | | } |
408 | | } |