/Users/andrewlamb/Software/arrow-rs/arrow-buffer/src/builder/null.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use crate::{BooleanBufferBuilder, MutableBuffer, NullBuffer}; |
19 | | |
20 | | /// Builder for creating a [`NullBuffer`], one validity bit per item (`true` = non-null, `false` = null) |
21 | | /// |
22 | | /// # Performance |
23 | | /// |
24 | | /// This builder only materializes (allocates) the bitmap when a `false` is appended. |
25 | | /// If you only append `true`s to the builder, what you get will be |
26 | | /// `None` when calling [`finish`](#method.finish). |
27 | | /// |
28 | | /// This optimization is **very** important for performance, as it avoids |
29 | | /// allocating memory for the null buffer when there are no nulls. |
30 | | /// |
31 | | /// See [`Self::allocated_size`] to get the current memory allocated by the builder. |
32 | | /// |
33 | | /// # Example |
34 | | /// ``` |
35 | | /// # use arrow_buffer::NullBufferBuilder; |
36 | | /// let mut builder = NullBufferBuilder::new(8); |
37 | | /// builder.append_n_non_nulls(8); |
38 | | /// // If only non-null values are appended, the null buffer is not created |
39 | | /// let buffer = builder.finish(); |
40 | | /// assert!(buffer.is_none()); |
41 | | /// // However, if a null value is appended, the null buffer is created |
42 | | /// let mut builder = NullBufferBuilder::new(8); |
43 | | /// builder.append_n_non_nulls(7); |
44 | | /// builder.append_null(); |
45 | | /// let buffer = builder.finish().unwrap(); |
46 | | /// assert_eq!(buffer.len(), 8); |
47 | | /// assert_eq!(buffer.iter().collect::<Vec<_>>(), vec![true, true, true, true, true, true, true, false]); |
48 | | /// ``` |
49 | | #[derive(Debug)] |
50 | | pub struct NullBufferBuilder { |
51 | | /// The bitmap builder to store the null buffer: |
52 | | /// * `Some` once "materialized": nulls have been appended, or the builder was created by `new_from_buffer` |
53 | | /// * `None` if no nulls have been appended (this is also the state after `finish`). |
54 | | bitmap_builder: Option<BooleanBufferBuilder>, |
55 | | /// Length of the buffer before materializing. |
56 | | /// |
57 | | /// If `bitmap_builder` is `Some`, this value is not used; the bitmap builder tracks the length itself. |
58 | | len: usize, |
59 | | /// Minimum initial capacity, in bits, requested for the `bitmap_builder` when it is materialized. |
60 | | capacity: usize, |
61 | | } |
62 | | |
63 | | impl NullBufferBuilder { |
64 | | /// Creates a new, empty builder whose bitmap is not yet materialized. |
65 | | /// |
66 | | /// Note that this method does not allocate any memory, regardless of the |
67 | | /// `capacity` parameter. If an allocation is required later (when the bitmap is |
68 | | /// materialized), `capacity` is the size in bits (not bytes) that will be allocated at minimum. |
69 | 723 | pub fn new(capacity: usize) -> Self { |
70 | 723 | Self { |
71 | 723 | bitmap_builder: None, |
72 | 723 | len: 0, |
73 | 723 | capacity, |
74 | 723 | } |
75 | 723 | } |
76 | | |
77 | | /// Creates a new, unmaterialized builder that already holds `len` non-null items; `len` is also used as the initial capacity. |
78 | 0 | pub fn new_with_len(len: usize) -> Self { |
79 | 0 | Self { |
80 | 0 | bitmap_builder: None, |
81 | 0 | len, |
82 | 0 | capacity: len, |
83 | 0 | } |
84 | 0 | } |
85 | | |
86 | | /// Creates a new, already materialized builder from a `MutableBuffer` holding `len` bits; panics if `len` exceeds `buffer.len() * 8`. |
87 | 0 | pub fn new_from_buffer(buffer: MutableBuffer, len: usize) -> Self { |
88 | 0 | let capacity = buffer.len() * 8; |
89 | 0 | assert!(len <= capacity); |
90 | | |
91 | 0 | let bitmap_builder = Some(BooleanBufferBuilder::new_from_buffer(buffer, len)); |
92 | 0 | Self { |
93 | 0 | bitmap_builder, |
94 | 0 | len, |
95 | 0 | capacity, |
96 | 0 | } |
97 | 0 | } |
98 | | |
99 | | /// Appends `n` `true`s into the builder to indicate that these `n` items are not nulls. |
100 | | /// While the bitmap is not materialized this only increases the length counter; nothing is allocated. |
101 | | #[inline] |
102 | 227 | pub fn append_n_non_nulls(&mut self, n: usize) { |
103 | 227 | if let Some(buf7 ) = self.bitmap_builder.as_mut() { |
104 | 7 | buf.append_n(n, true) |
105 | 220 | } else { |
106 | 220 | self.len += n; |
107 | 220 | } |
108 | 227 | } |
109 | | |
110 | | /// Appends a `true` into the builder to indicate that this item is not null. |
111 | | /// While the bitmap is not materialized this only increments the length counter; nothing is allocated. |
112 | | #[inline] |
113 | 4.19k | pub fn append_non_null(&mut self) { |
114 | 4.19k | if let Some(buf121 ) = self.bitmap_builder.as_mut() { |
115 | 121 | buf.append(true) |
116 | 4.07k | } else { |
117 | 4.07k | self.len += 1; |
118 | 4.07k | } |
119 | 4.19k | } |
120 | | |
121 | | /// Appends `n` `false`s into the builder to indicate that these `n` items are nulls. |
122 | | /// The bitmap is materialized first if it was not already, regardless of the value of `n` (including 0). |
123 | | #[inline] |
124 | 0 | pub fn append_n_nulls(&mut self, n: usize) { |
125 | 0 | self.materialize_if_needed(); |
126 | 0 | self.bitmap_builder.as_mut().unwrap().append_n(n, false); |
127 | 0 | } |
128 | | |
129 | | /// Appends a `false` into the builder to indicate that this item is null. |
130 | | /// The bitmap is materialized first if it was not already. |
131 | | #[inline] |
132 | 205 | pub fn append_null(&mut self) { |
133 | 205 | self.materialize_if_needed(); |
134 | 205 | self.bitmap_builder.as_mut().unwrap().append(false); |
135 | 205 | } |
136 | | |
137 | | /// Appends one validity bit: `true` delegates to `append_non_null`, `false` delegates to `append_null`. |
138 | | #[inline] |
139 | 4.16k | pub fn append(&mut self, not_null: bool) { |
140 | 4.16k | if not_null { |
141 | 3.96k | self.append_non_null() |
142 | | } else { |
143 | 201 | self.append_null() |
144 | | } |
145 | 4.16k | } |
146 | | |
147 | | /// Gets the bit at `index`; while the bitmap is not materialized this returns `true` for any `index` (the length is not checked). |
148 | | #[inline] |
149 | | pub fn is_valid(&self, index: usize) -> bool { |
150 | | if let Some(ref buf) = self.bitmap_builder { |
151 | | buf.get_bit(index) |
152 | | } else { |
153 | | true |
154 | | } |
155 | | } |
156 | | |
157 | | /// Truncates the builder to the given length, whether or not the bitmap is materialized. |
158 | | /// |
159 | | /// If `len` is greater than the buffer's current length, this has no effect. |
160 | | #[inline] |
161 | | pub fn truncate(&mut self, len: usize) { |
162 | | if let Some(buf) = self.bitmap_builder.as_mut() { |
163 | | buf.truncate(len); |
164 | | } else if len <= self.len { |
165 | | self.len = len |
166 | | } |
167 | | } |
168 | | |
169 | | /// Appends a boolean slice into the builder to indicate the validity of these items (`true` = non-null). |
170 | | /// The bitmap is materialized only if the slice contains a `false`; otherwise an unmaterialized builder just grows its length. |
171 | 0 | pub fn append_slice(&mut self, slice: &[bool]) { |
172 | 0 | if slice.iter().any(|v| !v) { |
173 | 0 | self.materialize_if_needed() |
174 | 0 | } |
175 | 0 | if let Some(buf) = self.bitmap_builder.as_mut() { |
176 | 0 | buf.append_slice(slice) |
177 | 0 | } else { |
178 | 0 | self.len += slice.len(); |
179 | 0 | } |
180 | 0 | } |
181 | | |
182 | | /// Appends all bits of a [`NullBuffer`] to this [`NullBufferBuilder`] |
183 | | /// |
184 | | /// This is useful when you want to concatenate two null buffers. The bitmap is materialized only if `buffer` contains nulls. |
185 | 11 | pub fn append_buffer(&mut self, buffer: &NullBuffer) { |
186 | 11 | if buffer.null_count() > 0 { |
187 | 11 | self.materialize_if_needed(); |
188 | 11 | }0 |
189 | 11 | if let Some(buf) = self.bitmap_builder.as_mut() { |
190 | 11 | buf.append_buffer(buffer.inner()) |
191 | 0 | } else { |
192 | 0 | self.len += buffer.len(); |
193 | 0 | } |
194 | 11 | } |
195 | | |
196 | | /// Builds the null buffer and resets the builder (length back to 0, bitmap dropped; the configured capacity is kept). |
197 | | /// Returns `None` if the bitmap was never materialized, e.g. when only `true`s were appended. |
198 | 967 | pub fn finish(&mut self) -> Option<NullBuffer> { |
199 | 967 | self.len = 0; |
200 | 967 | Some(NullBuffer::new124 (self.bitmap_builder.take()?843 .finish124 ())) |
201 | 967 | } |
202 | | |
203 | | /// Builds the [`NullBuffer`] without resetting the builder; returns `None` if the bitmap is not materialized. |
204 | 0 | pub fn finish_cloned(&self) -> Option<NullBuffer> { |
205 | 0 | let buffer = self.bitmap_builder.as_ref()?.finish_cloned(); |
206 | 0 | Some(NullBuffer::new(buffer)) |
207 | 0 | } |
208 | | |
209 | | /// Returns the inner bitmap builder as a byte slice, or `None` if the bitmap is not materialized. |
210 | 0 | pub fn as_slice(&self) -> Option<&[u8]> { |
211 | 0 | Some(self.bitmap_builder.as_ref()?.as_slice()) |
212 | 0 | } |
213 | | |
214 | 216 | fn materialize_if_needed(&mut self) { |
215 | 216 | if self.bitmap_builder.is_none() { |
216 | 124 | self.materialize() |
217 | 92 | } |
218 | 216 | } |
219 | | |
220 | | #[cold] |
221 | 124 | fn materialize(&mut self) { |
222 | 124 | if self.bitmap_builder.is_none() { |
223 | 124 | let mut b = BooleanBufferBuilder::new(self.len.max(self.capacity)); |
224 | 124 | b.append_n(self.len, true); |
225 | 124 | self.bitmap_builder = Some(b); |
226 | 124 | }0 |
227 | 124 | } |
228 | | |
229 | | /// Returns a mutable reference to the inner bitmap byte slice, or `None` if the bitmap is not materialized. |
230 | 0 | pub fn as_slice_mut(&mut self) -> Option<&mut [u8]> { |
231 | 0 | self.bitmap_builder.as_mut().map(|b| b.as_slice_mut()) |
232 | 0 | } |
233 | | |
234 | | /// Returns the allocated size of this builder, in bytes (0 while the bitmap is not materialized), useful for memory accounting. |
235 | 0 | pub fn allocated_size(&self) -> usize { |
236 | 0 | self.bitmap_builder |
237 | 0 | .as_ref() |
238 | 0 | .map(|b| b.capacity() / 8) |
239 | 0 | .unwrap_or(0) |
240 | 0 | } |
241 | | } |
242 | | |
243 | | impl NullBufferBuilder { |
244 | | /// Returns the number of bits in the builder: the bitmap's length once materialized, otherwise the internal `len` counter. |
245 | 82 | pub fn len(&self) -> usize { |
246 | 82 | self.bitmap_builder.as_ref().map_or(self.len, |b| b14 .len14 ()) |
247 | 82 | } |
248 | | |
249 | | /// Returns `true` if the builder holds no bits, i.e. `len()` is 0. |
250 | 0 | pub fn is_empty(&self) -> bool { |
251 | 0 | self.len() == 0 |
252 | 0 | } |
253 | | } |
254 | | |
255 | | #[cfg(test)] |
256 | | mod tests { |
257 | | use super::*; |
258 | | |
259 | | #[test] |
260 | | fn test_null_buffer_builder() { |
261 | | let mut builder = NullBufferBuilder::new(0); |
262 | | builder.append_null(); |
263 | | builder.append_non_null(); |
264 | | builder.append_n_nulls(2); |
265 | | builder.append_n_non_nulls(2); |
266 | | assert_eq!(6, builder.len()); |
267 | | assert_eq!(64, builder.allocated_size()); |
268 | | |
269 | | let buf = builder.finish().unwrap(); |
270 | | assert_eq!(&[0b110010_u8], buf.validity()); |
271 | | } |
272 | | |
273 | | #[test] |
274 | | fn test_null_buffer_builder_all_nulls() { |
275 | | let mut builder = NullBufferBuilder::new(0); |
276 | | builder.append_null(); |
277 | | builder.append_n_nulls(2); |
278 | | builder.append_slice(&[false, false, false]); |
279 | | assert_eq!(6, builder.len()); |
280 | | assert_eq!(64, builder.allocated_size()); |
281 | | |
282 | | let buf = builder.finish().unwrap(); |
283 | | assert_eq!(&[0b0_u8], buf.validity()); |
284 | | } |
285 | | |
286 | | #[test] |
287 | | fn test_null_buffer_builder_no_null() { |
288 | | let mut builder = NullBufferBuilder::new(0); |
289 | | builder.append_non_null(); |
290 | | builder.append_n_non_nulls(2); |
291 | | builder.append_slice(&[true, true, true]); |
292 | | assert_eq!(6, builder.len()); |
293 | | assert_eq!(0, builder.allocated_size()); |
294 | | |
295 | | let buf = builder.finish(); |
296 | | assert!(buf.is_none()); |
297 | | } |
298 | | |
299 | | #[test] |
300 | | fn test_null_buffer_builder_reset() { |
301 | | let mut builder = NullBufferBuilder::new(0); |
302 | | builder.append_slice(&[true, false, true]); |
303 | | builder.finish(); |
304 | | assert!(builder.is_empty()); |
305 | | |
306 | | builder.append_slice(&[true, true, true]); |
307 | | assert!(builder.finish().is_none()); |
308 | | assert!(builder.is_empty()); |
309 | | |
310 | | builder.append_slice(&[true, true, false, true]); |
311 | | |
312 | | let buf = builder.finish().unwrap(); |
313 | | assert_eq!(&[0b1011_u8], buf.validity()); |
314 | | } |
315 | | |
316 | | #[test] |
317 | | fn test_null_buffer_builder_is_valid() { |
318 | | let mut builder = NullBufferBuilder::new(0); |
319 | | builder.append_n_non_nulls(6); |
320 | | assert!(builder.is_valid(0)); |
321 | | |
322 | | builder.append_null(); |
323 | | assert!(!builder.is_valid(6)); |
324 | | |
325 | | builder.append_non_null(); |
326 | | assert!(builder.is_valid(7)); |
327 | | } |
328 | | |
329 | | #[test] |
330 | | fn test_null_buffer_builder_truncate() { |
331 | | let mut builder = NullBufferBuilder::new(10); |
332 | | builder.append_n_non_nulls(16); |
333 | | assert_eq!(builder.as_slice(), None); |
334 | | builder.truncate(20); |
335 | | assert_eq!(builder.as_slice(), None); |
336 | | assert_eq!(builder.len(), 16); |
337 | | assert_eq!(builder.allocated_size(), 0); |
338 | | builder.truncate(14); |
339 | | assert_eq!(builder.as_slice(), None); |
340 | | assert_eq!(builder.len(), 14); |
341 | | builder.append_null(); |
342 | | builder.append_non_null(); |
343 | | assert_eq!(builder.as_slice().unwrap(), &[0xFF, 0b10111111]); |
344 | | assert_eq!(builder.allocated_size(), 64); |
345 | | } |
346 | | |
347 | | #[test] |
348 | | fn test_null_buffer_builder_truncate_never_materialized() { |
349 | | let mut builder = NullBufferBuilder::new(0); |
350 | | assert_eq!(builder.len(), 0); |
351 | | builder.append_n_nulls(2); // NOTE: this DOES materialize the bitmap (`append_n_nulls` always does), despite the test name |
352 | | assert_eq!(builder.len(), 2); |
353 | | builder.truncate(1); |
354 | | assert_eq!(builder.len(), 1); |
355 | | } |
356 | | |
357 | | #[test] |
358 | | fn test_append_buffers() { |
359 | | let mut builder = NullBufferBuilder::new(0); |
360 | | let buffer1 = NullBuffer::from(&[true, true]); |
361 | | let buffer2 = NullBuffer::from(&[true, true, false]); |
362 | | |
363 | | builder.append_buffer(&buffer1); |
364 | | builder.append_buffer(&buffer2); |
365 | | |
366 | | assert_eq!(builder.as_slice().unwrap(), &[0b01111_u8]); |
367 | | } |
368 | | |
369 | | #[test] |
370 | | fn test_append_buffers_with_unaligned_length() { |
371 | | let mut builder = NullBufferBuilder::new(0); |
372 | | let buffer = NullBuffer::from(&[true, true, false, true, false]); |
373 | | builder.append_buffer(&buffer); |
374 | | assert_eq!(builder.as_slice().unwrap(), &[0b01011_u8]); |
375 | | |
376 | | let buffer = NullBuffer::from(&[false, false, true, true, true, false, false]); |
377 | | builder.append_buffer(&buffer); |
378 | | assert_eq!(builder.as_slice().unwrap(), &[0b10001011_u8, 0b0011_u8]); |
379 | | } |
380 | | |
381 | | #[test] |
382 | | fn test_append_empty_buffer() { |
383 | | let mut builder = NullBufferBuilder::new(0); |
384 | | let buffer = NullBuffer::from(&[true, true, false, true]); |
385 | | builder.append_buffer(&buffer); |
386 | | assert_eq!(builder.as_slice().unwrap(), &[0b1011_u8]); |
387 | | |
388 | | let buffer = NullBuffer::from(&[]); |
389 | | builder.append_buffer(&buffer); |
390 | | |
391 | | assert_eq!(builder.as_slice().unwrap(), &[0b1011_u8]); |
392 | | } |
393 | | |
394 | | #[test] |
395 | | fn test_should_not_materialize_when_appending_all_valid_buffers() { |
396 | | let mut builder = NullBufferBuilder::new(0); |
397 | | let buffer = NullBuffer::from(&[true; 10]); |
398 | | builder.append_buffer(&buffer); |
399 | | |
400 | | let buffer = NullBuffer::from(&[true; 2]); |
401 | | builder.append_buffer(&buffer); |
402 | | |
403 | | assert_eq!(builder.finish(), None); |
404 | | } |
405 | | } |