/Users/andrewlamb/Software/arrow-rs/arrow-buffer/src/builder/mod.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! Buffer builders |
19 | | |
20 | | mod boolean; |
21 | | mod null; |
22 | | mod offset; |
23 | | |
24 | | pub use boolean::*; |
25 | | pub use null::*; |
26 | | pub use offset::*; |
27 | | |
28 | | use crate::{ArrowNativeType, Buffer, MutableBuffer}; |
29 | | use std::marker::PhantomData; |
30 | | |
31 | | /// Builder for creating a [Buffer] object. |
32 | | /// |
33 | | /// A [Buffer] is the underlying data structure of Arrow's Arrays. |
34 | | /// |
35 | | /// For all supported types, there are type definitions for the |
36 | | /// generic version of `BufferBuilder<T>`, e.g. `BufferBuilder`. |
37 | | /// |
38 | | /// # Example: |
39 | | /// |
40 | | /// ``` |
41 | | /// # use arrow_buffer::builder::BufferBuilder; |
42 | | /// |
43 | | /// let mut builder = BufferBuilder::<u8>::new(100); |
44 | | /// builder.append_slice(&[42, 43, 44]); |
45 | | /// builder.append(45); |
46 | | /// let buffer = builder.finish(); |
47 | | /// |
48 | | /// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 43, 44, 45]); |
49 | | /// ``` |
50 | | #[derive(Debug)] |
51 | | pub struct BufferBuilder<T: ArrowNativeType> { |
52 | | buffer: MutableBuffer, |
53 | | len: usize, |
54 | | _marker: PhantomData<T>, |
55 | | } |
56 | | |
57 | | impl<T: ArrowNativeType> BufferBuilder<T> { |
58 | | /// Creates a new builder with initial capacity for _at least_ `capacity` |
59 | | /// elements of type `T`. |
60 | | /// |
61 | | /// The capacity can later be manually adjusted with the |
62 | | /// [`reserve()`](BufferBuilder::reserve) method. |
63 | | /// Also the |
64 | | /// [`append()`](BufferBuilder::append), |
65 | | /// [`append_slice()`](BufferBuilder::append_slice) and |
66 | | /// [`advance()`](BufferBuilder::advance) |
67 | | /// methods automatically increase the capacity if needed. |
68 | | /// |
69 | | /// # Example: |
70 | | /// |
71 | | /// ``` |
72 | | /// # use arrow_buffer::builder::BufferBuilder; |
73 | | /// |
74 | | /// let mut builder = BufferBuilder::<u8>::new(10); |
75 | | /// |
76 | | /// assert!(builder.capacity() >= 10); |
77 | | /// ``` |
78 | | #[inline] |
79 | 0 | pub fn new(capacity: usize) -> Self { |
80 | 0 | let buffer = MutableBuffer::new(capacity * std::mem::size_of::<T>()); |
81 | | |
82 | 0 | Self { |
83 | 0 | buffer, |
84 | 0 | len: 0, |
85 | 0 | _marker: PhantomData, |
86 | 0 | } |
87 | 0 | } |
88 | | |
89 | | /// Creates a new builder from a [`MutableBuffer`] |
90 | | pub fn new_from_buffer(buffer: MutableBuffer) -> Self { |
91 | | let buffer_len = buffer.len(); |
92 | | Self { |
93 | | buffer, |
94 | | len: buffer_len / std::mem::size_of::<T>(), |
95 | | _marker: PhantomData, |
96 | | } |
97 | | } |
98 | | |
99 | | /// Returns the current number of array elements in the internal buffer. |
100 | | /// |
101 | | /// # Example: |
102 | | /// |
103 | | /// ``` |
104 | | /// # use arrow_buffer::builder::BufferBuilder; |
105 | | /// |
106 | | /// let mut builder = BufferBuilder::<u8>::new(10); |
107 | | /// builder.append(42); |
108 | | /// |
109 | | /// assert_eq!(builder.len(), 1); |
110 | | /// ``` |
111 | 0 | pub fn len(&self) -> usize { |
112 | 0 | self.len |
113 | 0 | } |
114 | | |
115 | | /// Returns whether the internal buffer is empty. |
116 | | /// |
117 | | /// # Example: |
118 | | /// |
119 | | /// ``` |
120 | | /// # use arrow_buffer::builder::BufferBuilder; |
121 | | /// |
122 | | /// let mut builder = BufferBuilder::<u8>::new(10); |
123 | | /// builder.append(42); |
124 | | /// |
125 | | /// assert_eq!(builder.is_empty(), false); |
126 | | /// ``` |
127 | | pub fn is_empty(&self) -> bool { |
128 | | self.len == 0 |
129 | | } |
130 | | |
131 | | /// Returns the actual capacity (number of elements) of the internal buffer. |
132 | | /// |
133 | | /// Note: the internal capacity returned by this method might be larger than |
134 | | /// what you'd expect after setting the capacity in the `new()` or `reserve()` |
135 | | /// functions. |
136 | | pub fn capacity(&self) -> usize { |
137 | | let byte_capacity = self.buffer.capacity(); |
138 | | byte_capacity / std::mem::size_of::<T>() |
139 | | } |
140 | | |
141 | | /// Increases the number of elements in the internal buffer by `n` |
142 | | /// and resizes the buffer as needed. |
143 | | /// |
144 | | /// The values of the newly added elements are 0. |
145 | | /// This method is usually used when appending `NULL` values to the buffer |
146 | | /// as they still require physical memory space. |
147 | | /// |
148 | | /// # Example: |
149 | | /// |
150 | | /// ``` |
151 | | /// # use arrow_buffer::builder::BufferBuilder; |
152 | | /// |
153 | | /// let mut builder = BufferBuilder::<u8>::new(10); |
154 | | /// builder.advance(2); |
155 | | /// |
156 | | /// assert_eq!(builder.len(), 2); |
157 | | /// ``` |
158 | | #[inline] |
159 | | pub fn advance(&mut self, i: usize) { |
160 | | self.buffer.extend_zeros(i * std::mem::size_of::<T>()); |
161 | | self.len += i; |
162 | | } |
163 | | |
164 | | /// Reserves memory for _at least_ `n` more elements of type `T`. |
165 | | /// |
166 | | /// # Example: |
167 | | /// |
168 | | /// ``` |
169 | | /// # use arrow_buffer::builder::BufferBuilder; |
170 | | /// |
171 | | /// let mut builder = BufferBuilder::<u8>::new(10); |
172 | | /// builder.reserve(10); |
173 | | /// |
174 | | /// assert!(builder.capacity() >= 20); |
175 | | /// ``` |
176 | | #[inline] |
177 | 0 | pub fn reserve(&mut self, n: usize) { |
178 | 0 | self.buffer.reserve(n * std::mem::size_of::<T>()); |
179 | 0 | } |
180 | | |
181 | | /// Appends a value of type `T` into the builder, |
182 | | /// growing the internal buffer as needed. |
183 | | /// |
184 | | /// # Example: |
185 | | /// |
186 | | /// ``` |
187 | | /// # use arrow_buffer::builder::BufferBuilder; |
188 | | /// |
189 | | /// let mut builder = BufferBuilder::<u8>::new(10); |
190 | | /// builder.append(42); |
191 | | /// |
192 | | /// assert_eq!(builder.len(), 1); |
193 | | /// ``` |
194 | | #[inline] |
195 | 0 | pub fn append(&mut self, v: T) { |
196 | 0 | self.reserve(1); |
197 | 0 | self.buffer.push(v); |
198 | 0 | self.len += 1; |
199 | 0 | } |
200 | | |
201 | | /// Appends a value of type `T` into the builder N times, |
202 | | /// growing the internal buffer as needed. |
203 | | /// |
204 | | /// # Example: |
205 | | /// |
206 | | /// ``` |
207 | | /// # use arrow_buffer::builder::BufferBuilder; |
208 | | /// |
209 | | /// let mut builder = BufferBuilder::<u8>::new(10); |
210 | | /// builder.append_n(10, 42); |
211 | | /// |
212 | | /// assert_eq!(builder.len(), 10); |
213 | | /// ``` |
214 | | #[inline] |
215 | | pub fn append_n(&mut self, n: usize, v: T) { |
216 | | self.reserve(n); |
217 | | self.extend(std::iter::repeat_n(v, n)) |
218 | | } |
219 | | |
220 | | /// Appends `n`, zero-initialized values |
221 | | /// |
222 | | /// # Example: |
223 | | /// |
224 | | /// ``` |
225 | | /// # use arrow_buffer::builder::BufferBuilder; |
226 | | /// |
227 | | /// let mut builder = BufferBuilder::<u32>::new(10); |
228 | | /// builder.append_n_zeroed(3); |
229 | | /// |
230 | | /// assert_eq!(builder.len(), 3); |
231 | | /// assert_eq!(builder.as_slice(), &[0, 0, 0]) |
232 | | #[inline] |
233 | 0 | pub fn append_n_zeroed(&mut self, n: usize) { |
234 | 0 | self.buffer.extend_zeros(n * std::mem::size_of::<T>()); |
235 | 0 | self.len += n; |
236 | 0 | } |
237 | | |
238 | | /// Appends a slice of type `T`, growing the internal buffer as needed. |
239 | | /// |
240 | | /// # Example: |
241 | | /// |
242 | | /// ``` |
243 | | /// # use arrow_buffer::builder::BufferBuilder; |
244 | | /// |
245 | | /// let mut builder = BufferBuilder::<u8>::new(10); |
246 | | /// builder.append_slice(&[42, 44, 46]); |
247 | | /// |
248 | | /// assert_eq!(builder.len(), 3); |
249 | | /// ``` |
250 | | #[inline] |
251 | | pub fn append_slice(&mut self, slice: &[T]) { |
252 | | self.buffer.extend_from_slice(slice); |
253 | | self.len += slice.len(); |
254 | | } |
255 | | |
256 | | /// View the contents of this buffer as a slice |
257 | | /// |
258 | | /// ``` |
259 | | /// # use arrow_buffer::builder::BufferBuilder; |
260 | | /// |
261 | | /// let mut builder = BufferBuilder::<f64>::new(10); |
262 | | /// builder.append(1.3); |
263 | | /// builder.append_n(2, 2.3); |
264 | | /// |
265 | | /// assert_eq!(builder.as_slice(), &[1.3, 2.3, 2.3]); |
266 | | /// ``` |
267 | | #[inline] |
268 | 0 | pub fn as_slice(&self) -> &[T] { |
269 | | // SAFETY |
270 | | // |
271 | | // - MutableBuffer is aligned and initialized for len elements of T |
272 | | // - MutableBuffer corresponds to a single allocation |
273 | | // - MutableBuffer does not support modification whilst active immutable borrows |
274 | 0 | unsafe { std::slice::from_raw_parts(self.buffer.as_ptr() as _, self.len) } |
275 | 0 | } |
276 | | |
277 | | /// View the contents of this buffer as a mutable slice |
278 | | /// |
279 | | /// # Example: |
280 | | /// |
281 | | /// ``` |
282 | | /// # use arrow_buffer::builder::BufferBuilder; |
283 | | /// |
284 | | /// let mut builder = BufferBuilder::<f32>::new(10); |
285 | | /// |
286 | | /// builder.append_slice(&[1., 2., 3.4]); |
287 | | /// assert_eq!(builder.as_slice(), &[1., 2., 3.4]); |
288 | | /// |
289 | | /// builder.as_slice_mut()[1] = 4.2; |
290 | | /// assert_eq!(builder.as_slice(), &[1., 4.2, 3.4]); |
291 | | /// ``` |
292 | | #[inline] |
293 | 0 | pub fn as_slice_mut(&mut self) -> &mut [T] { |
294 | | // SAFETY |
295 | | // |
296 | | // - MutableBuffer is aligned and initialized for len elements of T |
297 | | // - MutableBuffer corresponds to a single allocation |
298 | | // - MutableBuffer does not support modification whilst active immutable borrows |
299 | 0 | unsafe { std::slice::from_raw_parts_mut(self.buffer.as_mut_ptr() as _, self.len) } |
300 | 0 | } |
301 | | |
302 | | /// Shorten this BufferBuilder to `len` items |
303 | | /// |
304 | | /// If `len` is greater than the builder's current length, this has no effect |
305 | | /// |
306 | | /// # Example: |
307 | | /// |
308 | | /// ``` |
309 | | /// # use arrow_buffer::builder::BufferBuilder; |
310 | | /// |
311 | | /// let mut builder = BufferBuilder::<u16>::new(10); |
312 | | /// |
313 | | /// builder.append_slice(&[42, 44, 46]); |
314 | | /// assert_eq!(builder.as_slice(), &[42, 44, 46]); |
315 | | /// |
316 | | /// builder.truncate(2); |
317 | | /// assert_eq!(builder.as_slice(), &[42, 44]); |
318 | | /// |
319 | | /// builder.append(12); |
320 | | /// assert_eq!(builder.as_slice(), &[42, 44, 12]); |
321 | | /// ``` |
322 | | #[inline] |
323 | | pub fn truncate(&mut self, len: usize) { |
324 | | self.buffer.truncate(len * std::mem::size_of::<T>()); |
325 | | self.len = len; |
326 | | } |
327 | | |
328 | | /// # Safety |
329 | | /// This requires the iterator be a trusted length. This could instead require |
330 | | /// the iterator implement `TrustedLen` once that is stabilized. |
331 | | #[inline] |
332 | | pub unsafe fn append_trusted_len_iter(&mut self, iter: impl IntoIterator<Item = T>) { |
333 | | let iter = iter.into_iter(); |
334 | | let len = iter |
335 | | .size_hint() |
336 | | .1 |
337 | | .expect("append_trusted_len_iter expects upper bound"); |
338 | | self.reserve(len); |
339 | | self.extend(iter); |
340 | | } |
341 | | |
342 | | /// Resets this builder and returns an immutable [Buffer]. |
343 | | /// |
344 | | /// # Example: |
345 | | /// |
346 | | /// ``` |
347 | | /// # use arrow_buffer::builder::BufferBuilder; |
348 | | /// |
349 | | /// let mut builder = BufferBuilder::<u8>::new(10); |
350 | | /// builder.append_slice(&[42, 44, 46]); |
351 | | /// |
352 | | /// let buffer = builder.finish(); |
353 | | /// |
354 | | /// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]); |
355 | | /// ``` |
356 | | #[inline] |
357 | 0 | pub fn finish(&mut self) -> Buffer { |
358 | 0 | let buf = std::mem::take(&mut self.buffer); |
359 | 0 | self.len = 0; |
360 | 0 | buf.into() |
361 | 0 | } |
362 | | } |
363 | | |
364 | | impl<T: ArrowNativeType> Default for BufferBuilder<T> { |
365 | 0 | fn default() -> Self { |
366 | 0 | Self::new(0) |
367 | 0 | } |
368 | | } |
369 | | |
370 | | impl<T: ArrowNativeType> Extend<T> for BufferBuilder<T> { |
371 | | fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) { |
372 | | self.buffer.extend(iter.into_iter().inspect(|_| { |
373 | | self.len += 1; |
374 | | })) |
375 | | } |
376 | | } |
377 | | |
378 | | impl<T: ArrowNativeType> From<Vec<T>> for BufferBuilder<T> { |
379 | | fn from(value: Vec<T>) -> Self { |
380 | | Self::new_from_buffer(MutableBuffer::from(value)) |
381 | | } |
382 | | } |
383 | | |
384 | | impl<T: ArrowNativeType> FromIterator<T> for BufferBuilder<T> { |
385 | | fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self { |
386 | | let mut builder = Self::default(); |
387 | | builder.extend(iter); |
388 | | builder |
389 | | } |
390 | | } |
391 | | |
#[cfg(test)]
mod tests {
    use super::*;
    use std::mem;

    /// A default builder holds no elements and reports zero buffer capacity.
    #[test]
    fn default() {
        let empty = BufferBuilder::<u32>::default();
        assert!(empty.is_empty());
        assert!(empty.buffer.is_empty());
        assert_eq!(empty.buffer.capacity(), 0);
    }

    /// Collecting an iterator tracks both the element count and the byte length.
    #[test]
    fn from_iter() {
        let values = [1u16, 2, 3, 4];
        let collected: BufferBuilder<_> = values.into_iter().collect();
        assert_eq!(collected.len(), 4);
        assert_eq!(collected.buffer.len(), 4 * mem::size_of::<u16>());
    }

    /// Extending an existing builder adds to its element count.
    #[test]
    fn extend() {
        let mut builder: BufferBuilder<_> = [1, 2].into_iter().collect();
        assert_eq!(builder.len(), 2);
        builder.extend([3, 4]);
        assert_eq!(builder.len(), 4);
    }
}