/Users/andrewlamb/Software/arrow-rs/arrow-array/src/builder/fixed_size_binary_builder.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use crate::builder::ArrayBuilder; |
19 | | use crate::{ArrayRef, FixedSizeBinaryArray}; |
20 | | use arrow_buffer::Buffer; |
21 | | use arrow_buffer::NullBufferBuilder; |
22 | | use arrow_data::ArrayData; |
23 | | use arrow_schema::{ArrowError, DataType}; |
24 | | use std::any::Any; |
25 | | use std::sync::Arc; |
26 | | |
27 | | /// Builder for [`FixedSizeBinaryArray`] |
28 | | /// ``` |
29 | | /// # use arrow_array::builder::FixedSizeBinaryBuilder; |
30 | | /// # use arrow_array::Array; |
31 | | /// # |
32 | | /// let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5); |
33 | | /// // [b"hello", null, b"arrow"] |
34 | | /// builder.append_value(b"hello").unwrap(); |
35 | | /// builder.append_null(); |
36 | | /// builder.append_value(b"arrow").unwrap(); |
37 | | /// |
38 | | /// let array = builder.finish(); |
39 | | /// assert_eq!(array.value(0), b"hello"); |
40 | | /// assert!(array.is_null(1)); |
41 | | /// assert_eq!(array.value(2), b"arrow"); |
42 | | /// ``` |
43 | | #[derive(Debug)] |
44 | | pub struct FixedSizeBinaryBuilder { |
45 | | values_builder: Vec<u8>, |
46 | | null_buffer_builder: NullBufferBuilder, |
47 | | value_length: i32, |
48 | | } |
49 | | |
50 | | impl FixedSizeBinaryBuilder { |
51 | | /// Creates a new [`FixedSizeBinaryBuilder`] |
52 | 0 | pub fn new(byte_width: i32) -> Self { |
53 | 0 | Self::with_capacity(1024, byte_width) |
54 | 0 | } |
55 | | |
56 | | /// Creates a new [`FixedSizeBinaryBuilder`], `capacity` is the number of byte slices |
57 | | /// that can be appended without reallocating |
58 | 0 | pub fn with_capacity(capacity: usize, byte_width: i32) -> Self { |
59 | 0 | assert!( |
60 | 0 | byte_width >= 0, |
61 | 0 | "value length ({byte_width}) of the array must >= 0" |
62 | | ); |
63 | 0 | Self { |
64 | 0 | values_builder: Vec::with_capacity(capacity * byte_width as usize), |
65 | 0 | null_buffer_builder: NullBufferBuilder::new(capacity), |
66 | 0 | value_length: byte_width, |
67 | 0 | } |
68 | 0 | } |
69 | | |
70 | | /// Appends a byte slice into the builder. |
71 | | /// |
72 | | /// Automatically update the null buffer to delimit the slice appended in as a |
73 | | /// distinct value element. |
74 | | #[inline] |
75 | 0 | pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<(), ArrowError> { |
76 | 0 | if self.value_length != value.as_ref().len() as i32 { |
77 | 0 | Err(ArrowError::InvalidArgumentError( |
78 | 0 | "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths" |
79 | 0 | .to_string(), |
80 | 0 | )) |
81 | | } else { |
82 | 0 | self.values_builder.extend_from_slice(value.as_ref()); |
83 | 0 | self.null_buffer_builder.append_non_null(); |
84 | 0 | Ok(()) |
85 | | } |
86 | 0 | } |
87 | | |
88 | | /// Append a null value to the array. |
89 | | #[inline] |
90 | 0 | pub fn append_null(&mut self) { |
91 | 0 | self.values_builder |
92 | 0 | .extend(std::iter::repeat_n(0u8, self.value_length as usize)); |
93 | 0 | self.null_buffer_builder.append_null(); |
94 | 0 | } |
95 | | |
96 | | /// Appends `n` `null`s into the builder. |
97 | | #[inline] |
98 | | pub fn append_nulls(&mut self, n: usize) { |
99 | | self.values_builder |
100 | | .extend(std::iter::repeat_n(0u8, self.value_length as usize * n)); |
101 | | self.null_buffer_builder.append_n_nulls(n); |
102 | | } |
103 | | |
104 | | /// Returns the current values buffer as a slice |
105 | 0 | pub fn values_slice(&self) -> &[u8] { |
106 | 0 | self.values_builder.as_slice() |
107 | 0 | } |
108 | | |
109 | | /// Builds the [`FixedSizeBinaryArray`] and reset this builder. |
110 | 0 | pub fn finish(&mut self) -> FixedSizeBinaryArray { |
111 | 0 | let array_length = self.len(); |
112 | 0 | let array_data_builder = ArrayData::builder(DataType::FixedSizeBinary(self.value_length)) |
113 | 0 | .add_buffer(std::mem::take(&mut self.values_builder).into()) |
114 | 0 | .nulls(self.null_buffer_builder.finish()) |
115 | 0 | .len(array_length); |
116 | 0 | let array_data = unsafe { array_data_builder.build_unchecked() }; |
117 | 0 | FixedSizeBinaryArray::from(array_data) |
118 | 0 | } |
119 | | |
120 | | /// Builds the [`FixedSizeBinaryArray`] without resetting the builder. |
121 | 0 | pub fn finish_cloned(&self) -> FixedSizeBinaryArray { |
122 | 0 | let array_length = self.len(); |
123 | 0 | let values_buffer = Buffer::from_slice_ref(self.values_builder.as_slice()); |
124 | 0 | let array_data_builder = ArrayData::builder(DataType::FixedSizeBinary(self.value_length)) |
125 | 0 | .add_buffer(values_buffer) |
126 | 0 | .nulls(self.null_buffer_builder.finish_cloned()) |
127 | 0 | .len(array_length); |
128 | 0 | let array_data = unsafe { array_data_builder.build_unchecked() }; |
129 | 0 | FixedSizeBinaryArray::from(array_data) |
130 | 0 | } |
131 | | |
132 | | /// Returns the current null buffer as a slice |
133 | 0 | pub fn validity_slice(&self) -> Option<&[u8]> { |
134 | 0 | self.null_buffer_builder.as_slice() |
135 | 0 | } |
136 | | } |
137 | | |
138 | | impl ArrayBuilder for FixedSizeBinaryBuilder { |
139 | | /// Returns the builder as a non-mutable `Any` reference. |
140 | 0 | fn as_any(&self) -> &dyn Any { |
141 | 0 | self |
142 | 0 | } |
143 | | |
144 | | /// Returns the builder as a mutable `Any` reference. |
145 | 0 | fn as_any_mut(&mut self) -> &mut dyn Any { |
146 | 0 | self |
147 | 0 | } |
148 | | |
149 | | /// Returns the boxed builder as a box of `Any`. |
150 | 0 | fn into_box_any(self: Box<Self>) -> Box<dyn Any> { |
151 | 0 | self |
152 | 0 | } |
153 | | |
154 | | /// Returns the number of array slots in the builder |
155 | 0 | fn len(&self) -> usize { |
156 | 0 | self.null_buffer_builder.len() |
157 | 0 | } |
158 | | |
159 | | /// Builds the array and reset this builder. |
160 | 0 | fn finish(&mut self) -> ArrayRef { |
161 | 0 | Arc::new(self.finish()) |
162 | 0 | } |
163 | | |
164 | | /// Builds the array without resetting the builder. |
165 | 0 | fn finish_cloned(&self) -> ArrayRef { |
166 | 0 | Arc::new(self.finish_cloned()) |
167 | 0 | } |
168 | | } |
169 | | |
#[cfg(test)]
mod tests {
    use super::*;

    use crate::Array;

    /// Values, single nulls and runs of nulls end up in the expected slots, and
    /// `finish` leaves the builder empty.
    #[test]
    fn test_fixed_size_binary_builder() {
        let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);

        // [b"hello", null, b"arrow", null, null, b"world"]
        builder.append_value(b"hello").unwrap();
        builder.append_null();
        builder.append_value(b"arrow").unwrap();
        builder.append_nulls(2);
        builder.append_value(b"world").unwrap();
        let array: FixedSizeBinaryArray = builder.finish();

        assert_eq!(&DataType::FixedSizeBinary(5), array.data_type());
        assert_eq!(6, array.len());
        assert_eq!(3, array.null_count());
        assert_eq!(10, array.value_offset(2));
        assert_eq!(15, array.value_offset(3));
        assert_eq!(5, array.value_length());
        assert!(array.is_null(1));
        assert!(array.is_null(3));
        assert!(array.is_null(4));
        assert_eq!(array.value(0), b"hello");
        assert_eq!(array.value(2), b"arrow");
        assert_eq!(array.value(5), b"world");

        // `finish` resets the builder.
        assert!(builder.is_empty());
        assert!(builder.values_slice().is_empty());
    }

    /// `finish_cloned` snapshots the builder without resetting it, so later appends
    /// extend the same sequence.
    #[test]
    fn test_fixed_size_binary_builder_finish_cloned() {
        let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);

        // [b"hello", null, b"arrow"]
        builder.append_value(b"hello").unwrap();
        builder.append_null();
        builder.append_value(b"arrow").unwrap();
        // The null slot is zero-filled so that every slot keeps the same width.
        assert_eq!(builder.values_slice(), b"hello\0\0\0\0\0arrow");
        let mut array: FixedSizeBinaryArray = builder.finish_cloned();

        assert_eq!(&DataType::FixedSizeBinary(5), array.data_type());
        assert_eq!(3, array.len());
        assert_eq!(1, array.null_count());
        assert_eq!(10, array.value_offset(2));
        assert_eq!(5, array.value_length());

        // The builder still holds the three slots after `finish_cloned`.
        assert_eq!(3, builder.len());

        // [b"hello", null, b"arrow", b"finis", null, b"clone"]
        builder.append_value(b"finis").unwrap();
        builder.append_null();
        builder.append_value(b"clone").unwrap();

        array = builder.finish();

        assert_eq!(&DataType::FixedSizeBinary(5), array.data_type());
        assert_eq!(6, array.len());
        assert_eq!(2, array.null_count());
        assert_eq!(25, array.value_offset(5));
        assert_eq!(5, array.value_length());
        assert_eq!(array.value(0), b"hello");
        assert_eq!(array.value(3), b"finis");
        assert_eq!(array.value(5), b"clone");
    }

    /// A byte width of zero is legal: slots exist but carry no bytes.
    #[test]
    fn test_fixed_size_binary_builder_with_zero_value_length() {
        let mut builder = FixedSizeBinaryBuilder::new(0);

        builder.append_value(b"").unwrap();
        builder.append_null();
        builder.append_value(b"").unwrap();
        assert!(!builder.is_empty());

        let array: FixedSizeBinaryArray = builder.finish();
        assert_eq!(&DataType::FixedSizeBinary(0), array.data_type());
        assert_eq!(3, array.len());
        assert_eq!(1, array.null_count());
        assert_eq!(0, array.value_offset(2));
        assert_eq!(0, array.value_length());
        assert_eq!(b"", array.value(0));
        assert_eq!(b"", array.value(2));
    }

    /// A slice of the wrong width is rejected with an error and nothing is appended.
    #[test]
    fn test_fixed_size_binary_builder_with_inconsistent_value_length() {
        let mut builder = FixedSizeBinaryBuilder::with_capacity(1, 4);

        // Inspect the returned error directly instead of relying on `unwrap` panicking.
        let err = builder.append_value(b"hello").unwrap_err();
        assert!(matches!(&err, ArrowError::InvalidArgumentError(_)));
        assert!(err.to_string().contains(
            "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths"
        ));

        // The failed append must not leave a partial slot behind.
        assert!(builder.is_empty());
        assert!(builder.values_slice().is_empty());
    }

    /// Finishing a builder with no appended slots yields an empty array of the right type.
    #[test]
    fn test_fixed_size_binary_builder_empty() {
        let mut builder = FixedSizeBinaryBuilder::new(5);
        assert!(builder.is_empty());

        let fixed_size_binary_array = builder.finish();
        assert_eq!(
            &DataType::FixedSizeBinary(5),
            fixed_size_binary_array.data_type()
        );
        assert_eq!(0, fixed_size_binary_array.len());
    }

    /// A negative byte width is a programming error and panics at construction.
    #[test]
    #[should_panic(expected = "value length (-1) of the array must >= 0")]
    fn test_fixed_size_binary_builder_invalid_value_length() {
        let _ = FixedSizeBinaryBuilder::with_capacity(15, -1);
    }
}