/Users/andrewlamb/Software/arrow-rs/arrow-array/src/array/fixed_size_binary_array.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use crate::array::print_long_array; |
19 | | use crate::iterator::FixedSizeBinaryIter; |
20 | | use crate::{Array, ArrayAccessor, ArrayRef, FixedSizeListArray, Scalar}; |
21 | | use arrow_buffer::buffer::NullBuffer; |
22 | | use arrow_buffer::{bit_util, ArrowNativeType, BooleanBuffer, Buffer, MutableBuffer}; |
23 | | use arrow_data::{ArrayData, ArrayDataBuilder}; |
24 | | use arrow_schema::{ArrowError, DataType}; |
25 | | use std::any::Any; |
26 | | use std::sync::Arc; |
27 | | |
28 | | /// An array of [fixed size binary arrays](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout) |
29 | | /// |
30 | | /// # Examples |
31 | | /// |
32 | | /// Create an array from an iterable argument of byte slices. |
33 | | /// |
34 | | /// ``` |
35 | | /// use arrow_array::{Array, FixedSizeBinaryArray}; |
36 | | /// let input_arg = vec![ vec![1, 2], vec![3, 4], vec![5, 6] ]; |
37 | | /// let arr = FixedSizeBinaryArray::try_from_iter(input_arg.into_iter()).unwrap(); |
38 | | /// |
39 | | /// assert_eq!(3, arr.len()); |
40 | | /// |
41 | | /// ``` |
42 | | /// Create an array from an iterable argument of sparse byte slices. |
43 | | /// Sparsity means that the input argument can contain `None` items. |
44 | | /// ``` |
45 | | /// use arrow_array::{Array, FixedSizeBinaryArray}; |
46 | | /// let input_arg = vec![ None, Some(vec![7, 8]), Some(vec![9, 10]), None, Some(vec![13, 14]) ]; |
47 | | /// let arr = FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 2).unwrap(); |
48 | | /// assert_eq!(5, arr.len()) |
49 | | /// |
50 | | /// ``` |
51 | | /// |
52 | | #[derive(Clone)] |
53 | | pub struct FixedSizeBinaryArray { |
54 | | data_type: DataType, // Must be DataType::FixedSizeBinary(value_length) |
55 | | value_data: Buffer, |
56 | | nulls: Option<NullBuffer>, |
57 | | len: usize, |
58 | | value_length: i32, |
59 | | } |
60 | | |
61 | | impl FixedSizeBinaryArray { |
62 | | /// Create a new [`FixedSizeBinaryArray`] with `size` element size, panicking on failure |
63 | | /// |
64 | | /// # Panics |
65 | | /// |
66 | | /// Panics if [`Self::try_new`] returns an error |
67 | 0 | pub fn new(size: i32, values: Buffer, nulls: Option<NullBuffer>) -> Self { |
68 | 0 | Self::try_new(size, values, nulls).unwrap() |
69 | 0 | } |
70 | | |
71 | | /// Create a new [`Scalar`] from `value` |
72 | | pub fn new_scalar(value: impl AsRef<[u8]>) -> Scalar<Self> { |
73 | | let v = value.as_ref(); |
74 | | Scalar::new(Self::new(v.len() as _, Buffer::from(v), None)) |
75 | | } |
76 | | |
77 | | /// Create a new [`FixedSizeBinaryArray`] from the provided parts, returning an error on failure |
78 | | /// |
79 | | /// # Errors |
80 | | /// |
81 | | /// * `size < 0` |
82 | | /// * `values.len() / size != nulls.len()` |
83 | 13 | pub fn try_new( |
84 | 13 | size: i32, |
85 | 13 | values: Buffer, |
86 | 13 | nulls: Option<NullBuffer>, |
87 | 13 | ) -> Result<Self, ArrowError> { |
88 | 13 | let data_type = DataType::FixedSizeBinary(size); |
89 | 13 | let s = size.to_usize().ok_or_else(|| {0 |
90 | 0 | ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {size}")) |
91 | 0 | })?; |
92 | | |
93 | 13 | let len = values.len() / s; |
94 | 13 | if let Some(n2 ) = nulls.as_ref() { |
95 | 2 | if n.len() != len { |
96 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
97 | 0 | "Incorrect length of null buffer for FixedSizeBinaryArray, expected {} got {}", |
98 | 0 | len, |
99 | 0 | n.len(), |
100 | 0 | ))); |
101 | 2 | } |
102 | 11 | } |
103 | | |
104 | 13 | Ok(Self { |
105 | 13 | data_type, |
106 | 13 | value_data: values, |
107 | 13 | value_length: size, |
108 | 13 | nulls, |
109 | 13 | len, |
110 | 13 | }) |
111 | 13 | } |
112 | | |
113 | | /// Create a new [`FixedSizeBinaryArray`] of length `len` where all values are null |
114 | | /// |
115 | | /// # Panics |
116 | | /// |
117 | | /// Panics if |
118 | | /// |
119 | | /// * `size < 0` |
120 | | /// * `size * len` would overflow `usize` |
121 | 0 | pub fn new_null(size: i32, len: usize) -> Self { |
122 | 0 | let capacity = size.to_usize().unwrap().checked_mul(len).unwrap(); |
123 | 0 | Self { |
124 | 0 | data_type: DataType::FixedSizeBinary(size), |
125 | 0 | value_data: MutableBuffer::new(capacity).into(), |
126 | 0 | nulls: Some(NullBuffer::new_null(len)), |
127 | 0 | value_length: size, |
128 | 0 | len, |
129 | 0 | } |
130 | 0 | } |
131 | | |
132 | | /// Deconstruct this array into its constituent parts |
133 | 0 | pub fn into_parts(self) -> (i32, Buffer, Option<NullBuffer>) { |
134 | 0 | (self.value_length, self.value_data, self.nulls) |
135 | 0 | } |
136 | | |
137 | | /// Returns the element at index `i` as a byte slice. |
138 | | /// |
139 | | /// Note: This method does not check for nulls and the value is arbitrary |
140 | | /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index. |
141 | | /// |
142 | | /// # Panics |
143 | | /// Panics if index `i` is out of bounds. |
144 | 3 | pub fn value(&self, i: usize) -> &[u8] { |
145 | 3 | assert!( |
146 | 3 | i < self.len(), |
147 | 0 | "Trying to access an element at index {} from a FixedSizeBinaryArray of length {}", |
148 | | i, |
149 | 0 | self.len() |
150 | | ); |
151 | 3 | let offset = i + self.offset(); |
152 | | unsafe { |
153 | 3 | let pos = self.value_offset_at(offset); |
154 | 3 | std::slice::from_raw_parts( |
155 | 3 | self.value_data.as_ptr().offset(pos as isize), |
156 | 3 | (self.value_offset_at(offset + 1) - pos) as usize, |
157 | 3 | ) |
158 | | } |
159 | 3 | } |
160 | | |
161 | | /// Returns the element at index `i` as a byte slice. |
162 | | /// |
163 | | /// Note: This method does not check for nulls and the value is arbitrary |
164 | | /// if [`is_null`](Self::is_null) returns true for the index. |
165 | | /// |
166 | | /// # Safety |
167 | | /// |
168 | | /// Caller is responsible for ensuring that the index is within the bounds |
169 | | /// of the array |
170 | 0 | pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] { |
171 | 0 | let offset = i + self.offset(); |
172 | 0 | let pos = self.value_offset_at(offset); |
173 | 0 | std::slice::from_raw_parts( |
174 | 0 | self.value_data.as_ptr().offset(pos as isize), |
175 | 0 | (self.value_offset_at(offset + 1) - pos) as usize, |
176 | 0 | ) |
177 | 0 | } |
178 | | |
179 | | /// Returns the offset for the element at index `i`. |
180 | | /// |
181 | | /// Note this doesn't do any bound checking, for performance reason. |
182 | | #[inline] |
183 | 0 | pub fn value_offset(&self, i: usize) -> i32 { |
184 | 0 | self.value_offset_at(self.offset() + i) |
185 | 0 | } |
186 | | |
187 | | /// Returns the length for an element. |
188 | | /// |
189 | | /// All elements have the same length as the array is a fixed size. |
190 | | #[inline] |
191 | 3 | pub fn value_length(&self) -> i32 { |
192 | 3 | self.value_length |
193 | 3 | } |
194 | | |
195 | | /// Returns the values of this array. |
196 | | /// |
197 | | /// Unlike [`Self::value_data`] this returns the [`Buffer`] |
198 | | /// allowing for zero-copy cloning. |
199 | | #[inline] |
200 | 0 | pub fn values(&self) -> &Buffer { |
201 | 0 | &self.value_data |
202 | 0 | } |
203 | | |
204 | | /// Returns the raw value data. |
205 | 0 | pub fn value_data(&self) -> &[u8] { |
206 | 0 | self.value_data.as_slice() |
207 | 0 | } |
208 | | |
209 | | /// Returns a zero-copy slice of this array with the indicated offset and length. |
210 | 4 | pub fn slice(&self, offset: usize, len: usize) -> Self { |
211 | 4 | assert!( |
212 | 4 | offset.saturating_add(len) <= self.len, |
213 | 0 | "the length + offset of the sliced FixedSizeBinaryArray cannot exceed the existing length" |
214 | | ); |
215 | | |
216 | 4 | let size = self.value_length as usize; |
217 | | |
218 | | Self { |
219 | 4 | data_type: self.data_type.clone(), |
220 | 4 | nulls: self.nulls.as_ref().map(|n| n1 .slice1 (offset1 , len1 )), |
221 | 4 | value_length: self.value_length, |
222 | 4 | value_data: self.value_data.slice_with_length(offset * size, len * size), |
223 | 4 | len, |
224 | | } |
225 | 4 | } |
226 | | |
227 | | /// Create an array from an iterable argument of sparse byte slices. |
228 | | /// Sparsity means that items returned by the iterator are optional, i.e input argument can |
229 | | /// contain `None` items. |
230 | | /// |
231 | | /// # Examples |
232 | | /// |
233 | | /// ``` |
234 | | /// use arrow_array::FixedSizeBinaryArray; |
235 | | /// let input_arg = vec![ |
236 | | /// None, |
237 | | /// Some(vec![7, 8]), |
238 | | /// Some(vec![9, 10]), |
239 | | /// None, |
240 | | /// Some(vec![13, 14]), |
241 | | /// None, |
242 | | /// ]; |
243 | | /// let array = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.into_iter()).unwrap(); |
244 | | /// ``` |
245 | | /// |
246 | | /// # Errors |
247 | | /// |
248 | | /// Returns error if argument has length zero, or sizes of nested slices don't match. |
249 | | #[deprecated( |
250 | | since = "28.0.0", |
251 | | note = "This function will fail if the iterator produces only None values; prefer `try_from_sparse_iter_with_size`" |
252 | | )] |
253 | 0 | pub fn try_from_sparse_iter<T, U>(mut iter: T) -> Result<Self, ArrowError> |
254 | 0 | where |
255 | 0 | T: Iterator<Item = Option<U>>, |
256 | 0 | U: AsRef<[u8]>, |
257 | | { |
258 | 0 | let mut len = 0; |
259 | 0 | let mut size = None; |
260 | 0 | let mut byte = 0; |
261 | | |
262 | 0 | let iter_size_hint = iter.size_hint().0; |
263 | 0 | let mut null_buf = MutableBuffer::new(bit_util::ceil(iter_size_hint, 8)); |
264 | 0 | let mut buffer = MutableBuffer::new(0); |
265 | | |
266 | 0 | let mut prepend = 0; |
267 | 0 | iter.try_for_each(|item| -> Result<(), ArrowError> { |
268 | | // extend null bitmask by one byte per each 8 items |
269 | 0 | if byte == 0 { |
270 | 0 | null_buf.push(0u8); |
271 | 0 | byte = 8; |
272 | 0 | } |
273 | 0 | byte -= 1; |
274 | | |
275 | 0 | if let Some(slice) = item { |
276 | 0 | let slice = slice.as_ref(); |
277 | 0 | if let Some(size) = size { |
278 | 0 | if size != slice.len() { |
279 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
280 | 0 | "Nested array size mismatch: one is {}, and the other is {}", |
281 | 0 | size, |
282 | 0 | slice.len() |
283 | 0 | ))); |
284 | 0 | } |
285 | 0 | } else { |
286 | 0 | let len = slice.len(); |
287 | 0 | size = Some(len); |
288 | 0 | // Now that we know how large each element is we can reserve |
289 | 0 | // sufficient capacity in the underlying mutable buffer for |
290 | 0 | // the data. |
291 | 0 | buffer.reserve(iter_size_hint * len); |
292 | 0 | buffer.extend_zeros(slice.len() * prepend); |
293 | 0 | } |
294 | 0 | bit_util::set_bit(null_buf.as_slice_mut(), len); |
295 | 0 | buffer.extend_from_slice(slice); |
296 | 0 | } else if let Some(size) = size { |
297 | 0 | buffer.extend_zeros(size); |
298 | 0 | } else { |
299 | 0 | prepend += 1; |
300 | 0 | } |
301 | | |
302 | 0 | len += 1; |
303 | | |
304 | 0 | Ok(()) |
305 | 0 | })?; |
306 | | |
307 | 0 | if len == 0 { |
308 | 0 | return Err(ArrowError::InvalidArgumentError( |
309 | 0 | "Input iterable argument has no data".to_owned(), |
310 | 0 | )); |
311 | 0 | } |
312 | | |
313 | 0 | let null_buf = BooleanBuffer::new(null_buf.into(), 0, len); |
314 | 0 | let nulls = Some(NullBuffer::new(null_buf)).filter(|n| n.null_count() > 0); |
315 | | |
316 | 0 | let size = size.unwrap_or(0) as i32; |
317 | 0 | Ok(Self { |
318 | 0 | data_type: DataType::FixedSizeBinary(size), |
319 | 0 | value_data: buffer.into(), |
320 | 0 | nulls, |
321 | 0 | value_length: size, |
322 | 0 | len, |
323 | 0 | }) |
324 | 0 | } |
325 | | |
326 | | /// Create an array from an iterable argument of sparse byte slices. |
327 | | /// Sparsity means that items returned by the iterator are optional, i.e input argument can |
328 | | /// contain `None` items. In cases where the iterator returns only `None` values, this |
329 | | /// also takes a size parameter to ensure that the a valid FixedSizeBinaryArray is still |
330 | | /// created. |
331 | | /// |
332 | | /// # Examples |
333 | | /// |
334 | | /// ``` |
335 | | /// use arrow_array::FixedSizeBinaryArray; |
336 | | /// let input_arg = vec![ |
337 | | /// None, |
338 | | /// Some(vec![7, 8]), |
339 | | /// Some(vec![9, 10]), |
340 | | /// None, |
341 | | /// Some(vec![13, 14]), |
342 | | /// None, |
343 | | /// ]; |
344 | | /// let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 2).unwrap(); |
345 | | /// ``` |
346 | | /// |
347 | | /// # Errors |
348 | | /// |
349 | | /// Returns error if argument has length zero, or sizes of nested slices don't match. |
350 | 2 | pub fn try_from_sparse_iter_with_size<T, U>(mut iter: T, size: i32) -> Result<Self, ArrowError> |
351 | 2 | where |
352 | 2 | T: Iterator<Item = Option<U>>, |
353 | 2 | U: AsRef<[u8]>, |
354 | | { |
355 | 2 | let mut len = 0; |
356 | 2 | let mut byte = 0; |
357 | | |
358 | 2 | let iter_size_hint = iter.size_hint().0; |
359 | 2 | let mut null_buf = MutableBuffer::new(bit_util::ceil(iter_size_hint, 8)); |
360 | 2 | let mut buffer = MutableBuffer::new(iter_size_hint * (size as usize)); |
361 | | |
362 | 6 | iter2 .try_for_each2 (|item| -> Result<(), ArrowError> { |
363 | | // extend null bitmask by one byte per each 8 items |
364 | 6 | if byte == 0 { |
365 | 2 | null_buf.push(0u8); |
366 | 2 | byte = 8; |
367 | 4 | } |
368 | 6 | byte -= 1; |
369 | | |
370 | 6 | if let Some(slice5 ) = item { |
371 | 5 | let slice = slice.as_ref(); |
372 | 5 | if size as usize != slice.len() { |
373 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
374 | 0 | "Nested array size mismatch: one is {}, and the other is {}", |
375 | 0 | size, |
376 | 0 | slice.len() |
377 | 0 | ))); |
378 | 5 | } |
379 | | |
380 | 5 | bit_util::set_bit(null_buf.as_slice_mut(), len); |
381 | 5 | buffer.extend_from_slice(slice); |
382 | 1 | } else { |
383 | 1 | buffer.extend_zeros(size as usize); |
384 | 1 | } |
385 | | |
386 | 6 | len += 1; |
387 | | |
388 | 6 | Ok(()) |
389 | 6 | })?0 ; |
390 | | |
391 | 2 | let null_buf = BooleanBuffer::new(null_buf.into(), 0, len); |
392 | 2 | let nulls = Some(NullBuffer::new(null_buf)).filter(|n| n.null_count() > 0); |
393 | | |
394 | 2 | Ok(Self { |
395 | 2 | data_type: DataType::FixedSizeBinary(size), |
396 | 2 | value_data: buffer.into(), |
397 | 2 | nulls, |
398 | 2 | len, |
399 | 2 | value_length: size, |
400 | 2 | }) |
401 | 2 | } |
402 | | |
403 | | /// Create an array from an iterable argument of byte slices. |
404 | | /// |
405 | | /// # Examples |
406 | | /// |
407 | | /// ``` |
408 | | /// use arrow_array::FixedSizeBinaryArray; |
409 | | /// let input_arg = vec![ |
410 | | /// vec![1, 2], |
411 | | /// vec![3, 4], |
412 | | /// vec![5, 6], |
413 | | /// ]; |
414 | | /// let array = FixedSizeBinaryArray::try_from_iter(input_arg.into_iter()).unwrap(); |
415 | | /// ``` |
416 | | /// |
417 | | /// # Errors |
418 | | /// |
419 | | /// Returns error if argument has length zero, or sizes of nested slices don't match. |
420 | 2 | pub fn try_from_iter<T, U>(mut iter: T) -> Result<Self, ArrowError> |
421 | 2 | where |
422 | 2 | T: Iterator<Item = U>, |
423 | 2 | U: AsRef<[u8]>, |
424 | | { |
425 | 2 | let mut len = 0; |
426 | 2 | let mut size = None; |
427 | 2 | let iter_size_hint = iter.size_hint().0; |
428 | 2 | let mut buffer = MutableBuffer::new(0); |
429 | | |
430 | 4 | iter2 .try_for_each2 (|item| -> Result<(), ArrowError> { |
431 | 4 | let slice = item.as_ref(); |
432 | 4 | if let Some(size2 ) = size { |
433 | 2 | if size != slice.len() { |
434 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
435 | 0 | "Nested array size mismatch: one is {}, and the other is {}", |
436 | 0 | size, |
437 | 0 | slice.len() |
438 | 0 | ))); |
439 | 2 | } |
440 | 2 | } else { |
441 | 2 | let len = slice.len(); |
442 | 2 | size = Some(len); |
443 | 2 | buffer.reserve(iter_size_hint * len); |
444 | 2 | } |
445 | | |
446 | 4 | buffer.extend_from_slice(slice); |
447 | | |
448 | 4 | len += 1; |
449 | | |
450 | 4 | Ok(()) |
451 | 4 | })?0 ; |
452 | | |
453 | 2 | if len == 0 { |
454 | 0 | return Err(ArrowError::InvalidArgumentError( |
455 | 0 | "Input iterable argument has no data".to_owned(), |
456 | 0 | )); |
457 | 2 | } |
458 | | |
459 | 2 | let size = size.unwrap_or(0).try_into().unwrap(); |
460 | 2 | Ok(Self { |
461 | 2 | data_type: DataType::FixedSizeBinary(size), |
462 | 2 | value_data: buffer.into(), |
463 | 2 | nulls: None, |
464 | 2 | value_length: size, |
465 | 2 | len, |
466 | 2 | }) |
467 | 2 | } |
468 | | |
469 | | #[inline] |
470 | 6 | fn value_offset_at(&self, i: usize) -> i32 { |
471 | 6 | self.value_length * i as i32 |
472 | 6 | } |
473 | | |
474 | | /// constructs a new iterator |
475 | 0 | pub fn iter(&self) -> FixedSizeBinaryIter<'_> { |
476 | 0 | FixedSizeBinaryIter::new(self) |
477 | 0 | } |
478 | | } |
479 | | |
480 | | impl From<ArrayData> for FixedSizeBinaryArray { |
481 | 3 | fn from(data: ArrayData) -> Self { |
482 | 3 | assert_eq!( |
483 | 3 | data.buffers().len(), |
484 | | 1, |
485 | 0 | "FixedSizeBinaryArray data should contain 1 buffer only (values)" |
486 | | ); |
487 | 3 | let value_length = match data.data_type() { |
488 | 3 | DataType::FixedSizeBinary(len) => *len, |
489 | 0 | _ => panic!("Expected data type to be FixedSizeBinary"), |
490 | | }; |
491 | | |
492 | 3 | let size = value_length as usize; |
493 | 3 | let value_data = |
494 | 3 | data.buffers()[0].slice_with_length(data.offset() * size, data.len() * size); |
495 | | |
496 | 3 | Self { |
497 | 3 | data_type: data.data_type().clone(), |
498 | 3 | nulls: data.nulls().cloned(), |
499 | 3 | len: data.len(), |
500 | 3 | value_data, |
501 | 3 | value_length, |
502 | 3 | } |
503 | 3 | } |
504 | | } |
505 | | |
506 | | impl From<FixedSizeBinaryArray> for ArrayData { |
507 | 20 | fn from(array: FixedSizeBinaryArray) -> Self { |
508 | 20 | let builder = ArrayDataBuilder::new(array.data_type) |
509 | 20 | .len(array.len) |
510 | 20 | .buffers(vec![array.value_data]) |
511 | 20 | .nulls(array.nulls); |
512 | | |
513 | 20 | unsafe { builder.build_unchecked() } |
514 | 20 | } |
515 | | } |
516 | | |
517 | | /// Creates a `FixedSizeBinaryArray` from `FixedSizeList<u8>` array |
518 | | impl From<FixedSizeListArray> for FixedSizeBinaryArray { |
519 | 0 | fn from(v: FixedSizeListArray) -> Self { |
520 | 0 | let value_len = v.value_length(); |
521 | 0 | let v = v.into_data(); |
522 | 0 | assert_eq!( |
523 | 0 | v.child_data().len(), |
524 | | 1, |
525 | 0 | "FixedSizeBinaryArray can only be created from list array of u8 values \ |
526 | 0 | (i.e. FixedSizeList<PrimitiveArray<u8>>)." |
527 | | ); |
528 | 0 | let child_data = &v.child_data()[0]; |
529 | | |
530 | 0 | assert_eq!( |
531 | 0 | child_data.child_data().len(), |
532 | | 0, |
533 | 0 | "FixedSizeBinaryArray can only be created from list array of u8 values \ |
534 | 0 | (i.e. FixedSizeList<PrimitiveArray<u8>>)." |
535 | | ); |
536 | 0 | assert_eq!( |
537 | 0 | child_data.data_type(), |
538 | | &DataType::UInt8, |
539 | 0 | "FixedSizeBinaryArray can only be created from FixedSizeList<u8> arrays, mismatched data types." |
540 | | ); |
541 | 0 | assert_eq!( |
542 | 0 | child_data.null_count(), |
543 | | 0, |
544 | 0 | "The child array cannot contain null values." |
545 | | ); |
546 | | |
547 | 0 | let builder = ArrayData::builder(DataType::FixedSizeBinary(value_len)) |
548 | 0 | .len(v.len()) |
549 | 0 | .offset(v.offset()) |
550 | 0 | .add_buffer(child_data.buffers()[0].slice(child_data.offset())) |
551 | 0 | .nulls(v.nulls().cloned()); |
552 | | |
553 | 0 | let data = unsafe { builder.build_unchecked() }; |
554 | 0 | Self::from(data) |
555 | 0 | } |
556 | | } |
557 | | |
558 | | impl From<Vec<Option<&[u8]>>> for FixedSizeBinaryArray { |
559 | 0 | fn from(v: Vec<Option<&[u8]>>) -> Self { |
560 | | #[allow(deprecated)] |
561 | 0 | Self::try_from_sparse_iter(v.into_iter()).unwrap() |
562 | 0 | } |
563 | | } |
564 | | |
565 | | impl From<Vec<&[u8]>> for FixedSizeBinaryArray { |
566 | 0 | fn from(v: Vec<&[u8]>) -> Self { |
567 | 0 | Self::try_from_iter(v.into_iter()).unwrap() |
568 | 0 | } |
569 | | } |
570 | | |
571 | | impl<const N: usize> From<Vec<&[u8; N]>> for FixedSizeBinaryArray { |
572 | | fn from(v: Vec<&[u8; N]>) -> Self { |
573 | | Self::try_from_iter(v.into_iter()).unwrap() |
574 | | } |
575 | | } |
576 | | |
577 | | impl std::fmt::Debug for FixedSizeBinaryArray { |
578 | 0 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { |
579 | 0 | write!(f, "FixedSizeBinaryArray<{}>\n[\n", self.value_length())?; |
580 | 0 | print_long_array(self, f, |array, index, f| { |
581 | 0 | std::fmt::Debug::fmt(&array.value(index), f) |
582 | 0 | })?; |
583 | 0 | write!(f, "]") |
584 | 0 | } |
585 | | } |
586 | | |
587 | | impl Array for FixedSizeBinaryArray { |
588 | 4 | fn as_any(&self) -> &dyn Any { |
589 | 4 | self |
590 | 4 | } |
591 | | |
592 | 20 | fn to_data(&self) -> ArrayData { |
593 | 20 | self.clone().into() |
594 | 20 | } |
595 | | |
596 | 0 | fn into_data(self) -> ArrayData { |
597 | 0 | self.into() |
598 | 0 | } |
599 | | |
600 | 26 | fn data_type(&self) -> &DataType { |
601 | 26 | &self.data_type |
602 | 26 | } |
603 | | |
604 | 4 | fn slice(&self, offset: usize, length: usize) -> ArrayRef { |
605 | 4 | Arc::new(self.slice(offset, length)) |
606 | 4 | } |
607 | | |
608 | 48 | fn len(&self) -> usize { |
609 | 48 | self.len |
610 | 48 | } |
611 | | |
612 | 0 | fn is_empty(&self) -> bool { |
613 | 0 | self.len == 0 |
614 | 0 | } |
615 | | |
616 | 0 | fn shrink_to_fit(&mut self) { |
617 | 0 | self.value_data.shrink_to_fit(); |
618 | 0 | if let Some(nulls) = &mut self.nulls { |
619 | 0 | nulls.shrink_to_fit(); |
620 | 0 | } |
621 | 0 | } |
622 | | |
623 | 3 | fn offset(&self) -> usize { |
624 | 3 | 0 |
625 | 3 | } |
626 | | |
627 | 14 | fn nulls(&self) -> Option<&NullBuffer> { |
628 | 14 | self.nulls.as_ref() |
629 | 14 | } |
630 | | |
631 | 0 | fn logical_null_count(&self) -> usize { |
632 | | // More efficient that the default implementation |
633 | 0 | self.null_count() |
634 | 0 | } |
635 | | |
636 | 0 | fn get_buffer_memory_size(&self) -> usize { |
637 | 0 | let mut sum = self.value_data.capacity(); |
638 | 0 | if let Some(n) = &self.nulls { |
639 | 0 | sum += n.buffer().capacity(); |
640 | 0 | } |
641 | 0 | sum |
642 | 0 | } |
643 | | |
644 | 0 | fn get_array_memory_size(&self) -> usize { |
645 | 0 | std::mem::size_of::<Self>() + self.get_buffer_memory_size() |
646 | 0 | } |
647 | | } |
648 | | |
649 | | impl<'a> ArrayAccessor for &'a FixedSizeBinaryArray { |
650 | | type Item = &'a [u8]; |
651 | | |
652 | 0 | fn value(&self, index: usize) -> Self::Item { |
653 | 0 | FixedSizeBinaryArray::value(self, index) |
654 | 0 | } |
655 | | |
656 | 0 | unsafe fn value_unchecked(&self, index: usize) -> Self::Item { |
657 | 0 | FixedSizeBinaryArray::value_unchecked(self, index) |
658 | 0 | } |
659 | | } |
660 | | |
661 | | impl<'a> IntoIterator for &'a FixedSizeBinaryArray { |
662 | | type Item = Option<&'a [u8]>; |
663 | | type IntoIter = FixedSizeBinaryIter<'a>; |
664 | | |
665 | 0 | fn into_iter(self) -> Self::IntoIter { |
666 | 0 | FixedSizeBinaryIter::<'a>::new(self) |
667 | 0 | } |
668 | | } |
669 | | |
670 | | #[cfg(test)] |
671 | | mod tests { |
672 | | use crate::RecordBatch; |
673 | | use arrow_schema::{Field, Schema}; |
674 | | |
675 | | use super::*; |
676 | | |
677 | | #[test] |
678 | | fn test_fixed_size_binary_array() { |
679 | | let values: [u8; 15] = *b"hellotherearrow"; |
680 | | |
681 | | let array_data = ArrayData::builder(DataType::FixedSizeBinary(5)) |
682 | | .len(3) |
683 | | .add_buffer(Buffer::from(&values)) |
684 | | .build() |
685 | | .unwrap(); |
686 | | let fixed_size_binary_array = FixedSizeBinaryArray::from(array_data); |
687 | | assert_eq!(3, fixed_size_binary_array.len()); |
688 | | assert_eq!(0, fixed_size_binary_array.null_count()); |
689 | | assert_eq!( |
690 | | [b'h', b'e', b'l', b'l', b'o'], |
691 | | fixed_size_binary_array.value(0) |
692 | | ); |
693 | | assert_eq!( |
694 | | [b't', b'h', b'e', b'r', b'e'], |
695 | | fixed_size_binary_array.value(1) |
696 | | ); |
697 | | assert_eq!( |
698 | | [b'a', b'r', b'r', b'o', b'w'], |
699 | | fixed_size_binary_array.value(2) |
700 | | ); |
701 | | assert_eq!(5, fixed_size_binary_array.value_length()); |
702 | | assert_eq!(10, fixed_size_binary_array.value_offset(2)); |
703 | | for i in 0..3 { |
704 | | assert!(fixed_size_binary_array.is_valid(i)); |
705 | | assert!(!fixed_size_binary_array.is_null(i)); |
706 | | } |
707 | | |
708 | | // Test binary array with offset |
709 | | let array_data = ArrayData::builder(DataType::FixedSizeBinary(5)) |
710 | | .len(2) |
711 | | .offset(1) |
712 | | .add_buffer(Buffer::from(&values)) |
713 | | .build() |
714 | | .unwrap(); |
715 | | let fixed_size_binary_array = FixedSizeBinaryArray::from(array_data); |
716 | | assert_eq!( |
717 | | [b't', b'h', b'e', b'r', b'e'], |
718 | | fixed_size_binary_array.value(0) |
719 | | ); |
720 | | assert_eq!( |
721 | | [b'a', b'r', b'r', b'o', b'w'], |
722 | | fixed_size_binary_array.value(1) |
723 | | ); |
724 | | assert_eq!(2, fixed_size_binary_array.len()); |
725 | | assert_eq!(0, fixed_size_binary_array.value_offset(0)); |
726 | | assert_eq!(5, fixed_size_binary_array.value_length()); |
727 | | assert_eq!(5, fixed_size_binary_array.value_offset(1)); |
728 | | } |
729 | | |
730 | | #[test] |
731 | | fn test_fixed_size_binary_array_from_fixed_size_list_array() { |
732 | | let values = [0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]; |
733 | | let values_data = ArrayData::builder(DataType::UInt8) |
734 | | .len(12) |
735 | | .offset(2) |
736 | | .add_buffer(Buffer::from_slice_ref(values)) |
737 | | .build() |
738 | | .unwrap(); |
739 | | // [null, [10, 11, 12, 13]] |
740 | | let array_data = unsafe { |
741 | | ArrayData::builder(DataType::FixedSizeList( |
742 | | Arc::new(Field::new_list_field(DataType::UInt8, false)), |
743 | | 4, |
744 | | )) |
745 | | .len(2) |
746 | | .offset(1) |
747 | | .add_child_data(values_data) |
748 | | .null_bit_buffer(Some(Buffer::from_slice_ref([0b101]))) |
749 | | .build_unchecked() |
750 | | }; |
751 | | let list_array = FixedSizeListArray::from(array_data); |
752 | | let binary_array = FixedSizeBinaryArray::from(list_array); |
753 | | |
754 | | assert_eq!(2, binary_array.len()); |
755 | | assert_eq!(1, binary_array.null_count()); |
756 | | assert!(binary_array.is_null(0)); |
757 | | assert!(binary_array.is_valid(1)); |
758 | | assert_eq!(&[10, 11, 12, 13], binary_array.value(1)); |
759 | | } |
760 | | |
761 | | #[test] |
762 | | #[should_panic( |
763 | | expected = "FixedSizeBinaryArray can only be created from FixedSizeList<u8> arrays" |
764 | | )] |
765 | | // Different error messages, so skip for now |
766 | | // https://github.com/apache/arrow-rs/issues/1545 |
767 | | #[cfg(not(feature = "force_validate"))] |
768 | | fn test_fixed_size_binary_array_from_incorrect_fixed_size_list_array() { |
769 | | let values: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]; |
770 | | let values_data = ArrayData::builder(DataType::UInt32) |
771 | | .len(12) |
772 | | .add_buffer(Buffer::from_slice_ref(values)) |
773 | | .build() |
774 | | .unwrap(); |
775 | | |
776 | | let array_data = unsafe { |
777 | | ArrayData::builder(DataType::FixedSizeList( |
778 | | Arc::new(Field::new_list_field(DataType::Binary, false)), |
779 | | 4, |
780 | | )) |
781 | | .len(3) |
782 | | .add_child_data(values_data) |
783 | | .build_unchecked() |
784 | | }; |
785 | | let list_array = FixedSizeListArray::from(array_data); |
786 | | drop(FixedSizeBinaryArray::from(list_array)); |
787 | | } |
788 | | |
789 | | #[test] |
790 | | #[should_panic(expected = "The child array cannot contain null values.")] |
791 | | fn test_fixed_size_binary_array_from_fixed_size_list_array_with_child_nulls_failed() { |
792 | | let values = [0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]; |
793 | | let values_data = ArrayData::builder(DataType::UInt8) |
794 | | .len(12) |
795 | | .add_buffer(Buffer::from_slice_ref(values)) |
796 | | .null_bit_buffer(Some(Buffer::from_slice_ref([0b101010101010]))) |
797 | | .build() |
798 | | .unwrap(); |
799 | | |
800 | | let array_data = unsafe { |
801 | | ArrayData::builder(DataType::FixedSizeList( |
802 | | Arc::new(Field::new_list_field(DataType::UInt8, false)), |
803 | | 4, |
804 | | )) |
805 | | .len(3) |
806 | | .add_child_data(values_data) |
807 | | .build_unchecked() |
808 | | }; |
809 | | let list_array = FixedSizeListArray::from(array_data); |
810 | | drop(FixedSizeBinaryArray::from(list_array)); |
811 | | } |
812 | | |
813 | | #[test] |
814 | | fn test_fixed_size_binary_array_fmt_debug() { |
815 | | let values: [u8; 15] = *b"hellotherearrow"; |
816 | | |
817 | | let array_data = ArrayData::builder(DataType::FixedSizeBinary(5)) |
818 | | .len(3) |
819 | | .add_buffer(Buffer::from(&values)) |
820 | | .build() |
821 | | .unwrap(); |
822 | | let arr = FixedSizeBinaryArray::from(array_data); |
823 | | assert_eq!( |
824 | | "FixedSizeBinaryArray<5>\n[\n [104, 101, 108, 108, 111],\n [116, 104, 101, 114, 101],\n [97, 114, 114, 111, 119],\n]", |
825 | | format!("{arr:?}") |
826 | | ); |
827 | | } |
828 | | |
829 | | #[test] |
830 | | fn test_fixed_size_binary_array_from_iter() { |
831 | | let input_arg = vec![vec![1, 2], vec![3, 4], vec![5, 6]]; |
832 | | let arr = FixedSizeBinaryArray::try_from_iter(input_arg.into_iter()).unwrap(); |
833 | | |
834 | | assert_eq!(2, arr.value_length()); |
835 | | assert_eq!(3, arr.len()) |
836 | | } |
837 | | |
838 | | #[test] |
839 | | fn test_all_none_fixed_size_binary_array_from_sparse_iter() { |
840 | | let none_option: Option<[u8; 32]> = None; |
841 | | let input_arg = vec![none_option, none_option, none_option]; |
842 | | #[allow(deprecated)] |
843 | | let arr = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.into_iter()).unwrap(); |
844 | | assert_eq!(0, arr.value_length()); |
845 | | assert_eq!(3, arr.len()) |
846 | | } |
847 | | |
848 | | #[test] |
849 | | fn test_fixed_size_binary_array_from_sparse_iter() { |
850 | | let input_arg = vec![ |
851 | | None, |
852 | | Some(vec![7, 8]), |
853 | | Some(vec![9, 10]), |
854 | | None, |
855 | | Some(vec![13, 14]), |
856 | | ]; |
857 | | #[allow(deprecated)] |
858 | | let arr = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.iter().cloned()).unwrap(); |
859 | | assert_eq!(2, arr.value_length()); |
860 | | assert_eq!(5, arr.len()); |
861 | | |
862 | | let arr = |
863 | | FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 2).unwrap(); |
864 | | assert_eq!(2, arr.value_length()); |
865 | | assert_eq!(5, arr.len()); |
866 | | } |
867 | | |
/// Passes five `None` entries together with an explicit size of 16 and checks
/// that the array reports that value length and a length of 5.
#[test]
fn test_fixed_size_binary_array_from_sparse_iter_with_size_all_none() {
    let all_none: Vec<Option<Vec<u8>>> = vec![None; 5];

    let array =
        FixedSizeBinaryArray::try_from_sparse_iter_with_size(all_none.into_iter(), 16).unwrap();
    assert_eq!(16, array.value_length());
    assert_eq!(5, array.len());
}
877 | | |
/// Converts a vector of four three-byte slices into an array and checks its
/// length, null counts, every stored value, and that no slot is null.
#[test]
fn test_fixed_size_binary_array_from_vec() {
    let expected: Vec<&[u8]> = vec![&b"one"[..], &b"two"[..], &b"six"[..], &b"ten"[..]];
    let array = FixedSizeBinaryArray::from(expected.clone());

    assert_eq!(array.len(), 4);
    assert_eq!(array.null_count(), 0);
    assert_eq!(array.logical_null_count(), 0);

    // Values are checked first, then validity, in index order.
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(array.value(idx), *want);
    }
    for idx in 0..expected.len() {
        assert!(!array.is_null(idx));
    }
}
894 | | |
/// Expects the conversion to panic with a size-mismatch message when the
/// input mixes three-byte and five-byte slices.
#[test]
#[should_panic(expected = "Nested array size mismatch: one is 3, and the other is 5")]
fn test_fixed_size_binary_array_from_vec_incorrect_length() {
    // "three" has five bytes while the entries before it have three.
    let mismatched: Vec<&[u8]> =
        vec![&b"one"[..], &b"two"[..], &b"three"[..], &b"four"[..]];
    let _ = FixedSizeBinaryArray::from(mismatched);
}
901 | | |
/// Converts a vector of optional three-byte slices (one `None` at index 2)
/// and checks the length, each present value, and the null flag of every slot.
#[test]
fn test_fixed_size_binary_array_from_opt_vec() {
    let slots: Vec<Option<&[u8]>> = vec![
        Some(&b"one"[..]),
        Some(&b"two"[..]),
        None,
        Some(&b"six"[..]),
        Some(&b"ten"[..]),
    ];
    let array = FixedSizeBinaryArray::from(slots.clone());
    assert_eq!(array.len(), 5);

    // Only slots that hold a value are compared; the `None` slot is skipped.
    for (idx, slot) in slots.iter().enumerate() {
        if let Some(want) = slot {
            assert_eq!(array.value(idx), *want);
        }
    }
    // A slot must be null exactly when the input held `None` there.
    for (idx, slot) in slots.iter().enumerate() {
        assert_eq!(array.is_null(idx), slot.is_none());
    }
}
923 | | |
/// Expects the conversion from optional slices to panic with a size-mismatch
/// message when present values have different lengths.
#[test]
#[should_panic(expected = "Nested array size mismatch: one is 3, and the other is 5")]
fn test_fixed_size_binary_array_from_opt_vec_incorrect_length() {
    // "three" has five bytes while the earlier present entries have three.
    let mismatched: Vec<Option<&[u8]>> = vec![
        Some(&b"one"[..]),
        Some(&b"two"[..]),
        None,
        Some(&b"three"[..]),
        Some(&b"four"[..]),
    ];
    let _ = FixedSizeBinaryArray::from(mismatched);
}
936 | | |
/// Builds an array from a single `None` with size 0 and requires the
/// resulting array data to pass `validate_full`.
#[test]
fn fixed_size_binary_array_all_null() {
    let input: Vec<Option<String>> = vec![None];
    let array_data = FixedSizeBinaryArray::try_from_sparse_iter_with_size(input.into_iter(), 0)
        .unwrap()
        .into_data();
    array_data
        .validate_full()
        .expect("All null array has valid array data");
}
947 | | |
/// Places an all-`None` array built with size 2 into a `RecordBatch` whose
/// only field is a nullable `FixedSizeBinary(2)` column named "a".
#[test]
// Test for https://github.com/apache/arrow-rs/issues/1390
fn fixed_size_binary_array_all_null_in_batch_with_schema() {
    let field = Field::new("a", DataType::FixedSizeBinary(2), true);
    let schema = Arc::new(Schema::new(vec![field]));

    let all_none = vec![None::<[u8; 2]>; 3];
    let column =
        FixedSizeBinaryArray::try_from_sparse_iter_with_size(all_none.into_iter(), 2).unwrap();

    // Should not panic
    RecordBatch::try_new(schema, vec![Arc::new(column)]).unwrap();
}
963 | | |
/// Expects `value(4)` on a three-element array to panic with the
/// out-of-bounds message named in `should_panic`.
#[test]
#[should_panic(
    expected = "Trying to access an element at index 4 from a FixedSizeBinaryArray of length 3"
)]
fn test_fixed_size_binary_array_get_value_index_out_of_bound() {
    let slots: Vec<Option<&[u8]>> = vec![Some(&b"one"[..]), Some(&b"two"[..]), None];
    let array = FixedSizeBinaryArray::from(slots);

    // Index 4 is past the end of the three-element array.
    array.value(4);
}
974 | | |
/// Exercises `new` and `try_new` against one shared ten-byte buffer: valid
/// sizes with and without a null buffer, then the error text for a negative
/// size and for a null buffer whose length does not match.
#[test]
fn test_constructors() {
    let bytes = Buffer::from_vec(vec![0_u8; 10]);

    // Size 2 over ten bytes: the array reports five elements.
    let width_two = FixedSizeBinaryArray::new(2, bytes.clone(), None);
    assert_eq!(width_two.len(), 5);
    FixedSizeBinaryArray::new(2, bytes.clone(), Some(NullBuffer::new_null(5)));

    // Size 3 over the same ten bytes: the array reports three elements.
    let width_three = FixedSizeBinaryArray::new(3, bytes.clone(), None);
    assert_eq!(width_three.len(), 3);
    FixedSizeBinaryArray::new(3, bytes.clone(), Some(NullBuffer::new_null(3)));

    // A negative size is rejected by the fallible constructor.
    let negative_size = FixedSizeBinaryArray::try_new(-1, bytes.clone(), None).unwrap_err();
    assert_eq!(
        negative_size.to_string(),
        "Invalid argument error: Size cannot be negative, got -1"
    );

    // A three-entry null buffer does not fit the five elements implied by size 2.
    let short_nulls =
        FixedSizeBinaryArray::try_new(2, bytes, Some(NullBuffer::new_null(3))).unwrap_err();
    assert_eq!(short_nulls.to_string(), "Invalid argument error: Incorrect length of null buffer for FixedSizeBinaryArray, expected 5 got 3");
}
1001 | | } |