/Users/andrewlamb/Software/arrow-rs/arrow-array/src/array/fixed_size_binary_array.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use crate::array::print_long_array; |
19 | | use crate::iterator::FixedSizeBinaryIter; |
20 | | use crate::{Array, ArrayAccessor, ArrayRef, FixedSizeListArray, Scalar}; |
21 | | use arrow_buffer::buffer::NullBuffer; |
22 | | use arrow_buffer::{ArrowNativeType, BooleanBuffer, Buffer, MutableBuffer, bit_util}; |
23 | | use arrow_data::{ArrayData, ArrayDataBuilder}; |
24 | | use arrow_schema::{ArrowError, DataType}; |
25 | | use std::any::Any; |
26 | | use std::sync::Arc; |
27 | | |
28 | | /// An array of [fixed size binary arrays](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout) |
29 | | /// |
30 | | /// # Examples |
31 | | /// |
32 | | /// Create an array from an iterable argument of byte slices. |
33 | | /// |
34 | | /// ``` |
35 | | /// use arrow_array::{Array, FixedSizeBinaryArray}; |
36 | | /// let input_arg = vec![ vec![1, 2], vec![3, 4], vec![5, 6] ]; |
37 | | /// let arr = FixedSizeBinaryArray::try_from_iter(input_arg.into_iter()).unwrap(); |
38 | | /// |
39 | | /// assert_eq!(3, arr.len()); |
40 | | /// |
41 | | /// ``` |
42 | | /// Create an array from an iterable argument of sparse byte slices. |
43 | | /// Sparsity means that the input argument can contain `None` items. |
44 | | /// ``` |
45 | | /// use arrow_array::{Array, FixedSizeBinaryArray}; |
46 | | /// let input_arg = vec![ None, Some(vec![7, 8]), Some(vec![9, 10]), None, Some(vec![13, 14]) ]; |
47 | | /// let arr = FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 2).unwrap(); |
48 | | /// assert_eq!(5, arr.len()) |
49 | | /// |
50 | | /// ``` |
51 | | /// |
#[derive(Clone)]
pub struct FixedSizeBinaryArray {
    /// Logical type reported by [`Array::data_type`].
    data_type: DataType, // Must be DataType::FixedSizeBinary(value_length)
    /// Value bytes stored back to back; element `i` is read starting at
    /// byte `i * value_length`.
    value_data: Buffer,
    /// Validity information, if any.
    nulls: Option<NullBuffer>,
    /// Number of elements in the array.
    len: usize,
    /// Width in bytes shared by every element.
    value_length: i32,
}
60 | | |
61 | | impl FixedSizeBinaryArray { |
/// Create a new [`FixedSizeBinaryArray`] with `size` element size, panicking on failure
///
/// This forwards all arguments to [`Self::try_new`] and unwraps the result.
///
/// # Panics
///
/// Panics if [`Self::try_new`] returns an error
pub fn new(size: i32, values: Buffer, nulls: Option<NullBuffer>) -> Self {
    Self::try_new(size, values, nulls).unwrap()
}
70 | | |
/// Create a new [`Scalar`] from `value`
///
/// The element size is the byte length of `value`; the array is built without
/// a null buffer via [`Self::new`].
///
/// # Panics
///
/// Panics if [`Self::new`] panics.
pub fn new_scalar(value: impl AsRef<[u8]>) -> Scalar<Self> {
    let v = value.as_ref();
    // NOTE(review): `as _` converts the length to `i32` without a range check.
    Scalar::new(Self::new(v.len() as _, Buffer::from(v), None))
}
76 | | |
77 | | /// Create a new [`FixedSizeBinaryArray`] from the provided parts, returning an error on failure |
78 | | /// |
79 | | /// # Errors |
80 | | /// |
81 | | /// * `size < 0` |
82 | | /// * `values.len() / size != nulls.len()` |
83 | 0 | pub fn try_new( |
84 | 0 | size: i32, |
85 | 0 | values: Buffer, |
86 | 0 | nulls: Option<NullBuffer>, |
87 | 0 | ) -> Result<Self, ArrowError> { |
88 | 0 | let data_type = DataType::FixedSizeBinary(size); |
89 | 0 | let s = size.to_usize().ok_or_else(|| { |
90 | 0 | ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {size}")) |
91 | 0 | })?; |
92 | | |
93 | 0 | let len = values.len() / s; |
94 | 0 | if let Some(n) = nulls.as_ref() { |
95 | 0 | if n.len() != len { |
96 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
97 | 0 | "Incorrect length of null buffer for FixedSizeBinaryArray, expected {} got {}", |
98 | 0 | len, |
99 | 0 | n.len(), |
100 | 0 | ))); |
101 | 0 | } |
102 | 0 | } |
103 | | |
104 | 0 | Ok(Self { |
105 | 0 | data_type, |
106 | 0 | value_data: values, |
107 | 0 | value_length: size, |
108 | 0 | nulls, |
109 | 0 | len, |
110 | 0 | }) |
111 | 0 | } |
112 | | |
113 | | /// Create a new [`FixedSizeBinaryArray`] of length `len` where all values are null |
114 | | /// |
115 | | /// # Panics |
116 | | /// |
117 | | /// Panics if |
118 | | /// |
119 | | /// * `size < 0` |
120 | | /// * `size * len` would overflow `usize` |
121 | 0 | pub fn new_null(size: i32, len: usize) -> Self { |
122 | 0 | let capacity = size.to_usize().unwrap().checked_mul(len).unwrap(); |
123 | 0 | Self { |
124 | 0 | data_type: DataType::FixedSizeBinary(size), |
125 | 0 | value_data: MutableBuffer::new(capacity).into(), |
126 | 0 | nulls: Some(NullBuffer::new_null(len)), |
127 | 0 | value_length: size, |
128 | 0 | len, |
129 | 0 | } |
130 | 0 | } |
131 | | |
132 | | /// Deconstruct this array into its constituent parts |
133 | 0 | pub fn into_parts(self) -> (i32, Buffer, Option<NullBuffer>) { |
134 | 0 | (self.value_length, self.value_data, self.nulls) |
135 | 0 | } |
136 | | |
137 | | /// Returns the element at index `i` as a byte slice. |
138 | | /// |
139 | | /// Note: This method does not check for nulls and the value is arbitrary |
140 | | /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index. |
141 | | /// |
142 | | /// # Panics |
143 | | /// Panics if index `i` is out of bounds. |
144 | 6 | pub fn value(&self, i: usize) -> &[u8] { |
145 | 6 | assert!( |
146 | 6 | i < self.len(), |
147 | 0 | "Trying to access an element at index {} from a FixedSizeBinaryArray of length {}", |
148 | | i, |
149 | 0 | self.len() |
150 | | ); |
151 | 6 | let offset = i + self.offset(); |
152 | | unsafe { |
153 | 6 | let pos = self.value_offset_at(offset); |
154 | 6 | std::slice::from_raw_parts( |
155 | 6 | self.value_data.as_ptr().offset(pos as isize), |
156 | 6 | (self.value_offset_at(offset + 1) - pos) as usize, |
157 | 6 | ) |
158 | | } |
159 | 6 | } |
160 | | |
161 | | /// Returns the element at index `i` as a byte slice. |
162 | | /// |
163 | | /// Note: This method does not check for nulls and the value is arbitrary |
164 | | /// if [`is_null`](Self::is_null) returns true for the index. |
165 | | /// |
166 | | /// # Safety |
167 | | /// |
168 | | /// Caller is responsible for ensuring that the index is within the bounds |
169 | | /// of the array |
170 | 0 | pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] { |
171 | 0 | let offset = i + self.offset(); |
172 | 0 | let pos = self.value_offset_at(offset); |
173 | | unsafe { |
174 | 0 | std::slice::from_raw_parts( |
175 | 0 | self.value_data.as_ptr().offset(pos as isize), |
176 | 0 | (self.value_offset_at(offset + 1) - pos) as usize, |
177 | 0 | ) |
178 | | } |
179 | 0 | } |
180 | | |
/// Returns the offset for the element at index `i`.
///
/// The offset is expressed in bytes and computed as `value_length * i`
/// using `i32` arithmetic.
///
/// Note this doesn't do any bound checking, for performance reason.
#[inline]
pub fn value_offset(&self, i: usize) -> i32 {
    self.value_offset_at(self.offset() + i)
}
188 | | |
/// Returns the length for an element.
///
/// All elements have the same length as the array is a fixed size.
/// The returned value is the `value_length` field stored in the array.
#[inline]
pub fn value_length(&self) -> i32 {
    self.value_length
}
196 | | |
/// Returns the values of this array.
///
/// Unlike [`Self::value_data`] this returns the [`Buffer`]
/// allowing for zero-copy cloning.
///
/// The buffer is returned by reference; no bytes are copied by this call.
#[inline]
pub fn values(&self) -> &Buffer {
    &self.value_data
}
205 | | |
/// Returns the raw value data.
///
/// This is the value buffer viewed as one byte slice covering all elements.
pub fn value_data(&self) -> &[u8] {
    self.value_data.as_slice()
}
210 | | |
211 | | /// Returns a zero-copy slice of this array with the indicated offset and length. |
212 | 1 | pub fn slice(&self, offset: usize, len: usize) -> Self { |
213 | 1 | assert!( |
214 | 1 | offset.saturating_add(len) <= self.len, |
215 | 0 | "the length + offset of the sliced FixedSizeBinaryArray cannot exceed the existing length" |
216 | | ); |
217 | | |
218 | 1 | let size = self.value_length as usize; |
219 | | |
220 | | Self { |
221 | 1 | data_type: self.data_type.clone(), |
222 | 1 | nulls: self.nulls.as_ref().map(|n| n0 .slice0 (offset0 , len0 )), |
223 | 1 | value_length: self.value_length, |
224 | 1 | value_data: self.value_data.slice_with_length(offset * size, len * size), |
225 | 1 | len, |
226 | | } |
227 | 1 | } |
228 | | |
229 | | /// Create an array from an iterable argument of sparse byte slices. |
230 | | /// Sparsity means that items returned by the iterator are optional, i.e input argument can |
231 | | /// contain `None` items. |
232 | | /// |
233 | | /// # Examples |
234 | | /// |
235 | | /// ``` |
236 | | /// use arrow_array::FixedSizeBinaryArray; |
237 | | /// let input_arg = vec![ |
238 | | /// None, |
239 | | /// Some(vec![7, 8]), |
240 | | /// Some(vec![9, 10]), |
241 | | /// None, |
242 | | /// Some(vec![13, 14]), |
243 | | /// None, |
244 | | /// ]; |
245 | | /// let array = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.into_iter()).unwrap(); |
246 | | /// ``` |
247 | | /// |
248 | | /// # Errors |
249 | | /// |
250 | | /// Returns error if argument has length zero, or sizes of nested slices don't match. |
251 | | #[deprecated( |
252 | | since = "28.0.0", |
253 | | note = "This function will fail if the iterator produces only None values; prefer `try_from_sparse_iter_with_size`" |
254 | | )] |
255 | 0 | pub fn try_from_sparse_iter<T, U>(mut iter: T) -> Result<Self, ArrowError> |
256 | 0 | where |
257 | 0 | T: Iterator<Item = Option<U>>, |
258 | 0 | U: AsRef<[u8]>, |
259 | | { |
260 | 0 | let mut len = 0; |
261 | 0 | let mut size = None; |
262 | 0 | let mut byte = 0; |
263 | | |
264 | 0 | let iter_size_hint = iter.size_hint().0; |
265 | 0 | let mut null_buf = MutableBuffer::new(bit_util::ceil(iter_size_hint, 8)); |
266 | 0 | let mut buffer = MutableBuffer::new(0); |
267 | | |
268 | 0 | let mut prepend = 0; |
269 | 0 | iter.try_for_each(|item| -> Result<(), ArrowError> { |
270 | | // extend null bitmask by one byte per each 8 items |
271 | 0 | if byte == 0 { |
272 | 0 | null_buf.push(0u8); |
273 | 0 | byte = 8; |
274 | 0 | } |
275 | 0 | byte -= 1; |
276 | | |
277 | 0 | if let Some(slice) = item { |
278 | 0 | let slice = slice.as_ref(); |
279 | 0 | if let Some(size) = size { |
280 | 0 | if size != slice.len() { |
281 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
282 | 0 | "Nested array size mismatch: one is {}, and the other is {}", |
283 | 0 | size, |
284 | 0 | slice.len() |
285 | 0 | ))); |
286 | 0 | } |
287 | 0 | } else { |
288 | 0 | let len = slice.len(); |
289 | 0 | size = Some(len); |
290 | 0 | // Now that we know how large each element is we can reserve |
291 | 0 | // sufficient capacity in the underlying mutable buffer for |
292 | 0 | // the data. |
293 | 0 | buffer.reserve(iter_size_hint * len); |
294 | 0 | buffer.extend_zeros(slice.len() * prepend); |
295 | 0 | } |
296 | 0 | bit_util::set_bit(null_buf.as_slice_mut(), len); |
297 | 0 | buffer.extend_from_slice(slice); |
298 | 0 | } else if let Some(size) = size { |
299 | 0 | buffer.extend_zeros(size); |
300 | 0 | } else { |
301 | 0 | prepend += 1; |
302 | 0 | } |
303 | | |
304 | 0 | len += 1; |
305 | | |
306 | 0 | Ok(()) |
307 | 0 | })?; |
308 | | |
309 | 0 | if len == 0 { |
310 | 0 | return Err(ArrowError::InvalidArgumentError( |
311 | 0 | "Input iterable argument has no data".to_owned(), |
312 | 0 | )); |
313 | 0 | } |
314 | | |
315 | 0 | let null_buf = BooleanBuffer::new(null_buf.into(), 0, len); |
316 | 0 | let nulls = Some(NullBuffer::new(null_buf)).filter(|n| n.null_count() > 0); |
317 | | |
318 | 0 | let size = size.unwrap_or(0) as i32; |
319 | 0 | Ok(Self { |
320 | 0 | data_type: DataType::FixedSizeBinary(size), |
321 | 0 | value_data: buffer.into(), |
322 | 0 | nulls, |
323 | 0 | value_length: size, |
324 | 0 | len, |
325 | 0 | }) |
326 | 0 | } |
327 | | |
328 | | /// Create an array from an iterable argument of sparse byte slices. |
329 | | /// Sparsity means that items returned by the iterator are optional, i.e input argument can |
330 | | /// contain `None` items. In cases where the iterator returns only `None` values, this |
331 | | /// also takes a size parameter to ensure that the a valid FixedSizeBinaryArray is still |
332 | | /// created. |
333 | | /// |
334 | | /// # Examples |
335 | | /// |
336 | | /// ``` |
337 | | /// use arrow_array::FixedSizeBinaryArray; |
338 | | /// let input_arg = vec![ |
339 | | /// None, |
340 | | /// Some(vec![7, 8]), |
341 | | /// Some(vec![9, 10]), |
342 | | /// None, |
343 | | /// Some(vec![13, 14]), |
344 | | /// None, |
345 | | /// ]; |
346 | | /// let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 2).unwrap(); |
347 | | /// ``` |
348 | | /// |
349 | | /// # Errors |
350 | | /// |
351 | | /// Returns error if argument has length zero, or sizes of nested slices don't match. |
352 | 0 | pub fn try_from_sparse_iter_with_size<T, U>(mut iter: T, size: i32) -> Result<Self, ArrowError> |
353 | 0 | where |
354 | 0 | T: Iterator<Item = Option<U>>, |
355 | 0 | U: AsRef<[u8]>, |
356 | | { |
357 | 0 | let mut len = 0; |
358 | 0 | let mut byte = 0; |
359 | | |
360 | 0 | let iter_size_hint = iter.size_hint().0; |
361 | 0 | let mut null_buf = MutableBuffer::new(bit_util::ceil(iter_size_hint, 8)); |
362 | 0 | let mut buffer = MutableBuffer::new(iter_size_hint * (size as usize)); |
363 | | |
364 | 0 | iter.try_for_each(|item| -> Result<(), ArrowError> { |
365 | | // extend null bitmask by one byte per each 8 items |
366 | 0 | if byte == 0 { |
367 | 0 | null_buf.push(0u8); |
368 | 0 | byte = 8; |
369 | 0 | } |
370 | 0 | byte -= 1; |
371 | | |
372 | 0 | if let Some(slice) = item { |
373 | 0 | let slice = slice.as_ref(); |
374 | 0 | if size as usize != slice.len() { |
375 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
376 | 0 | "Nested array size mismatch: one is {}, and the other is {}", |
377 | 0 | size, |
378 | 0 | slice.len() |
379 | 0 | ))); |
380 | 0 | } |
381 | | |
382 | 0 | bit_util::set_bit(null_buf.as_slice_mut(), len); |
383 | 0 | buffer.extend_from_slice(slice); |
384 | 0 | } else { |
385 | 0 | buffer.extend_zeros(size as usize); |
386 | 0 | } |
387 | | |
388 | 0 | len += 1; |
389 | | |
390 | 0 | Ok(()) |
391 | 0 | })?; |
392 | | |
393 | 0 | let null_buf = BooleanBuffer::new(null_buf.into(), 0, len); |
394 | 0 | let nulls = Some(NullBuffer::new(null_buf)).filter(|n| n.null_count() > 0); |
395 | | |
396 | 0 | Ok(Self { |
397 | 0 | data_type: DataType::FixedSizeBinary(size), |
398 | 0 | value_data: buffer.into(), |
399 | 0 | nulls, |
400 | 0 | len, |
401 | 0 | value_length: size, |
402 | 0 | }) |
403 | 0 | } |
404 | | |
405 | | /// Create an array from an iterable argument of byte slices. |
406 | | /// |
407 | | /// # Examples |
408 | | /// |
409 | | /// ``` |
410 | | /// use arrow_array::FixedSizeBinaryArray; |
411 | | /// let input_arg = vec![ |
412 | | /// vec![1, 2], |
413 | | /// vec![3, 4], |
414 | | /// vec![5, 6], |
415 | | /// ]; |
416 | | /// let array = FixedSizeBinaryArray::try_from_iter(input_arg.into_iter()).unwrap(); |
417 | | /// ``` |
418 | | /// |
419 | | /// # Errors |
420 | | /// |
421 | | /// Returns error if argument has length zero, or sizes of nested slices don't match. |
422 | 1 | pub fn try_from_iter<T, U>(mut iter: T) -> Result<Self, ArrowError> |
423 | 1 | where |
424 | 1 | T: Iterator<Item = U>, |
425 | 1 | U: AsRef<[u8]>, |
426 | | { |
427 | 1 | let mut len = 0; |
428 | 1 | let mut size = None; |
429 | 1 | let iter_size_hint = iter.size_hint().0; |
430 | 1 | let mut buffer = MutableBuffer::new(0); |
431 | | |
432 | 3 | iter1 .try_for_each1 (|item| -> Result<(), ArrowError> { |
433 | 3 | let slice = item.as_ref(); |
434 | 3 | if let Some(size2 ) = size { |
435 | 2 | if size != slice.len() { |
436 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
437 | 0 | "Nested array size mismatch: one is {}, and the other is {}", |
438 | 0 | size, |
439 | 0 | slice.len() |
440 | 0 | ))); |
441 | 2 | } |
442 | 1 | } else { |
443 | 1 | let len = slice.len(); |
444 | 1 | size = Some(len); |
445 | 1 | buffer.reserve(iter_size_hint * len); |
446 | 1 | } |
447 | | |
448 | 3 | buffer.extend_from_slice(slice); |
449 | | |
450 | 3 | len += 1; |
451 | | |
452 | 3 | Ok(()) |
453 | 3 | })?0 ; |
454 | | |
455 | 1 | if len == 0 { |
456 | 0 | return Err(ArrowError::InvalidArgumentError( |
457 | 0 | "Input iterable argument has no data".to_owned(), |
458 | 0 | )); |
459 | 1 | } |
460 | | |
461 | 1 | let size = size.unwrap_or(0).try_into().unwrap(); |
462 | 1 | Ok(Self { |
463 | 1 | data_type: DataType::FixedSizeBinary(size), |
464 | 1 | value_data: buffer.into(), |
465 | 1 | nulls: None, |
466 | 1 | value_length: size, |
467 | 1 | len, |
468 | 1 | }) |
469 | 1 | } |
470 | | |
/// Returns the byte offset of element `i`, computed as `value_length * i`.
///
/// NOTE(review): `i` is narrowed with `as i32` and the product is computed in
/// `i32`, so large indices or large buffers can truncate or overflow.
#[inline]
fn value_offset_at(&self, i: usize) -> i32 {
    self.value_length * i as i32
}
475 | | |
/// Constructs a new [`FixedSizeBinaryIter`] over this array.
pub fn iter(&self) -> FixedSizeBinaryIter<'_> {
    FixedSizeBinaryIter::new(self)
}
480 | | } |
481 | | |
482 | | impl From<ArrayData> for FixedSizeBinaryArray { |
483 | 5 | fn from(data: ArrayData) -> Self { |
484 | 5 | assert_eq!( |
485 | 5 | data.buffers().len(), |
486 | | 1, |
487 | 0 | "FixedSizeBinaryArray data should contain 1 buffer only (values)" |
488 | | ); |
489 | 5 | let value_length = match data.data_type() { |
490 | 5 | DataType::FixedSizeBinary(len) => *len, |
491 | 0 | _ => panic!("Expected data type to be FixedSizeBinary"), |
492 | | }; |
493 | | |
494 | 5 | let size = value_length as usize; |
495 | 5 | let value_data = |
496 | 5 | data.buffers()[0].slice_with_length(data.offset() * size, data.len() * size); |
497 | | |
498 | 5 | Self { |
499 | 5 | data_type: data.data_type().clone(), |
500 | 5 | nulls: data.nulls().cloned(), |
501 | 5 | len: data.len(), |
502 | 5 | value_data, |
503 | 5 | value_length, |
504 | 5 | } |
505 | 5 | } |
506 | | } |
507 | | |
impl From<FixedSizeBinaryArray> for ArrayData {
    /// Converts the array into [`ArrayData`] with a single value buffer and
    /// the array's null buffer.
    fn from(array: FixedSizeBinaryArray) -> Self {
        let builder = ArrayDataBuilder::new(array.data_type)
            .len(array.len)
            .buffers(vec![array.value_data])
            .nulls(array.nulls);

        // SAFETY: NOTE(review) presumes the array's fields already satisfy the
        // `ArrayData` invariants, since `build_unchecked` is used — confirm.
        unsafe { builder.build_unchecked() }
    }
}
518 | | |
519 | | /// Creates a `FixedSizeBinaryArray` from `FixedSizeList<u8>` array |
520 | | impl From<FixedSizeListArray> for FixedSizeBinaryArray { |
521 | 0 | fn from(v: FixedSizeListArray) -> Self { |
522 | 0 | let value_len = v.value_length(); |
523 | 0 | let v = v.into_data(); |
524 | 0 | assert_eq!( |
525 | 0 | v.child_data().len(), |
526 | | 1, |
527 | 0 | "FixedSizeBinaryArray can only be created from list array of u8 values \ |
528 | 0 | (i.e. FixedSizeList<PrimitiveArray<u8>>)." |
529 | | ); |
530 | 0 | let child_data = &v.child_data()[0]; |
531 | | |
532 | 0 | assert_eq!( |
533 | 0 | child_data.child_data().len(), |
534 | | 0, |
535 | 0 | "FixedSizeBinaryArray can only be created from list array of u8 values \ |
536 | 0 | (i.e. FixedSizeList<PrimitiveArray<u8>>)." |
537 | | ); |
538 | 0 | assert_eq!( |
539 | 0 | child_data.data_type(), |
540 | | &DataType::UInt8, |
541 | 0 | "FixedSizeBinaryArray can only be created from FixedSizeList<u8> arrays, mismatched data types." |
542 | | ); |
543 | 0 | assert_eq!( |
544 | 0 | child_data.null_count(), |
545 | | 0, |
546 | 0 | "The child array cannot contain null values." |
547 | | ); |
548 | | |
549 | 0 | let builder = ArrayData::builder(DataType::FixedSizeBinary(value_len)) |
550 | 0 | .len(v.len()) |
551 | 0 | .offset(v.offset()) |
552 | 0 | .add_buffer(child_data.buffers()[0].slice(child_data.offset())) |
553 | 0 | .nulls(v.nulls().cloned()); |
554 | | |
555 | 0 | let data = unsafe { builder.build_unchecked() }; |
556 | 0 | Self::from(data) |
557 | 0 | } |
558 | | } |
559 | | |
impl From<Vec<Option<&[u8]>>> for FixedSizeBinaryArray {
    /// Builds the array from optional byte slices via
    /// [`FixedSizeBinaryArray::try_from_sparse_iter`].
    ///
    /// # Panics
    ///
    /// Panics if `try_from_sparse_iter` returns an error.
    fn from(v: Vec<Option<&[u8]>>) -> Self {
        #[allow(deprecated)]
        Self::try_from_sparse_iter(v.into_iter()).unwrap()
    }
}
566 | | |
impl From<Vec<&[u8]>> for FixedSizeBinaryArray {
    /// Builds the array from byte slices via [`FixedSizeBinaryArray::try_from_iter`].
    ///
    /// # Panics
    ///
    /// Panics if `try_from_iter` returns an error.
    fn from(v: Vec<&[u8]>) -> Self {
        Self::try_from_iter(v.into_iter()).unwrap()
    }
}
572 | | |
impl<const N: usize> From<Vec<&[u8; N]>> for FixedSizeBinaryArray {
    /// Builds the array from fixed-size byte array references via
    /// [`FixedSizeBinaryArray::try_from_iter`].
    ///
    /// # Panics
    ///
    /// Panics if `try_from_iter` returns an error.
    fn from(v: Vec<&[u8; N]>) -> Self {
        Self::try_from_iter(v.into_iter()).unwrap()
    }
}
578 | | |
impl std::fmt::Debug for FixedSizeBinaryArray {
    /// Writes a `FixedSizeBinaryArray<value_length>` header followed by the
    /// elements, each formatted with the `Debug` impl of its byte slice.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(f, "FixedSizeBinaryArray<{}>\n[\n", self.value_length())?;
        print_long_array(self, f, |array, index, f| {
            std::fmt::Debug::fmt(&array.value(index), f)
        })?;
        write!(f, "]")
    }
}
588 | | |
impl Array for FixedSizeBinaryArray {
    /// Returns `self` as [`Any`].
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// Clones the array and converts the clone into [`ArrayData`].
    fn to_data(&self) -> ArrayData {
        self.clone().into()
    }

    /// Converts the array into [`ArrayData`], consuming it.
    fn into_data(self) -> ArrayData {
        self.into()
    }

    /// Returns the stored data type.
    fn data_type(&self) -> &DataType {
        &self.data_type
    }

    /// Slices via the inherent [`FixedSizeBinaryArray::slice`] and wraps the result in an [`Arc`].
    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
        Arc::new(self.slice(offset, length))
    }

    /// Returns the number of elements.
    fn len(&self) -> usize {
        self.len
    }

    /// Returns `true` when the array has no elements.
    fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Shrinks the value buffer and, when present, the null buffer.
    fn shrink_to_fit(&mut self) {
        self.value_data.shrink_to_fit();
        if let Some(nulls) = &mut self.nulls {
            nulls.shrink_to_fit();
        }
    }

    /// Always zero: slicing and conversion from [`ArrayData`] apply the offset
    /// to the value buffer itself.
    fn offset(&self) -> usize {
        0
    }

    /// Returns the null buffer, if any.
    fn nulls(&self) -> Option<&NullBuffer> {
        self.nulls.as_ref()
    }

    /// Returns the same value as `null_count`.
    fn logical_null_count(&self) -> usize {
        // More efficient than the default implementation
        self.null_count()
    }

    /// Sums the capacity of the value buffer and, when present, the null buffer.
    fn get_buffer_memory_size(&self) -> usize {
        let mut sum = self.value_data.capacity();
        if let Some(n) = &self.nulls {
            sum += n.buffer().capacity();
        }
        sum
    }

    /// Adds the size of the struct itself to the buffer memory size.
    fn get_array_memory_size(&self) -> usize {
        std::mem::size_of::<Self>() + self.get_buffer_memory_size()
    }
}
650 | | |
impl<'a> ArrayAccessor for &'a FixedSizeBinaryArray {
    type Item = &'a [u8];

    /// Delegates to [`FixedSizeBinaryArray::value`], which panics when `index`
    /// is out of bounds.
    fn value(&self, index: usize) -> Self::Item {
        FixedSizeBinaryArray::value(self, index)
    }

    /// Delegates to [`FixedSizeBinaryArray::value_unchecked`]; the caller must
    /// uphold that method's safety contract.
    unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
        unsafe { FixedSizeBinaryArray::value_unchecked(self, index) }
    }
}
662 | | |
impl<'a> IntoIterator for &'a FixedSizeBinaryArray {
    type Item = Option<&'a [u8]>;
    type IntoIter = FixedSizeBinaryIter<'a>;

    /// Creates a [`FixedSizeBinaryIter`] borrowing this array.
    fn into_iter(self) -> Self::IntoIter {
        FixedSizeBinaryIter::<'a>::new(self)
    }
}
671 | | |
672 | | #[cfg(test)] |
673 | | mod tests { |
674 | | use crate::RecordBatch; |
675 | | use arrow_schema::{Field, Schema}; |
676 | | |
677 | | use super::*; |
678 | | |
// Builds arrays from `ArrayData`, with and without an offset, and checks
// values, lengths, offsets and validity.
#[test]
fn test_fixed_size_binary_array() {
    let values: [u8; 15] = *b"hellotherearrow";

    let array_data = ArrayData::builder(DataType::FixedSizeBinary(5))
        .len(3)
        .add_buffer(Buffer::from(&values))
        .build()
        .unwrap();
    let fixed_size_binary_array = FixedSizeBinaryArray::from(array_data);
    assert_eq!(3, fixed_size_binary_array.len());
    assert_eq!(0, fixed_size_binary_array.null_count());
    assert_eq!(
        [b'h', b'e', b'l', b'l', b'o'],
        fixed_size_binary_array.value(0)
    );
    assert_eq!(
        [b't', b'h', b'e', b'r', b'e'],
        fixed_size_binary_array.value(1)
    );
    assert_eq!(
        [b'a', b'r', b'r', b'o', b'w'],
        fixed_size_binary_array.value(2)
    );
    assert_eq!(5, fixed_size_binary_array.value_length());
    assert_eq!(10, fixed_size_binary_array.value_offset(2));
    for i in 0..3 {
        assert!(fixed_size_binary_array.is_valid(i));
        assert!(!fixed_size_binary_array.is_null(i));
    }

    // Test binary array with offset
    let array_data = ArrayData::builder(DataType::FixedSizeBinary(5))
        .len(2)
        .offset(1)
        .add_buffer(Buffer::from(&values))
        .build()
        .unwrap();
    let fixed_size_binary_array = FixedSizeBinaryArray::from(array_data);
    assert_eq!(
        [b't', b'h', b'e', b'r', b'e'],
        fixed_size_binary_array.value(0)
    );
    assert_eq!(
        [b'a', b'r', b'r', b'o', b'w'],
        fixed_size_binary_array.value(1)
    );
    assert_eq!(2, fixed_size_binary_array.len());
    // Offsets are relative to the already-sliced value buffer.
    assert_eq!(0, fixed_size_binary_array.value_offset(0));
    assert_eq!(5, fixed_size_binary_array.value_length());
    assert_eq!(5, fixed_size_binary_array.value_offset(1));
}
731 | | |
// Converts a `FixedSizeList<u8>` that carries both a list offset and a child
// offset, and checks the resulting nulls and values.
#[test]
fn test_fixed_size_binary_array_from_fixed_size_list_array() {
    let values = [0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13];
    let values_data = ArrayData::builder(DataType::UInt8)
        .len(12)
        .offset(2)
        .add_buffer(Buffer::from_slice_ref(values))
        .build()
        .unwrap();
    // [null, [10, 11, 12, 13]]
    let array_data = unsafe {
        ArrayData::builder(DataType::FixedSizeList(
            Arc::new(Field::new_list_field(DataType::UInt8, false)),
            4,
        ))
        .len(2)
        .offset(1)
        .add_child_data(values_data)
        .null_bit_buffer(Some(Buffer::from_slice_ref([0b101])))
        .build_unchecked()
    };
    let list_array = FixedSizeListArray::from(array_data);
    let binary_array = FixedSizeBinaryArray::from(list_array);

    assert_eq!(2, binary_array.len());
    assert_eq!(1, binary_array.null_count());
    assert!(binary_array.is_null(0));
    assert!(binary_array.is_valid(1));
    assert_eq!(&[10, 11, 12, 13], binary_array.value(1));
}
762 | | |
// Conversion from a list whose child is not `UInt8` must panic with the
// data-type mismatch message.
#[test]
#[should_panic(
    expected = "FixedSizeBinaryArray can only be created from FixedSizeList<u8> arrays"
)]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
fn test_fixed_size_binary_array_from_incorrect_fixed_size_list_array() {
    let values: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
    let values_data = ArrayData::builder(DataType::UInt32)
        .len(12)
        .add_buffer(Buffer::from_slice_ref(values))
        .build()
        .unwrap();

    let array_data = unsafe {
        ArrayData::builder(DataType::FixedSizeList(
            Arc::new(Field::new_list_field(DataType::Binary, false)),
            4,
        ))
        .len(3)
        .add_child_data(values_data)
        .build_unchecked()
    };
    let list_array = FixedSizeListArray::from(array_data);
    drop(FixedSizeBinaryArray::from(list_array));
}
790 | | |
// Conversion from a list whose child array contains nulls must panic.
#[test]
#[should_panic(expected = "The child array cannot contain null values.")]
fn test_fixed_size_binary_array_from_fixed_size_list_array_with_child_nulls_failed() {
    let values = [0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
    let values_data = ArrayData::builder(DataType::UInt8)
        .len(12)
        .add_buffer(Buffer::from_slice_ref(values))
        .null_bit_buffer(Some(Buffer::from_slice_ref([0b101010101010])))
        .build()
        .unwrap();

    let array_data = unsafe {
        ArrayData::builder(DataType::FixedSizeList(
            Arc::new(Field::new_list_field(DataType::UInt8, false)),
            4,
        ))
        .len(3)
        .add_child_data(values_data)
        .build_unchecked()
    };
    let list_array = FixedSizeListArray::from(array_data);
    drop(FixedSizeBinaryArray::from(list_array));
}
814 | | |
// Pins the exact `Debug` output for a three-element array.
#[test]
fn test_fixed_size_binary_array_fmt_debug() {
    let values: [u8; 15] = *b"hellotherearrow";

    let array_data = ArrayData::builder(DataType::FixedSizeBinary(5))
        .len(3)
        .add_buffer(Buffer::from(&values))
        .build()
        .unwrap();
    let arr = FixedSizeBinaryArray::from(array_data);
    assert_eq!(
        "FixedSizeBinaryArray<5>\n[\n  [104, 101, 108, 108, 111],\n  [116, 104, 101, 114, 101],\n  [97, 114, 114, 111, 119],\n]",
        format!("{arr:?}")
    );
}
830 | | |
// `try_from_iter` infers the element size from the items.
#[test]
fn test_fixed_size_binary_array_from_iter() {
    let input_arg = vec![vec![1, 2], vec![3, 4], vec![5, 6]];
    let arr = FixedSizeBinaryArray::try_from_iter(input_arg.into_iter()).unwrap();

    assert_eq!(2, arr.value_length());
    assert_eq!(3, arr.len())
}
839 | | |
// With only `None` items the deprecated constructor yields a zero-width array.
#[test]
fn test_all_none_fixed_size_binary_array_from_sparse_iter() {
    let none_option: Option<[u8; 32]> = None;
    let input_arg = vec![none_option, none_option, none_option];
    #[allow(deprecated)]
    let arr = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.into_iter()).unwrap();
    assert_eq!(0, arr.value_length());
    assert_eq!(3, arr.len())
}
849 | | |
// Both sparse constructors report the same size and length for the same
// mixed `Some`/`None` input.
#[test]
fn test_fixed_size_binary_array_from_sparse_iter() {
    let input_arg = vec![
        None,
        Some(vec![7, 8]),
        Some(vec![9, 10]),
        None,
        Some(vec![13, 14]),
    ];
    #[allow(deprecated)]
    let arr = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.iter().cloned()).unwrap();
    assert_eq!(2, arr.value_length());
    assert_eq!(5, arr.len());

    let arr =
        FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 2).unwrap();
    assert_eq!(2, arr.value_length());
    assert_eq!(5, arr.len());
}
869 | | |
// With an explicit size, an all-`None` input still yields the requested width.
#[test]
fn test_fixed_size_binary_array_from_sparse_iter_with_size_all_none() {
    let input_arg = vec![None, None, None, None, None] as Vec<Option<Vec<u8>>>;

    let arr = FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 16)
        .unwrap();
    assert_eq!(16, arr.value_length());
    assert_eq!(5, arr.len())
}
879 | | |
/// Converts a `Vec` of equally sized byte slices into an array and checks the
/// length, the null counts, every stored value and every validity flag.
#[test]
fn test_fixed_size_binary_array_from_vec() {
    let words: [&[u8]; 4] = [b"one", b"two", b"six", b"ten"];
    let array = FixedSizeBinaryArray::from(words.to_vec());

    assert_eq!(array.len(), 4);
    assert_eq!(array.null_count(), 0);
    assert_eq!(array.logical_null_count(), 0);

    // Every slot must hold the bytes it was built from ...
    for (idx, expected) in words.iter().enumerate() {
        assert_eq!(array.value(idx), *expected);
    }
    // ... and none of the slots may be flagged as null.
    for idx in 0..words.len() {
        assert!(!array.is_null(idx));
    }
}
896 | | |
/// Converting slices of differing lengths (3 and 5 bytes) must panic with the
/// size-mismatch message.
#[test]
#[should_panic(expected = "Nested array size mismatch: one is 3, and the other is 5")]
fn test_fixed_size_binary_array_from_vec_incorrect_length() {
    let mismatched: [&[u8]; 4] = [b"one", b"two", b"three", b"four"];
    let _ = FixedSizeBinaryArray::from(mismatched.to_vec());
}
903 | | |
/// Converts a `Vec` of optional byte slices (one `None` in the middle) into an
/// array and checks the length, the null counts, the non-null values and the
/// validity flag of every slot.
#[test]
fn test_fixed_size_binary_array_from_opt_vec() {
    let values = vec![
        Some("one".as_bytes()),
        Some(b"two"),
        None,
        Some(b"six"),
        Some(b"ten"),
    ];
    let array = FixedSizeBinaryArray::from(values);
    assert_eq!(array.len(), 5);
    // Exactly one input entry was `None`; mirror the null-count checks made by
    // the dense `from_vec` test so the null accounting is covered here too.
    assert_eq!(array.null_count(), 1);
    assert_eq!(array.logical_null_count(), 1);
    // Index 2 is the null slot, so its value is deliberately not inspected.
    assert_eq!(array.value(0), b"one");
    assert_eq!(array.value(1), b"two");
    assert_eq!(array.value(3), b"six");
    assert_eq!(array.value(4), b"ten");
    assert!(!array.is_null(0));
    assert!(!array.is_null(1));
    assert!(array.is_null(2));
    assert!(!array.is_null(3));
    assert!(!array.is_null(4));
}
925 | | |
/// Converting optional slices whose present values differ in length (3 and 5
/// bytes) must panic with the size-mismatch message.
#[test]
#[should_panic(expected = "Nested array size mismatch: one is 3, and the other is 5")]
fn test_fixed_size_binary_array_from_opt_vec_incorrect_length() {
    let mismatched: [Option<&[u8]>; 5] = [
        Some(b"one"),
        Some(b"two"),
        None,
        Some(b"three"),
        Some(b"four"),
    ];
    let _ = FixedSizeBinaryArray::from(mismatched.to_vec());
}
938 | | |
/// A single-element, all-null array built with an explicit width of 0 must
/// produce array data that passes full validation.
#[test]
fn fixed_size_binary_array_all_null() {
    let only_null: Vec<Option<String>> = vec![None];
    let array =
        FixedSizeBinaryArray::try_from_sparse_iter_with_size(only_null.into_iter(), 0).unwrap();

    let validation = array.into_data().validate_full();
    validation.expect("All null array has valid array data");
}
949 | | |
/// Test for https://github.com/apache/arrow-rs/issues/1390
///
/// An all-null column built with an explicit width of 2 must be accepted by a
/// `RecordBatch` whose schema declares the field as `FixedSizeBinary(2)`.
#[test]
fn fixed_size_binary_array_all_null_in_batch_with_schema() {
    let field = Field::new("a", DataType::FixedSizeBinary(2), true);
    let schema = Arc::new(Schema::new(vec![field]));

    let absent: Option<[u8; 2]> = None;
    let column = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
        vec![absent; 3].into_iter(),
        2,
    )
    .unwrap();

    // Should not panic
    RecordBatch::try_new(schema, vec![Arc::new(column)]).unwrap();
}
965 | | |
/// Reading index 4 of a three-element array must panic with the
/// out-of-bounds message.
#[test]
#[should_panic(
    expected = "Trying to access an element at index 4 from a FixedSizeBinaryArray of length 3"
)]
fn test_fixed_size_binary_array_get_value_index_out_of_bound() {
    let three_slots: [Option<&[u8]>; 3] = [Some(b"one"), Some(b"two"), None];
    let array = FixedSizeBinaryArray::from(three_slots.to_vec());

    // Index 4 is past the end; the call is expected to panic.
    array.value(4);
}
976 | | |
/// Exercises `new` / `try_new` over one shared 10-byte buffer: valid widths
/// with and without a null buffer, a negative width, and a null buffer whose
/// length does not match the element count.
#[test]
fn test_constructors() {
    let bytes = Buffer::from_vec(vec![0_u8; 10]);

    // Width 2 over 10 bytes: 5 elements, with or without a matching null buffer.
    let pairs = FixedSizeBinaryArray::new(2, bytes.clone(), None);
    assert_eq!(pairs.len(), 5);
    FixedSizeBinaryArray::new(2, bytes.clone(), Some(NullBuffer::new_null(5)));

    // Width 3 over 10 bytes: the test expects 3 elements.
    let triples = FixedSizeBinaryArray::new(3, bytes.clone(), None);
    assert_eq!(triples.len(), 3);
    FixedSizeBinaryArray::new(3, bytes.clone(), Some(NullBuffer::new_null(3)));

    // A negative width is reported as an error by the fallible constructor.
    let negative_width = FixedSizeBinaryArray::try_new(-1, bytes.clone(), None).unwrap_err();
    assert_eq!(
        negative_width.to_string(),
        "Invalid argument error: Size cannot be negative, got -1"
    );

    // A null buffer of length 3 does not match the 5 elements implied by width 2.
    let short_nulls = Some(NullBuffer::new_null(3));
    let wrong_null_len = FixedSizeBinaryArray::try_new(2, bytes, short_nulls).unwrap_err();
    assert_eq!(
        wrong_null_len.to_string(),
        "Invalid argument error: Incorrect length of null buffer for FixedSizeBinaryArray, expected 5 got 3"
    );
}
1006 | | } |