/Users/andrewlamb/Software/arrow-rs/arrow-array/src/array/binary_array.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use crate::types::{ByteArrayType, GenericBinaryType}; |
19 | | use crate::{Array, GenericByteArray, GenericListArray, GenericStringArray, OffsetSizeTrait}; |
20 | | use arrow_data::ArrayData; |
21 | | use arrow_schema::DataType; |
22 | | |
/// A [`GenericByteArray`] for storing `[u8]`
///
/// `OffsetSize` selects the offset width: `i32` for [`BinaryArray`] and
/// `i64` for [`LargeBinaryArray`].
pub type GenericBinaryArray<OffsetSize> = GenericByteArray<GenericBinaryType<OffsetSize>>;
25 | | |
26 | | impl<OffsetSize: OffsetSizeTrait> GenericBinaryArray<OffsetSize> { |
27 | | /// Creates a [GenericBinaryArray] from a vector of byte slices |
28 | | /// |
29 | | /// See also [`Self::from_iter_values`] |
30 | 4 | pub fn from_vec(v: Vec<&[u8]>) -> Self { |
31 | 4 | Self::from_iter_values(v) |
32 | 4 | } |
33 | | |
34 | | /// Creates a [GenericBinaryArray] from a vector of Optional (null) byte slices |
35 | 1 | pub fn from_opt_vec(v: Vec<Option<&[u8]>>) -> Self { |
36 | 1 | v.into_iter().collect() |
37 | 1 | } |
38 | | |
39 | | fn from_list(v: GenericListArray<OffsetSize>) -> Self { |
40 | | let v = v.into_data(); |
41 | | assert_eq!( |
42 | | v.child_data().len(), |
43 | | 1, |
44 | | "BinaryArray can only be created from list array of u8 values \ |
45 | | (i.e. List<PrimitiveArray<u8>>)." |
46 | | ); |
47 | | let child_data = &v.child_data()[0]; |
48 | | |
49 | | assert_eq!( |
50 | | child_data.child_data().len(), |
51 | | 0, |
52 | | "BinaryArray can only be created from list array of u8 values \ |
53 | | (i.e. List<PrimitiveArray<u8>>)." |
54 | | ); |
55 | | assert_eq!( |
56 | | child_data.data_type(), |
57 | | &DataType::UInt8, |
58 | | "BinaryArray can only be created from List<u8> arrays, mismatched data types." |
59 | | ); |
60 | | assert_eq!( |
61 | | child_data.null_count(), |
62 | | 0, |
63 | | "The child array cannot contain null values." |
64 | | ); |
65 | | |
66 | | let builder = ArrayData::builder(Self::DATA_TYPE) |
67 | | .len(v.len()) |
68 | | .offset(v.offset()) |
69 | | .add_buffer(v.buffers()[0].clone()) |
70 | | .add_buffer(child_data.buffers()[0].slice(child_data.offset())) |
71 | | .nulls(v.nulls().cloned()); |
72 | | |
73 | | let data = unsafe { builder.build_unchecked() }; |
74 | | Self::from(data) |
75 | | } |
76 | | |
77 | | /// Returns an iterator that returns the values of `array.value(i)` for an iterator with each element `i` |
78 | | pub fn take_iter<'a>( |
79 | | &'a self, |
80 | | indexes: impl Iterator<Item = Option<usize>> + 'a, |
81 | | ) -> impl Iterator<Item = Option<&'a [u8]>> { |
82 | | indexes.map(|opt_index| opt_index.map(|index| self.value(index))) |
83 | | } |
84 | | |
85 | | /// Returns an iterator that returns the values of `array.value(i)` for an iterator with each element `i` |
86 | | /// # Safety |
87 | | /// |
88 | | /// caller must ensure that the indexes in the iterator are less than the `array.len()` |
89 | | pub unsafe fn take_iter_unchecked<'a>( |
90 | | &'a self, |
91 | | indexes: impl Iterator<Item = Option<usize>> + 'a, |
92 | | ) -> impl Iterator<Item = Option<&'a [u8]>> { |
93 | | indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index))) |
94 | | } |
95 | | } |
96 | | |
97 | | impl<OffsetSize: OffsetSizeTrait> From<Vec<Option<&[u8]>>> for GenericBinaryArray<OffsetSize> { |
98 | 1 | fn from(v: Vec<Option<&[u8]>>) -> Self { |
99 | 1 | Self::from_opt_vec(v) |
100 | 1 | } |
101 | | } |
102 | | |
103 | | impl<OffsetSize: OffsetSizeTrait> From<Vec<&[u8]>> for GenericBinaryArray<OffsetSize> { |
104 | | fn from(v: Vec<&[u8]>) -> Self { |
105 | | Self::from_iter_values(v) |
106 | | } |
107 | | } |
108 | | |
109 | | impl<T: OffsetSizeTrait> From<GenericListArray<T>> for GenericBinaryArray<T> { |
110 | | fn from(v: GenericListArray<T>) -> Self { |
111 | | Self::from_list(v) |
112 | | } |
113 | | } |
114 | | |
115 | | impl<OffsetSize: OffsetSizeTrait> From<GenericStringArray<OffsetSize>> |
116 | | for GenericBinaryArray<OffsetSize> |
117 | | { |
118 | 0 | fn from(value: GenericStringArray<OffsetSize>) -> Self { |
119 | 0 | let builder = value |
120 | 0 | .into_data() |
121 | 0 | .into_builder() |
122 | 0 | .data_type(GenericBinaryType::<OffsetSize>::DATA_TYPE); |
123 | | |
124 | | // Safety: |
125 | | // A StringArray is a valid BinaryArray |
126 | 0 | Self::from(unsafe { builder.build_unchecked() }) |
127 | 0 | } |
128 | | } |
129 | | |
/// A [`GenericBinaryArray`] of `[u8]` using `i32` offsets
///
/// The byte length of each element is represented by an `i32`.
/// See [`LargeBinaryArray`] for the variant using `i64` offsets.
///
/// # Examples
///
/// Create a BinaryArray from a vector of byte slices.
///
/// ```
/// use arrow_array::{Array, BinaryArray};
/// let values: Vec<&[u8]> =
///     vec![b"one", b"two", b"", b"three"];
/// let array = BinaryArray::from_vec(values);
/// assert_eq!(4, array.len());
/// assert_eq!(b"one", array.value(0));
/// assert_eq!(b"two", array.value(1));
/// assert_eq!(b"", array.value(2));
/// assert_eq!(b"three", array.value(3));
/// ```
///
/// Create a BinaryArray from a vector of optional byte slices, where each
/// `None` becomes a null element.
///
/// ```
/// use arrow_array::{Array, BinaryArray};
/// let values: Vec<Option<&[u8]>> =
///     vec![Some(b"one"), Some(b"two"), None, Some(b""), Some(b"three")];
/// let array = BinaryArray::from_opt_vec(values);
/// assert_eq!(5, array.len());
/// assert_eq!(b"one", array.value(0));
/// assert_eq!(b"two", array.value(1));
/// assert_eq!(b"", array.value(3));
/// assert_eq!(b"three", array.value(4));
/// assert!(!array.is_null(0));
/// assert!(!array.is_null(1));
/// assert!(array.is_null(2));
/// assert!(!array.is_null(3));
/// assert!(!array.is_null(4));
/// ```
///
/// See [`GenericByteArray`] for more information and examples
pub type BinaryArray = GenericBinaryArray<i32>;
171 | | |
/// A [`GenericBinaryArray`] of `[u8]` using `i64` offsets
///
/// The byte length of each element is represented by an `i64`.
/// See [`BinaryArray`] for the variant using `i32` offsets.
///
/// # Examples
///
/// Create a LargeBinaryArray from a vector of byte slices.
///
/// ```
/// use arrow_array::{Array, LargeBinaryArray};
/// let values: Vec<&[u8]> =
///     vec![b"one", b"two", b"", b"three"];
/// let array = LargeBinaryArray::from_vec(values);
/// assert_eq!(4, array.len());
/// assert_eq!(b"one", array.value(0));
/// assert_eq!(b"two", array.value(1));
/// assert_eq!(b"", array.value(2));
/// assert_eq!(b"three", array.value(3));
/// ```
///
/// Create a LargeBinaryArray from a vector of optional byte slices, where
/// each `None` becomes a null element.
///
/// ```
/// use arrow_array::{Array, LargeBinaryArray};
/// let values: Vec<Option<&[u8]>> =
///     vec![Some(b"one"), Some(b"two"), None, Some(b""), Some(b"three")];
/// let array = LargeBinaryArray::from_opt_vec(values);
/// assert_eq!(5, array.len());
/// assert_eq!(b"one", array.value(0));
/// assert_eq!(b"two", array.value(1));
/// assert_eq!(b"", array.value(3));
/// assert_eq!(b"three", array.value(4));
/// assert!(!array.is_null(0));
/// assert!(!array.is_null(1));
/// assert!(array.is_null(2));
/// assert!(!array.is_null(3));
/// assert!(!array.is_null(4));
/// ```
///
/// See [`GenericByteArray`] for more information and examples
pub type LargeBinaryArray = GenericBinaryArray<i64>;
211 | | |
212 | | #[cfg(test)] |
213 | | mod tests { |
214 | | use super::*; |
215 | | use crate::{ListArray, StringArray}; |
216 | | use arrow_buffer::Buffer; |
217 | | use arrow_schema::Field; |
218 | | use std::sync::Arc; |
219 | | |
220 | | #[test] |
221 | | fn test_binary_array() { |
222 | | let values: [u8; 12] = [ |
223 | | b'h', b'e', b'l', b'l', b'o', b'p', b'a', b'r', b'q', b'u', b'e', b't', |
224 | | ]; |
225 | | let offsets: [i32; 4] = [0, 5, 5, 12]; |
226 | | |
227 | | // Array data: ["hello", "", "parquet"] |
228 | | let array_data = ArrayData::builder(DataType::Binary) |
229 | | .len(3) |
230 | | .add_buffer(Buffer::from_slice_ref(offsets)) |
231 | | .add_buffer(Buffer::from_slice_ref(values)) |
232 | | .build() |
233 | | .unwrap(); |
234 | | let binary_array = BinaryArray::from(array_data); |
235 | | assert_eq!(3, binary_array.len()); |
236 | | assert_eq!(0, binary_array.null_count()); |
237 | | assert_eq!([b'h', b'e', b'l', b'l', b'o'], binary_array.value(0)); |
238 | | assert_eq!([b'h', b'e', b'l', b'l', b'o'], unsafe { |
239 | | binary_array.value_unchecked(0) |
240 | | }); |
241 | | assert_eq!([] as [u8; 0], binary_array.value(1)); |
242 | | assert_eq!([] as [u8; 0], unsafe { binary_array.value_unchecked(1) }); |
243 | | assert_eq!( |
244 | | [b'p', b'a', b'r', b'q', b'u', b'e', b't'], |
245 | | binary_array.value(2) |
246 | | ); |
247 | | assert_eq!([b'p', b'a', b'r', b'q', b'u', b'e', b't'], unsafe { |
248 | | binary_array.value_unchecked(2) |
249 | | }); |
250 | | assert_eq!(5, binary_array.value_offsets()[2]); |
251 | | assert_eq!(7, binary_array.value_length(2)); |
252 | | for i in 0..3 { |
253 | | assert!(binary_array.is_valid(i)); |
254 | | assert!(!binary_array.is_null(i)); |
255 | | } |
256 | | } |
257 | | |
258 | | #[test] |
259 | | fn test_binary_array_with_offsets() { |
260 | | let values: [u8; 12] = [ |
261 | | b'h', b'e', b'l', b'l', b'o', b'p', b'a', b'r', b'q', b'u', b'e', b't', |
262 | | ]; |
263 | | let offsets: [i32; 4] = [0, 5, 5, 12]; |
264 | | |
265 | | // Test binary array with offset |
266 | | let array_data = ArrayData::builder(DataType::Binary) |
267 | | .len(2) |
268 | | .offset(1) |
269 | | .add_buffer(Buffer::from_slice_ref(offsets)) |
270 | | .add_buffer(Buffer::from_slice_ref(values)) |
271 | | .build() |
272 | | .unwrap(); |
273 | | let binary_array = BinaryArray::from(array_data); |
274 | | assert_eq!( |
275 | | [b'p', b'a', b'r', b'q', b'u', b'e', b't'], |
276 | | binary_array.value(1) |
277 | | ); |
278 | | assert_eq!(5, binary_array.value_offsets()[0]); |
279 | | assert_eq!(0, binary_array.value_length(0)); |
280 | | assert_eq!(5, binary_array.value_offsets()[1]); |
281 | | assert_eq!(7, binary_array.value_length(1)); |
282 | | } |
283 | | |
284 | | #[test] |
285 | | fn test_large_binary_array() { |
286 | | let values: [u8; 12] = [ |
287 | | b'h', b'e', b'l', b'l', b'o', b'p', b'a', b'r', b'q', b'u', b'e', b't', |
288 | | ]; |
289 | | let offsets: [i64; 4] = [0, 5, 5, 12]; |
290 | | |
291 | | // Array data: ["hello", "", "parquet"] |
292 | | let array_data = ArrayData::builder(DataType::LargeBinary) |
293 | | .len(3) |
294 | | .add_buffer(Buffer::from_slice_ref(offsets)) |
295 | | .add_buffer(Buffer::from_slice_ref(values)) |
296 | | .build() |
297 | | .unwrap(); |
298 | | let binary_array = LargeBinaryArray::from(array_data); |
299 | | assert_eq!(3, binary_array.len()); |
300 | | assert_eq!(0, binary_array.null_count()); |
301 | | assert_eq!([b'h', b'e', b'l', b'l', b'o'], binary_array.value(0)); |
302 | | assert_eq!([b'h', b'e', b'l', b'l', b'o'], unsafe { |
303 | | binary_array.value_unchecked(0) |
304 | | }); |
305 | | assert_eq!([] as [u8; 0], binary_array.value(1)); |
306 | | assert_eq!([] as [u8; 0], unsafe { binary_array.value_unchecked(1) }); |
307 | | assert_eq!( |
308 | | [b'p', b'a', b'r', b'q', b'u', b'e', b't'], |
309 | | binary_array.value(2) |
310 | | ); |
311 | | assert_eq!([b'p', b'a', b'r', b'q', b'u', b'e', b't'], unsafe { |
312 | | binary_array.value_unchecked(2) |
313 | | }); |
314 | | assert_eq!(5, binary_array.value_offsets()[2]); |
315 | | assert_eq!(7, binary_array.value_length(2)); |
316 | | for i in 0..3 { |
317 | | assert!(binary_array.is_valid(i)); |
318 | | assert!(!binary_array.is_null(i)); |
319 | | } |
320 | | } |
321 | | |
322 | | #[test] |
323 | | fn test_large_binary_array_with_offsets() { |
324 | | let values: [u8; 12] = [ |
325 | | b'h', b'e', b'l', b'l', b'o', b'p', b'a', b'r', b'q', b'u', b'e', b't', |
326 | | ]; |
327 | | let offsets: [i64; 4] = [0, 5, 5, 12]; |
328 | | |
329 | | // Test binary array with offset |
330 | | let array_data = ArrayData::builder(DataType::LargeBinary) |
331 | | .len(2) |
332 | | .offset(1) |
333 | | .add_buffer(Buffer::from_slice_ref(offsets)) |
334 | | .add_buffer(Buffer::from_slice_ref(values)) |
335 | | .build() |
336 | | .unwrap(); |
337 | | let binary_array = LargeBinaryArray::from(array_data); |
338 | | assert_eq!( |
339 | | [b'p', b'a', b'r', b'q', b'u', b'e', b't'], |
340 | | binary_array.value(1) |
341 | | ); |
342 | | assert_eq!([b'p', b'a', b'r', b'q', b'u', b'e', b't'], unsafe { |
343 | | binary_array.value_unchecked(1) |
344 | | }); |
345 | | assert_eq!(5, binary_array.value_offsets()[0]); |
346 | | assert_eq!(0, binary_array.value_length(0)); |
347 | | assert_eq!(5, binary_array.value_offsets()[1]); |
348 | | assert_eq!(7, binary_array.value_length(1)); |
349 | | } |
350 | | |
351 | | fn _test_generic_binary_array_from_list_array<O: OffsetSizeTrait>() { |
352 | | let values = b"helloparquet"; |
353 | | let child_data = ArrayData::builder(DataType::UInt8) |
354 | | .len(12) |
355 | | .add_buffer(Buffer::from(values)) |
356 | | .build() |
357 | | .unwrap(); |
358 | | let offsets = [0, 5, 5, 12].map(|n| O::from_usize(n).unwrap()); |
359 | | |
360 | | // Array data: ["hello", "", "parquet"] |
361 | | let array_data1 = ArrayData::builder(GenericBinaryArray::<O>::DATA_TYPE) |
362 | | .len(3) |
363 | | .add_buffer(Buffer::from_slice_ref(offsets)) |
364 | | .add_buffer(Buffer::from_slice_ref(values)) |
365 | | .build() |
366 | | .unwrap(); |
367 | | let binary_array1 = GenericBinaryArray::<O>::from(array_data1); |
368 | | |
369 | | let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new( |
370 | | Field::new_list_field(DataType::UInt8, false), |
371 | | )); |
372 | | |
373 | | let array_data2 = ArrayData::builder(data_type) |
374 | | .len(3) |
375 | | .add_buffer(Buffer::from_slice_ref(offsets)) |
376 | | .add_child_data(child_data) |
377 | | .build() |
378 | | .unwrap(); |
379 | | let list_array = GenericListArray::<O>::from(array_data2); |
380 | | let binary_array2 = GenericBinaryArray::<O>::from(list_array); |
381 | | |
382 | | assert_eq!(binary_array1.len(), binary_array2.len()); |
383 | | assert_eq!(binary_array1.null_count(), binary_array2.null_count()); |
384 | | assert_eq!(binary_array1.value_offsets(), binary_array2.value_offsets()); |
385 | | for i in 0..binary_array1.len() { |
386 | | assert_eq!(binary_array1.value(i), binary_array2.value(i)); |
387 | | assert_eq!(binary_array1.value(i), unsafe { |
388 | | binary_array2.value_unchecked(i) |
389 | | }); |
390 | | assert_eq!(binary_array1.value_length(i), binary_array2.value_length(i)); |
391 | | } |
392 | | } |
393 | | |
    // Runs the list-to-binary conversion check with `i32` offsets.
    #[test]
    fn test_binary_array_from_list_array() {
        _test_generic_binary_array_from_list_array::<i32>();
    }
398 | | |
    // Runs the list-to-binary conversion check with `i64` offsets.
    #[test]
    fn test_large_binary_array_from_list_array() {
        _test_generic_binary_array_from_list_array::<i64>();
    }
403 | | |
404 | | fn _test_generic_binary_array_from_list_array_with_offset<O: OffsetSizeTrait>() { |
405 | | let values = b"HelloArrowAndParquet"; |
406 | | // b"ArrowAndParquet" |
407 | | let child_data = ArrayData::builder(DataType::UInt8) |
408 | | .len(15) |
409 | | .offset(5) |
410 | | .add_buffer(Buffer::from(values)) |
411 | | .build() |
412 | | .unwrap(); |
413 | | |
414 | | let offsets = [0, 5, 8, 15].map(|n| O::from_usize(n).unwrap()); |
415 | | let null_buffer = Buffer::from_slice_ref([0b101]); |
416 | | let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new( |
417 | | Field::new_list_field(DataType::UInt8, false), |
418 | | )); |
419 | | |
420 | | // [None, Some(b"Parquet")] |
421 | | let array_data = ArrayData::builder(data_type) |
422 | | .len(2) |
423 | | .offset(1) |
424 | | .add_buffer(Buffer::from_slice_ref(offsets)) |
425 | | .null_bit_buffer(Some(null_buffer)) |
426 | | .add_child_data(child_data) |
427 | | .build() |
428 | | .unwrap(); |
429 | | let list_array = GenericListArray::<O>::from(array_data); |
430 | | let binary_array = GenericBinaryArray::<O>::from(list_array); |
431 | | |
432 | | assert_eq!(2, binary_array.len()); |
433 | | assert_eq!(1, binary_array.null_count()); |
434 | | assert!(binary_array.is_null(0)); |
435 | | assert!(binary_array.is_valid(1)); |
436 | | assert_eq!(b"Parquet", binary_array.value(1)); |
437 | | } |
438 | | |
    // Runs the offset-aware list-to-binary conversion check with `i32` offsets.
    #[test]
    fn test_binary_array_from_list_array_with_offset() {
        _test_generic_binary_array_from_list_array_with_offset::<i32>();
    }
443 | | |
    // Runs the offset-aware list-to-binary conversion check with `i64` offsets.
    #[test]
    fn test_large_binary_array_from_list_array_with_offset() {
        _test_generic_binary_array_from_list_array_with_offset::<i64>();
    }
448 | | |
449 | | fn _test_generic_binary_array_from_list_array_with_child_nulls_failed<O: OffsetSizeTrait>() { |
450 | | let values = b"HelloArrow"; |
451 | | let child_data = ArrayData::builder(DataType::UInt8) |
452 | | .len(10) |
453 | | .add_buffer(Buffer::from(values)) |
454 | | .null_bit_buffer(Some(Buffer::from_slice_ref([0b1010101010]))) |
455 | | .build() |
456 | | .unwrap(); |
457 | | |
458 | | let offsets = [0, 5, 10].map(|n| O::from_usize(n).unwrap()); |
459 | | let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new( |
460 | | Field::new_list_field(DataType::UInt8, true), |
461 | | )); |
462 | | |
463 | | // [None, Some(b"Parquet")] |
464 | | let array_data = ArrayData::builder(data_type) |
465 | | .len(2) |
466 | | .add_buffer(Buffer::from_slice_ref(offsets)) |
467 | | .add_child_data(child_data) |
468 | | .build() |
469 | | .unwrap(); |
470 | | let list_array = GenericListArray::<O>::from(array_data); |
471 | | drop(GenericBinaryArray::<O>::from(list_array)); |
472 | | } |
473 | | |
    // With `i32` offsets, converting a list whose child contains nulls must panic.
    #[test]
    #[should_panic(expected = "The child array cannot contain null values.")]
    fn test_binary_array_from_list_array_with_child_nulls_failed() {
        _test_generic_binary_array_from_list_array_with_child_nulls_failed::<i32>();
    }
479 | | |
    // With `i64` offsets, converting a list whose child contains nulls must panic.
    #[test]
    #[should_panic(expected = "The child array cannot contain null values.")]
    fn test_large_binary_array_from_list_array_with_child_nulls_failed() {
        _test_generic_binary_array_from_list_array_with_child_nulls_failed::<i64>();
    }
485 | | |
486 | | fn test_generic_binary_array_from_opt_vec<T: OffsetSizeTrait>() { |
487 | | let values: Vec<Option<&[u8]>> = |
488 | | vec![Some(b"one"), Some(b"two"), None, Some(b""), Some(b"three")]; |
489 | | let array = GenericBinaryArray::<T>::from_opt_vec(values); |
490 | | assert_eq!(array.len(), 5); |
491 | | assert_eq!(array.value(0), b"one"); |
492 | | assert_eq!(array.value(1), b"two"); |
493 | | assert_eq!(array.value(3), b""); |
494 | | assert_eq!(array.value(4), b"three"); |
495 | | assert!(!array.is_null(0)); |
496 | | assert!(!array.is_null(1)); |
497 | | assert!(array.is_null(2)); |
498 | | assert!(!array.is_null(3)); |
499 | | assert!(!array.is_null(4)); |
500 | | } |
501 | | |
    // Runs the `from_opt_vec` check with `i64` offsets.
    #[test]
    fn test_large_binary_array_from_opt_vec() {
        test_generic_binary_array_from_opt_vec::<i64>()
    }
506 | | |
    // Runs the `from_opt_vec` check with `i32` offsets.
    #[test]
    fn test_binary_array_from_opt_vec() {
        test_generic_binary_array_from_opt_vec::<i32>()
    }
511 | | |
512 | | #[test] |
513 | | fn test_binary_array_from_unbound_iter() { |
514 | | // iterator that doesn't declare (upper) size bound |
515 | | let value_iter = (0..) |
516 | | .scan(0usize, |pos, i| { |
517 | | if *pos < 10 { |
518 | | *pos += 1; |
519 | | Some(Some(format!("value {i}"))) |
520 | | } else { |
521 | | // actually returns up to 10 values |
522 | | None |
523 | | } |
524 | | }) |
525 | | // limited using take() |
526 | | .take(100); |
527 | | |
528 | | let (_, upper_size_bound) = value_iter.size_hint(); |
529 | | // the upper bound, defined by take above, is 100 |
530 | | assert_eq!(upper_size_bound, Some(100)); |
531 | | let binary_array: BinaryArray = value_iter.collect(); |
532 | | // but the actual number of items in the array should be 10 |
533 | | assert_eq!(binary_array.len(), 10); |
534 | | } |
535 | | |
536 | | #[test] |
537 | | #[should_panic( |
538 | | expected = "BinaryArray can only be created from List<u8> arrays, mismatched data types." |
539 | | )] |
540 | | fn test_binary_array_from_incorrect_list_array() { |
541 | | let values: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]; |
542 | | let values_data = ArrayData::builder(DataType::UInt32) |
543 | | .len(12) |
544 | | .add_buffer(Buffer::from_slice_ref(values)) |
545 | | .build() |
546 | | .unwrap(); |
547 | | let offsets: [i32; 4] = [0, 5, 5, 12]; |
548 | | |
549 | | let data_type = DataType::List(Arc::new(Field::new_list_field(DataType::UInt32, false))); |
550 | | let array_data = ArrayData::builder(data_type) |
551 | | .len(3) |
552 | | .add_buffer(Buffer::from_slice_ref(offsets)) |
553 | | .add_child_data(values_data) |
554 | | .build() |
555 | | .unwrap(); |
556 | | let list_array = ListArray::from(array_data); |
557 | | drop(BinaryArray::from(list_array)); |
558 | | } |
559 | | |
560 | | #[test] |
561 | | #[should_panic( |
562 | | expected = "Trying to access an element at index 4 from a BinaryArray of length 3" |
563 | | )] |
564 | | fn test_binary_array_get_value_index_out_of_bound() { |
565 | | let values: [u8; 12] = [104, 101, 108, 108, 111, 112, 97, 114, 113, 117, 101, 116]; |
566 | | let offsets: [i32; 4] = [0, 5, 5, 12]; |
567 | | let array_data = ArrayData::builder(DataType::Binary) |
568 | | .len(3) |
569 | | .add_buffer(Buffer::from_slice_ref(offsets)) |
570 | | .add_buffer(Buffer::from_slice_ref(values)) |
571 | | .build() |
572 | | .unwrap(); |
573 | | let binary_array = BinaryArray::from(array_data); |
574 | | binary_array.value(4); |
575 | | } |
576 | | |
577 | | #[test] |
578 | | #[should_panic(expected = "LargeBinaryArray expects DataType::LargeBinary")] |
579 | | fn test_binary_array_validation() { |
580 | | let array = BinaryArray::from_iter_values([&[1, 2]]); |
581 | | let _ = LargeBinaryArray::from(array.into_data()); |
582 | | } |
583 | | |
584 | | #[test] |
585 | | fn test_binary_array_all_null() { |
586 | | let data = vec![None]; |
587 | | let array = BinaryArray::from(data); |
588 | | array |
589 | | .into_data() |
590 | | .validate_full() |
591 | | .expect("All null array has valid array data"); |
592 | | } |
593 | | |
594 | | #[test] |
595 | | fn test_large_binary_array_all_null() { |
596 | | let data = vec![None]; |
597 | | let array = LargeBinaryArray::from(data); |
598 | | array |
599 | | .into_data() |
600 | | .validate_full() |
601 | | .expect("All null array has valid array data"); |
602 | | } |
603 | | |
604 | | #[test] |
605 | | fn test_empty_offsets() { |
606 | | let string = BinaryArray::from( |
607 | | ArrayData::builder(DataType::Binary) |
608 | | .buffers(vec![Buffer::from(&[]), Buffer::from(&[])]) |
609 | | .build() |
610 | | .unwrap(), |
611 | | ); |
612 | | assert_eq!(string.value_offsets(), &[0]); |
613 | | let string = LargeBinaryArray::from( |
614 | | ArrayData::builder(DataType::LargeBinary) |
615 | | .buffers(vec![Buffer::from(&[]), Buffer::from(&[])]) |
616 | | .build() |
617 | | .unwrap(), |
618 | | ); |
619 | | assert_eq!(string.len(), 0); |
620 | | assert_eq!(string.value_offsets(), &[0]); |
621 | | } |
622 | | |
623 | | #[test] |
624 | | fn test_to_from_string() { |
625 | | let s = StringArray::from_iter_values(["a", "b", "c", "d"]); |
626 | | let b = BinaryArray::from(s.clone()); |
627 | | let sa = StringArray::from(b); // Performs UTF-8 validation again |
628 | | |
629 | | assert_eq!(s, sa); |
630 | | } |
631 | | } |