Coverage Report

Created: 2025-11-17 14:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-array/src/array/byte_array.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use crate::array::{get_offsets, print_long_array};
19
use crate::builder::GenericByteBuilder;
20
use crate::iterator::ArrayIter;
21
use crate::types::ByteArrayType;
22
use crate::types::bytes::ByteArrayNativeType;
23
use crate::{Array, ArrayAccessor, ArrayRef, OffsetSizeTrait, Scalar};
24
use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer};
25
use arrow_buffer::{NullBuffer, OffsetBuffer};
26
use arrow_data::{ArrayData, ArrayDataBuilder};
27
use arrow_schema::{ArrowError, DataType};
28
use std::any::Any;
29
use std::sync::Arc;
30
31
/// An array of [variable length byte arrays](https://arrow.apache.org/docs/format/Columnar.html#variable-size-binary-layout)
32
///
33
/// See [`StringArray`] and [`LargeStringArray`] for storing utf8 encoded string data
34
///
35
/// See [`BinaryArray`] and [`LargeBinaryArray`] for storing arbitrary bytes
36
///
37
/// # Example: From a Vec
38
///
39
/// ```
40
/// # use arrow_array::{Array, GenericByteArray, types::Utf8Type};
41
/// let arr: GenericByteArray<Utf8Type> = vec!["hello", "world", ""].into();
42
/// assert_eq!(arr.value_data(), b"helloworld");
43
/// assert_eq!(arr.value_offsets(), &[0, 5, 10, 10]);
44
/// let values: Vec<_> = arr.iter().collect();
45
/// assert_eq!(values, &[Some("hello"), Some("world"), Some("")]);
46
/// ```
47
///
48
/// # Example: From an optional Vec
49
///
50
/// ```
51
/// # use arrow_array::{Array, GenericByteArray, types::Utf8Type};
52
/// let arr: GenericByteArray<Utf8Type> = vec![Some("hello"), Some("world"), Some(""), None].into();
53
/// assert_eq!(arr.value_data(), b"helloworld");
54
/// assert_eq!(arr.value_offsets(), &[0, 5, 10, 10, 10]);
55
/// let values: Vec<_> = arr.iter().collect();
56
/// assert_eq!(values, &[Some("hello"), Some("world"), Some(""), None]);
57
/// ```
58
///
59
/// # Example: From an iterator of option
60
///
61
/// ```
62
/// # use arrow_array::{Array, GenericByteArray, types::Utf8Type};
63
/// let arr: GenericByteArray<Utf8Type> = (0..5).map(|x| (x % 2 == 0).then(|| x.to_string())).collect();
64
/// let values: Vec<_> = arr.iter().collect();
65
/// assert_eq!(values, &[Some("0"), None, Some("2"), None, Some("4")]);
66
/// ```
67
///
68
/// # Example: Using Builder
69
///
70
/// ```
71
/// # use arrow_array::Array;
72
/// # use arrow_array::builder::GenericByteBuilder;
73
/// # use arrow_array::types::Utf8Type;
74
/// let mut builder = GenericByteBuilder::<Utf8Type>::new();
75
/// builder.append_value("hello");
76
/// builder.append_null();
77
/// builder.append_value("world");
78
/// let array = builder.finish();
79
/// let values: Vec<_> = array.iter().collect();
80
/// assert_eq!(values, &[Some("hello"), None, Some("world")]);
81
/// ```
82
///
83
/// [`StringArray`]: crate::StringArray
84
/// [`LargeStringArray`]: crate::LargeStringArray
85
/// [`BinaryArray`]: crate::BinaryArray
86
/// [`LargeBinaryArray`]: crate::LargeBinaryArray
87
pub struct GenericByteArray<T: ByteArrayType> {
88
    data_type: DataType,
89
    value_offsets: OffsetBuffer<T::Offset>,
90
    value_data: Buffer,
91
    nulls: Option<NullBuffer>,
92
}
93
94
impl<T: ByteArrayType> Clone for GenericByteArray<T> {
95
160k
    fn clone(&self) -> Self {
96
160k
        Self {
97
160k
            data_type: T::DATA_TYPE,
98
160k
            value_offsets: self.value_offsets.clone(),
99
160k
            value_data: self.value_data.clone(),
100
160k
            nulls: self.nulls.clone(),
101
160k
        }
102
160k
    }
103
}
104
105
impl<T: ByteArrayType> GenericByteArray<T> {
106
    /// Data type of the array.
107
    pub const DATA_TYPE: DataType = T::DATA_TYPE;
108
109
    /// Create a new [`GenericByteArray`] from the provided parts, panicking on failure
110
    ///
111
    /// # Panics
112
    ///
113
    /// Panics if [`GenericByteArray::try_new`] returns an error
114
1
    pub fn new(
115
1
        offsets: OffsetBuffer<T::Offset>,
116
1
        values: Buffer,
117
1
        nulls: Option<NullBuffer>,
118
1
    ) -> Self {
119
1
        Self::try_new(offsets, values, nulls).unwrap()
120
1
    }
121
122
    /// Create a new [`GenericByteArray`] from the provided parts, returning an error on failure
123
    ///
124
    /// # Errors
125
    ///
126
    /// * `offsets.len() - 1 != nulls.len()`
127
    /// * Any consecutive pair of `offsets` does not denote a valid slice of `values`
128
1
    pub fn try_new(
129
1
        offsets: OffsetBuffer<T::Offset>,
130
1
        values: Buffer,
131
1
        nulls: Option<NullBuffer>,
132
1
    ) -> Result<Self, ArrowError> {
133
1
        let len = offsets.len() - 1;
134
135
        // Verify that each pair of offsets denotes a valid slice of values
136
1
        T::validate(&offsets, &values)
?0
;
137
138
1
        if let Some(n) = nulls.as_ref() {
139
1
            if n.len() != len {
140
0
                return Err(ArrowError::InvalidArgumentError(format!(
141
0
                    "Incorrect length of null buffer for {}{}Array, expected {len} got {}",
142
0
                    T::Offset::PREFIX,
143
0
                    T::PREFIX,
144
0
                    n.len(),
145
0
                )));
146
1
            }
147
0
        }
148
149
1
        Ok(Self {
150
1
            data_type: T::DATA_TYPE,
151
1
            value_offsets: offsets,
152
1
            value_data: values,
153
1
            nulls,
154
1
        })
155
1
    }
156
157
    /// Create a new [`GenericByteArray`] from the provided parts, without validation
158
    ///
159
    /// # Safety
160
    ///
161
    /// Safe if [`Self::try_new`] would not error
162
41
    pub unsafe fn new_unchecked(
163
41
        offsets: OffsetBuffer<T::Offset>,
164
41
        values: Buffer,
165
41
        nulls: Option<NullBuffer>,
166
41
    ) -> Self {
167
41
        if cfg!(feature = "force_validate") {
168
0
            return Self::new(offsets, values, nulls);
169
41
        }
170
41
        Self {
171
41
            data_type: T::DATA_TYPE,
172
41
            value_offsets: offsets,
173
41
            value_data: values,
174
41
            nulls,
175
41
        }
176
41
    }
177
178
    /// Create a new [`GenericByteArray`] of length `len` where all values are null
179
32
    pub fn new_null(len: usize) -> Self {
180
32
        Self {
181
32
            data_type: T::DATA_TYPE,
182
32
            value_offsets: OffsetBuffer::new_zeroed(len),
183
32
            value_data: MutableBuffer::new(0).into(),
184
32
            nulls: Some(NullBuffer::new_null(len)),
185
32
        }
186
32
    }
187
188
    /// Create a new [`Scalar`] from `value`
189
    pub fn new_scalar(value: impl AsRef<T::Native>) -> Scalar<Self> {
190
        Scalar::new(Self::from_iter_values(std::iter::once(value)))
191
    }
192
193
    /// Create a new [`GenericByteArray`] where `value` is repeated `repeat_count` times.
194
    ///
195
    /// # Panics
196
    /// Panics if `value`'s length multiplied by `repeat_count` overflows `usize`, or if the resulting total length overflows the offset type.
197
    ///
198
    pub fn new_repeated(value: impl AsRef<T::Native>, repeat_count: usize) -> Self {
199
        let s: &[u8] = value.as_ref().as_ref();
200
        let value_offsets = OffsetBuffer::from_repeated_length(s.len(), repeat_count);
201
        let bytes: Buffer = {
202
            let mut mutable_buffer = MutableBuffer::with_capacity(0);
203
            mutable_buffer.repeat_slice_n_times(s, repeat_count);
204
205
            mutable_buffer.into()
206
        };
207
208
        Self {
209
            data_type: T::DATA_TYPE,
210
            value_data: bytes,
211
            value_offsets,
212
            nulls: None,
213
        }
214
    }
215
216
    /// Creates a [`GenericByteArray`] based on an iterator of values without nulls
217
93
    pub fn from_iter_values<Ptr, I>(iter: I) -> Self
218
93
    where
219
93
        Ptr: AsRef<T::Native>,
220
93
        I: IntoIterator<Item = Ptr>,
221
    {
222
93
        let iter = iter.into_iter();
223
93
        let (_, data_len) = iter.size_hint();
224
93
        let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound.
225
226
93
        let mut offsets = MutableBuffer::new((data_len + 1) * std::mem::size_of::<T::Offset>());
227
93
        offsets.push(T::Offset::usize_as(0));
228
229
93
        let mut values = MutableBuffer::new(0);
230
743
        for 
s650
in iter {
231
650
            let s: &[u8] = s.as_ref().as_ref();
232
650
            values.extend_from_slice(s);
233
650
            offsets.push(T::Offset::usize_as(values.len()));
234
650
        }
235
236
93
        T::Offset::from_usize(values.len()).expect("offset overflow");
237
93
        let offsets = Buffer::from(offsets);
238
239
        // Safety: valid by construction
240
93
        let value_offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
241
242
93
        Self {
243
93
            data_type: T::DATA_TYPE,
244
93
            value_data: values.into(),
245
93
            value_offsets,
246
93
            nulls: None,
247
93
        }
248
93
    }
249
250
    /// Deconstruct this array into its constituent parts
251
    pub fn into_parts(self) -> (OffsetBuffer<T::Offset>, Buffer, Option<NullBuffer>) {
252
        (self.value_offsets, self.value_data, self.nulls)
253
    }
254
255
    /// Returns the length of the value at index `i`.
256
    /// # Panics
257
    /// Panics if index `i` is out of bounds.
258
    #[inline]
259
    pub fn value_length(&self, i: usize) -> T::Offset {
260
        let offsets = self.value_offsets();
261
        offsets[i + 1] - offsets[i]
262
    }
263
264
    /// Returns a reference to the offsets of this array
265
    ///
266
    /// Unlike [`Self::value_offsets`] this returns the [`OffsetBuffer`]
267
    /// allowing for zero-copy cloning
268
    #[inline]
269
137
    pub fn offsets(&self) -> &OffsetBuffer<T::Offset> {
270
137
        &self.value_offsets
271
137
    }
272
273
    /// Returns the values of this array
274
    ///
275
    /// Unlike [`Self::value_data`] this returns the [`Buffer`]
276
    /// allowing for zero-copy cloning
277
    #[inline]
278
165
    pub fn values(&self) -> &Buffer {
279
165
        &self.value_data
280
165
    }
281
282
    /// Returns the raw value data
283
176
    pub fn value_data(&self) -> &[u8] {
284
176
        self.value_data.as_slice()
285
176
    }
286
287
    /// Returns true if all data within this array is ASCII
288
    pub fn is_ascii(&self) -> bool {
289
        let offsets = self.value_offsets();
290
        let start = offsets.first().unwrap();
291
        let end = offsets.last().unwrap();
292
        self.value_data()[start.as_usize()..end.as_usize()].is_ascii()
293
    }
294
295
    /// Returns the offset values in the offsets buffer
296
    #[inline]
297
174k
    pub fn value_offsets(&self) -> &[T::Offset] {
298
174k
        &self.value_offsets
299
174k
    }
300
301
    /// Returns the element at index `i`
302
    ///
303
    /// Note: This method does not check for nulls and the value is arbitrary
304
    /// if [`is_null`](Self::is_null) returns true for the index.
305
    ///
306
    /// # Safety
307
    /// Caller is responsible for ensuring that the index is within the bounds of the array
308
87.2k
    pub unsafe fn value_unchecked(&self, i: usize) -> &T::Native {
309
87.2k
        let end = *unsafe { self.value_offsets().get_unchecked(i + 1) };
310
87.2k
        let start = *unsafe { self.value_offsets().get_unchecked(i) };
311
312
        // Soundness
313
        // pointer alignment & location is ensured by RawPtrBox
314
        // buffer bounds/offset is ensured by the value_offset invariants
315
316
        // Safety of `to_isize().unwrap_unchecked()` and `to_usize().unwrap_unchecked()`
317
        // `start` and `end` are &OffsetSize, which is a generic type that implements the
318
        // OffsetSizeTrait. Currently, only i32 and i64 implement OffsetSizeTrait,
319
        // both of which should cleanly cast to isize on an architecture that supports
320
        // 32/64-bit offsets
321
87.2k
        let b = unsafe {
322
87.2k
            std::slice::from_raw_parts(
323
87.2k
                self.value_data
324
87.2k
                    .as_ptr()
325
87.2k
                    .offset(start.to_isize().unwrap_unchecked()),
326
87.2k
                (end - start).to_usize().unwrap_unchecked(),
327
            )
328
        };
329
330
        // SAFETY:
331
        // ArrayData is valid
332
87.2k
        unsafe { T::Native::from_bytes_unchecked(b) }
333
87.2k
    }
334
335
    /// Returns the element at index `i`
336
    ///
337
    /// Note: This method does not check for nulls and the value is arbitrary
338
    /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index.
339
    ///
340
    /// # Panics
341
    /// Panics if index `i` is out of bounds.
342
83.6k
    pub fn value(&self, i: usize) -> &T::Native {
343
83.6k
        assert!(
344
83.6k
            i < self.len(),
345
0
            "Trying to access an element at index {} from a {}{}Array of length {}",
346
            i,
347
            T::Offset::PREFIX,
348
            T::PREFIX,
349
0
            self.len()
350
        );
351
        // SAFETY:
352
        // Verified length above
353
83.6k
        unsafe { self.value_unchecked(i) }
354
83.6k
    }
355
356
    /// Constructs a new iterator over this array
357
107
    pub fn iter(&self) -> ArrayIter<&Self> {
358
107
        ArrayIter::new(self)
359
107
    }
360
361
    /// Returns a zero-copy slice of this array with the indicated offset and length.
362
220
    pub fn slice(&self, offset: usize, length: usize) -> Self {
363
        Self {
364
220
            data_type: T::DATA_TYPE,
365
220
            value_offsets: self.value_offsets.slice(offset, length),
366
220
            value_data: self.value_data.clone(),
367
220
            nulls: self.nulls.as_ref().map(|n| 
n204
.
slice204
(
offset204
,
length204
)),
368
        }
369
220
    }
370
371
    /// Returns `GenericByteBuilder` of this byte array for mutating its values if the underlying
372
    /// offset and data buffers are not shared by others.
373
    pub fn into_builder(self) -> Result<GenericByteBuilder<T>, Self> {
374
        let len = self.len();
375
        let value_len = T::Offset::as_usize(self.value_offsets()[len] - self.value_offsets()[0]);
376
377
        let data = self.into_data();
378
        let null_bit_buffer = data.nulls().map(|b| b.inner().sliced());
379
380
        let element_len = std::mem::size_of::<T::Offset>();
381
        let offset_buffer = data.buffers()[0]
382
            .slice_with_length(data.offset() * element_len, (len + 1) * element_len);
383
384
        let element_len = std::mem::size_of::<u8>();
385
        let value_buffer = data.buffers()[1]
386
            .slice_with_length(data.offset() * element_len, value_len * element_len);
387
388
        drop(data);
389
390
        let try_mutable_null_buffer = match null_bit_buffer {
391
            None => Ok(None),
392
            Some(null_buffer) => {
393
                // Null buffer exists, tries to make it mutable
394
                null_buffer.into_mutable().map(Some)
395
            }
396
        };
397
398
        let try_mutable_buffers = match try_mutable_null_buffer {
399
            Ok(mutable_null_buffer) => {
400
                // Got mutable null buffer, tries to get mutable value buffer
401
                let try_mutable_offset_buffer = offset_buffer.into_mutable();
402
                let try_mutable_value_buffer = value_buffer.into_mutable();
403
404
                // try_mutable_offset_buffer.map(...).map_err(...) doesn't work as the compiler complains
405
                // mutable_null_buffer is moved into map closure.
406
                match (try_mutable_offset_buffer, try_mutable_value_buffer) {
407
                    (Ok(mutable_offset_buffer), Ok(mutable_value_buffer)) => unsafe {
408
                        Ok(GenericByteBuilder::<T>::new_from_buffer(
409
                            mutable_offset_buffer,
410
                            mutable_value_buffer,
411
                            mutable_null_buffer,
412
                        ))
413
                    },
414
                    (Ok(mutable_offset_buffer), Err(value_buffer)) => Err((
415
                        mutable_offset_buffer.into(),
416
                        value_buffer,
417
                        mutable_null_buffer.map(|b| b.into()),
418
                    )),
419
                    (Err(offset_buffer), Ok(mutable_value_buffer)) => Err((
420
                        offset_buffer,
421
                        mutable_value_buffer.into(),
422
                        mutable_null_buffer.map(|b| b.into()),
423
                    )),
424
                    (Err(offset_buffer), Err(value_buffer)) => Err((
425
                        offset_buffer,
426
                        value_buffer,
427
                        mutable_null_buffer.map(|b| b.into()),
428
                    )),
429
                }
430
            }
431
            Err(mutable_null_buffer) => {
432
                // Unable to get mutable null buffer
433
                Err((offset_buffer, value_buffer, Some(mutable_null_buffer)))
434
            }
435
        };
436
437
        match try_mutable_buffers {
438
            Ok(builder) => Ok(builder),
439
            Err((offset_buffer, value_buffer, null_bit_buffer)) => {
440
                let builder = ArrayData::builder(T::DATA_TYPE)
441
                    .len(len)
442
                    .add_buffer(offset_buffer)
443
                    .add_buffer(value_buffer)
444
                    .null_bit_buffer(null_bit_buffer);
445
446
                let array_data = unsafe { builder.build_unchecked() };
447
                let array = GenericByteArray::<T>::from(array_data);
448
449
                Err(array)
450
            }
451
        }
452
    }
453
}
454
455
impl<T: ByteArrayType> std::fmt::Debug for GenericByteArray<T> {
456
0
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
457
0
        write!(f, "{}{}Array\n[\n", T::Offset::PREFIX, T::PREFIX)?;
458
0
        print_long_array(self, f, |array, index, f| {
459
0
            std::fmt::Debug::fmt(&array.value(index), f)
460
0
        })?;
461
0
        write!(f, "]")
462
0
    }
463
}
464
465
impl<T: ByteArrayType> Array for GenericByteArray<T> {
466
160k
    fn as_any(&self) -> &dyn Any {
467
160k
        self
468
160k
    }
469
470
160k
    fn to_data(&self) -> ArrayData {
471
160k
        self.clone().into()
472
160k
    }
473
474
80.1k
    fn into_data(self) -> ArrayData {
475
80.1k
        self.into()
476
80.1k
    }
477
478
241k
    fn data_type(&self) -> &DataType {
479
241k
        &self.data_type
480
241k
    }
481
482
115
    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
483
115
        Arc::new(self.slice(offset, length))
484
115
    }
485
486
565k
    fn len(&self) -> usize {
487
565k
        self.value_offsets.len() - 1
488
565k
    }
489
490
20
    fn is_empty(&self) -> bool {
491
20
        self.value_offsets.len() <= 1
492
20
    }
493
494
0
    fn shrink_to_fit(&mut self) {
495
0
        self.value_offsets.shrink_to_fit();
496
0
        self.value_data.shrink_to_fit();
497
0
        if let Some(nulls) = &mut self.nulls {
498
0
            nulls.shrink_to_fit();
499
0
        }
500
0
    }
501
502
0
    fn offset(&self) -> usize {
503
0
        0
504
0
    }
505
506
160k
    fn nulls(&self) -> Option<&NullBuffer> {
507
160k
        self.nulls.as_ref()
508
160k
    }
509
510
3
    fn logical_null_count(&self) -> usize {
511
        // More efficient than the default implementation
512
3
        self.null_count()
513
3
    }
514
515
0
    fn get_buffer_memory_size(&self) -> usize {
516
0
        let mut sum = self.value_offsets.inner().inner().capacity();
517
0
        sum += self.value_data.capacity();
518
0
        if let Some(x) = &self.nulls {
519
0
            sum += x.buffer().capacity()
520
0
        }
521
0
        sum
522
0
    }
523
524
0
    fn get_array_memory_size(&self) -> usize {
525
0
        std::mem::size_of::<Self>() + self.get_buffer_memory_size()
526
0
    }
527
}
528
529
impl<'a, T: ByteArrayType> ArrayAccessor for &'a GenericByteArray<T> {
530
    type Item = &'a T::Native;
531
532
    fn value(&self, index: usize) -> Self::Item {
533
        GenericByteArray::value(self, index)
534
    }
535
536
3.57k
    unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
537
3.57k
        unsafe { GenericByteArray::value_unchecked(self, index) }
538
3.57k
    }
539
}
540
541
impl<T: ByteArrayType> From<ArrayData> for GenericByteArray<T> {
542
160k
    fn from(data: ArrayData) -> Self {
543
160k
        assert_eq!(
544
160k
            data.data_type(),
545
160k
            &Self::DATA_TYPE,
546
0
            "{}{}Array expects DataType::{}",
547
            T::Offset::PREFIX,
548
            T::PREFIX,
549
0
            Self::DATA_TYPE
550
        );
551
160k
        assert_eq!(
552
160k
            data.buffers().len(),
553
            2,
554
0
            "{}{}Array data should contain 2 buffers only (offsets and values)",
555
            T::Offset::PREFIX,
556
            T::PREFIX,
557
        );
558
        // SAFETY:
559
        // ArrayData is valid, and verified type above
560
160k
        let value_offsets = unsafe { get_offsets(&data) };
561
160k
        let value_data = data.buffers()[1].clone();
562
160k
        Self {
563
160k
            value_offsets,
564
160k
            value_data,
565
160k
            data_type: T::DATA_TYPE,
566
160k
            nulls: data.nulls().cloned(),
567
160k
        }
568
160k
    }
569
}
570
571
impl<T: ByteArrayType> From<GenericByteArray<T>> for ArrayData {
572
240k
    fn from(array: GenericByteArray<T>) -> Self {
573
240k
        let len = array.len();
574
575
240k
        let offsets = array.value_offsets.into_inner().into_inner();
576
240k
        let builder = ArrayDataBuilder::new(array.data_type)
577
240k
            .len(len)
578
240k
            .buffers(vec![offsets, array.value_data])
579
240k
            .nulls(array.nulls);
580
581
240k
        unsafe { builder.build_unchecked() }
582
240k
    }
583
}
584
585
impl<'a, T: ByteArrayType> IntoIterator for &'a GenericByteArray<T> {
586
    type Item = Option<&'a T::Native>;
587
    type IntoIter = ArrayIter<Self>;
588
589
6
    fn into_iter(self) -> Self::IntoIter {
590
6
        ArrayIter::new(self)
591
6
    }
592
}
593
594
impl<'a, Ptr, T: ByteArrayType> FromIterator<&'a Option<Ptr>> for GenericByteArray<T>
595
where
596
    Ptr: AsRef<T::Native> + 'a,
597
{
598
    fn from_iter<I: IntoIterator<Item = &'a Option<Ptr>>>(iter: I) -> Self {
599
        iter.into_iter()
600
            .map(|o| o.as_ref().map(|p| p.as_ref()))
601
            .collect()
602
    }
603
}
604
605
impl<Ptr, T: ByteArrayType> FromIterator<Option<Ptr>> for GenericByteArray<T>
606
where
607
    Ptr: AsRef<T::Native>,
608
{
609
204
    fn from_iter<I: IntoIterator<Item = Option<Ptr>>>(iter: I) -> Self {
610
204
        let iter = iter.into_iter();
611
204
        let mut builder = GenericByteBuilder::with_capacity(iter.size_hint().0, 1024);
612
204
        builder.extend(iter);
613
204
        builder.finish()
614
204
    }
615
}
616
617
#[cfg(test)]
618
mod tests {
619
    use crate::{Array, BinaryArray, StringArray};
620
    use arrow_buffer::{Buffer, NullBuffer, OffsetBuffer};
621
622
    #[test]
623
    fn try_new() {
624
        let data = Buffer::from_slice_ref("helloworld");
625
        let offsets = OffsetBuffer::new(vec![0, 5, 10].into());
626
        StringArray::new(offsets.clone(), data.clone(), None);
627
628
        let nulls = NullBuffer::new_null(3);
629
        let err =
630
            StringArray::try_new(offsets.clone(), data.clone(), Some(nulls.clone())).unwrap_err();
631
        assert_eq!(
632
            err.to_string(),
633
            "Invalid argument error: Incorrect length of null buffer for StringArray, expected 2 got 3"
634
        );
635
636
        let err = BinaryArray::try_new(offsets.clone(), data.clone(), Some(nulls)).unwrap_err();
637
        assert_eq!(
638
            err.to_string(),
639
            "Invalid argument error: Incorrect length of null buffer for BinaryArray, expected 2 got 3"
640
        );
641
642
        let non_utf8_data = Buffer::from_slice_ref(b"he\xFFloworld");
643
        let err = StringArray::try_new(offsets.clone(), non_utf8_data.clone(), None).unwrap_err();
644
        assert_eq!(
645
            err.to_string(),
646
            "Invalid argument error: Encountered non UTF-8 data: invalid utf-8 sequence of 1 bytes from index 2"
647
        );
648
649
        BinaryArray::new(offsets, non_utf8_data, None);
650
651
        let offsets = OffsetBuffer::new(vec![0, 5, 11].into());
652
        let err = StringArray::try_new(offsets.clone(), data.clone(), None).unwrap_err();
653
        assert_eq!(
654
            err.to_string(),
655
            "Invalid argument error: Offset of 11 exceeds length of values 10"
656
        );
657
658
        let err = BinaryArray::try_new(offsets.clone(), data, None).unwrap_err();
659
        assert_eq!(
660
            err.to_string(),
661
            "Invalid argument error: Maximum offset of 11 is larger than values of length 10"
662
        );
663
664
        let non_ascii_data = Buffer::from_slice_ref("heìloworld");
665
        StringArray::new(offsets.clone(), non_ascii_data.clone(), None);
666
        BinaryArray::new(offsets, non_ascii_data.clone(), None);
667
668
        let offsets = OffsetBuffer::new(vec![0, 3, 10].into());
669
        let err = StringArray::try_new(offsets.clone(), non_ascii_data.clone(), None).unwrap_err();
670
        assert_eq!(
671
            err.to_string(),
672
            "Invalid argument error: Split UTF-8 codepoint at offset 3"
673
        );
674
675
        BinaryArray::new(offsets, non_ascii_data, None);
676
    }
677
678
    #[test]
679
    fn create_repeated() {
680
        let arr = BinaryArray::new_repeated(b"hello", 3);
681
        assert_eq!(arr.len(), 3);
682
        assert_eq!(arr.value(0), b"hello");
683
        assert_eq!(arr.value(1), b"hello");
684
        assert_eq!(arr.value(2), b"hello");
685
686
        let arr = StringArray::new_repeated("world", 2);
687
        assert_eq!(arr.len(), 2);
688
        assert_eq!(arr.value(0), "world");
689
        assert_eq!(arr.value(1), "world");
690
    }
691
692
    #[test]
693
    #[should_panic(expected = "usize overflow")]
694
    fn create_repeated_usize_overflow_1() {
695
        let _arr = BinaryArray::new_repeated(b"hello", (usize::MAX / "hello".len()) + 1);
696
    }
697
698
    #[test]
699
    #[should_panic(expected = "usize overflow")]
700
    fn create_repeated_usize_overflow_2() {
701
        let _arr = BinaryArray::new_repeated(b"hello", usize::MAX);
702
    }
703
704
    #[test]
705
    #[should_panic(expected = "offset overflow")]
706
    fn create_repeated_i32_offset_overflow_1() {
707
        let _arr = BinaryArray::new_repeated(b"hello", usize::MAX / "hello".len());
708
    }
709
710
    #[test]
711
    #[should_panic(expected = "offset overflow")]
712
    fn create_repeated_i32_offset_overflow_2() {
713
        let _arr = BinaryArray::new_repeated(b"hello", ((i32::MAX as usize) / "hello".len()) + 1);
714
    }
715
}