Coverage Report

Created: 2025-11-17 14:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-array/src/builder/generic_bytes_builder.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use crate::builder::ArrayBuilder;
19
use crate::types::{ByteArrayType, GenericBinaryType, GenericStringType};
20
use crate::{Array, ArrayRef, GenericByteArray, OffsetSizeTrait};
21
use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer, NullBufferBuilder, ScalarBuffer};
22
use arrow_data::ArrayDataBuilder;
23
use arrow_schema::ArrowError;
24
use std::any::Any;
25
use std::sync::Arc;
26
27
/// Builder for [`GenericByteArray`]
28
///
29
/// For building strings, see docs on [`GenericStringBuilder`].
30
/// For building binary, see docs on [`GenericBinaryBuilder`].
31
pub struct GenericByteBuilder<T: ByteArrayType> {
32
    // Bytes of every appended value, stored back to back.
    value_builder: Vec<u8>,
33
    // `with_capacity` seeds this with a single zero; each appended item then pushes the
    // current length of `value_builder`.
    offsets_builder: Vec<T::Offset>,
34
    // Validity of each appended item (`true` for values, `false` for nulls).
    null_buffer_builder: NullBufferBuilder,
35
}
36
37
impl<T: ByteArrayType> GenericByteBuilder<T> {
38
    /// Creates a new [`GenericByteBuilder`].
39
80.0k
    pub fn new() -> Self {
40
80.0k
        Self::with_capacity(1024, 1024)
41
80.0k
    }
42
43
    /// Creates a new [`GenericByteBuilder`].
44
    ///
45
    /// - `item_capacity` is the number of items to pre-allocate.
46
    ///   The size of the preallocated buffer of offsets is the number of items plus one.
47
    /// - `data_capacity` is the total number of bytes of data to pre-allocate
48
    ///   (for all items, not per item).
49
80.3k
    pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
50
80.3k
        let mut offsets_builder = Vec::with_capacity(item_capacity + 1);
51
80.3k
        offsets_builder.push(T::Offset::from_usize(0).unwrap());
52
80.3k
        Self {
53
80.3k
            value_builder: Vec::with_capacity(data_capacity),
54
80.3k
            offsets_builder,
55
80.3k
            null_buffer_builder: NullBufferBuilder::new(item_capacity),
56
80.3k
        }
57
80.3k
    }
58
59
    /// Creates a new  [`GenericByteBuilder`] from buffers.
60
    ///
61
    /// # Safety
62
    ///
63
    /// This doesn't verify buffer contents as it assumes the buffers are from
64
    /// existing and valid [`GenericByteArray`].
65
    pub unsafe fn new_from_buffer(
66
        offsets_buffer: MutableBuffer,
67
        value_buffer: MutableBuffer,
68
        null_buffer: Option<MutableBuffer>,
69
    ) -> Self {
70
        let offsets_builder: Vec<T::Offset> =
71
            ScalarBuffer::<T::Offset>::from(offsets_buffer).into();
72
        let value_builder: Vec<u8> = ScalarBuffer::<u8>::from(value_buffer).into();
73
74
        let null_buffer_builder = null_buffer
75
            .map(|buffer| NullBufferBuilder::new_from_buffer(buffer, offsets_builder.len() - 1))
76
            .unwrap_or_else(|| NullBufferBuilder::new_with_len(offsets_builder.len() - 1));
77
78
        Self {
79
            offsets_builder,
80
            value_builder,
81
            null_buffer_builder,
82
        }
83
    }
84
85
    #[inline]
86
436k
    fn next_offset(&self) -> T::Offset {
87
436k
        T::Offset::from_usize(self.value_builder.len()).expect("byte array offset overflow")
88
436k
    }
89
90
    /// Appends a value into the builder.
91
    ///
92
    /// See the [GenericStringBuilder] documentation for examples of
93
    /// incrementally building string values with multiple `write!` calls.
94
    ///
95
    /// # Panics
96
    ///
97
    /// Panics if the resulting length of [`Self::values_slice`] would exceed
98
    /// `T::Offset::MAX` bytes.
99
    ///
100
    /// For example, this can happen with [`StringArray`] or [`BinaryArray`]
101
    /// where the total length of all values exceeds 2GB
102
    ///
103
    /// [`StringArray`]: crate::StringArray
104
    /// [`BinaryArray`]: crate::BinaryArray
105
    #[inline]
106
324k
    pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
107
324k
        self.value_builder
108
324k
            .extend_from_slice(value.as_ref().as_ref());
109
324k
        self.null_buffer_builder.append(true);
110
324k
        self.offsets_builder.push(self.next_offset());
111
324k
    }
112
113
    /// Append an `Option` value into the builder.
114
    ///
115
    /// - A `None` value will append a null value.
116
    /// - A `Some` value will append the value.
117
    ///
118
    /// See [`Self::append_value`] for more panic information.
119
    #[inline]
120
267k
    pub fn append_option(&mut self, value: Option<impl AsRef<T::Native>>) {
121
267k
        match value {
122
30.9k
            None => self.append_null(),
123
236k
            Some(v) => self.append_value(v),
124
        };
125
267k
    }
126
127
    /// Append a null value into the builder.
128
    #[inline]
129
30.9k
    pub fn append_null(&mut self) {
130
30.9k
        self.null_buffer_builder.append(false);
131
30.9k
        self.offsets_builder.push(self.next_offset());
132
30.9k
    }
133
134
    /// Appends `n` `null`s into the builder.
135
    #[inline]
136
    pub fn append_nulls(&mut self, n: usize) {
137
        self.null_buffer_builder.append_n_nulls(n);
138
        let next_offset = self.next_offset();
139
        self.offsets_builder
140
            .extend(std::iter::repeat_n(next_offset, n));
141
    }
142
143
    /// Appends array values and null to this builder as is
144
    /// (this means that underlying null values are copied as is).
145
    #[inline]
146
129
    pub fn append_array(&mut self, array: &GenericByteArray<T>) -> Result<(), ArrowError> {
147
        use num_traits::CheckedAdd;
148
129
        if array.len() == 0 {
149
0
            return Ok(());
150
129
        }
151
152
129
        let offsets = array.offsets();
153
154
        // If the offsets are contiguous, we can append them directly avoiding the need to align
155
        // for example, when the first appended array is not sliced (starts at offset 0)
156
129
        if self.next_offset() == offsets[0] {
157
16
            self.offsets_builder.extend_from_slice(&offsets[1..]);
158
16
        } else {
159
            // Shifting all the offsets
160
113
            let shift: T::Offset = self.next_offset() - offsets[0];
161
162
113
            if shift.checked_add(&offsets[offsets.len() - 1]).is_none() {
163
0
                return Err(ArrowError::OffsetOverflowError(
164
0
                    shift.as_usize() + offsets[offsets.len() - 1].as_usize(),
165
0
                ));
166
113
            }
167
168
113
            self.offsets_builder
169
31.2k
                .
extend113
(
offsets[1..]113
.
iter113
().
map113
(|&offset| offset + shift));
170
        }
171
172
        // Append underlying values, starting from the first offset and ending at the last offset
173
129
        self.value_builder.extend_from_slice(
174
129
            &array.values().as_slice()[offsets[0].as_usize()..offsets[array.len()].as_usize()],
175
        );
176
177
129
        if let Some(
null_buffer93
) = array.nulls() {
178
93
            self.null_buffer_builder.append_buffer(null_buffer);
179
93
        } else {
180
36
            self.null_buffer_builder.append_n_non_nulls(array.len());
181
36
        }
182
129
        Ok(())
183
129
    }
184
185
    /// Builds the [`GenericByteArray`] and reset this builder.
186
80.3k
    pub fn finish(&mut self) -> GenericByteArray<T> {
187
80.3k
        let array_type = T::DATA_TYPE;
188
80.3k
        let array_builder = ArrayDataBuilder::new(array_type)
189
80.3k
            .len(self.len())
190
80.3k
            .add_buffer(std::mem::take(&mut self.offsets_builder).into())
191
80.3k
            .add_buffer(std::mem::take(&mut self.value_builder).into())
192
80.3k
            .nulls(self.null_buffer_builder.finish());
193
194
80.3k
        self.offsets_builder.push(self.next_offset());
195
80.3k
        let array_data = unsafe { array_builder.build_unchecked() };
196
80.3k
        GenericByteArray::from(array_data)
197
80.3k
    }
198
199
    /// Builds the [`GenericByteArray`] without resetting the builder.
200
0
    pub fn finish_cloned(&self) -> GenericByteArray<T> {
201
0
        let array_type = T::DATA_TYPE;
202
0
        let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice());
203
0
        let value_buffer = Buffer::from_slice_ref(self.value_builder.as_slice());
204
0
        let array_builder = ArrayDataBuilder::new(array_type)
205
0
            .len(self.len())
206
0
            .add_buffer(offset_buffer)
207
0
            .add_buffer(value_buffer)
208
0
            .nulls(self.null_buffer_builder.finish_cloned());
209
210
0
        let array_data = unsafe { array_builder.build_unchecked() };
211
0
        GenericByteArray::from(array_data)
212
0
    }
213
214
    /// Returns the current values buffer as a slice
215
91.7k
    pub fn values_slice(&self) -> &[u8] {
216
91.7k
        self.value_builder.as_slice()
217
91.7k
    }
218
219
    /// Returns the current offsets buffer as a slice
220
91.7k
    pub fn offsets_slice(&self) -> &[T::Offset] {
221
91.7k
        self.offsets_builder.as_slice()
222
91.7k
    }
223
224
    /// Returns the current null buffer as a slice
225
    pub fn validity_slice(&self) -> Option<&[u8]> {
226
        self.null_buffer_builder.as_slice()
227
    }
228
229
    /// Returns the current null buffer as a mutable slice
230
    pub fn validity_slice_mut(&mut self) -> Option<&mut [u8]> {
231
        self.null_buffer_builder.as_slice_mut()
232
    }
233
}
234
235
impl<T: ByteArrayType> std::fmt::Debug for GenericByteBuilder<T> {
    /// Writes the builder name (offset prefix, type prefix, then `Builder`)
    /// followed by the three internal buffers.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}{}Builder", T::Offset::PREFIX, T::PREFIX)?;

        // The struct name is empty because the name was already written above.
        let mut fields = f.debug_struct("");
        fields.field("value_builder", &self.value_builder);
        fields.field("offsets_builder", &self.offsets_builder);
        fields.field("null_buffer_builder", &self.null_buffer_builder);
        fields.finish()
    }
}
245
246
impl<T: ByteArrayType> Default for GenericByteBuilder<T> {
247
    fn default() -> Self {
248
        Self::new()
249
    }
250
}
251
252
impl<T: ByteArrayType> ArrayBuilder for GenericByteBuilder<T> {
253
    /// Returns the number of binary slots in the builder
254
168k
    fn len(&self) -> usize {
255
168k
        self.null_buffer_builder.len()
256
168k
    }
257
258
    /// Builds the array and reset this builder.
259
2
    fn finish(&mut self) -> ArrayRef {
260
2
        Arc::new(self.finish())
261
2
    }
262
263
    /// Builds the array without resetting the builder.
264
0
    fn finish_cloned(&self) -> ArrayRef {
265
0
        Arc::new(self.finish_cloned())
266
0
    }
267
268
    /// Returns the builder as a non-mutable `Any` reference.
269
0
    fn as_any(&self) -> &dyn Any {
270
0
        self
271
0
    }
272
273
    /// Returns the builder as a mutable `Any` reference.
274
0
    fn as_any_mut(&mut self) -> &mut dyn Any {
275
0
        self
276
0
    }
277
278
    /// Returns the boxed builder as a box of `Any`.
279
0
    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
280
0
        self
281
0
    }
282
}
283
284
impl<T: ByteArrayType, V: AsRef<T::Native>> Extend<Option<V>> for GenericByteBuilder<T> {
    /// Appends every item of `iter`: `Some` as a value, `None` as a null.
    #[inline]
    fn extend<I: IntoIterator<Item = Option<V>>>(&mut self, iter: I) {
        iter.into_iter().for_each(|item| self.append_option(item));
    }
}
292
293
/// Array builder for [`GenericStringArray`][crate::GenericStringArray]
294
///
295
/// Values can be appended using [`GenericByteBuilder::append_value`], and nulls with
296
/// [`GenericByteBuilder::append_null`].
297
///
298
/// This builder also implements [`std::fmt::Write`]. Written data is buffered and
299
/// becomes the start of the next appended value. This allows using [`std::fmt::Display`]
300
/// with standard Rust idioms like `write!` and `writeln!` to write data
301
/// directly to the builder without intermediate allocations.
302
///
303
/// # Example writing strings with `append_value`
304
/// ```
305
/// # use arrow_array::builder::GenericStringBuilder;
306
/// let mut builder = GenericStringBuilder::<i32>::new();
307
///
308
/// // Write one string value
309
/// builder.append_value("foobarbaz");
310
///
311
/// // Write a second string
312
/// builder.append_value("v2");
313
///
314
/// let array = builder.finish();
315
/// assert_eq!(array.value(0), "foobarbaz");
316
/// assert_eq!(array.value(1), "v2");
317
/// ```
318
///
319
/// # Example incrementally writing strings with `std::fmt::Write`
320
///
321
/// ```
322
/// # use std::fmt::Write;
323
/// # use arrow_array::builder::GenericStringBuilder;
324
/// let mut builder = GenericStringBuilder::<i32>::new();
325
///
326
/// // Write data in multiple `write!` calls
327
/// write!(builder, "foo").unwrap();
328
/// write!(builder, "bar").unwrap();
329
/// // The next call to append_value finishes the current string
330
/// // including all previously written strings.
331
/// builder.append_value("baz");
332
///
333
/// // Write second value with a single write call
334
/// write!(builder, "v2").unwrap();
335
/// // finish the value by calling append_value with an empty string
336
/// builder.append_value("");
337
///
338
/// let array = builder.finish();
339
/// assert_eq!(array.value(0), "foobarbaz");
340
/// assert_eq!(array.value(1), "v2");
341
/// ```
342
pub type GenericStringBuilder<O> = GenericByteBuilder<GenericStringType<O>>;
343
344
impl<O: OffsetSizeTrait> std::fmt::Write for GenericStringBuilder<O> {
    /// Buffers `s` in the value bytes without finishing a value; no offset or
    /// validity bit is added here.
    fn write_str(&mut self, s: &str) -> std::fmt::Result {
        let bytes = s.as_bytes();
        self.value_builder.extend_from_slice(bytes);
        Ok(())
    }
}
350
351
/// Assumed average length, in bytes, of one string in a [`GenericStringBuilder`]
352
///
353
/// To create a [`GenericStringBuilder`] using `.with_capacity` we are required to provide: \
354
/// - `item_capacity` - the row count \
355
/// - `data_capacity` - total string byte count \
356
///
357
/// We will use `AVERAGE_STRING_LENGTH * row_count` for `data_capacity`. \
358
///
359
/// These capacities are preallocation hints used to improve performance,
360
/// but consequences of passing a hint too large or too small should be negligible.
361
const AVERAGE_STRING_LENGTH: usize = 16;
362
/// Trait for string-like array builders
363
///
364
/// This trait provides a unified interface for builders that append string-like data,
365
/// such as [`GenericStringBuilder<O>`] and [`crate::builder::StringViewBuilder`]
366
pub trait StringLikeArrayBuilder: ArrayBuilder {
367
    /// Returns a human-readable type name for the builder.
368
    fn type_name() -> &'static str;
369
370
    /// Creates a new builder with the given row capacity.
371
    fn with_capacity(capacity: usize) -> Self;
372
373
    /// Appends a non-null string value to the builder.
374
    fn append_value(&mut self, value: &str);
375
376
    /// Appends a null value to the builder.
377
    fn append_null(&mut self);
378
}
379
380
impl<O: OffsetSizeTrait> StringLikeArrayBuilder for GenericStringBuilder<O> {
381
    fn type_name() -> &'static str {
382
        std::any::type_name::<Self>()
383
    }
384
    fn with_capacity(capacity: usize) -> Self {
385
        Self::with_capacity(capacity, capacity * AVERAGE_STRING_LENGTH)
386
    }
387
    fn append_value(&mut self, value: &str) {
388
        Self::append_value(self, value);
389
    }
390
    fn append_null(&mut self) {
391
        Self::append_null(self);
392
    }
393
}
394
395
/// Assumed average length, in bytes, of one binary value in a [`GenericBinaryBuilder`]
396
///
397
/// To create a [`GenericBinaryBuilder`] using `.with_capacity` we are required to provide: \
398
/// - `item_capacity` - the row count \
399
/// - `data_capacity` - total binary byte count \
400
///
401
/// We will use `AVERAGE_BINARY_LENGTH * row_count` for `data_capacity`. \
402
///
403
/// These capacities are preallocation hints used to improve performance,
404
/// but consequences of passing a hint too large or too small should be negligible.
405
const AVERAGE_BINARY_LENGTH: usize = 128;
406
/// Trait for binary-like array builders
407
///
408
/// This trait provides a unified interface for builders that append binary-like data,
409
/// such as [`GenericBinaryBuilder<O>`] and [`crate::builder::BinaryViewBuilder`]
410
pub trait BinaryLikeArrayBuilder: ArrayBuilder {
411
    /// Returns a human-readable type name for the builder.
412
    fn type_name() -> &'static str;
413
414
    /// Creates a new builder with the given row capacity.
415
    fn with_capacity(capacity: usize) -> Self;
416
417
    /// Appends a non-null binary value to the builder.
418
    fn append_value(&mut self, value: &[u8]);
419
420
    /// Appends a null value to the builder.
421
    fn append_null(&mut self);
422
}
423
424
impl<O: OffsetSizeTrait> BinaryLikeArrayBuilder for GenericBinaryBuilder<O> {
425
    fn type_name() -> &'static str {
426
        std::any::type_name::<Self>()
427
    }
428
    fn with_capacity(capacity: usize) -> Self {
429
        Self::with_capacity(capacity, capacity * AVERAGE_BINARY_LENGTH)
430
    }
431
    fn append_value(&mut self, value: &[u8]) {
432
        Self::append_value(self, value);
433
    }
434
    fn append_null(&mut self) {
435
        Self::append_null(self);
436
    }
437
}
438
439
/// Array builder for [`GenericBinaryArray`][crate::GenericBinaryArray]
440
///
441
/// Values can be appended using [`GenericByteBuilder::append_value`], and nulls with
442
/// [`GenericByteBuilder::append_null`].
443
///
444
/// # Example
445
/// ```
446
/// # use arrow_array::builder::GenericBinaryBuilder;
447
/// let mut builder = GenericBinaryBuilder::<i32>::new();
448
///
449
/// // Write data
450
/// builder.append_value("foo");
451
///
452
/// // Write second value
453
/// builder.append_value(&[0,1,2]);
454
///
455
/// let array = builder.finish();
456
/// // binary values
457
/// assert_eq!(array.value(0), b"foo");
458
/// assert_eq!(array.value(1), b"\x00\x01\x02");
459
/// ```
460
///
461
/// # Example incrementally writing bytes with `std::io::Write`
462
///
463
/// ```
464
/// # use std::io::Write;
465
/// # use arrow_array::builder::GenericBinaryBuilder;
466
/// let mut builder = GenericBinaryBuilder::<i32>::new();
467
///
468
/// // Write data in multiple `write!` calls
469
/// write!(builder, "foo").unwrap();
470
/// write!(builder, "bar").unwrap();
471
/// // The next call to append_value finishes the current value
472
/// // including all previously written bytes.
473
/// builder.append_value("baz");
474
///
475
/// // Write second value with a single write call
476
/// write!(builder, "v2").unwrap();
477
/// // finish the value by calling append_value with an empty string
478
/// builder.append_value("");
479
///
480
/// let array = builder.finish();
481
/// assert_eq!(array.value(0), "foobarbaz".as_bytes());
482
/// assert_eq!(array.value(1), "v2".as_bytes());
483
/// ```
484
pub type GenericBinaryBuilder<O> = GenericByteBuilder<GenericBinaryType<O>>;
485
486
impl<O: OffsetSizeTrait> std::io::Write for GenericBinaryBuilder<O> {
    /// Buffers all of `bs` in the value bytes without finishing a value and
    /// reports the whole slice as written.
    fn write(&mut self, bs: &[u8]) -> std::io::Result<usize> {
        let written = bs.len();
        self.value_builder.extend_from_slice(bs);
        Ok(written)
    }

    /// Does nothing and always succeeds.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
496
497
#[cfg(test)]
498
mod tests {
499
    use super::*;
500
    use crate::GenericStringArray;
501
    use crate::array::Array;
502
    use arrow_buffer::NullBuffer;
503
    use std::fmt::Write as _;
504
    use std::io::Write as _;
505
506
    fn _test_generic_binary_builder<O: OffsetSizeTrait>() {
507
        let mut builder = GenericBinaryBuilder::<O>::new();
508
509
        builder.append_value(b"hello");
510
        builder.append_value(b"");
511
        builder.append_null();
512
        builder.append_value(b"rust");
513
514
        let array = builder.finish();
515
516
        assert_eq!(4, array.len());
517
        assert_eq!(1, array.null_count());
518
        assert_eq!(b"hello", array.value(0));
519
        assert_eq!([] as [u8; 0], array.value(1));
520
        assert!(array.is_null(2));
521
        assert_eq!(b"rust", array.value(3));
522
        assert_eq!(O::from_usize(5).unwrap(), array.value_offsets()[2]);
523
        assert_eq!(O::from_usize(4).unwrap(), array.value_length(3));
524
    }
525
526
    #[test]
527
    fn test_binary_builder() {
528
        _test_generic_binary_builder::<i32>()
529
    }
530
531
    #[test]
532
    fn test_large_binary_builder() {
533
        _test_generic_binary_builder::<i64>()
534
    }
535
536
    fn _test_generic_binary_builder_all_nulls<O: OffsetSizeTrait>() {
537
        let mut builder = GenericBinaryBuilder::<O>::new();
538
        builder.append_null();
539
        builder.append_null();
540
        builder.append_null();
541
        builder.append_nulls(2);
542
        assert_eq!(5, builder.len());
543
        assert!(!builder.is_empty());
544
545
        let array = builder.finish();
546
        assert_eq!(5, array.null_count());
547
        assert_eq!(5, array.len());
548
        assert!(array.is_null(0));
549
        assert!(array.is_null(1));
550
        assert!(array.is_null(2));
551
        assert!(array.is_null(3));
552
        assert!(array.is_null(4));
553
    }
554
555
    #[test]
556
    fn test_binary_builder_all_nulls() {
557
        _test_generic_binary_builder_all_nulls::<i32>()
558
    }
559
560
    #[test]
561
    fn test_large_binary_builder_all_nulls() {
562
        _test_generic_binary_builder_all_nulls::<i64>()
563
    }
564
565
    fn _test_generic_binary_builder_reset<O: OffsetSizeTrait>() {
566
        let mut builder = GenericBinaryBuilder::<O>::new();
567
568
        builder.append_value(b"hello");
569
        builder.append_value(b"");
570
        builder.append_null();
571
        builder.append_value(b"rust");
572
        builder.finish();
573
574
        assert!(builder.is_empty());
575
576
        builder.append_value(b"parquet");
577
        builder.append_null();
578
        builder.append_value(b"arrow");
579
        builder.append_value(b"");
580
        builder.append_nulls(2);
581
        builder.append_value(b"hi");
582
        let array = builder.finish();
583
584
        assert_eq!(7, array.len());
585
        assert_eq!(3, array.null_count());
586
        assert_eq!(b"parquet", array.value(0));
587
        assert!(array.is_null(1));
588
        assert!(array.is_null(4));
589
        assert!(array.is_null(5));
590
        assert_eq!(b"arrow", array.value(2));
591
        assert_eq!(b"", array.value(1));
592
        assert_eq!(b"hi", array.value(6));
593
594
        assert_eq!(O::zero(), array.value_offsets()[0]);
595
        assert_eq!(O::from_usize(7).unwrap(), array.value_offsets()[2]);
596
        assert_eq!(O::from_usize(14).unwrap(), array.value_offsets()[7]);
597
        assert_eq!(O::from_usize(5).unwrap(), array.value_length(2));
598
    }
599
600
    #[test]
601
    fn test_binary_builder_reset() {
602
        _test_generic_binary_builder_reset::<i32>()
603
    }
604
605
    #[test]
606
    fn test_large_binary_builder_reset() {
607
        _test_generic_binary_builder_reset::<i64>()
608
    }
609
610
    fn _test_generic_string_array_builder<O: OffsetSizeTrait>() {
611
        let mut builder = GenericStringBuilder::<O>::new();
612
        let owned = "arrow".to_owned();
613
614
        builder.append_value("hello");
615
        builder.append_value("");
616
        builder.append_value(&owned);
617
        builder.append_null();
618
        builder.append_option(Some("rust"));
619
        builder.append_option(None::<&str>);
620
        builder.append_option(None::<String>);
621
        builder.append_nulls(2);
622
        builder.append_value("parquet");
623
        assert_eq!(10, builder.len());
624
625
        assert_eq!(
626
            GenericStringArray::<O>::from(vec![
627
                Some("hello"),
628
                Some(""),
629
                Some("arrow"),
630
                None,
631
                Some("rust"),
632
                None,
633
                None,
634
                None,
635
                None,
636
                Some("parquet")
637
            ]),
638
            builder.finish()
639
        );
640
    }
641
642
    #[test]
643
    fn test_string_array_builder() {
644
        _test_generic_string_array_builder::<i32>()
645
    }
646
647
    #[test]
648
    fn test_large_string_array_builder() {
649
        _test_generic_string_array_builder::<i64>()
650
    }
651
652
    fn _test_generic_string_array_builder_finish<O: OffsetSizeTrait>() {
653
        let mut builder = GenericStringBuilder::<O>::with_capacity(3, 11);
654
655
        builder.append_value("hello");
656
        builder.append_value("rust");
657
        builder.append_null();
658
659
        builder.finish();
660
        assert!(builder.is_empty());
661
        assert_eq!(&[O::zero()], builder.offsets_slice());
662
663
        builder.append_value("arrow");
664
        builder.append_value("parquet");
665
        let arr = builder.finish();
666
        // array should not have null buffer because there is not `null` value.
667
        assert!(arr.nulls().is_none());
668
        assert_eq!(GenericStringArray::<O>::from(vec!["arrow", "parquet"]), arr,)
669
    }
670
671
    #[test]
672
    fn test_string_array_builder_finish() {
673
        _test_generic_string_array_builder_finish::<i32>()
674
    }
675
676
    #[test]
677
    fn test_large_string_array_builder_finish() {
678
        _test_generic_string_array_builder_finish::<i64>()
679
    }
680
681
    fn _test_generic_string_array_builder_finish_cloned<O: OffsetSizeTrait>() {
682
        let mut builder = GenericStringBuilder::<O>::with_capacity(3, 11);
683
684
        builder.append_value("hello");
685
        builder.append_value("rust");
686
        builder.append_null();
687
688
        let mut arr = builder.finish_cloned();
689
        assert!(!builder.is_empty());
690
        assert_eq!(3, arr.len());
691
692
        builder.append_value("arrow");
693
        builder.append_value("parquet");
694
        arr = builder.finish();
695
696
        assert!(arr.nulls().is_some());
697
        assert_eq!(&[O::zero()], builder.offsets_slice());
698
        assert_eq!(5, arr.len());
699
    }
700
701
    #[test]
702
    fn test_string_array_builder_finish_cloned() {
703
        _test_generic_string_array_builder_finish_cloned::<i32>()
704
    }
705
706
    #[test]
707
    fn test_large_string_array_builder_finish_cloned() {
708
        _test_generic_string_array_builder_finish_cloned::<i64>()
709
    }
710
711
    // Two `extend` calls must produce one contiguous value buffer with running offsets.
    #[test]
    fn test_extend() {
        let first_batch = ["a", "b", "c", "", "a", "b", "c"];
        let second_batch = ["d", "cupcakes", "hello"];

        let mut string_builder = GenericStringBuilder::<i32>::new();
        string_builder.extend(first_batch.into_iter().map(Some));
        string_builder.extend(second_batch.into_iter().map(Some));
        let built = string_builder.finish();

        assert_eq!(built.value_offsets(), &[0, 1, 2, 3, 3, 4, 5, 6, 7, 15, 20]);
        assert_eq!(built.value_data(), b"abcabcdcupcakeshello");
    }
720
721
    // Text written through `fmt::Write` becomes part of the value that the next
    // `append_value` call finishes.
    #[test]
    fn test_write_str() {
        let mut string_builder = GenericStringBuilder::<i32>::new();

        // One write, then finish the value.
        write!(string_builder, "foo").unwrap();
        string_builder.append_value("");

        // `writeln!` adds a trailing newline to the value.
        writeln!(string_builder, "bar").unwrap();
        string_builder.append_value("");

        // Two writes are concatenated into a single value.
        write!(string_builder, "fiz").unwrap();
        write!(string_builder, "buz").unwrap();
        string_builder.append_value("");

        let built = string_builder.finish();
        let values: Vec<_> = built.iter().flatten().collect();
        assert_eq!(values, &["foo", "bar\n", "fizbuz"]);
    }
735
736
    // Bytes written through `io::Write` become part of the value that the next
    // `append_value` call finishes.
    #[test]
    fn test_write_bytes() {
        let mut binary_builder = GenericBinaryBuilder::<i32>::new();

        // One write, then finish the value.
        write!(binary_builder, "foo").unwrap();
        binary_builder.append_value("");

        // `writeln!` adds a trailing newline to the value.
        writeln!(binary_builder, "bar").unwrap();
        binary_builder.append_value("");

        // Two writes are concatenated into a single value.
        write!(binary_builder, "fiz").unwrap();
        write!(binary_builder, "buz").unwrap();
        binary_builder.append_value("");

        let built = binary_builder.finish();
        let values: Vec<_> = built.iter().flatten().collect();
        assert_eq!(
            values,
            &["foo".as_bytes(), "bar\n".as_bytes(), "fizbuz".as_bytes()]
        );
    }
753
754
    // Appending three consecutive pieces of a list must rebuild the full list.
    #[test]
    fn test_append_array_without_nulls() {
        let words = vec![
            "hello", "world", "how", "are", "you", "doing", "today", "I", "am", "doing", "well",
            "thank", "you", "for", "asking",
        ];
        let head = GenericStringArray::<i32>::from(words[..3].to_vec());
        let middle = GenericStringArray::<i32>::from(words[3..7].to_vec());
        let tail = GenericStringArray::<i32>::from(words[7..].to_vec());

        let mut string_builder = GenericStringBuilder::<i32>::new();
        for piece in [&head, &middle, &tail] {
            string_builder.append_array(piece).unwrap();
        }
        let actual = string_builder.finish();

        let expected = GenericStringArray::<i32>::from(words);
        assert_eq!(actual, expected);
    }
774
775
    // Same as the test without nulls, but the pieces contain nulls, including a piece
    // that is entirely null.
    #[test]
    fn test_append_array_with_nulls() {
        let items = vec![
            Some("hello"),
            None,
            Some("how"),
            None,
            None,
            None,
            None,
            Some("I"),
            Some("am"),
            Some("doing"),
            Some("well"),
        ];
        let head = GenericStringArray::<i32>::from(items[..3].to_vec());
        let middle = GenericStringArray::<i32>::from(items[3..7].to_vec());
        let tail = GenericStringArray::<i32>::from(items[7..].to_vec());

        let mut string_builder = GenericStringBuilder::<i32>::new();
        for piece in [&head, &middle, &tail] {
            string_builder.append_array(piece).unwrap();
        }
        let actual = string_builder.finish();

        let expected = GenericStringArray::<i32>::from(items);
        assert_eq!(actual, expected);
    }
804
805
    // Appending an empty array must succeed and add nothing.
    #[test]
    fn test_append_empty_array() {
        let empty = GenericStringArray::<i32>::from(Vec::<&str>::new());

        let mut string_builder = GenericStringBuilder::<i32>::new();
        string_builder.append_array(&empty).unwrap();

        let built = string_builder.finish();
        assert_eq!(built.len(), 0);
    }
813
814
    // A sliced array whose first offset is not zero must have its offsets shifted
    // when it is appended.
    #[test]
    fn test_append_array_with_offset_not_starting_at_0() {
        let items = vec![
            Some("hello"),
            None,
            Some("how"),
            None,
            None,
            None,
            None,
            Some("I"),
            Some("am"),
            Some("doing"),
            Some("well"),
        ];
        let whole = GenericStringArray::<i32>::from(items);
        let window = whole.slice(1, 4);

        // Preconditions: the slice starts after offset 0 and ends before the last offset.
        assert_ne!(window.offsets()[0].as_usize(), 0);
        assert_ne!(window.offsets().last(), whole.offsets().last());

        let mut string_builder = GenericStringBuilder::<i32>::new();
        string_builder.append_array(&window).unwrap();
        let actual = string_builder.finish();

        let expected = GenericStringArray::<i32>::from(vec![None, Some("how"), None, None]);
        assert_eq!(actual, expected);
    }
843
844
    /// Arrays whose null slots still have bytes in the values buffer are
    /// appended with those bytes kept: the logical result honours the validity
    /// mask, while the values buffer is the concatenation of every input
    /// string, including the masked ones.
    #[test]
    fn test_append_underlying_null_values_added_as_is() {
        /// Builds a string array from `values`, then replaces its null buffer
        /// with `validity` while keeping the offsets and value bytes.
        fn with_validity<const N: usize>(
            values: Vec<&str>,
            validity: &[bool; N],
        ) -> GenericStringArray<i32> {
            let (offsets, data, _) = GenericStringArray::<i32>::from(values).into_parts();
            GenericStringArray::<i32>::new(offsets, data, Some(NullBuffer::from(validity)))
        }

        let first = with_validity(
            vec![
                "hello", "world", "how", "are", "you", "doing", "today", "I", "am",
            ],
            &[
                true, false, true, false, false, true, true, true, false,
            ],
        );
        let second = with_validity(
            vec!["doing", "well", "thank", "you", "for", "asking"],
            &[false, false, true, false, true, true],
        );

        let mut builder = GenericStringBuilder::<i32>::new();
        builder.append_array(&first).unwrap();
        builder.append_array(&second).unwrap();

        let actual = builder.finish();

        // Logical view: masked entries read back as null.
        let expected = GenericStringArray::<i32>::from(vec![
            Some("hello"),
            None, // world
            Some("how"),
            None, // are
            None, // you
            Some("doing"),
            Some("today"),
            Some("I"),
            None, // am
            None, // doing
            None, // well
            Some("thank"),
            None, // you
            Some("for"),
            Some("asking"),
        ]);
        assert_eq!(actual, expected);

        // Physical view: the bytes of the masked entries are still present.
        let all_bytes = [
            "hello", "world", "how", "are", "you", "doing", "today", "I", "am", "doing", "well",
            "thank", "you", "for", "asking",
        ]
        .concat();
        let expected_underlying_buffer = Buffer::from(all_bytes.as_bytes());
        assert_eq!(actual.values(), &expected_underlying_buffer);
    }
906
907
    /// Appending back-to-back slices that together cover a whole array must
    /// reproduce that array.
    #[test]
    fn append_array_with_continues_indices() {
        let full_array = GenericStringArray::<i32>::from(vec![
            "hello", "world", "how", "are", "you", "doing", "today", "I", "am", "doing", "well",
            "thank", "you", "for", "asking",
        ]);

        let mut builder = GenericStringBuilder::<i32>::new();

        // (start, length) pairs; each slice begins where the previous one ended.
        let ranges = [(0, 3), (3, 4), (7, full_array.len() - 7)];
        for (start, length) in ranges {
            let part = full_array.slice(start, length);
            builder.append_array(&part).unwrap();
        }

        let actual = builder.finish();

        assert_eq!(actual, full_array);
    }
927
928
    /// With the builder already holding `i32::MAX - 100` bytes, appending an
    /// array carrying 200 more bytes must fail with an offset overflow error.
    #[test]
    fn test_append_array_offset_overflow_precise() {
        let mut builder = GenericStringBuilder::<i32>::new();

        // Fill the builder to just under the largest offset an i32 can hold.
        let filler = "x".repeat(i32::MAX as usize - 100);
        builder.append_value(&filler);

        // One value large enough to push the next offset past i32::MAX.
        let excess = "y".repeat(200);
        let excess_array = GenericStringArray::<i32>::from(vec![excess.as_str()]);

        assert!(matches!(
            builder.append_array(&excess_array),
            Err(ArrowError::OffsetOverflowError(_))
        ));
    }
942
}