Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-array/src/builder/generic_bytes_builder.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use crate::builder::ArrayBuilder;
19
use crate::types::{ByteArrayType, GenericBinaryType, GenericStringType};
20
use crate::{Array, ArrayRef, GenericByteArray, OffsetSizeTrait};
21
use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer, NullBufferBuilder, ScalarBuffer};
22
use arrow_data::ArrayDataBuilder;
23
use std::any::Any;
24
use std::sync::Arc;
25
26
/// Builder for [`GenericByteArray`]
27
///
28
/// For building strings, see docs on [`GenericStringBuilder`].
29
/// For building binary, see docs on [`GenericBinaryBuilder`].
30
pub struct GenericByteBuilder<T: ByteArrayType> {
31
    value_builder: Vec<u8>,
32
    offsets_builder: Vec<T::Offset>,
33
    null_buffer_builder: NullBufferBuilder,
34
}
35
36
impl<T: ByteArrayType> GenericByteBuilder<T> {
37
    /// Creates a new [`GenericByteBuilder`].
38
6
    pub fn new() -> Self {
39
6
        Self::with_capacity(1024, 1024)
40
6
    }
41
42
    /// Creates a new [`GenericByteBuilder`].
43
    ///
44
    /// - `item_capacity` is the number of items to pre-allocate.
45
    ///   The size of the preallocated buffer of offsets is the number of items plus one.
46
    /// - `data_capacity` is the total number of bytes of data to pre-allocate
47
    ///   (for all items, not per item).
48
24
    pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
49
24
        let mut offsets_builder = Vec::with_capacity(item_capacity + 1);
50
24
        offsets_builder.push(T::Offset::from_usize(0).unwrap());
51
24
        Self {
52
24
            value_builder: Vec::with_capacity(data_capacity),
53
24
            offsets_builder,
54
24
            null_buffer_builder: NullBufferBuilder::new(item_capacity),
55
24
        }
56
24
    }
57
58
    /// Creates a new  [`GenericByteBuilder`] from buffers.
59
    ///
60
    /// # Safety
61
    ///
62
    /// This doesn't verify buffer contents as it assumes the buffers are from
63
    /// existing and valid [`GenericByteArray`].
64
    pub unsafe fn new_from_buffer(
65
        offsets_buffer: MutableBuffer,
66
        value_buffer: MutableBuffer,
67
        null_buffer: Option<MutableBuffer>,
68
    ) -> Self {
69
        let offsets_builder: Vec<T::Offset> =
70
            ScalarBuffer::<T::Offset>::from(offsets_buffer).into();
71
        let value_builder: Vec<u8> = ScalarBuffer::<u8>::from(value_buffer).into();
72
73
        let null_buffer_builder = null_buffer
74
            .map(|buffer| NullBufferBuilder::new_from_buffer(buffer, offsets_builder.len() - 1))
75
            .unwrap_or_else(|| NullBufferBuilder::new_with_len(offsets_builder.len() - 1));
76
77
        Self {
78
            offsets_builder,
79
            value_builder,
80
            null_buffer_builder,
81
        }
82
    }
83
84
    #[inline]
85
120
    fn next_offset(&self) -> T::Offset {
86
120
        T::Offset::from_usize(self.value_builder.len()).expect("byte array offset overflow")
87
120
    }
88
89
    /// Appends a value into the builder.
90
    ///
91
    /// See the [GenericStringBuilder] documentation for examples of
92
    /// incrementally building string values with multiple `write!` calls.
93
    ///
94
    /// # Panics
95
    ///
96
    /// Panics if the resulting length of [`Self::values_slice`] would exceed
97
    /// `T::Offset::MAX` bytes.
98
    ///
99
    /// For example, this can happen with [`StringArray`] or [`BinaryArray`]
100
    /// where the total length of all values exceeds 2GB
101
    ///
102
    /// [`StringArray`]: crate::StringArray
103
    /// [`BinaryArray`]: crate::BinaryArray
104
    #[inline]
105
32
    pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
106
32
        self.value_builder
107
32
            .extend_from_slice(value.as_ref().as_ref());
108
32
        self.null_buffer_builder.append(true);
109
32
        self.offsets_builder.push(self.next_offset());
110
32
    }
111
112
    /// Append an `Option` value into the builder.
113
    ///
114
    /// - A `None` value will append a null value.
115
    /// - A `Some` value will append the value.
116
    ///
117
    /// See [`Self::append_value`] for more panic information.
118
    #[inline]
119
27
    pub fn append_option(&mut self, value: Option<impl AsRef<T::Native>>) {
120
27
        match value {
121
5
            None => self.append_null(),
122
22
            Some(v) => self.append_value(v),
123
        };
124
27
    }
125
126
    /// Append a null value into the builder.
127
    #[inline]
128
7
    pub fn append_null(&mut self) {
129
7
        self.null_buffer_builder.append(false);
130
7
        self.offsets_builder.push(self.next_offset());
131
7
    }
132
133
    /// Appends `n` `null`s into the builder.
134
    #[inline]
135
    pub fn append_nulls(&mut self, n: usize) {
136
        self.null_buffer_builder.append_n_nulls(n);
137
        let next_offset = self.next_offset();
138
        self.offsets_builder
139
            .extend(std::iter::repeat_n(next_offset, n));
140
    }
141
142
    /// Appends array values and null to this builder as is
143
    /// (this means that underlying null values are copied as is).
144
    #[inline]
145
37
    pub fn append_array(&mut self, array: &GenericByteArray<T>) {
146
37
        if array.len() == 0 {
147
2
            return;
148
35
        }
149
150
35
        let offsets = array.offsets();
151
152
        // If the offsets are contiguous, we can append them directly avoiding the need to align
153
        // for example, when the first appended array is not sliced (starts at offset 0)
154
35
        if self.next_offset() == offsets[0] {
155
13
            self.offsets_builder.extend_from_slice(&offsets[1..]);
156
13
        } else {
157
            // Shifting all the offsets
158
22
            let shift: T::Offset = self.next_offset() - offsets[0];
159
160
            // Creating intermediate offsets instead of pushing each offset is faster
161
            // (even if we make MutableBuffer to avoid updating length on each push
162
            //  and reserve the necessary capacity, it's still slower)
163
22
            let mut intermediate = Vec::with_capacity(offsets.len() - 1);
164
165
55
            for &offset in &
offsets[1..]22
{
166
55
                intermediate.push(offset + shift)
167
            }
168
169
22
            self.offsets_builder.extend_from_slice(&intermediate);
170
        }
171
172
        // Append underlying values, starting from the first offset and ending at the last offset
173
35
        self.value_builder.extend_from_slice(
174
35
            &array.values().as_slice()[offsets[0].as_usize()..offsets[array.len()].as_usize()],
175
        );
176
177
35
        if let Some(
null_buffer3
) = array.nulls() {
178
3
            self.null_buffer_builder.append_buffer(null_buffer);
179
32
        } else {
180
32
            self.null_buffer_builder.append_n_non_nulls(array.len());
181
32
        }
182
37
    }
183
184
    /// Builds the [`GenericByteArray`] and reset this builder.
185
24
    pub fn finish(&mut self) -> GenericByteArray<T> {
186
24
        let array_type = T::DATA_TYPE;
187
24
        let array_builder = ArrayDataBuilder::new(array_type)
188
24
            .len(self.len())
189
24
            .add_buffer(std::mem::take(&mut self.offsets_builder).into())
190
24
            .add_buffer(std::mem::take(&mut self.value_builder).into())
191
24
            .nulls(self.null_buffer_builder.finish());
192
193
24
        self.offsets_builder.push(self.next_offset());
194
24
        let array_data = unsafe { array_builder.build_unchecked() };
195
24
        GenericByteArray::from(array_data)
196
24
    }
197
198
    /// Builds the [`GenericByteArray`] without resetting the builder.
199
0
    pub fn finish_cloned(&self) -> GenericByteArray<T> {
200
0
        let array_type = T::DATA_TYPE;
201
0
        let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice());
202
0
        let value_buffer = Buffer::from_slice_ref(self.value_builder.as_slice());
203
0
        let array_builder = ArrayDataBuilder::new(array_type)
204
0
            .len(self.len())
205
0
            .add_buffer(offset_buffer)
206
0
            .add_buffer(value_buffer)
207
0
            .nulls(self.null_buffer_builder.finish_cloned());
208
209
0
        let array_data = unsafe { array_builder.build_unchecked() };
210
0
        GenericByteArray::from(array_data)
211
0
    }
212
213
    /// Returns the current values buffer as a slice
214
0
    pub fn values_slice(&self) -> &[u8] {
215
0
        self.value_builder.as_slice()
216
0
    }
217
218
    /// Returns the current offsets buffer as a slice
219
0
    pub fn offsets_slice(&self) -> &[T::Offset] {
220
0
        self.offsets_builder.as_slice()
221
0
    }
222
223
    /// Returns the current null buffer as a slice
224
    pub fn validity_slice(&self) -> Option<&[u8]> {
225
        self.null_buffer_builder.as_slice()
226
    }
227
228
    /// Returns the current null buffer as a mutable slice
229
    pub fn validity_slice_mut(&mut self) -> Option<&mut [u8]> {
230
        self.null_buffer_builder.as_slice_mut()
231
    }
232
}
233
234
impl<T: ByteArrayType> std::fmt::Debug for GenericByteBuilder<T> {
235
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
236
        write!(f, "{}{}Builder", T::Offset::PREFIX, T::PREFIX)?;
237
        f.debug_struct("")
238
            .field("value_builder", &self.value_builder)
239
            .field("offsets_builder", &self.offsets_builder)
240
            .field("null_buffer_builder", &self.null_buffer_builder)
241
            .finish()
242
    }
243
}
244
245
impl<T: ByteArrayType> Default for GenericByteBuilder<T> {
246
    fn default() -> Self {
247
        Self::new()
248
    }
249
}
250
251
impl<T: ByteArrayType> ArrayBuilder for GenericByteBuilder<T> {
252
    /// Returns the number of binary slots in the builder
253
45
    fn len(&self) -> usize {
254
45
        self.null_buffer_builder.len()
255
45
    }
256
257
    /// Builds the array and reset this builder.
258
6
    fn finish(&mut self) -> ArrayRef {
259
6
        Arc::new(self.finish())
260
6
    }
261
262
    /// Builds the array without resetting the builder.
263
0
    fn finish_cloned(&self) -> ArrayRef {
264
0
        Arc::new(self.finish_cloned())
265
0
    }
266
267
    /// Returns the builder as a non-mutable `Any` reference.
268
0
    fn as_any(&self) -> &dyn Any {
269
0
        self
270
0
    }
271
272
    /// Returns the builder as a mutable `Any` reference.
273
3
    fn as_any_mut(&mut self) -> &mut dyn Any {
274
3
        self
275
3
    }
276
277
    /// Returns the boxed builder as a box of `Any`.
278
0
    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
279
0
        self
280
0
    }
281
}
282
283
impl<T: ByteArrayType, V: AsRef<T::Native>> Extend<Option<V>> for GenericByteBuilder<T> {
284
    #[inline]
285
5
    fn extend<I: IntoIterator<Item = Option<V>>>(&mut self, iter: I) {
286
32
        for 
v27
in iter {
287
27
            self.append_option(v)
288
        }
289
5
    }
290
}
291
292
/// Array builder for [`GenericStringArray`][crate::GenericStringArray]
293
///
294
/// Values can be appended using [`GenericByteBuilder::append_value`], and nulls with
295
/// [`GenericByteBuilder::append_null`].
296
///
297
/// This builder also implements [`std::fmt::Write`] with any written data
298
/// included in the next appended value. This allows using [`std::fmt::Display`]
299
/// with standard Rust idioms like `write!` and `writeln!` to write data
300
/// directly to the builder without intermediate allocations.
301
///
302
/// # Example writing strings with `append_value`
303
/// ```
304
/// # use arrow_array::builder::GenericStringBuilder;
305
/// let mut builder = GenericStringBuilder::<i32>::new();
306
///
307
/// // Write one string value
308
/// builder.append_value("foobarbaz");
309
///
310
/// // Write a second string
311
/// builder.append_value("v2");
312
///
313
/// let array = builder.finish();
314
/// assert_eq!(array.value(0), "foobarbaz");
315
/// assert_eq!(array.value(1), "v2");
316
/// ```
317
///
318
/// # Example incrementally writing strings with `std::fmt::Write`
319
///
320
/// ```
321
/// # use std::fmt::Write;
322
/// # use arrow_array::builder::GenericStringBuilder;
323
/// let mut builder = GenericStringBuilder::<i32>::new();
324
///
325
/// // Write data in multiple `write!` calls
326
/// write!(builder, "foo").unwrap();
327
/// write!(builder, "bar").unwrap();
328
/// // The next call to append_value finishes the current string
329
/// // including all previously written strings.
330
/// builder.append_value("baz");
331
///
332
/// // Write second value with a single write call
333
/// write!(builder, "v2").unwrap();
334
/// // finish the value by calling append_value with an empty string
335
/// builder.append_value("");
336
///
337
/// let array = builder.finish();
338
/// assert_eq!(array.value(0), "foobarbaz");
339
/// assert_eq!(array.value(1), "v2");
340
/// ```
341
pub type GenericStringBuilder<O> = GenericByteBuilder<GenericStringType<O>>;
342
343
impl<O: OffsetSizeTrait> std::fmt::Write for GenericStringBuilder<O> {
344
0
    fn write_str(&mut self, s: &str) -> std::fmt::Result {
345
0
        self.value_builder.extend_from_slice(s.as_bytes());
346
0
        Ok(())
347
0
    }
348
}
349
350
///  Array builder for [`GenericBinaryArray`][crate::GenericBinaryArray]
351
///
352
/// Values can be appended using [`GenericByteBuilder::append_value`], and nulls with
353
/// [`GenericByteBuilder::append_null`].
354
///
355
/// # Example
356
/// ```
357
/// # use arrow_array::builder::GenericBinaryBuilder;
358
/// let mut builder = GenericBinaryBuilder::<i32>::new();
359
///
360
/// // Write data
361
/// builder.append_value("foo");
362
///
363
/// // Write second value
364
/// builder.append_value(&[0,1,2]);
365
///
366
/// let array = builder.finish();
367
/// // binary values
368
/// assert_eq!(array.value(0), b"foo");
369
/// assert_eq!(array.value(1), b"\x00\x01\x02");
370
/// ```
371
///
372
/// # Example incrementally writing bytes with `write_bytes`
373
///
374
/// ```
375
/// # use std::io::Write;
376
/// # use arrow_array::builder::GenericBinaryBuilder;
377
/// let mut builder = GenericBinaryBuilder::<i32>::new();
378
///
379
/// // Write data in multiple `write_bytes` calls
380
/// write!(builder, "foo").unwrap();
381
/// write!(builder, "bar").unwrap();
382
/// // The next call to append_value finishes the current string
383
/// // including all previously written strings.
384
/// builder.append_value("baz");
385
///
386
/// // Write second value with a single write call
387
/// write!(builder, "v2").unwrap();
388
/// // finish the value by calling append_value with an empty string
389
/// builder.append_value("");
390
///
391
/// let array = builder.finish();
392
/// assert_eq!(array.value(0), "foobarbaz".as_bytes());
393
/// assert_eq!(array.value(1), "v2".as_bytes());
394
/// ```
395
pub type GenericBinaryBuilder<O> = GenericByteBuilder<GenericBinaryType<O>>;
396
397
impl<O: OffsetSizeTrait> std::io::Write for GenericBinaryBuilder<O> {
398
    fn write(&mut self, bs: &[u8]) -> std::io::Result<usize> {
399
        self.value_builder.extend_from_slice(bs);
400
        Ok(bs.len())
401
    }
402
403
    fn flush(&mut self) -> std::io::Result<()> {
404
        Ok(())
405
    }
406
}
407
408
#[cfg(test)]
409
mod tests {
410
    use super::*;
411
    use crate::array::Array;
412
    use crate::GenericStringArray;
413
    use arrow_buffer::NullBuffer;
414
    use std::fmt::Write as _;
415
    use std::io::Write as _;
416
417
    fn _test_generic_binary_builder<O: OffsetSizeTrait>() {
418
        let mut builder = GenericBinaryBuilder::<O>::new();
419
420
        builder.append_value(b"hello");
421
        builder.append_value(b"");
422
        builder.append_null();
423
        builder.append_value(b"rust");
424
425
        let array = builder.finish();
426
427
        assert_eq!(4, array.len());
428
        assert_eq!(1, array.null_count());
429
        assert_eq!(b"hello", array.value(0));
430
        assert_eq!([] as [u8; 0], array.value(1));
431
        assert!(array.is_null(2));
432
        assert_eq!(b"rust", array.value(3));
433
        assert_eq!(O::from_usize(5).unwrap(), array.value_offsets()[2]);
434
        assert_eq!(O::from_usize(4).unwrap(), array.value_length(3));
435
    }
436
437
    #[test]
438
    fn test_binary_builder() {
439
        _test_generic_binary_builder::<i32>()
440
    }
441
442
    #[test]
443
    fn test_large_binary_builder() {
444
        _test_generic_binary_builder::<i64>()
445
    }
446
447
    fn _test_generic_binary_builder_all_nulls<O: OffsetSizeTrait>() {
448
        let mut builder = GenericBinaryBuilder::<O>::new();
449
        builder.append_null();
450
        builder.append_null();
451
        builder.append_null();
452
        builder.append_nulls(2);
453
        assert_eq!(5, builder.len());
454
        assert!(!builder.is_empty());
455
456
        let array = builder.finish();
457
        assert_eq!(5, array.null_count());
458
        assert_eq!(5, array.len());
459
        assert!(array.is_null(0));
460
        assert!(array.is_null(1));
461
        assert!(array.is_null(2));
462
        assert!(array.is_null(3));
463
        assert!(array.is_null(4));
464
    }
465
466
    #[test]
467
    fn test_binary_builder_all_nulls() {
468
        _test_generic_binary_builder_all_nulls::<i32>()
469
    }
470
471
    #[test]
472
    fn test_large_binary_builder_all_nulls() {
473
        _test_generic_binary_builder_all_nulls::<i64>()
474
    }
475
476
    fn _test_generic_binary_builder_reset<O: OffsetSizeTrait>() {
477
        let mut builder = GenericBinaryBuilder::<O>::new();
478
479
        builder.append_value(b"hello");
480
        builder.append_value(b"");
481
        builder.append_null();
482
        builder.append_value(b"rust");
483
        builder.finish();
484
485
        assert!(builder.is_empty());
486
487
        builder.append_value(b"parquet");
488
        builder.append_null();
489
        builder.append_value(b"arrow");
490
        builder.append_value(b"");
491
        builder.append_nulls(2);
492
        builder.append_value(b"hi");
493
        let array = builder.finish();
494
495
        assert_eq!(7, array.len());
496
        assert_eq!(3, array.null_count());
497
        assert_eq!(b"parquet", array.value(0));
498
        assert!(array.is_null(1));
499
        assert!(array.is_null(4));
500
        assert!(array.is_null(5));
501
        assert_eq!(b"arrow", array.value(2));
502
        assert_eq!(b"", array.value(1));
503
        assert_eq!(b"hi", array.value(6));
504
505
        assert_eq!(O::zero(), array.value_offsets()[0]);
506
        assert_eq!(O::from_usize(7).unwrap(), array.value_offsets()[2]);
507
        assert_eq!(O::from_usize(14).unwrap(), array.value_offsets()[7]);
508
        assert_eq!(O::from_usize(5).unwrap(), array.value_length(2));
509
    }
510
511
    #[test]
512
    fn test_binary_builder_reset() {
513
        _test_generic_binary_builder_reset::<i32>()
514
    }
515
516
    #[test]
517
    fn test_large_binary_builder_reset() {
518
        _test_generic_binary_builder_reset::<i64>()
519
    }
520
521
    fn _test_generic_string_array_builder<O: OffsetSizeTrait>() {
522
        let mut builder = GenericStringBuilder::<O>::new();
523
        let owned = "arrow".to_owned();
524
525
        builder.append_value("hello");
526
        builder.append_value("");
527
        builder.append_value(&owned);
528
        builder.append_null();
529
        builder.append_option(Some("rust"));
530
        builder.append_option(None::<&str>);
531
        builder.append_option(None::<String>);
532
        builder.append_nulls(2);
533
        builder.append_value("parquet");
534
        assert_eq!(10, builder.len());
535
536
        assert_eq!(
537
            GenericStringArray::<O>::from(vec![
538
                Some("hello"),
539
                Some(""),
540
                Some("arrow"),
541
                None,
542
                Some("rust"),
543
                None,
544
                None,
545
                None,
546
                None,
547
                Some("parquet")
548
            ]),
549
            builder.finish()
550
        );
551
    }
552
553
    #[test]
554
    fn test_string_array_builder() {
555
        _test_generic_string_array_builder::<i32>()
556
    }
557
558
    #[test]
559
    fn test_large_string_array_builder() {
560
        _test_generic_string_array_builder::<i64>()
561
    }
562
563
    fn _test_generic_string_array_builder_finish<O: OffsetSizeTrait>() {
564
        let mut builder = GenericStringBuilder::<O>::with_capacity(3, 11);
565
566
        builder.append_value("hello");
567
        builder.append_value("rust");
568
        builder.append_null();
569
570
        builder.finish();
571
        assert!(builder.is_empty());
572
        assert_eq!(&[O::zero()], builder.offsets_slice());
573
574
        builder.append_value("arrow");
575
        builder.append_value("parquet");
576
        let arr = builder.finish();
577
        // array should not have null buffer because there is not `null` value.
578
        assert!(arr.nulls().is_none());
579
        assert_eq!(GenericStringArray::<O>::from(vec!["arrow", "parquet"]), arr,)
580
    }
581
582
    #[test]
583
    fn test_string_array_builder_finish() {
584
        _test_generic_string_array_builder_finish::<i32>()
585
    }
586
587
    #[test]
588
    fn test_large_string_array_builder_finish() {
589
        _test_generic_string_array_builder_finish::<i64>()
590
    }
591
592
    fn _test_generic_string_array_builder_finish_cloned<O: OffsetSizeTrait>() {
593
        let mut builder = GenericStringBuilder::<O>::with_capacity(3, 11);
594
595
        builder.append_value("hello");
596
        builder.append_value("rust");
597
        builder.append_null();
598
599
        let mut arr = builder.finish_cloned();
600
        assert!(!builder.is_empty());
601
        assert_eq!(3, arr.len());
602
603
        builder.append_value("arrow");
604
        builder.append_value("parquet");
605
        arr = builder.finish();
606
607
        assert!(arr.nulls().is_some());
608
        assert_eq!(&[O::zero()], builder.offsets_slice());
609
        assert_eq!(5, arr.len());
610
    }
611
612
    #[test]
613
    fn test_string_array_builder_finish_cloned() {
614
        _test_generic_string_array_builder_finish_cloned::<i32>()
615
    }
616
617
    #[test]
618
    fn test_large_string_array_builder_finish_cloned() {
619
        _test_generic_string_array_builder_finish_cloned::<i64>()
620
    }
621
622
    #[test]
623
    fn test_extend() {
624
        let mut builder = GenericStringBuilder::<i32>::new();
625
        builder.extend(["a", "b", "c", "", "a", "b", "c"].into_iter().map(Some));
626
        builder.extend(["d", "cupcakes", "hello"].into_iter().map(Some));
627
        let array = builder.finish();
628
        assert_eq!(array.value_offsets(), &[0, 1, 2, 3, 3, 4, 5, 6, 7, 15, 20]);
629
        assert_eq!(array.value_data(), b"abcabcdcupcakeshello");
630
    }
631
632
    #[test]
633
    fn test_write_str() {
634
        let mut builder = GenericStringBuilder::<i32>::new();
635
        write!(builder, "foo").unwrap();
636
        builder.append_value("");
637
        writeln!(builder, "bar").unwrap();
638
        builder.append_value("");
639
        write!(builder, "fiz").unwrap();
640
        write!(builder, "buz").unwrap();
641
        builder.append_value("");
642
        let a = builder.finish();
643
        let r: Vec<_> = a.iter().flatten().collect();
644
        assert_eq!(r, &["foo", "bar\n", "fizbuz"])
645
    }
646
647
    #[test]
648
    fn test_write_bytes() {
649
        let mut builder = GenericBinaryBuilder::<i32>::new();
650
        write!(builder, "foo").unwrap();
651
        builder.append_value("");
652
        writeln!(builder, "bar").unwrap();
653
        builder.append_value("");
654
        write!(builder, "fiz").unwrap();
655
        write!(builder, "buz").unwrap();
656
        builder.append_value("");
657
        let a = builder.finish();
658
        let r: Vec<_> = a.iter().flatten().collect();
659
        assert_eq!(
660
            r,
661
            &["foo".as_bytes(), "bar\n".as_bytes(), "fizbuz".as_bytes()]
662
        )
663
    }
664
665
    #[test]
666
    fn test_append_array_without_nulls() {
667
        let input = vec![
668
            "hello", "world", "how", "are", "you", "doing", "today", "I", "am", "doing", "well",
669
            "thank", "you", "for", "asking",
670
        ];
671
        let arr1 = GenericStringArray::<i32>::from(input[..3].to_vec());
672
        let arr2 = GenericStringArray::<i32>::from(input[3..7].to_vec());
673
        let arr3 = GenericStringArray::<i32>::from(input[7..].to_vec());
674
675
        let mut builder = GenericStringBuilder::<i32>::new();
676
        builder.append_array(&arr1);
677
        builder.append_array(&arr2);
678
        builder.append_array(&arr3);
679
680
        let actual = builder.finish();
681
        let expected = GenericStringArray::<i32>::from(input);
682
683
        assert_eq!(actual, expected);
684
    }
685
686
    #[test]
687
    fn test_append_array_with_nulls() {
688
        let input = vec![
689
            Some("hello"),
690
            None,
691
            Some("how"),
692
            None,
693
            None,
694
            None,
695
            None,
696
            Some("I"),
697
            Some("am"),
698
            Some("doing"),
699
            Some("well"),
700
        ];
701
        let arr1 = GenericStringArray::<i32>::from(input[..3].to_vec());
702
        let arr2 = GenericStringArray::<i32>::from(input[3..7].to_vec());
703
        let arr3 = GenericStringArray::<i32>::from(input[7..].to_vec());
704
705
        let mut builder = GenericStringBuilder::<i32>::new();
706
        builder.append_array(&arr1);
707
        builder.append_array(&arr2);
708
        builder.append_array(&arr3);
709
710
        let actual = builder.finish();
711
        let expected = GenericStringArray::<i32>::from(input);
712
713
        assert_eq!(actual, expected);
714
    }
715
716
    #[test]
717
    fn test_append_empty_array() {
718
        let arr = GenericStringArray::<i32>::from(Vec::<&str>::new());
719
        let mut builder = GenericStringBuilder::<i32>::new();
720
        builder.append_array(&arr);
721
        let result = builder.finish();
722
        assert_eq!(result.len(), 0);
723
    }
724
725
    #[test]
726
    fn test_append_array_with_offset_not_starting_at_0() {
727
        let input = vec![
728
            Some("hello"),
729
            None,
730
            Some("how"),
731
            None,
732
            None,
733
            None,
734
            None,
735
            Some("I"),
736
            Some("am"),
737
            Some("doing"),
738
            Some("well"),
739
        ];
740
        let full_array = GenericStringArray::<i32>::from(input);
741
        let sliced = full_array.slice(1, 4);
742
743
        assert_ne!(sliced.offsets()[0].as_usize(), 0);
744
        assert_ne!(sliced.offsets().last(), full_array.offsets().last());
745
746
        let mut builder = GenericStringBuilder::<i32>::new();
747
        builder.append_array(&sliced);
748
        let actual = builder.finish();
749
750
        let expected = GenericStringArray::<i32>::from(vec![None, Some("how"), None, None]);
751
752
        assert_eq!(actual, expected);
753
    }
754
755
    #[test]
756
    fn test_append_underlying_null_values_added_as_is() {
757
        let input_1_array_with_nulls = {
758
            let input = vec![
759
                "hello", "world", "how", "are", "you", "doing", "today", "I", "am",
760
            ];
761
            let (offsets, buffer, _) = GenericStringArray::<i32>::from(input).into_parts();
762
763
            GenericStringArray::<i32>::new(
764
                offsets,
765
                buffer,
766
                Some(NullBuffer::from(&[
767
                    true, false, true, false, false, true, true, true, false,
768
                ])),
769
            )
770
        };
771
        let input_2_array_with_nulls = {
772
            let input = vec!["doing", "well", "thank", "you", "for", "asking"];
773
            let (offsets, buffer, _) = GenericStringArray::<i32>::from(input).into_parts();
774
775
            GenericStringArray::<i32>::new(
776
                offsets,
777
                buffer,
778
                Some(NullBuffer::from(&[false, false, true, false, true, true])),
779
            )
780
        };
781
782
        let mut builder = GenericStringBuilder::<i32>::new();
783
        builder.append_array(&input_1_array_with_nulls);
784
        builder.append_array(&input_2_array_with_nulls);
785
786
        let actual = builder.finish();
787
        let expected = GenericStringArray::<i32>::from(vec![
788
            Some("hello"),
789
            None, // world
790
            Some("how"),
791
            None, // are
792
            None, // you
793
            Some("doing"),
794
            Some("today"),
795
            Some("I"),
796
            None, // am
797
            None, // doing
798
            None, // well
799
            Some("thank"),
800
            None, // "you",
801
            Some("for"),
802
            Some("asking"),
803
        ]);
804
805
        assert_eq!(actual, expected);
806
807
        let expected_underlying_buffer = Buffer::from(
808
            [
809
                "hello", "world", "how", "are", "you", "doing", "today", "I", "am", "doing",
810
                "well", "thank", "you", "for", "asking",
811
            ]
812
            .join("")
813
            .as_bytes(),
814
        );
815
        assert_eq!(actual.values(), &expected_underlying_buffer);
816
    }
817
818
    #[test]
819
    fn append_array_with_continues_indices() {
820
        let input = vec![
821
            "hello", "world", "how", "are", "you", "doing", "today", "I", "am", "doing", "well",
822
            "thank", "you", "for", "asking",
823
        ];
824
        let full_array = GenericStringArray::<i32>::from(input);
825
        let slice1 = full_array.slice(0, 3);
826
        let slice2 = full_array.slice(3, 4);
827
        let slice3 = full_array.slice(7, full_array.len() - 7);
828
829
        let mut builder = GenericStringBuilder::<i32>::new();
830
        builder.append_array(&slice1);
831
        builder.append_array(&slice2);
832
        builder.append_array(&slice3);
833
834
        let actual = builder.finish();
835
836
        assert_eq!(actual, full_array);
837
    }
838
}