Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-string/src/length.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! Defines kernel for length of string arrays and binary arrays
19
20
use arrow_array::*;
21
use arrow_array::{cast::AsArray, types::*};
22
use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
23
use arrow_schema::{ArrowError, DataType};
24
use std::sync::Arc;
25
26
0
fn length_impl<P: ArrowPrimitiveType>(
27
0
    offsets: &OffsetBuffer<P::Native>,
28
0
    nulls: Option<&NullBuffer>,
29
0
) -> ArrayRef {
30
0
    let v: Vec<_> = offsets
31
0
        .windows(2)
32
0
        .map(|w| w[1].sub_wrapping(w[0]))
33
0
        .collect();
34
0
    Arc::new(PrimitiveArray::<P>::new(v.into(), nulls.cloned()))
35
0
}
36
37
0
fn bit_length_impl<P: ArrowPrimitiveType>(
38
0
    offsets: &OffsetBuffer<P::Native>,
39
0
    nulls: Option<&NullBuffer>,
40
0
) -> ArrayRef {
41
0
    let bits = P::Native::usize_as(8);
42
0
    let c = |w: &[P::Native]| w[1].sub_wrapping(w[0]).mul_wrapping(bits);
43
0
    let v: Vec<_> = offsets.windows(2).map(c).collect();
44
0
    Arc::new(PrimitiveArray::<P>::new(v.into(), nulls.cloned()))
45
0
}
46
47
/// Returns an array of Int32/Int64 denoting the length of each value in the array.
48
///
49
/// For list array, length is the number of elements in each list.
50
/// For string array and binary array, length is the number of bytes of each value.
51
///
52
/// * this only accepts ListArray/LargeListArray, StringArray/LargeStringArray/StringViewArray, BinaryArray/LargeBinaryArray, and FixedSizeListArray,
53
///   or DictionaryArray with above Arrays as values
54
/// * length of null is null.
55
pub fn length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
56
    if let Some(d) = array.as_any_dictionary_opt() {
57
        let lengths = length(d.values().as_ref())?;
58
        return Ok(d.with_values(lengths));
59
    }
60
61
    match array.data_type() {
62
        DataType::List(_) => {
63
            let list = array.as_list::<i32>();
64
            Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
65
        }
66
        DataType::LargeList(_) => {
67
            let list = array.as_list::<i64>();
68
            Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
69
        }
70
        DataType::Utf8 => {
71
            let list = array.as_string::<i32>();
72
            Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
73
        }
74
        DataType::LargeUtf8 => {
75
            let list = array.as_string::<i64>();
76
            Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
77
        }
78
        DataType::Utf8View => {
79
            let list = array.as_string_view();
80
0
            let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
81
            Ok(Arc::new(PrimitiveArray::<Int32Type>::new(
82
                v.into(),
83
                list.nulls().cloned(),
84
            )))
85
        }
86
        DataType::Binary => {
87
            let list = array.as_binary::<i32>();
88
            Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
89
        }
90
        DataType::LargeBinary => {
91
            let list = array.as_binary::<i64>();
92
            Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
93
        }
94
        DataType::FixedSizeBinary(len) | DataType::FixedSizeList(_, len) => Ok(Arc::new(
95
            Int32Array::new(vec![*len; array.len()].into(), array.nulls().cloned()),
96
        )),
97
        DataType::BinaryView => {
98
            let list = array.as_binary_view();
99
0
            let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
100
            Ok(Arc::new(PrimitiveArray::<Int32Type>::new(
101
                v.into(),
102
                list.nulls().cloned(),
103
            )))
104
        }
105
        other => Err(ArrowError::ComputeError(format!(
106
            "length not supported for {other:?}"
107
        ))),
108
    }
109
}
110
111
/// Returns an array of Int32/Int64 denoting the number of bits in each value in the array.
112
///
113
/// * this only accepts StringArray/Utf8, LargeString/LargeUtf8, BinaryArray and LargeBinaryArray,
114
///   or DictionaryArray with above Arrays as values
115
/// * bit_length of null is null.
116
/// * bit_length is in number of bits
117
pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
118
    if let Some(d) = array.as_any_dictionary_opt() {
119
        let lengths = bit_length(d.values().as_ref())?;
120
        return Ok(d.with_values(lengths));
121
    }
122
123
    match array.data_type() {
124
        DataType::List(_) => {
125
            let list = array.as_list::<i32>();
126
            Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
127
        }
128
        DataType::LargeList(_) => {
129
            let list = array.as_list::<i64>();
130
            Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
131
        }
132
        DataType::Utf8 => {
133
            let list = array.as_string::<i32>();
134
            Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
135
        }
136
        DataType::LargeUtf8 => {
137
            let list = array.as_string::<i64>();
138
            Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
139
        }
140
        DataType::Utf8View => {
141
            let list = array.as_string_view();
142
            let values = list
143
                .views()
144
                .iter()
145
0
                .map(|view| (*view as i32).wrapping_mul(8))
146
                .collect();
147
            Ok(Arc::new(Int32Array::new(values, array.nulls().cloned())))
148
        }
149
        DataType::Binary => {
150
            let list = array.as_binary::<i32>();
151
            Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
152
        }
153
        DataType::LargeBinary => {
154
            let list = array.as_binary::<i64>();
155
            Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
156
        }
157
        DataType::FixedSizeBinary(len) => Ok(Arc::new(Int32Array::new(
158
            vec![*len * 8; array.len()].into(),
159
            array.nulls().cloned(),
160
        ))),
161
        other => Err(ArrowError::ComputeError(format!(
162
            "bit_length not supported for {other:?}"
163
        ))),
164
    }
165
}
166
167
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_buffer::Buffer;
    use arrow_data::ArrayData;
    use arrow_schema::Field;

    /// Non-null string inputs as `(values, expected array length, expected byte lengths)`.
    fn length_cases_string() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
        // a large array
        let values = [
            "one",
            "on",
            "o",
            "",
            "this is a longer string to test string array with",
        ];
        let values = values.into_iter().cycle().take(4096).collect();
        let expected = [3, 2, 1, 0, 49].into_iter().cycle().take(4096).collect();

        vec![
            (vec!["hello", " ", "world"], 3, vec![5, 1, 5]),
            (vec!["hello", " ", "world", "!"], 4, vec![5, 1, 5, 1]),
            (vec!["💖"], 1, vec![4]),
            (values, 4096, expected),
        ]
    }

    /// Builds a `GenericBinaryArray` from `$value`, runs `$kernel` on it and
    /// compares the result against `$expected` converted to `$result_ty`.
    macro_rules! length_binary_helper {
        ($offset_ty: ty, $result_ty: ty, $kernel: ident, $value: expr, $expected: expr) => {{
            let array = GenericBinaryArray::<$offset_ty>::from($value);
            let result = $kernel(&array).unwrap();
            let result = result.as_any().downcast_ref::<$result_ty>().unwrap();
            let expected: $result_ty = $expected.into();
            assert_eq!(&expected, result);
        }};
    }

    /// Builds a `GenericListArray` from `$value`, runs `length` on it and
    /// compares the result against `$expected` converted to `$result_ty`.
    macro_rules! length_list_helper {
        ($offset_ty: ty, $result_ty: ty, $element_ty: ty, $value: expr, $expected: expr) => {{
            let array =
                GenericListArray::<$offset_ty>::from_iter_primitive::<$element_ty, _, _>($value);
            let result = length(&array).unwrap();
            let result = result.as_any().downcast_ref::<$result_ty>().unwrap();
            let expected: $result_ty = $expected.into();
            assert_eq!(&expected, result);
        }};
    }

    #[test]
    fn length_test_string() {
        length_cases_string()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = StringArray::from(input);
                let result = length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
                expected.iter().enumerate().for_each(|(i, value)| {
                    assert_eq!(*value, result.value(i));
                });
            })
    }

    #[test]
    fn length_test_large_string() {
        length_cases_string()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = LargeStringArray::from(input);
                let result = length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
                expected.iter().enumerate().for_each(|(i, value)| {
                    assert_eq!(*value as i64, result.value(i));
                });
            })
    }

    #[test]
    fn length_test_string_view() {
        length_cases_string()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = StringViewArray::from(input);
                let result = length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
                expected.iter().enumerate().for_each(|(i, value)| {
                    assert_eq!(*value, result.value(i));
                });
            })
    }

    #[test]
    fn length_test_binary() {
        let value: Vec<&[u8]> = vec![b"zero", b"one", &[0xff, 0xf8]];
        let result: Vec<i32> = vec![4, 3, 2];
        length_binary_helper!(i32, Int32Array, length, value, result)
    }

    #[test]
    fn length_test_large_binary() {
        let value: Vec<&[u8]> = vec![b"zero", &[0xff, 0xf8], b"two"];
        let result: Vec<i64> = vec![4, 2, 3];
        length_binary_helper!(i64, Int64Array, length, value, result)
    }

    #[test]
    fn length_test_binary_view() {
        let value: Vec<&[u8]> = vec![
            b"zero",
            &[0xff, 0xf8],
            b"two",
            b"this is a longer string to test binary array with",
        ];
        let expected: Vec<i32> = vec![4, 2, 3, 49];

        let array = BinaryViewArray::from(value);
        let result = length(&array).unwrap();
        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
        let expected: Int32Array = expected.into();
        assert_eq!(&expected, result);
    }

    #[test]
    fn length_test_list() {
        let value = vec![
            Some(vec![]),
            Some(vec![Some(1), Some(2), Some(4)]),
            Some(vec![Some(0)]),
        ];
        let result: Vec<i32> = vec![0, 3, 1];
        length_list_helper!(i32, Int32Array, Int32Type, value, result)
    }

    #[test]
    fn length_test_large_list() {
        let value = vec![
            Some(vec![]),
            Some(vec![Some(1.1), Some(2.2), Some(3.3)]),
            Some(vec![None]),
        ];
        let result: Vec<i64> = vec![0, 3, 1];
        length_list_helper!(i64, Int64Array, Float32Type, value, result)
    }

    type OptionStr = Option<&'static str>;

    /// Nullable string inputs as `(values, expected array length, expected byte lengths)`.
    fn length_null_cases_string() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> {
        vec![(
            vec![Some("one"), None, Some("three"), Some("four")],
            4,
            vec![Some(3), None, Some(5), Some(4)],
        )]
    }

    #[test]
    fn length_null_string() {
        length_null_cases_string()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = StringArray::from(input);
                let result = length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();

                let expected: Int32Array = expected.into();
                assert_eq!(&expected, result);
            })
    }

    #[test]
    fn length_null_large_string() {
        length_null_cases_string()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = LargeStringArray::from(input);
                let result = length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();

                // convert to i64
                let expected: Int64Array = expected
                    .iter()
                    .map(|e| e.map(|e| e as i64))
                    .collect::<Vec<_>>()
                    .into();
                assert_eq!(&expected, result);
            })
    }

    #[test]
    fn length_null_binary() {
        let value: Vec<Option<&[u8]>> =
            vec![Some(b"zero"), None, Some(&[0xff, 0xf8]), Some(b"three")];
        let result: Vec<Option<i32>> = vec![Some(4), None, Some(2), Some(5)];
        length_binary_helper!(i32, Int32Array, length, value, result)
    }

    #[test]
    fn length_null_large_binary() {
        let value: Vec<Option<&[u8]>> =
            vec![Some(&[0xff, 0xf8]), None, Some(b"two"), Some(b"three")];
        let result: Vec<Option<i64>> = vec![Some(2), None, Some(3), Some(5)];
        length_binary_helper!(i64, Int64Array, length, value, result)
    }

    #[test]
    fn length_null_list() {
        let value = vec![
            Some(vec![]),
            None,
            Some(vec![Some(1), None, Some(2), Some(4)]),
            Some(vec![Some(0)]),
        ];
        let result: Vec<Option<i32>> = vec![Some(0), None, Some(4), Some(1)];
        length_list_helper!(i32, Int32Array, Int8Type, value, result)
    }

    #[test]
    fn length_null_large_list() {
        let value = vec![
            Some(vec![]),
            None,
            Some(vec![Some(1.1), None, Some(4.0)]),
            Some(vec![Some(0.1)]),
        ];
        let result: Vec<Option<i64>> = vec![Some(0), None, Some(3), Some(1)];
        length_list_helper!(i64, Int64Array, Float32Type, value, result)
    }

    /// Tests that length is not valid for u64.
    #[test]
    fn length_wrong_type() {
        let array: UInt64Array = vec![1u64].into();

        assert!(length(&array).is_err());
    }

    /// Tests with an offset
    #[test]
    fn length_offsets_string() {
        let a = StringArray::from(vec![Some("hello"), Some(" "), Some("world"), None]);
        let b = a.slice(1, 3);
        let result = length(&b).unwrap();
        let result: &Int32Array = result.as_primitive();

        let expected = Int32Array::from(vec![Some(1), Some(5), None]);
        assert_eq!(&expected, result);
    }

    /// Tests with an offset
    #[test]
    fn length_offsets_binary() {
        let value: Vec<Option<&[u8]>> = vec![Some(b"hello"), Some(b" "), Some(&[0xff, 0xf8]), None];
        let a = BinaryArray::from(value);
        let b = a.slice(1, 3);
        let result = length(&b).unwrap();
        let result: &Int32Array = result.as_primitive();

        let expected = Int32Array::from(vec![Some(1), Some(2), None]);
        assert_eq!(&expected, result);
    }

    /// Non-null string inputs as `(values, expected array length, expected bit lengths)`.
    fn bit_length_cases() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
        // a large array
        let values = ["one", "on", "o", ""];
        let values = values.into_iter().cycle().take(4096).collect();
        let expected = [24, 16, 8, 0].into_iter().cycle().take(4096).collect();

        vec![
            (vec!["hello", " ", "world", "!"], 4, vec![40, 8, 40, 8]),
            (vec!["💖"], 1, vec![32]),
            (vec!["josé"], 1, vec![40]),
            (values, 4096, expected),
        ]
    }

    #[test]
    fn bit_length_test_string() {
        bit_length_cases()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = StringArray::from(input);
                let result = bit_length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
                expected.iter().enumerate().for_each(|(i, value)| {
                    assert_eq!(*value, result.value(i));
                });
            })
    }

    #[test]
    fn bit_length_test_large_string() {
        bit_length_cases()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = LargeStringArray::from(input);
                let result = bit_length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
                expected.iter().enumerate().for_each(|(i, value)| {
                    assert_eq!(*value as i64, result.value(i));
                });
            })
    }

    #[test]
    fn bit_length_test_utf8view() {
        bit_length_cases()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let string_array = StringViewArray::from(input);
                let result = bit_length(&string_array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
                expected.iter().enumerate().for_each(|(i, value)| {
                    assert_eq!(*value, result.value(i));
                });
            })
    }

    #[test]
    fn bit_length_null_utf8view() {
        bit_length_null_cases()
            .into_iter()
            .for_each(|(input, len, expected)| {
                // Must be a view array: a plain StringArray here would only
                // repeat `bit_length_null_string` and leave the Utf8View null
                // path untested.
                let array = StringViewArray::from(input);
                let result = bit_length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();

                let expected: Int32Array = expected.into();
                assert_eq!(&expected, result);
            })
    }

    #[test]
    fn bit_length_binary() {
        let value: Vec<&[u8]> = vec![b"one", &[0xff, 0xf8], b"three"];
        let expected: Vec<i32> = vec![24, 16, 40];
        length_binary_helper!(i32, Int32Array, bit_length, value, expected)
    }

    #[test]
    fn bit_length_large_binary() {
        let value: Vec<&[u8]> = vec![b"zero", b" ", &[0xff, 0xf8]];
        let expected: Vec<i64> = vec![32, 8, 16];
        length_binary_helper!(i64, Int64Array, bit_length, value, expected)
    }

    /// Nullable string inputs as `(values, expected array length, expected bit lengths)`.
    fn bit_length_null_cases() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> {
        vec![(
            vec![Some("one"), None, Some("three"), Some("four")],
            4,
            vec![Some(24), None, Some(40), Some(32)],
        )]
    }

    #[test]
    fn bit_length_null_string() {
        bit_length_null_cases()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = StringArray::from(input);
                let result = bit_length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();

                let expected: Int32Array = expected.into();
                assert_eq!(&expected, result);
            })
    }

    #[test]
    fn bit_length_null_large_string() {
        bit_length_null_cases()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = LargeStringArray::from(input);
                let result = bit_length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();

                // convert to i64
                let expected: Int64Array = expected
                    .iter()
                    .map(|e| e.map(|e| e as i64))
                    .collect::<Vec<_>>()
                    .into();
                assert_eq!(&expected, result);
            })
    }

    #[test]
    fn bit_length_null_binary() {
        let value: Vec<Option<&[u8]>> =
            vec![Some(b"one"), None, Some(b"three"), Some(&[0xff, 0xf8])];
        let expected: Vec<Option<i32>> = vec![Some(24), None, Some(40), Some(16)];
        length_binary_helper!(i32, Int32Array, bit_length, value, expected)
    }

    #[test]
    fn bit_length_null_large_binary() {
        let value: Vec<Option<&[u8]>> =
            vec![Some(b"one"), None, Some(&[0xff, 0xf8]), Some(b"four")];
        let expected: Vec<Option<i64>> = vec![Some(24), None, Some(16), Some(32)];
        length_binary_helper!(i64, Int64Array, bit_length, value, expected)
    }

    /// Tests that bit_length is not valid for u64.
    #[test]
    fn bit_length_wrong_type() {
        let array: UInt64Array = vec![1u64].into();

        assert!(bit_length(&array).is_err());
    }

    /// Tests with an offset
    #[test]
    fn bit_length_offsets_string() {
        let a = StringArray::from(vec![Some("hello"), Some(" "), Some("world"), None]);
        let b = a.slice(1, 3);
        let result = bit_length(&b).unwrap();
        let result: &Int32Array = result.as_primitive();

        let expected = Int32Array::from(vec![Some(8), Some(40), None]);
        assert_eq!(&expected, result);
    }

    /// Tests with an offset
    #[test]
    fn bit_length_offsets_binary() {
        let value: Vec<Option<&[u8]>> = vec![Some(b"hello"), Some(&[]), Some(b"world"), None];
        let a = BinaryArray::from(value);
        let b = a.slice(1, 3);
        let result = bit_length(&b).unwrap();
        let result: &Int32Array = result.as_primitive();

        let expected = Int32Array::from(vec![Some(0), Some(40), None]);
        assert_eq!(&expected, result);
    }

    #[test]
    fn length_dictionary() {
        _length_dictionary::<Int8Type>();
        _length_dictionary::<Int16Type>();
        _length_dictionary::<Int32Type>();
        _length_dictionary::<Int64Type>();
        _length_dictionary::<UInt8Type>();
        _length_dictionary::<UInt16Type>();
        _length_dictionary::<UInt32Type>();
        _length_dictionary::<UInt64Type>();
    }

    /// Checks `length` on a string dictionary keyed by `K`: the result must be
    /// a dictionary with the same keys whose values are the byte lengths.
    fn _length_dictionary<K: ArrowDictionaryKeyType>() {
        const TOTAL: i32 = 100;

        let v = ["aaaa", "bb", "ccccc", "ddd", "eeeeee"];
        let data: Vec<Option<&str>> = (0..TOTAL)
            .map(|n| {
                let i = n % 5;
                if i == 3 {
                    None
                } else {
                    Some(v[i as usize])
                }
            })
            .collect();

        let dict_array: DictionaryArray<K> = data.clone().into_iter().collect();

        let expected: Vec<Option<i32>> =
            data.iter().map(|opt| opt.map(|s| s.len() as i32)).collect();

        let res = length(&dict_array).unwrap();
        let actual = res.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
        let actual: Vec<Option<i32>> = actual
            .values()
            .as_any()
            .downcast_ref::<Int32Array>()
            .unwrap()
            .take_iter(dict_array.keys_iter())
            .collect();

        for i in 0..TOTAL as usize {
            assert_eq!(expected[i], actual[i],);
        }
    }

    #[test]
    fn bit_length_dictionary() {
        _bit_length_dictionary::<Int8Type>();
        _bit_length_dictionary::<Int16Type>();
        _bit_length_dictionary::<Int32Type>();
        _bit_length_dictionary::<Int64Type>();
        _bit_length_dictionary::<UInt8Type>();
        _bit_length_dictionary::<UInt16Type>();
        _bit_length_dictionary::<UInt32Type>();
        _bit_length_dictionary::<UInt64Type>();
    }

    /// Checks `bit_length` on a string dictionary keyed by `K`: the result must
    /// be a dictionary with the same keys whose values are the bit lengths.
    fn _bit_length_dictionary<K: ArrowDictionaryKeyType>() {
        const TOTAL: i32 = 100;

        let v = ["aaaa", "bb", "ccccc", "ddd", "eeeeee"];
        let data: Vec<Option<&str>> = (0..TOTAL)
            .map(|n| {
                let i = n % 5;
                if i == 3 {
                    None
                } else {
                    Some(v[i as usize])
                }
            })
            .collect();

        let dict_array: DictionaryArray<K> = data.clone().into_iter().collect();

        // Bit length is defined on bytes, not characters, so use `len()`
        // rather than `chars().count()`; the two only agree for ASCII.
        let expected: Vec<Option<i32>> = data
            .iter()
            .map(|opt| opt.map(|s| (s.len() * 8) as i32))
            .collect();

        let res = bit_length(&dict_array).unwrap();
        let actual = res.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
        let actual: Vec<Option<i32>> = actual
            .values()
            .as_any()
            .downcast_ref::<Int32Array>()
            .unwrap()
            .take_iter(dict_array.keys_iter())
            .collect();

        for i in 0..TOTAL as usize {
            assert_eq!(expected[i], actual[i],);
        }
    }

    #[test]
    fn test_fixed_size_list_length() {
        // Construct a value array
        let value_data = ArrayData::builder(DataType::Int32)
            .len(9)
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8]))
            .build()
            .unwrap();
        let list_data_type =
            DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, false)), 3);
        let nulls = NullBuffer::from(vec![true, false, true]);
        let list_data = ArrayData::builder(list_data_type)
            .len(3)
            .add_child_data(value_data)
            .nulls(Some(nulls))
            .build()
            .unwrap();
        let list_array = FixedSizeListArray::from(list_data);

        let lengths = length(&list_array).unwrap();
        let lengths = lengths.as_primitive::<Int32Type>();

        assert_eq!(lengths.len(), 3);
        assert_eq!(lengths.value(0), 3);
        assert!(lengths.is_null(1));
        assert_eq!(lengths.value(2), 3);
    }

    #[test]
    fn test_fixed_size_binary() {
        let array = FixedSizeBinaryArray::new(4, [0; 16].into(), None);
        let result = length(&array).unwrap();
        assert_eq!(result.as_ref(), &Int32Array::from(vec![4; 4]));

        let result = bit_length(&array).unwrap();
        assert_eq!(result.as_ref(), &Int32Array::from(vec![32; 4]));
    }
}