Coverage Report

Created: 2025-11-17 14:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-select/src/dictionary.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! Dictionary utilities for Arrow arrays
19
20
use std::sync::Arc;
21
22
use crate::filter::filter;
23
use crate::interleave::interleave;
24
use ahash::RandomState;
25
use arrow_array::builder::BooleanBufferBuilder;
26
use arrow_array::types::{
27
    ArrowDictionaryKeyType, ArrowPrimitiveType, BinaryType, ByteArrayType, LargeBinaryType,
28
    LargeUtf8Type, Utf8Type,
29
};
30
use arrow_array::{
31
    AnyDictionaryArray, Array, ArrayRef, ArrowNativeTypeOp, BooleanArray, DictionaryArray,
32
    GenericByteArray, PrimitiveArray, downcast_dictionary_array,
33
};
34
use arrow_array::{cast::AsArray, downcast_primitive};
35
use arrow_buffer::{ArrowNativeType, BooleanBuffer, ScalarBuffer, ToByteSlice};
36
use arrow_schema::{ArrowError, DataType};
37
38
/// Garbage collects a [DictionaryArray] by removing unreferenced values.
39
///
40
/// Returns a new [DictionaryArray] such that there are no values
41
/// that are not referenced by at least one key. There may still be duplicate
42
/// values.
43
///
44
/// See also [`garbage_collect_any_dictionary`] if you need to handle multiple dictionary types
45
5
pub fn garbage_collect_dictionary<K: ArrowDictionaryKeyType>(
46
5
    dictionary: &DictionaryArray<K>,
47
5
) -> Result<DictionaryArray<K>, ArrowError> {
48
5
    let keys = dictionary.keys();
49
5
    let values = dictionary.values();
50
51
5
    let mask = dictionary.occupancy();
52
53
    // If no work to do, return the original dictionary
54
5
    if mask.count_set_bits() == values.len() {
55
1
        return Ok(dictionary.clone());
56
4
    }
57
58
    // Create a mapping from the old keys to the new keys, use a Vec for easy indexing
59
4
    let mut key_remap = vec![K::Native::ZERO; values.len()];
60
8
    for (new_idx, old_idx) in 
mask4
.
set_indices4
().
enumerate4
() {
61
8
        key_remap[old_idx] = K::Native::from_usize(new_idx)
62
8
            .expect("new index should fit in K::Native, as old index was in range");
63
8
    }
64
65
    // ... and then build the new keys array
66
20
    let 
new_keys4
=
keys4
.
unary4
(|key| {
67
20
        key_remap
68
20
            .get(key.as_usize())
69
20
            .copied()
70
            // nulls may be present in the keys, and they will have arbitrary value; we don't care
71
            // and can safely return zero
72
20
            .unwrap_or(K::Native::ZERO)
73
20
    });
74
75
    // Create a new values array by filtering using the mask
76
4
    let values = filter(dictionary.values(), &BooleanArray::new(mask, None))
?0
;
77
78
4
    DictionaryArray::try_new(new_keys, values)
79
5
}
80
81
/// Equivalent to [`garbage_collect_dictionary`] but without requiring casting to a specific key type.
82
1
pub fn garbage_collect_any_dictionary(
83
1
    dictionary: &dyn AnyDictionaryArray,
84
1
) -> Result<ArrayRef, ArrowError> {
85
    // FIXME: this is a workaround for MSRV Rust versions below 1.86 where trait upcasting is not stable.
86
    // From 1.86 onward, `&dyn AnyDictionaryArray` can be directly passed to `downcast_dictionary_array!`.
87
1
    let dictionary = &*dictionary.slice(0, dictionary.len());
88
1
    downcast_dictionary_array!(
89
1
        dictionary => 
garbage_collect_dictionary0
(
dictionary0
).
map0
(|dict| Arc::new(dict) as ArrayRef),
90
0
        _ => unreachable!("have a dictionary array")
91
    )
92
1
}
93
94
/// A best effort interner that maintains a fixed number of buckets
95
/// and interns keys based on their hash value
96
///
97
/// Hash collisions will result in replacement
98
struct Interner<'a, V> {
99
    state: RandomState,
100
    buckets: Vec<Option<InternerBucket<'a, V>>>,
101
    shift: u32,
102
}
103
104
/// A single bucket in [`Interner`].
105
type InternerBucket<'a, V> = (Option<&'a [u8]>, V);
106
107
impl<'a, V> Interner<'a, V> {
108
    /// Capacity controls the number of unique buckets allocated within the Interner
109
    ///
110
    /// A larger capacity reduces the probability of hash collisions, and should be set
111
    /// based on an approximation of the upper bound of unique values
112
14
    fn new(capacity: usize) -> Self {
113
        // Add additional buckets to help reduce collisions
114
14
        let shift = (capacity as u64 + 128).leading_zeros();
115
14
        let num_buckets = (u64::MAX >> shift) as usize;
116
14
        let buckets = (0..num_buckets.saturating_add(1)).map(|_| None).collect();
117
14
        Self {
118
14
            // A fixed seed to ensure deterministic behaviour
119
14
            state: RandomState::with_seeds(0, 0, 0, 0),
120
14
            buckets,
121
14
            shift,
122
14
        }
123
14
    }
124
125
80.0k
    fn intern<F: FnOnce() -> Result<V, E>, E>(
126
80.0k
        &mut self,
127
80.0k
        new: Option<&'a [u8]>,
128
80.0k
        f: F,
129
80.0k
    ) -> Result<&V, E> {
130
80.0k
        let hash = self.state.hash_one(new);
131
80.0k
        let bucket_idx = hash >> self.shift;
132
80.0k
        Ok(match &mut self.buckets[bucket_idx as usize] {
133
80.0k
            Some((current, v)) => {
134
80.0k
                if *current != new {
135
3
                    *v = f()
?0
;
136
3
                    *current = new;
137
80.0k
                }
138
80.0k
                v
139
            }
140
72
            slot => &slot.insert((new, f()
?0
)).1,
141
        })
142
80.0k
    }
143
}
144
145
/// The merged values of several dictionaries together with the key translation
/// tables needed to rewrite each input's keys; returned by `merge_dictionary_values`.
pub(crate) struct MergedDictionaries<K: ArrowDictionaryKeyType> {
146
    /// Key translation tables: `key_mappings[array_idx][old_key]` gives the new key
147
    pub key_mappings: Vec<Vec<K::Native>>,
148
    /// The new values array that the translated keys index into
149
    pub values: ArrayRef,
150
}
151
152
/// Performs a cheap, pointer-based comparison of two byte array
153
///
154
/// See [`ScalarBuffer::ptr_eq`]
155
16
fn bytes_ptr_eq<T: ByteArrayType>(a: &dyn Array, b: &dyn Array) -> bool {
156
16
    match (a.as_bytes_opt::<T>(), b.as_bytes_opt::<T>()) {
157
16
        (Some(a), Some(b)) => {
158
16
            let values_eq = a.values().ptr_eq(b.values()) && 
a.offsets()2
.
ptr_eq2
(
b.offsets()2
);
159
16
            match (a.nulls(), b.nulls()) {
160
1
                (Some(a), Some(b)) => values_eq && 
a.inner()0
.
ptr_eq0
(
b.inner()0
),
161
12
                (None, None) => values_eq,
162
3
                _ => false,
163
            }
164
        }
165
0
        _ => false,
166
    }
167
16
}
168
169
/// A type-erased function that compares two arrays for pointer equality
170
type PtrEq = fn(&dyn Array, &dyn Array) -> bool;
171
172
/// A weak heuristic of whether to merge dictionary values that aims to only
173
/// perform the expensive merge computation when it is likely to yield at least
174
/// some return over the naive approach used by MutableArrayData
175
///
176
/// `len` is the total length of the merged output
177
17
pub(crate) fn should_merge_dictionary_values<K: ArrowDictionaryKeyType>(
178
17
    dictionaries: &[&DictionaryArray<K>],
179
17
    len: usize,
180
17
) -> bool {
181
    use DataType::*;
182
17
    let first_values = dictionaries[0].values().as_ref();
183
17
    let ptr_eq: PtrEq = match first_values.data_type() {
184
15
        Utf8 => bytes_ptr_eq::<Utf8Type>,
185
0
        LargeUtf8 => bytes_ptr_eq::<LargeUtf8Type>,
186
0
        Binary => bytes_ptr_eq::<BinaryType>,
187
0
        LargeBinary => bytes_ptr_eq::<LargeBinaryType>,
188
2
        dt => {
189
2
            if !dt.is_primitive() {
190
0
                return false;
191
2
            }
192
2
            |a, b| a.to_data().ptr_eq(&b.to_data())
193
        }
194
    };
195
196
17
    let mut single_dictionary = true;
197
17
    let mut total_values = first_values.len();
198
80.0k
    for dict in 
dictionaries17
.
iter17
().
skip17
(1) {
199
80.0k
        let values = dict.values().as_ref();
200
80.0k
        total_values += values.len();
201
80.0k
        if single_dictionary {
202
18
            single_dictionary = ptr_eq(first_values, values)
203
79.9k
        }
204
    }
205
206
17
    let overflow = K::Native::from_usize(total_values).is_none();
207
17
    let values_exceed_length = total_values >= len;
208
209
17
    !single_dictionary && (
overflow15
||
values_exceed_length15
)
210
17
}
211
212
/// Given an array of dictionaries and an optional key mask compute a values array
213
/// containing referenced values, along with mappings from the [`DictionaryArray`]
214
/// keys to the new keys within this values array. Best-effort will be made to ensure
215
/// that the dictionary values are unique
216
///
217
/// This method is meant to be very fast and the output dictionary values
218
/// may not be unique, unlike `GenericByteDictionaryBuilder` which is slower
219
/// but produces unique values
220
14
pub(crate) fn merge_dictionary_values<K: ArrowDictionaryKeyType>(
221
14
    dictionaries: &[&DictionaryArray<K>],
222
14
    masks: Option<&[BooleanBuffer]>,
223
14
) -> Result<MergedDictionaries<K>, ArrowError> {
224
14
    let mut num_values = 0;
225
226
14
    let mut values_arrays = Vec::with_capacity(dictionaries.len());
227
14
    let mut value_slices = Vec::with_capacity(dictionaries.len());
228
229
80.0k
    for (idx, dictionary) in 
dictionaries14
.
iter14
().
enumerate14
() {
230
80.0k
        let mask = masks.and_then(|m| 
m8
.
get8
(
idx8
));
231
        let key_mask_owned;
232
80.0k
        let key_mask = match (dictionary.nulls(), mask) {
233
3
            (Some(n), None) => Some(n.inner()),
234
6
            (None, Some(n)) => Some(n),
235
2
            (Some(n), Some(m)) => {
236
2
                key_mask_owned = n.inner() & m;
237
2
                Some(&key_mask_owned)
238
            }
239
80.0k
            (None, None) => None,
240
        };
241
80.0k
        let keys = dictionary.keys().values();
242
80.0k
        let values = dictionary.values().as_ref();
243
80.0k
        let values_mask = compute_values_mask(keys, key_mask, values.len());
244
245
80.0k
        let masked_values = get_masked_values(values, &values_mask);
246
80.0k
        num_values += masked_values.len();
247
80.0k
        value_slices.push(masked_values);
248
80.0k
        values_arrays.push(values)
249
    }
250
251
    // Map from value to new index
252
14
    let mut interner = Interner::new(num_values);
253
    // Interleave indices for new values array
254
14
    let mut indices = Vec::with_capacity(num_values);
255
256
    // Compute the mapping for each dictionary
257
14
    let key_mappings = dictionaries
258
14
        .iter()
259
14
        .enumerate()
260
14
        .zip(value_slices)
261
80.0k
        .
map14
(|((dictionary_idx, dictionary), values)| {
262
80.0k
            let zero = K::Native::from_usize(0).unwrap();
263
80.0k
            let mut mapping = vec![zero; dictionary.values().len()];
264
265
160k
            for (
value_idx80.0k
,
value80.0k
) in values {
266
80.0k
                mapping[value_idx] =
267
80.0k
                    *interner.intern(value, || match 
K::Native::from_usize75
(indices.len()) {
268
75
                        Some(idx) => {
269
75
                            indices.push((dictionary_idx, value_idx));
270
75
                            Ok(idx)
271
                        }
272
0
                        None => Err(ArrowError::DictionaryKeyOverflowError),
273
75
                    })
?0
;
274
            }
275
80.0k
            Ok(mapping)
276
80.0k
        })
277
14
        .collect::<Result<Vec<_>, ArrowError>>()
?0
;
278
279
    Ok(MergedDictionaries {
280
14
        key_mappings,
281
14
        values: interleave(&values_arrays, &indices)
?0
,
282
    })
283
14
}
284
285
/// Return a mask identifying the values that are referenced by keys in `dictionary`
286
/// at the positions indicated by `selection`
287
80.0k
fn compute_values_mask<K: ArrowNativeType>(
288
80.0k
    keys: &ScalarBuffer<K>,
289
80.0k
    mask: Option<&BooleanBuffer>,
290
80.0k
    max_key: usize,
291
80.0k
) -> BooleanBuffer {
292
80.0k
    let mut builder = BooleanBufferBuilder::new(max_key);
293
80.0k
    builder.advance(max_key);
294
295
80.0k
    match mask {
296
11
        Some(n) => n
297
11
            .set_indices()
298
22
            .
for_each11
(|idx| builder.set_bit(keys[idx].as_usize(), true)),
299
80.0k
        None => keys
300
80.0k
            .iter()
301
80.1k
            .
for_each80.0k
(|k| builder.set_bit(k.as_usize(), true)),
302
    }
303
80.0k
    builder.finish()
304
80.0k
}
305
306
/// Process primitive array values to bytes
307
2
fn masked_primitives_to_bytes<'a, T: ArrowPrimitiveType>(
308
2
    array: &'a PrimitiveArray<T>,
309
2
    mask: &BooleanBuffer,
310
2
) -> Vec<(usize, Option<&'a [u8]>)>
311
2
where
312
2
    T::Native: ToByteSlice,
313
{
314
2
    let mut out = Vec::with_capacity(mask.count_set_bits());
315
2
    let values = array.values();
316
2
    for idx in mask.set_indices() {
317
2
        out.push((
318
2
            idx,
319
2
            array.is_valid(idx).then_some(values[idx].to_byte_slice()),
320
2
        ))
321
    }
322
2
    out
323
2
}
324
325
/// Adapter that lets `downcast_primitive!` dispatch to
/// [`masked_primitives_to_bytes`]: it receives the concrete primitive type
/// followed by the array and mask expressions.
macro_rules! masked_primitive_to_bytes_helper {
    ($primitive:ty, $values:expr, $selection:expr) => {
        masked_primitives_to_bytes::<$primitive>($values.as_primitive(), $selection)
    };
}
330
331
/// Return a Vec containing for each set index in `mask`, the index and byte value of that index
332
80.0k
fn get_masked_values<'a>(
333
80.0k
    array: &'a dyn Array,
334
80.0k
    mask: &BooleanBuffer,
335
80.0k
) -> Vec<(usize, Option<&'a [u8]>)> {
336
0
    downcast_primitive! {
337
80.0k
        array.data_type() => (masked_primitive_to_bytes_helper, 
array0
,
mask0
),
338
80.0k
        DataType::Utf8 => masked_bytes(array.as_string::<i32>(), mask),
339
0
        DataType::LargeUtf8 => masked_bytes(array.as_string::<i64>(), mask),
340
0
        DataType::Binary => masked_bytes(array.as_binary::<i32>(), mask),
341
0
        DataType::LargeBinary => masked_bytes(array.as_binary::<i64>(), mask),
342
0
        _ => unimplemented!("Dictionary merging for type {} is not implemented", array.data_type()),
343
    }
344
80.0k
}
345
346
/// Compute [`get_masked_values`] for a [`GenericByteArray`]
347
///
348
/// Note: this does not check the null mask and will return values contained in null slots
349
80.0k
fn masked_bytes<'a, T: ByteArrayType>(
350
80.0k
    array: &'a GenericByteArray<T>,
351
80.0k
    mask: &BooleanBuffer,
352
80.0k
) -> Vec<(usize, Option<&'a [u8]>)> {
353
80.0k
    let mut out = Vec::with_capacity(mask.count_set_bits());
354
80.0k
    for idx in 
mask80.0k
.
set_indices80.0k
() {
355
80.0k
        out.push((
356
80.0k
            idx,
357
80.0k
            array.is_valid(idx).then_some(array.value(idx).as_ref()),
358
80.0k
        ))
359
    }
360
80.0k
    out
361
80.0k
}
362
363
#[cfg(test)]
mod tests {
    use super::*;

    use arrow_array::cast::as_string_array;
    use arrow_array::types::{Int8Type, Int32Type};
    use arrow_array::{DictionaryArray, Int8Array, Int32Array, StringArray};
    use arrow_buffer::{BooleanBuffer, Buffer, NullBuffer, OffsetBuffer};
    use std::sync::Arc;

    /// Collects the (non-null) strings of a string array for easy comparison
    fn string_values(array: &dyn Array) -> Vec<&str> {
        as_string_array(array)
            .iter()
            .map(Option::unwrap)
            .collect()
    }

    #[test]
    fn test_garbage_collect_i32_dictionary() {
        let dict = DictionaryArray::<Int32Type>::new(
            Int32Array::from_iter_values([0, 1, 1, 3, 0, 0, 1]),
            Arc::new(StringArray::from_iter_values(["a", "b", "c", "d"])),
        );

        // Only "a", "b", "d" are referenced, "c" is not
        let collected = garbage_collect_dictionary(&dict).unwrap();

        let expected = DictionaryArray::<Int32Type>::new(
            Int32Array::from_iter_values([0, 1, 1, 2, 0, 0, 1]),
            Arc::new(StringArray::from_iter_values(["a", "b", "d"])),
        );
        assert_eq!(collected, expected);
    }

    #[test]
    fn test_garbage_collect_any_dictionary() {
        let dict = DictionaryArray::<Int32Type>::new(
            Int32Array::from_iter_values([0, 1, 1, 3, 0, 0, 1]),
            Arc::new(StringArray::from_iter_values(["a", "b", "c", "d"])),
        );

        let collected = garbage_collect_any_dictionary(&dict).unwrap();

        let expected = DictionaryArray::<Int32Type>::new(
            Int32Array::from_iter_values([0, 1, 1, 2, 0, 0, 1]),
            Arc::new(StringArray::from_iter_values(["a", "b", "d"])),
        );
        assert_eq!(collected.as_ref(), &expected);
    }

    #[test]
    fn test_garbage_collect_with_nulls() {
        let dict = DictionaryArray::<Int8Type>::new(
            Int8Array::from(vec![Some(2), None, Some(0)]),
            Arc::new(StringArray::from_iter_values(["a", "b", "c"])),
        );

        let collected = garbage_collect_dictionary(&dict).unwrap();

        // "b" is only "referenced" by the null slot, so it is dropped
        let expected = DictionaryArray::<Int8Type>::new(
            Int8Array::from(vec![Some(1), None, Some(0)]),
            Arc::new(StringArray::from_iter_values(["a", "c"])),
        );
        assert_eq!(collected, expected);
    }

    #[test]
    fn test_garbage_collect_empty_dictionary() {
        let dict = DictionaryArray::<Int32Type>::new(
            Int32Array::from_iter_values([]),
            Arc::new(StringArray::from_iter_values::<&str, _>([])),
        );

        let collected = garbage_collect_dictionary(&dict).unwrap();

        assert_eq!(collected, dict);
    }

    #[test]
    fn test_garbage_collect_dictionary_all_unreferenced() {
        let dict = DictionaryArray::<Int32Type>::new(
            Int32Array::from(vec![None, None, None]),
            Arc::new(StringArray::from_iter_values(["a", "b", "c"])),
        );

        let collected = garbage_collect_dictionary(&dict).unwrap();

        // All keys are null, so dictionary values can be empty
        let expected = DictionaryArray::<Int32Type>::new(
            Int32Array::from(vec![None, None, None]),
            Arc::new(StringArray::from_iter_values::<&str, _>([])),
        );
        assert_eq!(collected, expected);
    }

    #[test]
    fn test_merge_strings() {
        let a = DictionaryArray::<Int32Type>::from_iter(["a", "b", "a", "b", "d", "c", "e"]);
        let b = DictionaryArray::<Int32Type>::from_iter(["c", "f", "c", "d", "a", "d"]);

        // Unmasked merge of both complete dictionaries
        let merged = merge_dictionary_values(&[&a, &b], None).unwrap();
        assert_eq!(
            string_values(merged.values.as_ref()),
            ["a", "b", "d", "c", "e", "f"]
        );
        assert_eq!(merged.key_mappings.len(), 2);
        assert_eq!(&merged.key_mappings[0], &[0, 1, 2, 3, 4]);
        assert_eq!(&merged.key_mappings[1], &[3, 5, 2, 0]);

        // A slice of `a` references fewer of its values
        let a_slice = a.slice(1, 4);
        let merged = merge_dictionary_values(&[&a_slice, &b], None).unwrap();
        assert_eq!(
            string_values(merged.values.as_ref()),
            ["a", "b", "d", "c", "f"]
        );
        assert_eq!(merged.key_mappings.len(), 2);
        assert_eq!(&merged.key_mappings[0], &[0, 1, 2, 0, 0]);
        assert_eq!(&merged.key_mappings[1], &[3, 4, 2, 0]);

        // Mask out only ["b", "b", "d"] from a
        let a_mask = BooleanBuffer::from_iter([false, true, false, true, true, false, false]);
        let b_mask = BooleanBuffer::new_set(b.len());
        let merged = merge_dictionary_values(&[&a, &b], Some(&[a_mask, b_mask])).unwrap();
        assert_eq!(
            string_values(merged.values.as_ref()),
            ["b", "d", "c", "f", "a"]
        );
        assert_eq!(merged.key_mappings.len(), 2);
        assert_eq!(&merged.key_mappings[0], &[0, 0, 1, 0, 0]);
        assert_eq!(&merged.key_mappings[1], &[2, 3, 1, 4]);
    }

    #[test]
    fn test_merge_nulls() {
        let data = Buffer::from(b"helloworldbingohelloworld");
        let offsets = OffsetBuffer::from_lengths([5, 5, 5, 5, 5]);
        let value_nulls = NullBuffer::from(vec![true, false, true, true, true]);
        let values = StringArray::new(offsets, data, Some(value_nulls));

        let raw_keys = vec![1, 2, 3, 1, 8, 2, 3];
        let key_nulls = NullBuffer::from(vec![true, true, false, true, false, true, true]);
        let keys = Int32Array::new(raw_keys.into(), Some(key_nulls));
        let a = DictionaryArray::new(keys, Arc::new(values));
        // [NULL, "bingo", NULL, NULL, NULL, "bingo", "hello"]

        let b = DictionaryArray::new(Int32Array::new_null(10), Arc::new(StringArray::new_null(0)));

        let merged = merge_dictionary_values(&[&a, &b], None).unwrap();

        let expected = StringArray::from(vec![None, Some("bingo"), Some("hello")]);
        assert_eq!(merged.values.as_ref(), &expected);
        assert_eq!(merged.key_mappings.len(), 2);
        assert_eq!(&merged.key_mappings[0], &[0, 0, 1, 2, 0]);
        assert_eq!(&merged.key_mappings[1], &[] as &[i32; 0]);
    }

    #[test]
    fn test_merge_keys_smaller() {
        let a = DictionaryArray::new(
            Int32Array::from_iter_values([1]),
            Arc::new(StringArray::from_iter_values(["a", "b"])),
        );

        let merged = merge_dictionary_values(&[&a], None).unwrap();

        let expected = StringArray::from(vec!["b"]);
        assert_eq!(merged.values.as_ref(), &expected);
    }
}