Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-ord/src/sort.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! Defines sort kernel for `ArrayRef`
19
20
use crate::ord::{make_comparator, DynComparator};
21
use arrow_array::builder::BufferBuilder;
22
use arrow_array::cast::*;
23
use arrow_array::types::*;
24
use arrow_array::*;
25
use arrow_buffer::ArrowNativeType;
26
use arrow_buffer::BooleanBufferBuilder;
27
use arrow_data::{ArrayDataBuilder, ByteView, MAX_INLINE_VIEW_LEN};
28
use arrow_schema::{ArrowError, DataType};
29
use arrow_select::take::take;
30
use std::cmp::Ordering;
31
use std::sync::Arc;
32
33
use crate::rank::{can_rank, rank};
34
pub use arrow_schema::SortOptions;
35
36
/// Sort the `ArrayRef` using `SortOptions`.
37
///
38
/// Performs a sort on values and indices. Nulls are ordered according
39
/// to the `nulls_first` flag in `options`.  Floats are sorted using
40
/// IEEE 754 totalOrder
41
///
42
/// Returns an `ArrowError::ComputeError(String)` if the array type is
43
/// either unsupported by `sort_to_indices` or `take`.
44
///
45
/// Note: this is an unstable_sort, meaning it may not preserve the
46
/// order of equal elements.
47
///
48
/// # Example
49
/// ```rust
50
/// # use std::sync::Arc;
51
/// # use arrow_array::Int32Array;
52
/// # use arrow_ord::sort::sort;
53
/// let array = Int32Array::from(vec![5, 4, 3, 2, 1]);
54
/// let sorted_array = sort(&array, None).unwrap();
55
/// assert_eq!(sorted_array.as_ref(), &Int32Array::from(vec![1, 2, 3, 4, 5]));
56
/// ```
57
0
pub fn sort(values: &dyn Array, options: Option<SortOptions>) -> Result<ArrayRef, ArrowError> {
58
0
    downcast_primitive_array!(
59
0
        values => sort_native_type(values, options),
60
0
        DataType::RunEndEncoded(_, _) => sort_run(values, options, None),
61
        _ => {
62
0
            let indices = sort_to_indices(values, options, None)?;
63
0
            take(values, &indices, None)
64
        }
65
    )
66
0
}
67
68
0
fn sort_native_type<T>(
69
0
    primitive_values: &PrimitiveArray<T>,
70
0
    options: Option<SortOptions>,
71
0
) -> Result<ArrayRef, ArrowError>
72
0
where
73
0
    T: ArrowPrimitiveType,
74
{
75
0
    let sort_options = options.unwrap_or_default();
76
77
0
    let mut mutable_buffer = vec![T::default_value(); primitive_values.len()];
78
0
    let mutable_slice = &mut mutable_buffer;
79
80
0
    let input_values = primitive_values.values().as_ref();
81
82
0
    let nulls_count = primitive_values.null_count();
83
0
    let valid_count = primitive_values.len() - nulls_count;
84
85
0
    let null_bit_buffer = match nulls_count > 0 {
86
        true => {
87
0
            let mut validity_buffer = BooleanBufferBuilder::new(primitive_values.len());
88
0
            if sort_options.nulls_first {
89
0
                validity_buffer.append_n(nulls_count, false);
90
0
                validity_buffer.append_n(valid_count, true);
91
0
            } else {
92
0
                validity_buffer.append_n(valid_count, true);
93
0
                validity_buffer.append_n(nulls_count, false);
94
0
            }
95
0
            Some(validity_buffer.finish().into())
96
        }
97
0
        false => None,
98
    };
99
100
0
    if let Some(nulls) = primitive_values.nulls().filter(|n| n.null_count() > 0) {
101
0
        let values_slice = match sort_options.nulls_first {
102
0
            true => &mut mutable_slice[nulls_count..],
103
0
            false => &mut mutable_slice[..valid_count],
104
        };
105
106
0
        for (write_index, index) in nulls.valid_indices().enumerate() {
107
0
            values_slice[write_index] = primitive_values.value(index);
108
0
        }
109
110
0
        values_slice.sort_unstable_by(|a, b| a.compare(*b));
111
0
        if sort_options.descending {
112
0
            values_slice.reverse();
113
0
        }
114
    } else {
115
0
        mutable_slice.copy_from_slice(input_values);
116
0
        mutable_slice.sort_unstable_by(|a, b| a.compare(*b));
117
0
        if sort_options.descending {
118
0
            mutable_slice.reverse();
119
0
        }
120
    }
121
122
0
    Ok(Arc::new(
123
0
        PrimitiveArray::<T>::new(mutable_buffer.into(), null_bit_buffer)
124
0
            .with_data_type(primitive_values.data_type().clone()),
125
0
    ))
126
0
}
127
128
/// Sort the `ArrayRef` partially.
129
///
130
/// If `limit` is specified, the resulting array will contain only
131
/// the first `limit` elements in the sort order. Any data after the limit
132
/// will be discarded.
133
///
134
/// Note: this is an unstable_sort, meaning it may not preserve the
135
/// order of equal elements.
136
///
137
/// # Example
138
/// ```rust
139
/// # use std::sync::Arc;
140
/// # use arrow_array::Int32Array;
141
/// # use arrow_ord::sort::{sort_limit, SortOptions};
142
/// let array = Int32Array::from(vec![5, 4, 3, 2, 1]);
143
///
144
/// // Find the top 2 items
145
/// let sorted_array = sort_limit(&array, None, Some(2)).unwrap();
146
/// assert_eq!(sorted_array.as_ref(), &Int32Array::from(vec![1, 2]));
147
///
148
/// // Find the top 2 items in descending order
149
/// let options = Some(SortOptions {
150
///                  descending: true,
151
///                  ..Default::default()
152
///               });
153
/// let sorted_array = sort_limit(&array, options, Some(2)).unwrap();
154
/// assert_eq!(sorted_array.as_ref(), &Int32Array::from(vec![5, 4]));
155
/// ```
156
0
pub fn sort_limit(
157
0
    values: &dyn Array,
158
0
    options: Option<SortOptions>,
159
0
    limit: Option<usize>,
160
0
) -> Result<ArrayRef, ArrowError> {
161
0
    if let DataType::RunEndEncoded(_, _) = values.data_type() {
162
0
        return sort_run(values, options, limit);
163
0
    }
164
0
    let indices = sort_to_indices(values, options, limit)?;
165
0
    take(values, &indices, None)
166
0
}
167
168
/// We can only do this if `T` is primitive
169
#[inline]
170
0
fn sort_unstable_by<T, F>(array: &mut [T], limit: usize, cmp: F)
171
0
where
172
0
    F: FnMut(&T, &T) -> Ordering,
173
{
174
0
    if array.len() == limit {
175
0
        array.sort_unstable_by(cmp);
176
0
    } else {
177
0
        partial_sort(array, limit, cmp);
178
0
    }
179
0
}
180
181
/// Partition indices of an Arrow array into two categories:
182
/// - `valid`: indices of non-null elements
183
/// - `nulls`: indices of null elements
184
///
185
/// Optimized for performance with fast-path for all-valid arrays
186
/// and bit-parallel scan for null-containing arrays.
187
#[inline(always)]
188
0
pub fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) {
189
0
    let len = array.len();
190
0
    let null_count = array.null_count();
191
192
    // Fast path: if there are no nulls, all elements are valid
193
0
    if null_count == 0 {
194
        // Simply return a range of indices [0, len)
195
0
        let valid = (0..len as u32).collect();
196
0
        return (valid, Vec::new());
197
0
    }
198
199
    // null bitmap exists and some values are null
200
0
    partition_validity_scan(array, len, null_count)
201
0
}
202
203
/// Scans the null bitmap and partitions valid/null indices efficiently.
204
/// Uses bit-level operations to extract bit positions.
205
/// This function is only called when nulls exist.
206
#[inline(always)]
207
0
fn partition_validity_scan(
208
0
    array: &dyn Array,
209
0
    len: usize,
210
0
    null_count: usize,
211
0
) -> (Vec<u32>, Vec<u32>) {
212
    // SAFETY: Guaranteed by caller that null_count > 0, so bitmap must exist
213
0
    let bitmap = array.nulls().unwrap();
214
215
    // Preallocate result vectors with exact capacities (avoids reallocations)
216
0
    let mut valid = Vec::with_capacity(len - null_count);
217
0
    let mut nulls = Vec::with_capacity(null_count);
218
219
    unsafe {
220
        // 1) Write valid indices (bits == 1)
221
0
        let valid_slice = valid.spare_capacity_mut();
222
0
        for (i, idx) in bitmap.inner().set_indices_u32().enumerate() {
223
0
            valid_slice[i].write(idx);
224
0
        }
225
226
        // 2) Write null indices by inverting
227
0
        let inv_buf = !bitmap.inner();
228
0
        let null_slice = nulls.spare_capacity_mut();
229
0
        for (i, idx) in inv_buf.set_indices_u32().enumerate() {
230
0
            null_slice[i].write(idx);
231
0
        }
232
233
        // Finalize lengths
234
0
        valid.set_len(len - null_count);
235
0
        nulls.set_len(null_count);
236
    }
237
238
0
    assert_eq!(valid.len(), len - null_count);
239
0
    assert_eq!(nulls.len(), null_count);
240
0
    (valid, nulls)
241
0
}
242
243
/// Whether `sort_to_indices` can sort an array of given data type.
244
0
fn can_sort_to_indices(data_type: &DataType) -> bool {
245
0
    data_type.is_primitive()
246
0
        || matches!(
247
0
            data_type,
248
            DataType::Boolean
249
                | DataType::Utf8
250
                | DataType::LargeUtf8
251
                | DataType::Utf8View
252
                | DataType::Binary
253
                | DataType::LargeBinary
254
                | DataType::BinaryView
255
                | DataType::FixedSizeBinary(_)
256
        )
257
0
        || match data_type {
258
0
            DataType::List(f) if can_rank(f.data_type()) => true,
259
0
            DataType::LargeList(f) if can_rank(f.data_type()) => true,
260
0
            DataType::FixedSizeList(f, _) if can_rank(f.data_type()) => true,
261
0
            DataType::Dictionary(_, values) if can_rank(values.as_ref()) => true,
262
0
            DataType::RunEndEncoded(_, f) if can_sort_to_indices(f.data_type()) => true,
263
0
            _ => false,
264
        }
265
0
}
266
267
/// Sort elements from `ArrayRef` into an unsigned integer (`UInt32Array`) of indices.
268
/// Floats are sorted using IEEE 754 totalOrder.  `limit` is an option for [partial_sort].
269
0
pub fn sort_to_indices(
270
0
    array: &dyn Array,
271
0
    options: Option<SortOptions>,
272
0
    limit: Option<usize>,
273
0
) -> Result<UInt32Array, ArrowError> {
274
0
    let options = options.unwrap_or_default();
275
276
0
    let (v, n) = partition_validity(array);
277
278
0
    Ok(downcast_primitive_array! {
279
0
        array => sort_primitive(array, v, n, options, limit),
280
0
        DataType::Boolean => sort_boolean(array.as_boolean(), v, n, options, limit),
281
0
        DataType::Utf8 => sort_bytes(array.as_string::<i32>(), v, n, options, limit),
282
0
        DataType::LargeUtf8 => sort_bytes(array.as_string::<i64>(), v, n, options, limit),
283
0
        DataType::Utf8View => sort_byte_view(array.as_string_view(), v, n, options, limit),
284
0
        DataType::Binary => sort_bytes(array.as_binary::<i32>(), v, n, options, limit),
285
0
        DataType::LargeBinary => sort_bytes(array.as_binary::<i64>(), v, n, options, limit),
286
0
        DataType::BinaryView => sort_byte_view(array.as_binary_view(), v, n, options, limit),
287
0
        DataType::FixedSizeBinary(_) => sort_fixed_size_binary(array.as_fixed_size_binary(), v, n, options, limit),
288
0
        DataType::List(_) => sort_list(array.as_list::<i32>(), v, n, options, limit)?,
289
0
        DataType::LargeList(_) => sort_list(array.as_list::<i64>(), v, n, options, limit)?,
290
0
        DataType::FixedSizeList(_, _) => sort_fixed_size_list(array.as_fixed_size_list(), v, n, options, limit)?,
291
0
        DataType::Dictionary(_, _) => downcast_dictionary_array!{
292
0
            array => sort_dictionary(array, v, n, options, limit)?,
293
0
            _ => unreachable!()
294
        }
295
0
        DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
296
0
            DataType::Int16 => sort_run_to_indices::<Int16Type>(array, options, limit),
297
0
            DataType::Int32 => sort_run_to_indices::<Int32Type>(array, options, limit),
298
0
            DataType::Int64 => sort_run_to_indices::<Int64Type>(array, options, limit),
299
0
            dt => {
300
0
                return Err(ArrowError::ComputeError(format!(
301
0
                    "Invalid run end data type: {dt}"
302
0
                )))
303
            }
304
        },
305
0
        t => {
306
0
            return Err(ArrowError::ComputeError(format!(
307
0
                "Sort not supported for data type {t:?}"
308
0
            )));
309
        }
310
    })
311
0
}
312
313
0
fn sort_boolean(
314
0
    values: &BooleanArray,
315
0
    value_indices: Vec<u32>,
316
0
    null_indices: Vec<u32>,
317
0
    options: SortOptions,
318
0
    limit: Option<usize>,
319
0
) -> UInt32Array {
320
0
    let mut valids = value_indices
321
0
        .into_iter()
322
0
        .map(|index| (index, values.value(index as usize)))
323
0
        .collect::<Vec<(u32, bool)>>();
324
0
    sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into()
325
0
}
326
327
0
fn sort_primitive<T: ArrowPrimitiveType>(
328
0
    values: &PrimitiveArray<T>,
329
0
    value_indices: Vec<u32>,
330
0
    nulls: Vec<u32>,
331
0
    options: SortOptions,
332
0
    limit: Option<usize>,
333
0
) -> UInt32Array {
334
0
    let mut valids = value_indices
335
0
        .into_iter()
336
0
        .map(|index| (index, values.value(index as usize)))
337
0
        .collect::<Vec<(u32, T::Native)>>();
338
0
    sort_impl(options, &mut valids, &nulls, limit, T::Native::compare).into()
339
0
}
340
341
0
fn sort_bytes<T: ByteArrayType>(
342
0
    values: &GenericByteArray<T>,
343
0
    value_indices: Vec<u32>,
344
0
    nulls: Vec<u32>,
345
0
    options: SortOptions,
346
0
    limit: Option<usize>,
347
0
) -> UInt32Array {
348
    // Note: Why do we use 4‑byte prefix?
349
    // Compute the 4‑byte prefix in BE order, zero‑padding on the right if shorter.
350
    // Most byte‐sequences differ in their first few bytes, so by
351
    // comparing up to 4 bytes as a single u32 we avoid the overhead
352
    // of a full lexicographical compare for the vast majority of cases.
353
354
    // 1. Build a vector of (index, prefix, length) tuples
355
0
    let mut valids: Vec<(u32, u32, u64)> = value_indices
356
0
        .into_iter()
357
0
        .map(|idx| unsafe {
358
0
            let slice: &[u8] = values.value_unchecked(idx as usize).as_ref();
359
0
            let len = slice.len() as u64;
360
            // Compute the 4‑byte prefix in BE order, zero‑padding on the right if shorter
361
0
            let prefix = if slice.len() >= 4 {
362
0
                let raw = std::ptr::read_unaligned(slice.as_ptr() as *const u32);
363
0
                u32::from_be(raw)
364
0
            } else if slice.is_empty() {
365
                // Handle empty slice case to avoid shift overflow
366
0
                0u32
367
            } else {
368
0
                let mut v = 0u32;
369
0
                for &b in slice {
370
0
                    v = (v << 8) | (b as u32);
371
0
                }
372
                // Safe shift: slice.len() is in range [1, 3], so shift is in range [8, 24]
373
0
                v << (8 * (4 - slice.len()))
374
            };
375
0
            (idx, prefix, len)
376
0
        })
377
0
        .collect();
378
379
    // 2. compute the number of non-null entries to partially sort
380
0
    let vlimit = match (limit, options.nulls_first) {
381
0
        (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
382
0
        _ => valids.len(),
383
    };
384
385
    // 3. Comparator: compare prefix, then (when either slice is shorter than 4) length, otherwise full slice
386
0
    let cmp_bytes = |a: &(u32, u32, u64), b: &(u32, u32, u64)| unsafe {
387
0
        let (ia, pa, la) = *a;
388
0
        let (ib, pb, lb) = *b;
389
        // 3.1 prefix (first 4 bytes)
390
0
        let ord = pa.cmp(&pb);
391
0
        if ord != Ordering::Equal {
392
0
            return ord;
393
0
        }
394
        // 3.2 only if either slice had length < 4 (so its prefix was zero-padded): compare lengths
395
0
        if la < 4 || lb < 4 {
396
0
            let ord = la.cmp(&lb);
397
0
            if ord != Ordering::Equal {
398
0
                return ord;
399
0
            }
400
0
        }
401
        // 3.3 full lexicographical compare
402
0
        let a_bytes: &[u8] = values.value_unchecked(ia as usize).as_ref();
403
0
        let b_bytes: &[u8] = values.value_unchecked(ib as usize).as_ref();
404
0
        a_bytes.cmp(b_bytes)
405
0
    };
406
407
    // 4. Partially sort according to ascending/descending
408
0
    if !options.descending {
409
0
        sort_unstable_by(&mut valids, vlimit, cmp_bytes);
410
0
    } else {
411
0
        sort_unstable_by(&mut valids, vlimit, |x, y| cmp_bytes(x, y).reverse());
412
    }
413
414
    // 5. Assemble nulls and sorted indices into final output
415
0
    let total = valids.len() + nulls.len();
416
0
    let out_limit = limit.unwrap_or(total).min(total);
417
0
    let mut out = Vec::with_capacity(out_limit);
418
419
0
    if options.nulls_first {
420
0
        out.extend_from_slice(&nulls[..nulls.len().min(out_limit)]);
421
0
        let rem = out_limit - out.len();
422
0
        out.extend(valids.iter().map(|&(i, _, _)| i).take(rem));
423
    } else {
424
0
        out.extend(valids.iter().map(|&(i, _, _)| i).take(out_limit));
425
0
        let rem = out_limit - out.len();
426
0
        out.extend_from_slice(&nulls[..rem]);
427
    }
428
429
0
    out.into()
430
0
}
431
432
0
fn sort_byte_view<T: ByteViewType>(
433
0
    values: &GenericByteViewArray<T>,
434
0
    value_indices: Vec<u32>,
435
0
    nulls: Vec<u32>,
436
0
    options: SortOptions,
437
0
    limit: Option<usize>,
438
0
) -> UInt32Array {
439
    // 1. Build a list of (index, key) pairs, where key is the inline key or the raw view
440
    let mut valids: Vec<_>;
441
    // 2. Compute the number of non-null entries to partially sort
442
0
    let vlimit: usize = match (limit, options.nulls_first) {
443
0
        (Some(l), true) => l.saturating_sub(nulls.len()).min(value_indices.len()),
444
0
        _ => value_indices.len(),
445
    };
446
    // 3.a Check if all views are inline (no data buffers)
447
0
    if values.data_buffers().is_empty() {
448
0
        valids = value_indices
449
0
            .into_iter()
450
0
            .map(|idx| {
451
                // SAFETY: we know idx < values.len()
452
0
                let raw = unsafe { *values.views().get_unchecked(idx as usize) };
453
0
                let inline_key = GenericByteViewArray::<T>::inline_key_fast(raw);
454
0
                (idx, inline_key)
455
0
            })
456
0
            .collect();
457
0
        let cmp_inline = |a: &(u32, u128), b: &(u32, u128)| a.1.cmp(&b.1);
458
459
        // Partially sort according to ascending/descending
460
0
        if !options.descending {
461
0
            sort_unstable_by(&mut valids, vlimit, cmp_inline);
462
0
        } else {
463
0
            sort_unstable_by(&mut valids, vlimit, |x, y| cmp_inline(x, y).reverse());
464
        }
465
    } else {
466
0
        valids = value_indices
467
0
            .into_iter()
468
0
            .map(|idx| {
469
                // SAFETY: we know idx < values.len()
470
0
                let raw = unsafe { *values.views().get_unchecked(idx as usize) };
471
0
                (idx, raw)
472
0
            })
473
0
            .collect();
474
        // 3.b Mixed comparator: first prefix, then inline vs full comparison
475
0
        let cmp_mixed = |a: &(u32, u128), b: &(u32, u128)| {
476
0
            let (_, raw_a) = *a;
477
0
            let (_, raw_b) = *b;
478
0
            let len_a = raw_a as u32;
479
0
            let len_b = raw_b as u32;
480
            // 3.b.1 Both inline (≤12 bytes): compare full 128-bit key including length
481
0
            if len_a <= MAX_INLINE_VIEW_LEN && len_b <= MAX_INLINE_VIEW_LEN {
482
0
                return GenericByteViewArray::<T>::inline_key_fast(raw_a)
483
0
                    .cmp(&GenericByteViewArray::<T>::inline_key_fast(raw_b));
484
0
            }
485
486
            // 3.b.2 Compare 4-byte prefix in big-endian order
487
0
            let pref_a = ByteView::from(raw_a).prefix.swap_bytes();
488
0
            let pref_b = ByteView::from(raw_b).prefix.swap_bytes();
489
0
            if pref_a != pref_b {
490
0
                return pref_a.cmp(&pref_b);
491
0
            }
492
493
            // 3.b.3 Fallback to full byte-slice comparison
494
0
            let full_a: &[u8] = unsafe { values.value_unchecked(a.0 as usize).as_ref() };
495
0
            let full_b: &[u8] = unsafe { values.value_unchecked(b.0 as usize).as_ref() };
496
0
            full_a.cmp(full_b)
497
0
        };
498
499
        // 3.b.4 Partially sort according to ascending/descending
500
0
        if !options.descending {
501
0
            sort_unstable_by(&mut valids, vlimit, cmp_mixed);
502
0
        } else {
503
0
            sort_unstable_by(&mut valids, vlimit, |x, y| cmp_mixed(x, y).reverse());
504
        }
505
    }
506
507
    // 5. Assemble nulls and sorted indices into final output
508
0
    let total = valids.len() + nulls.len();
509
0
    let out_limit = limit.unwrap_or(total).min(total);
510
0
    let mut out = Vec::with_capacity(total);
511
512
0
    if options.nulls_first {
513
        // Place null indices first
514
0
        out.extend_from_slice(&nulls[..nulls.len().min(out_limit)]);
515
0
        let rem = out_limit - out.len();
516
0
        out.extend(valids.iter().map(|&(i, _)| i).take(rem));
517
    } else {
518
        // Place non-null indices first
519
0
        out.extend(valids.iter().map(|&(i, _)| i).take(out_limit));
520
0
        let rem = out_limit - out.len();
521
0
        out.extend_from_slice(&nulls[..rem]);
522
    }
523
524
0
    out.into()
525
0
}
526
527
0
fn sort_fixed_size_binary(
528
0
    values: &FixedSizeBinaryArray,
529
0
    value_indices: Vec<u32>,
530
0
    nulls: Vec<u32>,
531
0
    options: SortOptions,
532
0
    limit: Option<usize>,
533
0
) -> UInt32Array {
534
0
    let mut valids = value_indices
535
0
        .iter()
536
0
        .copied()
537
0
        .map(|index| (index, values.value(index as usize)))
538
0
        .collect::<Vec<(u32, &[u8])>>();
539
0
    sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
540
0
}
541
542
0
fn sort_dictionary<K: ArrowDictionaryKeyType>(
543
0
    dict: &DictionaryArray<K>,
544
0
    value_indices: Vec<u32>,
545
0
    null_indices: Vec<u32>,
546
0
    options: SortOptions,
547
0
    limit: Option<usize>,
548
0
) -> Result<UInt32Array, ArrowError> {
549
0
    let keys: &PrimitiveArray<K> = dict.keys();
550
0
    let rank = child_rank(dict.values().as_ref(), options)?;
551
552
    // create tuples that are used for sorting
553
0
    let mut valids = value_indices
554
0
        .into_iter()
555
0
        .map(|index| {
556
0
            let key: K::Native = keys.value(index as usize);
557
0
            (index, rank[key.as_usize()])
558
0
        })
559
0
        .collect::<Vec<(u32, u32)>>();
560
561
0
    Ok(sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into())
562
0
}
563
564
0
fn sort_list<O: OffsetSizeTrait>(
565
0
    array: &GenericListArray<O>,
566
0
    value_indices: Vec<u32>,
567
0
    null_indices: Vec<u32>,
568
0
    options: SortOptions,
569
0
    limit: Option<usize>,
570
0
) -> Result<UInt32Array, ArrowError> {
571
0
    let rank = child_rank(array.values().as_ref(), options)?;
572
0
    let offsets = array.value_offsets();
573
0
    let mut valids = value_indices
574
0
        .into_iter()
575
0
        .map(|index| {
576
0
            let end = offsets[index as usize + 1].as_usize();
577
0
            let start = offsets[index as usize].as_usize();
578
0
            (index, &rank[start..end])
579
0
        })
580
0
        .collect::<Vec<(u32, &[u32])>>();
581
0
    Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
582
0
}
583
584
0
fn sort_fixed_size_list(
585
0
    array: &FixedSizeListArray,
586
0
    value_indices: Vec<u32>,
587
0
    null_indices: Vec<u32>,
588
0
    options: SortOptions,
589
0
    limit: Option<usize>,
590
0
) -> Result<UInt32Array, ArrowError> {
591
0
    let rank = child_rank(array.values().as_ref(), options)?;
592
0
    let size = array.value_length() as usize;
593
0
    let mut valids = value_indices
594
0
        .into_iter()
595
0
        .map(|index| {
596
0
            let start = index as usize * size;
597
0
            (index, &rank[start..start + size])
598
0
        })
599
0
        .collect::<Vec<(u32, &[u32])>>();
600
0
    Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
601
0
}
602
603
#[inline(never)]
604
0
fn sort_impl<T: Copy>(
605
0
    options: SortOptions,
606
0
    valids: &mut [(u32, T)],
607
0
    nulls: &[u32],
608
0
    limit: Option<usize>,
609
0
    mut cmp: impl FnMut(T, T) -> Ordering,
610
0
) -> Vec<u32> {
611
0
    let v_limit = match (limit, options.nulls_first) {
612
0
        (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
613
0
        _ => valids.len(),
614
    };
615
616
0
    match options.descending {
617
0
        false => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1)),
618
0
        true => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1).reverse()),
619
    }
620
621
0
    let len = valids.len() + nulls.len();
622
0
    let limit = limit.unwrap_or(len).min(len);
623
0
    let mut out = Vec::with_capacity(len);
624
0
    match options.nulls_first {
625
        true => {
626
0
            out.extend_from_slice(&nulls[..nulls.len().min(limit)]);
627
0
            let remaining = limit - out.len();
628
0
            out.extend(valids.iter().map(|x| x.0).take(remaining));
629
        }
630
        false => {
631
0
            out.extend(valids.iter().map(|x| x.0).take(limit));
632
0
            let remaining = limit - out.len();
633
0
            out.extend_from_slice(&nulls[..remaining])
634
        }
635
    }
636
0
    out
637
0
}
638
639
/// Computes the rank for a set of child values
640
0
fn child_rank(values: &dyn Array, options: SortOptions) -> Result<Vec<u32>, ArrowError> {
641
    // If parent sort order is descending we need to invert the value of nulls_first so that
642
    // when the parent is sorted based on the produced ranks, nulls are still ordered correctly
643
0
    let value_options = Some(SortOptions {
644
0
        descending: false,
645
0
        nulls_first: options.nulls_first != options.descending,
646
0
    });
647
0
    rank(values, value_options)
648
0
}
649
650
// Sort run array and return sorted run array.
651
// The output RunArray will be encoded at the same level as input run array.
652
// For example, an input RunArray { run_ends = [2,4,6,8], values = [1,2,1,2] }
653
// will result in output RunArray { run_ends = [2,4,6,8], values = [1,1,2,2] }
654
// and not RunArray { run_ends = [4,8], values = [1,2] }
655
0
fn sort_run(
656
0
    values: &dyn Array,
657
0
    options: Option<SortOptions>,
658
0
    limit: Option<usize>,
659
0
) -> Result<ArrayRef, ArrowError> {
660
0
    match values.data_type() {
661
0
        DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
662
0
            DataType::Int16 => sort_run_downcasted::<Int16Type>(values, options, limit),
663
0
            DataType::Int32 => sort_run_downcasted::<Int32Type>(values, options, limit),
664
0
            DataType::Int64 => sort_run_downcasted::<Int64Type>(values, options, limit),
665
0
            dt => unreachable!("Not valid run ends data type {dt}"),
666
        },
667
0
        dt => Err(ArrowError::InvalidArgumentError(format!(
668
0
            "Input is not a run encoded array. Input data type {dt}"
669
0
        ))),
670
    }
671
0
}
672
673
0
fn sort_run_downcasted<R: RunEndIndexType>(
674
0
    values: &dyn Array,
675
0
    options: Option<SortOptions>,
676
0
    limit: Option<usize>,
677
0
) -> Result<ArrayRef, ArrowError> {
678
0
    let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
679
680
    // Determine the length of output run array.
681
0
    let output_len = if let Some(limit) = limit {
682
0
        limit.min(run_array.len())
683
    } else {
684
0
        run_array.len()
685
    };
686
687
0
    let run_ends = run_array.run_ends();
688
689
0
    let mut new_run_ends_builder = BufferBuilder::<R::Native>::new(run_ends.len());
690
0
    let mut new_run_end: usize = 0;
691
0
    let mut new_physical_len: usize = 0;
692
693
0
    let consume_runs = |run_length, _| {
694
0
        new_run_end += run_length;
695
0
        new_physical_len += 1;
696
0
        new_run_ends_builder.append(R::Native::from_usize(new_run_end).unwrap());
697
0
    };
698
699
0
    let (values_indices, run_values) = sort_run_inner(run_array, options, output_len, consume_runs);
700
701
0
    let new_run_ends = unsafe {
702
        // Safety:
703
        // The function builds a valid run_ends array and hence need not be validated.
704
0
        ArrayDataBuilder::new(R::DATA_TYPE)
705
0
            .len(new_physical_len)
706
0
            .add_buffer(new_run_ends_builder.finish())
707
0
            .build_unchecked()
708
    };
709
710
    // slice the sorted value indices based on limit.
711
0
    let new_values_indices: PrimitiveArray<UInt32Type> = values_indices
712
0
        .slice(0, new_run_ends.len())
713
0
        .into_data()
714
0
        .into();
715
716
0
    let new_values = take(&run_values, &new_values_indices, None)?;
717
718
    // Build sorted run array
719
0
    let builder = ArrayDataBuilder::new(run_array.data_type().clone())
720
0
        .len(new_run_end)
721
0
        .add_child_data(new_run_ends)
722
0
        .add_child_data(new_values.into_data());
723
0
    let array_data: RunArray<R> = unsafe {
724
        // Safety:
725
        //  This function builds a valid run array and hence can skip validation.
726
0
        builder.build_unchecked().into()
727
    };
728
0
    Ok(Arc::new(array_data))
729
0
}
730
731
// Sort to indices for run encoded array.
732
// This function will be slow for run array as it decodes the physical indices to
733
// logical indices and to get the run array back, the logical indices have to be
734
// encoded back to run array.
735
0
fn sort_run_to_indices<R: RunEndIndexType>(
736
0
    values: &dyn Array,
737
0
    options: SortOptions,
738
0
    limit: Option<usize>,
739
0
) -> UInt32Array {
740
0
    let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
741
0
    let output_len = if let Some(limit) = limit {
742
0
        limit.min(run_array.len())
743
    } else {
744
0
        run_array.len()
745
    };
746
0
    let mut result: Vec<u32> = Vec::with_capacity(output_len);
747
748
    //Add all logical indices belonging to a physical index to the output
749
0
    let consume_runs = |run_length, logical_start| {
750
0
        result.extend(logical_start as u32..(logical_start + run_length) as u32);
751
0
    };
752
0
    sort_run_inner(run_array, Some(options), output_len, consume_runs);
753
754
0
    UInt32Array::from(result)
755
0
}
756
757
0
fn sort_run_inner<R: RunEndIndexType, F>(
758
0
    run_array: &RunArray<R>,
759
0
    options: Option<SortOptions>,
760
0
    output_len: usize,
761
0
    mut consume_runs: F,
762
0
) -> (PrimitiveArray<UInt32Type>, ArrayRef)
763
0
where
764
0
    F: FnMut(usize, usize),
765
{
766
    // slice the run_array.values based on offset and length.
767
0
    let start_physical_index = run_array.get_start_physical_index();
768
0
    let end_physical_index = run_array.get_end_physical_index();
769
0
    let physical_len = end_physical_index - start_physical_index + 1;
770
0
    let run_values = run_array.values().slice(start_physical_index, physical_len);
771
772
    // All the values have to be sorted irrespective of input limit.
773
0
    let values_indices = sort_to_indices(&run_values, options, None).unwrap();
774
775
0
    let mut remaining_len = output_len;
776
777
0
    let run_ends = run_array.run_ends().values();
778
779
0
    assert_eq!(
780
        0,
781
0
        values_indices.null_count(),
782
0
        "The output of sort_to_indices should not have null values. Its values is {}",
783
0
        values_indices.null_count()
784
    );
785
786
    // Calculate `run length` of sorted value indices.
787
    // Find the `logical index` at which the run starts.
788
    // Call the consumer using the run length and starting logical index.
789
0
    for physical_index in values_indices.values() {
790
        // As the values were sliced with offset = start_physical_index, it has to be added back
791
        // before accessing `RunArray::run_ends`
792
0
        let physical_index = *physical_index as usize + start_physical_index;
793
794
        // calculate the run length and logical index of sorted values
795
0
        let (run_length, logical_index_start) = unsafe {
796
            // Safety:
797
            // The index will be within bounds as its in bounds of start_physical_index
798
            // and len, both of which are within bounds of run_array
799
0
            if physical_index == start_physical_index {
800
0
                (
801
0
                    run_ends.get_unchecked(physical_index).as_usize() - run_array.offset(),
802
0
                    0,
803
0
                )
804
0
            } else if physical_index == end_physical_index {
805
0
                let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
806
0
                (
807
0
                    run_array.offset() + run_array.len() - prev_run_end,
808
0
                    prev_run_end - run_array.offset(),
809
0
                )
810
            } else {
811
0
                let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
812
0
                (
813
0
                    run_ends.get_unchecked(physical_index).as_usize() - prev_run_end,
814
0
                    prev_run_end - run_array.offset(),
815
0
                )
816
            }
817
        };
818
0
        let new_run_length = run_length.min(remaining_len);
819
0
        consume_runs(new_run_length, logical_index_start);
820
0
        remaining_len -= new_run_length;
821
822
0
        if remaining_len == 0 {
823
0
            break;
824
0
        }
825
    }
826
827
0
    if remaining_len > 0 {
828
0
        panic!("Remaining length should be zero its values is {remaining_len}")
829
0
    }
830
0
    (values_indices, run_values)
831
0
}
832
833
/// One column to be used in lexicographical sort.
///
/// Pairs the values of a column with the (optional) ordering that should be
/// applied to it when it takes part in `lexsort` / `lexsort_to_indices`.
834
#[derive(Clone, Debug)]
835
pub struct SortColumn {
836
    /// The column to sort
837
    pub values: ArrayRef,
838
    /// Sort options for this column. When `None`, the lexicographical
    /// comparators in this file use `SortOptions::default()`.
839
    pub options: Option<SortOptions>,
840
}
841
842
/// Sort a list of `ArrayRef` using `SortOptions` provided for each array.
843
///
844
/// Performs a stable lexicographical sort on values and indices.
845
///
846
/// Returns an `ArrowError::ComputeError(String)` if any of the array type is either unsupported by
847
/// `lexsort_to_indices` or `take`.
848
///
849
/// Example:
850
///
851
/// ```
852
/// # use std::convert::From;
853
/// # use std::sync::Arc;
854
/// # use arrow_array::{ArrayRef, StringArray, PrimitiveArray};
855
/// # use arrow_array::types::Int64Type;
856
/// # use arrow_array::cast::AsArray;
857
/// # use arrow_ord::sort::{SortColumn, SortOptions, lexsort};
858
///
859
/// let sorted_columns = lexsort(&vec![
860
///     SortColumn {
861
///         values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
862
///             None,
863
///             Some(-2),
864
///             Some(89),
865
///             Some(-64),
866
///             Some(101),
867
///         ])) as ArrayRef,
868
///         options: None,
869
///     },
870
///     SortColumn {
871
///         values: Arc::new(StringArray::from(vec![
872
///             Some("hello"),
873
///             Some("world"),
874
///             Some(","),
875
///             Some("foobar"),
876
///             Some("!"),
877
///         ])) as ArrayRef,
878
///         options: Some(SortOptions {
879
///             descending: true,
880
///             nulls_first: false,
881
///         }),
882
///     },
883
/// ], None).unwrap();
884
///
885
/// assert_eq!(sorted_columns[0].as_primitive::<Int64Type>().value(1), -64);
886
/// assert!(sorted_columns[0].is_null(0));
887
/// ```
888
///
889
/// Note: for multi-column sorts without a limit, using the [row format](https://docs.rs/arrow-row/latest/arrow_row/)
890
/// may be significantly faster
891
///
892
0
pub fn lexsort(columns: &[SortColumn], limit: Option<usize>) -> Result<Vec<ArrayRef>, ArrowError> {
893
0
    let indices = lexsort_to_indices(columns, limit)?;
894
0
    columns
895
0
        .iter()
896
0
        .map(|c| take(c.values.as_ref(), &indices, None))
897
0
        .collect()
898
0
}
899
900
/// Sort elements lexicographically from a list of `ArrayRef` into an unsigned integer
901
/// (`UInt32Array`) of indices.
902
///
903
/// Note: for multi-column sorts without a limit, using the [row format](https://docs.rs/arrow-row/latest/arrow_row/)
904
/// may be significantly faster
905
0
pub fn lexsort_to_indices(
906
0
    columns: &[SortColumn],
907
0
    limit: Option<usize>,
908
0
) -> Result<UInt32Array, ArrowError> {
909
0
    if columns.is_empty() {
910
0
        return Err(ArrowError::InvalidArgumentError(
911
0
            "Sort requires at least one column".to_string(),
912
0
        ));
913
0
    }
914
0
    if columns.len() == 1 && can_sort_to_indices(columns[0].values.data_type()) {
915
        // fallback to non-lexical sort
916
0
        let column = &columns[0];
917
0
        return sort_to_indices(&column.values, column.options, limit);
918
0
    }
919
920
0
    let row_count = columns[0].values.len();
921
0
    if columns.iter().any(|item| item.values.len() != row_count) {
922
0
        return Err(ArrowError::ComputeError(
923
0
            "lexical sort columns have different row counts".to_string(),
924
0
        ));
925
0
    };
926
927
0
    let mut value_indices = (0..row_count).collect::<Vec<usize>>();
928
0
    let mut len = value_indices.len();
929
930
0
    if let Some(limit) = limit {
931
0
        len = limit.min(len);
932
0
    }
933
934
    // Instantiate specialized versions of comparisons for small numbers
935
    // of columns as it helps the compiler generate better code.
936
0
    match columns.len() {
937
        2 => {
938
0
            sort_fixed_column::<2>(columns, &mut value_indices, len)?;
939
        }
940
        3 => {
941
0
            sort_fixed_column::<3>(columns, &mut value_indices, len)?;
942
        }
943
        4 => {
944
0
            sort_fixed_column::<4>(columns, &mut value_indices, len)?;
945
        }
946
        5 => {
947
0
            sort_fixed_column::<5>(columns, &mut value_indices, len)?;
948
        }
949
        _ => {
950
0
            let lexicographical_comparator = LexicographicalComparator::try_new(columns)?;
951
            // uint32 can be sorted unstably
952
0
            sort_unstable_by(&mut value_indices, len, |a, b| {
953
0
                lexicographical_comparator.compare(*a, *b)
954
0
            });
955
        }
956
    }
957
0
    Ok(UInt32Array::from(
958
0
        value_indices[..len]
959
0
            .iter()
960
0
            .map(|i| *i as u32)
961
0
            .collect::<Vec<_>>(),
962
    ))
963
0
}
964
965
// Sort a fixed number of columns using FixedLexicographicalComparator
966
0
fn sort_fixed_column<const N: usize>(
967
0
    columns: &[SortColumn],
968
0
    value_indices: &mut [usize],
969
0
    len: usize,
970
0
) -> Result<(), ArrowError> {
971
0
    let lexicographical_comparator = FixedLexicographicalComparator::<N>::try_new(columns)?;
972
0
    sort_unstable_by(value_indices, len, |a, b| {
973
0
        lexicographical_comparator.compare(*a, *b)
974
0
    });
975
0
    Ok(())
976
0
}
977
978
/// Partially sorts `v` so that its first `limit` elements are the `limit` smallest
/// elements of the slice, in sorted order.
///
/// `is_less` is the comparator (despite its name it returns an [`Ordering`]).
/// The remaining elements are left in an unspecified order.
///
/// The sort is unstable: the relative order of elements that compare equal may
/// not be preserved.
///
/// A `limit` of `0` leaves the slice untouched. A `limit` larger than `v.len()`
/// is treated as `v.len()`, i.e. the whole slice is sorted.
pub fn partial_sort<T, F>(v: &mut [T], limit: usize, mut is_less: F)
where
    F: FnMut(&T, &T) -> Ordering,
{
    // `select_nth_unstable_by` panics when the index is out of bounds, so clamp
    // the limit to the slice length first.
    let limit = limit.min(v.len());
    if let Some(n) = limit.checked_sub(1) {
        // Move the `n` smallest elements in front of position `n` ...
        let (before, _mid, _after) = v.select_nth_unstable_by(n, &mut is_less);
        // ... then order that prefix; position `n` already holds its final value.
        before.sort_unstable_by(is_less);
    }
}
988
989
/// A lexicographical comparator that wraps given array data (columns) and can lexicographically compare data
990
/// at given two indices. The lifetime is the same at the data wrapped.
991
pub struct LexicographicalComparator {
992
    compare_items: Vec<DynComparator>,
993
}
994
995
impl LexicographicalComparator {
996
    /// lexicographically compare values at the wrapped columns with given indices.
997
0
    pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
998
0
        for comparator in &self.compare_items {
999
0
            match comparator(a_idx, b_idx) {
1000
0
                Ordering::Equal => continue,
1001
0
                r => return r,
1002
            }
1003
        }
1004
0
        Ordering::Equal
1005
0
    }
1006
1007
    /// Create a new lex comparator that will wrap the given sort columns and give comparison
1008
    /// results with two indices.
1009
0
    pub fn try_new(columns: &[SortColumn]) -> Result<LexicographicalComparator, ArrowError> {
1010
0
        let compare_items = columns
1011
0
            .iter()
1012
0
            .map(|c| {
1013
0
                make_comparator(
1014
0
                    c.values.as_ref(),
1015
0
                    c.values.as_ref(),
1016
0
                    c.options.unwrap_or_default(),
1017
                )
1018
0
            })
1019
0
            .collect::<Result<Vec<_>, ArrowError>>()?;
1020
0
        Ok(LexicographicalComparator { compare_items })
1021
0
    }
1022
}
1023
1024
/// A lexicographical comparator that wraps given array data (columns) and can lexicographically compare data
1025
/// at given two indices. This version of the comparator is for compile-time constant number of columns.
1026
/// The lifetime is the same at the data wrapped.
1027
pub struct FixedLexicographicalComparator<const N: usize> {
1028
    compare_items: [DynComparator; N],
1029
}
1030
1031
impl<const N: usize> FixedLexicographicalComparator<N> {
1032
    /// lexicographically compare values at the wrapped columns with given indices.
1033
0
    pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
1034
0
        for comparator in &self.compare_items {
1035
0
            match comparator(a_idx, b_idx) {
1036
0
                Ordering::Equal => continue,
1037
0
                r => return r,
1038
            }
1039
        }
1040
0
        Ordering::Equal
1041
0
    }
1042
1043
    /// Create a new lex comparator that will wrap the given sort columns and give comparison
1044
    /// results with two indices.
1045
    /// The number of columns should be equal to the compile-time constant N.
1046
0
    pub fn try_new(
1047
0
        columns: &[SortColumn],
1048
0
    ) -> Result<FixedLexicographicalComparator<N>, ArrowError> {
1049
0
        let compare_items = columns
1050
0
            .iter()
1051
0
            .map(|c| {
1052
0
                make_comparator(
1053
0
                    c.values.as_ref(),
1054
0
                    c.values.as_ref(),
1055
0
                    c.options.unwrap_or_default(),
1056
                )
1057
0
            })
1058
0
            .collect::<Result<Vec<_>, ArrowError>>()?
1059
0
            .try_into();
1060
0
        let compare_items: [Box<dyn Fn(usize, usize) -> Ordering + Send + Sync + 'static>; N] =
1061
0
            compare_items.map_err(|_| {
1062
0
                ArrowError::ComputeError("Could not create fixed size array".to_string())
1063
0
            })?;
1064
0
        Ok(FixedLexicographicalComparator { compare_items })
1065
0
    }
1066
}
1067
1068
#[cfg(test)]
1069
mod tests {
1070
    use super::*;
1071
    use arrow_array::builder::{
1072
        BooleanBuilder, FixedSizeListBuilder, GenericListBuilder, Int64Builder, ListBuilder,
1073
        PrimitiveRunBuilder,
1074
    };
1075
    use arrow_buffer::{i256, NullBuffer};
1076
    use arrow_schema::Field;
1077
    use half::f16;
1078
    use rand::rngs::StdRng;
1079
    use rand::seq::SliceRandom;
1080
    use rand::{Rng, RngCore, SeedableRng};
1081
1082
    /// Builds a decimal array of type `T` from optional `usize` values, then
    /// applies the given precision and scale (panicking if they are rejected).
    fn create_decimal_array<T: DecimalType>(
        data: Vec<Option<usize>>,
        precision: u8,
        scale: i8,
    ) -> PrimitiveArray<T> {
        let natives = data.into_iter().map(|value| match value {
            Some(v) => T::Native::from_usize(v),
            None => None,
        });
        let array: PrimitiveArray<T> = natives.collect();
        array.with_precision_and_scale(precision, scale).unwrap()
    }
1093
1094
    /// Builds a `Decimal256Array` with precision 53 and scale 6 from optional values.
    fn create_decimal256_array(data: Vec<Option<i256>>) -> Decimal256Array {
        let array: Decimal256Array = data.into_iter().collect();
        array.with_precision_and_scale(53, 6).unwrap()
    }
1100
1101
    fn test_sort_to_indices_decimal_array<T: DecimalType>(
1102
        data: Vec<Option<usize>>,
1103
        options: Option<SortOptions>,
1104
        limit: Option<usize>,
1105
        expected_data: Vec<u32>,
1106
        precision: u8,
1107
        scale: i8,
1108
    ) {
1109
        let output = create_decimal_array::<T>(data, precision, scale);
1110
        let expected = UInt32Array::from(expected_data);
1111
        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1112
        assert_eq!(output, expected)
1113
    }
1114
1115
    fn test_sort_to_indices_decimal256_array(
1116
        data: Vec<Option<i256>>,
1117
        options: Option<SortOptions>,
1118
        limit: Option<usize>,
1119
        expected_data: Vec<u32>,
1120
    ) {
1121
        let output = create_decimal256_array(data);
1122
        let expected = UInt32Array::from(expected_data);
1123
        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1124
        assert_eq!(output, expected)
1125
    }
1126
1127
    fn test_sort_decimal_array<T: DecimalType>(
1128
        data: Vec<Option<usize>>,
1129
        options: Option<SortOptions>,
1130
        limit: Option<usize>,
1131
        expected_data: Vec<Option<usize>>,
1132
        p: u8,
1133
        s: i8,
1134
    ) {
1135
        let output = create_decimal_array::<T>(data, p, s);
1136
        let expected = Arc::new(create_decimal_array::<T>(expected_data, p, s)) as ArrayRef;
1137
        let output = match limit {
1138
            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1139
            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1140
        };
1141
        assert_eq!(&output, &expected)
1142
    }
1143
1144
    fn test_sort_decimal256_array(
1145
        data: Vec<Option<i256>>,
1146
        options: Option<SortOptions>,
1147
        limit: Option<usize>,
1148
        expected_data: Vec<Option<i256>>,
1149
    ) {
1150
        let output = create_decimal256_array(data);
1151
        let expected = Arc::new(create_decimal256_array(expected_data)) as ArrayRef;
1152
        let output = match limit {
1153
            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1154
            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1155
        };
1156
        assert_eq!(&output, &expected)
1157
    }
1158
1159
    fn test_sort_to_indices_boolean_arrays(
1160
        data: Vec<Option<bool>>,
1161
        options: Option<SortOptions>,
1162
        limit: Option<usize>,
1163
        expected_data: Vec<u32>,
1164
    ) {
1165
        let output = BooleanArray::from(data);
1166
        let expected = UInt32Array::from(expected_data);
1167
        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1168
        assert_eq!(output, expected)
1169
    }
1170
1171
    fn test_sort_to_indices_primitive_arrays<T>(
1172
        data: Vec<Option<T::Native>>,
1173
        options: Option<SortOptions>,
1174
        limit: Option<usize>,
1175
        expected_data: Vec<u32>,
1176
    ) where
1177
        T: ArrowPrimitiveType,
1178
        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1179
    {
1180
        let output = PrimitiveArray::<T>::from(data);
1181
        let expected = UInt32Array::from(expected_data);
1182
        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1183
        assert_eq!(output, expected)
1184
    }
1185
1186
    fn test_sort_primitive_arrays<T>(
1187
        data: Vec<Option<T::Native>>,
1188
        options: Option<SortOptions>,
1189
        limit: Option<usize>,
1190
        expected_data: Vec<Option<T::Native>>,
1191
    ) where
1192
        T: ArrowPrimitiveType,
1193
        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1194
    {
1195
        let output = PrimitiveArray::<T>::from(data);
1196
        let expected = Arc::new(PrimitiveArray::<T>::from(expected_data)) as ArrayRef;
1197
        let output = match limit {
1198
            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1199
            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1200
        };
1201
        assert_eq!(&output, &expected)
1202
    }
1203
1204
    fn test_sort_to_indices_string_arrays(
1205
        data: Vec<Option<&str>>,
1206
        options: Option<SortOptions>,
1207
        limit: Option<usize>,
1208
        expected_data: Vec<u32>,
1209
    ) {
1210
        let output = StringArray::from(data);
1211
        let expected = UInt32Array::from(expected_data);
1212
        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1213
        assert_eq!(output, expected)
1214
    }
1215
1216
    /// Tests both Utf8 and LargeUtf8
1217
    fn test_sort_string_arrays(
1218
        data: Vec<Option<&str>>,
1219
        options: Option<SortOptions>,
1220
        limit: Option<usize>,
1221
        expected_data: Vec<Option<&str>>,
1222
    ) {
1223
        let output = StringArray::from(data.clone());
1224
        let expected = Arc::new(StringArray::from(expected_data.clone())) as ArrayRef;
1225
        let output = match limit {
1226
            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1227
            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1228
        };
1229
        assert_eq!(&output, &expected);
1230
1231
        let output = LargeStringArray::from(data.clone());
1232
        let expected = Arc::new(LargeStringArray::from(expected_data.clone())) as ArrayRef;
1233
        let output = match limit {
1234
            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1235
            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1236
        };
1237
        assert_eq!(&output, &expected);
1238
1239
        let output = StringViewArray::from(data);
1240
        let expected = Arc::new(StringViewArray::from(expected_data)) as ArrayRef;
1241
        let output = match limit {
1242
            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1243
            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1244
        };
1245
        assert_eq!(&output, &expected);
1246
    }
1247
1248
    fn test_sort_string_dict_arrays<T: ArrowDictionaryKeyType>(
1249
        data: Vec<Option<&str>>,
1250
        options: Option<SortOptions>,
1251
        limit: Option<usize>,
1252
        expected_data: Vec<Option<&str>>,
1253
    ) {
1254
        let array = data.into_iter().collect::<DictionaryArray<T>>();
1255
        let array_values = array.values().clone();
1256
        let dict = array_values
1257
            .as_any()
1258
            .downcast_ref::<StringArray>()
1259
            .expect("Unable to get dictionary values");
1260
1261
        let sorted = match limit {
1262
            Some(_) => sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap(),
1263
            _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
1264
        };
1265
        let sorted = sorted
1266
            .as_any()
1267
            .downcast_ref::<DictionaryArray<T>>()
1268
            .unwrap();
1269
        let sorted_values = sorted.values();
1270
        let sorted_dict = sorted_values
1271
            .as_any()
1272
            .downcast_ref::<StringArray>()
1273
            .expect("Unable to get dictionary values");
1274
        let sorted_keys = sorted.keys();
1275
1276
        assert_eq!(sorted_dict, dict);
1277
1278
        let sorted_strings = StringArray::from_iter((0..sorted.len()).map(|i| {
1279
            if sorted.is_valid(i) {
1280
                Some(sorted_dict.value(sorted_keys.value(i).as_usize()))
1281
            } else {
1282
                None
1283
            }
1284
        }));
1285
        let expected = StringArray::from(expected_data);
1286
1287
        assert_eq!(sorted_strings, expected)
1288
    }
1289
1290
    fn test_sort_primitive_dict_arrays<K: ArrowDictionaryKeyType, T: ArrowPrimitiveType>(
1291
        keys: PrimitiveArray<K>,
1292
        values: PrimitiveArray<T>,
1293
        options: Option<SortOptions>,
1294
        limit: Option<usize>,
1295
        expected_data: Vec<Option<T::Native>>,
1296
    ) where
1297
        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1298
    {
1299
        let array = DictionaryArray::<K>::new(keys, Arc::new(values));
1300
        let array_values = array.values().clone();
1301
        let dict = array_values.as_primitive::<T>();
1302
1303
        let sorted = match limit {
1304
            Some(_) => sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap(),
1305
            _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
1306
        };
1307
        let sorted = sorted
1308
            .as_any()
1309
            .downcast_ref::<DictionaryArray<K>>()
1310
            .unwrap();
1311
        let sorted_values = sorted.values();
1312
        let sorted_dict = sorted_values
1313
            .as_any()
1314
            .downcast_ref::<PrimitiveArray<T>>()
1315
            .expect("Unable to get dictionary values");
1316
        let sorted_keys = sorted.keys();
1317
1318
        assert_eq!(sorted_dict, dict);
1319
1320
        let sorted_values: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(
1321
            (0..sorted.len())
1322
                .map(|i| {
1323
                    let key = sorted_keys.value(i).as_usize();
1324
                    if sorted.is_valid(i) && sorted_dict.is_valid(key) {
1325
                        Some(sorted_dict.value(key))
1326
                    } else {
1327
                        None
1328
                    }
1329
                })
1330
                .collect::<Vec<Option<T::Native>>>(),
1331
        );
1332
        let expected: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(expected_data);
1333
1334
        assert_eq!(sorted_values, expected)
1335
    }
1336
1337
    fn test_sort_list_arrays<T>(
1338
        data: Vec<Option<Vec<Option<T::Native>>>>,
1339
        options: Option<SortOptions>,
1340
        limit: Option<usize>,
1341
        expected_data: Vec<Option<Vec<Option<T::Native>>>>,
1342
        fixed_length: Option<i32>,
1343
    ) where
1344
        T: ArrowPrimitiveType,
1345
        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1346
    {
1347
        // for FixedSizedList
1348
        if let Some(length) = fixed_length {
1349
            let input = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1350
                data.clone(),
1351
                length,
1352
            ));
1353
            let sorted = match limit {
1354
                Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1355
                _ => sort(&(input as ArrayRef), options).unwrap(),
1356
            };
1357
            let expected = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1358
                expected_data.clone(),
1359
                length,
1360
            )) as ArrayRef;
1361
1362
            assert_eq!(&sorted, &expected);
1363
        }
1364
1365
        // for List
1366
        let input = Arc::new(ListArray::from_iter_primitive::<T, _, _>(data.clone()));
1367
        let sorted = match limit {
1368
            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1369
            _ => sort(&(input as ArrayRef), options).unwrap(),
1370
        };
1371
        let expected = Arc::new(ListArray::from_iter_primitive::<T, _, _>(
1372
            expected_data.clone(),
1373
        )) as ArrayRef;
1374
1375
        assert_eq!(&sorted, &expected);
1376
1377
        // for LargeList
1378
        let input = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(data));
1379
        let sorted = match limit {
1380
            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1381
            _ => sort(&(input as ArrayRef), options).unwrap(),
1382
        };
1383
        let expected = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(
1384
            expected_data,
1385
        )) as ArrayRef;
1386
1387
        assert_eq!(&sorted, &expected);
1388
    }
1389
1390
    fn test_lex_sort_arrays(
1391
        input: Vec<SortColumn>,
1392
        expected_output: Vec<ArrayRef>,
1393
        limit: Option<usize>,
1394
    ) {
1395
        let sorted = lexsort(&input, limit).unwrap();
1396
1397
        for (result, expected) in sorted.iter().zip(expected_output.iter()) {
1398
            assert_eq!(result, expected);
1399
        }
1400
    }
1401
1402
    /// slice all arrays in expected_output to offset/length
1403
    fn slice_arrays(expected_output: Vec<ArrayRef>, offset: usize, length: usize) -> Vec<ArrayRef> {
1404
        expected_output
1405
            .into_iter()
1406
            .map(|array| array.slice(offset, length))
1407
            .collect()
1408
    }
1409
1410
    fn test_sort_binary_arrays(
1411
        data: Vec<Option<Vec<u8>>>,
1412
        options: Option<SortOptions>,
1413
        limit: Option<usize>,
1414
        expected_data: Vec<Option<Vec<u8>>>,
1415
        fixed_length: Option<i32>,
1416
    ) {
1417
        // Fixed size binary array
1418
        if let Some(length) = fixed_length {
1419
            let input = Arc::new(
1420
                FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.iter().cloned(), length)
1421
                    .unwrap(),
1422
            );
1423
            let sorted = match limit {
1424
                Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1425
                None => sort(&(input as ArrayRef), options).unwrap(),
1426
            };
1427
            let expected = Arc::new(
1428
                FixedSizeBinaryArray::try_from_sparse_iter_with_size(
1429
                    expected_data.iter().cloned(),
1430
                    length,
1431
                )
1432
                .unwrap(),
1433
            ) as ArrayRef;
1434
1435
            assert_eq!(&sorted, &expected);
1436
        }
1437
1438
        // Generic size binary array
1439
        fn make_generic_binary_array<S: OffsetSizeTrait>(
1440
            data: &[Option<Vec<u8>>],
1441
        ) -> Arc<GenericBinaryArray<S>> {
1442
            Arc::new(GenericBinaryArray::<S>::from_opt_vec(
1443
                data.iter()
1444
                    .map(|binary| binary.as_ref().map(Vec::as_slice))
1445
                    .collect(),
1446
            ))
1447
        }
1448
1449
        // BinaryArray
1450
        let input = make_generic_binary_array::<i32>(&data);
1451
        let sorted = match limit {
1452
            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1453
            None => sort(&(input as ArrayRef), options).unwrap(),
1454
        };
1455
        let expected = make_generic_binary_array::<i32>(&expected_data) as ArrayRef;
1456
        assert_eq!(&sorted, &expected);
1457
1458
        // LargeBinaryArray
1459
        let input = make_generic_binary_array::<i64>(&data);
1460
        let sorted = match limit {
1461
            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1462
            None => sort(&(input as ArrayRef), options).unwrap(),
1463
        };
1464
        let expected = make_generic_binary_array::<i64>(&expected_data) as ArrayRef;
1465
        assert_eq!(&sorted, &expected);
1466
    }
1467
1468
    #[test]
1469
    fn test_sort_to_indices_primitives() {
1470
        test_sort_to_indices_primitive_arrays::<Int8Type>(
1471
            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1472
            None,
1473
            None,
1474
            vec![0, 5, 3, 1, 4, 2],
1475
        );
1476
        test_sort_to_indices_primitive_arrays::<Int16Type>(
1477
            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1478
            None,
1479
            None,
1480
            vec![0, 5, 3, 1, 4, 2],
1481
        );
1482
        test_sort_to_indices_primitive_arrays::<Int32Type>(
1483
            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1484
            None,
1485
            None,
1486
            vec![0, 5, 3, 1, 4, 2],
1487
        );
1488
        test_sort_to_indices_primitive_arrays::<Int64Type>(
1489
            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1490
            None,
1491
            None,
1492
            vec![0, 5, 3, 1, 4, 2],
1493
        );
1494
        test_sort_to_indices_primitive_arrays::<Float16Type>(
1495
            vec![
1496
                None,
1497
                Some(f16::from_f32(-0.05)),
1498
                Some(f16::from_f32(2.225)),
1499
                Some(f16::from_f32(-1.01)),
1500
                Some(f16::from_f32(-0.05)),
1501
                None,
1502
            ],
1503
            None,
1504
            None,
1505
            vec![0, 5, 3, 1, 4, 2],
1506
        );
1507
        test_sort_to_indices_primitive_arrays::<Float32Type>(
1508
            vec![
1509
                None,
1510
                Some(-0.05),
1511
                Some(2.225),
1512
                Some(-1.01),
1513
                Some(-0.05),
1514
                None,
1515
            ],
1516
            None,
1517
            None,
1518
            vec![0, 5, 3, 1, 4, 2],
1519
        );
1520
        test_sort_to_indices_primitive_arrays::<Float64Type>(
1521
            vec![
1522
                None,
1523
                Some(-0.05),
1524
                Some(2.225),
1525
                Some(-1.01),
1526
                Some(-0.05),
1527
                None,
1528
            ],
1529
            None,
1530
            None,
1531
            vec![0, 5, 3, 1, 4, 2],
1532
        );
1533
1534
        // descending
1535
        test_sort_to_indices_primitive_arrays::<Int8Type>(
1536
            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1537
            Some(SortOptions {
1538
                descending: true,
1539
                nulls_first: false,
1540
            }),
1541
            None,
1542
            vec![2, 1, 4, 3, 0, 5],
1543
        );
1544
1545
        test_sort_to_indices_primitive_arrays::<Int16Type>(
1546
            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1547
            Some(SortOptions {
1548
                descending: true,
1549
                nulls_first: false,
1550
            }),
1551
            None,
1552
            vec![2, 1, 4, 3, 0, 5],
1553
        );
1554
1555
        test_sort_to_indices_primitive_arrays::<Int32Type>(
1556
            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1557
            Some(SortOptions {
1558
                descending: true,
1559
                nulls_first: false,
1560
            }),
1561
            None,
1562
            vec![2, 1, 4, 3, 0, 5],
1563
        );
1564
1565
        test_sort_to_indices_primitive_arrays::<Int64Type>(
1566
            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1567
            Some(SortOptions {
1568
                descending: true,
1569
                nulls_first: false,
1570
            }),
1571
            None,
1572
            vec![2, 1, 4, 3, 0, 5],
1573
        );
1574
1575
        test_sort_to_indices_primitive_arrays::<Float16Type>(
1576
            vec![
1577
                None,
1578
                Some(f16::from_f32(0.005)),
1579
                Some(f16::from_f32(20.22)),
1580
                Some(f16::from_f32(-10.3)),
1581
                Some(f16::from_f32(0.005)),
1582
                None,
1583
            ],
1584
            Some(SortOptions {
1585
                descending: true,
1586
                nulls_first: false,
1587
            }),
1588
            None,
1589
            vec![2, 1, 4, 3, 0, 5],
1590
        );
1591
1592
        test_sort_to_indices_primitive_arrays::<Float32Type>(
1593
            vec![
1594
                None,
1595
                Some(0.005),
1596
                Some(20.22),
1597
                Some(-10.3),
1598
                Some(0.005),
1599
                None,
1600
            ],
1601
            Some(SortOptions {
1602
                descending: true,
1603
                nulls_first: false,
1604
            }),
1605
            None,
1606
            vec![2, 1, 4, 3, 0, 5],
1607
        );
1608
1609
        test_sort_to_indices_primitive_arrays::<Float64Type>(
1610
            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
1611
            Some(SortOptions {
1612
                descending: true,
1613
                nulls_first: false,
1614
            }),
1615
            None,
1616
            vec![2, 1, 4, 3, 0, 5],
1617
        );
1618
1619
        // descending, nulls first
1620
        test_sort_to_indices_primitive_arrays::<Int8Type>(
1621
            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1622
            Some(SortOptions {
1623
                descending: true,
1624
                nulls_first: true,
1625
            }),
1626
            None,
1627
            vec![0, 5, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
1628
        );
1629
1630
        test_sort_to_indices_primitive_arrays::<Int16Type>(
1631
            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1632
            Some(SortOptions {
1633
                descending: true,
1634
                nulls_first: true,
1635
            }),
1636
            None,
1637
            vec![0, 5, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
1638
        );
1639
1640
        test_sort_to_indices_primitive_arrays::<Int32Type>(
1641
            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1642
            Some(SortOptions {
1643
                descending: true,
1644
                nulls_first: true,
1645
            }),
1646
            None,
1647
            vec![0, 5, 2, 1, 4, 3],
1648
        );
1649
1650
        test_sort_to_indices_primitive_arrays::<Int64Type>(
1651
            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1652
            Some(SortOptions {
1653
                descending: true,
1654
                nulls_first: true,
1655
            }),
1656
            None,
1657
            vec![0, 5, 2, 1, 4, 3],
1658
        );
1659
1660
        test_sort_to_indices_primitive_arrays::<Float16Type>(
1661
            vec![
1662
                None,
1663
                Some(f16::from_f32(0.1)),
1664
                Some(f16::from_f32(0.2)),
1665
                Some(f16::from_f32(-1.3)),
1666
                Some(f16::from_f32(0.01)),
1667
                None,
1668
            ],
1669
            Some(SortOptions {
1670
                descending: true,
1671
                nulls_first: true,
1672
            }),
1673
            None,
1674
            vec![0, 5, 2, 1, 4, 3],
1675
        );
1676
1677
        test_sort_to_indices_primitive_arrays::<Float32Type>(
1678
            vec![None, Some(0.1), Some(0.2), Some(-1.3), Some(0.01), None],
1679
            Some(SortOptions {
1680
                descending: true,
1681
                nulls_first: true,
1682
            }),
1683
            None,
1684
            vec![0, 5, 2, 1, 4, 3],
1685
        );
1686
1687
        test_sort_to_indices_primitive_arrays::<Float64Type>(
1688
            vec![None, Some(10.1), Some(100.2), Some(-1.3), Some(10.01), None],
1689
            Some(SortOptions {
1690
                descending: true,
1691
                nulls_first: true,
1692
            }),
1693
            None,
1694
            vec![0, 5, 2, 1, 4, 3],
1695
        );
1696
1697
        // valid values less than limit with extra nulls
1698
        test_sort_to_indices_primitive_arrays::<Float64Type>(
1699
            vec![Some(2.0), None, None, Some(1.0)],
1700
            Some(SortOptions {
1701
                descending: false,
1702
                nulls_first: false,
1703
            }),
1704
            Some(3),
1705
            vec![3, 0, 1],
1706
        );
1707
1708
        test_sort_to_indices_primitive_arrays::<Float64Type>(
1709
            vec![Some(2.0), None, None, Some(1.0)],
1710
            Some(SortOptions {
1711
                descending: false,
1712
                nulls_first: true,
1713
            }),
1714
            Some(3),
1715
            vec![1, 2, 3],
1716
        );
1717
1718
        // more nulls than limit
1719
        test_sort_to_indices_primitive_arrays::<Float64Type>(
1720
            vec![Some(1.0), None, None, None],
1721
            Some(SortOptions {
1722
                descending: false,
1723
                nulls_first: true,
1724
            }),
1725
            Some(2),
1726
            vec![1, 2],
1727
        );
1728
1729
        test_sort_to_indices_primitive_arrays::<Float64Type>(
1730
            vec![Some(1.0), None, None, None],
1731
            Some(SortOptions {
1732
                descending: false,
1733
                nulls_first: false,
1734
            }),
1735
            Some(2),
1736
            vec![0, 1],
1737
        );
1738
    }
1739
1740
    /// Ascending, nulls-last sort with a limit of 2 over an input that holds more
    /// nulls than the limit: the expected indices (4 and 6) are those of the
    /// values 1 and 2.
    #[test]
    fn test_sort_to_indices_primitive_more_nulls_than_limit() {
        let input = vec![None, None, Some(3), None, Some(1), None, Some(2)];
        let ascending_nulls_last = SortOptions {
            descending: false,
            nulls_first: false,
        };
        let expected_indices = vec![4, 6];

        test_sort_to_indices_primitive_arrays::<Int32Type>(
            input,
            Some(ascending_nulls_last),
            Some(2),
            expected_indices,
        );
    }
1752
1753
    /// Runs `test_sort_to_indices_boolean_arrays` over a table of boolean inputs
    /// covering default options, descending order, nulls-first order and limits.
    #[test]
    fn test_sort_boolean() {
        // Shorthand for an explicit sort configuration.
        let opts = |descending: bool, nulls_first: bool| {
            Some(SortOptions {
                descending,
                nulls_first,
            })
        };

        // Inputs shared by several cases; each case gets its own freshly built vector.
        let mixed = || vec![None, Some(false), Some(true), Some(true), Some(false), None];
        let few_valid = || vec![Some(true), None, None, Some(false)];
        let mostly_null = || vec![Some(true), None, None, None];

        // (input, options, limit, expected indices)
        let cases = [
            // boolean
            (mixed(), None, None, vec![0, 5, 1, 4, 2, 3]),
            // boolean, descending
            (mixed(), opts(true, false), None, vec![2, 3, 1, 4, 0, 5]),
            // boolean, descending, nulls first
            (mixed(), opts(true, true), None, vec![0, 5, 2, 3, 1, 4]),
            // boolean, descending, nulls first, limit
            (mixed(), opts(true, true), Some(3), vec![0, 5, 2]),
            // valid values less than limit with extra nulls
            (few_valid(), opts(false, false), Some(3), vec![3, 0, 1]),
            (few_valid(), opts(false, true), Some(3), vec![1, 2, 3]),
            // more nulls than limit
            (mostly_null(), opts(false, true), Some(2), vec![1, 2]),
            (mostly_null(), opts(false, false), Some(2), vec![0, 1]),
        ];

        for (data, options, limit, expected) in cases {
            test_sort_to_indices_boolean_arrays(data, options, limit, expected);
        }
    }
1838
1839
    /// Test sort boolean on each permutation of with/without limit and GenericListArray/FixedSizeListArray
1840
    ///
1841
    /// The input data must have the same length for all list items so that we can test FixedSizeListArray
1842
    ///
1843
    fn test_every_config_sort_boolean_list_arrays(
1844
        data: Vec<Option<Vec<Option<bool>>>>,
1845
        options: Option<SortOptions>,
1846
        expected_data: Vec<Option<Vec<Option<bool>>>>,
1847
    ) {
1848
        let first_length = data
1849
            .iter()
1850
            .find_map(|x| x.as_ref().map(|x| x.len()))
1851
            .unwrap_or(0);
1852
        let first_non_match_length = data
1853
            .iter()
1854
            .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1855
            .position(|x| x != first_length);
1856
1857
        assert_eq!(
1858
            first_non_match_length, None,
1859
            "All list items should have the same length {first_length}, input data is invalid"
1860
        );
1861
1862
        let first_non_match_length = expected_data
1863
            .iter()
1864
            .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1865
            .position(|x| x != first_length);
1866
1867
        assert_eq!(
1868
            first_non_match_length, None,
1869
            "All list items should have the same length {first_length}, expected data is invalid"
1870
        );
1871
1872
        let limit = expected_data.len().saturating_div(2);
1873
1874
        for &with_limit in &[false, true] {
1875
            let (limit, expected_data) = if with_limit {
1876
                (
1877
                    Some(limit),
1878
                    expected_data.iter().take(limit).cloned().collect(),
1879
                )
1880
            } else {
1881
                (None, expected_data.clone())
1882
            };
1883
1884
            for &fixed_length in &[None, Some(first_length as i32)] {
1885
                test_sort_boolean_list_arrays(
1886
                    data.clone(),
1887
                    options,
1888
                    limit,
1889
                    expected_data.clone(),
1890
                    fixed_length,
1891
                );
1892
            }
1893
        }
1894
    }
1895
1896
    fn test_sort_boolean_list_arrays(
1897
        data: Vec<Option<Vec<Option<bool>>>>,
1898
        options: Option<SortOptions>,
1899
        limit: Option<usize>,
1900
        expected_data: Vec<Option<Vec<Option<bool>>>>,
1901
        fixed_length: Option<i32>,
1902
    ) {
1903
        fn build_fixed_boolean_list_array(
1904
            data: Vec<Option<Vec<Option<bool>>>>,
1905
            fixed_length: i32,
1906
        ) -> ArrayRef {
1907
            let mut builder = FixedSizeListBuilder::new(
1908
                BooleanBuilder::with_capacity(fixed_length as usize),
1909
                fixed_length,
1910
            );
1911
            for sublist in data {
1912
                match sublist {
1913
                    Some(sublist) => {
1914
                        builder.values().extend(sublist);
1915
                        builder.append(true);
1916
                    }
1917
                    None => {
1918
                        builder
1919
                            .values()
1920
                            .extend(std::iter::repeat_n(None, fixed_length as usize));
1921
                        builder.append(false);
1922
                    }
1923
                }
1924
            }
1925
            Arc::new(builder.finish()) as ArrayRef
1926
        }
1927
1928
        fn build_generic_boolean_list_array<OffsetSize: OffsetSizeTrait>(
1929
            data: Vec<Option<Vec<Option<bool>>>>,
1930
        ) -> ArrayRef {
1931
            let mut builder = GenericListBuilder::<OffsetSize, _>::new(BooleanBuilder::new());
1932
            builder.extend(data);
1933
            Arc::new(builder.finish()) as ArrayRef
1934
        }
1935
1936
        // for FixedSizedList
1937
        if let Some(length) = fixed_length {
1938
            let input = build_fixed_boolean_list_array(data.clone(), length);
1939
            let sorted = match limit {
1940
                Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1941
                _ => sort(&(input as ArrayRef), options).unwrap(),
1942
            };
1943
            let expected = build_fixed_boolean_list_array(expected_data.clone(), length);
1944
1945
            assert_eq!(&sorted, &expected);
1946
        }
1947
1948
        // for List
1949
        let input = build_generic_boolean_list_array::<i32>(data.clone());
1950
        let sorted = match limit {
1951
            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1952
            _ => sort(&(input as ArrayRef), options).unwrap(),
1953
        };
1954
        let expected = build_generic_boolean_list_array::<i32>(expected_data.clone());
1955
1956
        assert_eq!(&sorted, &expected);
1957
1958
        // for LargeList
1959
        let input = build_generic_boolean_list_array::<i64>(data.clone());
1960
        let sorted = match limit {
1961
            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1962
            _ => sort(&(input as ArrayRef), options).unwrap(),
1963
        };
1964
        let expected = build_generic_boolean_list_array::<i64>(expected_data.clone());
1965
1966
        assert_eq!(&sorted, &expected);
1967
    }
1968
1969
    /// Sorts a shuffled column holding every length-3 list over {true, false, null}
    /// plus one null list (3^3 + 1 = 28 rows) and checks the result for all four
    /// `descending` / `nulls_first` combinations.
    #[test]
    fn test_sort_list_of_booleans() {
        type Rows = Vec<Option<Vec<Option<bool>>>>;

        const T: Option<bool> = Some(true);
        const F: Option<bool> = Some(false);
        const N: Option<bool> = None;

        // Every length-3 list over `order`, enumerated lexicographically with respect to
        // `order`: the first element varies slowest and the last element fastest.
        fn lists_in_order(order: [Option<bool>; 3]) -> Rows {
            let mut lists = Vec::with_capacity(27);
            for first in order {
                for second in order {
                    for third in order {
                        lists.push(Some(vec![first, second, third]));
                    }
                }
            }
            lists
        }

        // Places the single null list before or after all the non-null lists.
        fn with_null_list(mut lists: Rows, null_list_first: bool) -> Rows {
            if null_list_first {
                lists.insert(0, None);
            } else {
                lists.push(None);
            }
            lists
        }

        // All 28 rows in a fixed starting order, then shuffled with a fixed seed.
        let mut cases = with_null_list(lists_in_order([T, F, N]), false);

        cases.shuffle(&mut StdRng::seed_from_u64(42));

        // (descending, nulls_first, element order of the expected output)
        let configs = [
            // The order is false, true, null
            (false, false, [F, T, N]),
            // The order is null, false, true
            (false, true, [N, F, T]),
            // The order is true, false, null
            (true, false, [T, F, N]),
            // The order is null, true, false
            (true, true, [N, T, F]),
        ];

        for (descending, nulls_first, order) in configs {
            // The null list sits on the same side as the null elements.
            let expected = with_null_list(lists_in_order(order), nulls_first);

            test_every_config_sort_boolean_list_arrays(
                cases.clone(),
                Some(SortOptions {
                    descending,
                    nulls_first,
                }),
                expected,
            );
        }
    }
2216
2217
    fn test_sort_indices_decimal<T: DecimalType>(precision: u8, scale: i8) {
2218
        // decimal default
2219
        test_sort_to_indices_decimal_array::<T>(
2220
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2221
            None,
2222
            None,
2223
            vec![0, 6, 4, 2, 3, 5, 1],
2224
            precision,
2225
            scale,
2226
        );
2227
        // decimal descending
2228
        test_sort_to_indices_decimal_array::<T>(
2229
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2230
            Some(SortOptions {
2231
                descending: true,
2232
                nulls_first: false,
2233
            }),
2234
            None,
2235
            vec![1, 5, 3, 2, 4, 0, 6],
2236
            precision,
2237
            scale,
2238
        );
2239
        // decimal null_first and descending
2240
        test_sort_to_indices_decimal_array::<T>(
2241
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2242
            Some(SortOptions {
2243
                descending: true,
2244
                nulls_first: true,
2245
            }),
2246
            None,
2247
            vec![0, 6, 1, 5, 3, 2, 4],
2248
            precision,
2249
            scale,
2250
        );
2251
        // decimal null_first
2252
        test_sort_to_indices_decimal_array::<T>(
2253
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2254
            Some(SortOptions {
2255
                descending: false,
2256
                nulls_first: true,
2257
            }),
2258
            None,
2259
            vec![0, 6, 4, 2, 3, 5, 1],
2260
            precision,
2261
            scale,
2262
        );
2263
        // limit
2264
        test_sort_to_indices_decimal_array::<T>(
2265
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2266
            None,
2267
            Some(3),
2268
            vec![0, 6, 4],
2269
            precision,
2270
            scale,
2271
        );
2272
        // limit descending
2273
        test_sort_to_indices_decimal_array::<T>(
2274
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2275
            Some(SortOptions {
2276
                descending: true,
2277
                nulls_first: false,
2278
            }),
2279
            Some(3),
2280
            vec![1, 5, 3],
2281
            precision,
2282
            scale,
2283
        );
2284
        // limit descending null_first
2285
        test_sort_to_indices_decimal_array::<T>(
2286
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2287
            Some(SortOptions {
2288
                descending: true,
2289
                nulls_first: true,
2290
            }),
2291
            Some(3),
2292
            vec![0, 6, 1],
2293
            precision,
2294
            scale,
2295
        );
2296
        // limit null_first
2297
        test_sort_to_indices_decimal_array::<T>(
2298
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2299
            Some(SortOptions {
2300
                descending: false,
2301
                nulls_first: true,
2302
            }),
2303
            Some(3),
2304
            vec![0, 6, 4],
2305
            precision,
2306
            scale,
2307
        );
2308
    }
2309
2310
    /// Runs the shared decimal index-sort cases for `Decimal32Type`.
    #[test]
    fn test_sort_indices_decimal32() {
        let (precision, scale) = (8, 3);
        test_sort_indices_decimal::<Decimal32Type>(precision, scale);
    }
2314
2315
    /// Runs the shared decimal index-sort cases for `Decimal64Type`.
    #[test]
    fn test_sort_indices_decimal64() {
        let (precision, scale) = (17, 5);
        test_sort_indices_decimal::<Decimal64Type>(precision, scale);
    }
2319
2320
    /// Runs the shared decimal index-sort cases for `Decimal128Type`.
    #[test]
    fn test_sort_indices_decimal128() {
        let (precision, scale) = (23, 6);
        test_sort_indices_decimal::<Decimal128Type>(precision, scale);
    }
2324
2325
    /// Runs the shared decimal index-sort cases for `Decimal256Type`.
    #[test]
    fn test_sort_indices_decimal256() {
        let (precision, scale) = (53, 6);
        test_sort_indices_decimal::<Decimal256Type>(precision, scale);
    }
2329
2330
    /// Index-sorts an input containing `i256::MIN`, `i256::MAX`, `1`, `-1` and a null,
    /// always with nulls first, in both directions and with and without a limit of 4.
    #[test]
    fn test_sort_indices_decimal256_max_min() {
        let data = vec![
            None,
            Some(i256::MIN),
            Some(i256::from_i128(1)),
            Some(i256::MAX),
            Some(i256::from_i128(-1)),
        ];

        // (descending, limit, expected indices)
        let cases = [
            (false, None, vec![0, 1, 4, 2, 3]),
            (true, None, vec![0, 3, 2, 4, 1]),
            (false, Some(4), vec![0, 1, 4, 2]),
            (true, Some(4), vec![0, 3, 2, 4]),
        ];

        for (descending, limit, expected) in cases {
            let options = SortOptions {
                descending,
                nulls_first: true,
            };
            test_sort_to_indices_decimal256_array(data.clone(), Some(options), limit, expected);
        }
    }
2379
2380
    fn test_sort_decimal<T: DecimalType>(precision: u8, scale: i8) {
2381
        // decimal default
2382
        test_sort_decimal_array::<T>(
2383
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2384
            None,
2385
            None,
2386
            vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2387
            precision,
2388
            scale,
2389
        );
2390
        // decimal descending
2391
        test_sort_decimal_array::<T>(
2392
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2393
            Some(SortOptions {
2394
                descending: true,
2395
                nulls_first: false,
2396
            }),
2397
            None,
2398
            vec![Some(5), Some(4), Some(3), Some(2), Some(1), None, None],
2399
            precision,
2400
            scale,
2401
        );
2402
        // decimal null_first and descending
2403
        test_sort_decimal_array::<T>(
2404
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2405
            Some(SortOptions {
2406
                descending: true,
2407
                nulls_first: true,
2408
            }),
2409
            None,
2410
            vec![None, None, Some(5), Some(4), Some(3), Some(2), Some(1)],
2411
            precision,
2412
            scale,
2413
        );
2414
        // decimal null_first
2415
        test_sort_decimal_array::<T>(
2416
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2417
            Some(SortOptions {
2418
                descending: false,
2419
                nulls_first: true,
2420
            }),
2421
            None,
2422
            vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2423
            precision,
2424
            scale,
2425
        );
2426
        // limit
2427
        test_sort_decimal_array::<T>(
2428
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2429
            None,
2430
            Some(3),
2431
            vec![None, None, Some(1)],
2432
            precision,
2433
            scale,
2434
        );
2435
        // limit descending
2436
        test_sort_decimal_array::<T>(
2437
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2438
            Some(SortOptions {
2439
                descending: true,
2440
                nulls_first: false,
2441
            }),
2442
            Some(3),
2443
            vec![Some(5), Some(4), Some(3)],
2444
            precision,
2445
            scale,
2446
        );
2447
        // limit descending null_first
2448
        test_sort_decimal_array::<T>(
2449
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2450
            Some(SortOptions {
2451
                descending: true,
2452
                nulls_first: true,
2453
            }),
2454
            Some(3),
2455
            vec![None, None, Some(5)],
2456
            precision,
2457
            scale,
2458
        );
2459
        // limit null_first
2460
        test_sort_decimal_array::<T>(
2461
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2462
            Some(SortOptions {
2463
                descending: false,
2464
                nulls_first: true,
2465
            }),
2466
            Some(3),
2467
            vec![None, None, Some(1)],
2468
            precision,
2469
            scale,
2470
        );
2471
    }
2472
2473
    /// Runs the shared decimal value-sorting cases for `Decimal32Type`
    /// using precision 8 and scale 3.
    #[test]
    fn test_sort_decimal32() {
        let (precision, scale) = (8, 3);
        test_sort_decimal::<Decimal32Type>(precision, scale);
    }
2477
2478
    /// Runs the shared decimal value-sorting cases for `Decimal64Type`
    /// using precision 17 and scale 5.
    #[test]
    fn test_sort_decimal64() {
        let (precision, scale) = (17, 5);
        test_sort_decimal::<Decimal64Type>(precision, scale);
    }
2482
2483
    /// Runs the shared decimal value-sorting cases for `Decimal128Type`
    /// using precision 23 and scale 6.
    #[test]
    fn test_sort_decimal128() {
        let (precision, scale) = (23, 6);
        test_sort_decimal::<Decimal128Type>(precision, scale);
    }
2487
2488
    /// Runs the shared decimal value-sorting cases for `Decimal256Type`
    /// using precision 53 and scale 6.
    #[test]
    fn test_sort_decimal256() {
        let (precision, scale) = (53, 6);
        test_sort_decimal::<Decimal256Type>(precision, scale);
    }
2492
2493
    /// Value-sorts `i256` decimals that include `i256::MIN` and `i256::MAX`
    /// alongside `1`, `-1` and two nulls, in both directions, with and
    /// without a limit of 4. Every case places nulls first.
    #[test]
    fn test_sort_decimal256_max_min() {
        let small = |v: i128| Some(i256::from_i128(v));
        // The helper consumes its input, so rebuild the same data per case.
        let input = || {
            vec![
                None,
                Some(i256::MIN),
                small(1),
                Some(i256::MAX),
                small(-1),
                None,
            ]
        };

        // (descending, limit, expected sorted values)
        let cases = [
            (
                false,
                None,
                vec![
                    None,
                    None,
                    Some(i256::MIN),
                    small(-1),
                    small(1),
                    Some(i256::MAX),
                ],
            ),
            (
                true,
                None,
                vec![
                    None,
                    None,
                    Some(i256::MAX),
                    small(1),
                    small(-1),
                    Some(i256::MIN),
                ],
            ),
            (
                false,
                Some(4),
                vec![None, None, Some(i256::MIN), small(-1)],
            ),
            (
                true,
                Some(4),
                vec![None, None, Some(i256::MAX), small(1)],
            ),
        ];

        for (descending, limit, expected) in cases {
            test_sort_decimal256_array(
                input(),
                Some(SortOptions {
                    descending,
                    nulls_first: true,
                }),
                limit,
                expected,
            );
        }
    }
2577
2578
    /// Sorts primitive arrays via `test_sort_primitive_arrays` across
    /// unsigned, signed and floating-point types, covering the default
    /// options, descending order, both null placements, NaN handling and
    /// limits that interact with null counts.
    ///
    /// The descending / nulls-last integer group previously listed
    /// `Int16Type` twice and never ran `Int64Type`; it now covers
    /// 8/16/32/64-bit signed integers like the other integer groups.
    #[test]
    fn test_sort_primitives() {
        // Runs one (input, options, limit, expected) case for each listed
        // primitive type. The expressions are re-expanded per type so the
        // numeric literals are inferred as that type's native value.
        macro_rules! check_each {
            (
                [$($ty:ty),+ $(,)?],
                $input:expr,
                $options:expr,
                $limit:expr,
                $expected:expr $(,)?
            ) => {
                $(
                    test_sort_primitive_arrays::<$ty>($input, $options, $limit, $expected);
                )+
            };
        }

        let opts = |descending: bool, nulls_first: bool| {
            Some(SortOptions {
                descending,
                nulls_first,
            })
        };
        let half = |v: f32| Some(f16::from_f32(v));
        let nan = Some(f64::NAN);

        // default case
        check_each!(
            [UInt8Type, UInt16Type, UInt32Type, UInt64Type],
            vec![None, Some(3), Some(5), Some(2), Some(3), None],
            None,
            None,
            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
        );

        // descending
        check_each!(
            [Int8Type, Int16Type, Int32Type, Int64Type],
            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
            opts(true, false),
            None,
            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
        );

        // descending, nulls first
        check_each!(
            [Int8Type, Int16Type, Int32Type, Int64Type],
            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
            opts(true, true),
            None,
            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
        );

        test_sort_primitive_arrays::<Int64Type>(
            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
            opts(true, true),
            Some(3),
            vec![None, None, Some(2)],
        );

        test_sort_primitive_arrays::<Float16Type>(
            vec![None, half(0.0), half(2.0), half(-1.0), half(0.0), None],
            opts(true, true),
            None,
            vec![None, None, half(2.0), half(0.0), half(0.0), half(-1.0)],
        );

        test_sort_primitive_arrays::<Float32Type>(
            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
            opts(true, true),
            None,
            vec![None, None, Some(2.0), Some(0.0), Some(0.0), Some(-1.0)],
        );
        test_sort_primitive_arrays::<Float64Type>(
            vec![None, Some(0.0), Some(2.0), Some(-1.0), nan, None],
            opts(true, true),
            None,
            vec![None, None, nan, Some(2.0), Some(0.0), Some(-1.0)],
        );
        test_sort_primitive_arrays::<Float64Type>(
            vec![nan, nan, nan, Some(1.0)],
            opts(true, true),
            None,
            vec![nan, nan, nan, Some(1.0)],
        );

        // int8 nulls first
        check_each!(
            [Int8Type, Int16Type, Int32Type, Int64Type],
            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
            opts(false, true),
            None,
            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
        );
        test_sort_primitive_arrays::<Float16Type>(
            vec![None, half(0.0), half(2.0), half(-1.0), half(0.0), None],
            opts(false, true),
            None,
            vec![None, None, half(-1.0), half(0.0), half(0.0), half(2.0)],
        );
        test_sort_primitive_arrays::<Float32Type>(
            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
            opts(false, true),
            None,
            vec![None, None, Some(-1.0), Some(0.0), Some(0.0), Some(2.0)],
        );
        test_sort_primitive_arrays::<Float64Type>(
            vec![None, Some(0.0), Some(2.0), Some(-1.0), nan, None],
            opts(false, true),
            None,
            vec![None, None, Some(-1.0), Some(0.0), Some(2.0), nan],
        );
        test_sort_primitive_arrays::<Float64Type>(
            vec![nan, nan, nan, Some(1.0)],
            opts(false, true),
            None,
            vec![Some(1.0), nan, nan, nan],
        );

        // limit
        test_sort_primitive_arrays::<Float64Type>(
            vec![nan, nan, nan, Some(1.0)],
            opts(false, true),
            Some(2),
            vec![Some(1.0), nan],
        );

        // limit with actual value
        test_sort_primitive_arrays::<Float64Type>(
            vec![Some(2.0), Some(4.0), Some(3.0), Some(1.0)],
            opts(false, true),
            Some(3),
            vec![Some(1.0), Some(2.0), Some(3.0)],
        );

        // valid values less than limit with extra nulls
        test_sort_primitive_arrays::<Float64Type>(
            vec![Some(2.0), None, None, Some(1.0)],
            opts(false, false),
            Some(3),
            vec![Some(1.0), Some(2.0), None],
        );

        test_sort_primitive_arrays::<Float64Type>(
            vec![Some(2.0), None, None, Some(1.0)],
            opts(false, true),
            Some(3),
            vec![None, None, Some(1.0)],
        );

        // more nulls than limit
        test_sort_primitive_arrays::<Float64Type>(
            vec![Some(2.0), None, None, None],
            opts(false, true),
            Some(2),
            vec![None, None],
        );

        test_sort_primitive_arrays::<Float64Type>(
            vec![Some(2.0), None, None, None],
            opts(false, false),
            Some(2),
            vec![Some(2.0), None],
        );
    }
2896
2897
    /// Index-sorts string arrays through `test_sort_to_indices_string_arrays`,
    /// covering the default options, both directions and null placements,
    /// and limits that are larger or smaller than the number of nulls.
    #[test]
    fn test_sort_to_indices_strings() {
        let opts = |descending: bool, nulls_first: bool| {
            Some(SortOptions {
                descending,
                nulls_first,
            })
        };
        // The three distinct inputs used below; each call needs its own copy.
        let mixed = || {
            vec![
                None,
                Some("bad"),
                Some("sad"),
                None,
                Some("glad"),
                Some("-ad"),
            ]
        };
        let two_nulls = || vec![Some("def"), None, None, Some("abc")];
        let three_nulls = || vec![Some("def"), None, None, None];

        // (input, options, limit, expected indices)
        let cases = [
            (mixed(), None, None, vec![0, 3, 5, 1, 4, 2]),
            (mixed(), opts(true, false), None, vec![2, 4, 1, 5, 0, 3]),
            (mixed(), opts(false, true), None, vec![0, 3, 5, 1, 4, 2]),
            (mixed(), opts(true, true), None, vec![0, 3, 2, 4, 1, 5]),
            (mixed(), opts(true, true), Some(3), vec![0, 3, 2]),
            // valid values less than limit with extra nulls
            (two_nulls(), opts(false, false), Some(3), vec![3, 0, 1]),
            (two_nulls(), opts(false, true), Some(3), vec![1, 2, 3]),
            // more nulls than limit
            (three_nulls(), opts(false, true), Some(2), vec![1, 2]),
            (three_nulls(), opts(false, false), Some(2), vec![0, 1]),
        ];

        for (data, options, limit, expected) in cases {
            test_sort_to_indices_string_arrays(data, options, limit, expected);
        }
    }
3023
3024
    /// Value-sorts string arrays through `test_sort_string_arrays`. The
    /// inputs mix short strings with strings longer than 12 bytes, and the
    /// cases cover the default options, both directions and null placements,
    /// and limits that are larger or smaller than the number of nulls.
    #[test]
    fn test_sort_strings() {
        const LONG: &str = "long string longer than 12 bytes";
        const LANG: &str = "lang string longer than 12 bytes";
        const DEF: &str = "def long string longer than 12";

        let opts = |descending: bool, nulls_first: bool| {
            Some(SortOptions {
                descending,
                nulls_first,
            })
        };

        // Two orderings of the same eight entries ("sad" and LONG swapped).
        let sad_then_long = || {
            vec![
                None,
                Some("bad"),
                Some("sad"),
                Some(LONG),
                None,
                Some("glad"),
                Some(LANG),
                Some("-ad"),
            ]
        };
        let long_then_sad = || {
            vec![
                None,
                Some("bad"),
                Some(LONG),
                Some("sad"),
                None,
                Some("glad"),
                Some(LANG),
                Some("-ad"),
            ]
        };
        let two_nulls = || vec![Some(DEF), None, None, Some("abc")];
        let three_nulls = || vec![Some(DEF), None, None, None];

        // Fully sorted outputs shared by several cases.
        let ascending_nulls_first = || {
            vec![
                None,
                None,
                Some("-ad"),
                Some("bad"),
                Some("glad"),
                Some(LANG),
                Some(LONG),
                Some("sad"),
            ]
        };

        // (input, options, limit, expected sorted values)
        let cases = [
            (sad_then_long(), None, None, ascending_nulls_first()),
            (
                sad_then_long(),
                opts(true, false),
                None,
                vec![
                    Some("sad"),
                    Some(LONG),
                    Some(LANG),
                    Some("glad"),
                    Some("bad"),
                    Some("-ad"),
                    None,
                    None,
                ],
            ),
            (
                long_then_sad(),
                opts(false, true),
                None,
                ascending_nulls_first(),
            ),
            (
                long_then_sad(),
                opts(true, true),
                None,
                vec![
                    None,
                    None,
                    Some("sad"),
                    Some(LONG),
                    Some(LANG),
                    Some("glad"),
                    Some("bad"),
                    Some("-ad"),
                ],
            ),
            (
                long_then_sad(),
                opts(true, true),
                Some(3),
                vec![None, None, Some("sad")],
            ),
            // valid values less than limit with extra nulls
            (
                two_nulls(),
                opts(false, false),
                Some(3),
                vec![Some("abc"), Some(DEF), None],
            ),
            (
                two_nulls(),
                opts(false, true),
                Some(3),
                vec![None, None, Some("abc")],
            ),
            // more nulls than limit
            (three_nulls(), opts(false, true), Some(2), vec![None, None]),
            (
                three_nulls(),
                opts(false, false),
                Some(2),
                vec![Some(DEF), None],
            ),
        ];

        for (data, options, limit, expected) in cases {
            test_sort_string_arrays(data, options, limit, expected);
        }
    }
3206
3207
    /// Runs the shared run-array sort scenarios with `sort_run` as the
    /// sorting function.
    #[test]
    fn test_sort_run_to_run() {
        test_sort_run_inner(
            |array: &dyn Array,
             sort_options: Option<SortOptions>,
             limit: Option<usize>|
             -> Result<ArrayRef, ArrowError> { sort_run(array, sort_options, limit) },
        );
    }
3211
3212
    /// Runs the shared run-array sort scenarios by computing sort indices
    /// with `sort_to_indices` and then gathering the values with `take`.
    #[test]
    fn test_sort_run_to_indices() {
        test_sort_run_inner(
            |array: &dyn Array,
             sort_options: Option<SortOptions>,
             limit: Option<usize>|
             -> Result<ArrayRef, ArrowError> {
                // A failure to compute indices aborts the test; a failure in
                // `take` is returned to the caller.
                let order = sort_to_indices(array, sort_options, limit).unwrap();
                take(array, &order, None)
            },
        );
    }
3219
3220
    fn test_sort_run_inner<F>(sort_fn: F)
3221
    where
3222
        F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
3223
    {
3224
        // Create an input array for testing
3225
        let total_len = 80;
3226
        let vals: Vec<Option<i32>> = vec![Some(1), None, Some(2), Some(3), Some(4), None, Some(5)];
3227
        let repeats: Vec<usize> = vec![1, 3, 2, 4];
3228
        let mut input_array: Vec<Option<i32>> = Vec::with_capacity(total_len);
3229
        for ix in 0_usize..32 {
3230
            let repeat: usize = repeats[ix % repeats.len()];
3231
            let val: Option<i32> = vals[ix % vals.len()];
3232
            input_array.resize(input_array.len() + repeat, val);
3233
        }
3234
3235
        // create run array using input_array
3236
        // Encode the input_array to run array
3237
        let mut builder =
3238
            PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
3239
        builder.extend(input_array.iter().copied());
3240
        let run_array = builder.finish();
3241
3242
        // slice lengths that are tested
3243
        let slice_lens = [
3244
            1, 2, 3, 4, 5, 6, 7, 37, 38, 39, 40, 41, 42, 43, 74, 75, 76, 77, 78, 79, 80,
3245
        ];
3246
        for slice_len in slice_lens {
3247
            test_sort_run_inner2(
3248
                input_array.as_slice(),
3249
                &run_array,
3250
                0,
3251
                slice_len,
3252
                None,
3253
                &sort_fn,
3254
            );
3255
            test_sort_run_inner2(
3256
                input_array.as_slice(),
3257
                &run_array,
3258
                total_len - slice_len,
3259
                slice_len,
3260
                None,
3261
                &sort_fn,
3262
            );
3263
            // Test with non zero limit
3264
            if slice_len > 1 {
3265
                test_sort_run_inner2(
3266
                    input_array.as_slice(),
3267
                    &run_array,
3268
                    0,
3269
                    slice_len,
3270
                    Some(slice_len / 2),
3271
                    &sort_fn,
3272
                );
3273
                test_sort_run_inner2(
3274
                    input_array.as_slice(),
3275
                    &run_array,
3276
                    total_len - slice_len,
3277
                    slice_len,
3278
                    Some(slice_len / 2),
3279
                    &sort_fn,
3280
                );
3281
            }
3282
        }
3283
    }
3284
3285
    fn test_sort_run_inner2<F>(
3286
        input_array: &[Option<i32>],
3287
        run_array: &RunArray<Int16Type>,
3288
        offset: usize,
3289
        length: usize,
3290
        limit: Option<usize>,
3291
        sort_fn: &F,
3292
    ) where
3293
        F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
3294
    {
3295
        // Run the sort and build actual result
3296
        let sliced_array = run_array.slice(offset, length);
3297
        let sorted_sliced_array = sort_fn(&sliced_array, None, limit).unwrap();
3298
        let sorted_run_array = sorted_sliced_array
3299
            .as_any()
3300
            .downcast_ref::<RunArray<Int16Type>>()
3301
            .unwrap();
3302
        let typed_run_array = sorted_run_array
3303
            .downcast::<PrimitiveArray<Int32Type>>()
3304
            .unwrap();
3305
        let actual: Vec<Option<i32>> = typed_run_array.into_iter().collect();
3306
3307
        // build expected result.
3308
        let mut sliced_input = input_array[offset..(offset + length)].to_owned();
3309
        sliced_input.sort();
3310
        let expected = if let Some(limit) = limit {
3311
            sliced_input.iter().take(limit).copied().collect()
3312
        } else {
3313
            sliced_input
3314
        };
3315
3316
        assert_eq!(expected, actual)
3317
    }
3318
3319
    /// Sorts string dictionary arrays with several key types, sort options and limits,
    /// comparing each result against a hand-written expected ordering.
    #[test]
    fn test_sort_string_dicts() {
        let opts = |descending: bool, nulls_first: bool| {
            Some(SortOptions {
                descending,
                nulls_first,
            })
        };

        // Shared fixtures; each call gets its own freshly built vector.
        let mixed = || {
            vec![
                None,
                Some("bad"),
                Some("sad"),
                None,
                Some("glad"),
                Some("-ad"),
            ]
        };
        let mixed_asc_nulls_first = || {
            vec![
                None,
                None,
                Some("-ad"),
                Some("bad"),
                Some("glad"),
                Some("sad"),
            ]
        };
        let two_valid = || vec![Some("def"), None, None, Some("abc")];
        let one_valid = || vec![Some("def"), None, None, None];

        // no explicit options
        test_sort_string_dict_arrays::<Int8Type>(mixed(), None, None, mixed_asc_nulls_first());

        // descending, nulls last
        test_sort_string_dict_arrays::<Int16Type>(
            mixed(),
            opts(true, false),
            None,
            vec![
                Some("sad"),
                Some("glad"),
                Some("bad"),
                Some("-ad"),
                None,
                None,
            ],
        );

        // ascending, nulls first
        test_sort_string_dict_arrays::<Int32Type>(
            mixed(),
            opts(false, true),
            None,
            mixed_asc_nulls_first(),
        );

        // descending, nulls first
        test_sort_string_dict_arrays::<Int16Type>(
            mixed(),
            opts(true, true),
            None,
            vec![
                None,
                None,
                Some("sad"),
                Some("glad"),
                Some("bad"),
                Some("-ad"),
            ],
        );

        // descending, nulls first, limited to three rows
        test_sort_string_dict_arrays::<Int16Type>(
            mixed(),
            opts(true, true),
            Some(3),
            vec![None, None, Some("sad")],
        );

        // valid values less than limit with extra nulls
        test_sort_string_dict_arrays::<Int16Type>(
            two_valid(),
            opts(false, false),
            Some(3),
            vec![Some("abc"), Some("def"), None],
        );

        test_sort_string_dict_arrays::<Int16Type>(
            two_valid(),
            opts(false, true),
            Some(3),
            vec![None, None, Some("abc")],
        );

        // more nulls than limit
        test_sort_string_dict_arrays::<Int16Type>(
            one_valid(),
            opts(false, true),
            Some(2),
            vec![None, None],
        );

        test_sort_string_dict_arrays::<Int16Type>(
            one_valid(),
            opts(false, false),
            Some(2),
            vec![Some("def"), None],
        );
    }
3473
3474
    /// Sorts list arrays of several element types, with and without nulls and limits,
    /// and compares each result with a hand-written expected ordering.
    ///
    /// The trailing argument of every `test_sort_list_arrays` call is forwarded
    /// unchanged; presumably it is a fixed list length (confirm against the helper).
    #[test]
    fn test_sort_list() {
        // Every case sorts ascending; only the placement of nulls varies.
        let ascending = |nulls_first: bool| {
            Some(SortOptions {
                descending: false,
                nulls_first,
            })
        };

        // Build a valid list whose elements are all valid.
        let f16_list = |xs: &[f32]| {
            Some(
                xs.iter()
                    .map(|&x| Some(f16::from_f32(x)))
                    .collect::<Vec<_>>(),
            )
        };
        let i32_list = |xs: &[i32]| Some(xs.iter().copied().map(Some).collect::<Vec<_>>());

        // The f32 and f64 cases share the same literals; expanding a macro at each call
        // site lets the float type be inferred separately per call.
        macro_rules! unsorted_floats {
            () => {
                vec![
                    Some(vec![Some(1.0), Some(0.0)]),
                    Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
                    Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
                    Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
                    Some(vec![Some(1.0), Some(1.0)]),
                ]
            };
        }
        macro_rules! sorted_floats {
            () => {
                vec![
                    Some(vec![Some(1.0), Some(0.0)]),
                    Some(vec![Some(1.0), Some(1.0)]),
                    Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
                    Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
                    Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
                ]
            };
        }

        // Five integer lists of mixed length, used by two cases below.
        let unsorted_i32 = || {
            vec![
                i32_list(&[1, 0]),
                i32_list(&[4, 3, 2, 1]),
                i32_list(&[2, 3, 4]),
                i32_list(&[3, 3, 3, 3]),
                i32_list(&[1, 1]),
            ]
        };
        let one_valid = || vec![i32_list(&[1]), None, None, None];
        let two_valid = || vec![i32_list(&[1]), None, None, i32_list(&[2])];

        // single-element lists
        test_sort_list_arrays::<Int8Type>(
            vec![
                Some(vec![Some(1)]),
                Some(vec![Some(4)]),
                Some(vec![Some(2)]),
                Some(vec![Some(3)]),
            ],
            ascending(false),
            None,
            vec![
                Some(vec![Some(1)]),
                Some(vec![Some(2)]),
                Some(vec![Some(3)]),
                Some(vec![Some(4)]),
            ],
            Some(1),
        );

        // half-precision floats
        test_sort_list_arrays::<Float16Type>(
            vec![
                f16_list(&[1.0, 0.0]),
                f16_list(&[4.0, 3.0, 2.0, 1.0]),
                f16_list(&[2.0, 3.0, 4.0]),
                f16_list(&[3.0, 3.0, 3.0, 3.0]),
                f16_list(&[1.0, 1.0]),
            ],
            ascending(false),
            None,
            vec![
                f16_list(&[1.0, 0.0]),
                f16_list(&[1.0, 1.0]),
                f16_list(&[2.0, 3.0, 4.0]),
                f16_list(&[3.0, 3.0, 3.0, 3.0]),
                f16_list(&[4.0, 3.0, 2.0, 1.0]),
            ],
            None,
        );

        test_sort_list_arrays::<Float32Type>(
            unsorted_floats!(),
            ascending(false),
            None,
            sorted_floats!(),
            None,
        );

        test_sort_list_arrays::<Float64Type>(
            unsorted_floats!(),
            ascending(false),
            None,
            sorted_floats!(),
            None,
        );

        test_sort_list_arrays::<Int32Type>(
            unsorted_i32(),
            ascending(false),
            None,
            vec![
                i32_list(&[1, 0]),
                i32_list(&[1, 1]),
                i32_list(&[2, 3, 4]),
                i32_list(&[3, 3, 3, 3]),
                i32_list(&[4, 3, 2, 1]),
            ],
            None,
        );

        // null lists and null elements
        test_sort_list_arrays::<Int32Type>(
            vec![
                None,
                Some(vec![Some(4), None, Some(2)]),
                i32_list(&[2, 3, 4]),
                None,
                Some(vec![Some(3), Some(3), None]),
            ],
            ascending(false),
            None,
            vec![
                i32_list(&[2, 3, 4]),
                Some(vec![Some(3), Some(3), None]),
                Some(vec![Some(4), None, Some(2)]),
                None,
                None,
            ],
            Some(3),
        );

        // limited to the first two rows
        test_sort_list_arrays::<Int32Type>(
            unsorted_i32(),
            ascending(false),
            Some(2),
            vec![i32_list(&[1, 0]), i32_list(&[1, 1])],
            None,
        );

        // valid values less than limit with extra nulls
        test_sort_list_arrays::<Int32Type>(
            two_valid(),
            ascending(false),
            Some(3),
            vec![i32_list(&[1]), i32_list(&[2]), None],
            None,
        );

        test_sort_list_arrays::<Int32Type>(
            two_valid(),
            ascending(true),
            Some(3),
            vec![None, None, i32_list(&[1])],
            None,
        );

        // more nulls than limit
        test_sort_list_arrays::<Int32Type>(
            one_valid(),
            ascending(true),
            Some(2),
            vec![None, None],
            None,
        );

        test_sort_list_arrays::<Int32Type>(
            one_valid(),
            ascending(false),
            Some(2),
            vec![i32_list(&[1]), None],
            None,
        );
    }
3703
3704
    /// Sorts binary arrays (three-byte values and variable-length byte strings) under
    /// several sort options and limits, comparing against hand-written expectations.
    ///
    /// The trailing argument of every `test_sort_binary_arrays` call is forwarded
    /// unchanged; presumably it is a fixed value width (confirm against the helper).
    #[test]
    fn test_sort_binary() {
        let opts = |descending: bool, nulls_first: bool| {
            Some(SortOptions {
                descending,
                nulls_first,
            })
        };

        // A valid three-byte value `[0, 0, last]`.
        let zeros_then = |last: u8| Some(vec![0, 0, last]);
        // A valid value copied from a byte-string literal.
        let bytes = |s: &[u8]| Some(s.to_vec());

        // Shared inputs; each call gets its own freshly built vector.
        let with_nulls = || {
            vec![
                zeros_then(0),
                None,
                zeros_then(3),
                zeros_then(7),
                zeros_then(1),
                None,
            ]
        };
        let words = || {
            vec![
                bytes(b"Hello"),
                None,
                bytes(b"from"),
                bytes(b"Apache"),
                bytes(b"Arrow-rs"),
                None,
            ]
        };

        // no nulls
        test_sort_binary_arrays(
            vec![
                zeros_then(0),
                zeros_then(5),
                zeros_then(3),
                zeros_then(7),
                zeros_then(1),
            ],
            opts(false, false),
            None,
            vec![
                zeros_then(0),
                zeros_then(1),
                zeros_then(3),
                zeros_then(5),
                zeros_then(7),
            ],
            Some(3),
        );

        // with nulls
        test_sort_binary_arrays(
            with_nulls(),
            opts(false, false),
            None,
            vec![
                zeros_then(0),
                zeros_then(1),
                zeros_then(3),
                zeros_then(7),
                None,
                None,
            ],
            Some(3),
        );

        test_sort_binary_arrays(
            vec![
                Some(vec![3, 5, 7]),
                None,
                Some(vec![1, 7, 1]),
                Some(vec![2, 7, 3]),
                None,
                Some(vec![1, 4, 3]),
            ],
            opts(false, false),
            None,
            vec![
                Some(vec![1, 4, 3]),
                Some(vec![1, 7, 1]),
                Some(vec![2, 7, 3]),
                Some(vec![3, 5, 7]),
                None,
                None,
            ],
            Some(3),
        );

        // descending
        test_sort_binary_arrays(
            with_nulls(),
            opts(true, false),
            None,
            vec![
                zeros_then(7),
                zeros_then(3),
                zeros_then(1),
                zeros_then(0),
                None,
                None,
            ],
            Some(3),
        );

        // nulls first
        test_sort_binary_arrays(
            with_nulls(),
            opts(false, true),
            None,
            vec![
                None,
                None,
                zeros_then(0),
                zeros_then(1),
                zeros_then(3),
                zeros_then(7),
            ],
            Some(3),
        );

        // limit
        test_sort_binary_arrays(
            with_nulls(),
            opts(false, true),
            Some(4),
            vec![None, None, zeros_then(0), zeros_then(1)],
            Some(3),
        );

        // var length
        test_sort_binary_arrays(
            words(),
            opts(false, false),
            None,
            vec![
                bytes(b"Apache"),
                bytes(b"Arrow-rs"),
                bytes(b"Hello"),
                bytes(b"from"),
                None,
                None,
            ],
            None,
        );

        // limit
        test_sort_binary_arrays(
            words(),
            opts(false, true),
            Some(4),
            vec![None, None, bytes(b"Apache"), bytes(b"Arrow-rs")],
            None,
        );
    }
3901
3902
    /// Lexicographic sort over a single `Int64` column, without a limit and with
    /// limits of two and three rows.
    #[test]
    fn test_lex_sort_single_column() {
        // Wrap plain values into an all-valid Int64 array.
        let int64_array = |values: &[i64]| -> ArrayRef {
            let values: Vec<Option<i64>> = values.iter().copied().map(Some).collect();
            Arc::new(PrimitiveArray::<Int64Type>::from(values))
        };

        let input = vec![SortColumn {
            values: int64_array(&[17, 2, -1, 0]),
            options: None,
        }];
        let fully_sorted = vec![int64_array(&[-1, 0, 2, 17])];

        test_lex_sort_arrays(input.clone(), fully_sorted.clone(), None);
        test_lex_sort_arrays(input.clone(), slice_arrays(fully_sorted, 0, 2), Some(2));

        // Explicitly test a limit on the sort as a demonstration
        let first_three = vec![int64_array(&[-1, 0, 2])];
        test_lex_sort_arrays(input, first_three, Some(3));
    }
3930
3931
    /// `lexsort` must return an error when its columns have different row counts.
    #[test]
    fn test_lex_sort_unaligned_rows() {
        // Two rows in the first column, a single row in the second.
        let two_rows: ArrayRef =
            Arc::new(PrimitiveArray::<Int64Type>::from(vec![None, Some(-1)]));
        let one_row: ArrayRef = Arc::new(StringArray::from(vec![Some("foo")]));

        let columns = vec![
            SortColumn {
                values: two_rows,
                options: None,
            },
            SortColumn {
                values: one_row,
                options: None,
            },
        ];

        let outcome = lexsort(&columns, None);
        assert!(
            outcome.is_err(),
            "lexsort should reject columns with different row counts"
        );
    }
3949
3950
    #[test]
3951
    fn test_lex_sort_mixed_types() {
3952
        let input = vec![
3953
            SortColumn {
3954
                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3955
                    Some(0),
3956
                    Some(2),
3957
                    Some(-1),
3958
                    Some(0),
3959
                ])) as ArrayRef,
3960
                options: None,
3961
            },
3962
            SortColumn {
3963
                values: Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
3964
                    Some(101),
3965
                    Some(8),
3966
                    Some(7),
3967
                    Some(102),
3968
                ])) as ArrayRef,
3969
                options: None,
3970
            },
3971
            SortColumn {
3972
                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3973
                    Some(-1),
3974
                    Some(-2),
3975
                    Some(-3),
3976
                    Some(-4),
3977
                ])) as ArrayRef,
3978
                options: None,
3979
            },
3980
        ];
3981
        let expected = vec![
3982
            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3983
                Some(-1),
3984
                Some(0),
3985
                Some(0),
3986
                Some(2),
3987
            ])) as ArrayRef,
3988
            Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
3989
                Some(7),
3990
                Some(101),
3991
                Some(102),
3992
                Some(8),
3993
            ])) as ArrayRef,
3994
            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3995
                Some(-3),
3996
                Some(-1),
3997
                Some(-4),
3998
                Some(-2),
3999
            ])) as ArrayRef,
4000
        ];
4001
        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4002
        test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
4003
4004
        // test mix of string and int64 with option
4005
        let input = vec![
4006
            SortColumn {
4007
                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4008
                    Some(0),
4009
                    Some(2),
4010
                    Some(-1),
4011
                    Some(0),
4012
                ])) as ArrayRef,
4013
                options: Some(SortOptions {
4014
                    descending: true,
4015
                    nulls_first: true,
4016
                }),
4017
            },
4018
            SortColumn {
4019
                values: Arc::new(StringArray::from(vec![
4020
                    Some("foo"),
4021
                    Some("9"),
4022
                    Some("7"),
4023
                    Some("bar"),
4024
                ])) as ArrayRef,
4025
                options: Some(SortOptions {
4026
                    descending: true,
4027
                    nulls_first: true,
4028
                }),
4029
            },
4030
        ];
4031
        let expected = vec![
4032
            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4033
                Some(2),
4034
                Some(0),
4035
                Some(0),
4036
                Some(-1),
4037
            ])) as ArrayRef,
4038
            Arc::new(StringArray::from(vec![
4039
                Some("9"),
4040
                Some("foo"),
4041
                Some("bar"),
4042
                Some("7"),
4043
            ])) as ArrayRef,
4044
        ];
4045
        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4046
        test_lex_sort_arrays(input, slice_arrays(expected, 0, 3), Some(3));
4047
4048
        // test sort with nulls first
4049
        let input = vec![
4050
            SortColumn {
4051
                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4052
                    None,
4053
                    Some(-1),
4054
                    Some(2),
4055
                    None,
4056
                ])) as ArrayRef,
4057
                options: Some(SortOptions {
4058
                    descending: true,
4059
                    nulls_first: true,
4060
                }),
4061
            },
4062
            SortColumn {
4063
                values: Arc::new(StringArray::from(vec![
4064
                    Some("foo"),
4065
                    Some("world"),
4066
                    Some("hello"),
4067
                    None,
4068
                ])) as ArrayRef,
4069
                options: Some(SortOptions {
4070
                    descending: true,
4071
                    nulls_first: true,
4072
                }),
4073
            },
4074
        ];
4075
        let expected = vec![
4076
            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4077
                None,
4078
                None,
4079
                Some(2),
4080
                Some(-1),
4081
            ])) as ArrayRef,
4082
            Arc::new(StringArray::from(vec![
4083
                None,
4084
                Some("foo"),
4085
                Some("hello"),
4086
                Some("world"),
4087
            ])) as ArrayRef,
4088
        ];
4089
        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4090
        test_lex_sort_arrays(input, slice_arrays(expected, 0, 1), Some(1));
4091
4092
        // test sort with nulls last
4093
        let input = vec![
4094
            SortColumn {
4095
                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4096
                    None,
4097
                    Some(-1),
4098
                    Some(2),
4099
                    None,
4100
                ])) as ArrayRef,
4101
                options: Some(SortOptions {
4102
                    descending: true,
4103
                    nulls_first: false,
4104
                }),
4105
            },
4106
            SortColumn {
4107
                values: Arc::new(StringArray::from(vec![
4108
                    Some("foo"),
4109
                    Some("world"),
4110
                    Some("hello"),
4111
                    None,
4112
                ])) as ArrayRef,
4113
                options: Some(SortOptions {
4114
                    descending: true,
4115
                    nulls_first: false,
4116
                }),
4117
            },
4118
        ];
4119
        let expected = vec![
4120
            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4121
                Some(2),
4122
                Some(-1),
4123
                None,
4124
                None,
4125
            ])) as ArrayRef,
4126
            Arc::new(StringArray::from(vec![
4127
                Some("hello"),
4128
                Some("world"),
4129
                Some("foo"),
4130
                None,
4131
            ])) as ArrayRef,
4132
        ];
4133
        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4134
        test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
4135
4136
        // test sort with opposite options
4137
        let input = vec![
4138
            SortColumn {
4139
                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4140
                    None,
4141
                    Some(-1),
4142
                    Some(2),
4143
                    Some(-1),
4144
                    None,
4145
                ])) as ArrayRef,
4146
                options: Some(SortOptions {
4147
                    descending: false,
4148
                    nulls_first: false,
4149
                }),
4150
            },
4151
            SortColumn {
4152
                values: Arc::new(StringArray::from(vec![
4153
                    Some("foo"),
4154
                    Some("bar"),
4155
                    Some("world"),
4156
                    Some("hello"),
4157
                    None,
4158
                ])) as ArrayRef,
4159
                options: Some(SortOptions {
4160
                    descending: true,
4161
                    nulls_first: true,
4162
                }),
4163
            },
4164
        ];
4165
        let expected = vec![
4166
            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4167
                Some(-1),
4168
                Some(-1),
4169
                Some(2),
4170
                None,
4171
                None,
4172
            ])) as ArrayRef,
4173
            Arc::new(StringArray::from(vec![
4174
                Some("hello"),
4175
                Some("bar"),
4176
                Some("world"),
4177
                None,
4178
                Some("foo"),
4179
            ])) as ArrayRef,
4180
        ];
4181
        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4182
        test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4183
4184
        // Limiting by more rows than present is ok
4185
        test_lex_sort_arrays(input, slice_arrays(expected, 0, 5), Some(10));
4186
4187
        // test with FixedSizeListArray, arrays order: [UInt32, FixedSizeList(UInt32, 1)]
4188
4189
        // case1
4190
        let primitive_array_data = vec![
4191
            Some(2),
4192
            Some(3),
4193
            Some(2),
4194
            Some(0),
4195
            None,
4196
            Some(2),
4197
            Some(1),
4198
            Some(2),
4199
        ];
4200
        let list_array_data = vec![
4201
            None,
4202
            Some(vec![Some(4)]),
4203
            Some(vec![Some(3)]),
4204
            Some(vec![Some(1)]),
4205
            Some(vec![Some(5)]),
4206
            Some(vec![Some(0)]),
4207
            Some(vec![Some(2)]),
4208
            Some(vec![None]),
4209
        ];
4210
4211
        let expected_primitive_array_data = vec![
4212
            None,
4213
            Some(0),
4214
            Some(1),
4215
            Some(2),
4216
            Some(2),
4217
            Some(2),
4218
            Some(2),
4219
            Some(3),
4220
        ];
4221
        let expected_list_array_data = vec![
4222
            Some(vec![Some(5)]),
4223
            Some(vec![Some(1)]),
4224
            Some(vec![Some(2)]),
4225
            None, // <-
4226
            Some(vec![None]),
4227
            Some(vec![Some(0)]),
4228
            Some(vec![Some(3)]), // <-
4229
            Some(vec![Some(4)]),
4230
        ];
4231
        test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4232
            primitive_array_data.clone(),
4233
            list_array_data.clone(),
4234
            expected_primitive_array_data.clone(),
4235
            expected_list_array_data,
4236
            None,
4237
            None,
4238
        );
4239
4240
        // case2
4241
        let primitive_array_options = SortOptions {
4242
            descending: false,
4243
            nulls_first: true,
4244
        };
4245
        let list_array_options = SortOptions {
4246
            descending: false,
4247
            nulls_first: false, // has been modified
4248
        };
4249
        let expected_list_array_data = vec![
4250
            Some(vec![Some(5)]),
4251
            Some(vec![Some(1)]),
4252
            Some(vec![Some(2)]),
4253
            Some(vec![Some(0)]), // <-
4254
            Some(vec![Some(3)]),
4255
            Some(vec![None]),
4256
            None, // <-
4257
            Some(vec![Some(4)]),
4258
        ];
4259
        test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4260
            primitive_array_data.clone(),
4261
            list_array_data.clone(),
4262
            expected_primitive_array_data.clone(),
4263
            expected_list_array_data,
4264
            Some(primitive_array_options),
4265
            Some(list_array_options),
4266
        );
4267
4268
        // case3
4269
        let primitive_array_options = SortOptions {
4270
            descending: false,
4271
            nulls_first: true,
4272
        };
4273
        let list_array_options = SortOptions {
4274
            descending: true, // has been modified
4275
            nulls_first: true,
4276
        };
4277
        let expected_list_array_data = vec![
4278
            Some(vec![Some(5)]),
4279
            Some(vec![Some(1)]),
4280
            Some(vec![Some(2)]),
4281
            None, // <-
4282
            Some(vec![None]),
4283
            Some(vec![Some(3)]),
4284
            Some(vec![Some(0)]), // <-
4285
            Some(vec![Some(4)]),
4286
        ];
4287
        test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4288
            primitive_array_data.clone(),
4289
            list_array_data.clone(),
4290
            expected_primitive_array_data,
4291
            expected_list_array_data,
4292
            Some(primitive_array_options),
4293
            Some(list_array_options),
4294
        );
4295
4296
        // test with ListArray/LargeListArray, arrays order: [List<UInt32>/LargeList<UInt32>, UInt32]
4297
4298
        let list_array_data = vec![
4299
            Some(vec![Some(2), Some(1)]), // 0
4300
            None,                         // 10
4301
            Some(vec![Some(3)]),          // 1
4302
            Some(vec![Some(2), Some(0)]), // 2
4303
            Some(vec![None, Some(2)]),    // 3
4304
            Some(vec![Some(0)]),          // none
4305
            None,                         // 11
4306
            Some(vec![Some(2), None]),    // 4
4307
            Some(vec![None]),             // 5
4308
            Some(vec![Some(2), Some(1)]), // 6
4309
        ];
4310
        let primitive_array_data = vec![
4311
            Some(0),
4312
            Some(10),
4313
            Some(1),
4314
            Some(2),
4315
            Some(3),
4316
            None,
4317
            Some(11),
4318
            Some(4),
4319
            Some(5),
4320
            Some(6),
4321
        ];
4322
        let expected_list_array_data = vec![
4323
            None,
4324
            None,
4325
            Some(vec![None]),
4326
            Some(vec![None, Some(2)]),
4327
            Some(vec![Some(0)]),
4328
            Some(vec![Some(2), None]),
4329
            Some(vec![Some(2), Some(0)]),
4330
            Some(vec![Some(2), Some(1)]),
4331
            Some(vec![Some(2), Some(1)]),
4332
            Some(vec![Some(3)]),
4333
        ];
4334
        let expected_primitive_array_data = vec![
4335
            Some(10),
4336
            Some(11),
4337
            Some(5),
4338
            Some(3),
4339
            None,
4340
            Some(4),
4341
            Some(2),
4342
            Some(0),
4343
            Some(6),
4344
            Some(1),
4345
        ];
4346
        test_lex_sort_mixed_types_with_list::<Int32Type>(
4347
            list_array_data.clone(),
4348
            primitive_array_data.clone(),
4349
            expected_list_array_data,
4350
            expected_primitive_array_data,
4351
            None,
4352
            None,
4353
        );
4354
    }
4355
4356
    fn test_lex_sort_mixed_types_with_fixed_size_list<T>(
4357
        primitive_array_data: Vec<Option<T::Native>>,
4358
        list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4359
        expected_primitive_array_data: Vec<Option<T::Native>>,
4360
        expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4361
        primitive_array_options: Option<SortOptions>,
4362
        list_array_options: Option<SortOptions>,
4363
    ) where
4364
        T: ArrowPrimitiveType,
4365
        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4366
    {
4367
        let input = vec![
4368
            SortColumn {
4369
                values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4370
                    as ArrayRef,
4371
                options: primitive_array_options,
4372
            },
4373
            SortColumn {
4374
                values: Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4375
                    list_array_data.clone(),
4376
                    1,
4377
                )) as ArrayRef,
4378
                options: list_array_options,
4379
            },
4380
        ];
4381
4382
        let expected = vec![
4383
            Arc::new(PrimitiveArray::<T>::from(
4384
                expected_primitive_array_data.clone(),
4385
            )) as ArrayRef,
4386
            Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4387
                expected_list_array_data.clone(),
4388
                1,
4389
            )) as ArrayRef,
4390
        ];
4391
4392
        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4393
        test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4394
    }
4395
4396
    fn test_lex_sort_mixed_types_with_list<T>(
4397
        list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4398
        primitive_array_data: Vec<Option<T::Native>>,
4399
        expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4400
        expected_primitive_array_data: Vec<Option<T::Native>>,
4401
        list_array_options: Option<SortOptions>,
4402
        primitive_array_options: Option<SortOptions>,
4403
    ) where
4404
        T: ArrowPrimitiveType,
4405
        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4406
    {
4407
        macro_rules! run_test {
4408
            ($ARRAY_TYPE:ident) => {
4409
                let input = vec![
4410
                    SortColumn {
4411
                        values: Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4412
                            list_array_data.clone(),
4413
                        )) as ArrayRef,
4414
                        options: list_array_options.clone(),
4415
                    },
4416
                    SortColumn {
4417
                        values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4418
                            as ArrayRef,
4419
                        options: primitive_array_options.clone(),
4420
                    },
4421
                ];
4422
4423
                let expected = vec![
4424
                    Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4425
                        expected_list_array_data.clone(),
4426
                    )) as ArrayRef,
4427
                    Arc::new(PrimitiveArray::<T>::from(
4428
                        expected_primitive_array_data.clone(),
4429
                    )) as ArrayRef,
4430
                ];
4431
4432
                test_lex_sort_arrays(input.clone(), expected.clone(), None);
4433
                test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4434
            };
4435
        }
4436
        run_test!(ListArray);
4437
        run_test!(LargeListArray);
4438
    }
4439
4440
    // For every limit below the input length, the first `limit` elements left by
    // `partial_sort` must equal the first `limit` elements of a full sort.
    #[test]
    fn test_partial_sort() {
        let mut words: Vec<&str> = vec![
            "a", "cat", "mat", "on", "sat", "the", "xxx", "xxxx", "fdadfdsf",
        ];

        // Fully sorted reference copy.
        let mut reference = words.clone();
        reference.sort_unstable();

        // `words` is deliberately not reset between iterations: each call works on whatever
        // order the previous call left behind.
        for limit in 0..words.len() {
            partial_sort(&mut words, limit, |lhs, rhs| lhs.cmp(rhs));
            assert_eq!(&reference[..limit], &words.as_slice()[..limit]);
        }
    }
4453
4454
    // `partial_sort` on 1000 seeded pseudo-random values: the first `limit` elements must match
    // a full sort, where `limit` itself is drawn from the same seeded generator.
    #[test]
    fn test_partial_rand_sort() {
        const LEN: u32 = 1000;
        let mut rng = StdRng::seed_from_u64(42);

        let mut shuffled = Vec::with_capacity(LEN as usize);
        for _ in 0..LEN {
            shuffled.push(rng.random::<u32>());
        }
        // Drawn after the values so the generator is consumed in the same order as before.
        let limit = (rng.next_u32() % LEN) as usize;

        let mut reference = shuffled.clone();
        reference.sort_unstable();

        partial_sort(&mut shuffled, limit, |lhs, rhs| lhs.cmp(rhs));
        assert_eq!(&reference[..limit], &shuffled[..limit]);
    }
4466
4467
    // Sorts Int8-keyed dictionaries with Int8 values under several option/limit combinations,
    // first with fully valid dictionary values and then with a null dictionary value.
    #[test]
    fn test_sort_int8_dicts() {
        let sort_options = |descending: bool, nulls_first: bool| {
            Some(SortOptions {
                descending,
                nulls_first,
            })
        };

        // Dictionary values without nulls: (options, limit, expected sorted values).
        let cases = [
            (
                None,
                None,
                vec![None, None, Some(1), Some(3), Some(5), Some(5)],
            ),
            (
                sort_options(true, false),
                None,
                vec![Some(5), Some(5), Some(3), Some(1), None, None],
            ),
            (
                sort_options(false, false),
                None,
                vec![Some(1), Some(3), Some(5), Some(5), None, None],
            ),
            (sort_options(true, true), Some(3), vec![None, None, Some(5)]),
        ];
        for (options, limit, expected) in cases {
            // Fresh arrays per case: the helper is handed owned keys and values.
            test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
                Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]),
                Int8Array::from(vec![1, 3, 5]),
                options,
                limit,
                expected,
            );
        }

        // Values have `None`.
        let cases = [
            (
                None,
                None,
                vec![None, None, None, Some(1), Some(3), Some(5), Some(5)],
            ),
            (
                sort_options(false, false),
                None,
                vec![Some(1), Some(3), Some(5), Some(5), None, None, None],
            ),
            (
                sort_options(true, false),
                None,
                vec![Some(5), Some(5), Some(3), Some(1), None, None, None],
            ),
            (
                sort_options(true, true),
                None,
                vec![None, None, None, Some(5), Some(5), Some(3), Some(1)],
            ),
        ];
        for (options, limit, expected) in cases {
            test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
                Int8Array::from(vec![
                    Some(1_i8),
                    None,
                    Some(3),
                    None,
                    Some(2),
                    Some(3),
                    Some(0),
                ]),
                Int8Array::from(vec![Some(1), Some(3), None, Some(5)]),
                options,
                limit,
                expected,
            );
        }
    }
4600
4601
    // Sorts Int8-keyed dictionaries with Float32 values under several option/limit
    // combinations, first with fully valid dictionary values and then with a null value.
    #[test]
    fn test_sort_f32_dicts() {
        let sort_options = |descending: bool, nulls_first: bool| {
            Some(SortOptions {
                descending,
                nulls_first,
            })
        };

        // Dictionary values without nulls: (options, limit, expected sorted values).
        let cases = [
            (
                None,
                None,
                vec![None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
            ),
            (
                sort_options(true, false),
                None,
                vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None],
            ),
            (
                sort_options(false, false),
                None,
                vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None],
            ),
            (
                sort_options(true, true),
                Some(3),
                vec![None, None, Some(5.1)],
            ),
        ];
        for (options, limit, expected) in cases {
            // Fresh arrays per case: the helper is handed owned keys and values.
            test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
                Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]),
                Float32Array::from(vec![1.2, 3.0, 5.1]),
                options,
                limit,
                expected,
            );
        }

        // Values have `None`.
        let cases = [
            (
                None,
                None,
                vec![None, None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
            ),
            (
                sort_options(false, false),
                None,
                vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None, None],
            ),
            (
                sort_options(true, false),
                None,
                vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None, None],
            ),
            (
                sort_options(true, true),
                None,
                vec![None, None, None, Some(5.1), Some(5.1), Some(3.0), Some(1.2)],
            ),
        ];
        for (options, limit, expected) in cases {
            test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
                Int8Array::from(vec![
                    Some(1_i8),
                    None,
                    Some(3),
                    None,
                    Some(2),
                    Some(3),
                    Some(0),
                ]),
                Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]),
                options,
                limit,
                expected,
            );
        }
    }
4734
4735
    // A dictionary row is treated as null by the comparator both when its key is null and when
    // its key points at a null dictionary value.
    #[test]
    fn test_lexicographic_comparator_null_dict_values() {
        // Dictionary values: slots 1 and 2 are masked out as null.
        let dict_values = Int32Array::new(
            vec![1, 2, 3, 4].into(),
            Some(NullBuffer::from(vec![true, false, false, true])),
        );
        // Keys: the third key (53) is masked out as null.
        let dict_keys = Int32Array::new(
            vec![0, 1, 53, 3].into(),
            Some(NullBuffer::from(vec![true, true, false, true])),
        );
        // Logical content: [1, NULL, NULL, 4]
        let dict = DictionaryArray::new(dict_keys, Arc::new(dict_values));

        let column = SortColumn {
            values: Arc::new(dict),
            options: None,
        };
        let comparator = LexicographicalComparator::try_new(&[column]).unwrap();

        let cases = [
            // 1.cmp(NULL)
            (0, 1, Ordering::Greater),
            // NULL.cmp(NULL)
            (2, 1, Ordering::Equal),
            // NULL.cmp(4)
            (2, 3, Ordering::Less),
        ];
        for (lhs, rhs, expected) in cases {
            assert_eq!(comparator.compare(lhs, rhs), expected);
        }
    }
4760
4761
    // The rows [1, 5], [0, 3], [1, 3] must produce sort indices [1, 2, 0] both as a
    // fixed-size list array and as a variable-size list array.
    #[test]
    fn sort_list_equal() {
        const ROWS: [[i64; 2]; 3] = [[1, 5], [0, 3], [1, 3]];

        // Fixed-size lists of two elements each.
        let mut fixed_builder = FixedSizeListBuilder::new(Int64Builder::new(), 2);
        for row in ROWS {
            fixed_builder.values().append_slice(&row);
            fixed_builder.append(true);
        }
        let a = fixed_builder.finish();

        let sort_indices = sort_to_indices(&a, None, None).unwrap();
        assert_eq!(sort_indices.values(), &[1, 2, 0]);

        // Same rows as variable-size lists.
        let mut list_builder = ListBuilder::new(Int64Builder::new());
        for row in ROWS {
            list_builder.values().append_slice(&row);
            list_builder.append(true);
        }
        let a = list_builder.finish();

        let sort_indices = sort_to_indices(&a, None, None).unwrap();
        assert_eq!(sort_indices.values(), &[1, 2, 0]);
    }
4787
4788
    // Struct arrays are rejected by `sort_to_indices` but can still be ordered via `lexsort`.
    #[test]
    fn sort_struct_fallback_to_lexsort() {
        let float_field = || Arc::new(Field::new("b", DataType::Float32, false));
        let int_field = || Arc::new(Field::new("c", DataType::Int32, false));

        let struct_array = StructArray::from(vec![
            (
                float_field(),
                Arc::new(Float32Array::from(vec![1.0, -0.1, 3.5, 1.0])) as ArrayRef,
            ),
            (
                int_field(),
                Arc::new(Int32Array::from(vec![42, 28, 19, 31])) as ArrayRef,
            ),
        ]);

        // The single-column kernel reports the type as unsupported and fails with a
        // descriptive error.
        assert!(!can_sort_to_indices(struct_array.data_type()));
        let error = sort_to_indices(&struct_array, None, None).err().unwrap();
        assert!(error
            .to_string()
            .contains("Sort not supported for data type"));

        // `lexsort` accepts the same struct array as its only sort column.
        let sort_columns = vec![SortColumn {
            values: Arc::new(struct_array) as ArrayRef,
            options: None,
        }];
        let sorted = lexsort(&sort_columns, None).unwrap();

        // Rows ordered by "b" first; the two rows with b == 1.0 are ordered by "c".
        let expected_struct_array = Arc::new(StructArray::from(vec![
            (
                float_field(),
                Arc::new(Float32Array::from(vec![-0.1, 1.0, 1.0, 3.5])) as ArrayRef,
            ),
            (
                int_field(),
                Arc::new(Int32Array::from(vec![28, 31, 42, 19])) as ArrayRef,
            ),
        ])) as ArrayRef;

        assert_eq!(&sorted[0], &expected_struct_array);
    }
4830
4831
    /// A simple, correct but slower reference implementation.
4832
    fn naive_partition(array: &BooleanArray) -> (Vec<u32>, Vec<u32>) {
4833
        let len = array.len();
4834
        let mut valid = Vec::with_capacity(len);
4835
        let mut nulls = Vec::with_capacity(len);
4836
        for i in 0..len {
4837
            if array.is_valid(i) {
4838
                valid.push(i as u32);
4839
            } else {
4840
                nulls.push(i as u32);
4841
            }
4842
        }
4843
        (valid, nulls)
4844
    }
4845
4846
    // Cross-checks `partition_validity` against `naive_partition` on 1000 seeded random
    // boolean arrays, on the full array as well as on slices of it.
    #[test]
    fn fuzz_partition_validity() {
        let mut rng = StdRng::seed_from_u64(0xF00D_CAFE);
        for _ in 0..1_000 {
            // Random length below 512, each slot null with probability 0.2.
            let len = rng.random_range(0..512);
            let mut builder = BooleanBuilder::new();
            for _ in 0..len {
                match rng.random_bool(0.2) {
                    true => builder.append_null(),
                    false => builder.append_value(rng.random_bool(0.5)),
                }
            }
            let array = builder.finish();

            // Whole array.
            let (v1, n1) = partition_validity(&array);
            let (v2, n2) = naive_partition(&array);
            assert_eq!(v1, v2, "valid mismatch on full array");
            assert_eq!(n1, n2, "null  mismatch on full array");

            // Slices are only exercised on arrays with at least 8 rows.
            if len < 8 {
                continue;
            }

            // 1) Random non-empty slice starting anywhere up to 4 rows before the end.
            let offset = rng.random_range(0..=len - 4);
            let slice_len = rng.random_range(1..=len - offset);

            // Keep the slice in a local so the downcast reference below stays valid.
            let sliced = array.slice(offset, slice_len);
            let slice = sliced
                .as_any()
                .downcast_ref::<BooleanArray>()
                .expect("slice should be a BooleanArray");

            let (sv1, sn1) = partition_validity(slice);
            let (sv2, sn2) = naive_partition(slice);
            assert_eq!(
                sv1, sv2,
                "valid mismatch on random slice at offset {offset} length {slice_len}",
            );
            assert_eq!(
                sn1, sn2,
                "null mismatch on random slice at offset {offset} length {slice_len}",
            );

            // 2) Slices that start beyond the first 64-bit chunk need more than 68 rows.
            if len <= 68 {
                continue;
            }

            let offset2 = rng.random_range(65..(len - 3));
            let len2 = rng.random_range(1..=(len - offset2));

            let sliced2 = array.slice(offset2, len2);
            let slice2 = sliced2
                .as_any()
                .downcast_ref::<BooleanArray>()
                .expect("slice2 should be a BooleanArray");

            let (sv3, sn3) = partition_validity(slice2);
            let (sv4, sn4) = naive_partition(slice2);
            assert_eq!(
                sv3, sv4,
                "valid mismatch on chunk-crossing slice at offset {offset2} length {len2}",
            );
            assert_eq!(
                sn3, sn4,
                "null mismatch on chunk-crossing slice at offset {offset2} length {len2}",
            );
        }
    }
4918
4919
    // Small deterministic checks of `partition_validity`: all valid, all null, alternating.
    #[test]
    fn test_partition_edge_cases() {
        // (input, expected valid indices, expected null indices)
        let cases: [(BooleanArray, Vec<u32>, Vec<u32>); 3] = [
            // all valid
            (
                BooleanArray::from(vec![Some(true), Some(false), Some(true)]),
                vec![0, 1, 2],
                vec![],
            ),
            // all null
            (
                BooleanArray::from(vec![None, None, None]),
                vec![],
                vec![0, 1, 2],
            ),
            // alternating
            (
                BooleanArray::from(vec![Some(true), None, Some(true), None]),
                vec![0, 2],
                vec![1, 3],
            ),
        ];

        for (array, expected_valid, expected_nulls) in cases {
            let (valid, nulls) = partition_validity(&array);
            assert_eq!(valid, expected_valid);
            assert_eq!(nulls, expected_nulls);
        }
    }
4940
4941
    // Checks `sort_bytes` against the standard library sort on hand-picked strings that
    // exercise the 4-byte prefix logic.
    #[test]
    fn test_specific_edge_cases() {
        let test_cases: Vec<&str> = vec![
            // Key test cases for lengths 1-4 that test prefix padding
            "a", "ab", "ba", "baa", "abba", "abbc", "abc", "cda",
            // Test cases where first 4 bytes are same but subsequent bytes differ
            "abcd", "abcde", "abcdf", "abcdaaa", "abcdbbb",
            // Test cases around the 4-byte boundary, including lengths < 4 that require padding
            "z", "za", "zaa", "zaaa", "zaaab",
            // Empty string
            "",
            // Test various length combinations with same prefix
            "test", "test1", "test12", "test123", "test1234",
        ];

        // Sort with the kernel under test: every row is valid, default options, no limit.
        let row_ids: Vec<u32> = (0..test_cases.len() as u32).collect();
        let result = sort_bytes(
            &StringArray::from(test_cases.clone()),
            row_ids,
            Vec::new(), // no nulls
            SortOptions::default(),
            None,
        );

        // Translate the returned indices back into strings.
        let sorted_strings: Vec<&str> = result
            .values()
            .iter()
            .map(|&row| test_cases[row as usize])
            .collect();

        // Standard library sort is the reference ordering.
        let mut expected = test_cases.clone();
        expected.sort();

        assert_eq!(sorted_strings, expected);
    }
4979
4980
    // Test sorting correctness for different length combinations.
    //
    // Each fixture entry is `(string, byte length)`. The declared length is verified up front so
    // the annotations cannot drift from the strings they describe; the strings are then sorted
    // with `sort_bytes` and compared against the standard library sort.
    #[test]
    fn test_length_combinations() {
        let test_cases = vec![
            // Focus on testing strings of length 1-4, as these affect padding logic
            ("", 0),
            ("a", 1),
            ("ab", 2),
            ("abc", 3),
            ("abcd", 4),
            ("abcde", 5),
            ("b", 1),
            ("ba", 2),
            ("bab", 3),
            ("babc", 4),
            ("babcd", 5),
            // Test same prefix with different lengths
            ("test", 4),
            ("test1", 5),
            ("test12", 6),
            ("test123", 7),
        ];

        // Sanity-check the fixture: the second tuple field documents the byte length.
        for (s, len) in &test_cases {
            assert_eq!(s.len(), *len, "fixture length annotation is wrong for {s:?}");
        }

        let strings: Vec<&str> = test_cases.iter().map(|(s, _)| *s).collect();

        // Standard library sort is the reference ordering.
        let mut expected = strings.clone();
        expected.sort();

        // Every row is valid, default options, no limit.
        let string_array = StringArray::from(strings.clone());
        let indices: Vec<u32> = (0..strings.len() as u32).collect();
        let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);

        // Translate the returned indices back into strings.
        let sorted_strings: Vec<&str> = result
            .values()
            .iter()
            .map(|&idx| strings[idx as usize])
            .collect();

        assert_eq!(sorted_strings, expected);
    }
5019
5020
    // Multi-byte UTF-8 input: the sorted order must agree with the standard
    // library's ordering of `&str`.
    #[test]
    fn test_utf8_strings() {
        let inputs = vec![
            "a",
            "你",       // 3-byte UTF-8 character
            "你好",     // 6 bytes
            "你好世界", // 12 bytes
            "🎉",       // 4-byte emoji
            "🎉🎊",     // 8 bytes
            "café",     // Contains accent character
            "naïve",
            "Москва", // Cyrillic script
            "東京",   // Japanese kanji
            "한국",   // Korean
        ];

        // Reference ordering from the standard library
        let expected = {
            let mut reference = inputs.clone();
            reference.sort();
            reference
        };

        let array = StringArray::from(inputs.clone());
        let all_indices = (0..inputs.len() as u32).collect::<Vec<u32>>();
        let sorted = sort_bytes(&array, all_indices, vec![], SortOptions::default(), None);

        // Translate the returned indices back into the original strings
        let mut sorted_strings: Vec<&str> = Vec::new();
        for idx in sorted.values().iter() {
            sorted_strings.push(inputs[*idx as usize]);
        }

        assert_eq!(sorted_strings, expected);
    }
5052
5053
    // Fuzz testing: sort batches of randomly generated UTF-8 strings and compare
    // each batch against the standard library's sort.
    #[test]
    fn test_fuzz_random_strings() {
        // Fixed seed for reproducibility
        let mut rng = StdRng::seed_from_u64(42);

        // 100 rounds of fuzz testing
        for _round in 0..100 {
            // Each round draws 20-50 random strings
            let num_strings = rng.random_range(20..=50);
            let test_strings: Vec<String> = (0..num_strings)
                .map(|_| generate_random_string(&mut rng))
                .collect();

            // Reference ordering from the standard library
            let expected = {
                let mut reference = test_strings.clone();
                reference.sort();
                reference
            };

            // Ordering produced by the kernel under test
            let array = StringArray::from(test_strings.clone());
            let all_indices = (0..test_strings.len() as u32).collect::<Vec<u32>>();
            let sorted = sort_bytes(&array, all_indices, vec![], SortOptions::default(), None);

            let mut sorted_strings: Vec<String> = Vec::with_capacity(test_strings.len());
            for &idx in sorted.values().iter() {
                sorted_strings.push(test_strings[idx as usize].clone());
            }

            assert_eq!(
                sorted_strings, expected,
                "Fuzz test failed with input: {test_strings:?}"
            );
        }
    }
5091
5092
    // Helper function to generate random UTF-8 strings
5093
    fn generate_random_string(rng: &mut StdRng) -> String {
5094
        // Bias towards generating short strings, especially length 1-4
5095
        let length = if rng.random_bool(0.6) {
5096
            rng.random_range(0..=4) // 60% probability for 0-4 length strings
5097
        } else {
5098
            rng.random_range(5..=20) // 40% probability for longer strings
5099
        };
5100
5101
        if length == 0 {
5102
            return String::new();
5103
        }
5104
5105
        let mut result = String::new();
5106
        let mut current_len = 0;
5107
5108
        while current_len < length {
5109
            let c = generate_random_char(rng);
5110
            let char_len = c.len_utf8();
5111
5112
            // Ensure we don't exceed target length
5113
            if current_len + char_len <= length {
5114
                result.push(c);
5115
                current_len += char_len;
5116
            } else {
5117
                // If adding this character would exceed length, fill with ASCII
5118
                let remaining = length - current_len;
5119
                for _ in 0..remaining {
5120
                    result.push(rng.random_range('a'..='z'));
5121
                    current_len += 1;
5122
                }
5123
                break;
5124
            }
5125
        }
5126
5127
        result
5128
    }
5129
5130
    // Generate random characters (including various UTF-8 characters)
5131
    fn generate_random_char(rng: &mut StdRng) -> char {
5132
        match rng.random_range(0..10) {
5133
            0..=5 => rng.random_range('a'..='z'), // 60% ASCII lowercase
5134
            6 => rng.random_range('A'..='Z'),     // 10% ASCII uppercase
5135
            7 => rng.random_range('0'..='9'),     // 10% digits
5136
            8 => {
5137
                // 10% Chinese characters
5138
                let chinese_chars = ['你', '好', '世', '界', '测', '试', '中', '文'];
5139
                chinese_chars[rng.random_range(0..chinese_chars.len())]
5140
            }
5141
            9 => {
5142
                // 10% other Unicode characters (single `char`s)
5143
                let special_chars = ['é', 'ï', '🎉', '🎊', 'α', 'β', 'γ'];
5144
                special_chars[rng.random_range(0..special_chars.len())]
5145
            }
5146
            _ => unreachable!(),
5147
        }
5148
    }
5149
5150
    // Descending order: the result must be the reverse of the standard
    // library's ascending sort.
    #[test]
    fn test_descending_sort() {
        let inputs = vec!["a", "ab", "ba", "baa", "abba", "abbc", "abc", "cda"];

        // Reference: sort with the comparison flipped to get descending order
        let mut expected = inputs.clone();
        expected.sort_by(|lhs, rhs| rhs.cmp(lhs));

        let options = SortOptions {
            descending: true,
            nulls_first: false,
        };

        let array = StringArray::from(inputs.clone());
        let all_indices = (0..inputs.len() as u32).collect::<Vec<u32>>();
        let sorted = sort_bytes(&array, all_indices, vec![], options, None);

        // Translate the returned indices back into the original strings
        let mut sorted_strings: Vec<&str> = Vec::new();
        for idx in sorted.values().iter() {
            sorted_strings.push(inputs[*idx as usize]);
        }

        assert_eq!(sorted_strings, expected);
    }
5180
5181
    // Stress test: 1000 strings that all share the prefix "same" and differ
    // only in a zero-padded four-digit suffix.
    #[test]
    fn test_same_prefix_stress() {
        let prefix = "same";

        // Generate many strings with the same prefix
        let inputs: Vec<String> = (0..1000).map(|i| format!("{prefix}{i:04}")).collect();

        // Reference ordering from the standard library
        let expected = {
            let mut reference = inputs.clone();
            reference.sort();
            reference
        };

        let array = StringArray::from(inputs.clone());
        let all_indices = (0..inputs.len() as u32).collect::<Vec<u32>>();
        let sorted = sort_bytes(&array, all_indices, vec![], SortOptions::default(), None);

        // Translate the returned indices back into the original strings
        let mut sorted_strings: Vec<String> = Vec::with_capacity(inputs.len());
        for &idx in sorted.values().iter() {
            sorted_strings.push(inputs[idx as usize].clone());
        }

        assert_eq!(sorted_strings, expected);
    }
5207
5208
    // Limit parameter: only the first `limit` entries of the sorted order are
    // expected back.
    #[test]
    fn test_with_limit() {
        let inputs = vec!["z", "y", "x", "w", "v", "u", "t", "s"];
        let limit = 3;

        // Reference: fully sort, then keep the leading `limit` entries
        let expected: Vec<&str> = {
            let mut reference = inputs.clone();
            reference.sort();
            reference.into_iter().take(limit).collect()
        };

        let array = StringArray::from(inputs.clone());
        let all_indices = (0..inputs.len() as u32).collect::<Vec<u32>>();
        let sorted = sort_bytes(
            &array,
            all_indices,
            vec![],
            SortOptions::default(),
            Some(limit),
        );

        // Translate the returned indices back into the original strings
        let mut sorted_strings: Vec<&str> = Vec::new();
        for idx in sorted.values().iter() {
            sorted_strings.push(inputs[*idx as usize]);
        }

        assert_eq!(sorted_strings, expected);
        assert_eq!(sorted_strings.len(), limit);
    }
5237
}