/Users/andrewlamb/Software/arrow-rs/arrow-ord/src/ord.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! Contains functions and function factories to compare arrays. |
19 | | |
20 | | use arrow_array::cast::AsArray; |
21 | | use arrow_array::types::*; |
22 | | use arrow_array::*; |
23 | | use arrow_buffer::{ArrowNativeType, NullBuffer}; |
24 | | use arrow_schema::{ArrowError, SortOptions}; |
25 | | use std::cmp::Ordering; |
26 | | |
27 | | /// Compare the values at two arbitrary indices in two arrays. |
28 | | pub type DynComparator = Box<dyn Fn(usize, usize) -> Ordering + Send + Sync>; |
29 | | |
30 | | /// If parent sort order is descending we need to invert the value of nulls_first so that |
31 | | /// when the parent is sorted based on the produced ranks, nulls are still ordered correctly |
32 | 0 | fn child_opts(opts: SortOptions) -> SortOptions { |
33 | 0 | SortOptions { |
34 | 0 | descending: false, |
35 | 0 | nulls_first: opts.nulls_first != opts.descending, |
36 | 0 | } |
37 | 0 | } |
38 | | |
39 | 0 | fn compare<A, F>(l: &A, r: &A, opts: SortOptions, cmp: F) -> DynComparator |
40 | 0 | where |
41 | 0 | A: Array + Clone, |
42 | 0 | F: Fn(usize, usize) -> Ordering + Send + Sync + 'static, |
43 | | { |
44 | 0 | let l = l.logical_nulls().filter(|x| x.null_count() > 0); |
45 | 0 | let r = r.logical_nulls().filter(|x| x.null_count() > 0); |
46 | 0 | match (opts.nulls_first, opts.descending) { |
47 | 0 | (true, true) => compare_impl::<true, true, _>(l, r, cmp), |
48 | 0 | (true, false) => compare_impl::<true, false, _>(l, r, cmp), |
49 | 0 | (false, true) => compare_impl::<false, true, _>(l, r, cmp), |
50 | 0 | (false, false) => compare_impl::<false, false, _>(l, r, cmp), |
51 | | } |
52 | 0 | } |
53 | | |
54 | 0 | fn compare_impl<const NULLS_FIRST: bool, const DESCENDING: bool, F>( |
55 | 0 | l: Option<NullBuffer>, |
56 | 0 | r: Option<NullBuffer>, |
57 | 0 | cmp: F, |
58 | 0 | ) -> DynComparator |
59 | 0 | where |
60 | 0 | F: Fn(usize, usize) -> Ordering + Send + Sync + 'static, |
61 | | { |
62 | 0 | let cmp = move |i, j| match DESCENDING { |
63 | 0 | true => cmp(i, j).reverse(), |
64 | 0 | false => cmp(i, j), |
65 | 0 | }; |
66 | | |
67 | 0 | let (left_null, right_null) = match NULLS_FIRST { |
68 | 0 | true => (Ordering::Less, Ordering::Greater), |
69 | 0 | false => (Ordering::Greater, Ordering::Less), |
70 | | }; |
71 | | |
72 | 0 | match (l, r) { |
73 | 0 | (None, None) => Box::new(cmp), |
74 | 0 | (Some(l), None) => Box::new(move |i, j| match l.is_null(i) { |
75 | 0 | true => left_null, |
76 | 0 | false => cmp(i, j), |
77 | 0 | }), |
78 | 0 | (None, Some(r)) => Box::new(move |i, j| match r.is_null(j) { |
79 | 0 | true => right_null, |
80 | 0 | false => cmp(i, j), |
81 | 0 | }), |
82 | 0 | (Some(l), Some(r)) => Box::new(move |i, j| match (l.is_null(i), r.is_null(j)) { |
83 | 0 | (true, true) => Ordering::Equal, |
84 | 0 | (true, false) => left_null, |
85 | 0 | (false, true) => right_null, |
86 | 0 | (false, false) => cmp(i, j), |
87 | 0 | }), |
88 | | } |
89 | 0 | } |
90 | | |
91 | 0 | fn compare_primitive<T: ArrowPrimitiveType>( |
92 | 0 | left: &dyn Array, |
93 | 0 | right: &dyn Array, |
94 | 0 | opts: SortOptions, |
95 | 0 | ) -> DynComparator |
96 | 0 | where |
97 | 0 | T::Native: ArrowNativeTypeOp, |
98 | | { |
99 | 0 | let left = left.as_primitive::<T>(); |
100 | 0 | let right = right.as_primitive::<T>(); |
101 | 0 | let l_values = left.values().clone(); |
102 | 0 | let r_values = right.values().clone(); |
103 | | |
104 | 0 | compare(&left, &right, opts, move |i, j| { |
105 | 0 | l_values[i].compare(r_values[j]) |
106 | 0 | }) |
107 | 0 | } |
108 | | |
109 | 0 | fn compare_boolean(left: &dyn Array, right: &dyn Array, opts: SortOptions) -> DynComparator { |
110 | 0 | let left = left.as_boolean(); |
111 | 0 | let right = right.as_boolean(); |
112 | | |
113 | 0 | let l_values = left.values().clone(); |
114 | 0 | let r_values = right.values().clone(); |
115 | | |
116 | 0 | compare(left, right, opts, move |i, j| { |
117 | 0 | l_values.value(i).cmp(&r_values.value(j)) |
118 | 0 | }) |
119 | 0 | } |
120 | | |
121 | 0 | fn compare_bytes<T: ByteArrayType>( |
122 | 0 | left: &dyn Array, |
123 | 0 | right: &dyn Array, |
124 | 0 | opts: SortOptions, |
125 | 0 | ) -> DynComparator { |
126 | 0 | let left = left.as_bytes::<T>(); |
127 | 0 | let right = right.as_bytes::<T>(); |
128 | | |
129 | 0 | let l = left.clone(); |
130 | 0 | let r = right.clone(); |
131 | 0 | compare(left, right, opts, move |i, j| { |
132 | 0 | let l: &[u8] = l.value(i).as_ref(); |
133 | 0 | let r: &[u8] = r.value(j).as_ref(); |
134 | 0 | l.cmp(r) |
135 | 0 | }) |
136 | 0 | } |
137 | | |
138 | 0 | fn compare_byte_view<T: ByteViewType>( |
139 | 0 | left: &dyn Array, |
140 | 0 | right: &dyn Array, |
141 | 0 | opts: SortOptions, |
142 | 0 | ) -> DynComparator { |
143 | 0 | let left = left.as_byte_view::<T>(); |
144 | 0 | let right = right.as_byte_view::<T>(); |
145 | | |
146 | 0 | let l = left.clone(); |
147 | 0 | let r = right.clone(); |
148 | 0 | compare(left, right, opts, move |i, j| { |
149 | 0 | crate::cmp::compare_byte_view(&l, i, &r, j) |
150 | 0 | }) |
151 | 0 | } |
152 | | |
153 | 0 | fn compare_dict<K: ArrowDictionaryKeyType>( |
154 | 0 | left: &dyn Array, |
155 | 0 | right: &dyn Array, |
156 | 0 | opts: SortOptions, |
157 | 0 | ) -> Result<DynComparator, ArrowError> { |
158 | 0 | let left = left.as_dictionary::<K>(); |
159 | 0 | let right = right.as_dictionary::<K>(); |
160 | | |
161 | 0 | let c_opts = child_opts(opts); |
162 | 0 | let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?; |
163 | 0 | let left_keys = left.keys().values().clone(); |
164 | 0 | let right_keys = right.keys().values().clone(); |
165 | | |
166 | 0 | let f = compare(left, right, opts, move |i, j| { |
167 | 0 | let l = left_keys[i].as_usize(); |
168 | 0 | let r = right_keys[j].as_usize(); |
169 | 0 | cmp(l, r) |
170 | 0 | }); |
171 | 0 | Ok(f) |
172 | 0 | } |
173 | | |
174 | 0 | fn compare_list<O: OffsetSizeTrait>( |
175 | 0 | left: &dyn Array, |
176 | 0 | right: &dyn Array, |
177 | 0 | opts: SortOptions, |
178 | 0 | ) -> Result<DynComparator, ArrowError> { |
179 | 0 | let left = left.as_list::<O>(); |
180 | 0 | let right = right.as_list::<O>(); |
181 | | |
182 | 0 | let c_opts = child_opts(opts); |
183 | 0 | let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?; |
184 | | |
185 | 0 | let l_o = left.offsets().clone(); |
186 | 0 | let r_o = right.offsets().clone(); |
187 | 0 | let f = compare(left, right, opts, move |i, j| { |
188 | 0 | let l_end = l_o[i + 1].as_usize(); |
189 | 0 | let l_start = l_o[i].as_usize(); |
190 | | |
191 | 0 | let r_end = r_o[j + 1].as_usize(); |
192 | 0 | let r_start = r_o[j].as_usize(); |
193 | | |
194 | 0 | for (i, j) in (l_start..l_end).zip(r_start..r_end) { |
195 | 0 | match cmp(i, j) { |
196 | 0 | Ordering::Equal => continue, |
197 | 0 | r => return r, |
198 | | } |
199 | | } |
200 | 0 | (l_end - l_start).cmp(&(r_end - r_start)) |
201 | 0 | }); |
202 | 0 | Ok(f) |
203 | 0 | } |
204 | | |
205 | 0 | fn compare_fixed_list( |
206 | 0 | left: &dyn Array, |
207 | 0 | right: &dyn Array, |
208 | 0 | opts: SortOptions, |
209 | 0 | ) -> Result<DynComparator, ArrowError> { |
210 | 0 | let left = left.as_fixed_size_list(); |
211 | 0 | let right = right.as_fixed_size_list(); |
212 | | |
213 | 0 | let c_opts = child_opts(opts); |
214 | 0 | let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?; |
215 | | |
216 | 0 | let l_size = left.value_length().to_usize().unwrap(); |
217 | 0 | let r_size = right.value_length().to_usize().unwrap(); |
218 | 0 | let size_cmp = l_size.cmp(&r_size); |
219 | | |
220 | 0 | let f = compare(left, right, opts, move |i, j| { |
221 | 0 | let l_start = i * l_size; |
222 | 0 | let l_end = l_start + l_size; |
223 | 0 | let r_start = j * r_size; |
224 | 0 | let r_end = r_start + r_size; |
225 | 0 | for (i, j) in (l_start..l_end).zip(r_start..r_end) { |
226 | 0 | match cmp(i, j) { |
227 | 0 | Ordering::Equal => continue, |
228 | 0 | r => return r, |
229 | | } |
230 | | } |
231 | 0 | size_cmp |
232 | 0 | }); |
233 | 0 | Ok(f) |
234 | 0 | } |
235 | | |
236 | 0 | fn compare_map( |
237 | 0 | left: &dyn Array, |
238 | 0 | right: &dyn Array, |
239 | 0 | opts: SortOptions, |
240 | 0 | ) -> Result<DynComparator, ArrowError> { |
241 | 0 | let left = left.as_map(); |
242 | 0 | let right = right.as_map(); |
243 | | |
244 | 0 | let c_opts = child_opts(opts); |
245 | 0 | let cmp = make_comparator(left.entries(), right.entries(), c_opts)?; |
246 | | |
247 | 0 | let l_o = left.offsets().clone(); |
248 | 0 | let r_o = right.offsets().clone(); |
249 | 0 | let f = compare(left, right, opts, move |i, j| { |
250 | 0 | let l_end = l_o[i + 1].as_usize(); |
251 | 0 | let l_start = l_o[i].as_usize(); |
252 | | |
253 | 0 | let r_end = r_o[j + 1].as_usize(); |
254 | 0 | let r_start = r_o[j].as_usize(); |
255 | | |
256 | 0 | for (i, j) in (l_start..l_end).zip(r_start..r_end) { |
257 | 0 | match cmp(i, j) { |
258 | 0 | Ordering::Equal => continue, |
259 | 0 | r => return r, |
260 | | } |
261 | | } |
262 | 0 | (l_end - l_start).cmp(&(r_end - r_start)) |
263 | 0 | }); |
264 | 0 | Ok(f) |
265 | 0 | } |
266 | | |
267 | 0 | fn compare_struct( |
268 | 0 | left: &dyn Array, |
269 | 0 | right: &dyn Array, |
270 | 0 | opts: SortOptions, |
271 | 0 | ) -> Result<DynComparator, ArrowError> { |
272 | 0 | let left = left.as_struct(); |
273 | 0 | let right = right.as_struct(); |
274 | | |
275 | 0 | if left.columns().len() != right.columns().len() { |
276 | 0 | return Err(ArrowError::InvalidArgumentError( |
277 | 0 | "Cannot compare StructArray with different number of columns".to_string(), |
278 | 0 | )); |
279 | 0 | } |
280 | | |
281 | 0 | let c_opts = child_opts(opts); |
282 | 0 | let columns = left.columns().iter().zip(right.columns()); |
283 | 0 | let comparators = columns |
284 | 0 | .map(|(l, r)| make_comparator(l, r, c_opts)) |
285 | 0 | .collect::<Result<Vec<_>, _>>()?; |
286 | | |
287 | 0 | let f = compare(left, right, opts, move |i, j| { |
288 | 0 | for cmp in &comparators { |
289 | 0 | match cmp(i, j) { |
290 | 0 | Ordering::Equal => continue, |
291 | 0 | r => return r, |
292 | | } |
293 | | } |
294 | 0 | Ordering::Equal |
295 | 0 | }); |
296 | 0 | Ok(f) |
297 | 0 | } |
298 | | |
299 | | /// Returns a comparison function that compares two values at two different positions |
300 | | /// between the two arrays. |
301 | | /// |
302 | | /// For comparing arrays element-wise, see also the vectorised kernels in [`crate::cmp`]. |
303 | | /// |
304 | | /// If `nulls_first` is true `NULL` values will be considered less than any non-null value, |
305 | | /// otherwise they will be considered greater. |
306 | | /// |
307 | | /// # Basic Usage |
308 | | /// |
309 | | /// ``` |
310 | | /// # use std::cmp::Ordering; |
311 | | /// # use arrow_array::Int32Array; |
312 | | /// # use arrow_ord::ord::make_comparator; |
313 | | /// # use arrow_schema::SortOptions; |
314 | | /// # |
315 | | /// let array1 = Int32Array::from(vec![1, 2]); |
316 | | /// let array2 = Int32Array::from(vec![3, 4]); |
317 | | /// |
318 | | /// let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap(); |
319 | | /// // 1 (index 0 of array1) is smaller than 4 (index 1 of array2) |
320 | | /// assert_eq!(cmp(0, 1), Ordering::Less); |
321 | | /// |
322 | | /// let array1 = Int32Array::from(vec![Some(1), None]); |
323 | | /// let array2 = Int32Array::from(vec![None, Some(2)]); |
324 | | /// let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap(); |
325 | | /// |
326 | | /// assert_eq!(cmp(0, 1), Ordering::Less); // Some(1) vs Some(2) |
327 | | /// assert_eq!(cmp(1, 1), Ordering::Less); // None vs Some(2) |
328 | | /// assert_eq!(cmp(1, 0), Ordering::Equal); // None vs None |
329 | | /// assert_eq!(cmp(0, 0), Ordering::Greater); // Some(1) vs None |
330 | | /// ``` |
331 | | /// |
332 | | /// # Postgres-compatible Nested Comparison |
333 | | /// |
334 | | /// Whilst SQL prescribes ternary logic for nulls, that is comparing a value against a NULL yields |
335 | | /// a NULL, many systems, including postgres, instead apply a total ordering to comparison of |
336 | | /// nested nulls. That is nulls within nested types are either greater than any value (postgres), |
337 | | /// or less than any value (Spark). |
338 | | /// |
339 | | /// In particular |
340 | | /// |
341 | | /// ```ignore |
342 | | /// { a: 1, b: null } == { a: 1, b: null } => true |
343 | | /// { a: 1, b: null } == { a: 1, b: 1 } => false |
344 | | /// { a: 1, b: null } == null => null |
345 | | /// null == null => null |
346 | | /// ``` |
347 | | /// |
348 | | /// This could be implemented as below |
349 | | /// |
350 | | /// ``` |
351 | | /// # use arrow_array::{Array, BooleanArray}; |
352 | | /// # use arrow_buffer::NullBuffer; |
353 | | /// # use arrow_ord::cmp; |
354 | | /// # use arrow_ord::ord::make_comparator; |
355 | | /// # use arrow_schema::{ArrowError, SortOptions}; |
356 | | /// fn eq(a: &dyn Array, b: &dyn Array) -> Result<BooleanArray, ArrowError> { |
357 | | /// if !a.data_type().is_nested() { |
358 | | /// return cmp::eq(&a, &b); // Use faster vectorised kernel |
359 | | /// } |
360 | | /// |
361 | | /// let cmp = make_comparator(a, b, SortOptions::default())?; |
362 | | /// let len = a.len().min(b.len()); |
363 | | /// let values = (0..len).map(|i| cmp(i, i).is_eq()).collect(); |
364 | | /// let nulls = NullBuffer::union(a.nulls(), b.nulls()); |
365 | | /// Ok(BooleanArray::new(values, nulls)) |
366 | | /// } |
367 | | /// ```` |
368 | 0 | pub fn make_comparator( |
369 | 0 | left: &dyn Array, |
370 | 0 | right: &dyn Array, |
371 | 0 | opts: SortOptions, |
372 | 0 | ) -> Result<DynComparator, ArrowError> { |
373 | | use arrow_schema::DataType::*; |
374 | | |
375 | | macro_rules! primitive_helper { |
376 | | ($t:ty, $left:expr, $right:expr, $nulls_first:expr) => { |
377 | | Ok(compare_primitive::<$t>($left, $right, $nulls_first)) |
378 | | }; |
379 | | } |
380 | 0 | downcast_primitive! { |
381 | 0 | left.data_type(), right.data_type() => (primitive_helper, left, right, opts), |
382 | 0 | (Boolean, Boolean) => Ok(compare_boolean(left, right, opts)), |
383 | 0 | (Utf8, Utf8) => Ok(compare_bytes::<Utf8Type>(left, right, opts)), |
384 | 0 | (LargeUtf8, LargeUtf8) => Ok(compare_bytes::<LargeUtf8Type>(left, right, opts)), |
385 | 0 | (Utf8View, Utf8View) => Ok(compare_byte_view::<StringViewType>(left, right, opts)), |
386 | 0 | (Binary, Binary) => Ok(compare_bytes::<BinaryType>(left, right, opts)), |
387 | 0 | (LargeBinary, LargeBinary) => Ok(compare_bytes::<LargeBinaryType>(left, right, opts)), |
388 | 0 | (BinaryView, BinaryView) => Ok(compare_byte_view::<BinaryViewType>(left, right, opts)), |
389 | | (FixedSizeBinary(_), FixedSizeBinary(_)) => { |
390 | 0 | let left = left.as_fixed_size_binary(); |
391 | 0 | let right = right.as_fixed_size_binary(); |
392 | | |
393 | 0 | let l = left.clone(); |
394 | 0 | let r = right.clone(); |
395 | 0 | Ok(compare(left, right, opts, move |i, j| { |
396 | 0 | l.value(i).cmp(r.value(j)) |
397 | 0 | })) |
398 | | }, |
399 | 0 | (List(_), List(_)) => compare_list::<i32>(left, right, opts), |
400 | 0 | (LargeList(_), LargeList(_)) => compare_list::<i64>(left, right, opts), |
401 | 0 | (FixedSizeList(_, _), FixedSizeList(_, _)) => compare_fixed_list(left, right, opts), |
402 | 0 | (Struct(_), Struct(_)) => compare_struct(left, right, opts), |
403 | 0 | (Dictionary(l_key, _), Dictionary(r_key, _)) => { |
404 | | macro_rules! dict_helper { |
405 | | ($t:ty, $left:expr, $right:expr, $opts: expr) => { |
406 | | compare_dict::<$t>($left, $right, $opts) |
407 | | }; |
408 | | } |
409 | 0 | downcast_integer! { |
410 | 0 | l_key.as_ref(), r_key.as_ref() => (dict_helper, left, right, opts), |
411 | 0 | _ => unreachable!() |
412 | | } |
413 | | }, |
414 | 0 | (Map(_, _), Map(_, _)) => compare_map(left, right, opts), |
415 | 0 | (lhs, rhs) => Err(ArrowError::InvalidArgumentError(match lhs == rhs { |
416 | 0 | true => format!("The data type type {lhs:?} has no natural order"), |
417 | 0 | false => "Can't compare arrays of different types".to_string(), |
418 | | })) |
419 | | } |
420 | 0 | } |
421 | | |
422 | | #[cfg(test)] |
423 | | mod tests { |
424 | | use super::*; |
425 | | use arrow_array::builder::{Int32Builder, ListBuilder, MapBuilder, StringBuilder}; |
426 | | use arrow_buffer::{i256, IntervalDayTime, OffsetBuffer}; |
427 | | use arrow_schema::{DataType, Field, Fields}; |
428 | | use half::f16; |
429 | | use std::sync::Arc; |
430 | | |
431 | | #[test] |
432 | | fn test_fixed_size_binary() { |
433 | | let items = vec![vec![1u8], vec![2u8]]; |
434 | | let array = FixedSizeBinaryArray::try_from_iter(items.into_iter()).unwrap(); |
435 | | |
436 | | let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap(); |
437 | | |
438 | | assert_eq!(Ordering::Less, cmp(0, 1)); |
439 | | } |
440 | | |
441 | | #[test] |
442 | | fn test_fixed_size_binary_fixed_size_binary() { |
443 | | let items = vec![vec![1u8]]; |
444 | | let array1 = FixedSizeBinaryArray::try_from_iter(items.into_iter()).unwrap(); |
445 | | let items = vec![vec![2u8]]; |
446 | | let array2 = FixedSizeBinaryArray::try_from_iter(items.into_iter()).unwrap(); |
447 | | |
448 | | let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap(); |
449 | | |
450 | | assert_eq!(Ordering::Less, cmp(0, 0)); |
451 | | } |
452 | | |
453 | | #[test] |
454 | | fn test_i32() { |
455 | | let array = Int32Array::from(vec![1, 2]); |
456 | | |
457 | | let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap(); |
458 | | |
459 | | assert_eq!(Ordering::Less, (cmp)(0, 1)); |
460 | | } |
461 | | |
462 | | #[test] |
463 | | fn test_i32_i32() { |
464 | | let array1 = Int32Array::from(vec![1]); |
465 | | let array2 = Int32Array::from(vec![2]); |
466 | | |
467 | | let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap(); |
468 | | |
469 | | assert_eq!(Ordering::Less, cmp(0, 0)); |
470 | | } |
471 | | |
472 | | #[test] |
473 | | fn test_f16() { |
474 | | let array = Float16Array::from(vec![f16::from_f32(1.0), f16::from_f32(2.0)]); |
475 | | |
476 | | let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap(); |
477 | | |
478 | | assert_eq!(Ordering::Less, cmp(0, 1)); |
479 | | } |
480 | | |
481 | | #[test] |
482 | | fn test_f64() { |
483 | | let array = Float64Array::from(vec![1.0, 2.0]); |
484 | | |
485 | | let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap(); |
486 | | |
487 | | assert_eq!(Ordering::Less, cmp(0, 1)); |
488 | | } |
489 | | |
490 | | #[test] |
491 | | fn test_f64_nan() { |
492 | | let array = Float64Array::from(vec![1.0, f64::NAN]); |
493 | | |
494 | | let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap(); |
495 | | |
496 | | assert_eq!(Ordering::Less, cmp(0, 1)); |
497 | | assert_eq!(Ordering::Equal, cmp(1, 1)); |
498 | | } |
499 | | |
500 | | #[test] |
501 | | fn test_f64_zeros() { |
502 | | let array = Float64Array::from(vec![-0.0, 0.0]); |
503 | | |
504 | | let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap(); |
505 | | |
506 | | assert_eq!(Ordering::Less, cmp(0, 1)); |
507 | | assert_eq!(Ordering::Greater, cmp(1, 0)); |
508 | | } |
509 | | |
510 | | #[test] |
511 | | fn test_interval_day_time() { |
512 | | let array = IntervalDayTimeArray::from(vec![ |
513 | | // 0 days, 1 second |
514 | | IntervalDayTimeType::make_value(0, 1000), |
515 | | // 1 day, 2 milliseconds |
516 | | IntervalDayTimeType::make_value(1, 2), |
517 | | // 90M milliseconds (which is more than is in 1 day) |
518 | | IntervalDayTimeType::make_value(0, 90_000_000), |
519 | | ]); |
520 | | |
521 | | let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap(); |
522 | | |
523 | | assert_eq!(Ordering::Less, cmp(0, 1)); |
524 | | assert_eq!(Ordering::Greater, cmp(1, 0)); |
525 | | |
526 | | // somewhat confusingly, while 90M milliseconds is more than 1 day, |
527 | | // it will compare less as the comparison is done on the underlying |
528 | | // values not field by field |
529 | | assert_eq!(Ordering::Greater, cmp(1, 2)); |
530 | | assert_eq!(Ordering::Less, cmp(2, 1)); |
531 | | } |
532 | | |
533 | | #[test] |
534 | | fn test_interval_year_month() { |
535 | | let array = IntervalYearMonthArray::from(vec![ |
536 | | // 1 year, 0 months |
537 | | IntervalYearMonthType::make_value(1, 0), |
538 | | // 0 years, 13 months |
539 | | IntervalYearMonthType::make_value(0, 13), |
540 | | // 1 year, 1 month |
541 | | IntervalYearMonthType::make_value(1, 1), |
542 | | ]); |
543 | | |
544 | | let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap(); |
545 | | |
546 | | assert_eq!(Ordering::Less, cmp(0, 1)); |
547 | | assert_eq!(Ordering::Greater, cmp(1, 0)); |
548 | | |
549 | | // the underlying representation is months, so both quantities are the same |
550 | | assert_eq!(Ordering::Equal, cmp(1, 2)); |
551 | | assert_eq!(Ordering::Equal, cmp(2, 1)); |
552 | | } |
553 | | |
554 | | #[test] |
555 | | fn test_interval_month_day_nano() { |
556 | | let array = IntervalMonthDayNanoArray::from(vec![ |
557 | | // 100 days |
558 | | IntervalMonthDayNanoType::make_value(0, 100, 0), |
559 | | // 1 month |
560 | | IntervalMonthDayNanoType::make_value(1, 0, 0), |
561 | | // 100 day, 1 nanoseconds |
562 | | IntervalMonthDayNanoType::make_value(0, 100, 2), |
563 | | ]); |
564 | | |
565 | | let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap(); |
566 | | |
567 | | assert_eq!(Ordering::Less, cmp(0, 1)); |
568 | | assert_eq!(Ordering::Greater, cmp(1, 0)); |
569 | | |
570 | | // somewhat confusingly, while 100 days is more than 1 month in all cases |
571 | | // it will compare less as the comparison is done on the underlying |
572 | | // values not field by field |
573 | | assert_eq!(Ordering::Greater, cmp(1, 2)); |
574 | | assert_eq!(Ordering::Less, cmp(2, 1)); |
575 | | } |
576 | | |
577 | | #[test] |
578 | | fn test_decimali32() { |
579 | | let array = vec![Some(5_i32), Some(2_i32), Some(3_i32)] |
580 | | .into_iter() |
581 | | .collect::<Decimal32Array>() |
582 | | .with_precision_and_scale(8, 6) |
583 | | .unwrap(); |
584 | | |
585 | | let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap(); |
586 | | assert_eq!(Ordering::Less, cmp(1, 0)); |
587 | | assert_eq!(Ordering::Greater, cmp(0, 2)); |
588 | | } |
589 | | |
590 | | #[test] |
591 | | fn test_decimali64() { |
592 | | let array = vec![Some(5_i64), Some(2_i64), Some(3_i64)] |
593 | | .into_iter() |
594 | | .collect::<Decimal64Array>() |
595 | | .with_precision_and_scale(16, 6) |
596 | | .unwrap(); |
597 | | |
598 | | let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap(); |
599 | | assert_eq!(Ordering::Less, cmp(1, 0)); |
600 | | assert_eq!(Ordering::Greater, cmp(0, 2)); |
601 | | } |
602 | | |
603 | | #[test] |
604 | | fn test_decimali128() { |
605 | | let array = vec![Some(5_i128), Some(2_i128), Some(3_i128)] |
606 | | .into_iter() |
607 | | .collect::<Decimal128Array>() |
608 | | .with_precision_and_scale(23, 6) |
609 | | .unwrap(); |
610 | | |
611 | | let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap(); |
612 | | assert_eq!(Ordering::Less, cmp(1, 0)); |
613 | | assert_eq!(Ordering::Greater, cmp(0, 2)); |
614 | | } |
615 | | |
616 | | #[test] |
617 | | fn test_decimali256() { |
618 | | let array = vec![ |
619 | | Some(i256::from_i128(5_i128)), |
620 | | Some(i256::from_i128(2_i128)), |
621 | | Some(i256::from_i128(3_i128)), |
622 | | ] |
623 | | .into_iter() |
624 | | .collect::<Decimal256Array>() |
625 | | .with_precision_and_scale(53, 6) |
626 | | .unwrap(); |
627 | | |
628 | | let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap(); |
629 | | assert_eq!(Ordering::Less, cmp(1, 0)); |
630 | | assert_eq!(Ordering::Greater, cmp(0, 2)); |
631 | | } |
632 | | |
633 | | #[test] |
634 | | fn test_dict() { |
635 | | let data = vec!["a", "b", "c", "a", "a", "c", "c"]; |
636 | | let array = data.into_iter().collect::<DictionaryArray<Int16Type>>(); |
637 | | |
638 | | let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap(); |
639 | | |
640 | | assert_eq!(Ordering::Less, cmp(0, 1)); |
641 | | assert_eq!(Ordering::Equal, cmp(3, 4)); |
642 | | assert_eq!(Ordering::Greater, cmp(2, 3)); |
643 | | } |
644 | | |
645 | | #[test] |
646 | | fn test_multiple_dict() { |
647 | | let d1 = vec!["a", "b", "c", "d"]; |
648 | | let a1 = d1.into_iter().collect::<DictionaryArray<Int16Type>>(); |
649 | | let d2 = vec!["e", "f", "g", "a"]; |
650 | | let a2 = d2.into_iter().collect::<DictionaryArray<Int16Type>>(); |
651 | | |
652 | | let cmp = make_comparator(&a1, &a2, SortOptions::default()).unwrap(); |
653 | | |
654 | | assert_eq!(Ordering::Less, cmp(0, 0)); |
655 | | assert_eq!(Ordering::Equal, cmp(0, 3)); |
656 | | assert_eq!(Ordering::Greater, cmp(1, 3)); |
657 | | } |
658 | | |
659 | | #[test] |
660 | | fn test_primitive_dict() { |
661 | | let values = Int32Array::from(vec![1_i32, 0, 2, 5]); |
662 | | let keys = Int8Array::from_iter_values([0, 0, 1, 3]); |
663 | | let array1 = DictionaryArray::new(keys, Arc::new(values)); |
664 | | |
665 | | let values = Int32Array::from(vec![2_i32, 3, 4, 5]); |
666 | | let keys = Int8Array::from_iter_values([0, 1, 1, 3]); |
667 | | let array2 = DictionaryArray::new(keys, Arc::new(values)); |
668 | | |
669 | | let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap(); |
670 | | |
671 | | assert_eq!(Ordering::Less, cmp(0, 0)); |
672 | | assert_eq!(Ordering::Less, cmp(0, 3)); |
673 | | assert_eq!(Ordering::Equal, cmp(3, 3)); |
674 | | assert_eq!(Ordering::Greater, cmp(3, 1)); |
675 | | assert_eq!(Ordering::Greater, cmp(3, 2)); |
676 | | } |
677 | | |
678 | | #[test] |
679 | | fn test_float_dict() { |
680 | | let values = Float32Array::from(vec![1.0, 0.5, 2.1, 5.5]); |
681 | | let keys = Int8Array::from_iter_values([0, 0, 1, 3]); |
682 | | let array1 = DictionaryArray::try_new(keys, Arc::new(values)).unwrap(); |
683 | | |
684 | | let values = Float32Array::from(vec![1.2, 3.2, 4.0, 5.5]); |
685 | | let keys = Int8Array::from_iter_values([0, 1, 1, 3]); |
686 | | let array2 = DictionaryArray::new(keys, Arc::new(values)); |
687 | | |
688 | | let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap(); |
689 | | |
690 | | assert_eq!(Ordering::Less, cmp(0, 0)); |
691 | | assert_eq!(Ordering::Less, cmp(0, 3)); |
692 | | assert_eq!(Ordering::Equal, cmp(3, 3)); |
693 | | assert_eq!(Ordering::Greater, cmp(3, 1)); |
694 | | assert_eq!(Ordering::Greater, cmp(3, 2)); |
695 | | } |
696 | | |
697 | | #[test] |
698 | | fn test_timestamp_dict() { |
699 | | let values = TimestampSecondArray::from(vec![1, 0, 2, 5]); |
700 | | let keys = Int8Array::from_iter_values([0, 0, 1, 3]); |
701 | | let array1 = DictionaryArray::new(keys, Arc::new(values)); |
702 | | |
703 | | let values = TimestampSecondArray::from(vec![2, 3, 4, 5]); |
704 | | let keys = Int8Array::from_iter_values([0, 1, 1, 3]); |
705 | | let array2 = DictionaryArray::new(keys, Arc::new(values)); |
706 | | |
707 | | let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap(); |
708 | | |
709 | | assert_eq!(Ordering::Less, cmp(0, 0)); |
710 | | assert_eq!(Ordering::Less, cmp(0, 3)); |
711 | | assert_eq!(Ordering::Equal, cmp(3, 3)); |
712 | | assert_eq!(Ordering::Greater, cmp(3, 1)); |
713 | | assert_eq!(Ordering::Greater, cmp(3, 2)); |
714 | | } |
715 | | |
716 | | #[test] |
717 | | fn test_interval_dict() { |
718 | | let v1 = IntervalDayTime::new(0, 1); |
719 | | let v2 = IntervalDayTime::new(0, 2); |
720 | | let v3 = IntervalDayTime::new(12, 2); |
721 | | |
722 | | let values = IntervalDayTimeArray::from(vec![Some(v1), Some(v2), None, Some(v3)]); |
723 | | let keys = Int8Array::from_iter_values([0, 0, 1, 3]); |
724 | | let array1 = DictionaryArray::new(keys, Arc::new(values)); |
725 | | |
726 | | let values = IntervalDayTimeArray::from(vec![Some(v3), Some(v2), None, Some(v1)]); |
727 | | let keys = Int8Array::from_iter_values([0, 1, 1, 3]); |
728 | | let array2 = DictionaryArray::new(keys, Arc::new(values)); |
729 | | |
730 | | let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap(); |
731 | | |
732 | | assert_eq!(Ordering::Less, cmp(0, 0)); // v1 vs v3 |
733 | | assert_eq!(Ordering::Equal, cmp(0, 3)); // v1 vs v1 |
734 | | assert_eq!(Ordering::Greater, cmp(3, 3)); // v3 vs v1 |
735 | | assert_eq!(Ordering::Greater, cmp(3, 1)); // v3 vs v2 |
736 | | assert_eq!(Ordering::Greater, cmp(3, 2)); // v3 vs v2 |
737 | | } |
738 | | |
739 | | #[test] |
740 | | fn test_duration_dict() { |
741 | | let values = DurationSecondArray::from(vec![1, 0, 2, 5]); |
742 | | let keys = Int8Array::from_iter_values([0, 0, 1, 3]); |
743 | | let array1 = DictionaryArray::new(keys, Arc::new(values)); |
744 | | |
745 | | let values = DurationSecondArray::from(vec![2, 3, 4, 5]); |
746 | | let keys = Int8Array::from_iter_values([0, 1, 1, 3]); |
747 | | let array2 = DictionaryArray::new(keys, Arc::new(values)); |
748 | | |
749 | | let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap(); |
750 | | |
751 | | assert_eq!(Ordering::Less, cmp(0, 0)); |
752 | | assert_eq!(Ordering::Less, cmp(0, 3)); |
753 | | assert_eq!(Ordering::Equal, cmp(3, 3)); |
754 | | assert_eq!(Ordering::Greater, cmp(3, 1)); |
755 | | assert_eq!(Ordering::Greater, cmp(3, 2)); |
756 | | } |
757 | | |
758 | | #[test] |
759 | | fn test_decimal_dict() { |
760 | | let values = Decimal128Array::from(vec![1, 0, 2, 5]); |
761 | | let keys = Int8Array::from_iter_values([0, 0, 1, 3]); |
762 | | let array1 = DictionaryArray::new(keys, Arc::new(values)); |
763 | | |
764 | | let values = Decimal128Array::from(vec![2, 3, 4, 5]); |
765 | | let keys = Int8Array::from_iter_values([0, 1, 1, 3]); |
766 | | let array2 = DictionaryArray::new(keys, Arc::new(values)); |
767 | | |
768 | | let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap(); |
769 | | |
770 | | assert_eq!(Ordering::Less, cmp(0, 0)); |
771 | | assert_eq!(Ordering::Less, cmp(0, 3)); |
772 | | assert_eq!(Ordering::Equal, cmp(3, 3)); |
773 | | assert_eq!(Ordering::Greater, cmp(3, 1)); |
774 | | assert_eq!(Ordering::Greater, cmp(3, 2)); |
775 | | } |
776 | | |
777 | | #[test] |
778 | | fn test_decimal256_dict() { |
779 | | let values = Decimal256Array::from(vec![ |
780 | | i256::from_i128(1), |
781 | | i256::from_i128(0), |
782 | | i256::from_i128(2), |
783 | | i256::from_i128(5), |
784 | | ]); |
785 | | let keys = Int8Array::from_iter_values([0, 0, 1, 3]); |
786 | | let array1 = DictionaryArray::new(keys, Arc::new(values)); |
787 | | |
788 | | let values = Decimal256Array::from(vec![ |
789 | | i256::from_i128(2), |
790 | | i256::from_i128(3), |
791 | | i256::from_i128(4), |
792 | | i256::from_i128(5), |
793 | | ]); |
794 | | let keys = Int8Array::from_iter_values([0, 1, 1, 3]); |
795 | | let array2 = DictionaryArray::new(keys, Arc::new(values)); |
796 | | |
797 | | let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap(); |
798 | | |
799 | | assert_eq!(Ordering::Less, cmp(0, 0)); |
800 | | assert_eq!(Ordering::Less, cmp(0, 3)); |
801 | | assert_eq!(Ordering::Equal, cmp(3, 3)); |
802 | | assert_eq!(Ordering::Greater, cmp(3, 1)); |
803 | | assert_eq!(Ordering::Greater, cmp(3, 2)); |
804 | | } |
805 | | |
806 | | fn test_bytes_impl<T: ByteArrayType>() { |
807 | | let offsets = OffsetBuffer::from_lengths([3, 3, 1]); |
808 | | let a = GenericByteArray::<T>::new(offsets, b"abcdefa".into(), None); |
809 | | let cmp = make_comparator(&a, &a, SortOptions::default()).unwrap(); |
810 | | |
811 | | assert_eq!(Ordering::Less, cmp(0, 1)); |
812 | | assert_eq!(Ordering::Greater, cmp(0, 2)); |
813 | | assert_eq!(Ordering::Equal, cmp(1, 1)); |
814 | | } |
815 | | |
816 | | #[test] |
817 | | fn test_bytes() { |
818 | | test_bytes_impl::<Utf8Type>(); |
819 | | test_bytes_impl::<LargeUtf8Type>(); |
820 | | test_bytes_impl::<BinaryType>(); |
821 | | test_bytes_impl::<LargeBinaryType>(); |
822 | | } |
823 | | |
824 | | #[test] |
825 | | fn test_lists() { |
826 | | let mut a = ListBuilder::new(ListBuilder::new(Int32Builder::new())); |
827 | | a.extend([ |
828 | | Some(vec![Some(vec![Some(1), Some(2), None]), Some(vec![None])]), |
829 | | Some(vec![ |
830 | | Some(vec![Some(1), Some(2), Some(3)]), |
831 | | Some(vec![Some(1)]), |
832 | | ]), |
833 | | Some(vec![]), |
834 | | ]); |
835 | | let a = a.finish(); |
836 | | let mut b = ListBuilder::new(ListBuilder::new(Int32Builder::new())); |
837 | | b.extend([ |
838 | | Some(vec![Some(vec![Some(1), Some(2), None]), Some(vec![None])]), |
839 | | Some(vec![ |
840 | | Some(vec![Some(1), Some(2), None]), |
841 | | Some(vec![Some(1)]), |
842 | | ]), |
843 | | Some(vec![ |
844 | | Some(vec![Some(1), Some(2), Some(3), Some(4)]), |
845 | | Some(vec![Some(1)]), |
846 | | ]), |
847 | | None, |
848 | | ]); |
849 | | let b = b.finish(); |
850 | | |
851 | | let opts = SortOptions { |
852 | | descending: false, |
853 | | nulls_first: true, |
854 | | }; |
855 | | let cmp = make_comparator(&a, &b, opts).unwrap(); |
856 | | assert_eq!(cmp(0, 0), Ordering::Equal); |
857 | | assert_eq!(cmp(0, 1), Ordering::Less); |
858 | | assert_eq!(cmp(0, 2), Ordering::Less); |
859 | | assert_eq!(cmp(1, 2), Ordering::Less); |
860 | | assert_eq!(cmp(1, 3), Ordering::Greater); |
861 | | assert_eq!(cmp(2, 0), Ordering::Less); |
862 | | |
863 | | let opts = SortOptions { |
864 | | descending: true, |
865 | | nulls_first: true, |
866 | | }; |
867 | | let cmp = make_comparator(&a, &b, opts).unwrap(); |
868 | | assert_eq!(cmp(0, 0), Ordering::Equal); |
869 | | assert_eq!(cmp(0, 1), Ordering::Less); |
870 | | assert_eq!(cmp(0, 2), Ordering::Less); |
871 | | assert_eq!(cmp(1, 2), Ordering::Greater); |
872 | | assert_eq!(cmp(1, 3), Ordering::Greater); |
873 | | assert_eq!(cmp(2, 0), Ordering::Greater); |
874 | | |
875 | | let opts = SortOptions { |
876 | | descending: true, |
877 | | nulls_first: false, |
878 | | }; |
879 | | let cmp = make_comparator(&a, &b, opts).unwrap(); |
880 | | assert_eq!(cmp(0, 0), Ordering::Equal); |
881 | | assert_eq!(cmp(0, 1), Ordering::Greater); |
882 | | assert_eq!(cmp(0, 2), Ordering::Greater); |
883 | | assert_eq!(cmp(1, 2), Ordering::Greater); |
884 | | assert_eq!(cmp(1, 3), Ordering::Less); |
885 | | assert_eq!(cmp(2, 0), Ordering::Greater); |
886 | | |
887 | | let opts = SortOptions { |
888 | | descending: false, |
889 | | nulls_first: false, |
890 | | }; |
891 | | let cmp = make_comparator(&a, &b, opts).unwrap(); |
892 | | assert_eq!(cmp(0, 0), Ordering::Equal); |
893 | | assert_eq!(cmp(0, 1), Ordering::Greater); |
894 | | assert_eq!(cmp(0, 2), Ordering::Greater); |
895 | | assert_eq!(cmp(1, 2), Ordering::Less); |
896 | | assert_eq!(cmp(1, 3), Ordering::Less); |
897 | | assert_eq!(cmp(2, 0), Ordering::Less); |
898 | | } |
899 | | |
900 | | #[test] |
901 | | fn test_struct() { |
902 | | let fields = Fields::from(vec![ |
903 | | Field::new("a", DataType::Int32, true), |
904 | | Field::new_list("b", Field::new_list_field(DataType::Int32, true), true), |
905 | | ]); |
906 | | |
907 | | let a = Int32Array::from(vec![Some(1), Some(2), None, None]); |
908 | | let mut b = ListBuilder::new(Int32Builder::new()); |
909 | | b.extend([Some(vec![Some(1), Some(2)]), Some(vec![None]), None, None]); |
910 | | let b = b.finish(); |
911 | | |
912 | | let nulls = Some(NullBuffer::from_iter([true, true, true, false])); |
913 | | let values = vec![Arc::new(a) as _, Arc::new(b) as _]; |
914 | | let s1 = StructArray::new(fields.clone(), values, nulls); |
915 | | |
916 | | let a = Int32Array::from(vec![None, Some(2), None]); |
917 | | let mut b = ListBuilder::new(Int32Builder::new()); |
918 | | b.extend([None, None, Some(vec![])]); |
919 | | let b = b.finish(); |
920 | | |
921 | | let values = vec![Arc::new(a) as _, Arc::new(b) as _]; |
922 | | let s2 = StructArray::new(fields.clone(), values, None); |
923 | | |
924 | | let opts = SortOptions { |
925 | | descending: false, |
926 | | nulls_first: true, |
927 | | }; |
928 | | let cmp = make_comparator(&s1, &s2, opts).unwrap(); |
929 | | assert_eq!(cmp(0, 1), Ordering::Less); // (1, [1, 2]) cmp (2, None) |
930 | | assert_eq!(cmp(0, 0), Ordering::Greater); // (1, [1, 2]) cmp (None, None) |
931 | | assert_eq!(cmp(1, 1), Ordering::Greater); // (2, [None]) cmp (2, None) |
932 | | assert_eq!(cmp(2, 2), Ordering::Less); // (None, None) cmp (None, []) |
933 | | assert_eq!(cmp(3, 0), Ordering::Less); // None cmp (None, []) |
934 | | assert_eq!(cmp(2, 0), Ordering::Equal); // (None, None) cmp (None, None) |
935 | | assert_eq!(cmp(3, 0), Ordering::Less); // None cmp (None, None) |
936 | | |
937 | | let opts = SortOptions { |
938 | | descending: true, |
939 | | nulls_first: true, |
940 | | }; |
941 | | let cmp = make_comparator(&s1, &s2, opts).unwrap(); |
942 | | assert_eq!(cmp(0, 1), Ordering::Greater); // (1, [1, 2]) cmp (2, None) |
943 | | assert_eq!(cmp(0, 0), Ordering::Greater); // (1, [1, 2]) cmp (None, None) |
944 | | assert_eq!(cmp(1, 1), Ordering::Greater); // (2, [None]) cmp (2, None) |
945 | | assert_eq!(cmp(2, 2), Ordering::Less); // (None, None) cmp (None, []) |
946 | | assert_eq!(cmp(3, 0), Ordering::Less); // None cmp (None, []) |
947 | | assert_eq!(cmp(2, 0), Ordering::Equal); // (None, None) cmp (None, None) |
948 | | assert_eq!(cmp(3, 0), Ordering::Less); // None cmp (None, None) |
949 | | |
950 | | let opts = SortOptions { |
951 | | descending: true, |
952 | | nulls_first: false, |
953 | | }; |
954 | | let cmp = make_comparator(&s1, &s2, opts).unwrap(); |
955 | | assert_eq!(cmp(0, 1), Ordering::Greater); // (1, [1, 2]) cmp (2, None) |
956 | | assert_eq!(cmp(0, 0), Ordering::Less); // (1, [1, 2]) cmp (None, None) |
957 | | assert_eq!(cmp(1, 1), Ordering::Less); // (2, [None]) cmp (2, None) |
958 | | assert_eq!(cmp(2, 2), Ordering::Greater); // (None, None) cmp (None, []) |
959 | | assert_eq!(cmp(3, 0), Ordering::Greater); // None cmp (None, []) |
960 | | assert_eq!(cmp(2, 0), Ordering::Equal); // (None, None) cmp (None, None) |
961 | | assert_eq!(cmp(3, 0), Ordering::Greater); // None cmp (None, None) |
962 | | |
963 | | let opts = SortOptions { |
964 | | descending: false, |
965 | | nulls_first: false, |
966 | | }; |
967 | | let cmp = make_comparator(&s1, &s2, opts).unwrap(); |
968 | | assert_eq!(cmp(0, 1), Ordering::Less); // (1, [1, 2]) cmp (2, None) |
969 | | assert_eq!(cmp(0, 0), Ordering::Less); // (1, [1, 2]) cmp (None, None) |
970 | | assert_eq!(cmp(1, 1), Ordering::Less); // (2, [None]) cmp (2, None) |
971 | | assert_eq!(cmp(2, 2), Ordering::Greater); // (None, None) cmp (None, []) |
972 | | assert_eq!(cmp(3, 0), Ordering::Greater); // None cmp (None, []) |
973 | | assert_eq!(cmp(2, 0), Ordering::Equal); // (None, None) cmp (None, None) |
974 | | assert_eq!(cmp(3, 0), Ordering::Greater); // None cmp (None, None) |
975 | | } |
976 | | |
977 | | #[test] |
978 | | fn test_map() { |
979 | | // Create first map array demonstrating key priority over values: |
980 | | // [{"a": 100, "b": 1}, {"b": 999, "c": 1}, {}, {"x": 1}] |
981 | | let string_builder = StringBuilder::new(); |
982 | | let int_builder = Int32Builder::new(); |
983 | | let mut map1_builder = MapBuilder::new(None, string_builder, int_builder); |
984 | | |
985 | | // {"a": 100, "b": 1} - high value for "a", low value for "b" |
986 | | map1_builder.keys().append_value("a"); |
987 | | map1_builder.values().append_value(100); |
988 | | map1_builder.keys().append_value("b"); |
989 | | map1_builder.values().append_value(1); |
990 | | map1_builder.append(true).unwrap(); |
991 | | |
992 | | // {"b": 999, "c": 1} - very high value for "b", low value for "c" |
993 | | map1_builder.keys().append_value("b"); |
994 | | map1_builder.values().append_value(999); |
995 | | map1_builder.keys().append_value("c"); |
996 | | map1_builder.values().append_value(1); |
997 | | map1_builder.append(true).unwrap(); |
998 | | |
999 | | // {} |
1000 | | map1_builder.append(true).unwrap(); |
1001 | | |
1002 | | // {"x": 1} |
1003 | | map1_builder.keys().append_value("x"); |
1004 | | map1_builder.values().append_value(1); |
1005 | | map1_builder.append(true).unwrap(); |
1006 | | |
1007 | | let map1 = map1_builder.finish(); |
1008 | | |
1009 | | // Create second map array: |
1010 | | // [{"a": 1, "c": 999}, {"b": 1, "d": 999}, {"a": 1}, None] |
1011 | | let string_builder = StringBuilder::new(); |
1012 | | let int_builder = Int32Builder::new(); |
1013 | | let mut map2_builder = MapBuilder::new(None, string_builder, int_builder); |
1014 | | |
1015 | | // {"a": 1, "c": 999} - low value for "a", high value for "c" |
1016 | | map2_builder.keys().append_value("a"); |
1017 | | map2_builder.values().append_value(1); |
1018 | | map2_builder.keys().append_value("c"); |
1019 | | map2_builder.values().append_value(999); |
1020 | | map2_builder.append(true).unwrap(); |
1021 | | |
1022 | | // {"b": 1, "d": 999} - low value for "b", high value for "d" |
1023 | | map2_builder.keys().append_value("b"); |
1024 | | map2_builder.values().append_value(1); |
1025 | | map2_builder.keys().append_value("d"); |
1026 | | map2_builder.values().append_value(999); |
1027 | | map2_builder.append(true).unwrap(); |
1028 | | |
1029 | | // {"a": 1} |
1030 | | map2_builder.keys().append_value("a"); |
1031 | | map2_builder.values().append_value(1); |
1032 | | map2_builder.append(true).unwrap(); |
1033 | | |
1034 | | // None |
1035 | | map2_builder.append(false).unwrap(); |
1036 | | |
1037 | | let map2 = map2_builder.finish(); |
1038 | | |
1039 | | let opts = SortOptions { |
1040 | | descending: false, |
1041 | | nulls_first: true, |
1042 | | }; |
1043 | | let cmp = make_comparator(&map1, &map2, opts).unwrap(); |
1044 | | |
1045 | | // Test that keys have priority over values: |
1046 | | // {"a": 100, "b": 1} vs {"a": 1, "c": 999} |
1047 | | // First entries match (a:100 vs a:1), but 100 > 1, so Greater |
1048 | | assert_eq!(cmp(0, 0), Ordering::Greater); |
1049 | | |
1050 | | // {"b": 999, "c": 1} vs {"b": 1, "d": 999} |
1051 | | // First entries match (b:999 vs b:1), but 999 > 1, so Greater |
1052 | | assert_eq!(cmp(1, 1), Ordering::Greater); |
1053 | | |
1054 | | // Key comparison: "a" < "b", so {"a": 100, "b": 1} < {"b": 999, "c": 1} |
1055 | | assert_eq!(cmp(0, 1), Ordering::Less); |
1056 | | |
1057 | | // Empty map vs non-empty |
1058 | | assert_eq!(cmp(2, 2), Ordering::Less); // {} < {"a": 1} |
1059 | | |
1060 | | // Non-null vs null |
1061 | | assert_eq!(cmp(3, 3), Ordering::Greater); // {"x": 1} > None |
1062 | | |
1063 | | // Key priority test: "x" > "a", regardless of values |
1064 | | assert_eq!(cmp(3, 0), Ordering::Greater); // {"x": 1} > {"a": 1, "c": 999} |
1065 | | |
1066 | | // Empty vs non-empty |
1067 | | assert_eq!(cmp(2, 0), Ordering::Less); // {} < {"a": 1, "c": 999} |
1068 | | |
1069 | | let opts = SortOptions { |
1070 | | descending: true, |
1071 | | nulls_first: true, |
1072 | | }; |
1073 | | let cmp = make_comparator(&map1, &map2, opts).unwrap(); |
1074 | | |
1075 | | // With descending=true, value comparison is reversed |
1076 | | assert_eq!(cmp(0, 0), Ordering::Less); // {"a": 100, "b": 1} vs {"a": 1, "c": 999} (reversed) |
1077 | | assert_eq!(cmp(1, 1), Ordering::Less); // {"b": 999, "c": 1} vs {"b": 1, "d": 999} (reversed) |
1078 | | assert_eq!(cmp(0, 1), Ordering::Greater); // {"a": 100, "b": 1} vs {"b": 999, "c": 1} (key order reversed) |
1079 | | assert_eq!(cmp(3, 3), Ordering::Greater); // {"x": 1} > None |
1080 | | assert_eq!(cmp(2, 2), Ordering::Greater); // {} > {"a": 1} (reversed) |
1081 | | |
1082 | | let opts = SortOptions { |
1083 | | descending: false, |
1084 | | nulls_first: false, |
1085 | | }; |
1086 | | let cmp = make_comparator(&map1, &map2, opts).unwrap(); |
1087 | | |
1088 | | // Same key priority behavior with nulls_first=false |
1089 | | assert_eq!(cmp(0, 0), Ordering::Greater); // {"a": 100, "b": 1} vs {"a": 1, "c": 999} |
1090 | | assert_eq!(cmp(1, 1), Ordering::Greater); // {"b": 999, "c": 1} vs {"b": 1, "d": 999} |
1091 | | assert_eq!(cmp(3, 3), Ordering::Less); // {"x": 1} < None (nulls last) |
1092 | | assert_eq!(cmp(2, 2), Ordering::Less); // {} < {"a": 1} |
1093 | | } |
1094 | | |
1095 | | #[test] |
1096 | | fn test_map_vs_list_consistency() { |
1097 | | // Create map arrays and convert them to list arrays to verify comparison consistency |
1098 | | // Map arrays: [{"a": 1, "b": 2}, {"x": 10}, {}, {"c": 3}] |
1099 | | let string_builder = StringBuilder::new(); |
1100 | | let int_builder = Int32Builder::new(); |
1101 | | let mut map1_builder = MapBuilder::new(None, string_builder, int_builder); |
1102 | | |
1103 | | // {"a": 1, "b": 2} |
1104 | | map1_builder.keys().append_value("a"); |
1105 | | map1_builder.values().append_value(1); |
1106 | | map1_builder.keys().append_value("b"); |
1107 | | map1_builder.values().append_value(2); |
1108 | | map1_builder.append(true).unwrap(); |
1109 | | |
1110 | | // {"x": 10} |
1111 | | map1_builder.keys().append_value("x"); |
1112 | | map1_builder.values().append_value(10); |
1113 | | map1_builder.append(true).unwrap(); |
1114 | | |
1115 | | // {} |
1116 | | map1_builder.append(true).unwrap(); |
1117 | | |
1118 | | // {"c": 3} |
1119 | | map1_builder.keys().append_value("c"); |
1120 | | map1_builder.values().append_value(3); |
1121 | | map1_builder.append(true).unwrap(); |
1122 | | |
1123 | | let map1 = map1_builder.finish(); |
1124 | | |
1125 | | // Second map array: [{"a": 1, "b": 2}, {"y": 20}, {"d": 4}, None] |
1126 | | let string_builder = StringBuilder::new(); |
1127 | | let int_builder = Int32Builder::new(); |
1128 | | let mut map2_builder = MapBuilder::new(None, string_builder, int_builder); |
1129 | | |
1130 | | // {"a": 1, "b": 2} |
1131 | | map2_builder.keys().append_value("a"); |
1132 | | map2_builder.values().append_value(1); |
1133 | | map2_builder.keys().append_value("b"); |
1134 | | map2_builder.values().append_value(2); |
1135 | | map2_builder.append(true).unwrap(); |
1136 | | |
1137 | | // {"y": 20} |
1138 | | map2_builder.keys().append_value("y"); |
1139 | | map2_builder.values().append_value(20); |
1140 | | map2_builder.append(true).unwrap(); |
1141 | | |
1142 | | // {"d": 4} |
1143 | | map2_builder.keys().append_value("d"); |
1144 | | map2_builder.values().append_value(4); |
1145 | | map2_builder.append(true).unwrap(); |
1146 | | |
1147 | | // None |
1148 | | map2_builder.append(false).unwrap(); |
1149 | | |
1150 | | let map2 = map2_builder.finish(); |
1151 | | |
1152 | | // Convert map arrays to list arrays (Map entries are struct arrays with key-value pairs) |
1153 | | let list1: ListArray = map1.clone().into(); |
1154 | | let list2: ListArray = map2.clone().into(); |
1155 | | |
1156 | | let test_cases = [ |
1157 | | SortOptions { |
1158 | | descending: false, |
1159 | | nulls_first: true, |
1160 | | }, |
1161 | | SortOptions { |
1162 | | descending: true, |
1163 | | nulls_first: true, |
1164 | | }, |
1165 | | SortOptions { |
1166 | | descending: false, |
1167 | | nulls_first: false, |
1168 | | }, |
1169 | | SortOptions { |
1170 | | descending: true, |
1171 | | nulls_first: false, |
1172 | | }, |
1173 | | ]; |
1174 | | |
1175 | | for opts in test_cases { |
1176 | | let map_cmp = make_comparator(&map1, &map2, opts).unwrap(); |
1177 | | let list_cmp = make_comparator(&list1, &list2, opts).unwrap(); |
1178 | | |
1179 | | // Test all possible index combinations |
1180 | | for i in 0..map1.len() { |
1181 | | for j in 0..map2.len() { |
1182 | | let map_result = map_cmp(i, j); |
1183 | | let list_result = list_cmp(i, j); |
1184 | | assert_eq!( |
1185 | | map_result, list_result, |
1186 | | "Map comparison and List comparison should be equal for indices ({i}, {j}) with opts {opts:?}. Map: {map_result:?}, List: {list_result:?}" |
1187 | | ); |
1188 | | } |
1189 | | } |
1190 | | } |
1191 | | } |
1192 | | } |