/Users/andrewlamb/Software/arrow-rs/arrow-string/src/length.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! Defines kernel for length of string arrays and binary arrays |
19 | | |
20 | | use arrow_array::*; |
21 | | use arrow_array::{cast::AsArray, types::*}; |
22 | | use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer}; |
23 | | use arrow_schema::{ArrowError, DataType}; |
24 | | use std::sync::Arc; |
25 | | |
26 | 0 | fn length_impl<P: ArrowPrimitiveType>( |
27 | 0 | offsets: &OffsetBuffer<P::Native>, |
28 | 0 | nulls: Option<&NullBuffer>, |
29 | 0 | ) -> ArrayRef { |
30 | 0 | let v: Vec<_> = offsets |
31 | 0 | .windows(2) |
32 | 0 | .map(|w| w[1].sub_wrapping(w[0])) |
33 | 0 | .collect(); |
34 | 0 | Arc::new(PrimitiveArray::<P>::new(v.into(), nulls.cloned())) |
35 | 0 | } |
36 | | |
37 | 0 | fn bit_length_impl<P: ArrowPrimitiveType>( |
38 | 0 | offsets: &OffsetBuffer<P::Native>, |
39 | 0 | nulls: Option<&NullBuffer>, |
40 | 0 | ) -> ArrayRef { |
41 | 0 | let bits = P::Native::usize_as(8); |
42 | 0 | let c = |w: &[P::Native]| w[1].sub_wrapping(w[0]).mul_wrapping(bits); |
43 | 0 | let v: Vec<_> = offsets.windows(2).map(c).collect(); |
44 | 0 | Arc::new(PrimitiveArray::<P>::new(v.into(), nulls.cloned())) |
45 | 0 | } |
46 | | |
47 | | /// Returns an array of Int32/Int64 denoting the length of each value in the array. |
48 | | /// |
49 | | /// For list array, length is the number of elements in each list. |
50 | | /// For string array and binary array, length is the number of bytes of each value. |
51 | | /// |
52 | | /// * this only accepts ListArray/LargeListArray, StringArray/LargeStringArray/StringViewArray, BinaryArray/LargeBinaryArray, and FixedSizeListArray, |
53 | | /// or DictionaryArray with above Arrays as values |
54 | | /// * length of null is null. |
55 | | pub fn length(array: &dyn Array) -> Result<ArrayRef, ArrowError> { |
56 | | if let Some(d) = array.as_any_dictionary_opt() { |
57 | | let lengths = length(d.values().as_ref())?; |
58 | | return Ok(d.with_values(lengths)); |
59 | | } |
60 | | |
61 | | match array.data_type() { |
62 | | DataType::List(_) => { |
63 | | let list = array.as_list::<i32>(); |
64 | | Ok(length_impl::<Int32Type>(list.offsets(), list.nulls())) |
65 | | } |
66 | | DataType::LargeList(_) => { |
67 | | let list = array.as_list::<i64>(); |
68 | | Ok(length_impl::<Int64Type>(list.offsets(), list.nulls())) |
69 | | } |
70 | | DataType::Utf8 => { |
71 | | let list = array.as_string::<i32>(); |
72 | | Ok(length_impl::<Int32Type>(list.offsets(), list.nulls())) |
73 | | } |
74 | | DataType::LargeUtf8 => { |
75 | | let list = array.as_string::<i64>(); |
76 | | Ok(length_impl::<Int64Type>(list.offsets(), list.nulls())) |
77 | | } |
78 | | DataType::Utf8View => { |
79 | | let list = array.as_string_view(); |
80 | 0 | let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>(); |
81 | | Ok(Arc::new(PrimitiveArray::<Int32Type>::new( |
82 | | v.into(), |
83 | | list.nulls().cloned(), |
84 | | ))) |
85 | | } |
86 | | DataType::Binary => { |
87 | | let list = array.as_binary::<i32>(); |
88 | | Ok(length_impl::<Int32Type>(list.offsets(), list.nulls())) |
89 | | } |
90 | | DataType::LargeBinary => { |
91 | | let list = array.as_binary::<i64>(); |
92 | | Ok(length_impl::<Int64Type>(list.offsets(), list.nulls())) |
93 | | } |
94 | | DataType::FixedSizeBinary(len) | DataType::FixedSizeList(_, len) => Ok(Arc::new( |
95 | | Int32Array::new(vec![*len; array.len()].into(), array.nulls().cloned()), |
96 | | )), |
97 | | DataType::BinaryView => { |
98 | | let list = array.as_binary_view(); |
99 | 0 | let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>(); |
100 | | Ok(Arc::new(PrimitiveArray::<Int32Type>::new( |
101 | | v.into(), |
102 | | list.nulls().cloned(), |
103 | | ))) |
104 | | } |
105 | | other => Err(ArrowError::ComputeError(format!( |
106 | | "length not supported for {other:?}" |
107 | | ))), |
108 | | } |
109 | | } |
110 | | |
111 | | /// Returns an array of Int32/Int64 denoting the number of bits in each value in the array. |
112 | | /// |
113 | | /// * this only accepts StringArray/Utf8, LargeString/LargeUtf8, BinaryArray and LargeBinaryArray, |
114 | | /// or DictionaryArray with above Arrays as values |
115 | | /// * bit_length of null is null. |
116 | | /// * bit_length is in number of bits |
117 | | pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, ArrowError> { |
118 | | if let Some(d) = array.as_any_dictionary_opt() { |
119 | | let lengths = bit_length(d.values().as_ref())?; |
120 | | return Ok(d.with_values(lengths)); |
121 | | } |
122 | | |
123 | | match array.data_type() { |
124 | | DataType::List(_) => { |
125 | | let list = array.as_list::<i32>(); |
126 | | Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls())) |
127 | | } |
128 | | DataType::LargeList(_) => { |
129 | | let list = array.as_list::<i64>(); |
130 | | Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls())) |
131 | | } |
132 | | DataType::Utf8 => { |
133 | | let list = array.as_string::<i32>(); |
134 | | Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls())) |
135 | | } |
136 | | DataType::LargeUtf8 => { |
137 | | let list = array.as_string::<i64>(); |
138 | | Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls())) |
139 | | } |
140 | | DataType::Utf8View => { |
141 | | let list = array.as_string_view(); |
142 | | let values = list |
143 | | .views() |
144 | | .iter() |
145 | 0 | .map(|view| (*view as i32).wrapping_mul(8)) |
146 | | .collect(); |
147 | | Ok(Arc::new(Int32Array::new(values, array.nulls().cloned()))) |
148 | | } |
149 | | DataType::Binary => { |
150 | | let list = array.as_binary::<i32>(); |
151 | | Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls())) |
152 | | } |
153 | | DataType::LargeBinary => { |
154 | | let list = array.as_binary::<i64>(); |
155 | | Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls())) |
156 | | } |
157 | | DataType::FixedSizeBinary(len) => Ok(Arc::new(Int32Array::new( |
158 | | vec![*len * 8; array.len()].into(), |
159 | | array.nulls().cloned(), |
160 | | ))), |
161 | | other => Err(ArrowError::ComputeError(format!( |
162 | | "bit_length not supported for {other:?}" |
163 | | ))), |
164 | | } |
165 | | } |
166 | | |
167 | | #[cfg(test)] |
168 | | mod tests { |
169 | | use super::*; |
170 | | use arrow_buffer::Buffer; |
171 | | use arrow_data::ArrayData; |
172 | | use arrow_schema::Field; |
173 | | |
/// Test fixtures for `length` on string input.
///
/// Each entry is `(input strings, number of inputs, expected byte length per input)`.
fn length_cases_string() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
    // Size of the large case.
    const LARGE_LEN: usize = 4096;
    // Repeating pattern for the large case: each string paired with its byte length.
    const PATTERN: [(&str, i32); 5] = [
        ("one", 3),
        ("on", 2),
        ("o", 1),
        ("", 0),
        ("this is a longer string to test string array with", 49),
    ];

    let large_input: Vec<&'static str> = PATTERN
        .iter()
        .map(|(text, _)| *text)
        .cycle()
        .take(LARGE_LEN)
        .collect();
    let large_expected: Vec<i32> = PATTERN
        .iter()
        .map(|(_, byte_len)| *byte_len)
        .cycle()
        .take(LARGE_LEN)
        .collect();

    vec![
        (vec!["hello", " ", "world"], 3, vec![5, 1, 5]),
        (vec!["hello", " ", "world", "!"], 4, vec![5, 1, 5, 1]),
        (vec!["💖"], 1, vec![4]),
        (large_input, LARGE_LEN, large_expected),
    ]
}
193 | | |
// Builds a `GenericBinaryArray<$offset_ty>` from `$value`, runs `$kernel` on it and
// asserts that the output, downcast to `$result_ty`, equals `$expected`.
macro_rules! length_binary_helper {
    ($offset_ty: ty, $result_ty: ty, $kernel: ident, $value: expr, $expected: expr) => {{
        let input = GenericBinaryArray::<$offset_ty>::from($value);
        let want = <$result_ty>::from($expected);
        let output = $kernel(&input).unwrap();
        let got = output.as_any().downcast_ref::<$result_ty>().unwrap();
        assert_eq!(&want, got);
    }};
}
203 | | |
// Builds a `GenericListArray<$offset_ty>` of `$element_ty` primitives from `$value`,
// runs `length` on it and asserts that the output, downcast to `$result_ty`, equals
// `$expected`.
macro_rules! length_list_helper {
    ($offset_ty: ty, $result_ty: ty, $element_ty: ty, $value: expr, $expected: expr) => {{
        let input =
            GenericListArray::<$offset_ty>::from_iter_primitive::<$element_ty, _, _>($value);
        let want = <$result_ty>::from($expected);
        let output = length(&input).unwrap();
        let got = output.as_any().downcast_ref::<$result_ty>().unwrap();
        assert_eq!(&want, got);
    }};
}
214 | | |
/// `length` on a `StringArray` yields an `Int32Array` of byte lengths.
#[test]
fn length_test_string() {
    for (input, len, expected) in length_cases_string() {
        let array = StringArray::from(input);
        let lengths = length(&array).unwrap();
        assert_eq!(len, lengths.len());

        let lengths = lengths.as_primitive::<Int32Type>();
        for (i, value) in expected.iter().enumerate() {
            assert_eq!(*value, lengths.value(i));
        }
    }
}
229 | | |
/// `length` on a `LargeStringArray` yields an `Int64Array` of byte lengths.
#[test]
fn length_test_large_string() {
    for (input, len, expected) in length_cases_string() {
        let array = LargeStringArray::from(input);
        let lengths = length(&array).unwrap();
        assert_eq!(len, lengths.len());

        let lengths = lengths.as_primitive::<Int64Type>();
        for (i, value) in expected.iter().enumerate() {
            assert_eq!(i64::from(*value), lengths.value(i));
        }
    }
}
244 | | |
/// `length` on a `StringViewArray` yields an `Int32Array` of byte lengths.
#[test]
fn length_test_string_view() {
    for (input, len, expected) in length_cases_string() {
        let array = StringViewArray::from(input);
        let lengths = length(&array).unwrap();
        assert_eq!(len, lengths.len());

        let lengths = lengths.as_primitive::<Int32Type>();
        for (i, value) in expected.iter().enumerate() {
            assert_eq!(*value, lengths.value(i));
        }
    }
}
259 | | |
/// `length` on a `BinaryArray` counts the bytes of each value.
#[test]
fn length_test_binary() {
    let input: Vec<&[u8]> = vec![b"zero", b"one", &[0xff, 0xf8]];
    let byte_lengths: Vec<i32> = vec![4, 3, 2];
    length_binary_helper!(i32, Int32Array, length, input, byte_lengths);
}
266 | | |
/// `length` on a `LargeBinaryArray` counts the bytes of each value as `Int64`.
#[test]
fn length_test_large_binary() {
    let input: Vec<&[u8]> = vec![b"zero", &[0xff, 0xf8], b"two"];
    let byte_lengths: Vec<i64> = vec![4, 2, 3];
    length_binary_helper!(i64, Int64Array, length, input, byte_lengths);
}
273 | | |
/// `length` on a `BinaryViewArray` counts the bytes of each value, for short and
/// long values alike.
#[test]
fn length_test_binary_view() {
    let input: Vec<&[u8]> = vec![
        b"zero",
        &[0xff, 0xf8],
        b"two",
        b"this is a longer string to test binary array with",
    ];
    let want = Int32Array::from(vec![4, 2, 3, 49]);

    let array = BinaryViewArray::from(input);
    let output = length(&array).unwrap();
    let got = output.as_any().downcast_ref::<Int32Array>().unwrap();
    assert_eq!(&want, got);
}
290 | | |
/// `length` on a `ListArray` counts the elements of each list.
#[test]
fn length_test_list() {
    let lists = vec![
        Some(vec![]),
        Some(vec![Some(1), Some(2), Some(4)]),
        Some(vec![Some(0)]),
    ];
    let element_counts: Vec<i32> = vec![0, 3, 1];
    length_list_helper!(i32, Int32Array, Int32Type, lists, element_counts);
}
301 | | |
/// `length` on a `LargeListArray` counts the elements of each list; a null element
/// still counts as an element.
#[test]
fn length_test_large_list() {
    let lists = vec![
        Some(vec![]),
        Some(vec![Some(1.1), Some(2.2), Some(3.3)]),
        Some(vec![None]),
    ];
    let element_counts: Vec<i64> = vec![0, 3, 1];
    length_list_helper!(i64, Int64Array, Float32Type, lists, element_counts);
}
312 | | |
313 | | type OptionStr = Option<&'static str>; |
314 | | |
315 | | fn length_null_cases_string() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> { |
316 | | vec![( |
317 | | vec![Some("one"), None, Some("three"), Some("four")], |
318 | | 4, |
319 | | vec![Some(3), None, Some(5), Some(4)], |
320 | | )] |
321 | | } |
322 | | |
/// `length` keeps nulls of a `StringArray` as nulls in the output.
#[test]
fn length_null_string() {
    for (input, len, expected) in length_null_cases_string() {
        let array = StringArray::from(input);
        let output = length(&array).unwrap();
        assert_eq!(len, output.len());

        let got = output.as_any().downcast_ref::<Int32Array>().unwrap();
        let want = Int32Array::from(expected);
        assert_eq!(&want, got);
    }
}
337 | | |
/// `length` keeps nulls of a `LargeStringArray` as nulls in the `Int64` output.
#[test]
fn length_null_large_string() {
    for (input, len, expected) in length_null_cases_string() {
        let array = LargeStringArray::from(input);
        let output = length(&array).unwrap();
        assert_eq!(len, output.len());
        let got = output.as_any().downcast_ref::<Int64Array>().unwrap();

        // The shared fixture is expressed in i32; widen it for the Int64 result.
        let widened: Vec<Option<i64>> = expected
            .iter()
            .map(|slot| slot.map(i64::from))
            .collect();
        let want = Int64Array::from(widened);
        assert_eq!(&want, got);
    }
}
357 | | |
/// `length` keeps nulls of a `BinaryArray` as nulls in the output.
#[test]
fn length_null_binary() {
    let input: Vec<Option<&[u8]>> =
        vec![Some(b"zero"), None, Some(&[0xff, 0xf8]), Some(b"three")];
    let byte_lengths: Vec<Option<i32>> = vec![Some(4), None, Some(2), Some(5)];
    length_binary_helper!(i32, Int32Array, length, input, byte_lengths);
}
365 | | |
/// `length` keeps nulls of a `LargeBinaryArray` as nulls in the `Int64` output.
#[test]
fn length_null_large_binary() {
    let input: Vec<Option<&[u8]>> =
        vec![Some(&[0xff, 0xf8]), None, Some(b"two"), Some(b"three")];
    let byte_lengths: Vec<Option<i64>> = vec![Some(2), None, Some(3), Some(5)];
    length_binary_helper!(i64, Int64Array, length, input, byte_lengths);
}
373 | | |
/// `length` reports null for a null list, and counts null elements inside a
/// non-null list.
#[test]
fn length_null_list() {
    let lists = vec![
        Some(vec![]),
        None,
        Some(vec![Some(1), None, Some(2), Some(4)]),
        Some(vec![Some(0)]),
    ];
    let element_counts: Vec<Option<i32>> = vec![Some(0), None, Some(4), Some(1)];
    length_list_helper!(i32, Int32Array, Int8Type, lists, element_counts);
}
385 | | |
/// `length` reports null for a null large list, and counts null elements inside a
/// non-null large list.
#[test]
fn length_null_large_list() {
    let lists = vec![
        Some(vec![]),
        None,
        Some(vec![Some(1.1), None, Some(4.0)]),
        Some(vec![Some(0.1)]),
    ];
    let element_counts: Vec<Option<i64>> = vec![Some(0), None, Some(3), Some(1)];
    length_list_helper!(i64, Int64Array, Float32Type, lists, element_counts);
}
397 | | |
/// `length` rejects input types it does not support, such as `UInt64Array`.
#[test]
fn length_wrong_type() {
    let unsupported = UInt64Array::from(vec![1u64]);
    let outcome = length(&unsupported);
    assert!(outcome.is_err());
}
405 | | |
/// `length` on a sliced `StringArray` only reports the values inside the slice.
#[test]
fn length_offsets_string() {
    let full = StringArray::from(vec![Some("hello"), Some(" "), Some("world"), None]);
    // Drop the first value so the array starts at a non-zero offset.
    let sliced = full.slice(1, 3);

    let output = length(&sliced).unwrap();
    let got = output.as_primitive::<Int32Type>();

    let want = Int32Array::from(vec![Some(1), Some(5), None]);
    assert_eq!(&want, got);
}
417 | | |
/// `length` on a sliced `BinaryArray` only reports the values inside the slice.
#[test]
fn length_offsets_binary() {
    let input: Vec<Option<&[u8]>> = vec![Some(b"hello"), Some(b" "), Some(&[0xff, 0xf8]), None];
    let full = BinaryArray::from(input);
    // Drop the first value so the array starts at a non-zero offset.
    let sliced = full.slice(1, 3);

    let output = length(&sliced).unwrap();
    let got = output.as_primitive::<Int32Type>();

    let want = Int32Array::from(vec![Some(1), Some(2), None]);
    assert_eq!(&want, got);
}
429 | | |
/// Test fixtures for `bit_length` on string input.
///
/// Each entry is `(input strings, number of inputs, expected bit length per input)`.
fn bit_length_cases() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
    // Size of the large case.
    const LARGE_LEN: usize = 4096;
    // Repeating pattern for the large case: each string paired with its bit length.
    const PATTERN: [(&str, i32); 4] = [("one", 24), ("on", 16), ("o", 8), ("", 0)];

    let large_input: Vec<&'static str> = PATTERN
        .iter()
        .map(|(text, _)| *text)
        .cycle()
        .take(LARGE_LEN)
        .collect();
    let large_expected: Vec<i32> = PATTERN
        .iter()
        .map(|(_, bits)| *bits)
        .cycle()
        .take(LARGE_LEN)
        .collect();

    vec![
        (vec!["hello", " ", "world", "!"], 4, vec![40, 8, 40, 8]),
        (vec!["💖"], 1, vec![32]),
        (vec!["josé"], 1, vec![40]),
        (large_input, LARGE_LEN, large_expected),
    ]
}
443 | | |
/// `bit_length` on a `StringArray` yields an `Int32Array` of bit lengths.
#[test]
fn bit_length_test_string() {
    for (input, len, expected) in bit_length_cases() {
        let array = StringArray::from(input);
        let bits = bit_length(&array).unwrap();
        assert_eq!(len, bits.len());

        let bits = bits.as_primitive::<Int32Type>();
        for (i, value) in expected.iter().enumerate() {
            assert_eq!(*value, bits.value(i));
        }
    }
}
458 | | |
/// `bit_length` on a `LargeStringArray` yields an `Int64Array` of bit lengths.
#[test]
fn bit_length_test_large_string() {
    for (input, len, expected) in bit_length_cases() {
        let array = LargeStringArray::from(input);
        let bits = bit_length(&array).unwrap();
        assert_eq!(len, bits.len());

        let bits = bits.as_primitive::<Int64Type>();
        for (i, value) in expected.iter().enumerate() {
            assert_eq!(i64::from(*value), bits.value(i));
        }
    }
}
473 | | |
/// `bit_length` on a `StringViewArray` yields an `Int32Array` of bit lengths.
#[test]
fn bit_length_test_utf8view() {
    for (input, len, expected) in bit_length_cases() {
        let view_array = StringViewArray::from(input);
        let bits = bit_length(&view_array).unwrap();
        assert_eq!(len, bits.len());

        let bits = bits.as_primitive::<Int32Type>();
        for (i, value) in expected.iter().enumerate() {
            assert_eq!(*value, bits.value(i));
        }
    }
}
488 | | |
/// `bit_length` keeps nulls of a `StringViewArray` as nulls in the output.
#[test]
fn bit_length_null_utf8view() {
    for (input, len, expected) in bit_length_null_cases() {
        // This must be a view array: a plain `StringArray` would take the `Utf8`
        // branch of `bit_length` and leave the `Utf8View` null handling untested.
        let array = StringViewArray::from(input);
        let output = bit_length(&array).unwrap();
        assert_eq!(len, output.len());

        let got = output.as_any().downcast_ref::<Int32Array>().unwrap();
        let want = Int32Array::from(expected);
        assert_eq!(&want, got);
    }
}
/// `bit_length` on a `BinaryArray` is eight times the byte length of each value.
#[test]
fn bit_length_binary() {
    let input: Vec<&[u8]> = vec![b"one", &[0xff, 0xf8], b"three"];
    let bit_lengths: Vec<i32> = vec![24, 16, 40];
    length_binary_helper!(i32, Int32Array, bit_length, input, bit_lengths);
}
509 | | |
/// `bit_length` on a `LargeBinaryArray` is eight times the byte length, as `Int64`.
#[test]
fn bit_length_large_binary() {
    let input: Vec<&[u8]> = vec![b"zero", b" ", &[0xff, 0xf8]];
    let bit_lengths: Vec<i64> = vec![32, 8, 16];
    length_binary_helper!(i64, Int64Array, bit_length, input, bit_lengths);
}
516 | | |
517 | | fn bit_length_null_cases() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> { |
518 | | vec![( |
519 | | vec![Some("one"), None, Some("three"), Some("four")], |
520 | | 4, |
521 | | vec![Some(24), None, Some(40), Some(32)], |
522 | | )] |
523 | | } |
524 | | |
/// `bit_length` keeps nulls of a `StringArray` as nulls in the output.
#[test]
fn bit_length_null_string() {
    for (input, len, expected) in bit_length_null_cases() {
        let array = StringArray::from(input);
        let output = bit_length(&array).unwrap();
        assert_eq!(len, output.len());

        let got = output.as_any().downcast_ref::<Int32Array>().unwrap();
        let want = Int32Array::from(expected);
        assert_eq!(&want, got);
    }
}
539 | | |
/// `bit_length` keeps nulls of a `LargeStringArray` as nulls in the `Int64` output.
#[test]
fn bit_length_null_large_string() {
    for (input, len, expected) in bit_length_null_cases() {
        let array = LargeStringArray::from(input);
        let output = bit_length(&array).unwrap();
        assert_eq!(len, output.len());
        let got = output.as_any().downcast_ref::<Int64Array>().unwrap();

        // The shared fixture is expressed in i32; widen it for the Int64 result.
        let widened: Vec<Option<i64>> = expected
            .iter()
            .map(|slot| slot.map(i64::from))
            .collect();
        let want = Int64Array::from(widened);
        assert_eq!(&want, got);
    }
}
559 | | |
/// `bit_length` keeps nulls of a `BinaryArray` as nulls in the output.
#[test]
fn bit_length_null_binary() {
    let input: Vec<Option<&[u8]>> =
        vec![Some(b"one"), None, Some(b"three"), Some(&[0xff, 0xf8])];
    let bit_lengths: Vec<Option<i32>> = vec![Some(24), None, Some(40), Some(16)];
    length_binary_helper!(i32, Int32Array, bit_length, input, bit_lengths);
}
567 | | |
/// `bit_length` keeps nulls of a `LargeBinaryArray` as nulls in the `Int64` output.
#[test]
fn bit_length_null_large_binary() {
    let input: Vec<Option<&[u8]>> =
        vec![Some(b"one"), None, Some(&[0xff, 0xf8]), Some(b"four")];
    let bit_lengths: Vec<Option<i64>> = vec![Some(24), None, Some(16), Some(32)];
    length_binary_helper!(i64, Int64Array, bit_length, input, bit_lengths);
}
575 | | |
/// `bit_length` rejects input types it does not support, such as `UInt64Array`.
#[test]
fn bit_length_wrong_type() {
    let unsupported = UInt64Array::from(vec![1u64]);
    let outcome = bit_length(&unsupported);
    assert!(outcome.is_err());
}
583 | | |
/// `bit_length` on a sliced `StringArray` only reports the values inside the slice.
#[test]
fn bit_length_offsets_string() {
    let full = StringArray::from(vec![Some("hello"), Some(" "), Some("world"), None]);
    // Drop the first value so the array starts at a non-zero offset.
    let sliced = full.slice(1, 3);

    let output = bit_length(&sliced).unwrap();
    let got = output.as_primitive::<Int32Type>();

    let want = Int32Array::from(vec![Some(8), Some(40), None]);
    assert_eq!(&want, got);
}
595 | | |
/// `bit_length` on a sliced `BinaryArray` only reports the values inside the slice.
#[test]
fn bit_length_offsets_binary() {
    let input: Vec<Option<&[u8]>> = vec![Some(b"hello"), Some(&[]), Some(b"world"), None];
    let full = BinaryArray::from(input);
    // Drop the first value so the array starts at a non-zero offset.
    let sliced = full.slice(1, 3);

    let output = bit_length(&sliced).unwrap();
    let got = output.as_primitive::<Int32Type>();

    let want = Int32Array::from(vec![Some(0), Some(40), None]);
    assert_eq!(&want, got);
}
607 | | |
/// Runs the dictionary `length` check once for every integer key type.
#[test]
fn length_dictionary() {
    // Signed key types.
    _length_dictionary::<Int8Type>();
    _length_dictionary::<Int16Type>();
    _length_dictionary::<Int32Type>();
    _length_dictionary::<Int64Type>();

    // Unsigned key types.
    _length_dictionary::<UInt8Type>();
    _length_dictionary::<UInt16Type>();
    _length_dictionary::<UInt32Type>();
    _length_dictionary::<UInt64Type>();
}
619 | | |
620 | | fn _length_dictionary<K: ArrowDictionaryKeyType>() { |
621 | | const TOTAL: i32 = 100; |
622 | | |
623 | | let v = ["aaaa", "bb", "ccccc", "ddd", "eeeeee"]; |
624 | | let data: Vec<Option<&str>> = (0..TOTAL) |
625 | | .map(|n| { |
626 | | let i = n % 5; |
627 | | if i == 3 { |
628 | | None |
629 | | } else { |
630 | | Some(v[i as usize]) |
631 | | } |
632 | | }) |
633 | | .collect(); |
634 | | |
635 | | let dict_array: DictionaryArray<K> = data.clone().into_iter().collect(); |
636 | | |
637 | | let expected: Vec<Option<i32>> = |
638 | | data.iter().map(|opt| opt.map(|s| s.len() as i32)).collect(); |
639 | | |
640 | | let res = length(&dict_array).unwrap(); |
641 | | let actual = res.as_any().downcast_ref::<DictionaryArray<K>>().unwrap(); |
642 | | let actual: Vec<Option<i32>> = actual |
643 | | .values() |
644 | | .as_any() |
645 | | .downcast_ref::<Int32Array>() |
646 | | .unwrap() |
647 | | .take_iter(dict_array.keys_iter()) |
648 | | .collect(); |
649 | | |
650 | | for i in 0..TOTAL as usize { |
651 | | assert_eq!(expected[i], actual[i],); |
652 | | } |
653 | | } |
654 | | |
/// Runs the dictionary `bit_length` check once for every integer key type.
#[test]
fn bit_length_dictionary() {
    // Signed key types.
    _bit_length_dictionary::<Int8Type>();
    _bit_length_dictionary::<Int16Type>();
    _bit_length_dictionary::<Int32Type>();
    _bit_length_dictionary::<Int64Type>();

    // Unsigned key types.
    _bit_length_dictionary::<UInt8Type>();
    _bit_length_dictionary::<UInt16Type>();
    _bit_length_dictionary::<UInt32Type>();
    _bit_length_dictionary::<UInt64Type>();
}
666 | | |
667 | | fn _bit_length_dictionary<K: ArrowDictionaryKeyType>() { |
668 | | const TOTAL: i32 = 100; |
669 | | |
670 | | let v = ["aaaa", "bb", "ccccc", "ddd", "eeeeee"]; |
671 | | let data: Vec<Option<&str>> = (0..TOTAL) |
672 | | .map(|n| { |
673 | | let i = n % 5; |
674 | | if i == 3 { |
675 | | None |
676 | | } else { |
677 | | Some(v[i as usize]) |
678 | | } |
679 | | }) |
680 | | .collect(); |
681 | | |
682 | | let dict_array: DictionaryArray<K> = data.clone().into_iter().collect(); |
683 | | |
684 | | let expected: Vec<Option<i32>> = data |
685 | | .iter() |
686 | | .map(|opt| opt.map(|s| (s.chars().count() * 8) as i32)) |
687 | | .collect(); |
688 | | |
689 | | let res = bit_length(&dict_array).unwrap(); |
690 | | let actual = res.as_any().downcast_ref::<DictionaryArray<K>>().unwrap(); |
691 | | let actual: Vec<Option<i32>> = actual |
692 | | .values() |
693 | | .as_any() |
694 | | .downcast_ref::<Int32Array>() |
695 | | .unwrap() |
696 | | .take_iter(dict_array.keys_iter()) |
697 | | .collect(); |
698 | | |
699 | | for i in 0..TOTAL as usize { |
700 | | assert_eq!(expected[i], actual[i],); |
701 | | } |
702 | | } |
703 | | |
/// `length` on a `FixedSizeListArray` reports the fixed list size for valid slots and
/// null for null slots.
#[test]
fn test_fixed_size_list_length() {
    // Nine Int32 values back three lists of three elements each.
    let child = ArrayData::builder(DataType::Int32)
        .len(9)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8]))
        .build()
        .unwrap();

    let item_field = Arc::new(Field::new_list_field(DataType::Int32, false));
    // The middle list is null.
    let validity = NullBuffer::from(vec![true, false, true]);
    let list_data = ArrayData::builder(DataType::FixedSizeList(item_field, 3))
        .len(3)
        .add_child_data(child)
        .nulls(Some(validity))
        .build()
        .unwrap();
    let list_array = FixedSizeListArray::from(list_data);

    let output = length(&list_array).unwrap();
    let sizes: &Int32Array = output.as_primitive();

    assert_eq!(sizes.len(), 3);
    assert_eq!(sizes.value(0), 3);
    assert!(sizes.is_null(1));
    assert_eq!(sizes.value(2), 3);
}
731 | | |
/// `length` and `bit_length` on a `FixedSizeBinaryArray` report the fixed width (and
/// eight times the fixed width) for every slot.
#[test]
fn test_fixed_size_binary() {
    // Sixteen zero bytes split into four values of width four.
    let array = FixedSizeBinaryArray::new(4, [0; 16].into(), None);

    let byte_lengths = length(&array).unwrap();
    let want_bytes = Int32Array::from(vec![4; 4]);
    assert_eq!(byte_lengths.as_ref(), &want_bytes);

    let bit_lengths = bit_length(&array).unwrap();
    let want_bits = Int32Array::from(vec![32; 4]);
    assert_eq!(bit_lengths.as_ref(), &want_bits);
}
741 | | } |