/Users/andrewlamb/Software/arrow-rs/arrow-array/src/array/string_array.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use crate::types::GenericStringType; |
19 | | use crate::{GenericBinaryArray, GenericByteArray, GenericListArray, OffsetSizeTrait}; |
20 | | use arrow_schema::ArrowError; |
21 | | |
/// A [`GenericByteArray`] for storing `str`
///
/// `OffsetSize` is the integer type used for the value offsets; see
/// [`StringArray`] (`i32` offsets) and [`LargeStringArray`] (`i64` offsets).
pub type GenericStringArray<OffsetSize> = GenericByteArray<GenericStringType<OffsetSize>>;
24 | | |
25 | | impl<OffsetSize: OffsetSizeTrait> GenericStringArray<OffsetSize> { |
26 | | /// Returns the number of `Unicode Scalar Value` in the string at index `i`. |
27 | | /// # Performance |
28 | | /// This function has `O(n)` time complexity where `n` is the string length. |
29 | | /// If you can make sure that all chars in the string are in the range `U+0x0000` ~ `U+0x007F`, |
30 | | /// please use the function [`value_length`](#method.value_length) which has O(1) time complexity. |
31 | | pub fn num_chars(&self, i: usize) -> usize { |
32 | | self.value(i).chars().count() |
33 | | } |
34 | | |
35 | | /// Returns an iterator that returns the values of `array.value(i)` for an iterator with each element `i` |
36 | | pub fn take_iter<'a>( |
37 | | &'a self, |
38 | | indexes: impl Iterator<Item = Option<usize>> + 'a, |
39 | | ) -> impl Iterator<Item = Option<&'a str>> { |
40 | | indexes.map(|opt_index| opt_index.map(|index| self.value(index))) |
41 | | } |
42 | | |
43 | | /// Returns an iterator that returns the values of `array.value(i)` for an iterator with each element `i` |
44 | | /// # Safety |
45 | | /// |
46 | | /// caller must ensure that the indexes in the iterator are less than the `array.len()` |
47 | | pub unsafe fn take_iter_unchecked<'a>( |
48 | | &'a self, |
49 | | indexes: impl Iterator<Item = Option<usize>> + 'a, |
50 | | ) -> impl Iterator<Item = Option<&'a str>> { |
51 | | indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index))) |
52 | | } |
53 | | |
54 | | /// Fallibly creates a [`GenericStringArray`] from a [`GenericBinaryArray`] returning |
55 | | /// an error if [`GenericBinaryArray`] contains invalid UTF-8 data |
56 | 0 | pub fn try_from_binary(v: GenericBinaryArray<OffsetSize>) -> Result<Self, ArrowError> { |
57 | 0 | let (offsets, values, nulls) = v.into_parts(); |
58 | 0 | Self::try_new(offsets, values, nulls) |
59 | 0 | } |
60 | | } |
61 | | |
62 | | impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>> |
63 | | for GenericStringArray<OffsetSize> |
64 | | { |
65 | | fn from(v: GenericListArray<OffsetSize>) -> Self { |
66 | | GenericBinaryArray::<OffsetSize>::from(v).into() |
67 | | } |
68 | | } |
69 | | |
70 | | impl<OffsetSize: OffsetSizeTrait> From<GenericBinaryArray<OffsetSize>> |
71 | | for GenericStringArray<OffsetSize> |
72 | | { |
73 | 0 | fn from(v: GenericBinaryArray<OffsetSize>) -> Self { |
74 | 0 | Self::try_from_binary(v).unwrap() |
75 | 0 | } |
76 | | } |
77 | | |
78 | | impl<OffsetSize: OffsetSizeTrait> From<Vec<Option<&str>>> for GenericStringArray<OffsetSize> { |
79 | 4 | fn from(v: Vec<Option<&str>>) -> Self { |
80 | 4 | v.into_iter().collect() |
81 | 4 | } |
82 | | } |
83 | | |
84 | | impl<OffsetSize: OffsetSizeTrait> From<Vec<&str>> for GenericStringArray<OffsetSize> { |
85 | 39 | fn from(v: Vec<&str>) -> Self { |
86 | 39 | Self::from_iter_values(v) |
87 | 39 | } |
88 | | } |
89 | | |
90 | | impl<OffsetSize: OffsetSizeTrait> From<Vec<Option<String>>> for GenericStringArray<OffsetSize> { |
91 | | fn from(v: Vec<Option<String>>) -> Self { |
92 | | v.into_iter().collect() |
93 | | } |
94 | | } |
95 | | |
96 | | impl<OffsetSize: OffsetSizeTrait> From<Vec<String>> for GenericStringArray<OffsetSize> { |
97 | | fn from(v: Vec<String>) -> Self { |
98 | | Self::from_iter_values(v) |
99 | | } |
100 | | } |
101 | | |
/// A [`GenericStringArray`] of `str` using `i32` offsets
///
/// # Examples
///
/// Construction
///
/// ```
/// # use arrow_array::StringArray;
/// // Create from Vec<Option<&str>>
/// let arr = StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]);
/// // Create from Vec<&str>
/// let arr = StringArray::from(vec!["foo", "bar", "baz"]);
/// // Create from iter/collect (requires Option<&str>)
/// let arr: StringArray = std::iter::repeat(Some("foo")).take(10).collect();
/// ```
///
/// Construction and Access
///
/// ```
/// # use arrow_array::StringArray;
/// let array = StringArray::from(vec![Some("foo"), None, Some("bar")]);
/// assert_eq!(array.value(0), "foo");
/// ```
///
/// See [`GenericByteArray`] for more information and examples, and
/// [`LargeStringArray`] for the variant using `i64` offsets.
pub type StringArray = GenericStringArray<i32>;
128 | | |
/// A [`GenericStringArray`] of `str` using `i64` offsets
///
/// # Examples
///
/// Construction
///
/// ```
/// # use arrow_array::LargeStringArray;
/// // Create from Vec<Option<&str>>
/// let arr = LargeStringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]);
/// // Create from Vec<&str>
/// let arr = LargeStringArray::from(vec!["foo", "bar", "baz"]);
/// // Create from iter/collect (requires Option<&str>)
/// let arr: LargeStringArray = std::iter::repeat(Some("foo")).take(10).collect();
/// ```
///
/// Construction and Access
///
/// ```
/// # use arrow_array::LargeStringArray;
/// let array = LargeStringArray::from(vec![Some("foo"), None, Some("bar")]);
/// assert_eq!(array.value(2), "bar");
/// ```
///
/// See [`GenericByteArray`] for more information and examples, and
/// [`StringArray`] for the variant using `i32` offsets.
pub type LargeStringArray = GenericStringArray<i64>;
155 | | |
156 | | #[cfg(test)] |
157 | | mod tests { |
158 | | use super::*; |
159 | | use crate::builder::{ListBuilder, PrimitiveBuilder, StringBuilder}; |
160 | | use crate::types::UInt8Type; |
161 | | use crate::Array; |
162 | | use arrow_buffer::Buffer; |
163 | | use arrow_data::ArrayData; |
164 | | use arrow_schema::{DataType, Field}; |
165 | | use std::sync::Arc; |
166 | | |
#[test]
fn test_string_array_from_u8_slice() {
    // An ASCII string, an empty string, and a string mixing 1- to 4-byte characters.
    let expected = ["hello", "", "A£ऀ𖼚𝌆৩ƐZ"];
    let string_array = StringArray::from(expected.to_vec());

    assert_eq!(3, string_array.len());
    assert_eq!(0, string_array.null_count());

    // The checked and unchecked accessors must agree, and every slot is valid.
    for (i, want) in expected.iter().enumerate() {
        assert_eq!(*want, string_array.value(i));
        assert_eq!(*want, unsafe { string_array.value_unchecked(i) });
        assert!(string_array.is_valid(i));
        assert!(!string_array.is_null(i));
    }

    // Length in bytes: 1 + 2 + 3 + 4 + 4 + 3 + 2 + 1
    assert_eq!(20, string_array.value_length(2));
    // Length in chars
    assert_eq!(8, string_array.num_chars(2));
}
191 | | |
#[test]
#[should_panic(expected = "StringArray expects DataType::Utf8")]
fn test_string_array_from_int() {
    // Data taken from a `LargeStringArray` must be rejected by `StringArray`.
    let large_data = LargeStringArray::from(vec!["a", "b"]).into_data();
    drop(StringArray::from(large_data));
}
198 | | |
#[test]
fn test_large_string_array_from_u8_slice() {
    // An ASCII string, an empty string, and a string mixing 1- to 4-byte characters.
    let expected = ["hello", "", "A£ऀ𖼚𝌆৩ƐZ"];
    let string_array = LargeStringArray::from(expected.to_vec());

    assert_eq!(3, string_array.len());
    assert_eq!(0, string_array.null_count());

    // The checked and unchecked accessors must agree, and every slot is valid.
    for (i, want) in expected.iter().enumerate() {
        assert_eq!(*want, string_array.value(i));
        assert_eq!(*want, unsafe { string_array.value_unchecked(i) });
        assert!(string_array.is_valid(i));
        assert!(!string_array.is_null(i));
    }

    // The third value starts right after the 5 bytes of "hello".
    assert_eq!(5, string_array.value_offsets()[2]);
    // Length in bytes: 1 + 2 + 3 + 4 + 4 + 3 + 2 + 1
    assert_eq!(20, string_array.value_length(2));
    // Length in chars
    assert_eq!(8, string_array.num_chars(2));
}
224 | | |
#[test]
fn test_nested_string_array() {
    let mut builder = ListBuilder::new(StringBuilder::with_capacity(3, 10));

    // First list slot: ["foo", "bar"]
    for s in ["foo", "bar"] {
        builder.values().append_value(s);
    }
    builder.append(true);

    // Second list slot: ["foobar"]
    builder.values().append_value("foobar");
    builder.append(true);

    let list_of_strings = builder.finish();
    assert_eq!(list_of_strings.len(), 2);

    // Each slot must downcast to a `StringArray` holding exactly the appended values.
    let expected: [&[&str]; 2] = [&["foo", "bar"], &["foobar"]];
    for (slot_index, want) in expected.iter().enumerate() {
        let slot = list_of_strings.value(slot_index);
        let strings = slot.as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(strings.len(), want.len());
        for (i, s) in want.iter().enumerate() {
            assert_eq!(strings.value(i), *s);
            assert_eq!(unsafe { strings.value_unchecked(i) }, *s);
        }
    }
}
254 | | |
#[test]
#[should_panic(
    expected = "Trying to access an element at index 4 from a StringArray of length 3"
)]
fn test_string_array_get_value_index_out_of_bound() {
    // Three values laid out back to back: "hello", "" and "parquet".
    let values: [u8; 12] = *b"helloparquet";
    let offsets: [i32; 4] = [0, 5, 5, 12];

    let offsets_buffer = Buffer::from_slice_ref(offsets);
    let values_buffer = Buffer::from_slice_ref(values);
    let array_data = ArrayData::builder(DataType::Utf8)
        .len(3)
        .add_buffer(offsets_buffer)
        .add_buffer(values_buffer)
        .build()
        .unwrap();

    // Index 4 is past the end of the 3-element array, so `value` must panic.
    let string_array = StringArray::from(array_data);
    string_array.value(4);
}
273 | | |
#[test]
fn test_string_array_fmt_debug() {
    let arr: StringArray = vec!["hello", "arrow"].into();
    // The Debug output is the array type name followed by a bracketed list with
    // one element per line; each element is indented by two spaces.
    assert_eq!(
        "StringArray\n[\n  \"hello\",\n  \"arrow\",\n]",
        format!("{arr:?}")
    );
}
282 | | |
#[test]
fn test_large_string_array_fmt_debug() {
    let arr: LargeStringArray = vec!["hello", "arrow"].into();
    // The Debug output is the array type name followed by a bracketed list with
    // one element per line; each element is indented by two spaces.
    assert_eq!(
        "LargeStringArray\n[\n  \"hello\",\n  \"arrow\",\n]",
        format!("{arr:?}")
    );
}
291 | | |
#[test]
fn test_string_array_from_iter() {
    let data = [Some("hello"), None, Some("arrow")];

    // From Vec<Option<&str>>
    let from_vec = StringArray::from(data.to_vec());
    // From an iterator of Option<&str>
    let from_str_iter: StringArray = data.to_vec().into_iter().collect();
    // From an iterator of Option<String>
    let from_string_iter: StringArray = data
        .to_vec()
        .into_iter()
        .map(|item| item.map(String::from))
        .collect();
    // From an iterator of &Option<&str>
    let from_ref_iter: StringArray = data.iter().collect();

    // All four construction paths must produce equal arrays.
    assert_eq!(from_vec, from_str_iter);
    assert_eq!(from_str_iter, from_string_iter);
    assert_eq!(from_string_iter, from_ref_iter);
}
312 | | |
#[test]
fn test_string_array_from_iter_values() {
    // Borrowed string slices.
    let borrowed = ["hello", "hello2"];
    let from_borrowed = StringArray::from_iter_values(borrowed.iter());
    assert_eq!(from_borrowed.value(0), "hello");
    assert_eq!(from_borrowed.value(1), "hello2");

    // Owned `String`s are accepted as well.
    let owned = [String::from("goodbye"), String::from("goodbye2")];
    let from_owned = StringArray::from_iter_values(owned.iter());
    assert_eq!(from_owned.value(0), "goodbye");
    assert_eq!(from_owned.value(1), "goodbye2");
}
328 | | |
#[test]
fn test_string_array_from_unbound_iter() {
    // An iterator whose declared upper bound (100, from `take`) is larger than
    // the number of items it actually yields (10, enforced inside `scan`).
    let string_iter = (0..)
        .scan(0usize, |emitted, i| {
            if *emitted >= 10 {
                return None;
            }
            *emitted += 1;
            Some(Some(format!("value {i}")))
        })
        .take(100);

    // The size hint only knows about the `take(100)` limit...
    let upper_size_bound = string_iter.size_hint().1;
    assert_eq!(upper_size_bound, Some(100));

    // ...but the collected array must contain only the 10 yielded items.
    let string_array: StringArray = string_iter.collect();
    assert_eq!(string_array.len(), 10);
}
352 | | |
#[test]
fn test_string_array_all_null() {
    // A single null entry and no values at all.
    let array = StringArray::from(vec![None::<&str>]);
    let data = array.into_data();
    data.validate_full()
        .expect("All null array has valid array data");
}
362 | | |
#[test]
fn test_large_string_array_all_null() {
    // A single null entry and no values at all.
    let array = LargeStringArray::from(vec![None::<&str>]);
    let data = array.into_data();
    data.validate_full()
        .expect("All null array has valid array data");
}
372 | | |
373 | | fn _test_generic_string_array_from_list_array<O: OffsetSizeTrait>() { |
374 | | let values = b"HelloArrowAndParquet"; |
375 | | // "ArrowAndParquet" |
376 | | let child_data = ArrayData::builder(DataType::UInt8) |
377 | | .len(15) |
378 | | .offset(5) |
379 | | .add_buffer(Buffer::from(values)) |
380 | | .build() |
381 | | .unwrap(); |
382 | | |
383 | | let offsets = [0, 5, 8, 15].map(|n| O::from_usize(n).unwrap()); |
384 | | let null_buffer = Buffer::from_slice_ref([0b101]); |
385 | | let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new( |
386 | | Field::new_list_field(DataType::UInt8, false), |
387 | | )); |
388 | | |
389 | | // [None, Some("Parquet")] |
390 | | let array_data = ArrayData::builder(data_type) |
391 | | .len(2) |
392 | | .offset(1) |
393 | | .add_buffer(Buffer::from_slice_ref(offsets)) |
394 | | .null_bit_buffer(Some(null_buffer)) |
395 | | .add_child_data(child_data) |
396 | | .build() |
397 | | .unwrap(); |
398 | | let list_array = GenericListArray::<O>::from(array_data); |
399 | | let string_array = GenericStringArray::<O>::from(list_array); |
400 | | |
401 | | assert_eq!(2, string_array.len()); |
402 | | assert_eq!(1, string_array.null_count()); |
403 | | assert!(string_array.is_null(0)); |
404 | | assert!(string_array.is_valid(1)); |
405 | | assert_eq!("Parquet", string_array.value(1)); |
406 | | } |
407 | | |
408 | | #[test] |
409 | | fn test_string_array_from_list_array() { |
410 | | _test_generic_string_array_from_list_array::<i32>(); |
411 | | } |
412 | | |
413 | | #[test] |
414 | | fn test_large_string_array_from_list_array() { |
415 | | _test_generic_string_array_from_list_array::<i64>(); |
416 | | } |
417 | | |
418 | | fn _test_generic_string_array_from_list_array_with_child_nulls_failed<O: OffsetSizeTrait>() { |
419 | | let values = b"HelloArrow"; |
420 | | let child_data = ArrayData::builder(DataType::UInt8) |
421 | | .len(10) |
422 | | .add_buffer(Buffer::from(values)) |
423 | | .null_bit_buffer(Some(Buffer::from_slice_ref([0b1010101010]))) |
424 | | .build() |
425 | | .unwrap(); |
426 | | |
427 | | let offsets = [0, 5, 10].map(|n| O::from_usize(n).unwrap()); |
428 | | |
429 | | // It is possible to create a null struct containing a non-nullable child |
430 | | // see https://github.com/apache/arrow-rs/pull/3244 for details |
431 | | let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new( |
432 | | Field::new_list_field(DataType::UInt8, true), |
433 | | )); |
434 | | |
435 | | // [None, Some(b"Parquet")] |
436 | | let array_data = ArrayData::builder(data_type) |
437 | | .len(2) |
438 | | .add_buffer(Buffer::from_slice_ref(offsets)) |
439 | | .add_child_data(child_data) |
440 | | .build() |
441 | | .unwrap(); |
442 | | let list_array = GenericListArray::<O>::from(array_data); |
443 | | drop(GenericStringArray::<O>::from(list_array)); |
444 | | } |
445 | | |
446 | | #[test] |
447 | | #[should_panic(expected = "The child array cannot contain null values.")] |
448 | | fn test_string_array_from_list_array_with_child_nulls_failed() { |
449 | | _test_generic_string_array_from_list_array_with_child_nulls_failed::<i32>(); |
450 | | } |
451 | | |
452 | | #[test] |
453 | | #[should_panic(expected = "The child array cannot contain null values.")] |
454 | | fn test_large_string_array_from_list_array_with_child_nulls_failed() { |
455 | | _test_generic_string_array_from_list_array_with_child_nulls_failed::<i64>(); |
456 | | } |
457 | | |
458 | | fn _test_generic_string_array_from_list_array_wrong_type<O: OffsetSizeTrait>() { |
459 | | let values = b"HelloArrow"; |
460 | | let child_data = ArrayData::builder(DataType::UInt16) |
461 | | .len(5) |
462 | | .add_buffer(Buffer::from(values)) |
463 | | .build() |
464 | | .unwrap(); |
465 | | |
466 | | let offsets = [0, 2, 3].map(|n| O::from_usize(n).unwrap()); |
467 | | let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new( |
468 | | Field::new_list_field(DataType::UInt16, false), |
469 | | )); |
470 | | |
471 | | let array_data = ArrayData::builder(data_type) |
472 | | .len(2) |
473 | | .add_buffer(Buffer::from_slice_ref(offsets)) |
474 | | .add_child_data(child_data) |
475 | | .build() |
476 | | .unwrap(); |
477 | | let list_array = GenericListArray::<O>::from(array_data); |
478 | | drop(GenericStringArray::<O>::from(list_array)); |
479 | | } |
480 | | |
481 | | #[test] |
482 | | #[should_panic( |
483 | | expected = "BinaryArray can only be created from List<u8> arrays, mismatched data types." |
484 | | )] |
485 | | fn test_string_array_from_list_array_wrong_type() { |
486 | | _test_generic_string_array_from_list_array_wrong_type::<i32>(); |
487 | | } |
488 | | |
489 | | #[test] |
490 | | #[should_panic( |
491 | | expected = "BinaryArray can only be created from List<u8> arrays, mismatched data types." |
492 | | )] |
493 | | fn test_large_string_array_from_list_array_wrong_type() { |
494 | | _test_generic_string_array_from_list_array_wrong_type::<i64>(); |
495 | | } |
496 | | |
#[test]
#[should_panic(
    expected = "Encountered non UTF-8 data: invalid utf-8 sequence of 1 bytes from index 0"
)]
fn test_list_array_utf8_validation() {
    // A single list slot holding the lone byte 0xFF, which is not valid UTF-8.
    let mut list_builder = ListBuilder::new(PrimitiveBuilder::<UInt8Type>::new());
    list_builder.values().append_value(0xFF);
    list_builder.append(true);

    // The conversion validates the bytes and must panic.
    drop(StringArray::from(list_builder.finish()));
}
508 | | |
#[test]
fn test_empty_offsets() {
    // Array data with empty offsets and values buffers yields an empty array
    // whose offsets are reported as a single zero.
    let utf8_data = ArrayData::builder(DataType::Utf8)
        .buffers(vec![Buffer::from(&[]), Buffer::from(&[])])
        .build()
        .unwrap();
    let string = StringArray::from(utf8_data);
    assert_eq!(string.len(), 0);
    assert_eq!(string.value_offsets(), &[0]);

    // Same check for the `i64`-offset variant.
    let large_utf8_data = ArrayData::builder(DataType::LargeUtf8)
        .buffers(vec![Buffer::from(&[]), Buffer::from(&[])])
        .build()
        .unwrap();
    let large_string = LargeStringArray::from(large_utf8_data);
    assert_eq!(large_string.len(), 0);
    assert_eq!(large_string.value_offsets(), &[0]);
}
529 | | |
#[test]
fn test_into_builder() {
    let original = StringArray::from(vec!["hello", "arrow"]);

    // Turn the array back into a builder and append one more value.
    let mut builder = original.into_builder().unwrap();
    builder.append_value("rust");
    let extended = builder.finish();

    // The finished array holds the original values followed by the new one.
    let expected = StringArray::from(vec!["hello", "arrow", "rust"]);
    assert_eq!(expected, extended);
}
543 | | |
#[test]
fn test_into_builder_err() {
    let original = StringArray::from(vec!["hello", "arrow"]);

    // Keep a clone alive so that a mutable builder cannot be obtained.
    let shared = original.clone();

    // `into_builder` must fail and hand the array back unchanged as the error.
    let returned = original.into_builder().unwrap_err();
    assert_eq!(&returned, &shared);
}
554 | | } |