/Users/andrewlamb/Software/arrow-rs/arrow-cast/src/cast/list.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use crate::cast::*; |
19 | | |
20 | | /// Helper function that takes a primitive array and casts to a (generic) list array. |
21 | 0 | pub(crate) fn cast_values_to_list<O: OffsetSizeTrait>( |
22 | 0 | array: &dyn Array, |
23 | 0 | to: &FieldRef, |
24 | 0 | cast_options: &CastOptions, |
25 | 0 | ) -> Result<ArrayRef, ArrowError> { |
26 | 0 | let values = cast_with_options(array, to.data_type(), cast_options)?; |
27 | 0 | let offsets = OffsetBuffer::from_lengths(std::iter::repeat_n(1, values.len())); |
28 | 0 | let list = GenericListArray::<O>::new(to.clone(), offsets, values, None); |
29 | 0 | Ok(Arc::new(list)) |
30 | 0 | } |
31 | | |
32 | | /// Helper function that takes a primitive array and casts to a fixed size list array. |
33 | 0 | pub(crate) fn cast_values_to_fixed_size_list( |
34 | 0 | array: &dyn Array, |
35 | 0 | to: &FieldRef, |
36 | 0 | size: i32, |
37 | 0 | cast_options: &CastOptions, |
38 | 0 | ) -> Result<ArrayRef, ArrowError> { |
39 | 0 | let values = cast_with_options(array, to.data_type(), cast_options)?; |
40 | 0 | let list = FixedSizeListArray::new(to.clone(), size, values, None); |
41 | 0 | Ok(Arc::new(list)) |
42 | 0 | } |
43 | | |
44 | 0 | pub(crate) fn cast_single_element_fixed_size_list_to_values( |
45 | 0 | array: &dyn Array, |
46 | 0 | to: &DataType, |
47 | 0 | cast_options: &CastOptions, |
48 | 0 | ) -> Result<ArrayRef, ArrowError> { |
49 | 0 | let values = array.as_fixed_size_list().values(); |
50 | 0 | cast_with_options(values, to, cast_options) |
51 | 0 | } |
52 | | |
53 | 0 | pub(crate) fn cast_fixed_size_list_to_list<OffsetSize>( |
54 | 0 | array: &dyn Array, |
55 | 0 | ) -> Result<ArrayRef, ArrowError> |
56 | 0 | where |
57 | 0 | OffsetSize: OffsetSizeTrait, |
58 | | { |
59 | 0 | let fixed_size_list: &FixedSizeListArray = array.as_fixed_size_list(); |
60 | 0 | let list: GenericListArray<OffsetSize> = fixed_size_list.clone().into(); |
61 | 0 | Ok(Arc::new(list)) |
62 | 0 | } |
63 | | |
64 | 0 | pub(crate) fn cast_list_to_fixed_size_list<OffsetSize>( |
65 | 0 | array: &GenericListArray<OffsetSize>, |
66 | 0 | field: &FieldRef, |
67 | 0 | size: i32, |
68 | 0 | cast_options: &CastOptions, |
69 | 0 | ) -> Result<ArrayRef, ArrowError> |
70 | 0 | where |
71 | 0 | OffsetSize: OffsetSizeTrait, |
72 | | { |
73 | 0 | let cap = array.len() * size as usize; |
74 | | |
75 | | // Whether the resulting array may contain null lists |
76 | 0 | let nullable = cast_options.safe || array.null_count() != 0; |
77 | 0 | let mut nulls = nullable.then(|| { |
78 | 0 | let mut buffer = BooleanBufferBuilder::new(array.len()); |
79 | 0 | match array.nulls() { |
80 | 0 | Some(n) => buffer.append_buffer(n.inner()), |
81 | 0 | None => buffer.append_n(array.len(), true), |
82 | | } |
83 | 0 | buffer |
84 | 0 | }); |
85 | | |
86 | | // Nulls in FixedSizeListArray take up space and so we must pad the values |
87 | 0 | let values = array.values().to_data(); |
88 | 0 | let mut mutable = MutableArrayData::new(vec![&values], nullable, cap); |
89 | | // The end position in values of the last incorrectly-sized list slice |
90 | 0 | let mut last_pos = 0; |
91 | | |
92 | | // Need to flag when previous vector(s) are empty/None to distinguish from 'All slices were correct length' cases. |
93 | 0 | let is_prev_empty = if array.offsets().len() < 2 { |
94 | 0 | false |
95 | | } else { |
96 | 0 | let first_offset = array.offsets()[0].as_usize(); |
97 | 0 | let second_offset = array.offsets()[1].as_usize(); |
98 | | |
99 | 0 | first_offset == 0 && second_offset == 0 |
100 | | }; |
101 | | |
102 | 0 | for (idx, w) in array.offsets().windows(2).enumerate() { |
103 | 0 | let start_pos = w[0].as_usize(); |
104 | 0 | let end_pos = w[1].as_usize(); |
105 | 0 | let len = end_pos - start_pos; |
106 | | |
107 | 0 | if len != size as usize { |
108 | 0 | if cast_options.safe || array.is_null(idx) { |
109 | 0 | if last_pos != start_pos { |
110 | 0 | // Extend with valid slices |
111 | 0 | mutable.extend(0, last_pos, start_pos); |
112 | 0 | } |
113 | | // Pad this slice with nulls |
114 | 0 | mutable.extend_nulls(size as _); |
115 | 0 | nulls.as_mut().unwrap().set_bit(idx, false); |
116 | | // Set last_pos to the end of this slice's values |
117 | 0 | last_pos = end_pos |
118 | | } else { |
119 | 0 | return Err(ArrowError::CastError(format!( |
120 | 0 | "Cannot cast to FixedSizeList({size}): value at index {idx} has length {len}", |
121 | 0 | ))); |
122 | | } |
123 | 0 | } |
124 | | } |
125 | | |
126 | 0 | let values = match last_pos { |
127 | 0 | 0 if !is_prev_empty => array.values().slice(0, cap), // All slices were the correct length |
128 | | _ => { |
129 | 0 | if mutable.len() != cap { |
130 | | // Remaining slices were all correct length |
131 | 0 | let remaining = cap - mutable.len(); |
132 | 0 | mutable.extend(0, last_pos, last_pos + remaining) |
133 | 0 | } |
134 | 0 | make_array(mutable.freeze()) |
135 | | } |
136 | | }; |
137 | | |
138 | | // Cast the inner values if necessary |
139 | 0 | let values = cast_with_options(values.as_ref(), field.data_type(), cast_options)?; |
140 | | |
141 | | // Construct the FixedSizeListArray |
142 | 0 | let nulls = nulls.map(|mut x| x.finish().into()); |
143 | 0 | let array = FixedSizeListArray::new(field.clone(), size, values, nulls); |
144 | 0 | Ok(Arc::new(array)) |
145 | 0 | } |
146 | | |
147 | | /// Helper function that takes an Generic list container and casts the inner datatype. |
148 | 0 | pub(crate) fn cast_list_values<O: OffsetSizeTrait>( |
149 | 0 | array: &dyn Array, |
150 | 0 | to: &FieldRef, |
151 | 0 | cast_options: &CastOptions, |
152 | 0 | ) -> Result<ArrayRef, ArrowError> { |
153 | 0 | let list = array.as_list::<O>(); |
154 | 0 | let values = cast_with_options(list.values(), to.data_type(), cast_options)?; |
155 | 0 | Ok(Arc::new(GenericListArray::<O>::new( |
156 | 0 | to.clone(), |
157 | 0 | list.offsets().clone(), |
158 | 0 | values, |
159 | 0 | list.nulls().cloned(), |
160 | 0 | ))) |
161 | 0 | } |
162 | | |
163 | | /// Cast the container type of List/Largelist array along with the inner datatype |
164 | 0 | pub(crate) fn cast_list<I: OffsetSizeTrait, O: OffsetSizeTrait>( |
165 | 0 | array: &dyn Array, |
166 | 0 | field: &FieldRef, |
167 | 0 | cast_options: &CastOptions, |
168 | 0 | ) -> Result<ArrayRef, ArrowError> { |
169 | 0 | let list = array.as_list::<I>(); |
170 | 0 | let values = list.values(); |
171 | 0 | let offsets = list.offsets(); |
172 | 0 | let nulls = list.nulls().cloned(); |
173 | | |
174 | 0 | if !O::IS_LARGE && values.len() > i32::MAX as usize { |
175 | 0 | return Err(ArrowError::ComputeError( |
176 | 0 | "LargeList too large to cast to List".into(), |
177 | 0 | )); |
178 | 0 | } |
179 | | |
180 | | // Recursively cast values |
181 | 0 | let values = cast_with_options(values, field.data_type(), cast_options)?; |
182 | 0 | let offsets: Vec<_> = offsets.iter().map(|x| O::usize_as(x.as_usize())).collect(); |
183 | | |
184 | | // Safety: valid offsets and checked for overflow |
185 | 0 | let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) }; |
186 | | |
187 | 0 | Ok(Arc::new(GenericListArray::<O>::new( |
188 | 0 | field.clone(), |
189 | 0 | offsets, |
190 | 0 | values, |
191 | 0 | nulls, |
192 | 0 | ))) |
193 | 0 | } |