Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-cast/src/cast/list.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use crate::cast::*;
19
20
/// Helper function that takes a primitive array and casts to a (generic) list array.
21
0
pub(crate) fn cast_values_to_list<O: OffsetSizeTrait>(
22
0
    array: &dyn Array,
23
0
    to: &FieldRef,
24
0
    cast_options: &CastOptions,
25
0
) -> Result<ArrayRef, ArrowError> {
26
0
    let values = cast_with_options(array, to.data_type(), cast_options)?;
27
0
    let offsets = OffsetBuffer::from_lengths(std::iter::repeat_n(1, values.len()));
28
0
    let list = GenericListArray::<O>::new(to.clone(), offsets, values, None);
29
0
    Ok(Arc::new(list))
30
0
}
31
32
/// Helper function that takes a primitive array and casts to a fixed size list array.
33
0
pub(crate) fn cast_values_to_fixed_size_list(
34
0
    array: &dyn Array,
35
0
    to: &FieldRef,
36
0
    size: i32,
37
0
    cast_options: &CastOptions,
38
0
) -> Result<ArrayRef, ArrowError> {
39
0
    let values = cast_with_options(array, to.data_type(), cast_options)?;
40
0
    let list = FixedSizeListArray::new(to.clone(), size, values, None);
41
0
    Ok(Arc::new(list))
42
0
}
43
44
0
pub(crate) fn cast_single_element_fixed_size_list_to_values(
45
0
    array: &dyn Array,
46
0
    to: &DataType,
47
0
    cast_options: &CastOptions,
48
0
) -> Result<ArrayRef, ArrowError> {
49
0
    let values = array.as_fixed_size_list().values();
50
0
    cast_with_options(values, to, cast_options)
51
0
}
52
53
0
pub(crate) fn cast_fixed_size_list_to_list<OffsetSize>(
54
0
    array: &dyn Array,
55
0
) -> Result<ArrayRef, ArrowError>
56
0
where
57
0
    OffsetSize: OffsetSizeTrait,
58
{
59
0
    let fixed_size_list: &FixedSizeListArray = array.as_fixed_size_list();
60
0
    let list: GenericListArray<OffsetSize> = fixed_size_list.clone().into();
61
0
    Ok(Arc::new(list))
62
0
}
63
64
0
pub(crate) fn cast_list_to_fixed_size_list<OffsetSize>(
65
0
    array: &GenericListArray<OffsetSize>,
66
0
    field: &FieldRef,
67
0
    size: i32,
68
0
    cast_options: &CastOptions,
69
0
) -> Result<ArrayRef, ArrowError>
70
0
where
71
0
    OffsetSize: OffsetSizeTrait,
72
{
73
0
    let cap = array.len() * size as usize;
74
75
    // Whether the resulting array may contain null lists
76
0
    let nullable = cast_options.safe || array.null_count() != 0;
77
0
    let mut nulls = nullable.then(|| {
78
0
        let mut buffer = BooleanBufferBuilder::new(array.len());
79
0
        match array.nulls() {
80
0
            Some(n) => buffer.append_buffer(n.inner()),
81
0
            None => buffer.append_n(array.len(), true),
82
        }
83
0
        buffer
84
0
    });
85
86
    // Nulls in FixedSizeListArray take up space and so we must pad the values
87
0
    let values = array.values().to_data();
88
0
    let mut mutable = MutableArrayData::new(vec![&values], nullable, cap);
89
    // The end position in values of the last incorrectly-sized list slice
90
0
    let mut last_pos = 0;
91
92
    // Need to flag when previous vector(s) are empty/None to distinguish from 'All slices were correct length' cases.
93
0
    let is_prev_empty = if array.offsets().len() < 2 {
94
0
        false
95
    } else {
96
0
        let first_offset = array.offsets()[0].as_usize();
97
0
        let second_offset = array.offsets()[1].as_usize();
98
99
0
        first_offset == 0 && second_offset == 0
100
    };
101
102
0
    for (idx, w) in array.offsets().windows(2).enumerate() {
103
0
        let start_pos = w[0].as_usize();
104
0
        let end_pos = w[1].as_usize();
105
0
        let len = end_pos - start_pos;
106
107
0
        if len != size as usize {
108
0
            if cast_options.safe || array.is_null(idx) {
109
0
                if last_pos != start_pos {
110
0
                    // Extend with valid slices
111
0
                    mutable.extend(0, last_pos, start_pos);
112
0
                }
113
                // Pad this slice with nulls
114
0
                mutable.extend_nulls(size as _);
115
0
                nulls.as_mut().unwrap().set_bit(idx, false);
116
                // Set last_pos to the end of this slice's values
117
0
                last_pos = end_pos
118
            } else {
119
0
                return Err(ArrowError::CastError(format!(
120
0
                    "Cannot cast to FixedSizeList({size}): value at index {idx} has length {len}",
121
0
                )));
122
            }
123
0
        }
124
    }
125
126
0
    let values = match last_pos {
127
0
        0 if !is_prev_empty => array.values().slice(0, cap), // All slices were the correct length
128
        _ => {
129
0
            if mutable.len() != cap {
130
                // Remaining slices were all correct length
131
0
                let remaining = cap - mutable.len();
132
0
                mutable.extend(0, last_pos, last_pos + remaining)
133
0
            }
134
0
            make_array(mutable.freeze())
135
        }
136
    };
137
138
    // Cast the inner values if necessary
139
0
    let values = cast_with_options(values.as_ref(), field.data_type(), cast_options)?;
140
141
    // Construct the FixedSizeListArray
142
0
    let nulls = nulls.map(|mut x| x.finish().into());
143
0
    let array = FixedSizeListArray::new(field.clone(), size, values, nulls);
144
0
    Ok(Arc::new(array))
145
0
}
146
147
/// Helper function that takes an Generic list container and casts the inner datatype.
148
0
pub(crate) fn cast_list_values<O: OffsetSizeTrait>(
149
0
    array: &dyn Array,
150
0
    to: &FieldRef,
151
0
    cast_options: &CastOptions,
152
0
) -> Result<ArrayRef, ArrowError> {
153
0
    let list = array.as_list::<O>();
154
0
    let values = cast_with_options(list.values(), to.data_type(), cast_options)?;
155
0
    Ok(Arc::new(GenericListArray::<O>::new(
156
0
        to.clone(),
157
0
        list.offsets().clone(),
158
0
        values,
159
0
        list.nulls().cloned(),
160
0
    )))
161
0
}
162
163
/// Cast the container type of List/Largelist array along with the inner datatype
164
0
pub(crate) fn cast_list<I: OffsetSizeTrait, O: OffsetSizeTrait>(
165
0
    array: &dyn Array,
166
0
    field: &FieldRef,
167
0
    cast_options: &CastOptions,
168
0
) -> Result<ArrayRef, ArrowError> {
169
0
    let list = array.as_list::<I>();
170
0
    let values = list.values();
171
0
    let offsets = list.offsets();
172
0
    let nulls = list.nulls().cloned();
173
174
0
    if !O::IS_LARGE && values.len() > i32::MAX as usize {
175
0
        return Err(ArrowError::ComputeError(
176
0
            "LargeList too large to cast to List".into(),
177
0
        ));
178
0
    }
179
180
    // Recursively cast values
181
0
    let values = cast_with_options(values, field.data_type(), cast_options)?;
182
0
    let offsets: Vec<_> = offsets.iter().map(|x| O::usize_as(x.as_usize())).collect();
183
184
    // Safety: valid offsets and checked for overflow
185
0
    let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
186
187
0
    Ok(Arc::new(GenericListArray::<O>::new(
188
0
        field.clone(),
189
0
        offsets,
190
0
        values,
191
0
        nulls,
192
0
    )))
193
0
}