Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-array/src/iterator.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! Idiomatic iterators for [`Array`](crate::Array)
19
20
use crate::array::{
21
    ArrayAccessor, BooleanArray, FixedSizeBinaryArray, GenericBinaryArray, GenericListArray,
22
    GenericStringArray, PrimitiveArray,
23
};
24
use crate::{FixedSizeListArray, GenericListViewArray, MapArray};
25
use arrow_buffer::NullBuffer;
26
27
/// An iterator that returns Some(T) or None, that can be used on any [`ArrayAccessor`]
28
///
29
/// # Performance
30
///
31
/// [`ArrayIter`] provides an idiomatic way to iterate over an array, however, this
32
/// comes at the cost of performance. In particular the interleaved handling of
33
/// the null mask is often sub-optimal.
34
///
35
/// If performing an infallible operation, it is typically faster to perform the operation
36
/// on every index of the array, and handle the null mask separately. For [`PrimitiveArray`]
37
/// this functionality is provided by [`compute::unary`]
38
///
39
/// If performing a fallible operation, it isn't possible to perform the operation independently
40
/// of the null mask, as this might result in a spurious failure on a null index. However,
41
/// there are more efficient ways to iterate over just the non-null indices, this functionality
42
/// is provided by [`compute::try_unary`]
43
///
44
/// [`PrimitiveArray`]: crate::PrimitiveArray
45
/// [`compute::unary`]: https://docs.rs/arrow/latest/arrow/compute/fn.unary.html
46
/// [`compute::try_unary`]: https://docs.rs/arrow/latest/arrow/compute/fn.try_unary.html
47
#[derive(Debug)]
48
pub struct ArrayIter<T: ArrayAccessor> {
49
    array: T,
50
    logical_nulls: Option<NullBuffer>,
51
    current: usize,
52
    current_end: usize,
53
}
54
55
impl<T: ArrayAccessor> ArrayIter<T> {
    /// Creates an iterator covering every slot of `array`.
    ///
    /// The array's length and its logical null mask are read once, here, and
    /// stored in the iterator; the front cursor starts at `0` and the back
    /// cursor at `array.len()`.
    pub fn new(array: T) -> Self {
        let current_end = array.len();
        let logical_nulls = array.logical_nulls();
        Self {
            array,
            logical_nulls,
            current: 0,
            current_end,
        }
    }

    /// Returns `true` if the stored null mask marks `idx` as null.
    ///
    /// When there is no null mask, no slot is considered null.
    #[inline]
    fn is_null(&self, idx: usize) -> bool {
        match &self.logical_nulls {
            Some(nulls) => nulls.is_null(idx),
            None => false,
        }
    }
}
76
77
impl<T: ArrayAccessor> Iterator for ArrayIter<T> {
    type Item = Option<T::Item>;

    /// Yields the next slot from the front: `Some(None)` for a null slot,
    /// `Some(Some(value))` for a valid one, and `None` once the front cursor
    /// has reached the back cursor.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        if self.current == self.current_end {
            return None;
        }

        let idx = self.current;
        let null = self.is_null(idx);
        self.current = idx + 1;

        if null {
            Some(None)
        } else {
            // SAFETY: `idx < self.current_end` was checked above. `current_end`
            // is initialized to `array.len()` and is only ever decremented,
            // so `idx` is a valid index into `array`.
            Some(Some(unsafe { self.array.value_unchecked(idx) }))
        }
    }

    /// Returns the exact number of slots not yet yielded from either end.
    ///
    /// The count is the distance between the two cursors rather than the
    /// distance to `array.len()`, so it stays correct after slots have been
    /// consumed from the back as well as from the front.
    fn size_hint(&self) -> (usize, Option<usize>) {
        // `current <= current_end` always holds, so this cannot underflow.
        let remaining = self.current_end - self.current;
        (remaining, Some(remaining))
    }
}
106
107
impl<T: ArrayAccessor> DoubleEndedIterator for ArrayIter<T> {
    /// Yields the next slot from the back: `Some(None)` for a null slot,
    /// `Some(Some(value))` for a valid one, and `None` once the two cursors
    /// have met.
    fn next_back(&mut self) -> Option<Self::Item> {
        if self.current == self.current_end {
            return None;
        }

        self.current_end -= 1;
        let idx = self.current_end;

        if self.is_null(idx) {
            return Some(None);
        }

        // SAFETY: `current_end` is initialized to `array.len()` and is only
        // ever decremented, and it was strictly greater than `current` (hence
        // non-zero) before the decrement above, so `idx < array.len()`.
        Some(Some(unsafe { self.array.value_unchecked(idx) }))
    }
}
126
127
/// all arrays have known size.
128
impl<T: ArrayAccessor> ExactSizeIterator for ArrayIter<T> {}
129
130
/// an iterator that returns Some(T) or None, that can be used on any PrimitiveArray
131
pub type PrimitiveIter<'a, T> = ArrayIter<&'a PrimitiveArray<T>>;
132
/// an iterator that returns Some(T) or None, that can be used on any BooleanArray
133
pub type BooleanIter<'a> = ArrayIter<&'a BooleanArray>;
134
/// an iterator that returns Some(T) or None, that can be used on any Utf8Array
135
pub type GenericStringIter<'a, T> = ArrayIter<&'a GenericStringArray<T>>;
136
/// an iterator that returns Some(T) or None, that can be used on any BinaryArray
137
pub type GenericBinaryIter<'a, T> = ArrayIter<&'a GenericBinaryArray<T>>;
138
/// an iterator that returns Some(T) or None, that can be used on any FixedSizeBinaryArray
139
pub type FixedSizeBinaryIter<'a> = ArrayIter<&'a FixedSizeBinaryArray>;
140
/// an iterator that returns Some(T) or None, that can be used on any FixedSizeListArray
141
pub type FixedSizeListIter<'a> = ArrayIter<&'a FixedSizeListArray>;
142
/// an iterator that returns Some(T) or None, that can be used on any ListArray
143
pub type GenericListArrayIter<'a, O> = ArrayIter<&'a GenericListArray<O>>;
144
/// an iterator that returns Some(T) or None, that can be used on any MapArray
145
pub type MapArrayIter<'a> = ArrayIter<&'a MapArray>;
146
/// an iterator that returns Some(T) or None, that can be used on any ListArray
147
pub type GenericListViewArrayIter<'a, O> = ArrayIter<&'a GenericListViewArray<O>>;
148
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use crate::array::{ArrayRef, BinaryArray, BooleanArray, Int32Array, StringArray};

    /// Iterating an `Int32Array` and collecting again reproduces the expected
    /// arrays, both forwards (after adding one) and in reverse.
    #[test]
    fn test_primitive_array_iter_round_trip() {
        let source: ArrayRef = Arc::new(Int32Array::from(vec![
            Some(0),
            None,
            Some(2),
            None,
            Some(4),
        ]));
        let ints = source.as_any().downcast_ref::<Int32Array>().unwrap();

        // Forward pass: add one to every valid slot and rebuild the array.
        let incremented: Int32Array = ints.iter().map(|slot| slot.map(|v| v + 1)).collect();
        assert_eq!(
            incremented,
            Int32Array::from(vec![Some(1), None, Some(3), None, Some(5)])
        );

        // `rev` only compiles if DoubleEndedIterator is implemented.
        let reversed: Int32Array = ints.iter().rev().collect();
        assert_eq!(
            reversed,
            Int32Array::from(vec![Some(4), None, Some(2), None, Some(0)])
        );

        // `rposition` only compiles if ExactSizeIterator is implemented.
        let _ = ints.iter().rposition(|slot| slot == Some(1));
    }

    /// Mixing `next` and `next_back` consumes from both ends until they meet.
    #[test]
    fn test_double_ended() {
        let ints = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4)]);
        let mut iter = ints.iter();

        // Two slots from the front.
        assert_eq!(iter.next(), Some(Some(0)));
        assert_eq!(iter.next(), Some(None));

        // Three slots from the back.
        assert_eq!(iter.next_back(), Some(Some(4)));
        assert_eq!(iter.next_back(), Some(None));
        assert_eq!(iter.next_back(), Some(Some(2)));

        // Both cursors have met, so each direction is exhausted.
        assert_eq!(iter.next_back(), None);
        assert_eq!(iter.next(), None);
    }

    /// Iterating a `StringArray` and collecting again reproduces the expected
    /// arrays, both forwards (after appending `b`) and in reverse.
    #[test]
    fn test_string_array_iter_round_trip() {
        let source: ArrayRef = Arc::new(StringArray::from(vec![
            Some("a"),
            None,
            Some("aaa"),
            None,
            Some("aaaaa"),
        ]));
        let strings = source.as_any().downcast_ref::<StringArray>().unwrap();

        // Forward pass: append a `b` to every valid slot and rebuild the array.
        let suffixed: StringArray = strings
            .iter()
            .map(|slot| slot.map(|s| format!("{s}b")))
            .collect();
        assert_eq!(
            suffixed,
            StringArray::from(vec![Some("ab"), None, Some("aaab"), None, Some("aaaaab")])
        );

        // `rev` only compiles if DoubleEndedIterator is implemented.
        let reversed: StringArray = strings.iter().rev().collect();
        assert_eq!(
            reversed,
            StringArray::from(vec![Some("aaaaa"), None, Some("aaa"), None, Some("a")])
        );

        // `rposition` only compiles if ExactSizeIterator is implemented.
        let _ = strings.iter().rposition(|slot| slot == Some("a"));
    }

    /// Iterating a `BinaryArray` and collecting again reproduces the array,
    /// forwards and in reverse.
    #[test]
    fn test_binary_array_iter_round_trip() {
        let binary = BinaryArray::from(vec![
            Some(b"a" as &[u8]),
            None,
            Some(b"aaa"),
            None,
            Some(b"aaaaa"),
        ]);

        // Forward pass: collecting the iterator rebuilds an equal array.
        let round_tripped: BinaryArray = binary.iter().collect();
        assert_eq!(round_tripped, binary);

        // `rev` only compiles if DoubleEndedIterator is implemented.
        let reversed: BinaryArray = binary.iter().rev().collect();
        assert_eq!(
            reversed,
            BinaryArray::from(vec![
                Some(b"aaaaa" as &[u8]),
                None,
                Some(b"aaa"),
                None,
                Some(b"a"),
            ])
        );

        // `rposition` only compiles if ExactSizeIterator is implemented.
        let _ = binary.iter().rposition(|slot| slot == Some(&[9]));
    }

    /// Iterating a `BooleanArray` and collecting again reproduces the array,
    /// forwards and in reverse.
    #[test]
    fn test_boolean_array_iter_round_trip() {
        let bools = BooleanArray::from(vec![Some(true), None, Some(false)]);

        // Forward pass: collecting the iterator rebuilds an equal array.
        let round_tripped: BooleanArray = bools.iter().collect();
        assert_eq!(round_tripped, bools);

        // `rev` only compiles if DoubleEndedIterator is implemented.
        let reversed: BooleanArray = bools.iter().rev().collect();
        assert_eq!(
            reversed,
            BooleanArray::from(vec![Some(false), None, Some(true)])
        );

        // `rposition` only compiles if ExactSizeIterator is implemented.
        let _ = bools.iter().rposition(|slot| slot == Some(true));
    }
}