Coverage Report

Created: 2025-11-17 14:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-array/src/builder/primitive_run_builder.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use std::{any::Any, sync::Arc};
19
20
use crate::{ArrayRef, ArrowPrimitiveType, RunArray, types::RunEndIndexType};
21
22
use super::{ArrayBuilder, PrimitiveBuilder};
23
24
use arrow_buffer::ArrowNativeType;
25
26
/// Builder for [`RunArray`] of [`PrimitiveArray`](crate::array::PrimitiveArray)
27
///
28
/// # Example:
29
///
30
/// ```
31
///
32
/// # use arrow_array::builder::PrimitiveRunBuilder;
33
/// # use arrow_array::cast::AsArray;
34
/// # use arrow_array::types::{UInt32Type, Int16Type};
35
/// # use arrow_array::{Array, UInt32Array, Int16Array};
36
///
37
/// let mut builder =
38
/// PrimitiveRunBuilder::<Int16Type, UInt32Type>::new();
39
/// builder.append_value(1234);
40
/// builder.append_value(1234);
41
/// builder.append_value(1234);
42
/// builder.append_null();
43
/// builder.append_value(5678);
44
/// builder.append_value(5678);
45
/// let array = builder.finish();
46
///
47
/// assert_eq!(array.run_ends().values(), &[3, 4, 6]);
48
///
49
/// let av = array.values();
50
///
51
/// assert!(!av.is_null(0));
52
/// assert!(av.is_null(1));
53
/// assert!(!av.is_null(2));
54
///
55
/// // Values are polymorphic and so require a downcast.
56
/// let ava: &UInt32Array = av.as_primitive::<UInt32Type>();
57
///
58
/// assert_eq!(ava, &UInt32Array::from(vec![Some(1234), None, Some(5678)]));
59
/// ```
60
#[derive(Debug)]
61
pub struct PrimitiveRunBuilder<R, V>
62
where
63
    R: RunEndIndexType,
64
    V: ArrowPrimitiveType,
65
{
66
    run_ends_builder: PrimitiveBuilder<R>,
67
    values_builder: PrimitiveBuilder<V>,
68
    current_value: Option<V::Native>,
69
    current_run_end_index: usize,
70
    prev_run_end_index: usize,
71
}
72
73
impl<R, V> Default for PrimitiveRunBuilder<R, V>
74
where
75
    R: RunEndIndexType,
76
    V: ArrowPrimitiveType,
77
{
78
    fn default() -> Self {
79
        Self::new()
80
    }
81
}
82
83
impl<R, V> PrimitiveRunBuilder<R, V>
84
where
85
    R: RunEndIndexType,
86
    V: ArrowPrimitiveType,
87
{
88
    /// Creates a new `PrimitiveRunBuilder`
89
9
    pub fn new() -> Self {
90
9
        Self {
91
9
            run_ends_builder: PrimitiveBuilder::new(),
92
9
            values_builder: PrimitiveBuilder::new(),
93
9
            current_value: None,
94
9
            current_run_end_index: 0,
95
9
            prev_run_end_index: 0,
96
9
        }
97
9
    }
98
99
    /// Creates a new `PrimitiveRunBuilder` with the provided capacity
100
    ///
101
    /// `capacity`: the expected number of run-end encoded values.
102
    pub fn with_capacity(capacity: usize) -> Self {
103
        Self {
104
            run_ends_builder: PrimitiveBuilder::with_capacity(capacity),
105
            values_builder: PrimitiveBuilder::with_capacity(capacity),
106
            current_value: None,
107
            current_run_end_index: 0,
108
            prev_run_end_index: 0,
109
        }
110
    }
111
}
112
113
impl<R, V> ArrayBuilder for PrimitiveRunBuilder<R, V>
114
where
115
    R: RunEndIndexType,
116
    V: ArrowPrimitiveType,
117
{
118
    /// Returns the builder as a non-mutable `Any` reference.
119
    fn as_any(&self) -> &dyn Any {
120
        self
121
    }
122
123
    /// Returns the builder as a mutable `Any` reference.
124
    fn as_any_mut(&mut self) -> &mut dyn Any {
125
        self
126
    }
127
128
    /// Returns the boxed builder as a box of `Any`.
129
    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
130
        self
131
    }
132
133
    /// Returns the length of logical array encoded by
134
    /// the eventual runs array.
135
    fn len(&self) -> usize {
136
        self.current_run_end_index
137
    }
138
139
    /// Builds the array and reset this builder.
140
    fn finish(&mut self) -> ArrayRef {
141
        Arc::new(self.finish())
142
    }
143
144
    /// Builds the array without resetting the builder.
145
    fn finish_cloned(&self) -> ArrayRef {
146
        Arc::new(self.finish_cloned())
147
    }
148
}
149
150
impl<R, V> PrimitiveRunBuilder<R, V>
151
where
152
    R: RunEndIndexType,
153
    V: ArrowPrimitiveType,
154
{
155
    /// Appends optional value to the logical array encoded by the RunArray.
156
54
    pub fn append_option(&mut self, value: Option<V::Native>) {
157
54
        if self.current_run_end_index == 0 {
158
9
            self.current_run_end_index = 1;
159
9
            self.current_value = value;
160
9
            return;
161
45
        }
162
45
        if self.current_value != value {
163
19
            self.append_run_end();
164
19
            self.current_value = value;
165
26
        }
166
167
45
        self.current_run_end_index += 1;
168
54
    }
169
170
    /// Appends value to the logical array encoded by the run-ends array.
171
    pub fn append_value(&mut self, value: V::Native) {
172
        self.append_option(Some(value))
173
    }
174
175
    /// Appends null to the logical array encoded by the run-ends array.
176
    pub fn append_null(&mut self) {
177
        self.append_option(None)
178
    }
179
180
    /// Creates the RunArray and resets the builder.
181
    /// Panics if RunArray cannot be built.
182
9
    pub fn finish(&mut self) -> RunArray<R> {
183
        // write the last run end to the array.
184
9
        self.append_run_end();
185
186
        // reset the run index to zero.
187
9
        self.current_value = None;
188
9
        self.current_run_end_index = 0;
189
190
        // build the run encoded array by adding run_ends and values array as its children.
191
9
        let run_ends_array = self.run_ends_builder.finish();
192
9
        let values_array = self.values_builder.finish();
193
9
        RunArray::<R>::try_new(&run_ends_array, &values_array).unwrap()
194
9
    }
195
196
    /// Creates the RunArray and without resetting the builder.
197
    /// Panics if RunArray cannot be built.
198
    pub fn finish_cloned(&self) -> RunArray<R> {
199
        let mut run_ends_array = self.run_ends_builder.finish_cloned();
200
        let mut values_array = self.values_builder.finish_cloned();
201
202
        // Add current run if one exists
203
        if self.prev_run_end_index != self.current_run_end_index {
204
            let mut run_end_builder = run_ends_array.into_builder().unwrap();
205
            let mut values_builder = values_array.into_builder().unwrap();
206
            self.append_run_end_with_builders(&mut run_end_builder, &mut values_builder);
207
            run_ends_array = run_end_builder.finish();
208
            values_array = values_builder.finish();
209
        }
210
211
        RunArray::try_new(&run_ends_array, &values_array).unwrap()
212
    }
213
214
    // Appends the current run to the array.
215
28
    fn append_run_end(&mut self) {
216
        // empty array or the function called without appending any value.
217
28
        if self.prev_run_end_index == self.current_run_end_index {
218
0
            return;
219
28
        }
220
28
        let run_end_index = self.run_end_index_as_native();
221
28
        self.run_ends_builder.append_value(run_end_index);
222
28
        self.values_builder.append_option(self.current_value);
223
28
        self.prev_run_end_index = self.current_run_end_index;
224
28
    }
225
226
    // Similar to `append_run_end` but on custom builders.
227
    // Used in `finish_cloned` which is not suppose to mutate `self`.
228
    fn append_run_end_with_builders(
229
        &self,
230
        run_ends_builder: &mut PrimitiveBuilder<R>,
231
        values_builder: &mut PrimitiveBuilder<V>,
232
    ) {
233
        let run_end_index = self.run_end_index_as_native();
234
        run_ends_builder.append_value(run_end_index);
235
        values_builder.append_option(self.current_value);
236
    }
237
238
28
    fn run_end_index_as_native(&self) -> R::Native {
239
28
        R::Native::from_usize(self.current_run_end_index)
240
28
        .unwrap_or_else(|| 
panic!0
(
241
0
                "Cannot convert `current_run_end_index` {} from `usize` to native form of arrow datatype {}",
242
                self.current_run_end_index,
243
0
                R::DATA_TYPE
244
        ))
245
28
    }
246
}
247
248
impl<R, V> Extend<Option<V::Native>> for PrimitiveRunBuilder<R, V>
249
where
250
    R: RunEndIndexType,
251
    V: ArrowPrimitiveType,
252
{
253
9
    fn extend<T: IntoIterator<Item = Option<V::Native>>>(&mut self, iter: T) {
254
63
        for 
elem54
in iter {
255
54
            self.append_option(elem);
256
54
        }
257
9
    }
258
}
259
260
#[cfg(test)]
mod tests {
    use crate::builder::PrimitiveRunBuilder;
    use crate::cast::AsArray;
    use crate::types::{Int16Type, UInt32Type};
    use crate::{Array, UInt32Array};

    /// Three runs (value, null, value) are encoded with the expected run ends
    /// and a values child that carries the null.
    #[test]
    fn test_primitive_ree_array_builder() {
        let mut ree_builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::new();
        for _ in 0..3 {
            ree_builder.append_value(1234);
        }
        ree_builder.append_null();
        for _ in 0..2 {
            ree_builder.append_value(5678);
        }

        let ree = ree_builder.finish();

        assert_eq!(ree.null_count(), 0);
        assert_eq!(ree.logical_null_count(), 1);
        assert_eq!(ree.len(), 6);

        assert_eq!(ree.run_ends().values(), &[3, 4, 6]);

        let run_values = ree.values();

        assert!(!run_values.is_null(0));
        assert!(run_values.is_null(1));
        assert!(!run_values.is_null(2));

        // Values are polymorphic and so require a downcast.
        let typed_values: &UInt32Array = run_values.as_primitive::<UInt32Type>();
        let expected = UInt32Array::from(vec![Some(1234), None, Some(5678)]);

        assert_eq!(typed_values, &expected);
    }

    /// A run that spans two `extend` calls is still encoded as a single run.
    #[test]
    fn test_extend() {
        let mut ree_builder = PrimitiveRunBuilder::<Int16Type, Int16Type>::new();
        let first_batch = [1, 2, 2, 5, 5, 4, 4];
        let second_batch = [4, 4, 6, 2];
        ree_builder.extend(first_batch.into_iter().map(Some));
        ree_builder.extend(second_batch.into_iter().map(Some));
        let ree = ree_builder.finish();

        assert_eq!(ree.len(), 11);
        assert_eq!(ree.null_count(), 0);
        assert_eq!(ree.logical_null_count(), 0);
        assert_eq!(ree.run_ends().values(), &[1, 3, 5, 9, 10, 11]);

        let run_values = ree.values().as_primitive::<Int16Type>().values();
        assert_eq!(run_values, &[1, 2, 5, 4, 6, 2]);
    }
}