/Users/andrewlamb/Software/arrow-rs/arrow-array/src/builder/primitive_run_builder.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use std::{any::Any, sync::Arc}; |
19 | | |
20 | | use crate::{ArrayRef, ArrowPrimitiveType, RunArray, types::RunEndIndexType}; |
21 | | |
22 | | use super::{ArrayBuilder, PrimitiveBuilder}; |
23 | | |
24 | | use arrow_buffer::ArrowNativeType; |
25 | | |
/// Builder for [`RunArray`] of [`PrimitiveArray`](crate::array::PrimitiveArray)
///
/// `R` is the type of the run-ends child array and `V` the type of the values
/// child array. Consecutive appended elements that compare equal (including
/// consecutive nulls) are collapsed into a single run.
///
/// # Example:
///
/// ```
///
/// # use arrow_array::builder::PrimitiveRunBuilder;
/// # use arrow_array::cast::AsArray;
/// # use arrow_array::types::{UInt32Type, Int16Type};
/// # use arrow_array::{Array, UInt32Array, Int16Array};
///
/// let mut builder =
/// PrimitiveRunBuilder::<Int16Type, UInt32Type>::new();
/// builder.append_value(1234);
/// builder.append_value(1234);
/// builder.append_value(1234);
/// builder.append_null();
/// builder.append_value(5678);
/// builder.append_value(5678);
/// let array = builder.finish();
///
/// assert_eq!(array.run_ends().values(), &[3, 4, 6]);
///
/// let av = array.values();
///
/// assert!(!av.is_null(0));
/// assert!(av.is_null(1));
/// assert!(!av.is_null(2));
///
/// // Values are polymorphic and so require a downcast.
/// let ava: &UInt32Array = av.as_primitive::<UInt32Type>();
///
/// assert_eq!(ava, &UInt32Array::from(vec![Some(1234), None, Some(5678)]));
/// ```
#[derive(Debug)]
pub struct PrimitiveRunBuilder<R, V>
where
    R: RunEndIndexType,
    V: ArrowPrimitiveType,
{
    // Receives the run end of every run that has been written out.
    run_ends_builder: PrimitiveBuilder<R>,
    // Receives the value (or null) of every run that has been written out.
    values_builder: PrimitiveBuilder<V>,
    // Value of the run in progress; `None` stands for a run of nulls.
    current_value: Option<V::Native>,
    // Number of logical elements appended so far, which is also the run end
    // that will be recorded for the run in progress.
    current_run_end_index: usize,
    // Value of `current_run_end_index` when a run was last written to the
    // child builders; equal to `current_run_end_index` when no run is pending.
    prev_run_end_index: usize,
}
72 | | |
73 | | impl<R, V> Default for PrimitiveRunBuilder<R, V> |
74 | | where |
75 | | R: RunEndIndexType, |
76 | | V: ArrowPrimitiveType, |
77 | | { |
78 | | fn default() -> Self { |
79 | | Self::new() |
80 | | } |
81 | | } |
82 | | |
83 | | impl<R, V> PrimitiveRunBuilder<R, V> |
84 | | where |
85 | | R: RunEndIndexType, |
86 | | V: ArrowPrimitiveType, |
87 | | { |
88 | | /// Creates a new `PrimitiveRunBuilder` |
89 | 9 | pub fn new() -> Self { |
90 | 9 | Self { |
91 | 9 | run_ends_builder: PrimitiveBuilder::new(), |
92 | 9 | values_builder: PrimitiveBuilder::new(), |
93 | 9 | current_value: None, |
94 | 9 | current_run_end_index: 0, |
95 | 9 | prev_run_end_index: 0, |
96 | 9 | } |
97 | 9 | } |
98 | | |
99 | | /// Creates a new `PrimitiveRunBuilder` with the provided capacity |
100 | | /// |
101 | | /// `capacity`: the expected number of run-end encoded values. |
102 | | pub fn with_capacity(capacity: usize) -> Self { |
103 | | Self { |
104 | | run_ends_builder: PrimitiveBuilder::with_capacity(capacity), |
105 | | values_builder: PrimitiveBuilder::with_capacity(capacity), |
106 | | current_value: None, |
107 | | current_run_end_index: 0, |
108 | | prev_run_end_index: 0, |
109 | | } |
110 | | } |
111 | | } |
112 | | |
113 | | impl<R, V> ArrayBuilder for PrimitiveRunBuilder<R, V> |
114 | | where |
115 | | R: RunEndIndexType, |
116 | | V: ArrowPrimitiveType, |
117 | | { |
118 | | /// Returns the builder as a non-mutable `Any` reference. |
119 | | fn as_any(&self) -> &dyn Any { |
120 | | self |
121 | | } |
122 | | |
123 | | /// Returns the builder as a mutable `Any` reference. |
124 | | fn as_any_mut(&mut self) -> &mut dyn Any { |
125 | | self |
126 | | } |
127 | | |
128 | | /// Returns the boxed builder as a box of `Any`. |
129 | | fn into_box_any(self: Box<Self>) -> Box<dyn Any> { |
130 | | self |
131 | | } |
132 | | |
133 | | /// Returns the length of logical array encoded by |
134 | | /// the eventual runs array. |
135 | | fn len(&self) -> usize { |
136 | | self.current_run_end_index |
137 | | } |
138 | | |
139 | | /// Builds the array and reset this builder. |
140 | | fn finish(&mut self) -> ArrayRef { |
141 | | Arc::new(self.finish()) |
142 | | } |
143 | | |
144 | | /// Builds the array without resetting the builder. |
145 | | fn finish_cloned(&self) -> ArrayRef { |
146 | | Arc::new(self.finish_cloned()) |
147 | | } |
148 | | } |
149 | | |
150 | | impl<R, V> PrimitiveRunBuilder<R, V> |
151 | | where |
152 | | R: RunEndIndexType, |
153 | | V: ArrowPrimitiveType, |
154 | | { |
155 | | /// Appends optional value to the logical array encoded by the RunArray. |
156 | 54 | pub fn append_option(&mut self, value: Option<V::Native>) { |
157 | 54 | if self.current_run_end_index == 0 { |
158 | 9 | self.current_run_end_index = 1; |
159 | 9 | self.current_value = value; |
160 | 9 | return; |
161 | 45 | } |
162 | 45 | if self.current_value != value { |
163 | 19 | self.append_run_end(); |
164 | 19 | self.current_value = value; |
165 | 26 | } |
166 | | |
167 | 45 | self.current_run_end_index += 1; |
168 | 54 | } |
169 | | |
170 | | /// Appends value to the logical array encoded by the run-ends array. |
171 | | pub fn append_value(&mut self, value: V::Native) { |
172 | | self.append_option(Some(value)) |
173 | | } |
174 | | |
175 | | /// Appends null to the logical array encoded by the run-ends array. |
176 | | pub fn append_null(&mut self) { |
177 | | self.append_option(None) |
178 | | } |
179 | | |
180 | | /// Creates the RunArray and resets the builder. |
181 | | /// Panics if RunArray cannot be built. |
182 | 9 | pub fn finish(&mut self) -> RunArray<R> { |
183 | | // write the last run end to the array. |
184 | 9 | self.append_run_end(); |
185 | | |
186 | | // reset the run index to zero. |
187 | 9 | self.current_value = None; |
188 | 9 | self.current_run_end_index = 0; |
189 | | |
190 | | // build the run encoded array by adding run_ends and values array as its children. |
191 | 9 | let run_ends_array = self.run_ends_builder.finish(); |
192 | 9 | let values_array = self.values_builder.finish(); |
193 | 9 | RunArray::<R>::try_new(&run_ends_array, &values_array).unwrap() |
194 | 9 | } |
195 | | |
196 | | /// Creates the RunArray and without resetting the builder. |
197 | | /// Panics if RunArray cannot be built. |
198 | | pub fn finish_cloned(&self) -> RunArray<R> { |
199 | | let mut run_ends_array = self.run_ends_builder.finish_cloned(); |
200 | | let mut values_array = self.values_builder.finish_cloned(); |
201 | | |
202 | | // Add current run if one exists |
203 | | if self.prev_run_end_index != self.current_run_end_index { |
204 | | let mut run_end_builder = run_ends_array.into_builder().unwrap(); |
205 | | let mut values_builder = values_array.into_builder().unwrap(); |
206 | | self.append_run_end_with_builders(&mut run_end_builder, &mut values_builder); |
207 | | run_ends_array = run_end_builder.finish(); |
208 | | values_array = values_builder.finish(); |
209 | | } |
210 | | |
211 | | RunArray::try_new(&run_ends_array, &values_array).unwrap() |
212 | | } |
213 | | |
214 | | // Appends the current run to the array. |
215 | 28 | fn append_run_end(&mut self) { |
216 | | // empty array or the function called without appending any value. |
217 | 28 | if self.prev_run_end_index == self.current_run_end_index { |
218 | 0 | return; |
219 | 28 | } |
220 | 28 | let run_end_index = self.run_end_index_as_native(); |
221 | 28 | self.run_ends_builder.append_value(run_end_index); |
222 | 28 | self.values_builder.append_option(self.current_value); |
223 | 28 | self.prev_run_end_index = self.current_run_end_index; |
224 | 28 | } |
225 | | |
226 | | // Similar to `append_run_end` but on custom builders. |
227 | | // Used in `finish_cloned` which is not suppose to mutate `self`. |
228 | | fn append_run_end_with_builders( |
229 | | &self, |
230 | | run_ends_builder: &mut PrimitiveBuilder<R>, |
231 | | values_builder: &mut PrimitiveBuilder<V>, |
232 | | ) { |
233 | | let run_end_index = self.run_end_index_as_native(); |
234 | | run_ends_builder.append_value(run_end_index); |
235 | | values_builder.append_option(self.current_value); |
236 | | } |
237 | | |
238 | 28 | fn run_end_index_as_native(&self) -> R::Native { |
239 | 28 | R::Native::from_usize(self.current_run_end_index) |
240 | 28 | .unwrap_or_else(|| panic!0 ( |
241 | 0 | "Cannot convert `current_run_end_index` {} from `usize` to native form of arrow datatype {}", |
242 | | self.current_run_end_index, |
243 | 0 | R::DATA_TYPE |
244 | | )) |
245 | 28 | } |
246 | | } |
247 | | |
248 | | impl<R, V> Extend<Option<V::Native>> for PrimitiveRunBuilder<R, V> |
249 | | where |
250 | | R: RunEndIndexType, |
251 | | V: ArrowPrimitiveType, |
252 | | { |
253 | 9 | fn extend<T: IntoIterator<Item = Option<V::Native>>>(&mut self, iter: T) { |
254 | 63 | for elem54 in iter { |
255 | 54 | self.append_option(elem); |
256 | 54 | } |
257 | 9 | } |
258 | | } |
259 | | |
#[cfg(test)]
mod tests {
    use crate::builder::PrimitiveRunBuilder;
    use crate::cast::AsArray;
    use crate::types::{Int16Type, UInt32Type};
    use crate::{Array, UInt32Array};

    /// Three equal values, one null and two equal values must collapse into
    /// three runs, with the null kept as its own run.
    #[test]
    fn test_primitive_ree_array_builder() {
        let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::new();
        for _ in 0..3 {
            builder.append_value(1234);
        }
        builder.append_null();
        for _ in 0..2 {
            builder.append_value(5678);
        }

        let run_array = builder.finish();

        // Nulls live in the values child, so only the logical count sees them.
        assert_eq!(run_array.null_count(), 0);
        assert_eq!(run_array.logical_null_count(), 1);
        assert_eq!(run_array.len(), 6);

        assert_eq!(run_array.run_ends().values(), &[3, 4, 6]);

        let run_values = run_array.values();

        assert!(!run_values.is_null(0));
        assert!(run_values.is_null(1));
        assert!(!run_values.is_null(2));

        // Values are polymorphic and so require a downcast.
        let typed_values: &UInt32Array = run_values.as_primitive::<UInt32Type>();

        assert_eq!(
            typed_values,
            &UInt32Array::from(vec![Some(1234), None, Some(5678)])
        );
    }

    /// A run that straddles two `extend` calls must not be split.
    #[test]
    fn test_extend() {
        let mut builder = PrimitiveRunBuilder::<Int16Type, Int16Type>::new();
        builder.extend([1, 2, 2, 5, 5, 4, 4].into_iter().map(Some));
        builder.extend([4, 4, 6, 2].into_iter().map(Some));
        let run_array = builder.finish();

        assert_eq!(run_array.len(), 11);
        assert_eq!(run_array.null_count(), 0);
        assert_eq!(run_array.logical_null_count(), 0);
        assert_eq!(run_array.run_ends().values(), &[1, 3, 5, 9, 10, 11]);

        let run_values = run_array.values();
        assert_eq!(
            run_values.as_primitive::<Int16Type>().values(),
            &[1, 2, 5, 4, 6, 2]
        );
    }
}