/Users/andrewlamb/Software/arrow-rs/arrow-select/src/coalesce/primitive.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use crate::coalesce::InProgressArray; |
19 | | use arrow_array::cast::AsArray; |
20 | | use arrow_array::{Array, ArrayRef, ArrowPrimitiveType, PrimitiveArray}; |
21 | | use arrow_buffer::{NullBufferBuilder, ScalarBuffer}; |
22 | | use arrow_schema::{ArrowError, DataType}; |
23 | | use std::fmt::Debug; |
24 | | use std::sync::Arc; |
25 | | |
26 | | /// InProgressArray for [`PrimitiveArray`] |
27 | | #[derive(Debug)] |
28 | | pub(crate) struct InProgressPrimitiveArray<T: ArrowPrimitiveType> { |
29 | | /// Data type of the array |
30 | | data_type: DataType, |
31 | | /// The current source, if any |
32 | | source: Option<ArrayRef>, |
33 | | /// the target batch size (and thus size for views allocation) |
34 | | batch_size: usize, |
35 | | /// In progress nulls |
36 | | nulls: NullBufferBuilder, |
37 | | /// The currently in progress array |
38 | | current: Vec<T::Native>, |
39 | | } |
40 | | |
41 | | impl<T: ArrowPrimitiveType> InProgressPrimitiveArray<T> { |
42 | | /// Create a new `InProgressPrimitiveArray` |
43 | 0 | pub(crate) fn new(batch_size: usize, data_type: DataType) -> Self { |
44 | 0 | Self { |
45 | 0 | data_type, |
46 | 0 | batch_size, |
47 | 0 | source: None, |
48 | 0 | nulls: NullBufferBuilder::new(batch_size), |
49 | 0 | current: vec![], |
50 | 0 | } |
51 | 0 | } |
52 | | |
53 | | /// Allocate space for output values if necessary. |
54 | | /// |
55 | | /// This is done on write (when we know it is necessary) rather than |
56 | | /// eagerly to avoid allocations that are not used. |
57 | 0 | fn ensure_capacity(&mut self) { |
58 | 0 | self.current.reserve(self.batch_size); |
59 | 0 | } |
60 | | } |
61 | | |
62 | | impl<T: ArrowPrimitiveType + Debug> InProgressArray for InProgressPrimitiveArray<T> { |
63 | 0 | fn set_source(&mut self, source: Option<ArrayRef>) { |
64 | 0 | self.source = source; |
65 | 0 | } |
66 | | |
67 | 0 | fn copy_rows(&mut self, offset: usize, len: usize) -> Result<(), ArrowError> { |
68 | 0 | self.ensure_capacity(); |
69 | | |
70 | 0 | let s = self |
71 | 0 | .source |
72 | 0 | .as_ref() |
73 | 0 | .ok_or_else(|| { |
74 | 0 | ArrowError::InvalidArgumentError( |
75 | 0 | "Internal Error: InProgressPrimitiveArray: source not set".to_string(), |
76 | 0 | ) |
77 | 0 | })? |
78 | 0 | .as_primitive::<T>(); |
79 | | |
80 | | // add nulls if necessary |
81 | 0 | if let Some(nulls) = s.nulls().as_ref() { |
82 | 0 | let nulls = nulls.slice(offset, len); |
83 | 0 | self.nulls.append_buffer(&nulls); |
84 | 0 | } else { |
85 | 0 | self.nulls.append_n_non_nulls(len); |
86 | 0 | }; |
87 | | |
88 | | // Copy the values |
89 | 0 | self.current |
90 | 0 | .extend_from_slice(&s.values()[offset..offset + len]); |
91 | | |
92 | 0 | Ok(()) |
93 | 0 | } |
94 | | |
95 | 0 | fn finish(&mut self) -> Result<ArrayRef, ArrowError> { |
96 | | // take and reset the current values and nulls |
97 | 0 | let values = std::mem::take(&mut self.current); |
98 | 0 | let nulls = self.nulls.finish(); |
99 | 0 | self.nulls = NullBufferBuilder::new(self.batch_size); |
100 | | |
101 | 0 | let array = PrimitiveArray::<T>::try_new(ScalarBuffer::from(values), nulls)? |
102 | | // preserve timezone / precision+scale if applicable |
103 | 0 | .with_data_type(self.data_type.clone()); |
104 | 0 | Ok(Arc::new(array)) |
105 | 0 | } |
106 | | } |