Skip to content

Commit 6d0ea90

Browse files
tustvold and viirya
authored
Make Parquet reader filter APIs public (#1792) (#2467)
* Make filter APIs public (#1792)
* Update parquet/src/arrow/arrow_reader/mod.rs

Co-authored-by: Liang-Chi Hsieh <[email protected]>
Co-authored-by: Liang-Chi Hsieh <[email protected]>
1 parent 0013170 commit 6d0ea90

File tree

2 files changed

+13
-19
lines changed

2 files changed

+13
-19
lines changed

parquet/src/arrow/arrow_reader/mod.rs

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -39,16 +39,11 @@ use crate::file::reader::{ChunkReader, FileReader, SerializedFileReader};
3939
use crate::file::serialized_reader::ReadOptionsBuilder;
4040
use crate::schema::types::SchemaDescriptor;
4141

42-
#[allow(unused)]
4342
mod filter;
44-
#[allow(unused)]
4543
mod selection;
4644

47-
// TODO: Make these public once stable (#1792)
48-
#[allow(unused_imports)]
49-
pub(crate) use filter::{ArrowPredicate, ArrowPredicateFn, RowFilter};
50-
#[allow(unused_imports)]
51-
pub(crate) use selection::{RowSelection, RowSelector};
45+
pub use filter::{ArrowPredicate, ArrowPredicateFn, RowFilter};
46+
pub use selection::{RowSelection, RowSelector};
5247

5348
/// A generic builder for constructing sync or async arrow parquet readers. This is not intended
5449
/// to be used directly, instead you should use the specialization for the type of reader
@@ -140,15 +135,17 @@ impl<T> ArrowReaderBuilder<T> {
140135
}
141136
}
142137

143-
/// Provide a [`RowSelection] to filter out rows, and avoid fetching their
144-
/// data into memory
138+
/// Provide a [`RowSelection`] to filter out rows, and avoid fetching their
139+
/// data into memory.
145140
///
146-
/// Row group filtering is applied prior to this, and rows from skipped
141+
/// Row group filtering is applied prior to this, and therefore rows from skipped
147142
/// row groups should not be included in the [`RowSelection`]
148143
///
149-
/// TODO: Make public once stable (#1792)
150-
#[allow(unused)]
151-
pub(crate) fn with_row_selection(self, selection: RowSelection) -> Self {
144+
/// An example use case of this would be applying a selection determined by
145+
/// evaluating predicates against the [`Index`].
146+
///
147+
/// [`Index`]: crate::file::page_index::index::Index
148+
pub fn with_row_selection(self, selection: RowSelection) -> Self {
152149
Self {
153150
selection: Some(selection),
154151
..self
@@ -158,10 +155,7 @@ impl<T> ArrowReaderBuilder<T> {
158155
/// Provide a [`RowFilter`] to skip decoding rows
159156
///
160157
/// Row filters are applied after row group selection and row selection
161-
///
162-
/// TODO: Make public once stable (#1792)
163-
#[allow(unused)]
164-
pub(crate) fn with_row_filter(self, filter: RowFilter) -> Self {
158+
pub fn with_row_filter(self, filter: RowFilter) -> Self {
165159
Self {
166160
filter: Some(filter),
167161
..self

parquet/src/arrow/arrow_reader/selection.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -451,11 +451,11 @@ mod tests {
451451
let mut rand = thread_rng();
452452
for _ in 0..100 {
453453
let a_len = rand.gen_range(10..100);
454-
let a_bools: Vec<_> = (0..a_len).map(|x| rand.gen_bool(0.2)).collect();
454+
let a_bools: Vec<_> = (0..a_len).map(|_| rand.gen_bool(0.2)).collect();
455455
let a = RowSelection::from_filters(&[BooleanArray::from(a_bools.clone())]);
456456

457457
let b_len: usize = a_bools.iter().map(|x| *x as usize).sum();
458-
let b_bools: Vec<_> = (0..b_len).map(|x| rand.gen_bool(0.8)).collect();
458+
let b_bools: Vec<_> = (0..b_len).map(|_| rand.gen_bool(0.8)).collect();
459459
let b = RowSelection::from_filters(&[BooleanArray::from(b_bools.clone())]);
460460

461461
let mut expected_bools = vec![false; a_len];

0 commit comments

Comments
 (0)