Skip to content

Commit a23f507

Browse files
authored
parquet: Add tests for page pruning on unsigned integers (apache#9888)
1 parent ef601d2 commit a23f507

File tree

2 files changed

+155
-2
lines changed

2 files changed

+155
-2
lines changed

datafusion/core/tests/parquet/mod.rs

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use arrow::{
2222
Array, ArrayRef, BinaryArray, Date32Array, Date64Array, FixedSizeBinaryArray,
2323
Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, StringArray,
2424
TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
25-
TimestampSecondArray,
25+
TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
2626
},
2727
datatypes::{DataType, Field, Schema},
2828
record_batch::RecordBatch,
@@ -65,6 +65,7 @@ enum Scenario {
6565
Dates,
6666
Int,
6767
Int32Range,
68+
UInt,
6869
Float64,
6970
Decimal,
7071
DecimalBloomFilterInt32,
@@ -387,7 +388,7 @@ fn make_timestamp_batch(offset: Duration) -> RecordBatch {
387388
.unwrap()
388389
}
389390

390-
/// Return record batch with i32 sequence
391+
/// Return record batch with i8, i16, i32, and i64 sequences
391392
///
392393
/// Columns are named
393394
/// "i8" -> Int8Array
@@ -417,6 +418,36 @@ fn make_int_batches(start: i8, end: i8) -> RecordBatch {
417418
.unwrap()
418419
}
419420

421+
/// Return record batch with u8, u16, u32, and u64 sequences
422+
///
423+
/// Columns are named
424+
/// "u8" -> UInt8Array
425+
/// "u16" -> UInt16Array
426+
/// "u32" -> UInt32Array
427+
/// "u64" -> UInt64Array
428+
fn make_uint_batches(start: u8, end: u8) -> RecordBatch {
429+
let schema = Arc::new(Schema::new(vec![
430+
Field::new("u8", DataType::UInt8, true),
431+
Field::new("u16", DataType::UInt16, true),
432+
Field::new("u32", DataType::UInt32, true),
433+
Field::new("u64", DataType::UInt64, true),
434+
]));
435+
let v8: Vec<u8> = (start..end).collect();
436+
let v16: Vec<u16> = (start as _..end as _).collect();
437+
let v32: Vec<u32> = (start as _..end as _).collect();
438+
let v64: Vec<u64> = (start as _..end as _).collect();
439+
RecordBatch::try_new(
440+
schema,
441+
vec![
442+
Arc::new(UInt8Array::from(v8)) as ArrayRef,
443+
Arc::new(UInt16Array::from(v16)) as ArrayRef,
444+
Arc::new(UInt32Array::from(v32)) as ArrayRef,
445+
Arc::new(UInt64Array::from(v64)) as ArrayRef,
446+
],
447+
)
448+
.unwrap()
449+
}
450+
420451
fn make_int32_range(start: i32, end: i32) -> RecordBatch {
421452
let schema = Arc::new(Schema::new(vec![Field::new("i", DataType::Int32, true)]));
422453
let v = vec![start, end];
@@ -620,6 +651,14 @@ fn create_data_batch(scenario: Scenario) -> Vec<RecordBatch> {
620651
Scenario::Int32Range => {
621652
vec![make_int32_range(0, 10), make_int32_range(200000, 300000)]
622653
}
654+
Scenario::UInt => {
655+
vec![
656+
make_uint_batches(0, 5),
657+
make_uint_batches(1, 6),
658+
make_uint_batches(5, 10),
659+
make_uint_batches(250, 255),
660+
]
661+
}
623662
Scenario::Float64 => {
624663
vec![
625664
make_f64_batch(vec![-5.0, -4.0, -3.0, -2.0, -1.0]),

datafusion/core/tests/parquet/page_pruning.rs

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,120 @@ int_tests!(16);
515515
int_tests!(32);
516516
int_tests!(64);
517517

518+
macro_rules! uint_tests {
519+
($bits:expr) => {
520+
paste::item! {
521+
#[tokio::test]
522+
// null count min max
523+
// page-0 0 0 4
524+
// page-1 0 1 5
525+
// page-2 0 5 9
526+
// page-3 0 250 254
527+
async fn [<prune_uint $bits _lt>]() {
528+
test_prune(
529+
Scenario::UInt,
530+
&format!("SELECT * FROM t where u{} < 6", $bits),
531+
Some(0),
532+
Some(5),
533+
11,
534+
)
535+
.await;
536+
}
537+
538+
#[tokio::test]
539+
async fn [<prune_uint $bits _gt >]() {
540+
test_prune(
541+
Scenario::UInt,
542+
&format!("SELECT * FROM t where u{} > 253", $bits),
543+
Some(0),
544+
Some(15),
545+
1,
546+
)
547+
.await;
548+
}
549+
550+
#[tokio::test]
551+
async fn [<prune_uint $bits _eq >]() {
552+
test_prune(
553+
Scenario::UInt,
554+
&format!("SELECT * FROM t where u{} = 6", $bits),
555+
Some(0),
556+
Some(15),
557+
1,
558+
)
559+
.await;
560+
}
561+
562+
#[tokio::test]
563+
async fn [<prune_uint $bits _scalar_fun_and_eq >]() {
564+
test_prune(
565+
Scenario::UInt,
566+
&format!("SELECT * FROM t where power(u{}, 2) = 36 and u{} = 6", $bits, $bits),
567+
Some(0),
568+
Some(15),
569+
1,
570+
)
571+
.await;
572+
}
573+
574+
#[tokio::test]
575+
async fn [<prune_uint $bits _scalar_fun >]() {
576+
test_prune(
577+
Scenario::UInt,
578+
&format!("SELECT * FROM t where power(u{}, 2) = 25", $bits),
579+
Some(0),
580+
Some(0),
581+
2,
582+
)
583+
.await;
584+
}
585+
586+
#[tokio::test]
587+
async fn [<prune_uint $bits _complex_expr>]() {
588+
test_prune(
589+
Scenario::UInt,
590+
&format!("SELECT * FROM t where u{}+1 = 6", $bits),
591+
Some(0),
592+
Some(0),
593+
2,
594+
)
595+
.await;
596+
}
597+
598+
#[tokio::test]
599+
async fn [<prune_uint $bits _eq_in_list >]() {
600+
// result of sql "SELECT * FROM t where u<bits> in (6)"
601+
test_prune(
602+
Scenario::UInt,
603+
&format!("SELECT * FROM t where u{} in (6)", $bits),
604+
Some(0),
605+
Some(15),
606+
1,
607+
)
608+
.await;
609+
}
610+
611+
#[tokio::test]
612+
async fn [<prune_uint $bits _eq_in_list_negated >]() {
613+
// result of sql "SELECT * FROM t where u<bits> not in (6)" prunes nothing
614+
test_prune(
615+
Scenario::UInt,
616+
&format!("SELECT * FROM t where u{} not in (6)", $bits),
617+
Some(0),
618+
Some(0),
619+
19,
620+
)
621+
.await;
622+
}
623+
}
624+
}
625+
}
626+
627+
uint_tests!(8);
628+
uint_tests!(16);
629+
uint_tests!(32);
630+
uint_tests!(64);
631+
518632
#[tokio::test]
519633
// null count min max
520634
// page-0 0 -5.0 -1.0

0 commit comments

Comments
 (0)