Skip to content

Commit c72b98e

Browse files
Enhance/Refactor Ordering Equivalence Properties (#7566)
* separate implementation of oeq properties * Simplifications * Move utils to methods * Remove unnecessary code * Address todo * Buggy is_aggressive mode will be added * start implementing aggressive mode * all tests pass * minor changes * All tests pass * Minor changes * All tests pass * minor changes * all tests pass * Simplifications * minor changes * Resolve linter error * Minor changes * minor changes * Update plan * Simplifications, update comments * Update comments, Use existing stats to find constants * Simplifications * Unknown input stats are handled * Address reviews * Simplifications * Simplifications * Address reviews * Fix subdirectories --------- Co-authored-by: berkaysynnada <[email protected]>
1 parent 678d27a commit c72b98e

File tree

14 files changed

+952
-586
lines changed

14 files changed

+952
-586
lines changed

datafusion/common/src/stats.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
2020
use std::fmt::Display;
2121

22+
use arrow::datatypes::DataType;
23+
2224
use crate::ScalarValue;
2325

2426
/// Statistics for a relation
@@ -70,3 +72,25 @@ pub struct ColumnStatistics {
7072
/// Number of distinct values
7173
pub distinct_count: Option<usize>,
7274
}
75+
76+
impl ColumnStatistics {
77+
/// Returns `true` if the column contains a single non-null value (e.g. a constant).
78+
pub fn is_singleton(&self) -> bool {
79+
match (&self.min_value, &self.max_value) {
80+
// Min and max values are equal and neither is null (a null bound denotes an unbounded, i.e. infinite, limit).
81+
(Some(min), Some(max)) => !min.is_null() && !max.is_null() && (min == max),
82+
(_, _) => false,
83+
}
84+
}
85+
86+
/// Returns the [`ColumnStatistics`] corresponding to the given datatype by assigning infinite bounds.
87+
pub fn new_with_unbounded_column(dt: &DataType) -> ColumnStatistics {
88+
let null = ScalarValue::try_from(dt.clone()).ok();
89+
ColumnStatistics {
90+
null_count: None,
91+
max_value: null.clone(),
92+
min_value: null,
93+
distinct_count: None,
94+
}
95+
}
96+
}

datafusion/core/src/physical_optimizer/enforce_distribution.rs

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,7 @@ use datafusion_physical_expr::utils::{
5454
map_columns_before_projection, ordering_satisfy_requirement_concrete,
5555
};
5656
use datafusion_physical_expr::{
57-
expr_list_eq_strict_order, normalize_expr_with_equivalence_properties, PhysicalExpr,
58-
PhysicalSortRequirement,
57+
expr_list_eq_strict_order, PhysicalExpr, PhysicalSortRequirement,
5958
};
6059

6160
use datafusion_common::internal_err;
@@ -807,36 +806,21 @@ fn try_reorder(
807806
} else if !equivalence_properties.classes().is_empty() {
808807
normalized_expected = expected
809808
.iter()
810-
.map(|e| {
811-
normalize_expr_with_equivalence_properties(
812-
e.clone(),
813-
equivalence_properties.classes(),
814-
)
815-
})
809+
.map(|e| equivalence_properties.normalize_expr(e.clone()))
816810
.collect::<Vec<_>>();
817811
assert_eq!(normalized_expected.len(), expected.len());
818812

819813
normalized_left_keys = join_keys
820814
.left_keys
821815
.iter()
822-
.map(|e| {
823-
normalize_expr_with_equivalence_properties(
824-
e.clone(),
825-
equivalence_properties.classes(),
826-
)
827-
})
816+
.map(|e| equivalence_properties.normalize_expr(e.clone()))
828817
.collect::<Vec<_>>();
829818
assert_eq!(join_keys.left_keys.len(), normalized_left_keys.len());
830819

831820
normalized_right_keys = join_keys
832821
.right_keys
833822
.iter()
834-
.map(|e| {
835-
normalize_expr_with_equivalence_properties(
836-
e.clone(),
837-
equivalence_properties.classes(),
838-
)
839-
})
823+
.map(|e| equivalence_properties.normalize_expr(e.clone()))
840824
.collect::<Vec<_>>();
841825
assert_eq!(join_keys.right_keys.len(), normalized_right_keys.len());
842826

datafusion/physical-expr/src/analysis.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,12 +189,15 @@ fn shrink_boundaries(
189189
})?;
190190
let final_result = graph.get_interval(*root_index);
191191

192+
// If we encounter an error during the selectivity calculation, fall back to 1.0 as the
193+
// selectivity estimate, since it is the safest choice (i.e. the largest possible value).
192194
let selectivity = calculate_selectivity(
193195
&final_result.lower.value,
194196
&final_result.upper.value,
195197
&target_boundaries,
196198
&initial_boundaries,
197-
)?;
199+
)
200+
.unwrap_or(1.0);
198201

199202
if !(0.0..=1.0).contains(&selectivity) {
200203
return internal_err!("Selectivity is out of limit: {}", selectivity);

0 commit comments

Comments
 (0)