Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
ea83c37
separate implementation of oeq properties
mustafasrepo Aug 10, 2023
0d9f208
Merge branch 'apache_main' into refactor/oeq_properties
mustafasrepo Aug 17, 2023
4ad5ec5
Simplifications
mustafasrepo Aug 17, 2023
016558b
Move utils to methods
mustafasrepo Aug 17, 2023
8007b1b
Remove unnecessary code
mustafasrepo Aug 17, 2023
5d896a8
Address todo
mustafasrepo Aug 17, 2023
b8def0a
Buggy; is_aggressive mode will be added
mustafasrepo Aug 17, 2023
8850f33
start implementing aggressive mode
mustafasrepo Aug 17, 2023
0d32ca5
all tests pass
mustafasrepo Sep 6, 2023
aac0a0c
minor changes
mustafasrepo Sep 6, 2023
f0dbd85
All tests pass
mustafasrepo Sep 6, 2023
7112a25
Minor changes
mustafasrepo Sep 6, 2023
ec41194
All tests pass
mustafasrepo Sep 7, 2023
b16ad15
minor changes
mustafasrepo Sep 7, 2023
717631e
all tests pass
mustafasrepo Sep 7, 2023
b93cc5d
Simplifications
mustafasrepo Sep 7, 2023
b832b2d
minor changes
mustafasrepo Sep 7, 2023
5a92633
Merge branch 'apache_main' into refactor/oeq_properties
mustafasrepo Sep 7, 2023
858576b
Resolve linter error
mustafasrepo Sep 7, 2023
09aa6c8
Minor changes
mustafasrepo Sep 7, 2023
7212e56
minor changes
mustafasrepo Sep 8, 2023
49ea333
Update plan
mustafasrepo Sep 8, 2023
eb81b43
Merge branch 'apache_main' into refactor/oeq_properties
mustafasrepo Sep 8, 2023
18c4bab
Simplifications, update comments
mustafasrepo Sep 8, 2023
fe322b4
Update comments, Use existing stats to find constants
mustafasrepo Sep 8, 2023
0cb1ee2
Merge branch 'apache_main' into refactor/oeq_properties
mustafasrepo Sep 12, 2023
cff6f2f
Simplifications
mustafasrepo Sep 12, 2023
6cb4d5a
Unknown input stats are handled
berkaysynnada Sep 12, 2023
ef994fb
Address reviews
mustafasrepo Sep 13, 2023
0290184
Merge branch 'apache_main' into refactor/oeq_properties
mustafasrepo Sep 13, 2023
4fe9c0d
Simplifications
mustafasrepo Sep 15, 2023
1c1de0d
Simplifications
mustafasrepo Sep 15, 2023
23e30ae
Merge branch 'apache_main' into refactor/oeq_properties
mustafasrepo Sep 18, 2023
abe0f31
Address reviews
mustafasrepo Sep 18, 2023
2eaf755
Fix subdirectories
mustafasrepo Sep 18, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions datafusion/common/src/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

use std::fmt::Display;

use arrow::datatypes::DataType;

use crate::ScalarValue;

/// Statistics for a relation
Expand Down Expand Up @@ -70,3 +72,25 @@ pub struct ColumnStatistics {
/// Number of distinct values
pub distinct_count: Option<usize>,
}

impl ColumnStatistics {
    /// Reports whether the column holds exactly one non-null value (i.e. it is constant).
    ///
    /// Returns `true` only when both `min_value` and `max_value` are present,
    /// neither of them is null, and they compare equal. Any missing or null
    /// bound yields `false`.
    pub fn is_singleton(&self) -> bool {
        if let (Some(lower), Some(upper)) = (&self.min_value, &self.max_value) {
            // A null bound stands for "not a finite value", so it can never
            // describe a constant column.
            let has_null_bound = lower.is_null() || upper.is_null();
            !has_null_bound && lower == upper
        } else {
            // At least one bound is unknown.
            false
        }
    }

    /// Builds the [`ColumnStatistics`] for a column of type `dt` whose bounds are unbounded.
    ///
    /// Both `min_value` and `max_value` are set to the scalar obtained by
    /// converting `dt` into a [`ScalarValue`] (or to `None` if that conversion
    /// fails); `null_count` and `distinct_count` are left unknown.
    pub fn new_with_unbounded_column(dt: &DataType) -> ColumnStatistics {
        // A failed conversion is deliberately mapped to `None` rather than propagated.
        let unbounded = ScalarValue::try_from(dt.clone()).ok();
        let max_value = unbounded.clone();
        let min_value = unbounded;
        Self {
            null_count: None,
            max_value,
            min_value,
            distinct_count: None,
        }
    }
}
24 changes: 4 additions & 20 deletions datafusion/core/src/physical_optimizer/enforce_distribution.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,7 @@ use datafusion_physical_expr::utils::{
map_columns_before_projection, ordering_satisfy_requirement_concrete,
};
use datafusion_physical_expr::{
expr_list_eq_strict_order, normalize_expr_with_equivalence_properties, PhysicalExpr,
PhysicalSortRequirement,
expr_list_eq_strict_order, PhysicalExpr, PhysicalSortRequirement,
};

use datafusion_common::internal_err;
Expand Down Expand Up @@ -807,36 +806,21 @@ fn try_reorder(
} else if !equivalence_properties.classes().is_empty() {
normalized_expected = expected
.iter()
.map(|e| {
normalize_expr_with_equivalence_properties(
e.clone(),
equivalence_properties.classes(),
)
})
.map(|e| equivalence_properties.normalize_expr(e.clone()))
.collect::<Vec<_>>();
assert_eq!(normalized_expected.len(), expected.len());

normalized_left_keys = join_keys
.left_keys
.iter()
.map(|e| {
normalize_expr_with_equivalence_properties(
e.clone(),
equivalence_properties.classes(),
)
})
.map(|e| equivalence_properties.normalize_expr(e.clone()))
.collect::<Vec<_>>();
assert_eq!(join_keys.left_keys.len(), normalized_left_keys.len());

normalized_right_keys = join_keys
.right_keys
.iter()
.map(|e| {
normalize_expr_with_equivalence_properties(
e.clone(),
equivalence_properties.classes(),
)
})
.map(|e| equivalence_properties.normalize_expr(e.clone()))
.collect::<Vec<_>>();
assert_eq!(join_keys.right_keys.len(), normalized_right_keys.len());

Expand Down
5 changes: 4 additions & 1 deletion datafusion/physical-expr/src/analysis.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,12 +189,15 @@ fn shrink_boundaries(
})?;
let final_result = graph.get_interval(*root_index);

// If we encounter an error during selectivity calculation, use 1.0 as the cardinality estimate,
// which is the safest estimate (i.e. the largest possible value).
let selectivity = calculate_selectivity(
&final_result.lower.value,
&final_result.upper.value,
&target_boundaries,
&initial_boundaries,
)?;
)
.unwrap_or(1.0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is an example of #7552 -- ignoring errors in the normal path results in potential slowdowns of planning

can we please fix the underlying issue here rather than ignoring it? Maybe we need to change the interval analysis API to return Result<Option<>> rather than just Result

In general ignoring the errors I think is just papering over real problems

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with you. I will stop ignoring this error in a subsequent PR, where I will:

  • remove Statistics::default
  • Remove unwrap_or, error ignoring.


if !(0.0..=1.0).contains(&selectivity) {
return internal_err!("Selectivity is out of limit: {}", selectivity);
Expand Down
Loading