-
Notifications
You must be signed in to change notification settings - Fork 1.8k
#17801 Improve nullability reporting of case expressions #17813
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 24 commits
408cee1
045fc9c
de8b780
bbd2949
2075f4b
8c87937
e155d41
5cfe8b6
ac4267c
101db28
f4c8579
81b6ec1
b6ebd13
ebc2d38
3131899
3da92e5
0a6b2e7
113e899
9dee1e8
4a22dfc
a1bc263
ac765e9
51af749
4af84a7
427fc30
0223a54
c5914d6
4b879e4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -60,6 +60,7 @@ use crate::schema_equivalence::schema_satisfied_by; | |||||||||
| use arrow::array::{builder::StringBuilder, RecordBatch}; | ||||||||||
| use arrow::compute::SortOptions; | ||||||||||
| use arrow::datatypes::Schema; | ||||||||||
| use arrow_schema::Field; | ||||||||||
| use datafusion_catalog::ScanArgs; | ||||||||||
| use datafusion_common::display::ToStringifiedPlan; | ||||||||||
| use datafusion_common::format::ExplainAnalyzeLevel; | ||||||||||
|
|
@@ -2516,7 +2517,9 @@ impl<'a> OptimizationInvariantChecker<'a> { | |||||||||
| previous_schema: Arc<Schema>, | ||||||||||
| ) -> Result<()> { | ||||||||||
| // if the rule is not permitted to change the schema, confirm that it did not change. | ||||||||||
| if self.rule.schema_check() && plan.schema() != previous_schema { | ||||||||||
| if self.rule.schema_check() | ||||||||||
| && !is_allowed_schema_change(previous_schema.as_ref(), plan.schema().as_ref()) | ||||||||||
| { | ||||||||||
| internal_err!("PhysicalOptimizer rule '{}' failed. Schema mismatch. Expected original schema: {:?}, got new schema: {:?}", | ||||||||||
| self.rule.name(), | ||||||||||
| previous_schema, | ||||||||||
|
|
@@ -2532,6 +2535,33 @@ impl<'a> OptimizationInvariantChecker<'a> { | |||||||||
| } | ||||||||||
| } | ||||||||||
|
|
||||||||||
| /// Checks if the change from `old` schema to `new` is allowed or not. | ||||||||||
| /// The current implementation only allows nullability of individual fields to change | ||||||||||
| /// from 'nullable' to 'not nullable'. | ||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think adding rationale about why would be helpful here as it may not be immediately obvious
Suggested change
|
||||||||||
| fn is_allowed_schema_change(old: &Schema, new: &Schema) -> bool { | ||||||||||
| if new.metadata != old.metadata { | ||||||||||
| return false; | ||||||||||
| } | ||||||||||
|
|
||||||||||
| if new.fields.len() != old.fields.len() { | ||||||||||
| return false; | ||||||||||
| } | ||||||||||
|
|
||||||||||
| let new_fields = new.fields.iter().map(|f| f.as_ref()); | ||||||||||
| let old_fields = old.fields.iter().map(|f| f.as_ref()); | ||||||||||
| old_fields | ||||||||||
| .zip(new_fields) | ||||||||||
| .all(|(old, new)| is_allowed_field_change(old, new)) | ||||||||||
| } | ||||||||||
|
|
||||||||||
| fn is_allowed_field_change(old_field: &Field, new_field: &Field) -> bool { | ||||||||||
| new_field.name() == old_field.name() | ||||||||||
| && new_field.data_type() == old_field.data_type() | ||||||||||
| && new_field.metadata() == old_field.metadata() | ||||||||||
| && (new_field.is_nullable() == old_field.is_nullable() | ||||||||||
| || !new_field.is_nullable()) | ||||||||||
| } | ||||||||||
|
|
||||||||||
| impl<'n> TreeNodeVisitor<'n> for OptimizationInvariantChecker<'_> { | ||||||||||
| type Node = Arc<dyn ExecutionPlan>; | ||||||||||
|
|
||||||||||
|
|
||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -45,6 +45,7 @@ sql = ["sqlparser"] | |
| [dependencies] | ||
| arrow = { workspace = true } | ||
| async-trait = { workspace = true } | ||
| bitflags = "2.9.4" | ||
|
||
| chrono = { workspace = true } | ||
| datafusion-common = { workspace = true, default-features = false } | ||
| datafusion-doc = { workspace = true } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -341,6 +341,11 @@ pub fn is_null(expr: Expr) -> Expr { | |
| Expr::IsNull(Box::new(expr)) | ||
| } | ||
|
|
||
| /// Create is not null expression | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is a nice drive by cleanup |
||
| pub fn is_not_null(expr: Expr) -> Expr { | ||
| Expr::IsNotNull(Box::new(expr)) | ||
| } | ||
|
|
||
| /// Create is true expression | ||
| pub fn is_true(expr: Expr) -> Expr { | ||
| Expr::IsTrue(Box::new(expr)) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This change relaxes the schema check slightly. It now allows individual fields to change from nullable to not-nullable which is ok because it only allow a strict subset of the original schema.
schema_checkhas documentation stating that you should disable the schema check entirely if you want to do this. Seemed better to not have to disable checking entirely.