-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Reproducer tests for #18380 (resorting sorted inputs) #18352
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
a4288a2
fcd4d9f
b44cf08
6c31946
566da90
b09a26e
602ac89
7f153b9
83e8d15
f98d6ae
fa07937
1fb2219
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -45,6 +45,7 @@ use insta::assert_snapshot; | |
| use object_store::local::LocalFileSystem; | ||
| use std::collections::HashMap; | ||
| use std::fs; | ||
| use std::path::Path; | ||
| use std::sync::Arc; | ||
| use tempfile::TempDir; | ||
| use url::Url; | ||
|
|
@@ -2996,6 +2997,152 @@ async fn test_count_wildcard_on_window() -> Result<()> { | |
| Ok(()) | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_false() -> Result<()> { | ||
| assert_snapshot!( | ||
| union_with_mix_of_presorted_and_explicitly_resorted_inputs_impl(false).await?, | ||
| @r#" | ||
| +---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | ||
| | plan_type | plan | | ||
| +---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | ||
| | logical_plan | Aggregate: groupBy=[[id]], aggr=[[]] | | ||
| | | Union | | ||
| | | TableScan: sorted projection=[id] | | ||
| | | Sort: unsorted.id ASC NULLS LAST | | ||
| | | TableScan: unsorted projection=[id] | | ||
| | physical_plan | AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], ordering_mode=Sorted | | ||
| | | SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] | | ||
| | | CoalescePartitionsExec | | ||
| | | AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[] | | ||
| | | UnionExec | | ||
| | | DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet | | ||
| | | DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet | | ||
| | | | | ||
| +---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | ||
| "#); | ||
| Ok(()) | ||
| } | ||
|
|
||
| #[ignore] // See https://github.com/apache/datafusion/issues/18380 | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why are we ignoring the test when the plan is also commented out in the body? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The plan is not commented out. |
||
| #[tokio::test] | ||
| async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_true() -> Result<()> { | ||
rgehan marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| assert_snapshot!( | ||
| union_with_mix_of_presorted_and_explicitly_resorted_inputs_impl(true).await?, | ||
| @r#" | ||
| +---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | ||
| | plan_type | plan | | ||
| +---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | ||
| | logical_plan | Aggregate: groupBy=[[id]], aggr=[[]] | | ||
| | | Union | | ||
| | | TableScan: sorted projection=[id] | | ||
| | | Sort: unsorted.id ASC NULLS LAST | | ||
| | | TableScan: unsorted projection=[id] | | ||
| | physical_plan | AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], ordering_mode=Sorted | | ||
| | | SortPreservingMergeExec: [id@0 ASC NULLS LAST] | | ||
| | | AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[], ordering_mode=Sorted | | ||
| | | UnionExec | | ||
| | | DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet | | ||
| | | SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] | | ||
| | | DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet | | ||
| | | | | ||
| +---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | ||
| "#); | ||
|
|
||
| // 💥 Doesn't pass, and generates this plan: | ||
| // | ||
| // AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], ordering_mode=Sorted | ||
| // SortPreservingMergeExec: [id@0 ASC NULLS LAST] | ||
| // SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[true] | ||
| // AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[] | ||
| // UnionExec | ||
| // DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet | ||
| // DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet | ||
rgehan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| // | ||
| // | ||
| // === Excerpt from the verbose explain === | ||
| // | ||
| // Physical_plan after EnforceDistribution: | ||
| // | ||
| // OutputRequirementExec: order_by=[], dist_by=Unspecified | ||
| // AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], ordering_mode=Sorted | ||
| // SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] | ||
| // CoalescePartitionsExec | ||
| // AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[], ordering_mode=Sorted | ||
| // UnionExec | ||
| // DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet | ||
| // SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] | ||
| // DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet | ||
| // | ||
| // Physical_plan after EnforceSorting: | ||
rgehan marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| // | ||
| // OutputRequirementExec: order_by=[], dist_by=Unspecified | ||
| // AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], ordering_mode=Sorted | ||
| // SortPreservingMergeExec: [id@0 ASC NULLS LAST] | ||
| // SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[true] | ||
| // AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[] | ||
| // UnionExec | ||
| // DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet | ||
| // DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet | ||
|
|
||
| Ok(()) | ||
| } | ||
|
|
||
| async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_impl(repartition_sorts: bool) -> Result<String> { | ||
| let config = SessionConfig::default() | ||
| .with_target_partitions(1) | ||
| .with_repartition_sorts(repartition_sorts); | ||
rgehan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| let ctx = SessionContext::new_with_config(config); | ||
|
|
||
| let testdata = parquet_test_data(); | ||
|
|
||
| // Register "sorted" table, that is sorted | ||
| ctx.register_parquet( | ||
| "sorted", | ||
| &format!("{testdata}/alltypes_tiny_pages.parquet"), | ||
| ParquetReadOptions::default() | ||
| .file_sort_order(vec![vec![col("id").sort(true, false)]]), | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (Sidenote: Interestingly, with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you know whether the file is actually sorted or you just add this function to trick the planner to plan this file as it is sorted? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The file is not actually sorted no, but I was hoping this was a valid way of making the planner think it is, and plan accordingly. Can this cause issues? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Likely not but I am not % sure if we do anything special with parquet file. |
||
| ) | ||
| .await?; | ||
|
|
||
| // Register "unsorted" table | ||
| ctx.register_parquet( | ||
| "unsorted", | ||
| &format!("{testdata}/alltypes_tiny_pages.parquet"), | ||
| ParquetReadOptions::default() | ||
| ) | ||
| .await?; | ||
|
|
||
| let source_sorted = ctx | ||
| .table("sorted") | ||
| .await | ||
| .unwrap() | ||
| .select(vec![col("id")]) | ||
| .unwrap(); | ||
|
|
||
| let source_unsorted = ctx | ||
| .table("unsorted") | ||
| .await | ||
| .unwrap() | ||
| .select(vec![col("id")]) | ||
| .unwrap(); | ||
|
|
||
| let source_unsorted_resorted = source_unsorted | ||
| .sort(vec![col("id").sort(true, false)])?; | ||
|
|
||
| let union = source_sorted.union(source_unsorted_resorted)?; | ||
rgehan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| let agg = union.aggregate(vec![col("id")], vec![])?; | ||
|
|
||
| let df = agg; | ||
rgehan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| // To be able to remove user specific paths from the plan, for stable assertions | ||
| let testdata_clean = Path::new(&testdata).canonicalize()?.display().to_string(); | ||
| let testdata_clean = testdata_clean.strip_prefix("/").unwrap_or(&testdata_clean); | ||
|
|
||
| let plan = df.explain(false, false)?.collect().await?; | ||
| Ok(pretty_format_batches(&plan)?.to_string().replace(&testdata_clean, "{testdata}")) | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn test_count_wildcard_on_aggregate() -> Result<()> { | ||
| let ctx = create_join_context()?; | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.