Skip to content

Commit e088945

Browse files
authored
UDAF refactor: Add PhysicalExpr trait dependency on datafusion-expr and remove logical expressions requirement for creating physical aggregate expression (#11845)
* init draft Signed-off-by: jayzhan211 <[email protected]>
* production ready Signed-off-by: jayzhan211 <[email protected]>
* cleanup Signed-off-by: jayzhan211 <[email protected]>
* fix merge conflict Signed-off-by: jayzhan211 <[email protected]>
* mv accumulator out Signed-off-by: jayzhan211 <[email protected]>
* fix doc Signed-off-by: jayzhan211 <[email protected]>
* rename Signed-off-by: jayzhan211 <[email protected]>
* fix test Signed-off-by: jayzhan211 <[email protected]>
* fix test Signed-off-by: jayzhan211 <[email protected]>
* doc Signed-off-by: jayzhan211 <[email protected]>
* fix doc and cleanup Signed-off-by: jayzhan211 <[email protected]>
* fix doc Signed-off-by: jayzhan211 <[email protected]>
* clippy + doc Signed-off-by: jayzhan211 <[email protected]>
* cleanup Signed-off-by: jayzhan211 <[email protected]>
* cleanup Signed-off-by: jayzhan211 <[email protected]>
* rename exprs Signed-off-by: jayzhan211 <[email protected]>
* rm create_aggregate_expr_with_dfschema Signed-off-by: jayzhan211 <[email protected]>
* revert change in calc_requirements Signed-off-by: jayzhan211 <[email protected]>
* fmt Signed-off-by: jayzhan211 <[email protected]>
* doc and cleanup Signed-off-by: jayzhan211 <[email protected]>
* rm dfschema Signed-off-by: jayzhan211 <[email protected]>
* rm input types Signed-off-by: jayzhan211 <[email protected]>
* rename return_type Signed-off-by: jayzhan211 <[email protected]>
* upd doc Signed-off-by: jayzhan211 <[email protected]>
* move group accumulator adapter to functions-aggregate-common Signed-off-by: jayzhan211 <[email protected]>
* fix Signed-off-by: jayzhan211 <[email protected]>
---------
Signed-off-by: jayzhan211 <[email protected]>
1 parent b5d7931 commit e088945

File tree

100 files changed

+1776
-1846
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

100 files changed

+1776
-1846
lines changed

Cargo.toml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,16 @@ members = [
2323
"datafusion/catalog",
2424
"datafusion/core",
2525
"datafusion/expr",
26+
"datafusion/expr-common",
2627
"datafusion/execution",
27-
"datafusion/functions-aggregate",
2828
"datafusion/functions",
29+
"datafusion/functions-aggregate",
30+
"datafusion/functions-aggregate-common",
2931
"datafusion/functions-nested",
3032
"datafusion/optimizer",
31-
"datafusion/physical-expr-common",
3233
"datafusion/physical-expr",
34+
"datafusion/physical-expr-common",
35+
"datafusion/physical-expr-functions-aggregate",
3336
"datafusion/physical-optimizer",
3437
"datafusion/physical-plan",
3538
"datafusion/proto",
@@ -94,12 +97,15 @@ datafusion-common = { path = "datafusion/common", version = "41.0.0", default-fe
9497
datafusion-common-runtime = { path = "datafusion/common-runtime", version = "41.0.0" }
9598
datafusion-execution = { path = "datafusion/execution", version = "41.0.0" }
9699
datafusion-expr = { path = "datafusion/expr", version = "41.0.0" }
100+
datafusion-expr-common = { path = "datafusion/expr-common", version = "41.0.0" }
97101
datafusion-functions = { path = "datafusion/functions", version = "41.0.0" }
98102
datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "41.0.0" }
103+
datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "41.0.0" }
99104
datafusion-functions-nested = { path = "datafusion/functions-nested", version = "41.0.0" }
100105
datafusion-optimizer = { path = "datafusion/optimizer", version = "41.0.0", default-features = false }
101106
datafusion-physical-expr = { path = "datafusion/physical-expr", version = "41.0.0", default-features = false }
102107
datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "41.0.0", default-features = false }
108+
datafusion-physical-expr-functions-aggregate = { path = "datafusion/physical-expr-functions-aggregate", version = "41.0.0" }
103109
datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "41.0.0" }
104110
datafusion-physical-plan = { path = "datafusion/physical-plan", version = "41.0.0" }
105111
datafusion-proto = { path = "datafusion/proto", version = "41.0.0" }

datafusion-cli/Cargo.lock

Lines changed: 46 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/core/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ datafusion-functions-nested = { workspace = true, optional = true }
109109
datafusion-optimizer = { workspace = true }
110110
datafusion-physical-expr = { workspace = true }
111111
datafusion-physical-expr-common = { workspace = true }
112+
datafusion-physical-expr-functions-aggregate = { workspace = true }
112113
datafusion-physical-optimizer = { workspace = true }
113114
datafusion-physical-plan = { workspace = true }
114115
datafusion-sql = { workspace = true }

datafusion/core/src/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -556,6 +556,11 @@ pub mod physical_expr_common {
556556
pub use datafusion_physical_expr_common::*;
557557
}
558558

559+
/// re-export of [`datafusion_physical_expr_functions_aggregate`] crate
560+
pub mod physical_expr_functions_aggregate {
561+
pub use datafusion_physical_expr_functions_aggregate::*;
562+
}
563+
559564
/// re-export of [`datafusion_physical_expr`] crate
560565
pub mod physical_expr {
561566
pub use datafusion_physical_expr::*;

datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ mod tests {
177177
use datafusion_functions_aggregate::count::count_udaf;
178178
use datafusion_functions_aggregate::sum::sum_udaf;
179179
use datafusion_physical_expr::expressions::col;
180-
use datafusion_physical_expr_common::aggregate::AggregateExprBuilder;
180+
use datafusion_physical_expr_functions_aggregate::aggregate::AggregateExprBuilder;
181181

182182
/// Runs the CombinePartialFinalAggregate optimizer and asserts the plan against the expected
183183
macro_rules! assert_optimized {

datafusion/core/src/physical_optimizer/limit_pushdown.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,9 +258,8 @@ mod tests {
258258
use datafusion_execution::{SendableRecordBatchStream, TaskContext};
259259
use datafusion_expr::Operator;
260260
use datafusion_physical_expr::expressions::BinaryExpr;
261+
use datafusion_physical_expr::expressions::{col, lit};
261262
use datafusion_physical_expr::Partitioning;
262-
use datafusion_physical_expr_common::expressions::column::col;
263-
use datafusion_physical_expr_common::expressions::lit;
264263
use datafusion_physical_plan::coalesce_batches::CoalesceBatchesExec;
265264
use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec;
266265
use datafusion_physical_plan::empty::EmptyExec;

datafusion/core/src/physical_planner.rs

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ use crate::physical_plan::unnest::UnnestExec;
5858
use crate::physical_plan::values::ValuesExec;
5959
use crate::physical_plan::windows::{BoundedWindowAggExec, WindowAggExec};
6060
use crate::physical_plan::{
61-
displayable, udaf, windows, AggregateExpr, ExecutionPlan, ExecutionPlanProperties,
61+
displayable, windows, AggregateExpr, ExecutionPlan, ExecutionPlanProperties,
6262
InputOrderMode, Partitioning, PhysicalExpr, WindowExpr,
6363
};
6464

@@ -73,7 +73,8 @@ use datafusion_common::{
7373
};
7474
use datafusion_expr::dml::CopyTo;
7575
use datafusion_expr::expr::{
76-
self, physical_name, AggregateFunction, Alias, GroupingSet, WindowFunction,
76+
self, create_function_physical_name, physical_name, AggregateFunction, Alias,
77+
GroupingSet, WindowFunction,
7778
};
7879
use datafusion_expr::expr_rewriter::unnormalize_cols;
7980
use datafusion_expr::logical_plan::builder::wrap_projection_for_join_if_necessary;
@@ -83,6 +84,7 @@ use datafusion_expr::{
8384
};
8485
use datafusion_physical_expr::expressions::Literal;
8586
use datafusion_physical_expr::LexOrdering;
87+
use datafusion_physical_expr_functions_aggregate::aggregate::AggregateExprBuilder;
8688
use datafusion_physical_plan::placeholder_row::PlaceholderRowExec;
8789
use datafusion_sql::utils::window_expr_common_partition_keys;
8890

@@ -1559,6 +1561,17 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter(
15591561
order_by,
15601562
null_treatment,
15611563
}) => {
1564+
let name = if let Some(name) = name {
1565+
name
1566+
} else {
1567+
create_function_physical_name(
1568+
func.name(),
1569+
*distinct,
1570+
args,
1571+
order_by.as_ref(),
1572+
)?
1573+
};
1574+
15621575
let physical_args =
15631576
create_physical_exprs(args, logical_input_schema, execution_props)?;
15641577
let filter = match filter {
@@ -1575,7 +1588,6 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter(
15751588
== NullTreatment::IgnoreNulls;
15761589

15771590
let (agg_expr, filter, order_by) = {
1578-
let sort_exprs = order_by.clone().unwrap_or(vec![]);
15791591
let physical_sort_exprs = match order_by {
15801592
Some(exprs) => Some(create_physical_sort_exprs(
15811593
exprs,
@@ -1588,18 +1600,15 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter(
15881600
let ordering_reqs: Vec<PhysicalSortExpr> =
15891601
physical_sort_exprs.clone().unwrap_or(vec![]);
15901602

1591-
let agg_expr = udaf::create_aggregate_expr_with_dfschema(
1592-
func,
1593-
&physical_args,
1594-
args,
1595-
&sort_exprs,
1596-
&ordering_reqs,
1597-
logical_input_schema,
1598-
name,
1599-
ignore_nulls,
1600-
*distinct,
1601-
false,
1602-
)?;
1603+
let schema: Schema = logical_input_schema.clone().into();
1604+
let agg_expr =
1605+
AggregateExprBuilder::new(func.to_owned(), physical_args.to_vec())
1606+
.order_by(ordering_reqs.to_vec())
1607+
.schema(Arc::new(schema))
1608+
.alias(name)
1609+
.with_ignore_nulls(ignore_nulls)
1610+
.with_distinct(*distinct)
1611+
.build()?;
16031612

16041613
(agg_expr, filter, physical_sort_exprs)
16051614
};

datafusion/core/src/test_util/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ use datafusion_physical_expr::{
5454

5555
use async_trait::async_trait;
5656
use datafusion_catalog::Session;
57-
use datafusion_physical_expr_common::aggregate::AggregateExprBuilder;
57+
use datafusion_physical_expr_functions_aggregate::aggregate::AggregateExprBuilder;
5858
use futures::Stream;
5959
use tempfile::TempDir;
6060
// backwards compatibility

datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ use arrow::util::pretty::pretty_format_batches;
2525
use arrow_array::types::Int64Type;
2626
use datafusion::common::Result;
2727
use datafusion::datasource::MemTable;
28+
use datafusion::physical_expr_functions_aggregate::aggregate::AggregateExprBuilder;
2829
use datafusion::physical_plan::aggregates::{
2930
AggregateExec, AggregateMode, PhysicalGroupBy,
3031
};
@@ -35,7 +36,6 @@ use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor}
3536
use datafusion_functions_aggregate::sum::sum_udaf;
3637
use datafusion_physical_expr::expressions::col;
3738
use datafusion_physical_expr::PhysicalSortExpr;
38-
use datafusion_physical_expr_common::aggregate::AggregateExprBuilder;
3939
use datafusion_physical_plan::InputOrderMode;
4040
use test_utils::{add_empty_batches, StringBatchGenerator};
4141

datafusion/expr-common/Cargo.toml

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
[package]
19+
name = "datafusion-expr-common"
20+
description = "Logical plan and expression representation for DataFusion query engine"
21+
keywords = ["datafusion", "logical", "plan", "expressions"]
22+
readme = "README.md"
23+
version = { workspace = true }
24+
edition = { workspace = true }
25+
homepage = { workspace = true }
26+
repository = { workspace = true }
27+
license = { workspace = true }
28+
authors = { workspace = true }
29+
rust-version = { workspace = true }
30+
31+
[lints]
32+
workspace = true
33+
34+
[lib]
35+
name = "datafusion_expr_common"
36+
path = "src/lib.rs"
37+
38+
[features]
39+
40+
[dependencies]
41+
arrow = { workspace = true }
42+
datafusion-common = { workspace = true }
43+
paste = "^1.0"

0 commit comments

Comments
 (0)