Skip to content

Commit d6e95f7

Browse files
committed
settings reorganized
1 parent 556b0c6 commit d6e95f7

File tree

6 files changed

+96
-43
lines changed

6 files changed

+96
-43
lines changed

datafusion/common/src/config.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -195,9 +195,6 @@ config_namespace! {
195195
/// Should DataFusion collect statistics after listing files
196196
pub collect_statistics: bool, default = false
197197

198-
/// Enables parallel file scanning. Currently supported only for Parquet format
199-
pub parallel_file_scan: bool, default = false
200-
201198
/// Number of partitions for query execution. Increasing partitions can increase
202199
/// concurrency. Defaults to the number of cpu cores on the system
203200
pub target_partitions: usize, default = num_cpus::get()
@@ -264,10 +261,17 @@ config_namespace! {
264261
/// in parallel using the provided `target_partitions` level
265262
pub repartition_aggregations: bool, default = true
266263

264+
/// Minimum total file size in bytes required to perform file scan repartitioning.
265+
pub repartition_file_min_size: usize, default = 10 * 1024 * 1024
266+
267267
/// Should DataFusion repartition data using the join keys to execute joins in parallel
268268
/// using the provided `target_partitions` level
269269
pub repartition_joins: bool, default = true
270270

271+
/// When set to true, file groups will be repartitioned to achieve maximum parallelism.
272+
/// Currently supported only for Parquet format
273+
pub repartition_file_scans: bool, default = false
274+
271275
/// Should DataFusion repartition data using the partition keys to execute window
272276
/// functions in parallel using the provided `target_partitions` level
273277
pub repartition_windows: bool, default = true

datafusion/core/src/execution/context.rs

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1216,11 +1216,6 @@ impl SessionConfig {
12161216
self.options.execution.collect_statistics
12171217
}
12181218

1219-
/// Are file scans parallelized during execution?
1220-
pub fn parallel_file_scan(&self) -> bool {
1221-
self.options.execution.parallel_file_scan
1222-
}
1223-
12241219
/// Selects a name for the default catalog and schema
12251220
pub fn with_default_catalog_and_schema(
12261221
mut self,
@@ -1256,6 +1251,18 @@ impl SessionConfig {
12561251
self
12571252
}
12581253

1254+
/// Sets the minimum total file size, in bytes, required for file scans to be repartitioned
1255+
pub fn with_repartition_file_min_size(mut self, size: usize) -> Self {
1256+
self.options.optimizer.repartition_file_min_size = size;
1257+
self
1258+
}
1259+
1260+
/// Enables or disables the use of repartitioning for file scans
1261+
pub fn with_repartition_file_scans(mut self, enabled: bool) -> Self {
1262+
self.options.optimizer.repartition_file_scans = enabled;
1263+
self
1264+
}
1265+
12591266
/// Enables or disables the use of repartitioning for window functions to improve parallelism
12601267
pub fn with_repartition_windows(mut self, enabled: bool) -> Self {
12611268
self.options.optimizer.repartition_windows = enabled;
@@ -1279,12 +1286,6 @@ impl SessionConfig {
12791286
self
12801287
}
12811288

1282-
/// Enables or disables parallel file scanning after listing files
1283-
pub fn with_parallel_file_scan(mut self, enabled: bool) -> Self {
1284-
self.options.execution.parallel_file_scan = enabled;
1285-
self
1286-
}
1287-
12881289
/// Get the currently configured batch size
12891290
pub fn batch_size(&self) -> usize {
12901291
self.options.execution.batch_size

datafusion/core/src/physical_optimizer/repartition.rs

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,8 @@ fn optimize_partitions(
168168
is_root: bool,
169169
can_reorder: bool,
170170
would_benefit: bool,
171-
parallel_file_scan: bool,
171+
repartition_file_scans: bool,
172+
repartition_file_min_size: usize,
172173
) -> Result<Arc<dyn ExecutionPlan>> {
173174
// Recurse into children bottom-up (attempt to repartition as
174175
// early as possible)
@@ -201,7 +202,8 @@ fn optimize_partitions(
201202
false, // child is not root
202203
can_reorder_child,
203204
plan.benefits_from_input_partitioning(),
204-
parallel_file_scan,
205+
repartition_file_scans,
206+
repartition_file_min_size,
205207
)
206208
})
207209
.collect::<Result<_>>()?;
@@ -237,10 +239,13 @@ fn optimize_partitions(
237239
return Ok(new_plan);
238240
}
239241

240-
// For ParquetExec return internally repartitioned version of the plan in case parallel_file_scan is set
242+
// For ParquetExec return internally repartitioned version of the plan in case `repartition_file_scans` is set
241243
if let Some(parquet_exec) = new_plan.as_any().downcast_ref::<ParquetExec>() {
242-
if parallel_file_scan {
243-
return Ok(Arc::new(parquet_exec.get_repartitioned(target_partitions)));
244+
if repartition_file_scans {
245+
return Ok(Arc::new(parquet_exec.get_repartitioned(
246+
target_partitions,
247+
repartition_file_min_size,
248+
)));
244249
}
245250
}
246251

@@ -267,7 +272,9 @@ impl PhysicalOptimizerRule for Repartition {
267272
) -> Result<Arc<dyn ExecutionPlan>> {
268273
let target_partitions = config.execution.target_partitions;
269274
let enabled = config.optimizer.enable_round_robin_repartition;
270-
let parallel_file_scan = config.execution.parallel_file_scan;
275+
let repartition_file_scans = config.optimizer.repartition_file_scans;
276+
let repartition_file_min_size =
277+
config.optimizer.repartition_file_min_size;
271278
// Don't run optimizer if target_partitions == 1
272279
if !enabled || target_partitions == 1 {
273280
Ok(plan)
@@ -281,7 +288,8 @@ impl PhysicalOptimizerRule for Repartition {
281288
is_root,
282289
can_reorder,
283290
would_benefit,
284-
parallel_file_scan,
291+
repartition_file_scans,
292+
repartition_file_min_size,
285293
)
286294
}
287295
}
@@ -486,15 +494,16 @@ mod tests {
486494
/// Runs the repartition optimizer and asserts the plan against the expected
487495
macro_rules! assert_optimized {
488496
($EXPECTED_LINES: expr, $PLAN: expr) => {
489-
assert_optimized!($EXPECTED_LINES, $PLAN, 10, false);
497+
assert_optimized!($EXPECTED_LINES, $PLAN, 10, false, 1024);
490498
};
491499

492-
($EXPECTED_LINES: expr, $PLAN: expr, $TAGRET_PARTITIONS: expr, $PARALLEL_SCAN: expr) => {
500+
($EXPECTED_LINES: expr, $PLAN: expr, $TAGRET_PARTITIONS: expr, $REPARTITION_FILE_SCANS: expr, $REPARTITION_FILE_MIN_SIZE: expr) => {
493501
let expected_lines: Vec<&str> = $EXPECTED_LINES.iter().map(|s| *s).collect();
494502

495503
let mut config = ConfigOptions::new();
496504
config.execution.target_partitions = $TAGRET_PARTITIONS;
497-
config.execution.parallel_file_scan = $PARALLEL_SCAN;
505+
config.optimizer.repartition_file_scans = $REPARTITION_FILE_SCANS;
506+
config.optimizer.repartition_file_min_size = $REPARTITION_FILE_MIN_SIZE;
498507

499508
// run optimizer
500509
let optimizers: Vec<Arc<dyn PhysicalOptimizerRule + Sync + Send>> = vec![
@@ -900,7 +909,7 @@ mod tests {
900909
"ParquetExec: limit=None, partitions={2 groups: [[x:0..50], [x:50..100]]}, projection=[c1]",
901910
];
902911

903-
assert_optimized!(expected, plan, 2, true);
912+
assert_optimized!(expected, plan, 2, true, 10);
904913
Ok(())
905914
}
906915

@@ -916,7 +925,7 @@ mod tests {
916925
"ParquetExec: limit=None, partitions={2 groups: [[x], [y]]}, projection=[c1]",
917926
];
918927

919-
assert_optimized!(expected, plan, 2, true);
928+
assert_optimized!(expected, plan, 2, true, 10);
920929
Ok(())
921930
}
922931

@@ -933,7 +942,7 @@ mod tests {
933942
"ParquetExec: limit=None, partitions={1 group: [[x]]}, projection=[c1]",
934943
];
935944

936-
assert_optimized!(expected, plan, 2, true);
945+
assert_optimized!(expected, plan, 2, true, 10);
937946
Ok(())
938947
}
939948

@@ -952,7 +961,7 @@ mod tests {
952961
"ParquetExec: limit=None, partitions={1 group: [[x]]}, projection=[c1]",
953962
];
954963

955-
assert_optimized!(expected, plan, 2, true);
964+
assert_optimized!(expected, plan, 2, true, 10);
956965
Ok(())
957966
}
958967

@@ -977,7 +986,7 @@ mod tests {
977986
"ParquetExec: limit=None, partitions={1 group: [[x]]}, projection=[c1]",
978987
];
979988

980-
assert_optimized!(expected, plan, 2, true);
989+
assert_optimized!(expected, plan, 2, true, 10);
981990
Ok(())
982991
}
983992

@@ -995,7 +1004,7 @@ mod tests {
9951004
"ParquetExec: limit=None, partitions={1 group: [[x]]}, projection=[c1]",
9961005
];
9971006

998-
assert_optimized!(expected, plan, 2, true);
1007+
assert_optimized!(expected, plan, 2, true, 10);
9991008
Ok(())
10001009
}
10011010

@@ -1010,7 +1019,7 @@ mod tests {
10101019
"ParquetExec: limit=None, partitions={1 group: [[x]]}, output_ordering=[c1@0 ASC], projection=[c1]",
10111020
];
10121021

1013-
assert_optimized!(expected, plan, 2, true);
1022+
assert_optimized!(expected, plan, 2, true, 10);
10141023
Ok(())
10151024
}
10161025

@@ -1028,7 +1037,7 @@ mod tests {
10281037
"ParquetExec: limit=None, partitions={1 group: [[x]]}, output_ordering=[c1@0 ASC], projection=[c1]",
10291038
];
10301039

1031-
assert_optimized!(expected, plan, 2, true);
1040+
assert_optimized!(expected, plan, 2, true, 10);
10321041
Ok(())
10331042
}
10341043

@@ -1045,7 +1054,7 @@ mod tests {
10451054
"ParquetExec: limit=None, partitions={1 group: [[x]]}, output_ordering=[c1@0 ASC], projection=[c1]",
10461055
];
10471056

1048-
assert_optimized!(expected, plan, 2, true);
1057+
assert_optimized!(expected, plan, 2, true, 10);
10491058
Ok(())
10501059
}
10511060

@@ -1061,7 +1070,7 @@ mod tests {
10611070
"ParquetExec: limit=None, partitions={1 group: [[x]]}, output_ordering=[c1@0 ASC], projection=[c1]",
10621071
];
10631072

1064-
assert_optimized!(expected, plan, 2, true);
1073+
assert_optimized!(expected, plan, 2, true, 10);
10651074
Ok(())
10661075
}
10671076

datafusion/core/src/physical_plan/file_format/parquet.rs

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,11 @@ impl ParquetExec {
243243
}
244244

245245
/// Redistribute files across partitions according to their size
246-
pub fn get_repartitioned(&self, target_partitions: usize) -> Self {
246+
pub fn get_repartitioned(
247+
&self,
248+
target_partitions: usize,
249+
repartition_file_min_size: usize,
250+
) -> Self {
247251
let flattened_files = self
248252
.base_config()
249253
.file_groups
@@ -261,6 +265,10 @@ impl ParquetExec {
261265
.iter()
262266
.map(|f| f.object_meta.size as i64)
263267
.sum::<i64>();
268+
if total_size < (repartition_file_min_size as i64) {
269+
return self.clone();
270+
}
271+
264272
let target_partition_size =
265273
(total_size as usize + (target_partitions) - 1) / (target_partitions);
266274

@@ -1738,7 +1746,7 @@ mod tests {
17381746

17391747
let actual = file_groups_to_vec(
17401748
parquet_exec
1741-
.get_repartitioned(4)
1749+
.get_repartitioned(4, 10)
17421750
.base_config()
17431751
.file_groups
17441752
.clone(),
@@ -1775,7 +1783,7 @@ mod tests {
17751783

17761784
let actual = file_groups_to_vec(
17771785
parquet_exec
1778-
.get_repartitioned(96)
1786+
.get_repartitioned(96, 5)
17791787
.base_config()
17801788
.file_groups
17811789
.clone(),
@@ -1817,7 +1825,7 @@ mod tests {
18171825

18181826
let actual = file_groups_to_vec(
18191827
parquet_exec
1820-
.get_repartitioned(3)
1828+
.get_repartitioned(3, 10)
18211829
.base_config()
18221830
.file_groups
18231831
.clone(),
@@ -1855,7 +1863,7 @@ mod tests {
18551863

18561864
let actual = file_groups_to_vec(
18571865
parquet_exec
1858-
.get_repartitioned(2)
1866+
.get_repartitioned(2, 10)
18591867
.base_config()
18601868
.file_groups
18611869
.clone(),
@@ -1869,7 +1877,7 @@ mod tests {
18691877
}
18701878

18711879
#[tokio::test]
1872-
async fn parquet_exec_repartition_no_action() {
1880+
async fn parquet_exec_repartition_no_action_ranges() {
18731881
// No action due to Some(range) in second file
18741882
let partitioned_file_1 = PartitionedFile::new("a".to_string(), 123);
18751883
let mut partitioned_file_2 = PartitionedFile::new("b".to_string(), 144);
@@ -1893,13 +1901,42 @@ mod tests {
18931901
);
18941902

18951903
let actual = parquet_exec
1896-
.get_repartitioned(65)
1904+
.get_repartitioned(65, 10)
18971905
.base_config()
18981906
.file_groups
18991907
.clone();
19001908
assert_eq!(2, actual.len());
19011909
}
19021910

1911+
#[tokio::test]
1912+
async fn parquet_exec_repartition_no_action_min_size() {
1913+
// No action: the total file size (123) is below `repartition_file_min_size` (500)
1914+
let partitioned_file = PartitionedFile::new("a".to_string(), 123);
1915+
let single_partition = vec![vec![partitioned_file]];
1916+
let parquet_exec = ParquetExec::new(
1917+
FileScanConfig {
1918+
object_store_url: ObjectStoreUrl::local_filesystem(),
1919+
file_groups: single_partition,
1920+
file_schema: Arc::new(Schema::empty()),
1921+
statistics: Statistics::default(),
1922+
projection: None,
1923+
limit: None,
1924+
table_partition_cols: vec![],
1925+
output_ordering: None,
1926+
infinite_source: false,
1927+
},
1928+
None,
1929+
None,
1930+
);
1931+
1932+
let actual = parquet_exec
1933+
.get_repartitioned(65, 500)
1934+
.base_config()
1935+
.file_groups
1936+
.clone();
1937+
assert_eq!(1, actual.len());
1938+
}
1939+
19031940
fn file_groups_to_vec(
19041941
file_groups: Vec<Vec<PartitionedFile>>,
19051942
) -> Vec<(usize, String, i64, i64)> {

datafusion/core/tests/sqllogictests/test_files/information_schema.slt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,6 @@ datafusion.catalog.location NULL
115115
datafusion.execution.batch_size 8192
116116
datafusion.execution.coalesce_batches true
117117
datafusion.execution.collect_statistics false
118-
datafusion.execution.parallel_file_scan false
119118
datafusion.execution.parquet.enable_page_index false
120119
datafusion.execution.parquet.metadata_size_hint NULL
121120
datafusion.execution.parquet.pruning true
@@ -132,6 +131,8 @@ datafusion.optimizer.hash_join_single_partition_threshold 1048576
132131
datafusion.optimizer.max_passes 3
133132
datafusion.optimizer.prefer_hash_join true
134133
datafusion.optimizer.repartition_aggregations true
134+
datafusion.optimizer.repartition_file_min_size 10485760
135+
datafusion.optimizer.repartition_file_scans false
135136
datafusion.optimizer.repartition_joins true
136137
datafusion.optimizer.repartition_windows true
137138
datafusion.optimizer.skip_failed_rules true

docs/source/user-guide/configs.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ Environment variables are read during `SessionConfig` initialisation so they mus
4747
| datafusion.execution.batch_size | 8192 | Default batch size while creating new batches. It's especially useful for buffer-in-memory batches, since creating tiny batches would result in too much metadata memory consumption |
4848
| datafusion.execution.coalesce_batches | true | When set to true, record batches will be examined between each operator and small batches will be coalesced into larger batches. This is helpful when there are highly selective filters or joins that could produce tiny output batches. The target batch size is determined by the configuration setting |
4949
| datafusion.execution.collect_statistics | false | Should DataFusion collect statistics after listing files |
50-
| datafusion.execution.parallel_file_scan | false | Enables parallel file scanning. Currently supported only for Parquet format |
5150
| datafusion.execution.target_partitions | 0 | Number of partitions for query execution. Increasing partitions can increase concurrency. Defaults to the number of cpu cores on the system |
5251
| datafusion.execution.time_zone | +00:00 | The default time zone. Some functions, e.g. EXTRACT(HOUR from SOME_TIME), shift the underlying datetime according to this time zone, and then extract the hour |
5352
| datafusion.execution.parquet.enable_page_index | false | If true, uses parquet data page level metadata (Page Index) statistics to reduce the number of rows decoded. |
@@ -59,7 +58,9 @@ Environment variables are read during `SessionConfig` initialisation so they mus
5958
| datafusion.optimizer.enable_round_robin_repartition | true | When set to true, the physical plan optimizer will try to add round robin repartition to increase parallelism to leverage more CPU cores |
6059
| datafusion.optimizer.filter_null_join_keys | false | When set to true, the optimizer will insert filters before a join between a nullable and non-nullable column to filter out nulls on the nullable side. This filter can add additional overhead when the file format does not fully support predicate push down. |
6160
| datafusion.optimizer.repartition_aggregations | true | Should DataFusion repartition data using the aggregate keys to execute aggregates in parallel using the provided `target_partitions` level |
61+
| datafusion.optimizer.repartition_file_min_size | 10485760 | Minimum total file size in bytes required to perform file scan repartitioning. |
6262
| datafusion.optimizer.repartition_joins | true | Should DataFusion repartition data using the join keys to execute joins in parallel using the provided `target_partitions` level |
63+
| datafusion.optimizer.repartition_file_scans | false | When set to true, file groups will be repartitioned to achieve maximum parallelism. Currently supported only for Parquet format |
6364
| datafusion.optimizer.repartition_windows | true | Should DataFusion repartition data using the partition keys to execute window functions in parallel using the provided `target_partitions` level |
6465
| datafusion.optimizer.skip_failed_rules | true | When set to true, the logical plan optimizer will produce warning messages if any optimization rules produce errors and then proceed to the next rule. When set to false, any rules that produce errors will cause the query to fail |
6566
| datafusion.optimizer.max_passes | 3 | Number of times that the optimizer will attempt to optimize the plan |

0 commit comments

Comments
 (0)