@@ -24,64 +24,92 @@ mod data_utils;
2424use crate :: criterion:: Criterion ;
2525use arrow:: datatypes:: { DataType , Field , Schema } ;
2626use datafusion:: datasource:: MemTable ;
27- use datafusion:: error:: Result ;
2827use datafusion:: execution:: context:: SessionContext ;
29- use parking_lot:: Mutex ;
3028use std:: sync:: Arc ;
3129use tokio:: runtime:: Runtime ;
3230
33- fn plan ( ctx : Arc < Mutex < SessionContext > > , sql : & str ) {
31+ /// Create a logical plan from the specified sql
32+ fn logical_plan ( ctx : & SessionContext , sql : & str ) {
3433 let rt = Runtime :: new ( ) . unwrap ( ) ;
35- criterion:: black_box ( rt. block_on ( ctx. lock ( ) . sql ( sql) ) . unwrap ( ) ) ;
34+ criterion:: black_box ( rt. block_on ( ctx. sql ( sql) ) . unwrap ( ) ) ;
3635}
3736
38- /// Create schema representing a large table
39- pub fn create_schema ( column_prefix : & str ) -> Schema {
40- let fields = ( 0 ..200 )
37+ /// Create a physical ExecutionPlan (by way of logical plan)
38+ fn physical_plan ( ctx : & SessionContext , sql : & str ) {
39+ let rt = Runtime :: new ( ) . unwrap ( ) ;
40+ criterion:: black_box ( rt. block_on ( async {
41+ ctx. sql ( sql)
42+ . await
43+ . unwrap ( )
44+ . create_physical_plan ( )
45+ . await
46+ . unwrap ( )
47+ } ) ) ;
48+ }
49+
50+ /// Create schema with the specified number of columns
51+ pub fn create_schema ( column_prefix : & str , num_columns : usize ) -> Schema {
52+ let fields = ( 0 ..num_columns)
4153 . map ( |i| Field :: new ( format ! ( "{column_prefix}{i}" ) , DataType :: Int32 , true ) )
4254 . collect ( ) ;
4355 Schema :: new ( fields)
4456}
4557
46- pub fn create_table_provider ( column_prefix : & str ) -> Result < Arc < MemTable > > {
47- let schema = Arc :: new ( create_schema ( column_prefix) ) ;
48- MemTable :: try_new ( schema, vec ! [ ] ) . map ( Arc :: new)
58+ pub fn create_table_provider ( column_prefix : & str , num_columns : usize ) -> Arc < MemTable > {
59+ let schema = Arc :: new ( create_schema ( column_prefix, num_columns ) ) ;
60+ MemTable :: try_new ( schema, vec ! [ ] ) . map ( Arc :: new) . unwrap ( )
4961}
5062
51- fn create_context ( ) -> Result < Arc < Mutex < SessionContext > > > {
63+ fn create_context ( ) -> SessionContext {
5264 let ctx = SessionContext :: new ( ) ;
53- ctx. register_table ( "t1" , create_table_provider ( "a" ) ?) ?;
54- ctx. register_table ( "t2" , create_table_provider ( "b" ) ?) ?;
55- Ok ( Arc :: new ( Mutex :: new ( ctx) ) )
65+ ctx. register_table ( "t1" , create_table_provider ( "a" , 200 ) )
66+ . unwrap ( ) ;
67+ ctx. register_table ( "t2" , create_table_provider ( "b" , 200 ) )
68+ . unwrap ( ) ;
69+ ctx. register_table ( "t700" , create_table_provider ( "c" , 700 ) )
70+ . unwrap ( ) ;
71+ ctx
5672}
5773
5874fn criterion_benchmark ( c : & mut Criterion ) {
59- let ctx = create_context ( ) . unwrap ( ) ;
75+ let ctx = create_context ( ) ;
76+
77+ // Test simplest
78+ // https://github.com/apache/arrow-datafusion/issues/5157
79+ c. bench_function ( "logical_select_one_from_700" , |b| {
80+ b. iter ( || logical_plan ( & ctx, "SELECT c1 FROM t700" ) )
81+ } ) ;
82+
83+ // Test simplest
84+ // https://github.com/apache/arrow-datafusion/issues/5157
85+ c. bench_function ( "physical_select_one_from_700" , |b| {
86+ b. iter ( || physical_plan ( & ctx, "SELECT c1 FROM t700" ) )
87+ } ) ;
6088
61- c. bench_function ( "trivial join low numbered columns " , |b| {
89+ c. bench_function ( "logical_trivial_join_low_numbered_columns " , |b| {
6290 b. iter ( || {
63- plan (
64- ctx. clone ( ) ,
91+ logical_plan (
92+ & ctx,
6593 "SELECT t1.a2, t2.b2 \
6694 FROM t1, t2 WHERE a1 = b1",
6795 )
6896 } )
6997 } ) ;
7098
71- c. bench_function ( "trivial join high numbered columns " , |b| {
99+ c. bench_function ( "logical_trivial_join_high_numbered_columns " , |b| {
72100 b. iter ( || {
73- plan (
74- ctx. clone ( ) ,
101+ logical_plan (
102+ & ctx,
75103 "SELECT t1.a99, t2.b99 \
76104 FROM t1, t2 WHERE a199 = b199",
77105 )
78106 } )
79107 } ) ;
80108
81- c. bench_function ( "aggregate with join " , |b| {
109+ c. bench_function ( "logical_aggregate_with_join " , |b| {
82110 b. iter ( || {
83- plan (
84- ctx. clone ( ) ,
111+ logical_plan (
112+ & ctx,
85113 "SELECT t1.a99, MIN(t2.b1), MAX(t2.b199), AVG(t2.b123), COUNT(t2.b73) \
86114 FROM t1 JOIN t2 ON t1.a199 = t2.b199 GROUP BY t1.a99",
87115 )
0 commit comments