Skip to content

Commit 9673315

Browse files
committed
Add example of dataframe API aggregations
Provide an example that uses the `min`, `max`, and `count` aggregate functions through the DataFrame API. This helps users update their code following the conversion of built-in aggregate functions to UDAFs.
1 parent ebc4485 commit 9673315

File tree

2 files changed

+54
-2
lines changed

2 files changed

+54
-2
lines changed
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use datafusion::error::Result;
19+
use datafusion::functions_aggregate::count::count;
20+
use datafusion::prelude::*;
21+
22+
/// This example demonstrates filtering and aggregating (`min`, `max`, `count`) an Arrow data
23+
/// source (CSV) with the DataFrame API and printing the results. See `csv_sql.rs` for a SQL version of this example.
24+
#[tokio::main]
25+
async fn main() -> Result<()> {
26+
// create the local execution context used to read the CSV file
27+
let ctx = SessionContext::new();
28+
29+
let testdata = datafusion::test_util::arrow_test_data();
30+
31+
// build the query: keep rows with 0.1 < c11 < 0.9, group by c1, and compute min/max/count of c12
32+
let df = ctx
33+
.read_csv(
34+
&format!("{testdata}/csv/aggregate_test_100.csv"),
35+
CsvReadOptions::new(),
36+
)
37+
.await?
38+
.filter(col("c11").gt(lit(0.1)).and(col("c11").lt(lit(0.9))))?
39+
.aggregate(
40+
vec![col("c1")],
41+
vec![
42+
min(col("c12")),
43+
max(col("c12")),
44+
count(col("c12")),
45+
],
46+
)?;
47+
48+
// execute the DataFrame and print the results
49+
df.show().await?;
50+
51+
Ok(())
52+
}

datafusion-examples/examples/csv_sql.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use datafusion::error::Result;
1919
use datafusion::prelude::*;
2020

2121
/// This example demonstrates executing a simple query against an Arrow data source (CSV) and
22-
/// fetching results
22+
/// fetching results. See `csv_dataframe.rs` for a DataFrame version of this example.
2323
#[tokio::main]
2424
async fn main() -> Result<()> {
2525
// create local execution context
@@ -38,7 +38,7 @@ async fn main() -> Result<()> {
3838
// execute the query
3939
let df = ctx
4040
.sql(
41-
"SELECT c1, MIN(c12), MAX(c12) \
41+
"SELECT c1, MIN(c12), MAX(c12), COUNT(*) \
4242
FROM aggregate_test_100 \
4343
WHERE c11 > 0.1 AND c11 < 0.9 \
4444
GROUP BY c1",

0 commit comments

Comments
 (0)