quickwit-oss · mdashti · Sep 25, 2025 · Sep 25, 2025 · Sep 25, 2025 · Sep 25, 2025
diff --git a/Cargo.toml b/Cargo.toml
@@ -69,6 +69,7 @@ hyperloglogplus = { version = "0.4.1", features = ["const-loop"] }
 futures-util = { version = "0.3.28", optional = true }
 futures-channel = { version = "0.3.28", optional = true }
 fnv = "1.0.7"
+erased-serde = "0.4.8"
 
 [target.'cfg(windows)'.dependencies]
 winapi = "0.3.9"

diff --git a/benches/agg_bench.rs b/benches/agg_bench.rs
@@ -74,6 +74,12 @@ fn bench_agg(mut group: InputGroup<Index>) {
     register!(group, histogram_with_term_agg_few);
     register!(group, avg_and_range_with_avg_sub_agg);
 
+    // Filter aggregation benchmarks
+    register!(group, filter_agg_all_query_count_agg);
+    register!(group, filter_agg_term_query_count_agg);
+    register!(group, filter_agg_all_query_with_sub_aggs);
+    register!(group, filter_agg_term_query_with_sub_aggs);
+
     group.run();
 }
 
@@ -472,3 +478,61 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
 
     Ok(index)
 }
+
+// Filter aggregation benchmarks
+
+// Benchmark: a single filter bucket using the `*` query, with one
+// `value_count` metric on `score` nested under it.
+fn filter_agg_all_query_count_agg(index: &Index) {
+    let request = json!({
+        "filtered": {
+            "filter": "*",
+            "aggs": { "count": { "value_count": { "field": "score" } } }
+        }
+    });
+    execute_agg(index, request);
+}
+
+// Benchmark: a single filter bucket using the `text:cool` query, with one
+// `value_count` metric on `score` nested under it.
+fn filter_agg_term_query_count_agg(index: &Index) {
+    let request = json!({
+        "filtered": {
+            "filter": "text:cool",
+            "aggs": { "count": { "value_count": { "field": "score" } } }
+        }
+    });
+    execute_agg(index, request);
+}
+
+// Benchmark: a filter bucket using the `*` query with three nested
+// aggregations (avg on `score`, stats on `score_f64`, terms on `text_few_terms`).
+fn filter_agg_all_query_with_sub_aggs(index: &Index) {
+    let request = json!({
+        "filtered": {
+            "filter": "*",
+            "aggs": {
+                "avg_score": { "avg": { "field": "score" } },
+                "stats_score": { "stats": { "field": "score_f64" } },
+                "terms_text": { "terms": { "field": "text_few_terms" } }
+            }
+        }
+    });
+    execute_agg(index, request);
+}
+
+// Benchmark: a filter bucket using the `text:cool` query with three nested
+// aggregations (avg on `score`, stats on `score_f64`, terms on `text_few_terms`).
+fn filter_agg_term_query_with_sub_aggs(index: &Index) {
+    let request = json!({
+        "filtered": {
+            "filter": "text:cool",
+            "aggs": {
+                "avg_score": { "avg": { "field": "score" } },
+                "stats_score": { "stats": { "field": "score_f64" } },
+                "terms_text": { "terms": { "field": "text_few_terms" } }
+            }
+        }
+    });
+    execute_agg(index, request);
+}
diff --git a/examples/filter_aggregation.rs b/examples/filter_aggregation.rs
@@ -0,0 +1,212 @@
+// # Filter Aggregation Example
+//
+// This example demonstrates filter aggregations - creating buckets of documents
+// matching specific queries, with nested aggregations computed on each bucket.
+//
+// Filter aggregations are useful for computing metrics on different subsets of
+// your data in a single query, like "average price overall + average price for
+// electronics + count of in-stock items".
+
+use serde_json::json;
+use tantivy::aggregation::agg_req::Aggregations;
+use tantivy::aggregation::AggregationCollector;
+use tantivy::query::AllQuery;
+use tantivy::schema::{Schema, FAST, INDEXED, TEXT};
+use tantivy::{doc, Index};
+
+fn main() -> tantivy::Result<()> {
+    // Declare the product schema, keeping the field handle each `add_*` call returns
+    let mut builder = Schema::builder();
+    let category = builder.add_text_field("category", TEXT | FAST);
+    let brand = builder.add_text_field("brand", TEXT | FAST);
+    let price = builder.add_u64_field("price", FAST);
+    let rating = builder.add_f64_field("rating", FAST);
+    let in_stock = builder.add_bool_field("in_stock", FAST | INDEXED);
+    let schema = builder.build();
+
+    // Build an in-RAM index and load four sample products into it
+    let index = Index::create_in_ram(schema);
+    let mut index_writer = index.writer(50_000_000)?;
+
+    index_writer.add_document(doc!(
+        category => "electronics",
+        brand => "apple",
+        price => 999u64,
+        rating => 4.5f64,
+        in_stock => true
+    ))?;
+    index_writer.add_document(doc!(
+        category => "electronics",
+        brand => "samsung",
+        price => 799u64,
+        rating => 4.2f64,
+        in_stock => true
+    ))?;
+    index_writer.add_document(doc!(
+        category => "clothing",
+        brand => "nike",
+        price => 120u64,
+        rating => 4.1f64,
+        in_stock => false
+    ))?;
+    index_writer.add_document(doc!(
+        category => "books",
+        brand => "penguin",
+        price => 25u64,
+        rating => 4.8f64,
+        in_stock => true
+    ))?;
+
+    index_writer.commit()?;
+
+    let index_reader = index.reader()?;
+    let searcher = index_reader.searcher();
+
+    // Example 1: one filter bucket with a metric aggregation inside it
+    println!("=== Example 1: Electronics average price ===");
+    let request = json!({
+        "electronics": {
+            "filter": "category:electronics",
+            "aggs": {
+                "avg_price": { "avg": { "field": "price" } }
+            }
+        }
+    });
+
+    let aggregations = serde_json::from_value::<Aggregations>(request)?;
+    let agg_collector = AggregationCollector::from_aggs(aggregations, Default::default());
+    let response = searcher.search(&AllQuery, &agg_collector)?;
+
+    let expected_json = json!({
+        "electronics": {
+            "doc_count": 2,
+            "avg_price": { "value": 899.0 }
+        }
+    });
+    assert_eq!(serde_json::to_value(&response)?, expected_json);
+    println!("{}\n", serde_json::to_string_pretty(&response)?);
+
+    // Example 2: several sibling filter buckets evaluated by a single request
+    println!("=== Example 2: Multiple filters in one query ===");
+    let request = json!({
+        "electronics": {
+            "filter": "category:electronics",
+            "aggs": { "avg_price": { "avg": { "field": "price" } } }
+        },
+        "in_stock": {
+            "filter": "in_stock:true",
+            "aggs": { "count": { "value_count": { "field": "brand" } } }
+        },
+        "high_rated": {
+            "filter": "rating:[4.5 TO *]",
+            "aggs": { "count": { "value_count": { "field": "brand" } } }
+        }
+    });
+
+    let aggregations = serde_json::from_value::<Aggregations>(request)?;
+    let agg_collector = AggregationCollector::from_aggs(aggregations, Default::default());
+    let response = searcher.search(&AllQuery, &agg_collector)?;
+
+    let expected_json = json!({
+        "electronics": {
+            "doc_count": 2,
+            "avg_price": { "value": 899.0 }
+        },
+        "in_stock": {
+            "doc_count": 3,
+            "count": { "value": 3.0 }
+        },
+        "high_rated": {
+            "doc_count": 2,
+            "count": { "value": 2.0 }
+        }
+    });
+    assert_eq!(serde_json::to_value(&response)?, expected_json);
+    println!("{}\n", serde_json::to_string_pretty(&response)?);
+
+    // Example 3: filters nested inside filters, each level narrowing the previous bucket
+    println!("=== Example 3: Nested filters ===");
+    let request = json!({
+        "in_stock": {
+            "filter": "in_stock:true",
+            "aggs": {
+                "electronics": {
+                    "filter": "category:electronics",
+                    "aggs": {
+                        "expensive": {
+                            "filter": "price:[800 TO *]",
+                            "aggs": {
+                                "avg_rating": { "avg": { "field": "rating" } }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    });
+
+    let aggregations = serde_json::from_value::<Aggregations>(request)?;
+    let agg_collector = AggregationCollector::from_aggs(aggregations, Default::default());
+    let response = searcher.search(&AllQuery, &agg_collector)?;
+
+    let expected_json = json!({
+        "in_stock": {
+            "doc_count": 3,  // apple, samsung, penguin
+            "electronics": {
+                "doc_count": 2,  // apple, samsung
+                "expensive": {
+                    "doc_count": 1,  // only apple (999)
+                    "avg_rating": { "value": 4.5 }
+                }
+            }
+        }
+    });
+    assert_eq!(serde_json::to_value(&response)?, expected_json);
+    println!("{}\n", serde_json::to_string_pretty(&response)?);
+
+    // Example 4: a terms bucket aggregation (with its own metric) under a filter
+    println!("=== Example 4: Filter with terms sub-aggregation ===");
+    let request = json!({
+        "electronics": {
+            "filter": "category:electronics",
+            "aggs": {
+                "by_brand": {
+                    "terms": { "field": "brand" },
+                    "aggs": {
+                        "avg_price": { "avg": { "field": "price" } }
+                    }
+                }
+            }
+        }
+    });
+
+    let aggregations = serde_json::from_value::<Aggregations>(request)?;
+    let agg_collector = AggregationCollector::from_aggs(aggregations, Default::default());
+    let response = searcher.search(&AllQuery, &agg_collector)?;
+
+    let expected_json = json!({
+        "electronics": {
+            "doc_count": 2,
+            "by_brand": {
+                "buckets": [
+                    {
+                        "key": "samsung",
+                        "doc_count": 1,
+                        "avg_price": { "value": 799.0 }
+                    },
+                    {
+                        "key": "apple",
+                        "doc_count": 1,
+                        "avg_price": { "value": 999.0 }
+                    }
+                ],
+                "sum_other_doc_count": 0,
+                "doc_count_error_upper_bound": 0
+            }
+        }
+    });
+    assert_eq!(serde_json::to_value(&response)?, expected_json);
+    println!("{}", serde_json::to_string_pretty(&response)?);
+
+    Ok(())
+}