Skip to content
Open
Show file tree
Hide file tree
Changes from 73 commits
Commits
Show all changes
75 commits
Select commit Hold shift + click to select a range
e8a0bb0
Initial impl
mdashti Sep 25, 2025
dd3eefc
Added `Filter` impl in `build_single_agg_segment_collector_with_reade…
mdashti Sep 25, 2025
bd03cb0
Added `Filter(FilterBucketResult)` + Made tests work.
mdashti Sep 25, 2025
baa4790
Fixed type issues.
mdashti Sep 25, 2025
7ad2379
Fixed a test.
mdashti Sep 25, 2025
225d19d
8a7a73a: Pass `segment_reader`
mdashti Sep 25, 2025
5500245
Added more tests.
mdashti Sep 25, 2025
e38c7a4
Improved parsing + tests
mdashti Sep 25, 2025
fb2e9fc
refactoring
mdashti Sep 25, 2025
ac01943
Added more tests.
mdashti Sep 25, 2025
150387a
refactoring: moved parsing code under QueryParser
mdashti Sep 25, 2025
5e3b23d
Use Tantivy syntax instead of ES
mdashti Sep 25, 2025
d3cafc0
Added a sanity check test.
mdashti Sep 26, 2025
b0de0ca
Simplified impl + tests
mdashti Sep 26, 2025
a02489c
Added back tests in a more maintainable way
mdashti Sep 26, 2025
6cedc91
nitz.
mdashti Sep 26, 2025
5208690
nitz
mdashti Sep 26, 2025
f942d1f
implemented very simple fast-path
mdashti Sep 26, 2025
7d3c054
improved a comment
mdashti Sep 26, 2025
04c2913
implemented fast field support
mdashti Sep 26, 2025
4848335
Used `BoundsRange`
mdashti Sep 26, 2025
cf209ef
Improved fast field impl + tests
mdashti Sep 26, 2025
9806e10
Simplified execution.
mdashti Sep 26, 2025
b817391
Fixed exports + nitz
mdashti Sep 28, 2025
10d9d26
Improved the tests to check against the expected result.
mdashti Sep 29, 2025
49736ca
Improved test by checking the whole result JSON
mdashti Sep 29, 2025
569208e
Removed brittle perf checks.
mdashti Sep 29, 2025
1e19e02
Added efficiency verification tests.
mdashti Sep 29, 2025
a2e270a
Added one more efficiency check test.
mdashti Sep 29, 2025
6401d56
Improved the efficiency tests.
mdashti Sep 29, 2025
b13bc17
Removed unnecessary parsing code + added direct Query obj
mdashti Sep 29, 2025
720985d
Fixed tests.
mdashti Sep 29, 2025
615cb43
Improved tests
mdashti Oct 1, 2025
56e9c4f
Fixed code structure
mdashti Oct 1, 2025
0bf9f2e
Fixed lint issues
mdashti Oct 2, 2025
c7a53c7
nitz.
mdashti Oct 2, 2025
2f25694
nitz
mdashti Oct 2, 2025
455a8a6
nitz.
mdashti Oct 2, 2025
0552a33
nitz.
mdashti Oct 2, 2025
c93af1e
nitz.
mdashti Oct 2, 2025
fe0a5f8
Added an example
mdashti Oct 2, 2025
1dac4c9
Fixed PR comments.
mdashti Oct 3, 2025
8a425a2
Applied PR comments + nitz
mdashti Oct 3, 2025
15aa486
nitz.
mdashti Oct 3, 2025
d352aec
Improved the code.
mdashti Oct 3, 2025
f144df4
Fixed a perf issue.
mdashti Oct 7, 2025
9703ecb
Added batch processing.
mdashti Oct 7, 2025
5590913
Made the example more interesting
mdashti Oct 7, 2025
da84007
Fixed bucket count
mdashti Oct 7, 2025
f68e8e0
Renamed Direct to CustomQuery
mdashti Oct 7, 2025
b59d56c
Fixed lint issues.
mdashti Oct 7, 2025
6611ee7
No need for scorer to be an `Option`
mdashti Oct 8, 2025
d9f4fb6
nitz
mdashti Oct 8, 2025
a570edb
Used BitSet
mdashti Oct 8, 2025
9b421b2
Added an optimization for AllQuery
mdashti Oct 8, 2025
efdcc96
Merge branch 'tantivy-main' into paradedb/filter-agg-feature
mdashti Oct 20, 2025
27d5c52
Fixed merge issues.
mdashti Oct 20, 2025
190c1e5
Fixed lint issues.
mdashti Oct 20, 2025
17d5b3f
Added benchmark for FILTER
mdashti Oct 21, 2025
24ab4cb
Removed the Option wrapper.
mdashti Oct 21, 2025
5ac6c9d
Merge branch 'tantivy-main' into paradedb/filter-agg-feature
mdashti Oct 21, 2025
f55387f
nitz.
mdashti Oct 21, 2025
d2cf3de
Applied PR comments.
mdashti Oct 22, 2025
94bdd5d
Fixed the AllQuery optimization
mdashti Oct 22, 2025
bc565b0
Applied PR comments.
mdashti Oct 23, 2025
42c9935
feat: used `erased_serde` to allow filter query to be serialized
mdashti Oct 23, 2025
c1fb35f
further improved a comment
mdashti Oct 23, 2025
d9b148c
Added back tests.
mdashti Oct 23, 2025
144465d
removed an unused method
mdashti Oct 23, 2025
996a966
removed an unused method
mdashti Oct 23, 2025
b8e823f
Added documentation
mdashti Oct 23, 2025
7e75da5
nitz.
mdashti Oct 23, 2025
6bc7cf0
Merge branch 'main' into paradedb/filter-agg-feature
mdashti Oct 23, 2025
4b94636
Added query builder.
mdashti Oct 31, 2025
a4df7df
Fixed a comment.
mdashti Oct 31, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ hyperloglogplus = { version = "0.4.1", features = ["const-loop"] }
futures-util = { version = "0.3.28", optional = true }
futures-channel = { version = "0.3.28", optional = true }
fnv = "1.0.7"
erased-serde = "0.4.8"

[target.'cfg(windows)'.dependencies]
winapi = "0.3.9"
Expand Down
64 changes: 64 additions & 0 deletions benches/agg_bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@ fn bench_agg(mut group: InputGroup<Index>) {
register!(group, histogram_with_term_agg_few);
register!(group, avg_and_range_with_avg_sub_agg);

// Filter aggregation benchmarks
register!(group, filter_agg_all_query_count_agg);
register!(group, filter_agg_term_query_count_agg);
register!(group, filter_agg_all_query_with_sub_aggs);
register!(group, filter_agg_term_query_with_sub_aggs);

group.run();
}

Expand Down Expand Up @@ -472,3 +478,61 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {

Ok(index)
}

// Filter aggregation benchmarks

/// Benchmark: a `filter` aggregation using the filter string `"*"` with a
/// single `value_count` sub-aggregation over the `score` field.
///
/// The request is handed to `execute_agg` together with `index`.
fn filter_agg_all_query_count_agg(index: &Index) {
    execute_agg(
        index,
        json!({
            "filtered": {
                "filter": "*",
                "aggs": {
                    "count": { "value_count": { "field": "score" } }
                }
            }
        }),
    );
}

/// Benchmark: a `filter` aggregation using the filter string `"text:cool"`
/// with a single `value_count` sub-aggregation over the `score` field.
///
/// The request is handed to `execute_agg` together with `index`.
fn filter_agg_term_query_count_agg(index: &Index) {
    // Sub-aggregation evaluated on the documents selected by the filter.
    let count_scores = json!({
        "count": { "value_count": { "field": "score" } }
    });
    let request = json!({
        "filtered": {
            "filter": "text:cool",
            "aggs": count_scores
        }
    });
    execute_agg(index, request);
}

/// Benchmark: a `filter` aggregation using the filter string `"*"` with three
/// sub-aggregations: `avg` on `score`, `stats` on `score_f64`, and `terms` on
/// `text_few_terms`.
///
/// The request is handed to `execute_agg` together with `index`.
fn filter_agg_all_query_with_sub_aggs(index: &Index) {
    // The three sub-aggregations computed inside the filter bucket.
    let sub_aggs = json!({
        "avg_score": { "avg": { "field": "score" } },
        "stats_score": { "stats": { "field": "score_f64" } },
        "terms_text": {
            "terms": { "field": "text_few_terms" }
        }
    });
    execute_agg(
        index,
        json!({
            "filtered": { "filter": "*", "aggs": sub_aggs }
        }),
    );
}

/// Benchmark: a `filter` aggregation using the filter string `"text:cool"`
/// with three sub-aggregations: `avg` on `score`, `stats` on `score_f64`, and
/// `terms` on `text_few_terms`.
///
/// The request is handed to `execute_agg` together with `index`.
fn filter_agg_term_query_with_sub_aggs(index: &Index) {
    // Each sub-aggregation is named on its own, then assembled into the request.
    let avg_score = json!({ "avg": { "field": "score" } });
    let stats_score = json!({ "stats": { "field": "score_f64" } });
    let terms_text = json!({ "terms": { "field": "text_few_terms" } });

    let request = json!({
        "filtered": {
            "filter": "text:cool",
            "aggs": {
                "avg_score": avg_score,
                "stats_score": stats_score,
                "terms_text": terms_text
            }
        }
    });
    execute_agg(index, request);
}
212 changes: 212 additions & 0 deletions examples/filter_aggregation.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
// # Filter Aggregation Example
//
// This example demonstrates filter aggregations - creating buckets of documents
// matching specific queries, with nested aggregations computed on each bucket.
//
// Filter aggregations are useful for computing metrics on different subsets of
// your data in a single query, like "average price overall + average price for
// electronics + count of in-stock items".

use serde_json::json;
use tantivy::aggregation::agg_req::Aggregations;
use tantivy::aggregation::AggregationCollector;
use tantivy::query::AllQuery;
use tantivy::schema::{Schema, FAST, INDEXED, TEXT};
use tantivy::{doc, Index};

fn main() -> tantivy::Result<()> {
// Create a simple product schema
let mut schema_builder = Schema::builder();
schema_builder.add_text_field("category", TEXT | FAST);
schema_builder.add_text_field("brand", TEXT | FAST);
schema_builder.add_u64_field("price", FAST);
schema_builder.add_f64_field("rating", FAST);
schema_builder.add_bool_field("in_stock", FAST | INDEXED);
let schema = schema_builder.build();

// Create index and add sample products
let index = Index::create_in_ram(schema.clone());
let mut writer = index.writer(50_000_000)?;

writer.add_document(doc!(
schema.get_field("category")? => "electronics",
schema.get_field("brand")? => "apple",
schema.get_field("price")? => 999u64,
schema.get_field("rating")? => 4.5f64,
schema.get_field("in_stock")? => true
))?;
writer.add_document(doc!(
schema.get_field("category")? => "electronics",
schema.get_field("brand")? => "samsung",
schema.get_field("price")? => 799u64,
schema.get_field("rating")? => 4.2f64,
schema.get_field("in_stock")? => true
))?;
writer.add_document(doc!(
schema.get_field("category")? => "clothing",
schema.get_field("brand")? => "nike",
schema.get_field("price")? => 120u64,
schema.get_field("rating")? => 4.1f64,
schema.get_field("in_stock")? => false
))?;
writer.add_document(doc!(
schema.get_field("category")? => "books",
schema.get_field("brand")? => "penguin",
schema.get_field("price")? => 25u64,
schema.get_field("rating")? => 4.8f64,
schema.get_field("in_stock")? => true
))?;

writer.commit()?;

let reader = index.reader()?;
let searcher = reader.searcher();

// Example 1: Basic filter with metric aggregation
println!("=== Example 1: Electronics average price ===");
let agg_req = json!({
"electronics": {
"filter": "category:electronics",
"aggs": {
"avg_price": { "avg": { "field": "price" } }
}
}
});

let agg: Aggregations = serde_json::from_value(agg_req)?;
let collector = AggregationCollector::from_aggs(agg, Default::default());
let result = searcher.search(&AllQuery, &collector)?;

let expected = json!({
"electronics": {
"doc_count": 2,
"avg_price": { "value": 899.0 }
}
});
assert_eq!(serde_json::to_value(&result)?, expected);
println!("{}\n", serde_json::to_string_pretty(&result)?);

// Example 2: Multiple independent filters
println!("=== Example 2: Multiple filters in one query ===");
let agg_req = json!({
"electronics": {
"filter": "category:electronics",
"aggs": { "avg_price": { "avg": { "field": "price" } } }
},
"in_stock": {
"filter": "in_stock:true",
"aggs": { "count": { "value_count": { "field": "brand" } } }
},
"high_rated": {
"filter": "rating:[4.5 TO *]",
"aggs": { "count": { "value_count": { "field": "brand" } } }
}
});

let agg: Aggregations = serde_json::from_value(agg_req)?;
let collector = AggregationCollector::from_aggs(agg, Default::default());
let result = searcher.search(&AllQuery, &collector)?;

let expected = json!({
"electronics": {
"doc_count": 2,
"avg_price": { "value": 899.0 }
},
"in_stock": {
"doc_count": 3,
"count": { "value": 3.0 }
},
"high_rated": {
"doc_count": 2,
"count": { "value": 2.0 }
}
});
assert_eq!(serde_json::to_value(&result)?, expected);
println!("{}\n", serde_json::to_string_pretty(&result)?);

// Example 3: Nested filters - progressive refinement
println!("=== Example 3: Nested filters ===");
let agg_req = json!({
"in_stock": {
"filter": "in_stock:true",
"aggs": {
"electronics": {
"filter": "category:electronics",
"aggs": {
"expensive": {
"filter": "price:[800 TO *]",
"aggs": {
"avg_rating": { "avg": { "field": "rating" } }
}
}
}
}
}
}
});

let agg: Aggregations = serde_json::from_value(agg_req)?;
let collector = AggregationCollector::from_aggs(agg, Default::default());
let result = searcher.search(&AllQuery, &collector)?;

let expected = json!({
"in_stock": {
"doc_count": 3, // apple, samsung, penguin
"electronics": {
"doc_count": 2, // apple, samsung
"expensive": {
"doc_count": 1, // only apple (999)
"avg_rating": { "value": 4.5 }
}
}
}
});
assert_eq!(serde_json::to_value(&result)?, expected);
println!("{}\n", serde_json::to_string_pretty(&result)?);

// Example 4: Filter with sub-aggregation (terms)
println!("=== Example 4: Filter with terms sub-aggregation ===");
let agg_req = json!({
"electronics": {
"filter": "category:electronics",
"aggs": {
"by_brand": {
"terms": { "field": "brand" },
"aggs": {
"avg_price": { "avg": { "field": "price" } }
}
}
}
}
});

let agg: Aggregations = serde_json::from_value(agg_req)?;
let collector = AggregationCollector::from_aggs(agg, Default::default());
let result = searcher.search(&AllQuery, &collector)?;

let expected = json!({
"electronics": {
"doc_count": 2,
"by_brand": {
"buckets": [
{
"key": "samsung",
"doc_count": 1,
"avg_price": { "value": 799.0 }
},
{
"key": "apple",
"doc_count": 1,
"avg_price": { "value": 999.0 }
}
],
"sum_other_doc_count": 0,
"doc_count_error_upper_bound": 0
}
}
});
assert_eq!(serde_json::to_value(&result)?, expected);
println!("{}", serde_json::to_string_pretty(&result)?);

Ok(())
}
Loading
Loading