|
8 | 8 |
|
9 | 9 | package org.opensearch.search.aggregations.bucket.terms; |
10 | 10 |
|
11 | | -import org.apache.lucene.index.IndexReader; |
12 | 11 | import org.apache.lucene.index.LeafReaderContext; |
13 | 12 | import org.apache.lucene.index.SortedNumericDocValues; |
14 | 13 | import org.apache.lucene.util.NumericUtils; |
15 | 14 | import org.opensearch.common.Numbers; |
16 | 15 | import org.opensearch.common.lease.Releasable; |
17 | 16 | import org.opensearch.common.lease.Releasables; |
18 | | -import org.opensearch.common.util.LongArray; |
19 | 17 | import org.opensearch.index.fielddata.FieldData; |
20 | 18 | import org.opensearch.search.DocValueFormat; |
21 | 19 | import org.opensearch.search.aggregations.Aggregator; |
|
28 | 26 | import org.opensearch.search.aggregations.LeafBucketCollector; |
29 | 27 | import org.opensearch.search.aggregations.LeafBucketCollectorBase; |
30 | 28 | import org.opensearch.search.aggregations.bucket.LocalBucketCountThresholds; |
31 | | -import org.opensearch.search.aggregations.bucket.terms.heuristic.SignificanceHeuristic; |
32 | 29 | import org.opensearch.search.aggregations.support.ValuesSource; |
33 | | -import org.opensearch.search.internal.ContextIndexSearcher; |
34 | 30 | import org.opensearch.search.internal.SearchContext; |
35 | 31 |
|
36 | 32 | import java.io.IOException; |
@@ -527,126 +523,6 @@ UnsignedLongTerms.Bucket buildFinalBucket(LongKeyedBucketOrds.BucketOrdsEnum ord |
527 | 523 | } |
528 | 524 | } |
529 | 525 |
|
530 | | - class SignificantLongTermsResults extends ResultStrategy<SignificantLongTerms, SignificantLongTerms.Bucket> { |
531 | | - private final SignificanceLookup.BackgroundFrequencyForLong backgroundFrequencies; |
532 | | - private final long supersetSize; |
533 | | - private final SignificanceHeuristic significanceHeuristic; |
534 | | - private LongArray subsetSizes; |
535 | | - |
536 | | - SignificantLongTermsResults( |
537 | | - SignificanceLookup significanceLookup, |
538 | | - SignificanceHeuristic significanceHeuristic, |
539 | | - CardinalityUpperBound cardinality |
540 | | - ) { |
541 | | - backgroundFrequencies = significanceLookup.longLookup(context.bigArrays(), cardinality); |
542 | | - supersetSize = significanceLookup.supersetSize(); |
543 | | - this.significanceHeuristic = significanceHeuristic; |
544 | | - subsetSizes = context.bigArrays().newLongArray(1, true); |
545 | | - } |
546 | | - |
547 | | - @Override |
548 | | - SortedNumericDocValues getValues(LeafReaderContext ctx) throws IOException { |
549 | | - return valuesSource.longValues(ctx); |
550 | | - } |
551 | | - |
552 | | - @Override |
553 | | - String describe() { |
554 | | - return "stream_significant_terms"; |
555 | | - } |
556 | | - |
557 | | - @Override |
558 | | - LeafBucketCollector wrapCollector(LeafBucketCollector primary) { |
559 | | - return new LeafBucketCollectorBase(primary, null) { |
560 | | - @Override |
561 | | - public void collect(int doc, long owningBucketOrd) throws IOException { |
562 | | - super.collect(doc, owningBucketOrd); |
563 | | - subsetSizes = context.bigArrays().grow(subsetSizes, owningBucketOrd + 1); |
564 | | - subsetSizes.increment(owningBucketOrd, 1); |
565 | | - } |
566 | | - }; |
567 | | - } |
568 | | - |
569 | | - @Override |
570 | | - SignificantLongTerms.Bucket[][] buildTopBucketsPerOrd(int size) { |
571 | | - return new SignificantLongTerms.Bucket[size][]; |
572 | | - } |
573 | | - |
574 | | - @Override |
575 | | - SignificantLongTerms.Bucket[] buildBuckets(int size) { |
576 | | - return new SignificantLongTerms.Bucket[size]; |
577 | | - } |
578 | | - |
579 | | - @Override |
580 | | - void buildSubAggs(SignificantLongTerms.Bucket[][] topBucketsPerOrd) throws IOException { |
581 | | - buildSubAggsForAllBuckets(topBucketsPerOrd, b -> b.bucketOrd, (b, aggs) -> b.aggregations = aggs); |
582 | | - } |
583 | | - |
584 | | - @Override |
585 | | - void collectZeroDocEntriesIfNeeded(long owningBucketOrd) throws IOException {} |
586 | | - |
587 | | - @Override |
588 | | - SignificantLongTerms buildResult(long owningBucketOrd, long otherDocCoun, SignificantLongTerms.Bucket[] topBuckets) { |
589 | | - SignificantLongTerms significantLongTerms = new SignificantLongTerms( |
590 | | - name, |
591 | | - metadata(), |
592 | | - format, |
593 | | - subsetSizes.get(owningBucketOrd), |
594 | | - supersetSize, |
595 | | - significanceHeuristic, |
596 | | - List.of(topBuckets), |
597 | | - bucketCountThresholds |
598 | | - ); |
599 | | - return significantLongTerms; |
600 | | - } |
601 | | - |
602 | | - @Override |
603 | | - SignificantLongTerms buildEmptyResult() { |
604 | | - // We need to account for the significance of a miss in our global stats - provide corpus size as context |
605 | | - ContextIndexSearcher searcher = context.searcher(); |
606 | | - IndexReader topReader = searcher.getIndexReader(); |
607 | | - int supersetSize = topReader.numDocs(); |
608 | | - return new SignificantLongTerms( |
609 | | - name, |
610 | | - metadata(), |
611 | | - format, |
612 | | - 0, |
613 | | - supersetSize, |
614 | | - significanceHeuristic, |
615 | | - emptyList(), |
616 | | - bucketCountThresholds |
617 | | - ); |
618 | | - } |
619 | | - |
620 | | - @Override |
621 | | - SignificantLongTerms.Bucket buildFinalBucket(LongKeyedBucketOrds.BucketOrdsEnum ordsEnum, long docCount, long owningBucketOrd) |
622 | | - throws IOException { |
623 | | - long subsetSize = subsetSizes.get(owningBucketOrd); |
624 | | - double score = significanceHeuristic.getScore( |
625 | | - ordsEnum.value(), |
626 | | - subsetSize, |
627 | | - backgroundFrequencies.freq(ordsEnum.value()), |
628 | | - supersetSize |
629 | | - ); |
630 | | - SignificantLongTerms.Bucket result = new SignificantLongTerms.Bucket( |
631 | | - docCount, |
632 | | - subsetSize, |
633 | | - backgroundFrequencies.freq(ordsEnum.value()), |
634 | | - supersetSize, |
635 | | - ordsEnum.value(), |
636 | | - null, |
637 | | - format, |
638 | | - score |
639 | | - ); |
640 | | - result.bucketOrd = ordsEnum.ord(); |
641 | | - return result; |
642 | | - } |
643 | | - |
644 | | - @Override |
645 | | - public void close() { |
646 | | - Releasables.close(backgroundFrequencies, subsetSizes); |
647 | | - } |
648 | | - } |
649 | | - |
650 | 526 | @Override |
651 | 527 | public InternalAggregation buildEmptyAggregation() { |
652 | 528 | return resultStrategy.buildEmptyResult(); |
|
0 commit comments