Skip to content

Commit

Permalink
Fix ArrayIndexOutOfBoundsException when setting min_doc_count to 0 in significant_terms
Browse files Browse the repository at this point in the history
This commit fixes the computation of the subset size on empty buckets (doc count of 0).
The aggregator test refactoring in elastic#60683 revealed this bug.
  • Loading branch information
jimczi committed Aug 6, 2020
1 parent 5de0ed9 commit a7fa509
Showing 1 changed file with 7 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,7 @@ private InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws
BucketUpdater<TB> updater = bucketUpdater(owningBucketOrds[ordIdx]);
collectionStrategy.forEach(owningBucketOrds[ordIdx], new BucketInfoConsumer() {
TB spare = null;

@Override
public void accept(long globalOrd, long bucketOrd, long docCount) throws IOException {
otherDocCount[finalOrdIdx] += docCount;
Expand All @@ -574,7 +574,7 @@ public void accept(long globalOrd, long bucketOrd, long docCount) throws IOExcep
}
}
});

// Get the top buckets
topBucketsPreOrd[ordIdx] = buildBuckets(ordered.size());
for (int i = ordered.size() - 1; i >= 0; --i) {
Expand Down Expand Up @@ -799,7 +799,8 @@ SignificantStringTerms.Bucket buildEmptyTemporaryBucket() {

@Override
BucketUpdater<SignificantStringTerms.Bucket> bucketUpdater(long owningBucketOrd) throws IOException {
long subsetSize = subsetSizes.get(owningBucketOrd);
// the subset size is missing for empty buckets (doc count of 0)
long subsetSize = owningBucketOrd < subsetSizes.size() ? subsetSizes.get(owningBucketOrd) : 0;
return (spare, globalOrd, bucketOrd, docCount) -> {
spare.bucketOrd = bucketOrd;
oversizedCopy(lookupGlobalOrd.apply(globalOrd), spare.termBytes);
Expand Down Expand Up @@ -833,13 +834,15 @@ void buildSubAggs(SignificantStringTerms.Bucket[][] topBucketsPreOrd) throws IOE

@Override
SignificantStringTerms buildResult(long owningBucketOrd, long otherDocCount, SignificantStringTerms.Bucket[] topBuckets) {
// the subset size is missing for empty buckets (doc count of 0)
long subsetSize = owningBucketOrd < subsetSizes.size() ? subsetSizes.get(owningBucketOrd) : 0;
return new SignificantStringTerms(
name,
bucketCountThresholds.getRequiredSize(),
bucketCountThresholds.getMinDocCount(),
metadata(),
format,
subsetSizes.get(owningBucketOrd),
subsetSize,
supersetSize,
significanceHeuristic,
Arrays.asList(topBuckets)
Expand Down

0 comments on commit a7fa509

Please sign in to comment.