Skip to content

Commit

Permalink
Fix AOOBE when setting min_doc_count to 0 in significant_terms (#60823)
Browse files Browse the repository at this point in the history
This commit fixes the computation of the subset size on empty buckets (doc count of 0).
The aggregator test refactoring in #60683 revealed this bug.
  • Loading branch information
jimczi committed Aug 6, 2020
1 parent fb7c431 commit 14980ff
Showing 1 changed file with 9 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,7 @@ private InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws
BucketUpdater<TB> updater = bucketUpdater(owningBucketOrds[ordIdx]);
collectionStrategy.forEach(owningBucketOrds[ordIdx], new BucketInfoConsumer() {
TB spare = null;

@Override
public void accept(long globalOrd, long bucketOrd, long docCount) throws IOException {
otherDocCount[finalOrdIdx] += docCount;
Expand All @@ -574,7 +574,7 @@ public void accept(long globalOrd, long bucketOrd, long docCount) throws IOExcep
}
}
});

// Get the top buckets
topBucketsPreOrd[ordIdx] = buildBuckets(ordered.size());
for (int i = ordered.size() - 1; i >= 0; --i) {
Expand Down Expand Up @@ -797,9 +797,14 @@ SignificantStringTerms.Bucket buildEmptyTemporaryBucket() {
return new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null, format, 0);
}

/**
 * Looks up the subset size recorded for an owning bucket.
 *
 * @param owningBucketOrd ordinal of the owning bucket to look up
 * @return the value stored in {@code subsetSizes} for that ordinal, or {@code 0}
 *         when the ordinal lies at or beyond the current size of {@code subsetSizes}
 */
private long subsetSize(long owningBucketOrd) {
    // An ordinal that is not present in the array belongs to an empty bucket,
    // so its subset size has to be 0 rather than an out-of-bounds read.
    if (owningBucketOrd >= subsetSizes.size()) {
        return 0;
    }
    return subsetSizes.get(owningBucketOrd);
}

@Override
BucketUpdater<SignificantStringTerms.Bucket> bucketUpdater(long owningBucketOrd) throws IOException {
long subsetSize = subsetSizes.get(owningBucketOrd);
long subsetSize = subsetSize(owningBucketOrd);
return (spare, globalOrd, bucketOrd, docCount) -> {
spare.bucketOrd = bucketOrd;
oversizedCopy(lookupGlobalOrd.apply(globalOrd), spare.termBytes);
Expand Down Expand Up @@ -839,7 +844,7 @@ SignificantStringTerms buildResult(long owningBucketOrd, long otherDocCount, Sig
bucketCountThresholds.getMinDocCount(),
metadata(),
format,
subsetSizes.get(owningBucketOrd),
subsetSize(owningBucketOrd),
supersetSize,
significanceHeuristic,
Arrays.asList(topBuckets)
Expand Down

0 comments on commit 14980ff

Please sign in to comment.