From 14980ff97e949b81d4830587838678f5f9316e34 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Thu, 6 Aug 2020 18:01:54 +0200 Subject: [PATCH] Fix ArrayIndexOutOfBoundsException (AOOBE) when setting min_doc_count to 0 in significant_terms (#60823) This commit fixes the computation of the subset size on empty buckets (doc count of 0): the owning bucket ordinal of an empty bucket can lie past the end of the subsetSizes array, so its subset size is now reported as 0 instead of being read out of bounds. The aggregator test refactoring in #60683 revealed this bug. --- .../terms/GlobalOrdinalsStringTermsAggregator.java | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index f5943d7902e63..cd2db4608274f 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -561,7 +561,7 @@ private InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws BucketUpdater updater = bucketUpdater(owningBucketOrds[ordIdx]); collectionStrategy.forEach(owningBucketOrds[ordIdx], new BucketInfoConsumer() { TB spare = null; - + @Override public void accept(long globalOrd, long bucketOrd, long docCount) throws IOException { otherDocCount[finalOrdIdx] += docCount; @@ -574,7 +574,7 @@ public void accept(long globalOrd, long bucketOrd, long docCount) throws IOExcep } } }); - + // Get the top buckets topBucketsPreOrd[ordIdx] = buildBuckets(ordered.size()); for (int i = ordered.size() - 1; i >= 0; --i) { @@ -797,9 +797,14 @@ SignificantStringTerms.Bucket buildEmptyTemporaryBucket() { return new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null, format, 0); } + private long subsetSize(long owningBucketOrd) { + // if the owningBucketOrd is not in the array that means the bucket is empty so the size has to be 0 + return owningBucketOrd 
< subsetSizes.size() ? subsetSizes.get(owningBucketOrd) : 0; + } + @Override BucketUpdater bucketUpdater(long owningBucketOrd) throws IOException { - long subsetSize = subsetSizes.get(owningBucketOrd); + long subsetSize = subsetSize(owningBucketOrd); return (spare, globalOrd, bucketOrd, docCount) -> { spare.bucketOrd = bucketOrd; oversizedCopy(lookupGlobalOrd.apply(globalOrd), spare.termBytes); @@ -839,7 +844,7 @@ SignificantStringTerms buildResult(long owningBucketOrd, long otherDocCount, Sig bucketCountThresholds.getMinDocCount(), metadata(), format, - subsetSizes.get(owningBucketOrd), + subsetSize(owningBucketOrd), supersetSize, significanceHeuristic, Arrays.asList(topBuckets)