From b57b3deb2a046b9de6f45fd35a372033bfd04da8 Mon Sep 17 00:00:00 2001 From: Kaushal Kumar Date: Thu, 10 Oct 2024 23:25:11 -0700 Subject: [PATCH] Making wlm stats output easier to understand (#16278) * fix wlm stats output Signed-off-by: Kaushal Kumar * rename wlm stats vars Signed-off-by: Kaushal Kumar * fix ut failure Signed-off-by: Kaushal Kumar --------- Signed-off-by: Kaushal Kumar --- .../org/opensearch/wlm/QueryGroupService.java | 7 +--- .../opensearch/wlm/stats/QueryGroupState.java | 19 ++------- .../opensearch/wlm/stats/QueryGroupStats.java | 39 ++++++++----------- .../cluster/wlm/WlmStatsResponseTests.java | 7 +--- .../wlm/QueryGroupServiceTests.java | 4 +- ...eryGroupRequestOperationListenerTests.java | 6 --- .../wlm/stats/QueryGroupStateTests.java | 10 +---- .../wlm/stats/QueryGroupStatsTests.java | 4 +- .../opensearch/wlm/stats/WlmStatsTests.java | 3 +- 9 files changed, 28 insertions(+), 71 deletions(-) diff --git a/server/src/main/java/org/opensearch/wlm/QueryGroupService.java b/server/src/main/java/org/opensearch/wlm/QueryGroupService.java index 30d39686931be..cb0be5597766a 100644 --- a/server/src/main/java/org/opensearch/wlm/QueryGroupService.java +++ b/server/src/main/java/org/opensearch/wlm/QueryGroupService.java @@ -11,7 +11,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.ResourceNotFoundException; -import org.opensearch.action.search.SearchShardTask; import org.opensearch.cluster.ClusterChangedEvent; import org.opensearch.cluster.ClusterStateListener; import org.opensearch.cluster.metadata.Metadata; @@ -354,10 +353,6 @@ public void onTaskCompleted(Task task) { queryGroupId = QueryGroupTask.DEFAULT_QUERY_GROUP_ID_SUPPLIER.get(); } - if (task instanceof SearchShardTask) { - queryGroupsStateAccessor.getQueryGroupState(queryGroupId).shardCompletions.inc(); - } else { - queryGroupsStateAccessor.getQueryGroupState(queryGroupId).completions.inc(); - } + queryGroupsStateAccessor.getQueryGroupState(queryGroupId).totalCompletions.inc(); } } diff --git a/server/src/main/java/org/opensearch/wlm/stats/QueryGroupState.java b/server/src/main/java/org/opensearch/wlm/stats/QueryGroupState.java index cbc7046a79464..a082ed159e829 100644 --- a/server/src/main/java/org/opensearch/wlm/stats/QueryGroupState.java +++ b/server/src/main/java/org/opensearch/wlm/stats/QueryGroupState.java @@ -21,12 +21,7 @@ public class QueryGroupState { /** * co-ordinator level completions at the query group level, this is a cumulative counter since the Opensearch start time */ - public final CounterMetric completions = new CounterMetric(); - - /** - * shard level completions at the query group level, this is a cumulative counter since the Opensearch start time - */ - public final CounterMetric shardCompletions = new CounterMetric(); + public final CounterMetric totalCompletions = new CounterMetric(); /** * rejections at the query group level, this is a cumulative counter since the OpenSearch start time @@ -61,16 +56,8 @@ public QueryGroupState() { * * @return co-ordinator completions in the query group */ - public long getCompletions() { - return completions.count(); - } - - /** - * - * @return shard completions in the query group - */ - public long getShardCompletions() { - return shardCompletions.count(); + public long getTotalCompletions() { + return totalCompletions.count(); } /** diff --git a/server/src/main/java/org/opensearch/wlm/stats/QueryGroupStats.java b/server/src/main/java/org/opensearch/wlm/stats/QueryGroupStats.java index 12e0f0ff96f65..42ce3ac0019db 100644 --- a/server/src/main/java/org/opensearch/wlm/stats/QueryGroupStats.java +++ b/server/src/main/java/org/opensearch/wlm/stats/QueryGroupStats.java @@ -91,16 +91,14 @@ public Map getStats() { * the instance will only be created on demand through stats api */ public static class QueryGroupStatsHolder implements ToXContentObject, Writeable { - public static final String COMPLETIONS = "completions"; - public static final String REJECTIONS = "rejections"; + public static final String COMPLETIONS = "total_completions"; + public static final String REJECTIONS = "total_rejections"; public static final String TOTAL_CANCELLATIONS = "total_cancellations"; public static final String FAILURES = "failures"; - public static final String SHARD_COMPLETIONS = "shard_completions"; private long completions; - private long shardCompletions; private long rejections; private long failures; - private long totalCancellations; + private long cancellations; private Map resourceStats; // this is needed to support the factory method @@ -110,15 +108,13 @@ public QueryGroupStatsHolder( long completions, long rejections, long failures, - long totalCancellations, - long shardCompletions, + long cancellations, Map resourceStats ) { this.completions = completions; this.rejections = rejections; this.failures = failures; - this.shardCompletions = shardCompletions; - this.totalCancellations = totalCancellations; + this.cancellations = cancellations; this.resourceStats = resourceStats; } @@ -126,8 +122,7 @@ public QueryGroupStatsHolder(StreamInput in) throws IOException { this.completions = in.readVLong(); this.rejections = in.readVLong(); this.failures = in.readVLong(); - this.totalCancellations = in.readVLong(); - this.shardCompletions = in.readVLong(); + this.cancellations = in.readVLong(); this.resourceStats = in.readMap((i) -> ResourceType.fromName(i.readString()), ResourceStats::new); } @@ -145,11 +140,10 @@ public static QueryGroupStatsHolder from(QueryGroupState queryGroupState) { resourceStatsMap.put(resourceTypeStateEntry.getKey(), ResourceStats.from(resourceTypeStateEntry.getValue())); } - statsHolder.completions = queryGroupState.getCompletions(); + statsHolder.completions = queryGroupState.getTotalCompletions(); statsHolder.rejections = queryGroupState.getTotalRejections(); statsHolder.failures = queryGroupState.getFailures(); - statsHolder.totalCancellations = queryGroupState.getTotalCancellations(); - statsHolder.shardCompletions = queryGroupState.getShardCompletions(); + statsHolder.cancellations = queryGroupState.getTotalCancellations(); statsHolder.resourceStats = resourceStatsMap; return statsHolder; } @@ -164,8 +158,7 @@ public static void writeTo(StreamOutput out, QueryGroupStatsHolder statsHolder) out.writeVLong(statsHolder.completions); out.writeVLong(statsHolder.rejections); out.writeVLong(statsHolder.failures); - out.writeVLong(statsHolder.totalCancellations); - out.writeVLong(statsHolder.shardCompletions); + out.writeVLong(statsHolder.cancellations); out.writeMap(statsHolder.resourceStats, (o, val) -> o.writeString(val.getName()), ResourceStats::writeTo); } @@ -177,10 +170,10 @@ public void writeTo(StreamOutput out) throws IOException { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.field(COMPLETIONS, completions); - builder.field(SHARD_COMPLETIONS, shardCompletions); + // builder.field(SHARD_COMPLETIONS, shardCompletions); builder.field(REJECTIONS, rejections); - builder.field(FAILURES, failures); - builder.field(TOTAL_CANCELLATIONS, totalCancellations); + // builder.field(FAILURES, failures); + builder.field(TOTAL_CANCELLATIONS, cancellations); for (ResourceType resourceType : ResourceType.getSortedValues()) { ResourceStats resourceStats1 = resourceStats.get(resourceType); @@ -199,15 +192,14 @@ public boolean equals(Object o) { QueryGroupStatsHolder that = (QueryGroupStatsHolder) o; return completions == that.completions && rejections == that.rejections - && shardCompletions == that.shardCompletions && Objects.equals(resourceStats, that.resourceStats) && failures == that.failures - && totalCancellations == that.totalCancellations; + && cancellations == that.cancellations; } @Override public int hashCode() { - return Objects.hash(completions, shardCompletions, rejections, totalCancellations, failures, resourceStats); + return Objects.hash(completions, rejections, cancellations, failures, resourceStats); } } @@ -217,6 +209,7 @@ public int hashCode() { public static class ResourceStats implements ToXContentObject, Writeable { public static final String CURRENT_USAGE = "current_usage"; public static final String CANCELLATIONS = "cancellations"; + public static final String REJECTIONS = "rejections"; public static final double PRECISION = 1e-9; private final double currentUsage; private final long cancellations; @@ -268,7 +261,7 @@ public void writeTo(StreamOutput out) throws IOException { public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.field(CURRENT_USAGE, currentUsage); builder.field(CANCELLATIONS, cancellations); - builder.field(QueryGroupStatsHolder.REJECTIONS, rejections); + builder.field(REJECTIONS, rejections); return builder; } diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/wlm/WlmStatsResponseTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/wlm/WlmStatsResponseTests.java index 86fbc341691d2..01dc033568a95 100644 --- a/server/src/test/java/org/opensearch/action/admin/cluster/wlm/WlmStatsResponseTests.java +++ b/server/src/test/java/org/opensearch/action/admin/cluster/wlm/WlmStatsResponseTests.java @@ -46,7 +46,6 @@ public class WlmStatsResponseTests extends OpenSearchTestCase { 0, 1, 0, - 0, Map.of( ResourceType.CPU, new QueryGroupStats.ResourceStats(0, 0, 0), @@ -78,10 +77,8 @@ public void testToString() { + " \"node-1\" : {\n" + " \"query_groups\" : {\n" + " \"safjgagnaeekg-3r3fads\" : {\n" - + " \"completions\" : 0,\n" - + " \"shard_completions\" : 0,\n" - + " \"rejections\" : 0,\n" - + " \"failures\" : 1,\n" + + " \"total_completions\" : 0,\n" + + " \"total_rejections\" : 0,\n" + " \"total_cancellations\" : 0,\n" + " \"cpu\" : {\n" + " \"current_usage\" : 0.0,\n" diff --git a/server/src/test/java/org/opensearch/wlm/QueryGroupServiceTests.java b/server/src/test/java/org/opensearch/wlm/QueryGroupServiceTests.java index c5cf0dac4f807..45428865259c3 100644 --- a/server/src/test/java/org/opensearch/wlm/QueryGroupServiceTests.java +++ b/server/src/test/java/org/opensearch/wlm/QueryGroupServiceTests.java @@ -401,14 +401,14 @@ public void testOnTaskCompleted() { ((QueryGroupTask) task).setQueryGroupId(mockThreadPool.getThreadContext()); queryGroupService.onTaskCompleted(task); - assertEquals(1, queryGroupState.completions.count()); + assertEquals(1, queryGroupState.totalCompletions.count()); // test non QueryGroupTask task = new Task(1, "simple", "test", "mock task", null, null); queryGroupService.onTaskCompleted(task); // It should still be 1 - assertEquals(1, queryGroupState.completions.count()); + assertEquals(1, queryGroupState.totalCompletions.count()); mockThreadPool.shutdown(); } diff --git a/server/src/test/java/org/opensearch/wlm/listeners/QueryGroupRequestOperationListenerTests.java b/server/src/test/java/org/opensearch/wlm/listeners/QueryGroupRequestOperationListenerTests.java index 4d61e89b281f7..016588acf1e24 100644 --- a/server/src/test/java/org/opensearch/wlm/listeners/QueryGroupRequestOperationListenerTests.java +++ b/server/src/test/java/org/opensearch/wlm/listeners/QueryGroupRequestOperationListenerTests.java @@ -95,7 +95,6 @@ public void testValidQueryGroupRequestFailure() throws IOException { 0, 1, 0, - 0, Map.of( ResourceType.CPU, new QueryGroupStats.ResourceStats(0, 0, 0), @@ -109,7 +108,6 @@ public void testValidQueryGroupRequestFailure() throws IOException { 0, 0, 0, - 0, Map.of( ResourceType.CPU, new QueryGroupStats.ResourceStats(0, 0, 0), @@ -172,7 +170,6 @@ public void testMultiThreadedValidQueryGroupRequestFailures() { 0, ITERATIONS, 0, - 0, Map.of( ResourceType.CPU, new QueryGroupStats.ResourceStats(0, 0, 0), @@ -186,7 +183,6 @@ public void testMultiThreadedValidQueryGroupRequestFailures() { 0, 0, 0, - 0, Map.of( ResourceType.CPU, new QueryGroupStats.ResourceStats(0, 0, 0), @@ -209,7 +205,6 @@ public void testInvalidQueryGroupFailure() throws IOException { 0, 0, 0, - 0, Map.of( ResourceType.CPU, new QueryGroupStats.ResourceStats(0, 0, 0), @@ -223,7 +218,6 @@ public void testInvalidQueryGroupFailure() throws IOException { 0, 1, 0, - 0, Map.of( ResourceType.CPU, new QueryGroupStats.ResourceStats(0, 0, 0), diff --git a/server/src/test/java/org/opensearch/wlm/stats/QueryGroupStateTests.java b/server/src/test/java/org/opensearch/wlm/stats/QueryGroupStateTests.java index 566c4261d6878..c0dfa06a0fba1 100644 --- a/server/src/test/java/org/opensearch/wlm/stats/QueryGroupStateTests.java +++ b/server/src/test/java/org/opensearch/wlm/stats/QueryGroupStateTests.java @@ -23,13 +23,7 @@ public void testRandomQueryGroupsStateUpdates() { for (int i = 0; i < 25; i++) { if (i % 5 == 0) { - updaterThreads.add(new Thread(() -> { - if (randomBoolean()) { - queryGroupState.completions.inc(); - } else { - queryGroupState.shardCompletions.inc(); - } - })); + updaterThreads.add(new Thread(() -> { queryGroupState.totalCompletions.inc(); })); } else if (i % 5 == 1) { updaterThreads.add(new Thread(() -> { queryGroupState.totalRejections.inc(); @@ -63,7 +57,7 @@ public void testRandomQueryGroupsStateUpdates() { } }); - assertEquals(5, queryGroupState.getCompletions() + queryGroupState.getShardCompletions()); + assertEquals(5, queryGroupState.getTotalCompletions()); assertEquals(5, queryGroupState.getTotalRejections()); final long sumOfRejectionsDueToResourceTypes = queryGroupState.getResourceState().get(ResourceType.CPU).rejections.count() diff --git a/server/src/test/java/org/opensearch/wlm/stats/QueryGroupStatsTests.java b/server/src/test/java/org/opensearch/wlm/stats/QueryGroupStatsTests.java index bcab0c4d124f5..6fc4d178e54bc 100644 --- a/server/src/test/java/org/opensearch/wlm/stats/QueryGroupStatsTests.java +++ b/server/src/test/java/org/opensearch/wlm/stats/QueryGroupStatsTests.java @@ -38,7 +38,6 @@ public void testToXContent() throws IOException { 13, 2, 0, - 1213718, Map.of(ResourceType.CPU, new QueryGroupStats.ResourceStats(0.3, 13, 2)) ) ); @@ -48,7 +47,7 @@ public void testToXContent() throws IOException { queryGroupStats.toXContent(builder, ToXContent.EMPTY_PARAMS); builder.endObject(); assertEquals( - "{\"query_groups\":{\"afakjklaj304041-afaka\":{\"completions\":123456789,\"shard_completions\":1213718,\"rejections\":13,\"failures\":2,\"total_cancellations\":0,\"cpu\":{\"current_usage\":0.3,\"cancellations\":13,\"rejections\":2}}}}", + "{\"query_groups\":{\"afakjklaj304041-afaka\":{\"total_completions\":123456789,\"total_rejections\":13,\"total_cancellations\":0,\"cpu\":{\"current_usage\":0.3,\"cancellations\":13,\"rejections\":2}}}}", builder.toString() ); } @@ -68,7 +67,6 @@ protected QueryGroupStats createTestInstance() { randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong(), - randomNonNegativeLong(), Map.of( ResourceType.CPU, new QueryGroupStats.ResourceStats( diff --git a/server/src/test/java/org/opensearch/wlm/stats/WlmStatsTests.java b/server/src/test/java/org/opensearch/wlm/stats/WlmStatsTests.java index 75b8f570b7975..6910ca7f9937c 100644 --- a/server/src/test/java/org/opensearch/wlm/stats/WlmStatsTests.java +++ b/server/src/test/java/org/opensearch/wlm/stats/WlmStatsTests.java @@ -39,7 +39,6 @@ public void testToXContent() throws IOException { 13, 2, 0, - 1213718, Map.of(ResourceType.CPU, new QueryGroupStats.ResourceStats(0.3, 13, 2)) ) ); @@ -50,7 +49,7 @@ public void testToXContent() throws IOException { wlmStats.toXContent(builder, ToXContent.EMPTY_PARAMS); builder.endObject(); assertEquals( - "{\"query_groups\":{\"afakjklaj304041-afaka\":{\"completions\":123456789,\"shard_completions\":1213718,\"rejections\":13,\"failures\":2,\"total_cancellations\":0,\"cpu\":{\"current_usage\":0.3,\"cancellations\":13,\"rejections\":2}}}}", + "{\"query_groups\":{\"afakjklaj304041-afaka\":{\"total_completions\":123456789,\"total_rejections\":13,\"total_cancellations\":0,\"cpu\":{\"current_usage\":0.3,\"cancellations\":13,\"rejections\":2}}}}", builder.toString() ); }