From d2362e22002d43e845226f67a027ab977fd22588 Mon Sep 17 00:00:00 2001 From: Himshikha Gupta Date: Wed, 18 Sep 2024 00:28:59 +0530 Subject: [PATCH] remote publication checksum stats (#15957) * Remote publication checksum stats Signed-off-by: Himshikha Gupta Co-authored-by: Himshikha Gupta --- .../remote/RemoteStatePublicationIT.java | 19 ++++++++++ .../remote/RemoteClusterStateService.java | 6 ++++ .../gateway/remote/RemoteDownloadStats.java | 36 +++++++++++++++++++ .../remote/RemotePersistenceStats.java | 24 ++++++++++--- .../RemoteClusterStateServiceTests.java | 10 ++++++ 5 files changed, 91 insertions(+), 4 deletions(-) create mode 100644 server/src/main/java/org/opensearch/gateway/remote/RemoteDownloadStats.java diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java index faab3645ae894..ffb9352e8ba47 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java @@ -56,6 +56,7 @@ import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING; import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING_KEY; import static org.opensearch.gateway.remote.RemoteClusterStateUtils.DELIMITER; +import static org.opensearch.gateway.remote.RemoteDownloadStats.CHECKSUM_VALIDATION_FAILED_COUNT; import static org.opensearch.gateway.remote.model.RemoteClusterBlocks.CLUSTER_BLOCKS; import static org.opensearch.gateway.remote.model.RemoteCoordinationMetadata.COORDINATION_METADATA; import static org.opensearch.gateway.remote.model.RemoteCustomMetadata.CUSTOM_METADATA; @@ -405,10 +406,28 @@ private void assertDataNodeDownloadStats(NodesStatsResponse nodesStatsResponse) assertTrue(dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(0).getSuccessCount() > 0); assertEquals(0, dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(0).getFailedCount()); assertTrue(dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(0).getTotalTimeInMillis() > 0); + assertEquals( + 0, + dataNodeDiscoveryStats.getClusterStateStats() + .getPersistenceStats() + .get(0) + .getExtendedFields() + .get(CHECKSUM_VALIDATION_FAILED_COUNT) + .get() + ); assertTrue(dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(1).getSuccessCount() > 0); assertEquals(0, dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(1).getFailedCount()); assertTrue(dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(1).getTotalTimeInMillis() > 0); + assertEquals( + 0, + dataNodeDiscoveryStats.getClusterStateStats() + .getPersistenceStats() + .get(1) + .getExtendedFields() + .get(CHECKSUM_VALIDATION_FAILED_COUNT) + .get() + ); } private Map getMetadataFiles(BlobStoreRepository repository, String subDirectory) throws IOException { diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index 12d10fd908b44..ece29180f9cf5 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -1644,6 +1644,12 @@ void validateClusterStateFromChecksum( failedValidation ) ); + if (isFullStateDownload) { + remoteStateStats.stateFullDownloadValidationFailed(); + } else { + remoteStateStats.stateDiffDownloadValidationFailed(); + } + if (isFullStateDownload && remoteClusterStateValidationMode.equals(RemoteClusterStateValidationMode.FAILURE)) { throw new IllegalStateException( "Cluster state checksums do not match during full state read. Validation failed for " + failedValidation diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteDownloadStats.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteDownloadStats.java new file mode 100644 index 0000000000000..a8f4b33a19c37 --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteDownloadStats.java @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import org.opensearch.cluster.coordination.PersistedStateStats; + +import java.util.concurrent.atomic.AtomicLong; + +/** + * Download stats for remote state + * + * @opensearch.internal + */ +public class RemoteDownloadStats extends PersistedStateStats { + static final String CHECKSUM_VALIDATION_FAILED_COUNT = "checksum_validation_failed_count"; + private AtomicLong checksumValidationFailedCount = new AtomicLong(0); + + public RemoteDownloadStats(String statsName) { + super(statsName); + addToExtendedFields(CHECKSUM_VALIDATION_FAILED_COUNT, checksumValidationFailedCount); + } + + public void checksumValidationFailedCount() { + checksumValidationFailedCount.incrementAndGet(); + } + + public long getChecksumValidationFailedCount() { + return checksumValidationFailedCount.get(); + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java b/server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java index 417ebdafd3ba7..11f26ac8b3ed9 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java @@ -18,16 +18,16 @@ public class RemotePersistenceStats { RemoteUploadStats remoteUploadStats; - PersistedStateStats remoteDiffDownloadStats; - PersistedStateStats remoteFullDownloadStats; + RemoteDownloadStats remoteDiffDownloadStats; + RemoteDownloadStats remoteFullDownloadStats; final String FULL_DOWNLOAD_STATS = "remote_full_download"; final String DIFF_DOWNLOAD_STATS = "remote_diff_download"; public RemotePersistenceStats() { remoteUploadStats = new RemoteUploadStats(); - remoteDiffDownloadStats = new PersistedStateStats(DIFF_DOWNLOAD_STATS); - remoteFullDownloadStats = new PersistedStateStats(FULL_DOWNLOAD_STATS); + remoteDiffDownloadStats = new RemoteDownloadStats(DIFF_DOWNLOAD_STATS); + remoteFullDownloadStats = new RemoteDownloadStats(FULL_DOWNLOAD_STATS); } public void cleanUpAttemptFailed() { @@ -90,6 +90,22 @@ public void stateDiffDownloadFailed() { remoteDiffDownloadStats.stateFailed(); } + public void stateDiffDownloadValidationFailed() { + remoteDiffDownloadStats.checksumValidationFailedCount(); + } + + public void stateFullDownloadValidationFailed() { + remoteFullDownloadStats.checksumValidationFailedCount(); + } + + public long getStateDiffDownloadValidationFailed() { + return remoteDiffDownloadStats.getChecksumValidationFailedCount(); + } + + public long getStateFullDownloadValidationFailed() { + return remoteFullDownloadStats.getChecksumValidationFailedCount(); + } + public PersistedStateStats getUploadStats() { return remoteUploadStats; } diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index b11d5e48bec06..56857285fa8d3 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -3342,6 +3342,7 @@ public void testGetClusterStateForManifestWithChecksumValidationEnabledWithNullC anyString(), anyBoolean() ); + assertEquals(0, remoteClusterStateService.getRemoteStateStats().getStateFullDownloadValidationFailed()); } public void testGetClusterStateForManifestWithChecksumValidationEnabled() throws IOException { @@ -3374,6 +3375,7 @@ public void testGetClusterStateForManifestWithChecksumValidationEnabled() throws ); mockService.getClusterStateForManifest(ClusterName.DEFAULT.value(), manifest, NODE_ID, true); verify(mockService, times(1)).validateClusterStateFromChecksum(manifest, clusterState, ClusterName.DEFAULT.value(), NODE_ID, true); + assertEquals(0, remoteClusterStateService.getRemoteStateStats().getStateFullDownloadValidationFailed()); } public void testGetClusterStateForManifestWithChecksumValidationModeNone() throws IOException { @@ -3406,6 +3408,7 @@ public void testGetClusterStateForManifestWithChecksumValidationModeNone() throw ); mockService.getClusterStateForManifest(ClusterName.DEFAULT.value(), manifest, NODE_ID, true); verify(mockService, times(0)).validateClusterStateFromChecksum(any(), any(), anyString(), anyString(), anyBoolean()); + assertEquals(0, remoteClusterStateService.getRemoteStateStats().getStateFullDownloadValidationFailed()); } public void testGetClusterStateForManifestWithChecksumValidationEnabledWithMismatch() throws IOException { @@ -3448,6 +3451,7 @@ public void testGetClusterStateForManifestWithChecksumValidationEnabledWithMisma NODE_ID, true ); + assertEquals(1, remoteClusterStateService.getRemoteStateStats().getStateFullDownloadValidationFailed()); } public void testGetClusterStateForManifestWithChecksumValidationDebugWithMismatch() throws IOException { @@ -3494,6 +3498,7 @@ public void testGetClusterStateForManifestWithChecksumValidationDebugWithMismatc NODE_ID, true ); + assertEquals(1, remoteClusterStateService.getRemoteStateStats().getStateFullDownloadValidationFailed()); } public void testGetClusterStateUsingDiffWithChecksum() throws IOException { @@ -3535,6 +3540,7 @@ public void testGetClusterStateUsingDiffWithChecksum() throws IOException { eq(NODE_ID), eq(false) ); + assertEquals(0, remoteClusterStateService.getRemoteStateStats().getStateDiffDownloadValidationFailed()); } public void testGetClusterStateUsingDiffWithChecksumModeNone() throws IOException { @@ -3576,6 +3582,7 @@ public void testGetClusterStateUsingDiffWithChecksumModeNone() throws IOExceptio eq(NODE_ID), eq(false) ); + assertEquals(0, remoteClusterStateService.getRemoteStateStats().getStateDiffDownloadValidationFailed()); } public void testGetClusterStateUsingDiffWithChecksumModeDebugMismatch() throws IOException { @@ -3616,6 +3623,7 @@ public void testGetClusterStateUsingDiffWithChecksumModeDebugMismatch() throws I eq(NODE_ID), eq(false) ); + assertEquals(1, remoteClusterStateService.getRemoteStateStats().getStateDiffDownloadValidationFailed()); } public void testGetClusterStateUsingDiffWithChecksumModeTraceMismatch() throws IOException { @@ -3677,6 +3685,7 @@ public void testGetClusterStateUsingDiffWithChecksumModeTraceMismatch() throws I eq(NODE_ID), eq(false) ); + assertEquals(1, remoteClusterStateService.getRemoteStateStats().getStateDiffDownloadValidationFailed()); } public void testGetClusterStateUsingDiffWithChecksumMismatch() throws IOException { @@ -3738,6 +3747,7 @@ public void testGetClusterStateUsingDiffWithChecksumMismatch() throws IOExceptio eq(NODE_ID), eq(false) ); + assertEquals(1, remoteClusterStateService.getRemoteStateStats().getStateDiffDownloadValidationFailed()); } private void mockObjectsForGettingPreviousClusterUUID(Map clusterUUIDsPointers) throws IOException {