diff --git a/CHANGELOG.md b/CHANGELOG.md index 254c35c66998b..21697ebb79f8e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -60,6 +60,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fixed assertion unsafe use of ClusterService.state() in ResourceUsageCollectorService ([#19775])(https://github.com/opensearch-project/OpenSearch/pull/19775)) - Fix Unified highlighter for nested fields when using matchPhrasePrefixQuery ([#19442](https://github.com/opensearch-project/OpenSearch/pull/19442)) - Add S3Repository.LEGACY_MD5_CHECKSUM_CALCULATION to list of repository-s3 settings ([#19788](https://github.com/opensearch-project/OpenSearch/pull/19788)) +- Fix NullPointerException when restoring remote snapshot with missing shard size information ([#19684](https://github.com/opensearch-project/OpenSearch/pull/19684)) - Fix NPE of ScriptScoreQuery ([#19650](https://github.com/opensearch-project/OpenSearch/pull/19650)) ### Dependencies diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/SearchableSnapshotIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/SearchableSnapshotIT.java index 6001e5636014b..c0acff172fbfc 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/SearchableSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/SearchableSnapshotIT.java @@ -1054,4 +1054,61 @@ private void assertSearchableSnapshotIndexDirectoryExistence(String nodeName, In assertTrue("index cache path should " + (exists ? "exist" : "not exist"), Files.exists(indexPath) == exists); }, 30, TimeUnit.SECONDS); } + + public void testRestoreRemoteSnapshotWithNullShardSizes() throws Exception { + final String snapshotName1 = "test-snap-1"; + final String snapshotName2 = "test-snap-2"; + final String repoName = "test-repo"; + final String indexName1 = "test-idx-1"; + final String indexName2 = "test-idx-2"; + final Client client = client(); + + // Setup cluster with delayed ClusterInfo updates to simulate null shard sizes + client.admin() + .cluster() + .prepareUpdateSettings() + .setPersistentSettings(Settings.builder().put("cluster.info.update.interval", "60m")) + .get(); + + internalCluster().ensureAtLeastNumDataNodes(2); + + createIndexWithDocsAndEnsureGreen(0, 50, indexName1); + createRepositoryWithSettings(null, repoName); + takeSnapshot(client, snapshotName1, repoName, indexName1); + deleteIndicesAndEnsureGreen(client, indexName1); + + createIndexWithDocsAndEnsureGreen(0, 50, indexName2); + takeSnapshot(client, snapshotName2, repoName, indexName2); + deleteIndicesAndEnsureGreen(client, indexName2); + + internalCluster().ensureAtLeastNumWarmNodes(2); + + RestoreSnapshotRequest firstRestore = new RestoreSnapshotRequest(repoName, snapshotName1).indices(indexName1) + .storageType(RestoreSnapshotRequest.StorageType.REMOTE_SNAPSHOT) + .renamePattern("(.+)") + .renameReplacement("remote-$1") + .waitForCompletion(true); + + client.admin().cluster().restoreSnapshot(firstRestore).get(); + ensureGreen("remote-" + indexName1); + + // Second restore immediately after - ClusterInfo won't have size data for first restore shards + RestoreSnapshotRequest secondRestore = new RestoreSnapshotRequest(repoName, snapshotName2).indices(indexName2) + .storageType(RestoreSnapshotRequest.StorageType.REMOTE_SNAPSHOT) + .renamePattern("(.+)") + .renameReplacement("remote-$1") + .waitForCompletion(true); + + client.admin().cluster().restoreSnapshot(secondRestore).get(); + ensureGreen("remote-" + indexName2); + + assertDocCount("remote-" + indexName1, 50L); + assertDocCount("remote-" + indexName2, 50L); + + client.admin() + .cluster() + .prepareUpdateSettings() + .setPersistentSettings(Settings.builder().putNull("cluster.info.update.interval")) + .get(); + } } diff --git a/server/src/main/java/org/opensearch/snapshots/RestoreService.java b/server/src/main/java/org/opensearch/snapshots/RestoreService.java index ef4d0df2e9636..666e31fcd0ed6 100644 --- a/server/src/main/java/org/opensearch/snapshots/RestoreService.java +++ b/server/src/main/java/org/opensearch/snapshots/RestoreService.java @@ -909,13 +909,29 @@ private void validateSearchableSnapshotRestorable(long totalRestorableRemoteInde .routingTable() .allShardsSatisfyingPredicate(isRemoteSnapshotShard); - long totalRestoredRemoteIndexesSize = shardsIterator.getShardRoutings() - .stream() - .map(clusterInfo::getShardSize) - .mapToLong(Long::longValue) - .sum(); + long totalRestoredRemoteIndicesSize = 0; + int missingSizeCount = 0; + List routings = shardsIterator.getShardRoutings(); + + for (ShardRouting shardRouting : routings) { + Long shardSize = clusterInfo.getShardSize(shardRouting); + if (shardSize != null) { + totalRestoredRemoteIndicesSize += shardSize; + } else { + missingSizeCount++; + } + } + + if (missingSizeCount > 0) { + logger.warn( + "Size information unavailable for {} out of {} remote snapshot shards. " + + "File cache validation will use available data only.", + missingSizeCount, + routings.size() + ); + } - if (totalRestoredRemoteIndexesSize + totalRestorableRemoteIndexesSize > remoteDataToFileCacheRatio + if (totalRestoredRemoteIndicesSize + totalRestorableRemoteIndexesSize > remoteDataToFileCacheRatio * totalNodeFileCacheSize) { throw new SnapshotRestoreException( snapshot,