Skip to content

Commit 2a48308

Browse files
authored
[7.16] Fix SearchableSnapshotsPersistentCacheIntegTests (elastic#80077) (elastic#80262)
* Fix SearchableSnapshotsPersistentCacheIntegTests (elastic#80077) The two tests in SearchableSnapshotsPersistentCacheIntegTests failed a few times when they were expecting the persistent cache to be empty after the mounted indices were deleted. While I wasn't able to reproduce the issue (which happened ~5 times in the last 2 months), I suspect the cause is the same in both cases: the tests wait for the mounted index to be green but don't always wait for the prewarming to be completed. I think it is possible that a part of a cache file finishes prewarming after the test is completed, causing the next test to fail because the persistent cache will contain a doc that is not related to that subsequent test. This commit changes testCacheSurviveRestart so that it waits for the prewarming to complete. Similarly, it changes testPersistentCacheCleanUpAfterRelocation to also wait for the prewarming to complete (just in case; this test verifies that the recovery is done). It also logs more information about the emptiness of the persistent cache on data nodes and sets the cluster scope to TEST to ensure that a dedicated test cluster is used for each test. Closes elastic#76159 Closes elastic#76160 * usual suspect
1 parent 65e0502 commit 2a48308

File tree

1 file changed

+36
-14
lines changed

1 file changed

+36
-14
lines changed

x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/cache/full/SearchableSnapshotsPersistentCacheIntegTests.java

Lines changed: 36 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.elasticsearch.repositories.fs.FsRepository;
2525
import org.elasticsearch.snapshots.SnapshotInfo;
2626
import org.elasticsearch.test.BackgroundIndexer;
27+
import org.elasticsearch.test.ESIntegTestCase;
2728
import org.elasticsearch.test.InternalTestCluster;
2829
import org.elasticsearch.xpack.searchablesnapshots.BaseSearchableSnapshotsIntegTestCase;
2930
import org.elasticsearch.xpack.searchablesnapshots.SearchableSnapshots;
@@ -46,6 +47,7 @@
4647
import static org.hamcrest.Matchers.greaterThan;
4748
import static org.hamcrest.Matchers.notNullValue;
4849

50+
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST)
4951
public class SearchableSnapshotsPersistentCacheIntegTests extends BaseSearchableSnapshotsIntegTestCase {
5052

5153
@Override
@@ -87,6 +89,9 @@ public void testCacheSurviveRestart() throws Exception {
8789
);
8890
ensureGreen(restoredIndexName);
8991

92+
assertExecutorIsIdle(SearchableSnapshots.CACHE_FETCH_ASYNC_THREAD_POOL_NAME);
93+
assertExecutorIsIdle(SearchableSnapshots.CACHE_PREWARMING_THREAD_POOL_NAME);
94+
9095
final Index restoredIndex = client().admin()
9196
.cluster()
9297
.prepareState()
@@ -147,20 +152,20 @@ public Settings onNodeStopped(String nodeName) {
147152
}
148153
});
149154

155+
ensureGreen(restoredIndexName);
156+
157+
assertExecutorIsIdle(SearchableSnapshots.CACHE_FETCH_ASYNC_THREAD_POOL_NAME);
158+
assertExecutorIsIdle(SearchableSnapshots.CACHE_PREWARMING_THREAD_POOL_NAME);
159+
150160
final CacheService cacheServiceAfterRestart = internalCluster().getInstance(CacheService.class, dataNode);
151161
final PersistentCache persistentCacheAfterRestart = cacheServiceAfterRestart.getPersistentCache();
152-
ensureGreen(restoredIndexName);
153162

154163
cacheFiles.forEach(cacheFile -> assertTrue(cacheFile + " should have survived node restart", Files.exists(cacheFile)));
155164
assertThat("Cache files should be loaded in cache", persistentCacheAfterRestart.getNumDocs(), equalTo((long) cacheFiles.size()));
156165

157166
assertAcked(client().admin().indices().prepareDelete(restoredIndexName));
158-
159-
assertBusy(() -> {
160-
cacheFiles.forEach(cacheFile -> assertFalse(cacheFile + " should have been cleaned up", Files.exists(cacheFile)));
161-
cacheServiceAfterRestart.synchronizeCache();
162-
assertThat(persistentCacheAfterRestart.getNumDocs(), equalTo(0L));
163-
});
167+
assertBusy(() -> cacheFiles.forEach(cacheFile -> assertFalse(cacheFile + " should have been cleaned up", Files.exists(cacheFile))));
168+
assertEmptyPersistentCacheOnDataNodes();
164169
}
165170

166171
public void testPersistentCacheCleanUpAfterRelocation() throws Exception {
@@ -187,6 +192,7 @@ public void testPersistentCacheCleanUpAfterRelocation() throws Exception {
187192
final int numDocs = scaledRandomIntBetween(1_000, 5_000);
188193
try (BackgroundIndexer indexer = new BackgroundIndexer(indexName, "_doc", client(), numDocs)) {
189194
waitForDocs(numDocs, indexer);
195+
indexer.stopAndAwaitStopped();
190196
}
191197
refresh(indexName);
192198

@@ -214,7 +220,6 @@ public void testPersistentCacheCleanUpAfterRelocation() throws Exception {
214220
.cluster()
215221
.prepareState()
216222
.clear()
217-
.setRoutingTable(true)
218223
.setMetadata(true)
219224
.setIndices(mountedIndexName)
220225
.get();
@@ -248,6 +253,9 @@ public void testPersistentCacheCleanUpAfterRelocation() throws Exception {
248253

249254
ensureGreen(mountedIndexName);
250255

256+
assertExecutorIsIdle(SearchableSnapshots.CACHE_FETCH_ASYNC_THREAD_POOL_NAME);
257+
assertExecutorIsIdle(SearchableSnapshots.CACHE_PREWARMING_THREAD_POOL_NAME);
258+
251259
recoveryResponse = client().admin().indices().prepareRecoveries(mountedIndexName).get();
252260
assertTrue(recoveryResponse.shardRecoveryStates().containsKey(mountedIndexName));
253261
assertTrue(
@@ -271,12 +279,26 @@ public void testPersistentCacheCleanUpAfterRelocation() throws Exception {
271279

272280
logger.info("--> deleting mounted index {}", mountedIndex);
273281
assertAcked(client().admin().indices().prepareDelete(mountedIndexName));
282+
assertEmptyPersistentCacheOnDataNodes();
283+
}
274284

275-
assertBusy(() -> {
276-
for (CacheService cacheService : internalCluster().getDataNodeInstances(CacheService.class)) {
277-
cacheService.synchronizeCache();
278-
assertThat(cacheService.getPersistentCache().getNumDocs(), equalTo(0L));
279-
}
280-
});
285+
/**
 * Asserts that the persistent cache of every data node contains no documents.
 * <p>
 * For each data node, the node's {@code CacheService} is synchronized and its persistent cache
 * is expected to report zero documents. The check runs inside {@code assertBusy}. Nodes that pass
 * are removed from the tracking set, so if the assertion ultimately fails, the nodes still in the
 * set (those not yet verified as empty) are logged before the error is rethrown.
 *
 * @throws Exception if thrown by {@code assertBusy}; an {@link AssertionError} is logged and rethrown
 */
private void assertEmptyPersistentCacheOnDataNodes() throws Exception {
286+
final Set<DiscoveryNode> dataNodes = new HashSet<>(getDiscoveryNodes().getDataNodes().values());
287+
logger.info("--> verifying persistent caches are empty on nodes... {}", dataNodes);
288+
try {
289+
assertBusy(() -> {
290+
// iterate over a copy: verified nodes are removed from dataNodes inside the loop
for (DiscoveryNode node : org.elasticsearch.core.List.copyOf(dataNodes)) {
291+
final CacheService cacheService = internalCluster().getInstance(CacheService.class, node.getName());
292+
cacheService.synchronizeCache();
293+
assertThat(cacheService.getPersistentCache().getNumDocs(), equalTo(0L));
294+
logger.info("--> persistent cache is empty on node {}", node);
295+
// only reached when the assertion above passed; what remains in dataNodes is still unverified
dataNodes.remove(node);
296+
}
297+
});
298+
logger.info("--> all persistent caches are empty");
299+
} catch (AssertionError ae) {
300+
// report which nodes were never observed with an empty persistent cache
logger.error("--> persistent caches not empty on nodes: {}", dataNodes);
301+
throw ae;
302+
}
281303
}
282304
}

0 commit comments

Comments
 (0)