diff --git a/buildSrc/src/main/java/org/opensearch/gradle/precommit/ForbiddenApisPrecommitPlugin.java b/buildSrc/src/main/java/org/opensearch/gradle/precommit/ForbiddenApisPrecommitPlugin.java index c42b7ea975de5..38f5f5fb6c28f 100644 --- a/buildSrc/src/main/java/org/opensearch/gradle/precommit/ForbiddenApisPrecommitPlugin.java +++ b/buildSrc/src/main/java/org/opensearch/gradle/precommit/ForbiddenApisPrecommitPlugin.java @@ -136,6 +136,22 @@ public Void call(Object... names) { return null; } }); + // Add a closure to allow projects to optionally call `forbidSleep()` which will add the signatures + // to forbid all usages of `Thread.sleep` + ext.set("forbidSleep", new Closure(t) { + @Override + public Void call(Object... unused) { + final List signatures = new ArrayList<>(); + signatures.addAll(t.getSignatures()); + signatures.add( + "java.lang.Thread#sleep(**) @ Fixed sleeps lead to non-deterministic test failures." + + " Poll for whatever condition you're waiting for." + + " Use helpers like `assertBusy` or the awaitility lib." + ); + t.setSignatures(signatures); + return null; + } + }); // Use of the deprecated security manager APIs are pervasive so set them to warn // globally for all projects. Replacements for (most of) these APIs are available // so usages can move to the non-deprecated variants to avoid the warnings. diff --git a/server/build.gradle b/server/build.gradle index 088165e715d25..b3fe3042b106b 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -169,6 +169,8 @@ tasks.named("testingConventions").configure { } } +tasks.named('forbiddenApisInternalClusterTest').configure { forbidSleep() } + // Set to current version by default def japicmpCompareTarget = System.getProperty("japicmp.compare.version") if (japicmpCompareTarget == null) { /* use latest released version */ diff --git a/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/shards/TransportCatShardsActionIT.java b/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/shards/TransportCatShardsActionIT.java index ffba99c175279..e55fde9d9d4f4 100644 --- a/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/shards/TransportCatShardsActionIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/shards/TransportCatShardsActionIT.java @@ -95,27 +95,13 @@ public void testCatShardsWithTimeoutException() throws IOException, AssertionErr // Dropping master node to delay in cluster state call. internalCluster().stopRandomNode(InternalTestCluster.nameFilter(masterNodes.get(0))); - CountDownLatch latch = new CountDownLatch(2); - new Thread(() -> { - try { - // Ensures the cancellation timeout expires. - Thread.sleep(2000); - // Starting master node to proceed in cluster state call. - internalCluster().startClusterManagerOnlyNode( - Settings.builder().put("node.name", masterNodes.get(0)).put(clusterManagerDataPathSettings).build() - ); - latch.countDown(); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - }).start(); - + CountDownLatch latch = new CountDownLatch(1); final CatShardsRequest shardsRequest = new CatShardsRequest(); TimeValue timeoutInterval = timeValueMillis(1000); shardsRequest.setCancelAfterTimeInterval(timeoutInterval); shardsRequest.clusterManagerNodeTimeout(timeValueMillis(2500)); shardsRequest.setIndices(Strings.EMPTY_ARRAY); - client().execute(CatShardsAction.INSTANCE, shardsRequest, new ActionListener() { + client().execute(CatShardsAction.INSTANCE, shardsRequest, new ActionListener<>() { @Override public void onResponse(CatShardsResponse catShardsResponse) { // onResponse should not be called. @@ -132,7 +118,13 @@ public void onFailure(Exception e) { latch.countDown(); } }); + latch.await(); + + // Restart cluster manager to restore cluster and allow test to cleanup and exit + internalCluster().startClusterManagerOnlyNode( + Settings.builder().put("node.name", masterNodes.get(0)).put(clusterManagerDataPathSettings).build() + ); } public void testListShardsWithHiddenIndex() throws Exception { diff --git a/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteCloneIndexIT.java b/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteCloneIndexIT.java index b695f03da8a65..bbabca0334697 100644 --- a/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteCloneIndexIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteCloneIndexIT.java @@ -49,6 +49,7 @@ import org.opensearch.action.admin.indices.stats.IndicesStatsResponse; import org.opensearch.cluster.metadata.RepositoryMetadata; import org.opensearch.cluster.routing.allocation.decider.EnableAllocationDecider; +import org.opensearch.common.SuppressForbidden; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.core.common.unit.ByteSizeValue; @@ -227,6 +228,7 @@ protected void setLowPriorityUploadRate(String repoName, String value) throws Ex createRepository(repoName, rmd.type(), settings); } + @SuppressForbidden(reason = "Waiting longer than timeout") public void testCreateCloneIndexFailure() throws ExecutionException, InterruptedException { asyncUploadMockFsRepo = false; Version version = VersionUtils.randomIndexCompatibleVersion(random()); diff --git a/server/src/internalClusterTest/java/org/opensearch/action/ingest/AsyncIngestProcessorIT.java b/server/src/internalClusterTest/java/org/opensearch/action/ingest/AsyncIngestProcessorIT.java index 941f4189f66fa..3b80affe0a086 100644 --- a/server/src/internalClusterTest/java/org/opensearch/action/ingest/AsyncIngestProcessorIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/action/ingest/AsyncIngestProcessorIT.java @@ -136,13 +136,6 @@ public Map getProcessors(Processor.Parameters paramet public void execute(IngestDocument ingestDocument, BiConsumer handler) { threadPool.generic().execute(() -> { String id = (String) ingestDocument.getSourceAndMetadata().get("_id"); - if (usually()) { - try { - Thread.sleep(10); - } catch (InterruptedException e) { - // ignore - } - } ingestDocument.setFieldValue("foo", "bar-" + id); handler.accept(ingestDocument, null); }); diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/ClusterInfoServiceIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/ClusterInfoServiceIT.java index 64602ee6ea741..6460be54bde8a 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/ClusterInfoServiceIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/ClusterInfoServiceIT.java @@ -44,6 +44,7 @@ import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.cluster.routing.allocation.decider.EnableAllocationDecider; import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.SuppressForbidden; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.core.action.ActionListener; @@ -149,6 +150,7 @@ private void setClusterInfoTimeout(String timeValue) { ); } + @SuppressForbidden(reason = "Fixed sleeps are prone to flakiness but no documented failures here") public void testClusterInfoServiceCollectsInformation() throws InterruptedException { Settings.Builder settingsBuilder = Settings.builder() .put(ResourceTrackerSettings.GLOBAL_JVM_USAGE_AC_WINDOW_DURATION_SETTING.getKey(), TimeValue.timeValueSeconds(1)) @@ -250,6 +252,7 @@ public void testClusterInfoServiceCollectsFileCacheInformation() { } } + @SuppressForbidden(reason = "Fixed sleeps are prone to flakiness but no documented failures here") public void testClusterInfoServiceCollectsNodeResourceStatsInformation() throws InterruptedException { // setting time window as ResourceUsageTracker needs atleast both of these to be ready to start collecting the @@ -279,6 +282,7 @@ public void testClusterInfoServiceCollectsNodeResourceStatsInformation() throws assertEquals(2, nodeResourceUsageStats.size()); } + @SuppressForbidden(reason = "Fixed sleeps are prone to flakiness but no documented failures here") public void testClusterInfoServiceInformationClearOnError() throws InterruptedException { internalCluster().startNodes( 2, diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java index 562065cc8fdce..df487478a4f7c 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java @@ -40,6 +40,7 @@ import org.opensearch.cluster.NodeConnectionsService; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.SuppressForbidden; import org.opensearch.common.settings.Settings; import org.opensearch.index.MockEngineFactoryPlugin; import org.opensearch.indices.recovery.RecoverySettings; @@ -75,6 +76,7 @@ https://github.com/opensearch-project/OpenSearch/pull/15521 for context */ @ClusterScope(scope = Scope.TEST, numDataNodes = 0) +@SuppressForbidden(reason = "Pending fix: https://github.com/opensearch-project/OpenSearch/issues/18972") public class NodeJoinLeftIT extends OpenSearchIntegTestCase { private TestLogsAppender testLogsAppender; diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/RareClusterStateIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/RareClusterStateIT.java index 2906aa0e086fb..45c4dd14d5ec8 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/RareClusterStateIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/RareClusterStateIT.java @@ -53,6 +53,7 @@ import org.opensearch.cluster.routing.allocation.AllocationService; import org.opensearch.cluster.routing.allocation.ExistingShardsAllocator; import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.SuppressForbidden; import org.opensearch.common.action.ActionFuture; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; @@ -219,6 +220,7 @@ public void testDeleteCreateInOneBulk() throws Exception { assertHitCount(client().prepareSearch("test").get(), 0); } + @SuppressForbidden(reason = "Best effort sleep, not critical to test logic") public void testDelayedMappingPropagationOnPrimary() throws Exception { // Here we want to test that things go well if there is a first request // that adds mappings but before mappings are propagated to all nodes @@ -309,6 +311,7 @@ public void testDelayedMappingPropagationOnPrimary() throws Exception { assertEquals(1, docIndexResponse.get(10, TimeUnit.SECONDS).getShardInfo().getTotal()); } + @SuppressForbidden(reason = "Best effort sleep, not critical to test logic") public void testDelayedMappingPropagationOnReplica() throws Exception { // This is essentially the same thing as testDelayedMappingPropagationOnPrimary // but for replicas diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/routing/WeightedRoutingIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/routing/WeightedRoutingIT.java index 98d1d2bb4bb03..bace2a77b3359 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/routing/WeightedRoutingIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/routing/WeightedRoutingIT.java @@ -335,15 +335,14 @@ public void testGetWeightedRouting_ClusterManagerNotDiscovered() throws Exceptio logger.info("--> network disruption is started"); networkDisruption.startDisrupting(); - // wait for leader checker to fail - Thread.sleep(13000); - // get api to fetch local weighted routing for a node in zone a or b - ClusterGetWeightedRoutingResponse weightedRoutingResponse = internalCluster().client( - randomFrom(nodes_in_zone_a.get(0), nodes_in_zone_b.get(0)) - ).admin().cluster().prepareGetWeightedRouting().setAwarenessAttribute("zone").setRequestLocal(true).get(); - assertEquals(weightedRouting, weightedRoutingResponse.weights()); - assertFalse(weightedRoutingResponse.getDiscoveredClusterManager()); + assertBusy(() -> { + ClusterGetWeightedRoutingResponse weightedRoutingResponse = internalCluster().client( + randomFrom(nodes_in_zone_a.get(0), nodes_in_zone_b.get(0)) + ).admin().cluster().prepareGetWeightedRouting().setAwarenessAttribute("zone").setRequestLocal(true).get(); + assertEquals(weightedRouting, weightedRoutingResponse.weights()); + assertFalse(weightedRoutingResponse.getDiscoveredClusterManager()); + }, 13, TimeUnit.SECONDS); logger.info("--> network disruption is stopped"); networkDisruption.stopDisrupting(); diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/service/ClusterServiceIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/service/ClusterServiceIT.java index d82d9c0ed05e3..67a4326702730 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/service/ClusterServiceIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/service/ClusterServiceIT.java @@ -38,6 +38,7 @@ import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.common.Nullable; +import org.opensearch.common.SuppressForbidden; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.test.OpenSearchIntegTestCase; @@ -55,6 +56,8 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.hasSize; +import static org.awaitility.Awaitility.await; @ClusterScope(scope = Scope.TEST, numDataNodes = 0) public class ClusterServiceIT extends OpenSearchIntegTestCase { @@ -350,6 +353,7 @@ public void testPendingUpdateTask() throws Exception { final CountDownLatch invoked1 = new CountDownLatch(1); clusterService.submitStateUpdateTask("1", new ClusterStateUpdateTask() { @Override + @SuppressForbidden(reason = "Sleeping to guarantee a >0 time metric calculation") public ClusterState execute(ClusterState currentState) { try { Thread.sleep(50); @@ -458,10 +462,11 @@ public void onFailure(String source, Exception e) { } }); } - Thread.sleep(100); - pendingClusterTasks = clusterService.getClusterManagerService().pendingTasks(); - assertThat(pendingClusterTasks.size(), greaterThanOrEqualTo(5)); + pendingClusterTasks = await().until( + () -> clusterService.getClusterManagerService().pendingTasks(), + hasSize(greaterThanOrEqualTo(5)) + ); controlSources = new HashSet<>(Arrays.asList("1", "2", "3", "4", "5")); for (PendingClusterTask task : pendingClusterTasks) { controlSources.remove(task.getSource().string()); diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/RecoverAfterNodesIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/RecoverAfterNodesIT.java index 44fd0f93cb080..52147af08825e 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/RecoverAfterNodesIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/RecoverAfterNodesIT.java @@ -34,6 +34,7 @@ import org.opensearch.cluster.block.ClusterBlock; import org.opensearch.cluster.block.ClusterBlockLevel; +import org.opensearch.common.SuppressForbidden; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.test.OpenSearchIntegTestCase; @@ -74,6 +75,7 @@ public Client startNode(Settings.Builder settings) { return internalCluster().client(name); } + @SuppressForbidden(reason = "Sleeping longer than timeout") public void testRecoverAfterNodes() throws Exception { internalCluster().setBootstrapClusterManagerNodeIndex(0); logger.info("--> start node (1)"); diff --git a/server/src/internalClusterTest/java/org/opensearch/index/ShardIndexingPressureSettingsIT.java b/server/src/internalClusterTest/java/org/opensearch/index/ShardIndexingPressureSettingsIT.java index 5426f4037294f..a4ac256b67b9d 100644 --- a/server/src/internalClusterTest/java/org/opensearch/index/ShardIndexingPressureSettingsIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/index/ShardIndexingPressureSettingsIT.java @@ -19,6 +19,7 @@ import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.common.SuppressForbidden; import org.opensearch.common.UUIDs; import org.opensearch.common.action.ActionFuture; import org.opensearch.common.collect.Tuple; @@ -45,6 +46,7 @@ import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 2, numClientNodes = 1) +@SuppressForbidden(reason = "Need to fix: https://github.com/opensearch-project/OpenSearch/issues/14331") public class ShardIndexingPressureSettingsIT extends OpenSearchIntegTestCase { public static final String INDEX_NAME = "test_index"; diff --git a/server/src/internalClusterTest/java/org/opensearch/index/autoforcemerge/AutoForceMergeManagerIT.java b/server/src/internalClusterTest/java/org/opensearch/index/autoforcemerge/AutoForceMergeManagerIT.java index 142e2da95653e..6d03a4f129039 100644 --- a/server/src/internalClusterTest/java/org/opensearch/index/autoforcemerge/AutoForceMergeManagerIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/index/autoforcemerge/AutoForceMergeManagerIT.java @@ -11,6 +11,7 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.SuppressForbidden; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.core.common.unit.ByteSizeUnit; @@ -60,6 +61,7 @@ protected Settings nodeSettings(int nodeOrdinal) { .build(); } + @SuppressForbidden(reason = "Sleeping longer than scheduled interval") public void testAutoForceMergeFeatureFlagDisabled() throws InterruptedException, ExecutionException { Settings clusterSettings = Settings.builder() @@ -102,6 +104,7 @@ public void testAutoForceMergeFeatureFlagDisabled() throws InterruptedException, assertAcked(client().admin().indices().prepareDelete(INDEX_NAME_1).get()); } + @SuppressForbidden(reason = "Sleeping longer than scheduled interval") public void testAutoForceMergeTriggeringWithOneShardOfNonWarmCandidate() throws Exception { Settings clusterSettings = Settings.builder() .put(super.nodeSettings(0)) diff --git a/server/src/internalClusterTest/java/org/opensearch/index/mapper/DynamicMappingIT.java b/server/src/internalClusterTest/java/org/opensearch/index/mapper/DynamicMappingIT.java index 2267ef1bb6739..37a5e4c7ba51d 100644 --- a/server/src/internalClusterTest/java/org/opensearch/index/mapper/DynamicMappingIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/index/mapper/DynamicMappingIT.java @@ -129,7 +129,6 @@ public void run() { if (error.get() != null) { throw error.get(); } - Thread.sleep(2000); GetMappingsResponse mappings = client().admin().indices().prepareGetMappings("index").get(); for (int i = 0; i < indexThreads.length; ++i) { assertMappingsHaveField(mappings, "index", "field" + i); diff --git a/server/src/internalClusterTest/java/org/opensearch/index/seqno/RetentionLeaseIT.java b/server/src/internalClusterTest/java/org/opensearch/index/seqno/RetentionLeaseIT.java index 51a16acb68089..3012e994f2e52 100644 --- a/server/src/internalClusterTest/java/org/opensearch/index/seqno/RetentionLeaseIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/index/seqno/RetentionLeaseIT.java @@ -37,6 +37,7 @@ import org.opensearch.action.support.replication.ReplicationResponse; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.common.SuppressForbidden; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; @@ -216,6 +217,7 @@ public void testRetentionLeaseSyncedOnRemove() throws Exception { } } + @SuppressForbidden(reason = "Sleeping longer than lease duration") public void testRetentionLeasesSyncOnExpiration() throws Exception { final int numberOfReplicas = 2 - scaledRandomIntBetween(0, 2); internalCluster().ensureAtLeastNumDataNodes(1 + numberOfReplicas); diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java index 069bdd3fa7a64..a9df5b9da48b6 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java @@ -54,6 +54,7 @@ import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand; import org.opensearch.cluster.routing.allocation.decider.EnableAllocationDecider; +import org.opensearch.common.SuppressForbidden; import org.opensearch.common.settings.Settings; import org.opensearch.common.time.DateFormatter; import org.opensearch.common.unit.TimeValue; @@ -838,6 +839,7 @@ public void testTimedOutQuery() throws Exception { protected Query doToQuery(QueryShardContext context) { return new TermQuery(new Term("k", "hello")) { @Override + @SuppressForbidden(reason = "Waiting 10x the timeout") public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { // Create the weight before sleeping. Otherwise, TermStates.build() (in the call to super.createWeight()) will // sometimes throw an exception on timeout, rather than timing out gracefully. diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java index e2db9f85131a9..a5956929af01d 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java @@ -73,6 +73,7 @@ import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.Priority; import org.opensearch.common.SetOnce; +import org.opensearch.common.SuppressForbidden; import org.opensearch.common.concurrent.GatedCloseable; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; @@ -1268,6 +1269,7 @@ public void testDisconnectsDuringRecovery() throws Exception { redMockTransportService.addSendBehavior(blueMockTransportService, new StubbableTransport.SendRequestBehavior() { private final AtomicInteger count = new AtomicInteger(); + @SuppressForbidden(reason = "Simulating disconnect after sending request with a delay") @Override public void sendRequest( Transport.Connection connection, diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/state/CloseIndexIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/state/CloseIndexIT.java index 8c03dfed169c5..48c20729c1f57 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/state/CloseIndexIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/state/CloseIndexIT.java @@ -346,7 +346,7 @@ public void testCloseWhileDeletingIndices() throws Exception { try { latch.await(); // Add small random delay to reduce exact simultaneous operations - Thread.sleep(randomIntBetween(0, 50)); + Thread.yield(); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new AssertionError(e); @@ -365,21 +365,18 @@ public void testCloseWhileDeletingIndices() throws Exception { latch.countDown(); // Wait for all threads with timeout to prevent hanging - boolean allCompleted = true; for (Thread thread : threads) { thread.join(STANDARD_TIMEOUT.millis()); if (thread.isAlive()) { logger.warn("Thread {} did not complete in time, interrupting", thread.getName()); thread.interrupt(); - allCompleted = false; + thread.join(1000L); + if (thread.isAlive()) { + logger.warn("Thread {} still alive even after interrupting", thread.getName()); + } } } - if (!allCompleted) { - // Give interrupted threads a moment to clean up - Thread.sleep(1000); - } - // Wait for cluster state to stabilize after concurrent operations waitForClusterStateConvergence(); } @@ -573,8 +570,6 @@ public void testRecoverExistingReplica() throws Exception { internalCluster().restartNode(dataNodes.get(1), new InternalTestCluster.RestartCallback() { @Override public Settings onNodeStopped(String nodeName) throws Exception { - Thread.sleep(1000); - Client client = client(dataNodes.get(0)); try { assertBusy(() -> { @@ -733,49 +728,38 @@ private static void closeIndices(final CloseIndexRequestBuilder requestBuilder) } static void assertIndexIsClosed(final String... indices) { - for (int retry = 0; retry < 3; retry++) { - try { + try { + assertBusy(() -> { final ClusterState clusterState = client().admin().cluster().prepareState().get().getState(); - boolean allClosed = true; for (String index : indices) { final IndexMetadata indexMetadata = clusterState.metadata().indices().get(index); - if (indexMetadata == null || indexMetadata.getState() != IndexMetadata.State.CLOSE) { - allClosed = false; - break; - } - } - if (!allClosed && retry < 2) { - Thread.sleep(500); - continue; + assertNotNull(indexMetadata); + assertThat(indexMetadata.getState(), equalTo(IndexMetadata.State.CLOSE)); } + }); - for (String index : indices) { - final IndexMetadata indexMetadata = clusterState.metadata().indices().get(index); - assertThat(indexMetadata.getState(), is(IndexMetadata.State.CLOSE)); - final Settings indexSettings = indexMetadata.getSettings(); - assertThat(indexSettings.hasValue(MetadataIndexStateService.VERIFIED_BEFORE_CLOSE_SETTING.getKey()), is(true)); - assertThat( - indexSettings.getAsBoolean(MetadataIndexStateService.VERIFIED_BEFORE_CLOSE_SETTING.getKey(), false), - is(true) - ); - assertThat(clusterState.routingTable().index(index), notNullValue()); - assertThat(clusterState.blocks().hasIndexBlock(index, MetadataIndexStateService.INDEX_CLOSED_BLOCK), is(true)); - assertThat( - "Index " + index + " must have only 1 block with [id=" + MetadataIndexStateService.INDEX_CLOSED_BLOCK_ID + "]", - clusterState.blocks() - .indices() - .getOrDefault(index, emptySet()) - .stream() - .filter(clusterBlock -> clusterBlock.id() == MetadataIndexStateService.INDEX_CLOSED_BLOCK_ID) - .count(), - equalTo(1L) - ); - } - break; - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new RuntimeException(e); + final ClusterState clusterState = client().admin().cluster().prepareState().get().getState(); + for (String index : indices) { + final IndexMetadata indexMetadata = clusterState.metadata().indices().get(index); + assertThat(indexMetadata.getState(), is(IndexMetadata.State.CLOSE)); + final Settings indexSettings = indexMetadata.getSettings(); + assertThat(indexSettings.hasValue(MetadataIndexStateService.VERIFIED_BEFORE_CLOSE_SETTING.getKey()), is(true)); + assertThat(indexSettings.getAsBoolean(MetadataIndexStateService.VERIFIED_BEFORE_CLOSE_SETTING.getKey(), false), is(true)); + assertThat(clusterState.routingTable().index(index), notNullValue()); + assertThat(clusterState.blocks().hasIndexBlock(index, MetadataIndexStateService.INDEX_CLOSED_BLOCK), is(true)); + assertThat( + "Index " + index + " must have only 1 block with [id=" + MetadataIndexStateService.INDEX_CLOSED_BLOCK_ID + "]", + clusterState.blocks() + .indices() + .getOrDefault(index, emptySet()) + .stream() + .filter(clusterBlock -> clusterBlock.id() == MetadataIndexStateService.INDEX_CLOSED_BLOCK_ID) + .count(), + equalTo(1L) + ); } + } catch (Exception ex) { + throw new AssertionError(ex); } } @@ -830,7 +814,6 @@ private void waitForClusterStateConvergence() { ); } }, STANDARD_TIMEOUT.getSeconds(), TimeUnit.SECONDS); - Thread.sleep(100); } catch (Exception e) { logger.warn("Failed to wait for cluster state convergence", e); } diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/stats/IndexStatsIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/stats/IndexStatsIT.java index 92c61a84ee871..754aa23274d0b 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/stats/IndexStatsIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/stats/IndexStatsIT.java @@ -380,7 +380,6 @@ public void testClearAllCaches() throws Exception { assertThat(indicesStats.getTotal().getQueryCache().getMemorySizeInBytes(), greaterThan(0L)); client().admin().indices().prepareClearCache().execute().actionGet(); - Thread.sleep(100); // Make sure the filter cache entries have been removed... assertBusy(() -> { NodesStatsResponse postClearNodesStats = client().admin() .cluster() diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/store/IndicesStoreIntegrationIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/store/IndicesStoreIntegrationIT.java index 0c6631b8d2307..99aeab704e152 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/store/IndicesStoreIntegrationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/store/IndicesStoreIntegrationIT.java @@ -80,7 +80,6 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; -import static java.lang.Thread.sleep; import static org.opensearch.test.NodeRoles.nonClusterManagerNode; import static org.opensearch.test.NodeRoles.nonDataNode; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; @@ -166,7 +165,7 @@ public void testIndexCleanup() throws Exception { if (randomBoolean()) { // sometimes add cluster-state delay to trigger observers in IndicesStore.ShardActiveRequestHandler BlockClusterStateProcessing disruption = relocateAndBlockCompletion(logger, "test", 0, node_1, node_3); // wait a little so that cluster state observer is registered - sleep(50); + Thread.yield(); logger.info("--> stopping disruption"); disruption.stopDisrupting(); } else { diff --git a/server/src/internalClusterTest/java/org/opensearch/recovery/FullRollingRestartIT.java b/server/src/internalClusterTest/java/org/opensearch/recovery/FullRollingRestartIT.java index df26e6a1a8d66..9b35bbb45c1ed 100644 --- a/server/src/internalClusterTest/java/org/opensearch/recovery/FullRollingRestartIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/recovery/FullRollingRestartIT.java @@ -46,6 +46,7 @@ import org.opensearch.cluster.routing.RecoverySource; import org.opensearch.cluster.routing.UnassignedInfo; import org.opensearch.common.Priority; +import org.opensearch.common.SuppressForbidden; import org.opensearch.common.collect.MapBuilder; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; @@ -538,6 +539,7 @@ public void testDerivedSourceWithMultiFieldsRollingRestart() throws Exception { } } + @SuppressForbidden(reason = "Need to fix: https://github.com/opensearch-project/OpenSearch/issues/20064") public void testDerivedSourceWithConcurrentUpdatesRollingRestart() throws Exception { String mapping = """ { diff --git a/server/src/internalClusterTest/java/org/opensearch/recovery/RelocationIT.java b/server/src/internalClusterTest/java/org/opensearch/recovery/RelocationIT.java index 10f1181b0d128..b6e6d3b0b86c9 100644 --- a/server/src/internalClusterTest/java/org/opensearch/recovery/RelocationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/recovery/RelocationIT.java @@ -928,7 +928,7 @@ public void testRelocationWithDerivedSourceAndConcurrentIndexing() throws Except long id = totalDocCount.incrementAndGet(); client().prepareIndex("test").setId(String.valueOf(id)).setSource("name", "test" + id, "value", id).get(); initialDocCountLatch.countDown(); - Thread.sleep(10); // Small delay to prevent overwhelming + Thread.yield(); // Small delay to prevent overwhelming } catch (Exception e) { logger.error("Error in background indexing", e); } @@ -1029,7 +1029,7 @@ public void testRelocationWithDerivedSourceWithUpdates() throws Exception { .execute() .actionGet(); updatedDocs.add(docId); - Thread.sleep(10); + Thread.yield(); } catch (Exception e) { logger.warn("Error while updating doc with id = {}", docId, e); } diff --git a/server/src/internalClusterTest/java/org/opensearch/recovery/SimpleRecoveryIT.java b/server/src/internalClusterTest/java/org/opensearch/recovery/SimpleRecoveryIT.java index 89f3daa99138b..e381ae827d09b 100644 --- a/server/src/internalClusterTest/java/org/opensearch/recovery/SimpleRecoveryIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/recovery/SimpleRecoveryIT.java @@ -110,7 +110,6 @@ public void testSimpleRecovery() throws Exception { // now start another one so we move some primaries allowNodes("test", 3); - Thread.sleep(200); logger.info("Running Cluster Health"); ensureGreen(); diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryRelocationIT.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryRelocationIT.java index 4cf624ec8da3e..46fdf1e8695e1 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryRelocationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryRelocationIT.java @@ -15,6 +15,7 @@ import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; import org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand; import org.opensearch.common.Priority; +import org.opensearch.common.SuppressForbidden; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.index.query.QueryBuilders; @@ -138,6 +139,7 @@ public void testRemotePrimaryRelocation() throws Exception { ); } + @SuppressForbidden(reason = "Waiting longer than timeout") public void testMixedModeRelocation_RemoteSeedingFail() throws Exception { String docRepNode = internalCluster().startNode(); ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java index 9929a614ea3e4..e53045909a9e2 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java @@ -26,6 +26,7 @@ import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.routing.RecoverySource; import org.opensearch.common.Nullable; +import org.opensearch.common.SuppressForbidden; import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.io.PathUtils; import org.opensearch.common.settings.Settings; @@ -99,6 +100,7 @@ @ThreadLeakFilters(filters = CleanerDaemonThreadLeakFilter.class) @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +@SuppressForbidden(reason = "Need to fix: https://github.com/opensearch-project/OpenSearch/issues/14324") public class RemoteRestoreSnapshotIT extends RemoteSnapshotIT { public RemoteRestoreSnapshotIT(Settings nodeSettings) { diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureAndResiliencyIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureAndResiliencyIT.java index 6b6a96dc42a84..2ba7b50786f52 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureAndResiliencyIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureAndResiliencyIT.java @@ -138,7 +138,6 @@ private void indexDocAndRefresh(BytesReference source, int iterations) throws In client().prepareIndex(INDEX_NAME).setSource(source, MediaTypeRegistry.JSON).get(); refresh(INDEX_NAME); } - Thread.sleep(250); client().prepareIndex(INDEX_NAME).setSource(source, MediaTypeRegistry.JSON).get(); } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreMetadataIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreMetadataIT.java index 3ff4f044dfda7..43734d9f134af 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreMetadataIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreMetadataIT.java @@ -199,7 +199,6 @@ public void testMultipleMetadataFilesPerShard() throws Exception { for (int i = 0; i < refreshCount; i++) { indexDocs(); client().admin().indices().prepareRefresh(INDEX_NAME).get(); - Thread.sleep(100); } RemoteStoreMetadataResponse response = client().admin().cluster().prepareRemoteStoreMetadata(INDEX_NAME, null).get(); diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStorePinnedTimestampsGarbageCollectionIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStorePinnedTimestampsGarbageCollectionIT.java index f65040927e70c..f872956058f01 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStorePinnedTimestampsGarbageCollectionIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStorePinnedTimestampsGarbageCollectionIT.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.time.Duration; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -35,6 +36,7 @@ import static org.opensearch.index.remote.RemoteStoreEnums.DataType.DATA; import static org.opensearch.index.remote.RemoteStoreEnums.DataType.METADATA; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertNoFailures; +import static org.awaitility.Awaitility.await; @LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/16088") @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) @@ -50,13 +52,11 @@ protected Settings nodeSettings(int nodeOrdinal) { .build(); } - private void keepPinnedTimestampSchedulerUpdated() throws InterruptedException { + private static void keepPinnedTimestampSchedulerUpdated() { long currentTime = System.currentTimeMillis(); - int maxRetry = 10; - while (maxRetry > 0 && RemoteStorePinnedTimestampService.getPinnedTimestamps().v1() <= currentTime) { - Thread.sleep(1000); - maxRetry--; - } + await().atMost(Duration.ofSeconds(10)) + .pollDelay(Duration.ofSeconds(1)) + .until(() -> RemoteStorePinnedTimestampService.getPinnedTimestamps().v1() > currentTime); } ActionListener noOpActionListener = new ActionListener<>() { diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java index 4053ce5f6c678..0762b11b4ed06 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java @@ -680,7 +680,7 @@ public void testZeroLagOnCreateIndex() throws InterruptedException { ensureGreen(INDEX_NAME); long currentTimeNs = System.nanoTime(); while (currentTimeNs == System.nanoTime()) { - Thread.sleep(10); + Thread.yield(); } for (int i = 0; i < numOfShards; i++) { diff --git a/server/src/internalClusterTest/java/org/opensearch/search/SearchCancellationIT.java b/server/src/internalClusterTest/java/org/opensearch/search/SearchCancellationIT.java index 5a19e2b841c08..c3f68b17e28e0 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/SearchCancellationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/SearchCancellationIT.java @@ -47,6 +47,7 @@ import org.opensearch.action.search.SearchScrollAction; import org.opensearch.action.search.ShardSearchFailure; import org.opensearch.action.support.WriteRequest; +import org.opensearch.common.SuppressForbidden; import org.opensearch.common.action.ActionFuture; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; @@ -606,6 +607,7 @@ public void testMSearchChildReqCancellationWithHybridTimeout() throws Exception * @param milliseconds The minimum time to sleep * @throws InterruptedException if interrupted during sleep */ + @SuppressForbidden(reason = "Waiting longer than timeout") private static void sleepForAtLeast(long milliseconds) throws InterruptedException { Thread.sleep(milliseconds + 100L); } diff --git a/server/src/internalClusterTest/java/org/opensearch/search/SearchTimeoutIT.java b/server/src/internalClusterTest/java/org/opensearch/search/SearchTimeoutIT.java index b36ecfab57e58..45cac519f12e9 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/SearchTimeoutIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/SearchTimeoutIT.java @@ -36,6 +36,7 @@ import org.opensearch.OpenSearchException; import org.opensearch.action.search.SearchResponse; +import org.opensearch.common.SuppressForbidden; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.plugins.Plugin; @@ -131,6 +132,7 @@ public static class ScriptedTimeoutPlugin extends MockScriptPlugin { static final String SCRIPT_NAME = "search_timeout"; @Override + @SuppressForbidden(reason = "Simulating a slow task by sleeping") public Map, Object>> pluginScripts() { return Collections.singletonMap(SCRIPT_NAME, params -> { try { diff --git a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/bucket/TermsDocCountErrorIT.java b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/bucket/TermsDocCountErrorIT.java index add6b71cb1753..780fc03033255 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/bucket/TermsDocCountErrorIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/bucket/TermsDocCountErrorIT.java @@ -228,7 +228,7 @@ public void setupSuiteScopeCluster() throws Exception { // correctly calculated for concurrent segment search at the slice level. // See https://github.com/opensearch-project/OpenSearch/issues/11680" forceMerge(1); - Thread.sleep(5000); // Sleep 5s to ensure force merge completes + refresh(); } private void assertDocCountErrorWithinBounds(int size, SearchResponse accurateResponse, SearchResponse testResponse) { diff --git a/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java b/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java index 558ce82071c21..7a3dd9c4b64e9 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/backpressure/SearchBackpressureIT.java @@ -18,6 +18,7 @@ import org.opensearch.action.search.SearchTask; import org.opensearch.action.support.ActionFilters; import org.opensearch.action.support.HandledTransportAction; +import org.opensearch.common.SuppressForbidden; import org.opensearch.common.inject.Inject; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; @@ -438,6 +439,7 @@ protected void doExecute(Task task, TestRequest request, ActionListener request) throws InterruptedException { switch (request.getType()) { case HIGH_CPU: diff --git a/server/src/internalClusterTest/java/org/opensearch/search/query/SimpleQueryStringIT.java b/server/src/internalClusterTest/java/org/opensearch/search/query/SimpleQueryStringIT.java index c30497828fc82..72a3f4076aed6 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/query/SimpleQueryStringIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/query/SimpleQueryStringIT.java @@ -744,7 +744,7 @@ public void testDynamicClauseCountUpdate() throws Exception { client().prepareSearch("testdynamic").setQuery(qb).get(); }); - assert (e.getDetailedMessage().contains("maxClauseCount is set to " + (CLUSTER_MAX_CLAUSE_COUNT - 1))); + assertThat(e.getDetailedMessage(), containsString("maxClauseCount is set to " + (CLUSTER_MAX_CLAUSE_COUNT - 1))); // increase clause count by 2 assertAcked( @@ -754,8 +754,6 @@ public void testDynamicClauseCountUpdate() throws Exception { .setTransientSettings(Settings.builder().put(INDICES_MAX_CLAUSE_COUNT_SETTING.getKey(), CLUSTER_MAX_CLAUSE_COUNT + 2)) ); - Thread.sleep(1); - SearchResponse response = client().prepareSearch("testdynamic").setQuery(qb).get(); assertHitCount(response, 1); assertHits(response.getHits(), "1"); diff --git a/server/src/internalClusterTest/java/org/opensearch/search/sort/FieldSortIT.java b/server/src/internalClusterTest/java/org/opensearch/search/sort/FieldSortIT.java index c1a869a43d8aa..3f64f9d4084fb 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/sort/FieldSortIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/sort/FieldSortIT.java @@ -47,6 +47,7 @@ import org.opensearch.action.search.ShardSearchFailure; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.Numbers; +import org.opensearch.common.SuppressForbidden; import org.opensearch.common.settings.Settings; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.common.xcontent.XContentType; @@ -1249,6 +1250,7 @@ public void testSortMissingNumbersMinMax() throws Exception { assertThat(searchResponse.getHits().getAt(2).getId(), equalTo("3")); } + @SuppressForbidden(reason = "WTF?") public void testSortMissingStrings() throws IOException, InterruptedException { assertAcked( prepareCreate("test").setMapping( diff --git a/server/src/internalClusterTest/java/org/opensearch/search/stats/ConcurrentSearchStatsIT.java b/server/src/internalClusterTest/java/org/opensearch/search/stats/ConcurrentSearchStatsIT.java index eb93abf7c38f2..93e7d1ae75650 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/stats/ConcurrentSearchStatsIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/stats/ConcurrentSearchStatsIT.java @@ -15,6 +15,7 @@ import org.opensearch.action.admin.indices.stats.IndicesStatsRequestBuilder; import org.opensearch.action.admin.indices.stats.IndicesStatsResponse; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.SuppressForbidden; import org.opensearch.common.settings.Settings; import org.opensearch.index.IndexModule; import org.opensearch.index.IndexSettings; @@ -171,8 +172,7 @@ public void testAvgConcurrencyNodeLevel() throws InterruptedException { ); forceMerge(); - // Sleep to make sure force merge completes - Thread.sleep(1000); + refresh(); // ensure merged segments are searchable client().prepareSearch(INDEX_1).execute().actionGet(); nodesStatsResponse = client().admin().cluster().prepareNodesStats().execute().actionGet(); @@ -212,8 +212,7 @@ public void testAvgConcurrencyNodeLevel() throws InterruptedException { ); forceMerge(); - // Sleep to make sure force merge completes - Thread.sleep(1000); + refresh(); // ensure merged segments are searchable client().prepareSearch(INDEX_2).execute().actionGet(); nodesStatsResponse = client().admin().cluster().prepareNodesStats().execute().actionGet(); @@ -281,8 +280,7 @@ public void testAvgConcurrencyIndexLevel() throws InterruptedException { assertEquals(expectedConcurrency, stats.getTotal().getSearch().getTotal().getConcurrentAvgSliceCount(), 0); forceMerge(); - // Sleep to make sure force merge completes - Thread.sleep(1000); + refresh(); // ensure merged segments are searchable client().prepareSearch(INDEX).execute().actionGet(); indicesStatsResponse = client().admin().indices().prepareStats().execute().actionGet(); @@ -359,6 +357,7 @@ public static class ScriptedDelayedPlugin extends MockScriptPlugin { static final String SCRIPT_NAME = "search_timeout"; @Override + @SuppressForbidden(reason = "Sleeping to simulate slow task") public Map, Object>> pluginScripts() { return Collections.singletonMap(SCRIPT_NAME, params -> { try { diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/ConcurrentSnapshotsIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/ConcurrentSnapshotsIT.java index af17ef596b6a4..5489b3223cbd8 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/ConcurrentSnapshotsIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/ConcurrentSnapshotsIT.java @@ -33,6 +33,7 @@ import org.opensearch.OpenSearchException; import org.opensearch.action.StepListener; +import org.opensearch.action.admin.cluster.node.tasks.list.ListTasksRequest; import org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; import org.opensearch.action.admin.cluster.snapshots.get.GetSnapshotsRequest; import org.opensearch.action.admin.cluster.snapshots.status.SnapshotStatus; @@ -58,6 +59,7 @@ import org.opensearch.repositories.RepositoryException; import org.opensearch.repositories.blobstore.BlobStoreRepository; import org.opensearch.snapshots.mockstore.MockRepository; +import org.opensearch.tasks.TaskInfo; import org.opensearch.test.InternalTestCluster; import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.test.disruption.NetworkDisruption; @@ -86,6 +88,7 @@ import static org.hamcrest.Matchers.endsWith; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.hasItem; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; @@ -152,7 +155,20 @@ public void testSettingsUpdateFailWhenCreateSnapshotInProgress() throws Exceptio repoName, dataNode ); - Thread.sleep(1000); // Wait for the snapshot to start + // Poll for snapshot to actually start + assertBusy( + () -> assertThat( + client().admin() + .cluster() + .prepareGetSnapshots(repoName) + .addSnapshots("slow-snapshot") + .get() + .getSnapshots() + .getFirst() + .state(), + equalTo(SnapshotState.IN_PROGRESS) + ) + ); assertFalse(createSlowFuture.isDone()); // Ensure the snapshot is still in progress // Attempt to update the repository settings while the snapshot is in progress settings.put("chunk_size", 2000, ByteSizeUnit.BYTES); @@ -163,7 +179,7 @@ public void testSettingsUpdateFailWhenCreateSnapshotInProgress() throws Exceptio assertSuccessful(createSlowFuture); // Ensure the snapshot completes successfully } - public void testSettingsUpdateFailWhenDeleteSnapshotInProgress() throws InterruptedException { + public void testSettingsUpdateFailWhenDeleteSnapshotInProgress() throws Exception { // Start a cluster with a cluster manager node and a data node String clusterManagerName = internalCluster().startClusterManagerOnlyNode(); internalCluster().startDataOnlyNode(); @@ -179,7 +195,13 @@ public void testSettingsUpdateFailWhenDeleteSnapshotInProgress() throws Interrup assertEquals(RestStatus.OK, snapshotInfo.status()); // Ensure the snapshot status is OK // Start deleting the snapshot and block it on the cluster manager node ActionFuture future = deleteSnapshotBlockedOnClusterManager(repoName, snapshotName); - Thread.sleep(1000); // Wait for the delete operation to start + // Wait for the delete operation to start + assertBusy( + () -> assertThat( + client().admin().cluster().listTasks(new ListTasksRequest()).get().getTasks().stream().map(TaskInfo::getAction).toList(), + hasItem(equalTo("cluster:admin/snapshot/delete")) + ) + ); assertFalse(future.isDone()); // Ensure the delete operation is still in progress // Attempt to update the repository settings while the delete operation is in progress Settings.Builder newSettings = randomRepositorySettings(); diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/DedicatedClusterSnapshotRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/DedicatedClusterSnapshotRestoreIT.java index d547c33322a28..02b8903f77cc2 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/DedicatedClusterSnapshotRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/DedicatedClusterSnapshotRestoreIT.java @@ -478,8 +478,6 @@ public void testSnapshotWithStuckNode() throws Exception { .cluster() .prepareDeleteSnapshot("test-repo", "test-snap") .execute(); - // Make sure that abort makes some progress - Thread.sleep(100); unblockNode("test-repo", blockedNode); logger.info("--> stopping node [{}]", blockedNode); stopNode(blockedNode); diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotV2IT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotV2IT.java index 50f1e056248a4..ba8baacd67e46 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotV2IT.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotV2IT.java @@ -33,6 +33,7 @@ import java.nio.file.Files; import java.nio.file.NoSuchFileException; import java.nio.file.Path; +import java.time.Duration; import java.util.Arrays; import java.util.Collection; import java.util.List; @@ -45,6 +46,7 @@ import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; +import static org.awaitility.Awaitility.await; @ThreadLeakFilters(filters = CleanerDaemonThreadLeakFilter.class) @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) @@ -85,13 +87,11 @@ public void teardown() { private static final String REMOTE_REPO_NAME = "remote-store-repo-name"; - private void keepPinnedTimestampSchedulerUpdated() throws InterruptedException { + private void keepPinnedTimestampSchedulerUpdated() { long currentTime = System.currentTimeMillis(); - int maxRetry = 10; - while (maxRetry > 0 && RemoteStorePinnedTimestampService.getPinnedTimestamps().v1() <= currentTime) { - Thread.sleep(1000); - maxRetry--; - } + await().atMost(Duration.ofSeconds(10)) + .pollDelay(Duration.ofSeconds(1)) + .until(() -> RemoteStorePinnedTimestampService.getPinnedTimestamps().v1() > currentTime); } public void testDeleteShallowCopyV2() throws Exception { diff --git a/server/src/internalClusterTest/java/org/opensearch/update/UpdateIT.java b/server/src/internalClusterTest/java/org/opensearch/update/UpdateIT.java index 66d3ac2aaea25..623c7d6533d88 100644 --- a/server/src/internalClusterTest/java/org/opensearch/update/UpdateIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/update/UpdateIT.java @@ -44,6 +44,7 @@ import org.opensearch.action.update.UpdateRequest; import org.opensearch.action.update.UpdateRequestBuilder; import org.opensearch.action.update.UpdateResponse; +import org.opensearch.common.SuppressForbidden; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.xcontent.XContentFactory; @@ -739,6 +740,7 @@ public void onFailure(Exception e) { } @Override + @SuppressForbidden(reason = "Sleeping in a loop") public void run() { try { startLatch.await(); diff --git a/server/src/internalClusterTest/java/org/opensearch/versioning/SimpleVersioningIT.java b/server/src/internalClusterTest/java/org/opensearch/versioning/SimpleVersioningIT.java index 8cd7b419f7989..16dd81eee668f 100644 --- a/server/src/internalClusterTest/java/org/opensearch/versioning/SimpleVersioningIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/versioning/SimpleVersioningIT.java @@ -40,6 +40,7 @@ import org.opensearch.action.index.IndexResponse; import org.opensearch.action.search.SearchResponse; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.SuppressForbidden; import org.opensearch.common.lucene.uid.Versions; import org.opensearch.common.settings.Settings; import org.opensearch.core.action.ActionResponse; @@ -172,6 +173,7 @@ public void testExternalGTE() throws Exception { assertThat(deleteResponse.getVersion(), equalTo(18L)); } + @SuppressForbidden(reason = "Fixed sleeps are prone to flakiness but no documented failures here") public void testExternalVersioning() throws Exception { createIndex("test"); ensureGreen(); @@ -801,6 +803,7 @@ public void run() { } } + @SuppressForbidden(reason = "Fixed sleeps are prone to flakiness but no documented failures here") public void testDeleteNotLost() throws Exception { // We require only one shard for this test, so that the 2nd delete provokes pruning the deletes map: