From 2a391f3dfa4fb03fc061829e1ae5c9399fecf9ca Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Tue, 12 Feb 2019 07:47:50 -0700 Subject: [PATCH 1/2] Add details about what acquired the shard lock last This adds a `details` parameter to shard locking in `NodeEnvironment`. This is intended to be used for diagnosing issues such as ``` 1> [2019-02-11T14:34:19,262][INFO ][o.e.c.m.MetaDataDeleteIndexService] [node_s0] [.tasks/oSYOG0-9SHOx_pfAoiSExQ] deleting index 1> [2019-02-11T14:34:19,279][WARN ][o.e.i.IndicesService ] [node_s0] [.tasks/oSYOG0-9SHOx_pfAoiSExQ] failed to delete index 1> org.elasticsearch.env.ShardLockObtainFailedException: [.tasks][0]: obtaining shard lock timed out after 0ms 1> at org.elasticsearch.env.NodeEnvironment$InternalShardLock.acquire(NodeEnvironment.java:736) ~[main/:?] 1> at org.elasticsearch.env.NodeEnvironment.shardLock(NodeEnvironment.java:655) ~[main/:?] 1> at org.elasticsearch.env.NodeEnvironment.lockAllForIndex(NodeEnvironment.java:601) ~[main/:?] 1> at org.elasticsearch.env.NodeEnvironment.deleteIndexDirectorySafe(NodeEnvironment.java:554) ~[main/:?] ``` In the hope that we will be able to determine why the shard is still locked. Relates to #30290 as well as some other CI failures --- .../elasticsearch/env/NodeEnvironment.java | 54 +++++++++++-------- .../org/elasticsearch/index/IndexService.java | 2 +- .../org/elasticsearch/index/store/Store.java | 4 +- .../elasticsearch/indices/IndicesService.java | 2 +- .../env/NodeEnvironmentTests.java | 18 +++---- .../index/shard/IndexShardTests.java | 2 +- .../elasticsearch/index/store/StoreTests.java | 6 +-- .../test/InternalTestCluster.java | 2 +- 8 files changed, 49 insertions(+), 41 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java b/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java index 397d1ee1763dd..5c023ea8489db 100644 --- a/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java +++ b/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java @@ -19,12 +19,8 @@ package org.elasticsearch.env; -import java.io.UncheckedIOException; -import java.util.Iterator; -import java.util.stream.Collectors; -import java.util.stream.Stream; -import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.SegmentInfos; @@ -34,22 +30,22 @@ import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.store.NativeFSLockFactory; import org.apache.lucene.store.SimpleFSDirectory; -import org.elasticsearch.common.CheckedFunction; -import org.elasticsearch.common.lease.Releasable; -import org.elasticsearch.core.internal.io.IOUtils; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.common.CheckedFunction; import org.elasticsearch.common.Randomness; import org.elasticsearch.common.SuppressForbidden; import org.elasticsearch.common.UUIDs; import org.elasticsearch.common.io.FileSystemUtils; +import org.elasticsearch.common.lease.Releasable; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.xcontent.NamedXContentRegistry; +import org.elasticsearch.core.internal.io.IOUtils; import org.elasticsearch.gateway.MetaDataStateFormat; import org.elasticsearch.index.Index; import org.elasticsearch.index.IndexSettings; @@ -63,6 +59,7 @@ import java.io.Closeable; import java.io.IOException; +import java.io.UncheckedIOException; import java.nio.file.AtomicMoveNotSupportedException; import java.nio.file.DirectoryStream; import java.nio.file.FileStore; @@ -74,6 +71,7 @@ import java.util.Collection; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Map; @@ -84,6 +82,8 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Predicate; +import java.util.stream.Collectors; +import java.util.stream.Stream; import static java.util.Collections.unmodifiableSet; @@ -440,7 +440,7 @@ private static String toString(Collection items) { public void deleteShardDirectorySafe(ShardId shardId, IndexSettings indexSettings) throws IOException, ShardLockObtainFailedException { final Path[] paths = availableShardPaths(shardId); logger.trace("deleting shard {} directory, paths: [{}]", shardId, paths); - try (ShardLock lock = shardLock(shardId)) { + try (ShardLock lock = shardLock(shardId, "shard deletion under lock")) { deleteShardDirectoryUnderLock(lock, indexSettings); } } @@ -532,7 +532,7 @@ private static boolean assertPathsDoNotExist(final Path[] paths) { private boolean isShardLocked(ShardId id) { try { - shardLock(id, 0).close(); + shardLock(id, 0, "checking if shard is locked").close(); return false; } catch (ShardLockObtainFailedException ex) { return true; @@ -551,7 +551,7 @@ private boolean isShardLocked(ShardId id) { */ public void deleteIndexDirectorySafe(Index index, long lockTimeoutMS, IndexSettings indexSettings) throws IOException, ShardLockObtainFailedException { - final List locks = lockAllForIndex(index, indexSettings, lockTimeoutMS); + final List locks = lockAllForIndex(index, indexSettings, lockTimeoutMS, "deleting index directory"); try { deleteIndexDirectoryUnderLock(index, indexSettings); } finally { @@ -586,7 +586,8 @@ public void deleteIndexDirectoryUnderLock(Index index, IndexSettings indexSettin * @param lockTimeoutMS how long to wait for acquiring the indices shard locks * @return the {@link ShardLock} instances for this index. */ - public List lockAllForIndex(Index index, IndexSettings settings, long lockTimeoutMS) throws ShardLockObtainFailedException { + public List lockAllForIndex(Index index, IndexSettings settings, long lockTimeoutMS, + final String lockDetails) throws ShardLockObtainFailedException { final int numShards = settings.getNumberOfShards(); if (numShards <= 0) { throw new IllegalArgumentException("settings must contain a non-null > 0 number of shards"); @@ -598,7 +599,7 @@ public List lockAllForIndex(Index index, IndexSettings settings, long try { for (int i = 0; i < numShards; i++) { long timeoutLeftMS = Math.max(0, lockTimeoutMS - TimeValue.nsecToMSec((System.nanoTime() - startTimeNS))); - allLocks.add(shardLock(new ShardId(index, i), timeoutLeftMS)); + allLocks.add(shardLock(new ShardId(index, i), timeoutLeftMS, lockDetails)); } success = true; } finally { @@ -619,10 +620,11 @@ public List lockAllForIndex(Index index, IndexSettings settings, long * Note: this method will return immediately if the lock can't be acquired. * * @param id the shard ID to lock + * @param details information about why the shard is being locked * @return the shard lock. Call {@link ShardLock#close()} to release the lock */ - public ShardLock shardLock(ShardId id) throws ShardLockObtainFailedException { - return shardLock(id, 0); + public ShardLock shardLock(ShardId id, final String details) throws ShardLockObtainFailedException { + return shardLock(id, 0, details); } /** @@ -631,11 +633,12 @@ public ShardLock shardLock(ShardId id) throws ShardLockObtainFailedException { * or recover from a different shard instance into it. If the shard lock can not be acquired * a {@link ShardLockObtainFailedException} is thrown * @param shardId the shard ID to lock + * @param details information about why the shard is being locked * @param lockTimeoutMS the lock timeout in milliseconds * @return the shard lock. Call {@link ShardLock#close()} to release the lock */ - public ShardLock shardLock(final ShardId shardId, long lockTimeoutMS) throws ShardLockObtainFailedException { - logger.trace("acquiring node shardlock on [{}], timeout [{}]", shardId, lockTimeoutMS); + public ShardLock shardLock(final ShardId shardId, long lockTimeoutMS, final String details) throws ShardLockObtainFailedException { + logger.trace("acquiring node shardlock on [{}], timeout [{}], details [{}]", shardId, lockTimeoutMS, details); final InternalShardLock shardLock; final boolean acquired; synchronized (shardLocks) { @@ -652,7 +655,7 @@ public ShardLock shardLock(final ShardId shardId, long lockTimeoutMS) throws Sha if (acquired == false) { boolean success = false; try { - shardLock.acquire(lockTimeoutMS); + shardLock.acquire(lockTimeoutMS, details); success = true; } finally { if (success == false) { @@ -671,11 +674,11 @@ protected void closeInternal() { } /** - * A functional interface that people can use to reference {@link #shardLock(ShardId, long)} + * A functional interface that people can use to reference {@link #shardLock(ShardId, long, String)} */ @FunctionalInterface public interface ShardLocker { - ShardLock lock(ShardId shardId, long lockTimeoutMS) throws ShardLockObtainFailedException; + ShardLock lock(ShardId shardId, long lockTimeoutMS, String lockDetails) throws ShardLockObtainFailedException; } /** @@ -698,11 +701,13 @@ private final class InternalShardLock { */ private final Semaphore mutex = new Semaphore(1); private int waitCount = 1; // guarded by shardLocks + private String lockDetails; private final ShardId shardId; InternalShardLock(ShardId shardId) { this.shardId = shardId; mutex.acquireUninterruptibly(); + lockDetails = "initial creation lock"; } protected void release() { @@ -730,11 +735,14 @@ private void decWaitCount() { } } - void acquire(long timeoutInMillis) throws ShardLockObtainFailedException { + void acquire(long timeoutInMillis, final String details) throws ShardLockObtainFailedException { try { - if (mutex.tryAcquire(timeoutInMillis, TimeUnit.MILLISECONDS) == false) { + if (mutex.tryAcquire(timeoutInMillis, TimeUnit.MILLISECONDS)) { + lockDetails = details; + } else { throw new ShardLockObtainFailedException(shardId, - "obtaining shard lock timed out after " + timeoutInMillis + "ms"); + "obtaining shard lock timed out after " + timeoutInMillis + "ms, previous lock details: [" + lockDetails + + "] trying to lock for [" + details + "]"); } } catch (InterruptedException e) { Thread.currentThread().interrupt(); diff --git a/server/src/main/java/org/elasticsearch/index/IndexService.java b/server/src/main/java/org/elasticsearch/index/IndexService.java index 1b1784495e685..85822d29d314c 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexService.java +++ b/server/src/main/java/org/elasticsearch/index/IndexService.java @@ -334,7 +334,7 @@ public synchronized IndexShard createShard( IndexShard indexShard = null; ShardLock lock = null; try { - lock = nodeEnv.shardLock(shardId, TimeUnit.SECONDS.toMillis(5)); + lock = nodeEnv.shardLock(shardId, TimeUnit.SECONDS.toMillis(5), "shard creation"); eventListener.beforeIndexShardCreated(shardId, indexSettings); ShardPath path; try { diff --git a/server/src/main/java/org/elasticsearch/index/store/Store.java b/server/src/main/java/org/elasticsearch/index/store/Store.java index c693d6b9d80fe..fd7dff3f5f840 100644 --- a/server/src/main/java/org/elasticsearch/index/store/Store.java +++ b/server/src/main/java/org/elasticsearch/index/store/Store.java @@ -436,7 +436,7 @@ private void closeInternal() { */ public static MetadataSnapshot readMetadataSnapshot(Path indexLocation, ShardId shardId, NodeEnvironment.ShardLocker shardLocker, Logger logger) throws IOException { - try (ShardLock lock = shardLocker.lock(shardId, TimeUnit.SECONDS.toMillis(5)); + try (ShardLock lock = shardLocker.lock(shardId, TimeUnit.SECONDS.toMillis(5), "read metadata snapshot"); Directory dir = new SimpleFSDirectory(indexLocation)) { failIfCorrupted(dir, shardId); return new MetadataSnapshot(null, dir, logger); @@ -457,7 +457,7 @@ public static MetadataSnapshot readMetadataSnapshot(Path indexLocation, ShardId */ public static void tryOpenIndex(Path indexLocation, ShardId shardId, NodeEnvironment.ShardLocker shardLocker, Logger logger) throws IOException, ShardLockObtainFailedException { - try (ShardLock lock = shardLocker.lock(shardId, TimeUnit.SECONDS.toMillis(5)); + try (ShardLock lock = shardLocker.lock(shardId, TimeUnit.SECONDS.toMillis(5), "open index"); Directory dir = new SimpleFSDirectory(indexLocation)) { failIfCorrupted(dir, shardId); SegmentInfos segInfo = Lucene.readSegmentInfos(dir); diff --git a/server/src/main/java/org/elasticsearch/indices/IndicesService.java b/server/src/main/java/org/elasticsearch/indices/IndicesService.java index 32cbc4f70212b..6dd865306ecdf 100644 --- a/server/src/main/java/org/elasticsearch/indices/IndicesService.java +++ b/server/src/main/java/org/elasticsearch/indices/IndicesService.java @@ -1031,7 +1031,7 @@ public void processPendingDeletes(Index index, IndexSettings indexSettings, Time throws IOException, InterruptedException, ShardLockObtainFailedException { logger.debug("{} processing pending deletes", index); final long startTimeNS = System.nanoTime(); - final List shardLocks = nodeEnv.lockAllForIndex(index, indexSettings, timeout.millis()); + final List shardLocks = nodeEnv.lockAllForIndex(index, indexSettings, timeout.millis(), "process pending deletes"); int numRemoved = 0; try { Map locks = new HashMap<>(); diff --git a/server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java b/server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java index a667514fa7ef2..b5ef0aa72ae3e 100644 --- a/server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java +++ b/server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java @@ -135,11 +135,11 @@ public void testShardLock() throws Exception { final NodeEnvironment env = newNodeEnvironment(); Index index = new Index("foo", "fooUUID"); - ShardLock fooLock = env.shardLock(new ShardId(index, 0)); + ShardLock fooLock = env.shardLock(new ShardId(index, 0), "1"); assertEquals(new ShardId(index, 0), fooLock.getShardId()); try { - env.shardLock(new ShardId(index, 0)); + env.shardLock(new ShardId(index, 0), "2"); fail("shard is locked"); } catch (ShardLockObtainFailedException ex) { // expected @@ -149,7 +149,7 @@ public void testShardLock() throws Exception { Files.createDirectories(path.resolve("1")); } try { - env.lockAllForIndex(index, idxSettings, randomIntBetween(0, 10)); + env.lockAllForIndex(index, idxSettings, randomIntBetween(0, 10), "3"); fail("shard 0 is locked"); } catch (ShardLockObtainFailedException ex) { // expected @@ -157,11 +157,11 @@ public void testShardLock() throws Exception { fooLock.close(); // can lock again? - env.shardLock(new ShardId(index, 0)).close(); + env.shardLock(new ShardId(index, 0), "4").close(); - List locks = env.lockAllForIndex(index, idxSettings, randomIntBetween(0, 10)); + List locks = env.lockAllForIndex(index, idxSettings, randomIntBetween(0, 10), "5"); try { - env.shardLock(new ShardId(index, 0)); + env.shardLock(new ShardId(index, 0), "6"); fail("shard is locked"); } catch (ShardLockObtainFailedException ex) { // expected @@ -239,7 +239,7 @@ public void testResolveIndexFolders() throws Exception { public void testDeleteSafe() throws Exception { final NodeEnvironment env = newNodeEnvironment(); final Index index = new Index("foo", "fooUUID"); - ShardLock fooLock = env.shardLock(new ShardId(index, 0)); + ShardLock fooLock = env.shardLock(new ShardId(index, 0), "1"); assertEquals(new ShardId(index, 0), fooLock.getShardId()); for (Path path : env.indexPaths(index)) { @@ -295,7 +295,7 @@ public void onFailure(Exception e) { @Override protected void doRun() throws Exception { start.await(); - try (ShardLock autoCloses = env.shardLock(new ShardId(index, 0))) { + try (ShardLock autoCloses = env.shardLock(new ShardId(index, 0), "2")) { blockLatch.countDown(); Thread.sleep(randomIntBetween(1, 10)); } @@ -354,7 +354,7 @@ public void run() { int shard = randomIntBetween(0, counts.length - 1); try { try (ShardLock autoCloses = env.shardLock(new ShardId("foo", "fooUUID", shard), - scaledRandomIntBetween(0, 10))) { + scaledRandomIntBetween(0, 10), "1")) { counts[shard].value++; countsAtomic[shard].incrementAndGet(); assertEquals(flipFlop[shard].incrementAndGet(), 1); diff --git a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java index 316ed39574c0c..fadac3b32237c 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java @@ -262,7 +262,7 @@ public void testFailShard() throws Exception { assertEquals(shardStateMetaData, getShardStateMetadata(shard)); // but index can't be opened for a failed shard assertThat("store index should be corrupted", StoreUtils.canOpenIndex(logger, shardPath.resolveIndex(), shard.shardId(), - (shardId, lockTimeoutMS) -> new DummyShardLock(shardId)), + (shardId, lockTimeoutMS, details) -> new DummyShardLock(shardId)), equalTo(false)); } diff --git a/server/src/test/java/org/elasticsearch/index/store/StoreTests.java b/server/src/test/java/org/elasticsearch/index/store/StoreTests.java index d6690fd27cc8b..42b9b01df3b14 100644 --- a/server/src/test/java/org/elasticsearch/index/store/StoreTests.java +++ b/server/src/test/java/org/elasticsearch/index/store/StoreTests.java @@ -925,17 +925,17 @@ public void testCanOpenIndex() throws IOException { IndexWriterConfig iwc = newIndexWriterConfig(); Path tempDir = createTempDir(); final BaseDirectoryWrapper dir = newFSDirectory(tempDir); - assertFalse(StoreUtils.canOpenIndex(logger, tempDir, shardId, (id, l) -> new DummyShardLock(id))); + assertFalse(StoreUtils.canOpenIndex(logger, tempDir, shardId, (id, l, d) -> new DummyShardLock(id))); IndexWriter writer = new IndexWriter(dir, iwc); Document doc = new Document(); doc.add(new StringField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); writer.commit(); writer.close(); - assertTrue(StoreUtils.canOpenIndex(logger, tempDir, shardId, (id, l) -> new DummyShardLock(id))); + assertTrue(StoreUtils.canOpenIndex(logger, tempDir, shardId, (id, l, d) -> new DummyShardLock(id))); Store store = new Store(shardId, INDEX_SETTINGS, dir, new DummyShardLock(shardId)); store.markStoreCorrupted(new CorruptIndexException("foo", "bar")); - assertFalse(StoreUtils.canOpenIndex(logger, tempDir, shardId, (id, l) -> new DummyShardLock(id))); + assertFalse(StoreUtils.canOpenIndex(logger, tempDir, shardId, (id, l, d) -> new DummyShardLock(id))); store.close(); } diff --git a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java index 0aee6c45a9129..5964cc4a0639b 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java @@ -2497,7 +2497,7 @@ public void assertAfterTest() throws IOException { Set shardIds = env.lockedShards(); for (ShardId id : shardIds) { try { - env.shardLock(id, TimeUnit.SECONDS.toMillis(5)).close(); + env.shardLock(id, TimeUnit.SECONDS.toMillis(5), "InternalTestCluster assert after test").close(); } catch (ShardLockObtainFailedException ex) { fail("Shard " + id + " is still locked after 5 sec waiting"); } From 0efd394066402c17cf3b43b866e5aa6452556188 Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Tue, 26 Feb 2019 11:39:04 -0700 Subject: [PATCH 2/2] Change argument order, make args final --- .../elasticsearch/env/NodeEnvironment.java | 25 ++++++++++--------- .../org/elasticsearch/index/IndexService.java | 2 +- .../org/elasticsearch/index/store/Store.java | 4 +-- .../elasticsearch/indices/IndicesService.java | 2 +- .../env/NodeEnvironmentTests.java | 8 +++--- .../test/InternalTestCluster.java | 2 +- 6 files changed, 22 insertions(+), 21 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java b/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java index 5c023ea8489db..67082242abec2 100644 --- a/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java +++ b/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java @@ -532,7 +532,7 @@ private static boolean assertPathsDoNotExist(final Path[] paths) { private boolean isShardLocked(ShardId id) { try { - shardLock(id, 0, "checking if shard is locked").close(); + shardLock(id, "checking if shard is locked").close(); return false; } catch (ShardLockObtainFailedException ex) { return true; @@ -551,7 +551,7 @@ private boolean isShardLocked(ShardId id) { */ public void deleteIndexDirectorySafe(Index index, long lockTimeoutMS, IndexSettings indexSettings) throws IOException, ShardLockObtainFailedException { - final List locks = lockAllForIndex(index, indexSettings, lockTimeoutMS, "deleting index directory"); + final List locks = lockAllForIndex(index, indexSettings, "deleting index directory", lockTimeoutMS); try { deleteIndexDirectoryUnderLock(index, indexSettings); } finally { @@ -586,8 +586,8 @@ public void deleteIndexDirectoryUnderLock(Index index, IndexSettings indexSettin * @param lockTimeoutMS how long to wait for acquiring the indices shard locks * @return the {@link ShardLock} instances for this index. */ - public List lockAllForIndex(Index index, IndexSettings settings, long lockTimeoutMS, - final String lockDetails) throws ShardLockObtainFailedException { + public List lockAllForIndex(final Index index, final IndexSettings settings, + final String lockDetails, final long lockTimeoutMS) throws ShardLockObtainFailedException { final int numShards = settings.getNumberOfShards(); if (numShards <= 0) { throw new IllegalArgumentException("settings must contain a non-null > 0 number of shards"); @@ -599,7 +599,7 @@ public List lockAllForIndex(Index index, IndexSettings settings, long try { for (int i = 0; i < numShards; i++) { long timeoutLeftMS = Math.max(0, lockTimeoutMS - TimeValue.nsecToMSec((System.nanoTime() - startTimeNS))); - allLocks.add(shardLock(new ShardId(index, i), timeoutLeftMS, lockDetails)); + allLocks.add(shardLock(new ShardId(index, i), lockDetails, timeoutLeftMS)); } success = true; } finally { @@ -624,7 +624,7 @@ public List lockAllForIndex(Index index, IndexSettings settings, long * @return the shard lock. Call {@link ShardLock#close()} to release the lock */ public ShardLock shardLock(ShardId id, final String details) throws ShardLockObtainFailedException { - return shardLock(id, 0, details); + return shardLock(id, details, 0); } /** @@ -637,7 +637,8 @@ public ShardLock shardLock(ShardId id, final String details) throws ShardLockObt * @param lockTimeoutMS the lock timeout in milliseconds * @return the shard lock. Call {@link ShardLock#close()} to release the lock */ - public ShardLock shardLock(final ShardId shardId, long lockTimeoutMS, final String details) throws ShardLockObtainFailedException { + public ShardLock shardLock(final ShardId shardId, final String details, + final long lockTimeoutMS) throws ShardLockObtainFailedException { logger.trace("acquiring node shardlock on [{}], timeout [{}], details [{}]", shardId, lockTimeoutMS, details); final InternalShardLock shardLock; final boolean acquired; @@ -647,7 +648,7 @@ public ShardLock shardLock(final ShardId shardId, long lockTimeoutMS, final Stri shardLock.incWaitCount(); acquired = false; } else { - shardLock = new InternalShardLock(shardId); + shardLock = new InternalShardLock(shardId, details); shardLocks.put(shardId, shardLock); acquired = true; } @@ -674,11 +675,11 @@ protected void closeInternal() { } /** - * A functional interface that people can use to reference {@link #shardLock(ShardId, long, String)} + * A functional interface that people can use to reference {@link #shardLock(ShardId, String, long)} */ @FunctionalInterface public interface ShardLocker { - ShardLock lock(ShardId shardId, long lockTimeoutMS, String lockDetails) throws ShardLockObtainFailedException; + ShardLock lock(ShardId shardId, String lockDetails, long lockTimeoutMS) throws ShardLockObtainFailedException; } /** @@ -704,10 +705,10 @@ private final class InternalShardLock { private String lockDetails; private final ShardId shardId; - InternalShardLock(ShardId shardId) { + InternalShardLock(final ShardId shardId, final String details) { this.shardId = shardId; mutex.acquireUninterruptibly(); - lockDetails = "initial creation lock"; + lockDetails = details; } protected void release() { diff --git a/server/src/main/java/org/elasticsearch/index/IndexService.java b/server/src/main/java/org/elasticsearch/index/IndexService.java index 6f7e3fc9d6320..2a29f1c63667f 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexService.java +++ b/server/src/main/java/org/elasticsearch/index/IndexService.java @@ -334,7 +334,7 @@ public synchronized IndexShard createShard( IndexShard indexShard = null; ShardLock lock = null; try { - lock = nodeEnv.shardLock(shardId, TimeUnit.SECONDS.toMillis(5), "shard creation"); + lock = nodeEnv.shardLock(shardId, "shard creation", TimeUnit.SECONDS.toMillis(5)); eventListener.beforeIndexShardCreated(shardId, indexSettings); ShardPath path; try { diff --git a/server/src/main/java/org/elasticsearch/index/store/Store.java b/server/src/main/java/org/elasticsearch/index/store/Store.java index 4943143bd4713..3f91693c8ce33 100644 --- a/server/src/main/java/org/elasticsearch/index/store/Store.java +++ b/server/src/main/java/org/elasticsearch/index/store/Store.java @@ -436,7 +436,7 @@ private void closeInternal() { */ public static MetadataSnapshot readMetadataSnapshot(Path indexLocation, ShardId shardId, NodeEnvironment.ShardLocker shardLocker, Logger logger) throws IOException { - try (ShardLock lock = shardLocker.lock(shardId, TimeUnit.SECONDS.toMillis(5), "read metadata snapshot"); + try (ShardLock lock = shardLocker.lock(shardId, "read metadata snapshot", TimeUnit.SECONDS.toMillis(5)); Directory dir = new SimpleFSDirectory(indexLocation)) { failIfCorrupted(dir, shardId); return new MetadataSnapshot(null, dir, logger); @@ -457,7 +457,7 @@ public static MetadataSnapshot readMetadataSnapshot(Path indexLocation, ShardId */ public static void tryOpenIndex(Path indexLocation, ShardId shardId, NodeEnvironment.ShardLocker shardLocker, Logger logger) throws IOException, ShardLockObtainFailedException { - try (ShardLock lock = shardLocker.lock(shardId, TimeUnit.SECONDS.toMillis(5), "open index"); + try (ShardLock lock = shardLocker.lock(shardId, "open index", TimeUnit.SECONDS.toMillis(5)); Directory dir = new SimpleFSDirectory(indexLocation)) { failIfCorrupted(dir, shardId); SegmentInfos segInfo = Lucene.readSegmentInfos(dir); diff --git a/server/src/main/java/org/elasticsearch/indices/IndicesService.java b/server/src/main/java/org/elasticsearch/indices/IndicesService.java index 39a5584518d5c..3a0e20c68219b 100644 --- a/server/src/main/java/org/elasticsearch/indices/IndicesService.java +++ b/server/src/main/java/org/elasticsearch/indices/IndicesService.java @@ -1054,7 +1054,7 @@ public void processPendingDeletes(Index index, IndexSettings indexSettings, Time throws IOException, InterruptedException, ShardLockObtainFailedException { logger.debug("{} processing pending deletes", index); final long startTimeNS = System.nanoTime(); - final List shardLocks = nodeEnv.lockAllForIndex(index, indexSettings, timeout.millis(), "process pending deletes"); + final List shardLocks = nodeEnv.lockAllForIndex(index, indexSettings, "process pending deletes", timeout.millis()); int numRemoved = 0; try { Map locks = new HashMap<>(); diff --git a/server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java b/server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java index b5ef0aa72ae3e..89a10af1a6fc2 100644 --- a/server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java +++ b/server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java @@ -149,7 +149,7 @@ public void testShardLock() throws Exception { Files.createDirectories(path.resolve("1")); } try { - env.lockAllForIndex(index, idxSettings, randomIntBetween(0, 10), "3"); + env.lockAllForIndex(index, idxSettings, "3", randomIntBetween(0, 10)); fail("shard 0 is locked"); } catch (ShardLockObtainFailedException ex) { // expected @@ -159,7 +159,7 @@ public void testShardLock() throws Exception { // can lock again? env.shardLock(new ShardId(index, 0), "4").close(); - List locks = env.lockAllForIndex(index, idxSettings, randomIntBetween(0, 10), "5"); + List locks = env.lockAllForIndex(index, idxSettings, "5", randomIntBetween(0, 10)); try { env.shardLock(new ShardId(index, 0), "6"); fail("shard is locked"); @@ -353,8 +353,8 @@ public void run() { for (int i = 0; i < iters; i++) { int shard = randomIntBetween(0, counts.length - 1); try { - try (ShardLock autoCloses = env.shardLock(new ShardId("foo", "fooUUID", shard), - scaledRandomIntBetween(0, 10), "1")) { + try (ShardLock autoCloses = env.shardLock(new ShardId("foo", "fooUUID", shard), "1", + scaledRandomIntBetween(0, 10))) { counts[shard].value++; countsAtomic[shard].incrementAndGet(); assertEquals(flipFlop[shard].incrementAndGet(), 1); diff --git a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java index 0cf1d2c347186..21a2a1309f34d 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java @@ -2462,7 +2462,7 @@ public synchronized void assertAfterTest() throws IOException { Set shardIds = env.lockedShards(); for (ShardId id : shardIds) { try { - env.shardLock(id, TimeUnit.SECONDS.toMillis(5), "InternalTestCluster assert after test").close(); + env.shardLock(id, "InternalTestCluster assert after test", TimeUnit.SECONDS.toMillis(5)).close(); } catch (ShardLockObtainFailedException ex) { fail("Shard " + id + " is still locked after 5 sec waiting"); }