-
Notifications
You must be signed in to change notification settings - Fork 1.9k
IGNITE-13101 Fixed uncompleted futures leak on node stop in distributed metastorage. #8554
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
b1266df
e802099
592c3eb
07ffeeb
8fb0221
d61db3b
fbceec6
5af0ace
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,6 +28,7 @@ | |
| import java.util.concurrent.ConcurrentHashMap; | ||
| import java.util.concurrent.ConcurrentMap; | ||
| import java.util.concurrent.CopyOnWriteArrayList; | ||
| import java.util.concurrent.locks.ReadWriteLock; | ||
| import java.util.concurrent.locks.ReentrantReadWriteLock; | ||
| import java.util.function.BiConsumer; | ||
| import java.util.function.Predicate; | ||
|
|
@@ -41,6 +42,7 @@ | |
| import org.apache.ignite.internal.GridKernalContext; | ||
| import org.apache.ignite.internal.IgniteInternalFuture; | ||
| import org.apache.ignite.internal.IgniteInterruptedCheckedException; | ||
| import org.apache.ignite.internal.NodeStoppingException; | ||
| import org.apache.ignite.internal.managers.discovery.DiscoveryCustomMessage; | ||
| import org.apache.ignite.internal.managers.discovery.DiscoveryLocalJoinData; | ||
| import org.apache.ignite.internal.managers.discovery.GridDiscoveryManager; | ||
|
|
@@ -59,12 +61,14 @@ | |
| import org.apache.ignite.internal.processors.subscription.GridInternalSubscriptionProcessor; | ||
| import org.apache.ignite.internal.util.IgniteUtils; | ||
| import org.apache.ignite.internal.util.future.GridFutureAdapter; | ||
| import org.apache.ignite.internal.util.typedef.X; | ||
| import org.apache.ignite.internal.util.typedef.internal.S; | ||
| import org.apache.ignite.internal.util.typedef.internal.U; | ||
| import org.apache.ignite.lang.IgniteBiTuple; | ||
| import org.apache.ignite.lang.IgniteFuture; | ||
| import org.apache.ignite.marshaller.jdk.JdkMarshaller; | ||
| import org.apache.ignite.spi.IgniteNodeValidationResult; | ||
| import org.apache.ignite.spi.IgniteSpiException; | ||
| import org.apache.ignite.spi.discovery.DiscoveryDataBag; | ||
| import org.apache.ignite.spi.discovery.DiscoveryDataBag.GridDiscoveryData; | ||
| import org.apache.ignite.spi.discovery.DiscoveryDataBag.JoiningNodeDiscoveryData; | ||
|
|
@@ -175,6 +179,12 @@ public class DistributedMetaStorageImpl extends GridProcessorAdapter | |
| */ | ||
| private final ConcurrentMap<UUID, GridFutureAdapter<Boolean>> updateFuts = new ConcurrentHashMap<>(); | ||
|
|
||
| /** */ | ||
| private final ReadWriteLock updateFutsStopLock = new ReentrantReadWriteLock(); | ||
|
|
||
| /** */ | ||
| private boolean stopped; | ||
|
|
||
| /** | ||
| * Lock to access/update data and component's state. | ||
| */ | ||
|
|
@@ -287,7 +297,7 @@ public DistributedMetaStorageImpl(GridKernalContext ctx) { | |
| finally { | ||
| lock.writeLock().unlock(); | ||
|
|
||
| cancelUpdateFutures(); | ||
| cancelUpdateFutures(nodeStoppingException(), true); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -914,7 +924,7 @@ private String validatePayload(DistributedMetaStorageJoiningNodeData joiningData | |
|
|
||
| ver = INITIAL_VERSION; | ||
|
|
||
| cancelUpdateFutures(); | ||
| cancelUpdateFutures(new IgniteCheckedException("Client was disconnected during the operation."), false); | ||
| } | ||
| finally { | ||
| lock.writeLock().unlock(); | ||
|
|
@@ -924,13 +934,28 @@ private String validatePayload(DistributedMetaStorageJoiningNodeData joiningData | |
| /** | ||
| * Cancel all waiting futures and clear the map. | ||
| */ | ||
| private void cancelUpdateFutures() { | ||
| for (GridFutureAdapter<Boolean> fut : updateFuts.values()) | ||
| fut.onDone(new IgniteCheckedException("Client was disconnected during the operation.")); | ||
| private void cancelUpdateFutures(Exception e, boolean stop) { | ||
| updateFutsStopLock.writeLock().lock(); | ||
|
|
||
| try { | ||
| stopped = stop; | ||
|
|
||
| for (GridFutureAdapter<Boolean> fut : updateFuts.values()) | ||
| fut.onDone(e); | ||
|
|
||
| updateFuts.clear(); | ||
| updateFuts.clear(); | ||
| } | ||
| finally { | ||
| updateFutsStopLock.writeLock().unlock(); | ||
| } | ||
| } | ||
|
|
||
| /** */ | ||
| private static NodeStoppingException nodeStoppingException() { | ||
| return new NodeStoppingException("Node is stopping."); | ||
| } | ||
|
|
||
|
|
||
| /** {@inheritDoc} */ | ||
| @Override public IgniteInternalFuture<?> onReconnected(boolean clusterRestarted) { | ||
| assert isClient; | ||
|
|
@@ -1033,14 +1058,29 @@ else if (!isClient && ver.id() > 0) { | |
| * @throws IgniteCheckedException If there was an error while sending discovery message. | ||
| */ | ||
| private GridFutureAdapter<?> startWrite(String key, byte[] valBytes) throws IgniteCheckedException { | ||
| if (!isSupported(ctx)) | ||
| throw new IgniteCheckedException(NOT_SUPPORTED_MSG); | ||
| GridFutureAdapter<?> validationRes = validateBeforeWrite(key); | ||
|
|
||
| if (validationRes != null) | ||
| return validationRes; | ||
|
|
||
| UUID reqId = UUID.randomUUID(); | ||
|
|
||
| GridFutureAdapter<Boolean> fut = new GridFutureAdapter<>(); | ||
|
|
||
| updateFuts.put(reqId, fut); | ||
| updateFutsStopLock.readLock().lock(); | ||
|
|
||
| try { | ||
| if (stopped) { | ||
| fut.onDone(nodeStoppingException()); | ||
|
|
||
| return fut; | ||
| } | ||
|
|
||
| updateFuts.put(reqId, fut); | ||
| } | ||
| finally { | ||
| updateFutsStopLock.readLock().unlock(); | ||
| } | ||
|
|
||
| DiscoveryCustomMessage msg = new DistributedMetaStorageUpdateMessage(reqId, key, valBytes); | ||
|
|
||
|
|
@@ -1054,14 +1094,29 @@ private GridFutureAdapter<?> startWrite(String key, byte[] valBytes) throws Igni | |
| */ | ||
| private GridFutureAdapter<Boolean> startCas(String key, byte[] expValBytes, byte[] newValBytes) | ||
| throws IgniteCheckedException { | ||
| if (!isSupported(ctx)) | ||
| throw new IgniteCheckedException(NOT_SUPPORTED_MSG); | ||
| GridFutureAdapter<Boolean> validationRes = validateBeforeWrite(key); | ||
|
|
||
| if (validationRes != null) | ||
| return validationRes; | ||
|
|
||
| UUID reqId = UUID.randomUUID(); | ||
|
|
||
| GridFutureAdapter<Boolean> fut = new GridFutureAdapter<>(); | ||
|
|
||
| updateFuts.put(reqId, fut); | ||
| updateFutsStopLock.readLock().lock(); | ||
|
|
||
| try { | ||
| if (stopped) { | ||
| fut.onDone(nodeStoppingException()); | ||
|
|
||
| return fut; | ||
| } | ||
|
|
||
| updateFuts.put(reqId, fut); | ||
| } | ||
| finally { | ||
| updateFutsStopLock.readLock().unlock(); | ||
| } | ||
|
|
||
| DiscoveryCustomMessage msg = new DistributedMetaStorageCasMessage(reqId, key, expValBytes, newValBytes); | ||
|
|
||
|
|
@@ -1070,6 +1125,31 @@ private GridFutureAdapter<Boolean> startCas(String key, byte[] expValBytes, byte | |
| return fut; | ||
| } | ||
|
|
||
| /** */ | ||
| private GridFutureAdapter<Boolean> validateBeforeWrite(String key) throws IgniteCheckedException { | ||
| boolean supported; | ||
|
|
||
| try { | ||
| supported = isSupported(ctx); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it really necessary to validate all the nodes on every operation?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Every component does this, changing the approach is out of the scope. And yes, older node can enter the cluster sometimes. |
||
| } | ||
| catch (Exception e) { | ||
| if (X.hasCause(e, IgniteSpiException.class) && e.getMessage() != null && e.getMessage().contains("Node stopped.")) { | ||
| GridFutureAdapter<Boolean> fut = new GridFutureAdapter<>(); | ||
|
|
||
| fut.onDone(nodeStoppingException()); | ||
|
|
||
| return fut; | ||
| } | ||
|
|
||
| throw e; | ||
| } | ||
|
|
||
| if (!supported) | ||
| throw new IgniteCheckedException(NOT_SUPPORTED_MSG); | ||
|
|
||
| return null; | ||
| } | ||
|
|
||
| /** | ||
| * Invoked when {@link DistributedMetaStorageUpdateMessage} received. Attempts to store received data (depends on | ||
| * current {@link #bridge} value). Invokes failure handler with critical error if attempt failed for some reason. | ||
|
|
@@ -1119,7 +1199,16 @@ private void onAckMessage( | |
| ClusterNode node, | ||
| DistributedMetaStorageUpdateAckMessage msg | ||
| ) { | ||
| GridFutureAdapter<Boolean> fut = updateFuts.remove(msg.requestId()); | ||
| GridFutureAdapter<Boolean> fut; | ||
|
|
||
| updateFutsStopLock.readLock().lock(); | ||
|
|
||
| try { | ||
| fut = updateFuts.remove(msg.requestId()); | ||
| } | ||
| finally { | ||
| updateFutsStopLock.readLock().unlock(); | ||
|
||
| } | ||
|
|
||
| if (fut != null) { | ||
| String errorMsg = msg.errorMessage(); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does it make sense to move this into 'validateBeforeWrite' ?
'validateBeforeWrite' can be executed under read-lock as in normal case it always return null.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes it does, thank you!