diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECContainerOperationClient.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECContainerOperationClient.java
index adebe1177694..43a96cb52464 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECContainerOperationClient.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECContainerOperationClient.java
@@ -111,6 +111,31 @@ public void closeContainer(long containerID, DatanodeDetails dn,
     }
   }
 
+  public void deleteRecoveringContainer(long containerID, DatanodeDetails dn,
+      ECReplicationConfig repConfig, String encodedToken) throws IOException {
+    XceiverClientSpi xceiverClient = this.xceiverClientManager
+        .acquireClient(singleNodePipeline(dn, repConfig));
+    try {
+      // Before deleting the recovering container, make sure that its state is
+      // RECOVERING. A race condition is still possible, but this check avoids
+      // the most common case.
+ ContainerProtos.ReadContainerResponseProto readContainerResponseProto = + ContainerProtocolCalls + .readContainer(xceiverClient, containerID, encodedToken); + if (readContainerResponseProto + .getContainerData() + .getState() == ContainerProtos.ContainerDataProto.State.RECOVERING) { + ContainerProtocolCalls + .deleteContainer(xceiverClient, containerID, true, encodedToken); + } else { + LOG.warn("Container will not be deleted as it is not a recovering" + + " container {}", containerID); + } + } finally { + this.xceiverClientManager.releaseClient(xceiverClient, false); + } + } + public void createRecoveringContainer(long containerID, DatanodeDetails dn, ECReplicationConfig repConfig, String encodedToken, int replicaIndex) throws IOException { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinator.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinator.java index 9b00596d58bd..b0f33e56a7f8 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinator.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinator.java @@ -135,25 +135,52 @@ public void reconstructECContainerGroup(long containerID, // 1. create target recovering containers. 
     String containerToken = encode(tokenHelper.getContainerToken(cid));
-    for (Map.Entry<Integer, DatanodeDetails> indexDnPair : targetNodeMap
-        .entrySet()) {
-      DatanodeDetails dn = indexDnPair.getValue();
-      Integer index = indexDnPair.getKey();
-      containerOperationClient.createRecoveringContainer(containerID, dn,
-          repConfig, containerToken, index);
-    }
+    List<DatanodeDetails> recoveringContainersCreatedDNs = new ArrayList<>();
+    try {
+      for (Map.Entry<Integer, DatanodeDetails> indexDnPair : targetNodeMap
+          .entrySet()) {
+        DatanodeDetails dn = indexDnPair.getValue();
+        Integer index = indexDnPair.getKey();
+        containerOperationClient
+            .createRecoveringContainer(containerID, dn, repConfig,
+                containerToken, index);
+        recoveringContainersCreatedDNs.add(dn);
+      }
 
-    // 2. Reconstruct and transfer to targets
-    for (BlockLocationInfo blockLocationInfo : blockLocationInfoMap.values()) {
-      reconstructECBlockGroup(blockLocationInfo, repConfig, targetNodeMap);
-    }
+      // 2. Reconstruct and transfer to targets
+      for (BlockLocationInfo blockLocationInfo : blockLocationInfoMap
+          .values()) {
+        reconstructECBlockGroup(blockLocationInfo, repConfig, targetNodeMap);
+      }
 
-    // 3. Close containers
-    for (Map.Entry<Integer, DatanodeDetails> indexDnPair : targetNodeMap
-        .entrySet()) {
-      DatanodeDetails dn = indexDnPair.getValue();
-      containerOperationClient.closeContainer(containerID, dn, repConfig,
-          containerToken);
+      // 3. Close containers
+      for (DatanodeDetails dn : recoveringContainersCreatedDNs) {
+        containerOperationClient
+            .closeContainer(containerID, dn, repConfig, containerToken);
+      }
+    } catch (Exception e) {
+      // On any exception, delete the recovering containers created so far.
+      LOG.warn(
+          "Exception while reconstructing the container {}. Cleaning up"
+              + " all the recovering containers in the reconstruction process.",
+          containerID, e);
+      // Delete only the recovering containers that this thread successfully
+      // created.
+      for (DatanodeDetails dn : recoveringContainersCreatedDNs) {
+        try {
+          containerOperationClient
+              .deleteRecoveringContainer(containerID, dn, repConfig,
+                  containerToken);
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("Deleted the container {}, at the target: {}",
+                containerID, dn);
+          }
+        } catch (IOException ioe) {
+          LOG.error("Exception while deleting the container {} at target: {}",
+              containerID, dn, ioe);
+        }
+      }
+      throw e;
+    }
   }
 
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
index 95907116506a..377fc3e83b6b 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
@@ -84,6 +84,7 @@
 import com.google.common.base.Preconditions;
 import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_VOLUME_CHOOSING_POLICY;
 import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CLOSED_CONTAINER_IO;
+import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_ALREADY_EXISTS;
 import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_INTERNAL_ERROR;
 import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_UNHEALTHY;
 import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.DELETE_ON_NON_EMPTY_CONTAINER;
@@ -268,7 +269,12 @@ ContainerCommandResponseProto handleCreateContainer(
     }
     // Create Container request should be passed a null container as the
     // container would be created here.
-    Preconditions.checkArgument(kvContainer == null);
+    if (kvContainer != null) {
+      return ContainerUtils.logAndReturnError(LOG,
+          new StorageContainerException(
+              "Container creation failed because key value container"
+                  + " already exists", null, CONTAINER_ALREADY_EXISTS), request);
+    }
     long containerID = request.getContainerID();