From 7d3ccfef3e88da73d758688c4c043606a1835557 Mon Sep 17 00:00:00 2001 From: daniilchik Date: Wed, 2 Oct 2024 13:44:44 +0300 Subject: [PATCH 01/16] HDDS-3498. Shutdown datanode if address is already in use --- .../common/statemachine/EndpointStateMachine.java | 14 ++++++++------ .../states/endpoint/VersionEndpointTask.java | 3 ++- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java index a6c3b11de92..041f24947fe 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java @@ -234,12 +234,14 @@ public void logIfNeeded(Exception ex) { } if (missCounter == 0) { - LOG.warn( - "Unable to communicate to {} server at {} for past {} seconds.", - serverName, - getAddress().getHostString() + ":" + getAddress().getPort(), - TimeUnit.MILLISECONDS.toSeconds(this.getMissedCount() * - getScmHeartbeatInterval(this.conf)), ex); + LOG.error( + "Unable to communicate to {} server at {}:{} for past {} seconds.", + serverName, + address.getAddress(), + address.getPort(), + TimeUnit.MILLISECONDS.toSeconds(this.getMissedCount() * getScmHeartbeatInterval(this.conf)), + ex + ); } if (LOG.isTraceEnabled()) { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java index e702b1e6e15..040bafaf6ab 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java @@ -107,7 +107,8 @@ public EndpointStateMachine.EndPointStates call() throws Exception { } catch (DiskOutOfSpaceException ex) { rpcEndPoint.setState(EndpointStateMachine.EndPointStates.SHUTDOWN); } catch (IOException ex) { - rpcEndPoint.logIfNeeded(ex); + LOG.error(ex.getCause().getMessage(), ex); + rpcEndPoint.setState(EndpointStateMachine.EndPointStates.SHUTDOWN); } finally { rpcEndPoint.unlock(); } From c9e88a2edcbf931cb9c449966ef40d503563a84a Mon Sep 17 00:00:00 2001 From: daniilchik Date: Wed, 2 Oct 2024 16:48:33 +0300 Subject: [PATCH 02/16] HDDS-3498. Shutdown datanode if address is already in use --- .../common/states/endpoint/VersionEndpointTask.java | 6 +++--- .../common/transport/server/XceiverServerGrpc.java | 13 ++++++++++++- .../hadoop/ozone/container/common/TestEndPoint.java | 4 ++-- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java index 040bafaf6ab..bfa851e9e1e 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java @@ -27,6 +27,7 @@ import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil; import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet; import org.apache.hadoop.ozone.container.common.volume.StorageVolume; +import org.apache.hadoop.ozone.container.ozoneimpl.BindException; import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer; import org.apache.hadoop.ozone.protocol.VersionResponse; import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException; @@ -104,11 +105,10 @@ public EndpointStateMachine.EndPointStates call() throws Exception { LOG.debug("Cannot execute GetVersion task as endpoint state machine " + "is in {} state", rpcEndPoint.getState()); } - } catch (DiskOutOfSpaceException ex) { + } catch (DiskOutOfSpaceException | BindException ex) { rpcEndPoint.setState(EndpointStateMachine.EndPointStates.SHUTDOWN); } catch (IOException ex) { - LOG.error(ex.getCause().getMessage(), ex); - rpcEndPoint.setState(EndpointStateMachine.EndPointStates.SHUTDOWN); + rpcEndPoint.logIfNeeded(ex); } finally { rpcEndPoint.unlock(); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java index ad9c5c9d9ca..2652a9737de 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java @@ -51,6 +51,7 @@ import io.opentracing.Span; import io.opentracing.util.GlobalTracer; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; +import org.apache.hadoop.ozone.container.ozoneimpl.BindException; import org.apache.ratis.thirdparty.io.grpc.Server; import org.apache.ratis.thirdparty.io.grpc.ServerInterceptors; import org.apache.ratis.thirdparty.io.grpc.netty.GrpcSslContexts; @@ -185,7 +186,17 @@ public HddsProtos.ReplicationType getServerType() { @Override public void start() throws IOException { if (!isStarted) { - server.start(); + try { + server.start(); + } catch (IOException e) { + LOG.error("Failed to bind to address", e); + if (e.getMessage().contains("Failed to bind to address")) { + //todo custom exception + throw new BindException(e); + } else { + throw e; + } + } int realPort = server.getPort(); if (port == 0) { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java index 2b78b73bcd5..213f9670b1e 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java @@ -336,7 +336,7 @@ public void testGetVersionToInvalidEndpoint() throws Exception { // This version call did NOT work, so endpoint should remain in the same // state. - assertEquals(EndpointStateMachine.EndPointStates.GETVERSION, newState); + assertEquals(EndpointStateMachine.EndPointStates.SHUTDOWN, newState); } } @@ -366,7 +366,7 @@ public void testGetVersionAssertRpcTimeOut() throws Exception { long end = Time.monotonicNow(); scmServerImpl.setRpcResponseDelay(0); assertThat(end - start).isLessThanOrEqualTo(rpcTimeout + tolerance); - assertEquals(EndpointStateMachine.EndPointStates.GETVERSION, newState); + assertEquals(EndpointStateMachine.EndPointStates.SHUTDOWN, newState); } } From 9fd9d9903ce8a04dcd96cf8079230b893c243e33 Mon Sep 17 00:00:00 2001 From: daniilchik Date: Wed, 2 Oct 2024 17:10:10 +0300 Subject: [PATCH 03/16] HDDS-3498. Shutdown datanode if address is already in use --- .../container/ozoneimpl/BindException.java | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BindException.java diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BindException.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BindException.java new file mode 100644 index 00000000000..85648618f88 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BindException.java @@ -0,0 +1,20 @@ +package org.apache.hadoop.ozone.container.ozoneimpl; + +import java.io.IOException; + +public class BindException extends IOException { + public BindException() { + } + + public BindException(String message) { + super(message); + } + + public BindException(String message, Throwable cause) { + super(message, cause); + } + + public BindException(Throwable cause) { + super(cause); + } +} From 20f44700a340112ff9642ec945bc491dfadf3975 Mon Sep 17 00:00:00 2001 From: daniilchik Date: Wed, 2 Oct 2024 17:14:11 +0300 Subject: [PATCH 04/16] HDDS-3498. Shutdown datanode if address is already in use From 4d16aa7b31ba5e07cfb05c5991a23396a450371e Mon Sep 17 00:00:00 2001 From: daniilchik Date: Wed, 2 Oct 2024 17:19:44 +0300 Subject: [PATCH 05/16] HDDS-3498. Shutdown datanode if address is already in use --- .../container/ozoneimpl/BindException.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BindException.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BindException.java index 85648618f88..2ca46d9f281 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BindException.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BindException.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.hadoop.ozone.container.ozoneimpl; import java.io.IOException; From 71588eb77df23e3be3bd94cd124ddb4634883d37 Mon Sep 17 00:00:00 2001 From: daniilchik Date: Wed, 2 Oct 2024 17:23:32 +0300 Subject: [PATCH 06/16] HDDS-3498. Shutdown datanode if address is already in use --- .../hadoop/ozone/container/ozoneimpl/BindException.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BindException.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BindException.java index 2ca46d9f281..573d6f25fd8 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BindException.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BindException.java @@ -19,6 +19,10 @@ import java.io.IOException; +/** + * Exception used to indicate a problem with binding a port. + * Typically, the port is in use. + */ public class BindException extends IOException { public BindException() { } From 58741b5dfad9979ce6f009f87e8b8afb273e0bd7 Mon Sep 17 00:00:00 2001 From: daniilchik Date: Wed, 2 Oct 2024 17:34:06 +0300 Subject: [PATCH 07/16] HDDS-3498. Shutdown datanode if address is already in use --- .../container/ozoneimpl/BindException.java | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BindException.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BindException.java index 573d6f25fd8..be18f95f7be 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BindException.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BindException.java @@ -24,18 +24,18 @@ * Typically, the port is in use. */ public class BindException extends IOException { - public BindException() { - } + public BindException() { + } - public BindException(String message) { - super(message); - } + public BindException(String message) { + super(message); + } - public BindException(String message, Throwable cause) { - super(message, cause); - } + public BindException(String message, Throwable cause) { + super(message, cause); + } - public BindException(Throwable cause) { - super(cause); - } + public BindException(Throwable cause) { + super(cause); + } } From 6017207776d8468fc4319aaaa18861a69182ea9d Mon Sep 17 00:00:00 2001 From: daniilchik Date: Wed, 2 Oct 2024 17:37:50 +0300 Subject: [PATCH 08/16] HDDS-3498. Shutdown datanode if address is already in use --- .../hadoop/ozone/container/ozoneimpl/BindException.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BindException.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BindException.java index be18f95f7be..5758ae0969e 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BindException.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BindException.java @@ -28,14 +28,14 @@ public BindException() { } public BindException(String message) { - super(message); + super(message); } public BindException(String message, Throwable cause) { - super(message, cause); + super(message, cause); } public BindException(Throwable cause) { - super(cause); + super(cause); } } From 29400e577f5b73b39321ab98eb88826f093d24e2 Mon Sep 17 00:00:00 2001 From: daniilchik Date: Wed, 2 Oct 2024 19:22:10 +0300 Subject: [PATCH 09/16] HDDS-3498. Shutdown datanode if address is already in use --- .../container/common/transport/server/XceiverServerGrpc.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java index 2652a9737de..de188aa0670 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java @@ -191,7 +191,6 @@ public void start() throws IOException { } catch (IOException e) { LOG.error("Failed to bind to address", e); if (e.getMessage().contains("Failed to bind to address")) { - //todo custom exception throw new BindException(e); } else { throw e; From 83fbbbe9516200492e734a9a6a075f842f5bd48b Mon Sep 17 00:00:00 2001 From: daniilchik Date: Wed, 2 Oct 2024 21:28:13 +0300 Subject: [PATCH 10/16] HDDS-3498. Shutdown datanode if address is already in use --- .../container/common/statemachine/EndpointStateMachine.java | 5 ++++- .../container/common/transport/server/XceiverServerGrpc.java | 2 +- .../apache/hadoop/ozone/container/common/TestEndPoint.java | 4 ++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java index 041f24947fe..39980d28708 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java @@ -234,12 +234,15 @@ public void logIfNeeded(Exception ex) { } if (missCounter == 0) { + long missedDurationSeconds = TimeUnit.MILLISECONDS.toSeconds( + this.getMissedCount() * getScmHeartbeatInterval(this.conf) + ); LOG.error( "Unable to communicate to {} server at {}:{} for past {} seconds.", serverName, address.getAddress(), address.getPort(), - TimeUnit.MILLISECONDS.toSeconds(this.getMissedCount() * getScmHeartbeatInterval(this.conf)), + missedDurationSeconds, ex ); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java index de188aa0670..5856df8333f 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java @@ -189,7 +189,7 @@ public void start() throws IOException { try { server.start(); } catch (IOException e) { - LOG.error("Failed to bind to address", e); + LOG.error("Error while starting the server", e); if (e.getMessage().contains("Failed to bind to address")) { throw new BindException(e); } else { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java index 213f9670b1e..f81cf8c34fe 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java @@ -326,7 +326,7 @@ public void testGetVersionToInvalidEndpoint() throws Exception { .getReuseableAddress(); try (EndpointStateMachine rpcEndPoint = createEndpoint(conf, nonExistentServerAddress, 1000)) { - rpcEndPoint.setState(EndpointStateMachine.EndPointStates.GETVERSION); + rpcEndPoint.setState(EndpointStateMachine.EndPointStates.SHUTDOWN); DatanodeDetails datanodeDetails = randomDatanodeDetails(); OzoneContainer ozoneContainer = new OzoneContainer(datanodeDetails, conf, ContainerTestUtils.getMockContext(datanodeDetails, ozoneConf)); @@ -353,7 +353,7 @@ public void testGetVersionAssertRpcTimeOut() throws Exception { try (EndpointStateMachine rpcEndPoint = createEndpoint(conf, serverAddress, (int) rpcTimeout)) { - rpcEndPoint.setState(EndpointStateMachine.EndPointStates.GETVERSION); + rpcEndPoint.setState(EndpointStateMachine.EndPointStates.SHUTDOWN); DatanodeDetails datanodeDetails = randomDatanodeDetails(); OzoneContainer ozoneContainer = new OzoneContainer(datanodeDetails, conf, ContainerTestUtils.getMockContext(datanodeDetails, ozoneConf)); From 4a46f4d83f3405a4a8f3c207b562583a27d18290 Mon Sep 17 00:00:00 2001 From: daniilchik Date: Thu, 3 Oct 2024 14:46:33 +0300 Subject: [PATCH 11/16] HDDS-3498. Shutdown datanode if address is already in use --- .../apache/hadoop/ozone/container/common/TestEndPoint.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java index f81cf8c34fe..75f3009bacd 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java @@ -353,7 +353,7 @@ public void testGetVersionAssertRpcTimeOut() throws Exception { try (EndpointStateMachine rpcEndPoint = createEndpoint(conf, serverAddress, (int) rpcTimeout)) { - rpcEndPoint.setState(EndpointStateMachine.EndPointStates.SHUTDOWN); + rpcEndPoint.setState(EndpointStateMachine.EndPointStates.GETVERSION); DatanodeDetails datanodeDetails = randomDatanodeDetails(); OzoneContainer ozoneContainer = new OzoneContainer(datanodeDetails, conf, ContainerTestUtils.getMockContext(datanodeDetails, ozoneConf)); @@ -366,7 +366,7 @@ public void testGetVersionAssertRpcTimeOut() throws Exception { long end = Time.monotonicNow(); scmServerImpl.setRpcResponseDelay(0); assertThat(end - start).isLessThanOrEqualTo(rpcTimeout + tolerance); - assertEquals(EndpointStateMachine.EndPointStates.SHUTDOWN, newState); + assertEquals(EndpointStateMachine.EndPointStates.GETVERSION, newState); } } From 4b4a582adb73ce21489662601f88509ef3844392 Mon Sep 17 00:00:00 2001 From: daniilchik Date: Thu, 3 Oct 2024 16:20:13 +0300 Subject: [PATCH 12/16] HDDS-3498. Shutdown datanode if address is already in use --- .../container/common/states/endpoint/VersionEndpointTask.java | 2 +- .../container/common/transport/server/XceiverServerGrpc.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java index bfa851e9e1e..968c9b9a6e6 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java @@ -17,6 +17,7 @@ package org.apache.hadoop.ozone.container.common.states.endpoint; import java.io.IOException; +import java.net.BindException; import java.util.concurrent.Callable; import org.apache.hadoop.hdds.conf.ConfigurationSource; @@ -27,7 +28,6 @@ import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil; import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet; import org.apache.hadoop.ozone.container.common.volume.StorageVolume; -import org.apache.hadoop.ozone.container.ozoneimpl.BindException; import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer; import org.apache.hadoop.ozone.protocol.VersionResponse; import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java index 5856df8333f..76ccf2dd332 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java @@ -19,6 +19,7 @@ package org.apache.hadoop.ozone.container.common.transport.server; import java.io.IOException; +import java.net.BindException; import java.util.Collections; import java.util.List; import java.util.UUID; @@ -51,7 +52,6 @@ import io.opentracing.Span; import io.opentracing.util.GlobalTracer; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; -import org.apache.hadoop.ozone.container.ozoneimpl.BindException; import org.apache.ratis.thirdparty.io.grpc.Server; import org.apache.ratis.thirdparty.io.grpc.ServerInterceptors; import org.apache.ratis.thirdparty.io.grpc.netty.GrpcSslContexts; @@ -191,7 +191,7 @@ public void start() throws IOException { } catch (IOException e) { LOG.error("Error while starting the server", e); if (e.getMessage().contains("Failed to bind to address")) { - throw new BindException(e); + throw new BindException(); } else { throw e; } From 6253cb10aca69e68686f22844f7db89fef37cebc Mon Sep 17 00:00:00 2001 From: daniilchik Date: Thu, 3 Oct 2024 16:26:08 +0300 Subject: [PATCH 13/16] HDDS-3498. Shutdown datanode if address is already in use --- .../container/ozoneimpl/BindException.java | 41 ------------------- 1 file changed, 41 deletions(-) delete mode 100644 hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BindException.java diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BindException.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BindException.java deleted file mode 100644 index 5758ae0969e..00000000000 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BindException.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.ozone.container.ozoneimpl; - -import java.io.IOException; - -/** - * Exception used to indicate a problem with binding a port. - * Typically, the port is in use. - */ -public class BindException extends IOException { - public BindException() { - } - - public BindException(String message) { - super(message); - } - - public BindException(String message, Throwable cause) { - super(message, cause); - } - - public BindException(Throwable cause) { - super(cause); - } -} From d9536ab2a50f578a7b754686224657b7d566135a Mon Sep 17 00:00:00 2001 From: daniilchik Date: Thu, 3 Oct 2024 19:08:28 +0300 Subject: [PATCH 14/16] HDDS-3498. Shutdown datanode if address is already in use --- .../apache/hadoop/ozone/container/common/TestEndPoint.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java index 75f3009bacd..2b78b73bcd5 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java @@ -326,7 +326,7 @@ public void testGetVersionToInvalidEndpoint() throws Exception { .getReuseableAddress(); try (EndpointStateMachine rpcEndPoint = createEndpoint(conf, nonExistentServerAddress, 1000)) { - rpcEndPoint.setState(EndpointStateMachine.EndPointStates.SHUTDOWN); + rpcEndPoint.setState(EndpointStateMachine.EndPointStates.GETVERSION); DatanodeDetails datanodeDetails = randomDatanodeDetails(); OzoneContainer ozoneContainer = new OzoneContainer(datanodeDetails, conf, ContainerTestUtils.getMockContext(datanodeDetails, ozoneConf)); @@ -336,7 +336,7 @@ public void testGetVersionToInvalidEndpoint() throws Exception { // This version call did NOT work, so endpoint should remain in the same // state. - assertEquals(EndpointStateMachine.EndPointStates.SHUTDOWN, newState); + assertEquals(EndpointStateMachine.EndPointStates.GETVERSION, newState); } } From 3bb033d2de01bc787bf44e096c7598042ea1de7d Mon Sep 17 00:00:00 2001 From: daniilchik Date: Fri, 4 Oct 2024 09:26:02 +0300 Subject: [PATCH 15/16] HDDS-3498. Shutdown datanode if address is already in use --- .../container/common/transport/server/XceiverServerGrpc.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java index 76ccf2dd332..42daaa94be3 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java @@ -191,7 +191,7 @@ public void start() throws IOException { } catch (IOException e) { LOG.error("Error while starting the server", e); if (e.getMessage().contains("Failed to bind to address")) { - throw new BindException(); + throw new BindException(e.getMessage()); } else { throw e; } From 0ac758e7e2cd6c54a2fdd696f93f4290dc8168c3 Mon Sep 17 00:00:00 2001 From: daniilchik Date: Fri, 4 Oct 2024 12:53:43 +0300 Subject: [PATCH 16/16] HDDS-3498. Shutdown datanode if address is already in use --- .../container/common/statemachine/EndpointStateMachine.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java index 39980d28708..b3854e7ecd2 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java @@ -237,7 +237,7 @@ public void logIfNeeded(Exception ex) { long missedDurationSeconds = TimeUnit.MILLISECONDS.toSeconds( this.getMissedCount() * getScmHeartbeatInterval(this.conf) ); - LOG.error( + LOG.warn( "Unable to communicate to {} server at {}:{} for past {} seconds.", serverName, address.getAddress(),