From 6227404a8a1e92181ac52d87f4398ce87e1c4c84 Mon Sep 17 00:00:00 2001 From: Rushabh Date: Thu, 15 Aug 2019 09:17:45 -0700 Subject: [PATCH 1/3] [HBASE-22601] Misconfigured addition of peers leads to cluster shutdown. --- .../replication/regionserver/ReplicationSource.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java index 8d0e6610c872..dc7eb2366621 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java @@ -510,6 +510,15 @@ private void initialize() { } } + if (peerClusterId == null) { + // In some cases, it is possible that peerClusterId is null because it couldn't read + // peer cluster id from zookeeper. One case this might happen is because 2 clusters don't + // have kerberos trust setup. + this.terminate("Peer ClusterId returned is null", null, false); + this.manager.removeSource(this); + return; + } + // In rare case, zookeeper setting may be messed up. That leads to the incorrect // peerClusterId value, which is the same as the source clusterId if (clusterId.equals(peerClusterId) && !replicationEndpoint.canReplicateToSameCluster()) { From 11544c616d691701f289cfb9d11a1fe6ba226e29 Mon Sep 17 00:00:00 2001 From: Rushabh Date: Sat, 17 Aug 2019 15:48:44 -0700 Subject: [PATCH 2/3] [HBASE-22601] Addressing test failures. 
--- .../regionserver/ReplicationSourceManager.java | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java index 36cfdd7fb923..17b1f7f71708 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java @@ -382,14 +382,15 @@ public void refreshSources(String peerId) throws IOException { LOG.info("Terminate replication source for " + toRemove.getPeerId()); toRemove.terminate(terminateMessage); } - for (SortedSet walsByGroup : walsById.get(peerId).values()) { - walsByGroup.forEach(wal -> { - Path walPath = new Path(this.logDir, wal); - src.enqueueLog(walPath); - LOG.trace("Enqueued {} to source {} during source creation.", - walPath, src.getQueueId()); - }); - + if (walsById.get(peerId) != null) { + for (SortedSet walsByGroup : walsById.get(peerId).values()) { + walsByGroup.forEach(wal -> { + Path walPath = new Path(this.logDir, wal); + src.enqueueLog(walPath); + LOG.trace("Enqueued {} to source {} during source creation.", + walPath, src.getQueueId()); + }); + } } } LOG.info("Startup replication source for " + src.getPeerId()); From 862e1c6f568b74888b58c7906afd58069d91b537 Mon Sep 17 00:00:00 2001 From: Rushabh Date: Sat, 17 Aug 2019 21:54:33 -0700 Subject: [PATCH 3/3] [HBASE-22601] Addressing review comments. 
--- .../regionserver/ReplicationSource.java | 7 +------ .../regionserver/ReplicationSourceManager.java | 17 ++++++++--------- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java index dc7eb2366621..41a4e7b0e174 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java @@ -510,12 +510,7 @@ private void initialize() { } } - if (peerClusterId == null) { - // In some cases, it is possible that peerClusterId is null because it couldn't read - // peer cluster id from zookeeper. One case this might happen is because 2 clusters don't - // have kerberos trust setup. - this.terminate("Peer ClusterId returned is null", null, false); - this.manager.removeSource(this); + if (!this.isSourceActive()) { return; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java index 17b1f7f71708..36cfdd7fb923 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java @@ -382,15 +382,14 @@ public void refreshSources(String peerId) throws IOException { LOG.info("Terminate replication source for " + toRemove.getPeerId()); toRemove.terminate(terminateMessage); } - if (walsById.get(peerId) != null) { - for (SortedSet walsByGroup : walsById.get(peerId).values()) { - walsByGroup.forEach(wal -> { - Path walPath = new Path(this.logDir, wal); - src.enqueueLog(walPath); - LOG.trace("Enqueued {} to 
source {} during source creation.", - walPath, src.getQueueId()); - }); - } + for (SortedSet walsByGroup : walsById.get(peerId).values()) { + walsByGroup.forEach(wal -> { + Path walPath = new Path(this.logDir, wal); + src.enqueueLog(walPath); + LOG.trace("Enqueued {} to source {} during source creation.", + walPath, src.getQueueId()); + }); + } } LOG.info("Startup replication source for " + src.getPeerId());