@@ -97,7 +97,7 @@ public class ClusterTopologyMonitorImpl implements ClusterTopologyMonitor {
9797 protected long highRefreshRateEndTimeNano = 0 ;
9898 protected final Object topologyUpdated = new Object ();
9999 protected final AtomicBoolean requestToUpdateTopology = new AtomicBoolean (false );
100- protected final AtomicLong ignoreNewTopologyRequestsEndTimeNano = new AtomicLong (0 );
100+ protected final AtomicLong ignoreNewTopologyRequestsEndTimeNano = new AtomicLong (- 1 );
101101 protected final ConcurrentHashMap <String /* host */ , Thread > nodeThreads = new ConcurrentHashMap <>();
102102 protected final AtomicBoolean nodeThreadsStop = new AtomicBoolean (false );
103103 protected final AtomicReference <Connection > nodeThreadsWriterConnection = new AtomicReference <>(null );
@@ -188,6 +188,8 @@ public List<HostSpec> forceRefresh(final boolean shouldVerifyWriter, final long
188188
189189 // Previous failover has just completed. We can use results of it without triggering a new topology update.
190190 List <HostSpec > currentHosts = this .topologyMap .get (this .clusterId );
191+ LOGGER .finest (
192+ Utils .logTopology (currentHosts , Messages .get ("ClusterTopologyMonitorImpl.ignoringTopologyRequest" )));
191193 if (currentHosts != null ) {
192194 return currentHosts ;
193195 }
@@ -229,6 +231,7 @@ protected List<HostSpec> waitTillTopologyGetsUpdated(final long timeoutMs) throw
229231 }
230232
231233 if (timeoutMs == 0 ) {
234+ LOGGER .finest (Utils .logTopology (currentHosts , Messages .get ("ClusterTopologyMonitorImpl.timeoutSetToZero" )));
232235 return currentHosts ;
233236 }
234237
@@ -240,6 +243,7 @@ protected List<HostSpec> waitTillTopologyGetsUpdated(final long timeoutMs) throw
240243 this .topologyUpdated .wait (1000 );
241244 }
242245 } catch (InterruptedException ex ) {
246+ LOGGER .fine (Messages .get ("ClusterTopologyMonitorImpl.interrupted" ));
243247 Thread .currentThread ().interrupt ();
244248 return null ;
245249 }
@@ -282,6 +286,7 @@ public void run() {
282286 if (this .isInPanicMode ()) {
283287
284288 if (this .nodeThreads .isEmpty ()) {
289+ LOGGER .finest (Messages .get ("ClusterTopologyMonitorImpl.startingNodeMonitoringThreads" ));
285290
286291 // start node threads
287292 this .nodeThreadsStop .set (false );
@@ -309,19 +314,28 @@ public void run() {
309314 // otherwise let's try it again the next round
310315
311316 } else {
312-
313317 // node threads are running
314318 // check if writer is already detected
315319 final Connection writerConnection = this .nodeThreadsWriterConnection .get ();
316320 final HostSpec writerConnectionHostSpec = this .nodeThreadsWriterHostSpec .get ();
317321 if (writerConnection != null && writerConnectionHostSpec != null ) {
322+ LOGGER .finest (
323+ Messages .get (
324+ "ClusterTopologyMonitorImpl.writerPickedUpFromNodeMonitors" ,
325+ new Object []{writerConnectionHostSpec }));
318326
319327 this .closeConnection (this .monitoringConnection .get ());
320328 this .monitoringConnection .set (writerConnection );
321329 this .writerHostSpec .set (writerConnectionHostSpec );
322330 this .isVerifiedWriterConnection = true ;
323331 this .highRefreshRateEndTimeNano = System .nanoTime () + highRefreshPeriodAfterPanicNano ;
324- this .ignoreNewTopologyRequestsEndTimeNano .set (System .nanoTime () + ignoreTopologyRequestNano );
332+
333+ // We verify the writer on initial connection and on failover, but we only want to ignore new topology
334+ // requests after failover. To accomplish this, the first time we verify the writer we set the ignore end
335+ // time to 0. Any future writer verifications will set it to a positive value.
336+ if (!this .ignoreNewTopologyRequestsEndTimeNano .compareAndSet (-1 , 0 )) {
337+ this .ignoreNewTopologyRequestsEndTimeNano .set (System .nanoTime () + ignoreTopologyRequestNano );
338+ }
325339
326340 this .nodeThreadsStop .set (true );
327341 for (Thread thread : this .nodeThreads .values ()) {
@@ -427,7 +441,7 @@ protected Thread getNodeMonitoringThread(final HostSpec hostSpec, final @Nullabl
427441 }
428442
429443 protected List <HostSpec > openAnyConnectionAndUpdateTopology () {
430-
444+ boolean writerVerifiedByThisThread = false ;
431445 if (this .monitoringConnection .get () == null ) {
432446
433447 Connection conn ;
@@ -448,14 +462,22 @@ protected List<HostSpec> openAnyConnectionAndUpdateTopology() {
448462 try {
449463 if (!StringUtils .isNullOrEmpty (this .getWriterNodeId (this .monitoringConnection .get ()))) {
450464 this .isVerifiedWriterConnection = true ;
465+ writerVerifiedByThisThread = true ;
466+
451467 if (rdsHelper .isRdsInstance (this .initialHostSpec .getHost ())) {
452468 this .writerHostSpec .set (this .initialHostSpec );
453- LOGGER .finest ("writerHostSpec: " + this .writerHostSpec .get ().getHost ());
469+ LOGGER .finest (
470+ Messages .get (
471+ "ClusterTopologyMonitorImpl.writerMonitoringConnection" ,
472+ new Object []{this .writerHostSpec .get ().getHost ()}));
454473 } else {
455474 final String nodeId = this .getNodeId (this .monitoringConnection .get ());
456475 if (!StringUtils .isNullOrEmpty (nodeId )) {
457476 this .writerHostSpec .set (this .createHost (nodeId , true , 0 , null ));
458- LOGGER .finest ("writerHostSpec: " + this .writerHostSpec .get ().getHost ());
477+ LOGGER .finest (
478+ Messages .get (
479+ "ClusterTopologyMonitorImpl.writerMonitoringConnection" ,
480+ new Object []{this .writerHostSpec .get ().getHost ()}));
459481 }
460482 }
461483 }
@@ -471,6 +493,14 @@ protected List<HostSpec> openAnyConnectionAndUpdateTopology() {
471493 }
472494
473495 final List <HostSpec > hosts = this .fetchTopologyAndUpdateCache (this .monitoringConnection .get ());
496+ if (writerVerifiedByThisThread ) {
497+ // We verify the writer on initial connection and on failover, but we only want to ignore new topology
498+ // requests after failover. To accomplish this, the first time we verify the writer we set the ignore end
499+ // time to 0. Any future writer verifications will set it to a positive value.
500+ if (!this .ignoreNewTopologyRequestsEndTimeNano .compareAndSet (-1 , 0 )) {
501+ this .ignoreNewTopologyRequestsEndTimeNano .set (System .nanoTime () + ignoreTopologyRequestNano );
502+ }
503+ }
474504
475505 if (hosts == null ) {
476506 // can't get topology; it might be something's wrong with a connection
@@ -550,7 +580,7 @@ protected void delay(boolean useHighRefreshRate) throws InterruptedException {
550580 return hosts ;
551581 } catch (SQLException ex ) {
552582 // do nothing
553- LOGGER .log ( Level . FINEST , "Error fetching topology: " , ex );
583+ LOGGER .finest ( Messages . get ( "ClusterTopologyMonitorImpl.errorFetchingTopology " , new Object []{ ex }) );
554584 }
555585 return null ;
556586 }
@@ -760,7 +790,7 @@ public void run() {
760790 writerId = this .monitor .getWriterNodeId (connection );
761791
762792 } catch (SQLSyntaxErrorException ex ) {
763- LOGGER .severe (() -> Messages .get ("ClusterTopologyMonitorImpl .invalidWriterQuery" ,
793+ LOGGER .severe (() -> Messages .get ("NodeMonitoringThread .invalidWriterQuery" ,
764794 new Object [] {ex .getMessage ()}));
765795 throw new RuntimeException (ex );
766796
@@ -771,21 +801,21 @@ public void run() {
771801
772802 if (!StringUtils .isNullOrEmpty (writerId )) {
773803 // this prevents closing connection in finally block
774- if (!this .monitor .nodeThreadsWriterConnection
775- .compareAndSet (null , connection )) {
804+ if (!this .monitor .nodeThreadsWriterConnection .compareAndSet (null , connection )) {
776805 // writer connection is already setup
777806 this .monitor .closeConnection (connection );
778807
779808 } else {
780809 // writer connection is successfully set to writerConnection
810+ LOGGER .fine (Messages .get ("NodeMonitoringThread.detectedWriter" , new Object []{writerId }));
811+ // When nodeThreadsWriterConnection and nodeThreadsWriterHostSpec are both set, the topology monitor may
812+ // set ignoreNewTopologyRequestsEndTimeNano, in which case other threads will use the cached topology
813+ // for the ignore duration, so we need to update the topology before setting nodeThreadsWriterHostSpec.
814+ this .monitor .fetchTopologyAndUpdateCache (connection );
781815 this .monitor .nodeThreadsWriterHostSpec .set (hostSpec );
782- LOGGER .fine ("Detected writer: " + writerId );
783816 this .monitor .nodeThreadsStop .set (true );
784-
785- this .monitor .fetchTopologyAndUpdateCache (connection );
786817 LOGGER .fine (Utils .logTopology (
787818 this .monitor .topologyMap .get (this .monitor .clusterId )));
788-
789819 }
790820
791821 // Setting the connection to null here prevents the finally block
@@ -816,7 +846,7 @@ public void run() {
816846 } finally {
817847 this .monitor .closeConnection (connection );
818848 final long end = System .nanoTime ();
819- LOGGER .finest (() -> Messages .get ("ClusterTopologyMonitorImpl.nodeThreadCompleted " ,
849+ LOGGER .finest (() -> Messages .get ("NodeMonitoringThread.threadCompleted " ,
820850 new Object [] {TimeUnit .NANOSECONDS .toMillis (end - start )}));
821851 }
822852 }
@@ -853,7 +883,7 @@ private void readerThreadFetchTopology(final Connection connection, final @Nulla
853883 // writer node has changed
854884 this .writerChanged = true ;
855885
856- LOGGER .fine (() -> Messages .get ("ClusterTopologyMonitorImpl .writerNodeChanged" ,
886+ LOGGER .fine (() -> Messages .get ("NodeMonitoringThread .writerNodeChanged" ,
857887 new Object [] {writerHostSpec .getHost (), latestWriterHostSpec .getHost ()}));
858888
859889 // we can update topology cache and notify all waiting threads
0 commit comments