Skip to content

Commit b680b4a

Browse files
authored
fix: avoid setting ignoreNewTopologyRequestsEndTimeNano on initial connection (#1221)
1 parent 929d356 commit b680b4a

File tree

6 files changed

+165
-66
lines changed

6 files changed

+165
-66
lines changed

wrapper/src/main/java/software/amazon/jdbc/PluginServiceImpl.java

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -535,7 +535,8 @@ public boolean forceRefreshHostList(final boolean shouldVerifyWriter, final long
535535
return true;
536536
}
537537
} catch (TimeoutException ex) {
538-
// do nothing
538+
// do nothing.
539+
LOGGER.finest(Messages.get("PluginServiceImpl.forceRefreshTimeout", new Object[]{timeoutMs}));
539540
}
540541
return false;
541542
}

wrapper/src/main/java/software/amazon/jdbc/hostlistprovider/monitoring/ClusterTopologyMonitorImpl.java

Lines changed: 46 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,7 @@ public class ClusterTopologyMonitorImpl implements ClusterTopologyMonitor {
9797
protected long highRefreshRateEndTimeNano = 0;
9898
protected final Object topologyUpdated = new Object();
9999
protected final AtomicBoolean requestToUpdateTopology = new AtomicBoolean(false);
100-
protected final AtomicLong ignoreNewTopologyRequestsEndTimeNano = new AtomicLong(0);
100+
protected final AtomicLong ignoreNewTopologyRequestsEndTimeNano = new AtomicLong(-1);
101101
protected final ConcurrentHashMap<String /* host */, Thread> nodeThreads = new ConcurrentHashMap<>();
102102
protected final AtomicBoolean nodeThreadsStop = new AtomicBoolean(false);
103103
protected final AtomicReference<Connection> nodeThreadsWriterConnection = new AtomicReference<>(null);
@@ -188,6 +188,8 @@ public List<HostSpec> forceRefresh(final boolean shouldVerifyWriter, final long
188188

189189
// Previous failover has just completed. We can use results of it without triggering a new topology update.
190190
List<HostSpec> currentHosts = this.topologyMap.get(this.clusterId);
191+
LOGGER.finest(
192+
Utils.logTopology(currentHosts, Messages.get("ClusterTopologyMonitorImpl.ignoringTopologyRequest")));
191193
if (currentHosts != null) {
192194
return currentHosts;
193195
}
@@ -229,6 +231,7 @@ protected List<HostSpec> waitTillTopologyGetsUpdated(final long timeoutMs) throw
229231
}
230232

231233
if (timeoutMs == 0) {
234+
LOGGER.finest(Utils.logTopology(currentHosts, Messages.get("ClusterTopologyMonitorImpl.timeoutSetToZero")));
232235
return currentHosts;
233236
}
234237

@@ -240,6 +243,7 @@ protected List<HostSpec> waitTillTopologyGetsUpdated(final long timeoutMs) throw
240243
this.topologyUpdated.wait(1000);
241244
}
242245
} catch (InterruptedException ex) {
246+
LOGGER.fine(Messages.get("ClusterTopologyMonitorImpl.interrupted"));
243247
Thread.currentThread().interrupt();
244248
return null;
245249
}
@@ -282,6 +286,7 @@ public void run() {
282286
if (this.isInPanicMode()) {
283287

284288
if (this.nodeThreads.isEmpty()) {
289+
LOGGER.finest(Messages.get("ClusterTopologyMonitorImpl.startingNodeMonitoringThreads"));
285290

286291
// start node threads
287292
this.nodeThreadsStop.set(false);
@@ -309,19 +314,28 @@ public void run() {
309314
// otherwise let's try it again the next round
310315

311316
} else {
312-
313317
// node threads are running
314318
// check if writer is already detected
315319
final Connection writerConnection = this.nodeThreadsWriterConnection.get();
316320
final HostSpec writerConnectionHostSpec = this.nodeThreadsWriterHostSpec.get();
317321
if (writerConnection != null && writerConnectionHostSpec != null) {
322+
LOGGER.finest(
323+
Messages.get(
324+
"ClusterTopologyMonitorImpl.writerPickedUpFromNodeMonitors",
325+
new Object[]{writerConnectionHostSpec}));
318326

319327
this.closeConnection(this.monitoringConnection.get());
320328
this.monitoringConnection.set(writerConnection);
321329
this.writerHostSpec.set(writerConnectionHostSpec);
322330
this.isVerifiedWriterConnection = true;
323331
this.highRefreshRateEndTimeNano = System.nanoTime() + highRefreshPeriodAfterPanicNano;
324-
this.ignoreNewTopologyRequestsEndTimeNano.set(System.nanoTime() + ignoreTopologyRequestNano);
332+
333+
// We verify the writer on initial connection and on failover, but we only want to ignore new topology
334+
// requests after failover. To accomplish this, the first time we verify the writer we set the ignore end
335+
// time to 0. Any future writer verifications will set it to a positive value.
336+
if (!this.ignoreNewTopologyRequestsEndTimeNano.compareAndSet(-1, 0)) {
337+
this.ignoreNewTopologyRequestsEndTimeNano.set(System.nanoTime() + ignoreTopologyRequestNano);
338+
}
325339

326340
this.nodeThreadsStop.set(true);
327341
for (Thread thread : this.nodeThreads.values()) {
@@ -427,7 +441,7 @@ protected Thread getNodeMonitoringThread(final HostSpec hostSpec, final @Nullabl
427441
}
428442

429443
protected List<HostSpec> openAnyConnectionAndUpdateTopology() {
430-
444+
boolean writerVerifiedByThisThread = false;
431445
if (this.monitoringConnection.get() == null) {
432446

433447
Connection conn;
@@ -448,14 +462,22 @@ protected List<HostSpec> openAnyConnectionAndUpdateTopology() {
448462
try {
449463
if (!StringUtils.isNullOrEmpty(this.getWriterNodeId(this.monitoringConnection.get()))) {
450464
this.isVerifiedWriterConnection = true;
465+
writerVerifiedByThisThread = true;
466+
451467
if (rdsHelper.isRdsInstance(this.initialHostSpec.getHost())) {
452468
this.writerHostSpec.set(this.initialHostSpec);
453-
LOGGER.finest("writerHostSpec: " + this.writerHostSpec.get().getHost());
469+
LOGGER.finest(
470+
Messages.get(
471+
"ClusterTopologyMonitorImpl.writerMonitoringConnection",
472+
new Object[]{this.writerHostSpec.get().getHost()}));
454473
} else {
455474
final String nodeId = this.getNodeId(this.monitoringConnection.get());
456475
if (!StringUtils.isNullOrEmpty(nodeId)) {
457476
this.writerHostSpec.set(this.createHost(nodeId, true, 0, null));
458-
LOGGER.finest("writerHostSpec: " + this.writerHostSpec.get().getHost());
477+
LOGGER.finest(
478+
Messages.get(
479+
"ClusterTopologyMonitorImpl.writerMonitoringConnection",
480+
new Object[]{this.writerHostSpec.get().getHost()}));
459481
}
460482
}
461483
}
@@ -471,6 +493,14 @@ protected List<HostSpec> openAnyConnectionAndUpdateTopology() {
471493
}
472494

473495
final List<HostSpec> hosts = this.fetchTopologyAndUpdateCache(this.monitoringConnection.get());
496+
if (writerVerifiedByThisThread) {
497+
// We verify the writer on initial connection and on failover, but we only want to ignore new topology
498+
// requests after failover. To accomplish this, the first time we verify the writer we set the ignore end
499+
// time to 0. Any future writer verifications will set it to a positive value.
500+
if (!this.ignoreNewTopologyRequestsEndTimeNano.compareAndSet(-1, 0)) {
501+
this.ignoreNewTopologyRequestsEndTimeNano.set(System.nanoTime() + ignoreTopologyRequestNano);
502+
}
503+
}
474504

475505
if (hosts == null) {
476506
// can't get topology; it might be something's wrong with a connection
@@ -550,7 +580,7 @@ protected void delay(boolean useHighRefreshRate) throws InterruptedException {
550580
return hosts;
551581
} catch (SQLException ex) {
552582
// do nothing
553-
LOGGER.log(Level.FINEST, "Error fetching topology:", ex);
583+
LOGGER.finest(Messages.get("ClusterTopologyMonitorImpl.errorFetchingTopology", new Object[]{ex}));
554584
}
555585
return null;
556586
}
@@ -760,7 +790,7 @@ public void run() {
760790
writerId = this.monitor.getWriterNodeId(connection);
761791

762792
} catch (SQLSyntaxErrorException ex) {
763-
LOGGER.severe(() -> Messages.get("ClusterTopologyMonitorImpl.invalidWriterQuery",
793+
LOGGER.severe(() -> Messages.get("NodeMonitoringThread.invalidWriterQuery",
764794
new Object[] {ex.getMessage()}));
765795
throw new RuntimeException(ex);
766796

@@ -771,21 +801,21 @@ public void run() {
771801

772802
if (!StringUtils.isNullOrEmpty(writerId)) {
773803
// this prevents closing connection in finally block
774-
if (!this.monitor.nodeThreadsWriterConnection
775-
.compareAndSet(null, connection)) {
804+
if (!this.monitor.nodeThreadsWriterConnection.compareAndSet(null, connection)) {
776805
// writer connection is already setup
777806
this.monitor.closeConnection(connection);
778807

779808
} else {
780809
// writer connection is successfully set to writerConnection
810+
LOGGER.fine(Messages.get("NodeMonitoringThread.detectedWriter", new Object[]{writerId}));
811+
// When nodeThreadsWriterConnection and nodeThreadsWriterHostSpec are both set, the topology monitor may
812+
// set ignoreNewTopologyRequestsEndTimeNano, in which case other threads will use the cached topology
813+
// for the ignore duration, so we need to update the topology before setting nodeThreadsWriterHostSpec.
814+
this.monitor.fetchTopologyAndUpdateCache(connection);
781815
this.monitor.nodeThreadsWriterHostSpec.set(hostSpec);
782-
LOGGER.fine("Detected writer: " + writerId);
783816
this.monitor.nodeThreadsStop.set(true);
784-
785-
this.monitor.fetchTopologyAndUpdateCache(connection);
786817
LOGGER.fine(Utils.logTopology(
787818
this.monitor.topologyMap.get(this.monitor.clusterId)));
788-
789819
}
790820

791821
// Setting the connection to null here prevents the finally block
@@ -816,7 +846,7 @@ public void run() {
816846
} finally {
817847
this.monitor.closeConnection(connection);
818848
final long end = System.nanoTime();
819-
LOGGER.finest(() -> Messages.get("ClusterTopologyMonitorImpl.nodeThreadCompleted",
849+
LOGGER.finest(() -> Messages.get("NodeMonitoringThread.threadCompleted",
820850
new Object[] {TimeUnit.NANOSECONDS.toMillis(end - start)}));
821851
}
822852
}
@@ -853,7 +883,7 @@ private void readerThreadFetchTopology(final Connection connection, final @Nulla
853883
// writer node has changed
854884
this.writerChanged = true;
855885

856-
LOGGER.fine(() -> Messages.get("ClusterTopologyMonitorImpl.writerNodeChanged",
886+
LOGGER.fine(() -> Messages.get("NodeMonitoringThread.writerNodeChanged",
857887
new Object[] {writerHostSpec.getHost(), latestWriterHostSpec.getHost()}));
858888

859889
// we can update topology cache and notify all waiting threads

wrapper/src/main/java/software/amazon/jdbc/plugin/failover/ClusterAwareWriterFailoverHandler.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -302,7 +302,7 @@ public WriterFailoverResult call() {
302302
private boolean isCurrentHostWriter(final List<HostSpec> latestTopology) {
303303
final HostSpec latestWriter = getWriter(latestTopology);
304304
final Set<String> latestWriterAllAliases = latestWriter.asAliases();
305-
final Set<String> currentAliases = this.originalWriterHost.getAliases();
305+
final Set<String> currentAliases = this.originalWriterHost.asAliases();
306306

307307
return (currentAliases != null)
308308
&& (latestWriterAllAliases.stream().anyMatch(currentAliases::contains));

wrapper/src/main/java/software/amazon/jdbc/plugin/failover2/FailoverConnectionPlugin.java

Lines changed: 30 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -18,14 +18,14 @@
1818

1919
import java.sql.Connection;
2020
import java.sql.SQLException;
21+
import java.util.ArrayList;
2122
import java.util.Collections;
2223
import java.util.HashSet;
2324
import java.util.List;
2425
import java.util.Properties;
2526
import java.util.Set;
2627
import java.util.concurrent.TimeUnit;
2728
import java.util.logging.Logger;
28-
import java.util.stream.Collectors;
2929
import software.amazon.jdbc.AwsWrapperProperty;
3030
import software.amazon.jdbc.HostListProviderService;
3131
import software.amazon.jdbc.HostRole;
@@ -218,13 +218,11 @@ public void initHostProvider(
218218
final JdbcCallable<Void, SQLException> initHostProviderFunc)
219219
throws SQLException {
220220
initHostProvider(
221-
initialUrl,
222221
hostListProviderService,
223222
initHostProviderFunc);
224223
}
225224

226225
void initHostProvider(
227-
final String initialUrl,
228226
final HostListProviderService hostListProviderService,
229227
final JdbcCallable<Void, SQLException> initHostProviderFunc)
230228
throws SQLException {
@@ -305,10 +303,9 @@ protected <E extends Exception> void dealWithIllegalStateException(
305303
* Initiates the failover procedure. This process tries to establish a new connection to an
306304
* instance in the topology.
307305
*
308-
* @param failedHost The host with network errors.
309306
* @throws SQLException if an error occurs
310307
*/
311-
protected void failover(final HostSpec failedHost) throws SQLException {
308+
protected void failover() throws SQLException {
312309

313310
if (this.failoverMode == FailoverMode.STRICT_WRITER) {
314311
failoverWriter();
@@ -369,7 +366,7 @@ protected void failoverReader() throws SQLException {
369366
try {
370367
readerCandidate =
371368
this.pluginService.getHostSpecByStrategy(
372-
remainingHosts.stream().collect(Collectors.toList()),
369+
new ArrayList<>(remainingHosts),
373370
HostRole.READER,
374371
this.failoverReaderHostSelectorStrategySetting);
375372
} catch (UnsupportedOperationException | SQLException ex) {
@@ -469,54 +466,57 @@ protected void failoverWriter() throws SQLException {
469466
if (!this.pluginService.forceRefreshHostList(true, this.failoverTimeoutMsSetting)) {
470467
// "Unable to establish SQL connection to writer node"
471468
this.failoverWriterFailedCounter.inc();
472-
LOGGER.severe(Messages.get("Failover.unableToConnectToWriter"));
473-
throw new FailoverFailedSQLException(Messages.get("Failover.unableToConnectToWriter"));
469+
LOGGER.severe(Messages.get("Failover.unableToRefreshHostList"));
470+
throw new FailoverFailedSQLException(Messages.get("Failover.unableToRefreshHostList"));
474471
}
475472

476473
final List<HostSpec> updatedHosts = this.pluginService.getAllHosts();
477474
final Properties copyProp = PropertyUtils.copyProperties(this.properties);
478475
copyProp.setProperty(INTERNAL_CONNECT_PROPERTY_NAME, "true");
479476

480-
Connection writerCandidateConn = null;
477+
Connection writerCandidateConn;
481478
final HostSpec writerCandidate = updatedHosts.stream()
482479
.filter(x -> x.getRole() == HostRole.WRITER)
483480
.findFirst()
484481
.orElse(null);
485482

483+
if (writerCandidate == null) {
484+
this.failoverWriterFailedCounter.inc();
485+
String message = Utils.logTopology(updatedHosts, Messages.get("Failover.noWriterHost"));
486+
LOGGER.severe(message);
487+
throw new FailoverFailedSQLException(message);
488+
}
489+
486490
List<HostSpec> allowedHosts = this.pluginService.getHosts();
487-
if (writerCandidate != null && !allowedHosts.contains(writerCandidate)) {
491+
if (!allowedHosts.contains(writerCandidate)) {
488492
this.failoverWriterFailedCounter.inc();
493+
String topologyString = Utils.logTopology(allowedHosts, "");
489494
LOGGER.severe(Messages.get("Failover.newWriterNotAllowed",
490-
new Object[] {writerCandidate.getHost(), Utils.logTopology(allowedHosts, "")}));
495+
new Object[] {writerCandidate.getHost(), topologyString}));
491496
throw new FailoverFailedSQLException(
492497
Messages.get("Failover.newWriterNotAllowed",
493-
new Object[] {writerCandidate.getHost(), Utils.logTopology(allowedHosts, "")}));
494-
}
495-
496-
if (writerCandidate != null) {
497-
try {
498-
writerCandidateConn = this.pluginService.connect(writerCandidate, copyProp);
499-
} catch (SQLException ex) {
500-
// do nothing
501-
}
498+
new Object[] {writerCandidate.getHost(), topologyString}));
502499
}
503500

504-
if (writerCandidateConn == null) {
505-
// "Unable to establish SQL connection to writer node"
501+
try {
502+
writerCandidateConn = this.pluginService.connect(writerCandidate, copyProp);
503+
} catch (SQLException ex) {
506504
this.failoverWriterFailedCounter.inc();
507-
LOGGER.severe(Messages.get("Failover.unableToConnectToWriter"));
508-
throw new FailoverFailedSQLException(Messages.get("Failover.unableToConnectToWriter"));
505+
LOGGER.severe(
506+
Messages.get("Failover.exceptionConnectingToWriter", new Object[]{writerCandidate.getHost(), ex}));
507+
throw new FailoverFailedSQLException(Messages.get("Failover.exceptionConnectingToWriter"));
509508
}
510509

511-
if (this.pluginService.getHostRole(writerCandidateConn) != HostRole.WRITER) {
510+
HostRole role = this.pluginService.getHostRole(writerCandidateConn);
511+
if (role != HostRole.WRITER) {
512512
try {
513513
writerCandidateConn.close();
514514
} catch (SQLException ex) {
515515
// do nothing
516516
}
517517
this.failoverWriterFailedCounter.inc();
518-
LOGGER.severe(Messages.get("Failover.unableToConnectToWriter"));
519-
throw new FailoverFailedSQLException(Messages.get("Failover.unableToConnectToWriter"));
518+
LOGGER.severe(Messages.get("Failover.unexpectedReaderRole", new Object[]{writerCandidate.getHost(), role}));
519+
throw new FailoverFailedSQLException(Messages.get("Failover.unexpectedReaderRole"));
520520
}
521521

522522
this.pluginService.setCurrentConnection(writerCandidateConn, writerCandidate);
@@ -579,7 +579,7 @@ protected void pickNewConnection() throws SQLException {
579579
return;
580580
}
581581

582-
this.failover(this.pluginService.getCurrentHostSpec());
582+
this.failover();
583583
}
584584

585585
protected boolean shouldExceptionTriggerConnectionSwitch(final Throwable t) {
@@ -674,15 +674,15 @@ public Connection connect(
674674
this.pluginService.setAvailability(hostSpec.asAliases(), HostAvailability.NOT_AVAILABLE);
675675

676676
try {
677-
this.failover(hostSpec);
677+
this.failover();
678678
} catch (FailoverSuccessSQLException failoverSuccessException) {
679679
conn = this.pluginService.getCurrentConnection();
680680
}
681681
}
682682
} else {
683683
try {
684684
this.pluginService.refreshHostList();
685-
this.failover(hostSpec);
685+
this.failover();
686686
} catch (FailoverSuccessSQLException failoverSuccessException) {
687687
conn = this.pluginService.getCurrentConnection();
688688
}

0 commit comments

Comments
 (0)