2222import com .carrotsearch .hppc .cursors .ObjectCursor ;
2323import org .apache .logging .log4j .message .ParameterizedMessage ;
2424import org .apache .logging .log4j .util .Supplier ;
25+ import org .apache .lucene .util .IOUtils ;
2526import org .elasticsearch .ExceptionsHelper ;
2627import org .elasticsearch .Version ;
2728import org .elasticsearch .action .ActionListener ;
3839import org .elasticsearch .common .settings .Settings ;
3940import org .elasticsearch .common .transport .TransportAddress ;
4041import org .elasticsearch .common .unit .TimeValue ;
42+ import org .elasticsearch .common .util .concurrent .AbstractRunnable ;
4143import org .elasticsearch .common .util .concurrent .ConcurrentCollections ;
4244import org .elasticsearch .common .util .concurrent .FutureUtils ;
4345import org .elasticsearch .threadpool .ThreadPool ;
4648import org .elasticsearch .transport .FutureTransportResponseHandler ;
4749import org .elasticsearch .transport .NodeDisconnectedException ;
4850import org .elasticsearch .transport .NodeNotConnectedException ;
51+ import org .elasticsearch .transport .PlainTransportFuture ;
52+ import org .elasticsearch .transport .Transport ;
4953import org .elasticsearch .transport .TransportException ;
5054import org .elasticsearch .transport .TransportRequestOptions ;
5155import org .elasticsearch .transport .TransportResponseHandler ;
@@ -401,51 +405,37 @@ protected void doSample() {
401405 HashSet <DiscoveryNode > newNodes = new HashSet <>();
402406 HashSet <DiscoveryNode > newFilteredNodes = new HashSet <>();
403407 for (DiscoveryNode listedNode : listedNodes ) {
404- if (!transportService .nodeConnected (listedNode )) {
405- try {
406- // its a listed node, light connect to it...
407- logger .trace ("connecting to listed node [{}]" , listedNode );
408- transportService .connectToNode (listedNode , LISTED_NODES_PROFILE );
409- } catch (Exception e ) {
410- logger .info (
411- (Supplier <?>)
412- () -> new ParameterizedMessage ("failed to connect to node [{}], removed from nodes list" , listedNode ), e );
413- hostFailureListener .onNodeDisconnected (listedNode , e );
414- newFilteredNodes .add (listedNode );
415- continue ;
416- }
417- }
418- try {
419- LivenessResponse livenessResponse = transportService .submitRequest (listedNode , TransportLivenessAction .NAME ,
420- new LivenessRequest (),
421- TransportRequestOptions .builder ().withType (TransportRequestOptions .Type .STATE ).withTimeout (pingTimeout ).build (),
422- new FutureTransportResponseHandler <LivenessResponse >() {
423- @ Override
424- public LivenessResponse newInstance () {
425- return new LivenessResponse ();
426- }
427- }).txGet ();
408+ try (Transport .Connection connection = transportService .openConnection (listedNode , LISTED_NODES_PROFILE )){
409+ final PlainTransportFuture <LivenessResponse > handler = new PlainTransportFuture <>(
410+ new FutureTransportResponseHandler <LivenessResponse >() {
411+ @ Override
412+ public LivenessResponse newInstance () {
413+ return new LivenessResponse ();
414+ }
415+ });
416+ transportService .sendRequest (connection , TransportLivenessAction .NAME , new LivenessRequest (),
417+ TransportRequestOptions .builder ().withType (TransportRequestOptions .Type .STATE ).withTimeout (pingTimeout ).build (),
418+ handler );
419+ final LivenessResponse livenessResponse = handler .txGet ();
428420 if (!ignoreClusterName && !clusterName .equals (livenessResponse .getClusterName ())) {
429421 logger .warn ("node {} not part of the cluster {}, ignoring..." , listedNode , clusterName );
430422 newFilteredNodes .add (listedNode );
431- } else if ( livenessResponse . getDiscoveryNode () != null ) {
423+ } else {
432424 // use discovered information but do keep the original transport address,
433425 // so people can control which address is exactly used.
434426 DiscoveryNode nodeWithInfo = livenessResponse .getDiscoveryNode ();
435427 newNodes .add (new DiscoveryNode (nodeWithInfo .getName (), nodeWithInfo .getId (), nodeWithInfo .getEphemeralId (),
436428 nodeWithInfo .getHostName (), nodeWithInfo .getHostAddress (), listedNode .getAddress (),
437429 nodeWithInfo .getAttributes (), nodeWithInfo .getRoles (), nodeWithInfo .getVersion ()));
438- } else {
439- // although we asked for one node, our target may not have completed
440- // initialization yet and doesn't have cluster nodes
441- logger .debug ("node {} didn't return any discovery info, temporarily using transport discovery node" , listedNode );
442- newNodes .add (listedNode );
443430 }
431+ } catch (ConnectTransportException e ) {
432+ logger .debug (
433+ (Supplier <?>)
434+ () -> new ParameterizedMessage ("failed to connect to node [{}], ignoring..." , listedNode ), e );
435+ hostFailureListener .onNodeDisconnected (listedNode , e );
444436 } catch (Exception e ) {
445437 logger .info (
446438 (Supplier <?>) () -> new ParameterizedMessage ("failed to get node info for {}, disconnecting..." , listedNode ), e );
447- transportService .disconnectFromNode (listedNode );
448- hostFailureListener .onNodeDisconnected (listedNode , e );
449439 }
450440 }
451441
@@ -470,78 +460,91 @@ protected void doSample() {
470460
471461 final CountDownLatch latch = new CountDownLatch (nodesToPing .size ());
472462 final ConcurrentMap <DiscoveryNode , ClusterStateResponse > clusterStateResponses = ConcurrentCollections .newConcurrentMap ();
473- for (final DiscoveryNode listedNode : nodesToPing ) {
474- threadPool .executor (ThreadPool .Names .MANAGEMENT ).execute (new Runnable () {
475- @ Override
476- public void run () {
477- try {
478- if (!transportService .nodeConnected (listedNode )) {
479- try {
463+ try {
464+ for (final DiscoveryNode nodeToPing : nodesToPing ) {
465+ threadPool .executor (ThreadPool .Names .MANAGEMENT ).execute (new AbstractRunnable () {
466+
467+ /**
468+ * we try to reuse existing connections but if needed we will open a temporary connection
469+ * that will be closed at the end of the execution.
470+ */
471+ Transport .Connection connectionToClose = null ;
472+
473+ @ Override
474+ public void onAfter () {
475+ IOUtils .closeWhileHandlingException (connectionToClose );
476+ }
480477
481- // if its one of the actual nodes we will talk to, not to listed nodes, fully connect
482- if (nodes .contains (listedNode )) {
483- logger .trace ("connecting to cluster node [{}]" , listedNode );
484- transportService .connectToNode (listedNode );
485- } else {
486- // its a listed node, light connect to it...
487- logger .trace ("connecting to listed node (light) [{}]" , listedNode );
488- transportService .connectToNode (listedNode , LISTED_NODES_PROFILE );
489- }
490- } catch (Exception e ) {
491- logger .debug (
492- (Supplier <?>)
493- () -> new ParameterizedMessage ("failed to connect to node [{}], ignoring..." , listedNode ), e );
494- latch .countDown ();
495- return ;
478+ @ Override
479+ public void onFailure (Exception e ) {
480+ latch .countDown ();
481+ if (e instanceof ConnectTransportException ) {
482+ logger .debug ((Supplier <?>)
483+ () -> new ParameterizedMessage ("failed to connect to node [{}], ignoring..." , nodeToPing ), e );
484+ hostFailureListener .onNodeDisconnected (nodeToPing , e );
485+ } else {
486+ logger .info (
487+ (Supplier <?>) () -> new ParameterizedMessage (
488+ "failed to get local cluster state info for {}, disconnecting..." , nodeToPing ), e );
489+ }
490+ }
491+
492+ @ Override
493+ protected void doRun () throws Exception {
494+ Transport .Connection pingConnection = null ;
495+ if (nodes .contains (nodeToPing )) {
496+ try {
497+ pingConnection = transportService .getConnection (nodeToPing );
498+ } catch (NodeNotConnectedException e ) {
499+ // will use a temp connection
496500 }
497501 }
498- transportService .sendRequest (listedNode , ClusterStateAction .NAME ,
499- Requests .clusterStateRequest ().clear ().nodes (true ).local (true ),
500- TransportRequestOptions .builder ().withType (TransportRequestOptions .Type .STATE )
501- .withTimeout (pingTimeout ).build (),
502- new TransportResponseHandler <ClusterStateResponse >() {
503-
504- @ Override
505- public ClusterStateResponse newInstance () {
506- return new ClusterStateResponse ();
507- }
502+ if (pingConnection == null ) {
503+ logger .trace ("connecting to cluster node [{}]" , nodeToPing );
504+ connectionToClose = transportService .openConnection (nodeToPing , LISTED_NODES_PROFILE );
505+ pingConnection = connectionToClose ;
506+ }
507+ transportService .sendRequest (pingConnection , ClusterStateAction .NAME ,
508+ Requests .clusterStateRequest ().clear ().nodes (true ).local (true ),
509+ TransportRequestOptions .builder ().withType (TransportRequestOptions .Type .STATE )
510+ .withTimeout (pingTimeout ).build (),
511+ new TransportResponseHandler <ClusterStateResponse >() {
512+
513+ @ Override
514+ public ClusterStateResponse newInstance () {
515+ return new ClusterStateResponse ();
516+ }
508517
509- @ Override
510- public String executor () {
511- return ThreadPool .Names .SAME ;
512- }
518+ @ Override
519+ public String executor () {
520+ return ThreadPool .Names .SAME ;
521+ }
513522
514- @ Override
515- public void handleResponse (ClusterStateResponse response ) {
516- clusterStateResponses .put (listedNode , response );
517- latch .countDown ();
518- }
523+ @ Override
524+ public void handleResponse (ClusterStateResponse response ) {
525+ clusterStateResponses .put (nodeToPing , response );
526+ latch .countDown ();
527+ }
519528
520- @ Override
521- public void handleException (TransportException e ) {
522- logger .info (
523- (Supplier <?>) () -> new ParameterizedMessage (
524- "failed to get local cluster state for {}, disconnecting..." , listedNode ), e );
525- transportService .disconnectFromNode (listedNode );
529+ @ Override
530+ public void handleException (TransportException e ) {
531+ logger .info (
532+ (Supplier <?>) () -> new ParameterizedMessage (
533+ "failed to get local cluster state for {}, disconnecting..." , nodeToPing ), e );
534+ try {
535+ hostFailureListener .onNodeDisconnected (nodeToPing , e );
536+ }
537+ finally {
526538 latch .countDown ();
527- hostFailureListener .onNodeDisconnected (listedNode , e );
528539 }
529- });
530- } catch (Exception e ) {
531- logger .info (
532- (Supplier <?>)() -> new ParameterizedMessage (
533- "failed to get local cluster state info for {}, disconnecting..." , listedNode ), e );
534- transportService .disconnectFromNode (listedNode );
535- latch .countDown ();
536- hostFailureListener .onNodeDisconnected (listedNode , e );
540+ }
541+ });
537542 }
538- }
539- });
540- }
541-
542- try {
543+ });
544+ }
543545 latch .await ();
544546 } catch (InterruptedException e ) {
547+ Thread .currentThread ().interrupt ();
545548 return ;
546549 }
547550
0 commit comments