4141import org .elasticsearch .index .IndexNotFoundException ;
4242import org .elasticsearch .index .engine .RecoveryEngineException ;
4343import org .elasticsearch .index .mapper .MapperException ;
44+ import org .elasticsearch .index .seqno .SequenceNumbers ;
4445import org .elasticsearch .index .seqno .SequenceNumbersService ;
4546import org .elasticsearch .index .shard .IllegalIndexShardStateException ;
4647import org .elasticsearch .index .shard .IndexEventListener ;
@@ -338,16 +339,20 @@ private Optional<StartRecoveryRequest> getStartRecoveryRequest(final RecoveryTar
338339 final long startingSeqNo ;
339340 if (metadataSnapshot .get ().size () > 0 ) {
340341 startingSeqNo = getStartingSeqNo (recoveryTarget );
341- logger .trace (
342- "{} preparing for sequence number-based recovery starting at local checkpoint [{}] from [{}]" ,
343- recoveryTarget .shardId (),
344- startingSeqNo ,
345- recoveryTarget .sourceNode ());
346342 } else {
347- logger .trace ("{} preparing for file-based recovery from [{}]" , recoveryTarget .shardId (), recoveryTarget .sourceNode ());
348343 startingSeqNo = SequenceNumbersService .UNASSIGNED_SEQ_NO ;
349344 }
350345
346+ if (startingSeqNo == SequenceNumbersService .UNASSIGNED_SEQ_NO ) {
347+ logger .trace ("{} preparing for file-based recovery from [{}]" , recoveryTarget .shardId (), recoveryTarget .sourceNode ());
348+ } else {
349+ logger .trace (
350+ "{} preparing for sequence number-based recovery starting at local checkpoint [{}] from [{}]" ,
351+ recoveryTarget .shardId (),
352+ startingSeqNo ,
353+ recoveryTarget .sourceNode ());
354+ }
355+
351356 logger .trace ("{} preparing shard for peer recovery" , recoveryTarget .shardId ());
352357 recoveryTarget .indexShard ().prepareForIndexRecovery ();
353358
@@ -370,9 +375,23 @@ private Optional<StartRecoveryRequest> getStartRecoveryRequest(final RecoveryTar
370375 return Optional .of (request );
371376 }
372377
373- public static long getStartingSeqNo (RecoveryTarget recoveryTarget ) throws IOException {
374- final long globalCheckpoint = Translog .readGlobalCheckpoint (recoveryTarget .indexShard ().shardPath ().resolveTranslog ());
375- return recoveryTarget .store ().loadSeqNoStats (globalCheckpoint ).getLocalCheckpoint () + 1 ;
378+ /**
379+ * Get the starting sequence number for a sequence-number-based request.
380+ *
381+ * @param recoveryTarget the target of the recovery
382+ * @return the starting sequence number or {@link SequenceNumbersService#UNASSIGNED_SEQ_NO} if obtaining the starting sequence number
383+ * failed
384+ */
385+ public static long getStartingSeqNo (final RecoveryTarget recoveryTarget ) {
386+ try {
387+ final long globalCheckpoint = Translog .readGlobalCheckpoint (recoveryTarget .indexShard ().shardPath ().resolveTranslog ());
388+ return recoveryTarget .store ().loadSeqNoStats (globalCheckpoint ).getLocalCheckpoint () + 1 ;
389+ } catch (final IOException e ) {
390+ // this can happen, for example, if phase one of the recovery completed successfully, a network partition happened before the
391+ // translog on the recovery target was opened, and the recovery then entered a retry loop that, seeing the index files on disk,
392+ // proceeded to attempt a sequence-number-based recovery
393+ return SequenceNumbersService .UNASSIGNED_SEQ_NO ;
394+ }
376395 }
377396
378397 public interface RecoveryListener {
0 commit comments