2222import org .apache .lucene .index .CorruptIndexException ;
2323import org .apache .lucene .index .DirectoryReader ;
2424import org .apache .lucene .index .IndexCommit ;
25+ import org .apache .lucene .index .IndexWriter ;
2526import org .apache .lucene .index .Term ;
2627import org .apache .lucene .search .IndexSearcher ;
2728import org .apache .lucene .search .TermQuery ;
110111import org .elasticsearch .snapshots .SnapshotId ;
111112import org .elasticsearch .snapshots .SnapshotInfo ;
112113import org .elasticsearch .snapshots .SnapshotShardFailure ;
114+ import org .elasticsearch .test .CorruptionUtils ;
113115import org .elasticsearch .test .DummyShardLock ;
114116import org .elasticsearch .test .FieldMaskingReader ;
115117import org .elasticsearch .test .VersionUtils ;
118120
119121import java .io .IOException ;
120122import java .nio .charset .Charset ;
123+ import java .nio .file .FileVisitResult ;
124+ import java .nio .file .Files ;
121125import java .nio .file .Path ;
126+ import java .nio .file .SimpleFileVisitor ;
127+ import java .nio .file .attribute .BasicFileAttributes ;
122128import java .util .ArrayList ;
123129import java .util .Arrays ;
124130import java .util .Collections ;
@@ -1229,7 +1235,7 @@ public String[] listAll() throws IOException {
12291235 };
12301236
12311237 try (Store store = createStore (shardId , new IndexSettings (metaData , Settings .EMPTY ), directory )) {
1232- IndexShard shard = newShard (shardRouting , shardPath , metaData , store ,
1238+ IndexShard shard = newShard (shardRouting , shardPath , metaData , i -> store ,
12331239 null , new InternalEngineFactory (), () -> {
12341240 }, EMPTY_EVENT_LISTENER );
12351241 AtomicBoolean failureCallbackTriggered = new AtomicBoolean (false );
@@ -2569,6 +2575,143 @@ public void testReadSnapshotConcurrently() throws IOException, InterruptedExcept
25692575 closeShards (newShard );
25702576 }
25712577
2578+ public void testIndexCheckOnStartup () throws Exception {
2579+ final IndexShard indexShard = newStartedShard (true );
2580+
2581+ final long numDocs = between (10 , 100 );
2582+ for (long i = 0 ; i < numDocs ; i ++) {
2583+ indexDoc (indexShard , "_doc" , Long .toString (i ), "{}" );
2584+ }
2585+ indexShard .flush (new FlushRequest ());
2586+ closeShards (indexShard );
2587+
2588+ final ShardPath shardPath = indexShard .shardPath ();
2589+
2590+ final Path indexPath = corruptIndexFile (shardPath );
2591+
2592+ final AtomicInteger corruptedMarkerCount = new AtomicInteger ();
2593+ final SimpleFileVisitor <Path > corruptedVisitor = new SimpleFileVisitor <Path >() {
2594+ @ Override
2595+ public FileVisitResult visitFile (Path file , BasicFileAttributes attrs ) throws IOException {
2596+ if (Files .isRegularFile (file ) && file .getFileName ().toString ().startsWith (Store .CORRUPTED )) {
2597+ corruptedMarkerCount .incrementAndGet ();
2598+ }
2599+ return FileVisitResult .CONTINUE ;
2600+ }
2601+ };
2602+ Files .walkFileTree (indexPath , corruptedVisitor );
2603+
2604+ assertThat ("corruption marker should not be there" , corruptedMarkerCount .get (), equalTo (0 ));
2605+
2606+ final ShardRouting shardRouting = ShardRoutingHelper .initWithSameId (indexShard .routingEntry (),
2607+ RecoverySource .StoreRecoverySource .EXISTING_STORE_INSTANCE
2608+ );
2609+ // start shard and perform index check on startup. It enforce shard to fail due to corrupted index files
2610+ final IndexMetaData indexMetaData = IndexMetaData .builder (indexShard .indexSettings ().getIndexMetaData ())
2611+ .settings (Settings .builder ()
2612+ .put (indexShard .indexSettings .getSettings ())
2613+ .put (IndexSettings .INDEX_CHECK_ON_STARTUP .getKey (), randomFrom ("true" , "checksum" )))
2614+ .build ();
2615+
2616+ IndexShard corruptedShard = newShard (shardRouting , shardPath , indexMetaData ,
2617+ null , null , indexShard .engineFactory ,
2618+ indexShard .getGlobalCheckpointSyncer (), EMPTY_EVENT_LISTENER );
2619+
2620+ final IndexShardRecoveryException indexShardRecoveryException =
2621+ expectThrows (IndexShardRecoveryException .class , () -> newStartedShard (p -> corruptedShard , true ));
2622+ assertThat (indexShardRecoveryException .getMessage (), equalTo ("failed recovery" ));
2623+
2624+ // check that corrupt marker is there
2625+ Files .walkFileTree (indexPath , corruptedVisitor );
2626+ assertThat ("store has to be marked as corrupted" , corruptedMarkerCount .get (), equalTo (1 ));
2627+
2628+ try {
2629+ closeShards (corruptedShard );
2630+ } catch (RuntimeException e ) {
2631+ assertThat (e .getMessage (), equalTo ("CheckIndex failed" ));
2632+ }
2633+ }
2634+
2635+ public void testShardDoesNotStartIfCorruptedMarkerIsPresent () throws Exception {
2636+ final IndexShard indexShard = newStartedShard (true );
2637+
2638+ final long numDocs = between (10 , 100 );
2639+ for (long i = 0 ; i < numDocs ; i ++) {
2640+ indexDoc (indexShard , "_doc" , Long .toString (i ), "{}" );
2641+ }
2642+ indexShard .flush (new FlushRequest ());
2643+ closeShards (indexShard );
2644+
2645+ final ShardPath shardPath = indexShard .shardPath ();
2646+
2647+ final ShardRouting shardRouting = ShardRoutingHelper .initWithSameId (indexShard .routingEntry (),
2648+ RecoverySource .StoreRecoverySource .EXISTING_STORE_INSTANCE
2649+ );
2650+ final IndexMetaData indexMetaData = indexShard .indexSettings ().getIndexMetaData ();
2651+
2652+ final Path indexPath = shardPath .getDataPath ().resolve (ShardPath .INDEX_FOLDER_NAME );
2653+
2654+ // create corrupted marker
2655+ final String corruptionMessage = "fake ioexception" ;
2656+ try (Store store = createStore (indexShard .indexSettings (), shardPath )) {
2657+ store .markStoreCorrupted (new IOException (corruptionMessage ));
2658+ }
2659+
2660+ // try to start shard on corrupted files
2661+ final IndexShard corruptedShard = newShard (shardRouting , shardPath , indexMetaData ,
2662+ null , null , indexShard .engineFactory ,
2663+ indexShard .getGlobalCheckpointSyncer (), EMPTY_EVENT_LISTENER );
2664+
2665+ final IndexShardRecoveryException exception1 = expectThrows (IndexShardRecoveryException .class ,
2666+ () -> newStartedShard (p -> corruptedShard , true ));
2667+ assertThat (exception1 .getCause ().getMessage (), equalTo (corruptionMessage + " (resource=preexisting_corruption)" ));
2668+ closeShards (corruptedShard );
2669+
2670+ final AtomicInteger corruptedMarkerCount = new AtomicInteger ();
2671+ final SimpleFileVisitor <Path > corruptedVisitor = new SimpleFileVisitor <Path >() {
2672+ @ Override
2673+ public FileVisitResult visitFile (Path file , BasicFileAttributes attrs ) throws IOException {
2674+ if (Files .isRegularFile (file ) && file .getFileName ().toString ().startsWith (Store .CORRUPTED )) {
2675+ corruptedMarkerCount .incrementAndGet ();
2676+ }
2677+ return FileVisitResult .CONTINUE ;
2678+ }
2679+ };
2680+ Files .walkFileTree (indexPath , corruptedVisitor );
2681+ assertThat ("store has to be marked as corrupted" , corruptedMarkerCount .get (), equalTo (1 ));
2682+
2683+ // try to start another time shard on corrupted files
2684+ final IndexShard corruptedShard2 = newShard (shardRouting , shardPath , indexMetaData ,
2685+ null , null , indexShard .engineFactory ,
2686+ indexShard .getGlobalCheckpointSyncer (), EMPTY_EVENT_LISTENER );
2687+
2688+ final IndexShardRecoveryException exception2 = expectThrows (IndexShardRecoveryException .class ,
2689+ () -> newStartedShard (p -> corruptedShard2 , true ));
2690+ assertThat (exception2 .getCause ().getMessage (), equalTo (corruptionMessage + " (resource=preexisting_corruption)" ));
2691+ closeShards (corruptedShard2 );
2692+
2693+ // check that corrupt marker is there
2694+ corruptedMarkerCount .set (0 );
2695+ Files .walkFileTree (indexPath , corruptedVisitor );
2696+ assertThat ("store still has a single corrupt marker" , corruptedMarkerCount .get (), equalTo (1 ));
2697+ }
2698+
2699+ private Path corruptIndexFile (ShardPath shardPath ) throws IOException {
2700+ final Path indexPath = shardPath .getDataPath ().resolve (ShardPath .INDEX_FOLDER_NAME );
2701+ final Path [] filesToCorrupt =
2702+ Files .walk (indexPath )
2703+ .filter (p -> {
2704+ final String name = p .getFileName ().toString ();
2705+ return Files .isRegularFile (p )
2706+ && name .startsWith ("extra" ) == false // Skip files added by Lucene's ExtrasFS
2707+ && IndexWriter .WRITE_LOCK_NAME .equals (name ) == false
2708+ && name .startsWith ("segments_" ) == false && name .endsWith (".si" ) == false ;
2709+ })
2710+ .toArray (Path []::new );
2711+ CorruptionUtils .corruptFile (random (), filesToCorrupt );
2712+ return indexPath ;
2713+ }
2714+
25722715 /**
25732716 * Simulates a scenario that happens when we are async fetching snapshot metadata from GatewayService
25742717 * and checking index concurrently. This should always be possible without any exception.
@@ -2592,7 +2735,7 @@ public void testReadSnapshotAndCheckIndexConcurrently() throws Exception {
25922735 final IndexMetaData indexMetaData = IndexMetaData .builder (indexShard .indexSettings ().getIndexMetaData ())
25932736 .settings (Settings .builder ()
25942737 .put (indexShard .indexSettings .getSettings ())
2595- .put (IndexSettings .INDEX_CHECK_ON_STARTUP .getKey (), randomFrom ("false" , "true" , "checksum" , "fix" )))
2738+ .put (IndexSettings .INDEX_CHECK_ON_STARTUP .getKey (), randomFrom ("false" , "true" , "checksum" )))
25962739 .build ();
25972740 final IndexShard newShard = newShard (shardRouting , indexShard .shardPath (), indexMetaData ,
25982741 null , null , indexShard .engineFactory , indexShard .getGlobalCheckpointSyncer (), EMPTY_EVENT_LISTENER );
@@ -2634,6 +2777,16 @@ public void testReadSnapshotAndCheckIndexConcurrently() throws Exception {
26342777 closeShards (newShard );
26352778 }
26362779
2780+ public void testCheckOnStartupDeprecatedValue () throws Exception {
2781+ final Settings settings = Settings .builder ().put (IndexSettings .INDEX_CHECK_ON_STARTUP .getKey (), "fix" ).build ();
2782+
2783+ final IndexShard newShard = newShard (true , settings );
2784+ closeShards (newShard );
2785+
2786+ assertWarnings ("Setting [index.shard.check_on_startup] is set to deprecated value [fix], "
2787+ + "which has no effect and will not be accepted in future" );
2788+ }
2789+
26372790 class Result {
26382791 private final int localCheckpoint ;
26392792 private final int maxSeqNo ;
0 commit comments