2323import org .apache .lucene .index .DirectoryReader ;
2424import org .apache .lucene .index .IndexCommit ;
2525import org .apache .lucene .index .IndexableField ;
26+ import org .apache .lucene .index .IndexWriter ;
2627import org .apache .lucene .index .Term ;
2728import org .apache .lucene .search .IndexSearcher ;
2829import org .apache .lucene .search .TermQuery ;
118119import org .elasticsearch .snapshots .SnapshotId ;
119120import org .elasticsearch .snapshots .SnapshotInfo ;
120121import org .elasticsearch .snapshots .SnapshotShardFailure ;
122+ import org .elasticsearch .test .CorruptionUtils ;
121123import org .elasticsearch .test .DummyShardLock ;
122124import org .elasticsearch .test .FieldMaskingReader ;
123125import org .elasticsearch .test .VersionUtils ;
126128
127129import java .io .IOException ;
128130import java .nio .charset .Charset ;
131+ import java .nio .file .FileVisitResult ;
132+ import java .nio .file .Files ;
129133import java .nio .file .Path ;
134+ import java .nio .file .SimpleFileVisitor ;
135+ import java .nio .file .attribute .BasicFileAttributes ;
130136import java .util .ArrayList ;
131137import java .util .Arrays ;
132138import java .util .Collections ;
@@ -1239,7 +1245,7 @@ public String[] listAll() throws IOException {
12391245 };
12401246
12411247 try (Store store = createStore (shardId , new IndexSettings (metaData , Settings .EMPTY ), directory )) {
1242- IndexShard shard = newShard (shardRouting , shardPath , metaData , store ,
1248+ IndexShard shard = newShard (shardRouting , shardPath , metaData , i -> store ,
12431249 null , new InternalEngineFactory (), () -> {
12441250 }, EMPTY_EVENT_LISTENER );
12451251 AtomicBoolean failureCallbackTriggered = new AtomicBoolean (false );
@@ -2590,6 +2596,143 @@ public void testReadSnapshotConcurrently() throws IOException, InterruptedExcept
25902596 closeShards (newShard );
25912597 }
25922598
2599+ public void testIndexCheckOnStartup () throws Exception {
2600+ final IndexShard indexShard = newStartedShard (true );
2601+
2602+ final long numDocs = between (10 , 100 );
2603+ for (long i = 0 ; i < numDocs ; i ++) {
2604+ indexDoc (indexShard , "_doc" , Long .toString (i ), "{}" );
2605+ }
2606+ indexShard .flush (new FlushRequest ());
2607+ closeShards (indexShard );
2608+
2609+ final ShardPath shardPath = indexShard .shardPath ();
2610+
2611+ final Path indexPath = corruptIndexFile (shardPath );
2612+
2613+ final AtomicInteger corruptedMarkerCount = new AtomicInteger ();
2614+ final SimpleFileVisitor <Path > corruptedVisitor = new SimpleFileVisitor <Path >() {
2615+ @ Override
2616+ public FileVisitResult visitFile (Path file , BasicFileAttributes attrs ) throws IOException {
2617+ if (Files .isRegularFile (file ) && file .getFileName ().toString ().startsWith (Store .CORRUPTED )) {
2618+ corruptedMarkerCount .incrementAndGet ();
2619+ }
2620+ return FileVisitResult .CONTINUE ;
2621+ }
2622+ };
2623+ Files .walkFileTree (indexPath , corruptedVisitor );
2624+
2625+ assertThat ("corruption marker should not be there" , corruptedMarkerCount .get (), equalTo (0 ));
2626+
2627+ final ShardRouting shardRouting = ShardRoutingHelper .initWithSameId (indexShard .routingEntry (),
2628+ RecoverySource .StoreRecoverySource .EXISTING_STORE_INSTANCE
2629+ );
2630+ // start shard and perform index check on startup. It enforce shard to fail due to corrupted index files
2631+ final IndexMetaData indexMetaData = IndexMetaData .builder (indexShard .indexSettings ().getIndexMetaData ())
2632+ .settings (Settings .builder ()
2633+ .put (indexShard .indexSettings .getSettings ())
2634+ .put (IndexSettings .INDEX_CHECK_ON_STARTUP .getKey (), randomFrom ("true" , "checksum" )))
2635+ .build ();
2636+
2637+ IndexShard corruptedShard = newShard (shardRouting , shardPath , indexMetaData ,
2638+ null , null , indexShard .engineFactory ,
2639+ indexShard .getGlobalCheckpointSyncer (), EMPTY_EVENT_LISTENER );
2640+
2641+ final IndexShardRecoveryException indexShardRecoveryException =
2642+ expectThrows (IndexShardRecoveryException .class , () -> newStartedShard (p -> corruptedShard , true ));
2643+ assertThat (indexShardRecoveryException .getMessage (), equalTo ("failed recovery" ));
2644+
2645+ // check that corrupt marker is there
2646+ Files .walkFileTree (indexPath , corruptedVisitor );
2647+ assertThat ("store has to be marked as corrupted" , corruptedMarkerCount .get (), equalTo (1 ));
2648+
2649+ try {
2650+ closeShards (corruptedShard );
2651+ } catch (RuntimeException e ) {
2652+ assertThat (e .getMessage (), equalTo ("CheckIndex failed" ));
2653+ }
2654+ }
2655+
2656+ public void testShardDoesNotStartIfCorruptedMarkerIsPresent () throws Exception {
2657+ final IndexShard indexShard = newStartedShard (true );
2658+
2659+ final long numDocs = between (10 , 100 );
2660+ for (long i = 0 ; i < numDocs ; i ++) {
2661+ indexDoc (indexShard , "_doc" , Long .toString (i ), "{}" );
2662+ }
2663+ indexShard .flush (new FlushRequest ());
2664+ closeShards (indexShard );
2665+
2666+ final ShardPath shardPath = indexShard .shardPath ();
2667+
2668+ final ShardRouting shardRouting = ShardRoutingHelper .initWithSameId (indexShard .routingEntry (),
2669+ RecoverySource .StoreRecoverySource .EXISTING_STORE_INSTANCE
2670+ );
2671+ final IndexMetaData indexMetaData = indexShard .indexSettings ().getIndexMetaData ();
2672+
2673+ final Path indexPath = shardPath .getDataPath ().resolve (ShardPath .INDEX_FOLDER_NAME );
2674+
2675+ // create corrupted marker
2676+ final String corruptionMessage = "fake ioexception" ;
2677+ try (Store store = createStore (indexShard .indexSettings (), shardPath )) {
2678+ store .markStoreCorrupted (new IOException (corruptionMessage ));
2679+ }
2680+
2681+ // try to start shard on corrupted files
2682+ final IndexShard corruptedShard = newShard (shardRouting , shardPath , indexMetaData ,
2683+ null , null , indexShard .engineFactory ,
2684+ indexShard .getGlobalCheckpointSyncer (), EMPTY_EVENT_LISTENER );
2685+
2686+ final IndexShardRecoveryException exception1 = expectThrows (IndexShardRecoveryException .class ,
2687+ () -> newStartedShard (p -> corruptedShard , true ));
2688+ assertThat (exception1 .getCause ().getMessage (), equalTo (corruptionMessage + " (resource=preexisting_corruption)" ));
2689+ closeShards (corruptedShard );
2690+
2691+ final AtomicInteger corruptedMarkerCount = new AtomicInteger ();
2692+ final SimpleFileVisitor <Path > corruptedVisitor = new SimpleFileVisitor <Path >() {
2693+ @ Override
2694+ public FileVisitResult visitFile (Path file , BasicFileAttributes attrs ) throws IOException {
2695+ if (Files .isRegularFile (file ) && file .getFileName ().toString ().startsWith (Store .CORRUPTED )) {
2696+ corruptedMarkerCount .incrementAndGet ();
2697+ }
2698+ return FileVisitResult .CONTINUE ;
2699+ }
2700+ };
2701+ Files .walkFileTree (indexPath , corruptedVisitor );
2702+ assertThat ("store has to be marked as corrupted" , corruptedMarkerCount .get (), equalTo (1 ));
2703+
2704+ // try to start another time shard on corrupted files
2705+ final IndexShard corruptedShard2 = newShard (shardRouting , shardPath , indexMetaData ,
2706+ null , null , indexShard .engineFactory ,
2707+ indexShard .getGlobalCheckpointSyncer (), EMPTY_EVENT_LISTENER );
2708+
2709+ final IndexShardRecoveryException exception2 = expectThrows (IndexShardRecoveryException .class ,
2710+ () -> newStartedShard (p -> corruptedShard2 , true ));
2711+ assertThat (exception2 .getCause ().getMessage (), equalTo (corruptionMessage + " (resource=preexisting_corruption)" ));
2712+ closeShards (corruptedShard2 );
2713+
2714+ // check that corrupt marker is there
2715+ corruptedMarkerCount .set (0 );
2716+ Files .walkFileTree (indexPath , corruptedVisitor );
2717+ assertThat ("store still has a single corrupt marker" , corruptedMarkerCount .get (), equalTo (1 ));
2718+ }
2719+
2720+ private Path corruptIndexFile (ShardPath shardPath ) throws IOException {
2721+ final Path indexPath = shardPath .getDataPath ().resolve (ShardPath .INDEX_FOLDER_NAME );
2722+ final Path [] filesToCorrupt =
2723+ Files .walk (indexPath )
2724+ .filter (p -> {
2725+ final String name = p .getFileName ().toString ();
2726+ return Files .isRegularFile (p )
2727+ && name .startsWith ("extra" ) == false // Skip files added by Lucene's ExtrasFS
2728+ && IndexWriter .WRITE_LOCK_NAME .equals (name ) == false
2729+ && name .startsWith ("segments_" ) == false && name .endsWith (".si" ) == false ;
2730+ })
2731+ .toArray (Path []::new );
2732+ CorruptionUtils .corruptFile (random (), filesToCorrupt );
2733+ return indexPath ;
2734+ }
2735+
25932736 /**
25942737 * Simulates a scenario that happens when we are async fetching snapshot metadata from GatewayService
25952738 * and checking index concurrently. This should always be possible without any exception.
@@ -2613,7 +2756,7 @@ public void testReadSnapshotAndCheckIndexConcurrently() throws Exception {
26132756 final IndexMetaData indexMetaData = IndexMetaData .builder (indexShard .indexSettings ().getIndexMetaData ())
26142757 .settings (Settings .builder ()
26152758 .put (indexShard .indexSettings .getSettings ())
2616- .put (IndexSettings .INDEX_CHECK_ON_STARTUP .getKey (), randomFrom ("false" , "true" , "checksum" , "fix" )))
2759+ .put (IndexSettings .INDEX_CHECK_ON_STARTUP .getKey (), randomFrom ("false" , "true" , "checksum" )))
26172760 .build ();
26182761 final IndexShard newShard = newShard (shardRouting , indexShard .shardPath (), indexMetaData ,
26192762 null , null , indexShard .engineFactory , indexShard .getGlobalCheckpointSyncer (), EMPTY_EVENT_LISTENER );
@@ -2655,6 +2798,16 @@ public void testReadSnapshotAndCheckIndexConcurrently() throws Exception {
26552798 closeShards (newShard );
26562799 }
26572800
2801+ public void testCheckOnStartupDeprecatedValue () throws Exception {
2802+ final Settings settings = Settings .builder ().put (IndexSettings .INDEX_CHECK_ON_STARTUP .getKey (), "fix" ).build ();
2803+
2804+ final IndexShard newShard = newShard (true , settings );
2805+ closeShards (newShard );
2806+
2807+ assertWarnings ("Setting [index.shard.check_on_startup] is set to deprecated value [fix], "
2808+ + "which has no effect and will not be accepted in future" );
2809+ }
2810+
26582811 class Result {
26592812 private final int localCheckpoint ;
26602813 private final int maxSeqNo ;
0 commit comments