-
Notifications
You must be signed in to change notification settings - Fork 25.6k
Rollback a primary before recovering from translog #27804
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
03a3d58
0aaee95
6aa3200
7c11d37
d94b06e
f6b5c58
db24f52
63c3cd4
fba0784
0a447e9
4c4a1c7
f412572
4892029
40d5d24
cebbe6d
ac01498
1b1b984
9db0d41
b6b5226
3b265e0
d5558d0
2bd3354
b27ac02
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,6 +26,8 @@ | |
| import org.elasticsearch.index.translog.TranslogDeletionPolicy; | ||
|
|
||
| import java.io.IOException; | ||
| import java.nio.file.Path; | ||
| import java.util.ArrayList; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.function.LongSupplier; | ||
|
|
@@ -37,14 +39,16 @@ | |
| * In particular, this policy will delete index commits whose max sequence number is at most | ||
| * the current global checkpoint except the index commit which has the highest max sequence number among those. | ||
| */ | ||
| final class CombinedDeletionPolicy extends IndexDeletionPolicy { | ||
| public final class CombinedDeletionPolicy extends IndexDeletionPolicy { | ||
| private final TranslogDeletionPolicy translogDeletionPolicy; | ||
| private final EngineConfig.OpenMode openMode; | ||
| private final LongSupplier globalCheckpointSupplier; | ||
| private final IndexCommit startingIndexCommit; | ||
|
|
||
| CombinedDeletionPolicy(EngineConfig.OpenMode openMode, TranslogDeletionPolicy translogDeletionPolicy, | ||
| LongSupplier globalCheckpointSupplier) { | ||
| LongSupplier globalCheckpointSupplier, IndexCommit startingIndexCommit) { | ||
| this.openMode = openMode; | ||
| this.startingIndexCommit = startingIndexCommit; | ||
| this.translogDeletionPolicy = translogDeletionPolicy; | ||
| this.globalCheckpointSupplier = globalCheckpointSupplier; | ||
| } | ||
|
|
@@ -62,7 +66,14 @@ public void onInit(List<? extends IndexCommit> commits) throws IOException { | |
| break; | ||
| case OPEN_INDEX_AND_TRANSLOG: | ||
| assert commits.isEmpty() == false : "index is opened, but we have no commits"; | ||
| onCommit(commits); | ||
| if (startingIndexCommit == null) { | ||
| onCommit(commits); | ||
| } else { | ||
| assert commits.contains(startingIndexCommit) : "Existing commits must contain the starting commit; " + | ||
| "startingCommit [" + startingIndexCommit + "], commits [" + commits + "]"; | ||
| commits.stream().filter(commit -> startingIndexCommit.equals(commit) == false).forEach(IndexCommit::delete); | ||
| updateTranslogDeletionPolicy(startingIndexCommit, startingIndexCommit); | ||
| } | ||
| break; | ||
| default: | ||
| throw new IllegalArgumentException("unknown openMode [" + openMode + "]"); | ||
|
|
@@ -71,7 +82,7 @@ public void onInit(List<? extends IndexCommit> commits) throws IOException { | |
|
|
||
| @Override | ||
| public void onCommit(List<? extends IndexCommit> commits) throws IOException { | ||
| final int keptPosition = indexOfKeptCommits(commits); | ||
| final int keptPosition = indexOfKeptCommits(commits, globalCheckpointSupplier.getAsLong()); | ||
| for (int i = 0; i < keptPosition; i++) { | ||
| commits.get(i).delete(); | ||
| } | ||
|
|
@@ -90,12 +101,38 @@ private void updateTranslogDeletionPolicy(final IndexCommit minRequiredCommit, f | |
| translogDeletionPolicy.setMinTranslogGenerationForRecovery(minRequiredGen); | ||
| } | ||
|
|
||
| /** | ||
| * Selects a starting commit point from a list of existing commits based on the persisted global checkpoint from translog | ||
| * and the retained translog generations. All the required translog files of a starting commit point must exist, | ||
| * and its max seqno should be at most the global checkpoint from the translog checkpoint. | ||
| * | ||
| * @param commits a list of existing commit points | ||
| * @param globalCheckpoint the persisted global checkpoint from the translog, see {@link Translog#readGlobalCheckpoint(Path)} | ||
| * @param minRetainedTranslogGen the minimum translog generation that is retained, see {@link Translog#readMinReferencedTranslogGen(Path)} | ||
| */ | ||
| public static IndexCommit startingCommitPoint(List<IndexCommit> commits, long globalCheckpoint, long minRetainedTranslogGen) | ||
|
||
| throws IOException { | ||
| if (commits.isEmpty()) { | ||
| throw new IllegalArgumentException("Commit list must not empty"); | ||
| } | ||
| // Snapshotted commits may not have all of their required translog files. | ||
| final List<IndexCommit> recoverableCommits = new ArrayList<>(); | ||
| for (IndexCommit commit : commits) { | ||
| if (minRetainedTranslogGen <= Long.parseLong(commit.getUserData().get(Translog.TRANSLOG_GENERATION_KEY))) { | ||
|
||
| recoverableCommits.add(commit); | ||
| } | ||
| } | ||
| assert recoverableCommits.isEmpty() == false : "Unable to select a proper starting commit point; " + | ||
| "commits [" + commits + "], minRetainedTranslogGen [" + minRetainedTranslogGen + "]"; | ||
| final int keptPosition = indexOfKeptCommits(recoverableCommits, globalCheckpoint); | ||
| return recoverableCommits.get(keptPosition); | ||
| } | ||
|
|
||
| /** | ||
| * Find the highest index position of a safe index commit whose max sequence number is not greater than the global checkpoint. | ||
| * Index commits with different translog UUID will be filtered out as they don't belong to this engine. | ||
| */ | ||
| private int indexOfKeptCommits(List<? extends IndexCommit> commits) throws IOException { | ||
| final long currentGlobalCheckpoint = globalCheckpointSupplier.getAsLong(); | ||
| private static int indexOfKeptCommits(List<? extends IndexCommit> commits, long globalCheckpoint) throws IOException { | ||
| final String expectedTranslogUUID = commits.get(commits.size() - 1).getUserData().get(Translog.TRANSLOG_UUID_KEY); | ||
|
|
||
| // Commits are sorted by age (the 0th one is the oldest commit). | ||
|
|
@@ -110,7 +147,7 @@ private int indexOfKeptCommits(List<? extends IndexCommit> commits) throws IOExc | |
| return i; | ||
| } | ||
| final long maxSeqNoFromCommit = Long.parseLong(commitUserData.get(SequenceNumbers.MAX_SEQ_NO)); | ||
| if (maxSeqNoFromCommit <= currentGlobalCheckpoint) { | ||
| if (maxSeqNoFromCommit <= globalCheckpoint) { | ||
| return i; | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,6 +23,7 @@ | |
| import org.apache.logging.log4j.Logger; | ||
| import org.apache.logging.log4j.message.ParameterizedMessage; | ||
| import org.apache.lucene.index.CheckIndex; | ||
| import org.apache.lucene.index.DirectoryReader; | ||
| import org.apache.lucene.index.IndexCommit; | ||
| import org.apache.lucene.index.IndexOptions; | ||
| import org.apache.lucene.index.LeafReaderContext; | ||
|
|
@@ -76,6 +77,7 @@ | |
| import org.elasticsearch.index.cache.bitset.ShardBitsetFilterCache; | ||
| import org.elasticsearch.index.cache.request.ShardRequestCache; | ||
| import org.elasticsearch.index.codec.CodecService; | ||
| import org.elasticsearch.index.engine.CombinedDeletionPolicy; | ||
| import org.elasticsearch.index.engine.CommitStats; | ||
| import org.elasticsearch.index.engine.Engine; | ||
| import org.elasticsearch.index.engine.EngineConfig; | ||
|
|
@@ -140,6 +142,7 @@ | |
| import java.io.PrintStream; | ||
| import java.nio.channels.ClosedByInterruptException; | ||
| import java.nio.charset.StandardCharsets; | ||
| import java.nio.file.Path; | ||
| import java.util.ArrayList; | ||
| import java.util.Collections; | ||
| import java.util.EnumSet; | ||
|
|
@@ -2166,10 +2169,11 @@ private DocumentMapperForType docMapper(String type) { | |
| return mapperService.documentMapperWithAutoCreate(type); | ||
| } | ||
|
|
||
| private EngineConfig newEngineConfig(EngineConfig.OpenMode openMode) { | ||
| private EngineConfig newEngineConfig(EngineConfig.OpenMode openMode) throws IOException { | ||
| Sort indexSort = indexSortSupplier.get(); | ||
| final boolean forceNewHistoryUUID; | ||
| switch (shardRouting.recoverySource().getType()) { | ||
| final RecoverySource.Type recoveryType = shardRouting.recoverySource().getType(); | ||
| switch (recoveryType) { | ||
| case EXISTING_STORE: | ||
| case PEER: | ||
| forceNewHistoryUUID = false; | ||
|
|
@@ -2180,7 +2184,15 @@ private EngineConfig newEngineConfig(EngineConfig.OpenMode openMode) { | |
| forceNewHistoryUUID = true; | ||
| break; | ||
| default: | ||
| throw new AssertionError("unknown recovery type: [" + shardRouting.recoverySource().getType() + "]"); | ||
| throw new AssertionError("unknown recovery type: [" + recoveryType + "]"); | ||
| } | ||
| final IndexCommit startingCommit; | ||
| if (recoveryType == RecoverySource.Type.EXISTING_STORE) { | ||
|
||
| startingCommit = CombinedDeletionPolicy.startingCommitPoint(DirectoryReader.listCommits(store.directory()), | ||
| Translog.readGlobalCheckpoint(translogConfig.getTranslogPath()), | ||
| Translog.readMinReferencedTranslogGen(translogConfig.getTranslogPath())); | ||
| } else { | ||
| startingCommit = null; | ||
| } | ||
| return new EngineConfig(openMode, shardId, shardRouting.allocationId().getId(), | ||
| threadPool, indexSettings, warmer, store, indexSettings.getMergePolicy(), | ||
|
|
@@ -2189,7 +2201,7 @@ private EngineConfig newEngineConfig(EngineConfig.OpenMode openMode) { | |
| IndexingMemoryController.SHARD_INACTIVE_TIME_SETTING.get(indexSettings.getSettings()), | ||
| Collections.singletonList(refreshListeners), | ||
| Collections.singletonList(new RefreshMetricUpdater(refreshMetric)), | ||
| indexSort, this::runTranslogRecovery, circuitBreakerService); | ||
| indexSort, this::runTranslogRecovery, circuitBreakerService, startingCommit); | ||
| } | ||
|
|
||
| /** | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we need special handling here, and why do we need the starting commit point? Can you explain?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I calculated the retained translog generations incorrectly; I will revert this change.
There is an issue with the local checkpoint; I will reach out to discuss it with you, @bleskes.