Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ public SCMHAManagerImpl(final ConfigurationSource conf,
final StorageContainerManager scm) throws IOException {
this.conf = conf;
this.scm = scm;
this.exitManager = new ExitManager();
if (SCMHAUtils.isSCMHAEnabled(conf)) {
this.transactionBuffer = new SCMHADBTransactionBufferImpl(scm);
this.ratisServer = new SCMRatisServerImpl(conf, scm,
Expand Down Expand Up @@ -258,37 +259,49 @@ public TermIndex installCheckpoint(Path checkpointLocation,
throw e;
}

File dbBackup = null;
File dbBackup;
try {
dbBackup = HAUtils
.replaceDBWithCheckpoint(lastAppliedIndex, oldDBLocation,
checkpointLocation, OzoneConsts.SCM_DB_BACKUP_PREFIX);
LOG.info("Replaced DB with checkpoint, term: {}, index: {}",
term, lastAppliedIndex);
} catch (Exception e) {
// If the latest checkpoint cannot be installed, rethrow the exception
// so that reinitialize can propagate it to Ratis to be handled
// properly.
LOG.error("Failed to install Snapshot as SCM failed to replace"
+ " DB with downloaded checkpoint. Reloading old SCM state.", e);
+ " DB with downloaded checkpoint. Checkpoint transaction {}", e,
checkpointTxnInfo.getTransactionIndex());
throw e;
}

// Reload the DB store with the new checkpoint.
// Restart (unpause) the state machine and update its last applied index
// to the installed checkpoint's snapshot index.
try {
reloadSCMState();
LOG.info("Reloaded SCM state with Term: {} and Index: {}", term,
lastAppliedIndex);
} catch (Exception ex) {
LOG.info("Failed to reload SCM state with Term: {} and Index: {}", term,
lastAppliedIndex);
// revert to the old db, since the new db may be a corrupted one
// so that SCM can restart from the old db.
try {
// revert to the old db, since the new db may be a corrupted one,
// so that SCM can restart from the old db.
if (dbBackup != null) {
dbBackup = HAUtils
.replaceDBWithCheckpoint(lastAppliedIndex, oldDBLocation,
dbBackup =
HAUtils.replaceDBWithCheckpoint(lastAppliedIndex, oldDBLocation,
dbBackup.toPath(), OzoneConsts.SCM_DB_BACKUP_PREFIX);
startServices();
LOG.error("Replacing SCM state with Term : {} and Index:",
termIndex.getTerm(), termIndex.getTerm());
// Reload the SCM state from the restored old DB before terminating.
// Tests also re-verify the state after a corrupt DB has been loaded;
// without this reloadSCMState call they fail with an NPE when finding
// the db location.
reloadSCMState();
}
} finally {
String errorMsg =
"Failed to reload SCM state and instantiate services.";
String errorMsg = "Failed to reload SCM state and instantiate " +
"services.";
exitManager.exitSystem(1, errorMsg, ex, LOG);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ public void pause() {
}

@Override
public void reinitialize() {
public void reinitialize() throws IOException {
Preconditions.checkNotNull(installingDBCheckpoint);
DBCheckpoint checkpoint = installingDBCheckpoint;

Expand All @@ -369,8 +369,8 @@ public void reinitialize() {
termIndex =
scm.getScmHAManager().installCheckpoint(checkpoint);
} catch (Exception e) {
LOG.error("Failed to reinitialize SCMStateMachine.");
return;
LOG.error("Failed to reinitialize SCMStateMachine.", e);
throw new IOException(e);
}

// re-initialize the DBTransactionBuffer and update the lastAppliedIndex.
Expand Down