Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,15 @@
*/
package org.apache.hadoop.ozone.upgrade;

import jakarta.annotation.Nullable;
import org.apache.hadoop.ozone.upgrade.InjectedUpgradeFinalizationExecutor.UpgradeTestInjectionPoints;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.ozone.common.StorageInfo;
import org.slf4j.Logger;

import java.io.File;
import java.io.IOException;
import java.util.Properties;
import java.util.UUID;
import java.util.concurrent.CountDownLatch;

Expand All @@ -40,6 +42,12 @@ private UpgradeTestUtils() { }
*/
public static File createVersionFile(File parentDir,
    HddsProtos.NodeType nodeType, int mlv) throws IOException {
  // Convenience overload: no additional properties are written, so the
  // four-argument variant is invoked with a null Properties argument.
  final Properties noExtraProperties = null;
  return createVersionFile(parentDir, nodeType, mlv, noExtraProperties);
}

public static File createVersionFile(File parentDir,
HddsProtos.NodeType nodeType, int mlv,
@Nullable Properties properties) throws IOException {

final String versionFileName = "VERSION";

Expand All @@ -49,6 +57,11 @@ public static File createVersionFile(File parentDir,
System.currentTimeMillis(),
mlv);

if (properties != null) {
properties.forEach((key, value) ->
info.setProperty((String) key, (String) value));
}

File versionFile = new File(parentDir, versionFileName);
info.writeTo(versionFile);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
private PipelineManager pipelineManager;
private ContainerManager containerManager;
private BlockManager scmBlockManager;
private final SCMStorageConfig scmStorageConfig;
private SCMStorageConfig scmStorageConfig;
private NodeDecommissionManager scmDecommissionManager;
private WritableContainerFactory writableContainerFactory;
private FinalizationManager finalizationManager;
Expand Down Expand Up @@ -383,6 +383,14 @@ private StorageContainerManager(OzoneConfiguration conf,
"failure.", ResultCodes.SCM_NOT_INITIALIZED);
}

// Initialize Ratis if needed.
// This is for clusters that were upgraded from an older version of Ozone.
// We enable Ratis by default.
if (!scmStorageConfig.isSCMHAEnabled()) {
// Since we have initialized Ratis, we have to reload StorageConfig
scmStorageConfig = initializeRatis(conf);
}

threadNamePrefix = getScmNodeDetails().threadNamePrefix();
primaryScmNodeId = scmStorageConfig.getPrimaryScmNodeId();

Expand Down Expand Up @@ -1253,15 +1261,13 @@ public static boolean scmInit(OzoneConfiguration conf,
StorageState state = scmStorageConfig.getState();
final SCMHANodeDetails haDetails = SCMHANodeDetails.loadSCMHAConfig(conf,
scmStorageConfig);
String primordialSCM = SCMHAUtils.getPrimordialSCM(conf);
final String primordialSCM = SCMHAUtils.getPrimordialSCM(conf);
final String selfNodeId = haDetails.getLocalNodeDetails().getNodeId();
final String selfHostName = haDetails.getLocalNodeDetails().getHostName();
if (primordialSCM != null && SCMHAUtils.isSCMHAEnabled(conf)
&& !SCMHAUtils.isPrimordialSCM(conf, selfNodeId, selfHostName)) {
LOG.info(
"SCM init command can only be executed in Primordial SCM {}, "
+ "self id {} "
+ "Ignoring it.", primordialSCM, selfNodeId);
if (primordialSCM != null &&
!SCMHAUtils.isPrimordialSCM(conf, selfNodeId, selfHostName)) {
LOG.info("SCM init command can only be executed on Primordial SCM. " +
"Primordial SCM ID: {}. Self ID: {}.", primordialSCM, selfNodeId);
return true;
}
if (state != StorageState.INITIALIZED) {
Expand Down Expand Up @@ -1291,16 +1297,7 @@ public static boolean scmInit(OzoneConfiguration conf,

scmStorageConfig.setPrimaryScmNodeId(scmStorageConfig.getScmId());
scmStorageConfig.initialize();

if (SCMHAUtils.isSCMHAEnabled(conf)) {
SCMRatisServerImpl.initialize(scmStorageConfig.getClusterID(),
scmStorageConfig.getScmId(), haDetails.getLocalNodeDetails(),
conf);
scmStorageConfig = new SCMStorageConfig(conf);
scmStorageConfig.setSCMHAFlag(true);
// Do force initialize to persist SCM_HA flag.
scmStorageConfig.forceInitialize();
}
scmStorageConfig = initializeRatis(conf);

LOG.info("SCM initialization succeeded. Current cluster id for sd={}"
+ "; cid={}; layoutVersion={}; scmId={}",
Expand All @@ -1312,26 +1309,19 @@ public static boolean scmInit(OzoneConfiguration conf,
return false;
}
} else {
clusterId = scmStorageConfig.getClusterID();
final boolean isSCMHAEnabled = scmStorageConfig.isSCMHAEnabled();

// Initialize security if security is enabled later.
initializeSecurityIfNeeded(conf, scmStorageConfig, selfHostName, true);

if (SCMHAUtils.isSCMHAEnabled(conf) && !isSCMHAEnabled) {
SCMRatisServerImpl.initialize(scmStorageConfig.getClusterID(),
scmStorageConfig.getScmId(), haDetails.getLocalNodeDetails(),
conf);
scmStorageConfig.setSCMHAFlag(true);
scmStorageConfig.setPrimaryScmNodeId(scmStorageConfig.getScmId());
scmStorageConfig.forceInitialize();
// Enable Ratis if it's not already enabled.
if (!scmStorageConfig.isSCMHAEnabled()) {
scmStorageConfig = initializeRatis(conf);

/*
* Since Ratis is initialized on an existing cluster, we have to
* Since Ratis can be initialized on an existing cluster, we have to
* trigger Ratis snapshot so that this SCM can send the latest scm.db
* to the bootstrapping SCMs later.
*/

try {
SCMHAUtils.setRatisEnabled(true);
StorageContainerManager scm = createSCM(conf);
Expand All @@ -1342,18 +1332,29 @@ public static boolean scmInit(OzoneConfiguration conf,
} catch (AuthenticationException e) {
throw new IOException(e);
}
LOG.info("Enabled SCM HA");
}

LOG.info("SCM already initialized. Reusing existing cluster id for sd={}"
+ ";cid={}; layoutVersion={}; HAEnabled={}",
scmStorageConfig.getStorageDir(), clusterId,
scmStorageConfig.getLayoutVersion(),
scmStorageConfig.isSCMHAEnabled());
scmStorageConfig.getStorageDir(), scmStorageConfig.getClusterID(),
scmStorageConfig.getLayoutVersion(), scmStorageConfig.isSCMHAEnabled());
return true;
}
}

/**
 * Initializes Ratis for this SCM and returns the storage config that
 * reflects it.
 *
 * <p>A new {@link SCMStorageConfig} is built from {@code conf}, the Ratis
 * server is initialized with the cluster ID, SCM ID and local node details,
 * and the storage config is then flagged as HA-enabled with this SCM's own
 * ID recorded as the primary SCM node ID before it is force-initialized.
 *
 * @param conf configuration used to build the storage config, load the SCM
 *             HA node details and initialize the Ratis server
 * @return the newly created storage config, after the HA flag and primary
 *         SCM node ID have been set and the config force-initialized
 * @throws IOException declared by this method; presumably propagated from
 *         the storage config or Ratis initialization calls
 */
private static SCMStorageConfig initializeRatis(OzoneConfiguration conf)
    throws IOException {
  final SCMStorageConfig reloadedConfig = new SCMStorageConfig(conf);
  final SCMHANodeDetails nodeDetails =
      SCMHANodeDetails.loadSCMHAConfig(conf, reloadedConfig);

  SCMRatisServerImpl.initialize(
      reloadedConfig.getClusterID(),
      reloadedConfig.getScmId(),
      nodeDetails.getLocalNodeDetails(),
      conf);

  // Flag HA as enabled and record this SCM as the primary node, then force
  // initialize so the updated values are written out.
  reloadedConfig.setSCMHAFlag(true);
  reloadedConfig.setPrimaryScmNodeId(reloadedConfig.getScmId());
  reloadedConfig.forceInitialize();

  LOG.info("Enabled Ratis!");
  return reloadedConfig;
}

private static InetSocketAddress getScmAddress(SCMHANodeDetails haDetails,
ConfigurationSource conf) throws IOException {
List<SCMNodeInfo> scmNodeInfoList = SCMNodeInfo.buildNodeInfo(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hdds.scm.upgrade;

import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
Expand All @@ -30,7 +31,10 @@
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.util.Properties;

import static org.apache.hadoop.ozone.OzoneConsts.SCM_HA;
import static org.apache.hadoop.ozone.OzoneConsts.SCM_ID;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
Expand All @@ -53,6 +57,8 @@ public void testStartupSlvLessThanMlv(@TempDir Path tempDir)
OzoneConfiguration conf = new OzoneConfiguration();
conf.set(ScmConfigKeys.OZONE_SCM_DB_DIRS,
tempDir.toAbsolutePath().toString());
conf.set(HddsConfigKeys.OZONE_METADATA_DIRS,
tempDir.toAbsolutePath().toString());

// Set metadata layout version larger than software layout version.
int largestSlv = 0;
Expand All @@ -61,9 +67,15 @@ public void testStartupSlvLessThanMlv(@TempDir Path tempDir)
}
int mlv = largestSlv + 1;

Properties properties = new Properties();
properties.setProperty(SCM_ID, "scm");
properties.setProperty(SCM_HA, "true");

// Create version file with MLV > SLV, which should fail the SCM
// construction.
UpgradeTestUtils.createVersionFile(scmSubdir, HddsProtos.NodeType.SCM, mlv);
UpgradeTestUtils.createVersionFile(scmSubdir, HddsProtos.NodeType.SCM, mlv,
properties);


Throwable t = assertThrows(IOException.class,
() -> new StorageContainerManager(conf));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,7 @@ protected OMHAService createOMService() throws IOException,
}

/**
* Start OM service with multiple OMs.
* Start SCM service with multiple SCMs.
*/
protected SCMHAService createSCMService()
throws IOException, AuthenticationException {
Expand Down Expand Up @@ -616,10 +616,10 @@ protected SCMHAService createSCMService()
* Initialize HA related configurations.
*/
private void initSCMHAConfig() {
// Set configurations required for starting OM HA service, because that
// Set configurations required for starting SCM HA service, because that
// is the serviceID being passed to start Ozone HA cluster.
// Here setting internal service and OZONE_OM_SERVICE_IDS_KEY, in this
// way in OM start it uses internal service id to find it's service id.
// Here we set the internal service and OZONE_SCM_SERVICE_IDS_KEY so that,
// on start, SCM uses the internal service id to find its service id.
conf.set(ScmConfigKeys.OZONE_SCM_SERVICE_IDS_KEY, scmServiceId);
conf.set(ScmConfigKeys.OZONE_SCM_DEFAULT_SERVICE_ID, scmServiceId);
String scmNodesKey = ConfUtils.addKeySuffixes(
Expand All @@ -629,6 +629,10 @@ private void initSCMHAConfig() {

for (int i = 1; i <= numOfSCMs; i++) {
String scmNodeId = SCM_NODE_ID_PREFIX + i;

if (i == 1) {
conf.set(ScmConfigKeys.OZONE_SCM_PRIMORDIAL_NODE_ID_KEY, scmNodeId);
}
scmNodesKeyValue.append(",").append(scmNodeId);

String scmAddrKey = ConfUtils.addKeySuffixes(
Expand Down