Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -96,20 +97,25 @@ private static void secureLogin(Configuration conf)
socAddr.getHostName());
}

private static NameNodeConnector getNameNodeConnector(Configuration conf)
throws IOException, InterruptedException {
public static NameNodeConnector getNameNodeConnector(Configuration conf)
throws InterruptedException {
final Collection<URI> namenodes = DFSUtil.getInternalNsRpcUris(conf);
final Path externalSPSPathId = HdfsServerConstants.MOVER_ID_PATH;
String serverName = ExternalStoragePolicySatisfier.class.getSimpleName();
while (true) {
try {
final List<NameNodeConnector> nncs = NameNodeConnector
.newNameNodeConnectors(namenodes,
ExternalStoragePolicySatisfier.class.getSimpleName(),
serverName,
externalSPSPathId, conf,
NameNodeConnector.DEFAULT_MAX_IDLE_ITERATIONS);
return nncs.get(0);
} catch (IOException e) {
LOG.warn("Failed to connect with namenode", e);
if (e.getMessage().equals("Another " + serverName + " is running.")) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's been quite some time since I last looked at this code. Thanks a lot for working on these improvements.
How about using the ExitUtil class to terminate? It has static methods to disable system exit, which you can use from tests.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's been quite some time since I last looked at this code. Thanks a lot for working on these improvements. How about using the ExitUtil class to terminate? It has static methods to disable system exit, which you can use from tests.

Thank you very much for your review, @umamaheswararao. Your suggestion makes sense to me.

I updated the code. PTAL. Thanks.

ExitUtil.terminate(-1,
"Exit immediately because another " + serverName + " is running");
}
Thread.sleep(3000); // retry the connection after few secs
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@
import org.apache.hadoop.security.ssl.KeyStoreTestUtil;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
import org.apache.hadoop.test.LambdaTestUtils;
import org.apache.hadoop.util.ExitUtil;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
Expand Down Expand Up @@ -197,9 +199,24 @@ private void createCluster() throws IOException {
writeContent(FILE);
}

/**
 * Sets up the test cluster and writes the initial test file.
 *
 * <p>Sets "dfs.block.size" to {@code DEFAULT_BLOCK_SIZE} on the test
 * configuration, starts a cluster through {@code startCluster} using
 * {@code allDiskTypes}, {@code NUM_OF_DATANODES},
 * {@code STORAGES_PER_DATANODE} and {@code CAPACITY}, registers it via
 * {@code setCluster}, and then writes content to {@code FILE}.
 *
 * @param createMoverPath forwarded unchanged to {@code startCluster};
 *     presumably controls whether the mover ID path is created when the
 *     NameNodeConnector is obtained -- confirm against
 *     {@code DFSTestUtil.getNameNodeConnector}
 * @throws IOException declared for the cluster start-up and file-writing
 *     calls made here
 */
private void createCluster(boolean createMoverPath) throws IOException {
// The block size must be configured before the cluster is started.
getConf().setLong("dfs.block.size", DEFAULT_BLOCK_SIZE);
setCluster(startCluster(getConf(), allDiskTypes, NUM_OF_DATANODES,
STORAGES_PER_DATANODE, CAPACITY, createMoverPath));
// NOTE(review): getFS() is called only for its side effect here; it
// presumably initializes the file system handle used by writeContent --
// confirm.
getFS();
writeContent(FILE);
}

/**
 * Starts a cluster without requesting creation of the mover path.
 *
 * <p>Convenience overload that delegates to the six-argument
 * {@code startCluster} with {@code createMoverPath} fixed to {@code false}.
 *
 * @param conf configuration handed to the delegate
 * @param storageTypes storage types handed to the delegate
 * @param numberOfDatanodes number of datanodes handed to the delegate
 * @param storagesPerDn storages per datanode handed to the delegate
 * @param nodeCapacity capacity value handed to the delegate
 * @return the cluster returned by the delegate
 * @throws IOException if the delegate throws it
 */
private MiniDFSCluster startCluster(final Configuration conf,
    StorageType[][] storageTypes, int numberOfDatanodes, int storagesPerDn,
    long nodeCapacity) throws IOException {
  // Callers of this overload never ask for the mover path to be created.
  final boolean createMoverPath = false;
  return startCluster(conf, storageTypes, numberOfDatanodes, storagesPerDn,
      nodeCapacity, createMoverPath);
}

private MiniDFSCluster startCluster(final Configuration conf,
StorageType[][] storageTypes, int numberOfDatanodes, int storagesPerDn,
long nodeCapacity, boolean createMoverPath) throws IOException {
long[][] capacities = new long[numberOfDatanodes][storagesPerDn];
for (int i = 0; i < numberOfDatanodes; i++) {
for (int j = 0; j < storagesPerDn; j++) {
Expand All @@ -212,7 +229,7 @@ private MiniDFSCluster startCluster(final Configuration conf,
cluster.waitActive();

nnc = DFSTestUtil.getNameNodeConnector(getConf(),
HdfsServerConstants.MOVER_ID_PATH, 1, false);
HdfsServerConstants.MOVER_ID_PATH, 1, createMoverPath);

externalSps = new StoragePolicySatisfier(getConf());
externalCtxt = new ExternalSPSContext(externalSps, nnc);
Expand Down Expand Up @@ -428,6 +445,30 @@ public void testWhenStoragePolicySetToCOLD()
}
}

@Test(timeout = 300000)
public void testInfiniteStartWhenAnotherSPSRunning()
throws Exception {

try {
// Create cluster and create mover path when get NameNodeConnector.
createCluster(true);

// Disable system exit for assert.
ExitUtil.disableSystemExit();

// Get NameNodeConnector one more time to simulate starting other sps process.
// It should exit immediately when another sps is running.
LambdaTestUtils.intercept(ExitUtil.ExitException.class,
"Exit immediately because another ExternalStoragePolicySatisfier is running",
() -> ExternalStoragePolicySatisfier.getNameNodeConnector(config));
} finally {
// Reset first exit exception to avoid AssertionError in MiniDFSCluster#shutdown.
// This has no effect on functionality.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about doing this reset in finally?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about doing this reset in finally?

Thanks @umamaheswararao for your comments. I fixed it.

ExitUtil.resetFirstExitException();
shutdownCluster();
}
}

private void doTestWhenStoragePolicySetToCOLD() throws Exception {
// Change policy to COLD
dfs.setStoragePolicy(new Path(FILE), COLD);
Expand Down