Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.TimeUnit;

public interface MonkeyConstants {

Expand All @@ -45,6 +46,11 @@ public interface MonkeyConstants {
String UNBALANCE_WAIT_AFTER_BALANCE_MS = "unbalance.action.wait.after.period";
String UNBALANCE_KILL_META_RS = "unbalance.action.kill.meta.rs";
String DECREASE_HFILE_SIZE_SLEEP_TIME = "decrease.hfile.size.sleep.time";
String RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME = "restart.random.rs.exception.sleep.time";
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you give these configs a unit? It's always seconds? At least use TimeUnit to define the default values so that developer intent is clear.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are in milliseconds. Updated the PR to use TimeUnit.

String RESTART_ACTIVE_NAMENODE_SLEEP_TIME = "restart.active.namenode.sleep.time";
String RESTART_RANDOM_DATANODE_SLEEP_TIME = "restart.random.datanode.sleep.time";
String RESTART_RANDOM_JOURNALNODE_SLEEP_TIME = "restart.random.journalnode.sleep.time";
String RESTART_RANDOM_ZKNODE_SLEEP_TIME = "restart.random.zknode.sleep.time";
String GRACEFUL_RESTART_RS_SLEEP_TIME = "graceful.restart.rs.sleep.time";
String ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME = "rolling.batch.suspend.rs.sleep.time";
String ROLLING_BATCH_SUSPEND_RS_RATIO = "rolling.batch.suspend.rs.ratio";
Expand Down Expand Up @@ -92,6 +98,13 @@ public interface MonkeyConstants {
long DEFAULT_UNBALANCE_WAIT_AFTER_BALANCE_MS = 5 * 1000;
boolean DEFAULT_UNBALANCE_KILL_META_RS = true;
long DEFAULT_DECREASE_HFILE_SIZE_SLEEP_TIME = 30 * 1000;

long DEFAULT_RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME = TimeUnit.MILLISECONDS.toMillis(60000);
long DEFAULT_RESTART_ACTIVE_NAMENODE_SLEEP_TIME = TimeUnit.MILLISECONDS.toMillis(60000);
long DEFAULT_RESTART_RANDOM_DATANODE_SLEEP_TIME = TimeUnit.MILLISECONDS.toMillis(60000);
long DEFAULT_RESTART_RANDOM_JOURNALNODE_SLEEP_TIME = TimeUnit.MILLISECONDS.toMillis(60000);
long DEFAULT_RESTART_RANDOM_ZKNODE_SLEEP_TIME = TimeUnit.MILLISECONDS.toMillis(60000);

long DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME = 5000;
long DEFAULT_ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME = 30 * 1000;
float DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO = 1.0f;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,17 @@
*/
public class ServerAndDependenciesKillingMonkeyFactory extends MonkeyFactory {

private long restartRandomRsExceptMetaSleepTime;
private long restartActiveMasterSleepTime;
private long rollingBatchRestartRSSleepTime;
private long restartActiveNameNodeSleepTime;
private long restartRandomDataNodeSleepTime;
private long restartRandomJournalNodeSleepTime;
private long restartRandomZKNodeSleepTime;
private long gracefulRollingRestartTSSLeepTime;
private long rollingBatchSuspendRSSleepTime;
private float rollingBatchSuspendtRSRatio;
private long action1Period;

@Override
public ChaosMonkey build() {
Expand All @@ -53,15 +61,15 @@ public ChaosMonkey build() {
// Destructive actions to mess things around. Cannot run batch restart.
// @formatter:off
Action[] actions1 = new Action[] {
new RestartRandomRsExceptMetaAction(60000),
new RestartActiveMasterAction(5000),
new RestartRandomRsExceptMetaAction(restartRandomRsExceptMetaSleepTime),
new RestartActiveMasterAction(restartActiveMasterSleepTime),
// only allow 2 servers to be dead.
new RollingBatchRestartRsAction(5000, 1.0f, 2, true),
new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime, 1.0f, 2, true),
new ForceBalancerAction(),
new RestartActiveNameNodeAction(60000),
new RestartRandomDataNodeAction(60000),
new RestartRandomJournalNodeAction(60000),
new RestartRandomZKNodeAction(60000),
new RestartActiveNameNodeAction(restartActiveNameNodeSleepTime),
new RestartRandomDataNodeAction(restartRandomDataNodeSleepTime),
new RestartRandomJournalNodeAction(restartRandomJournalNodeSleepTime),
new RestartRandomZKNodeAction(restartRandomZKNodeSleepTime),
new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
rollingBatchSuspendtRSRatio)
Expand All @@ -73,12 +81,33 @@ public ChaosMonkey build() {
new Action[] { new DumpClusterStatusAction(), new DumpHdfsClusterStatusAction() };

return new PolicyBasedChaosMonkey(properties, util,
new CompositeSequentialPolicy(new DoActionsOncePolicy(60 * 1000, actions1),
new PeriodicRandomActionPolicy(60 * 1000, actions1)),
new PeriodicRandomActionPolicy(60 * 1000, actions2));
new CompositeSequentialPolicy(new DoActionsOncePolicy(action1Period, actions1),
new PeriodicRandomActionPolicy(action1Period, actions1)),
new PeriodicRandomActionPolicy(action1Period, actions2));
}

private void loadProperties() {
restartRandomRsExceptMetaSleepTime = Long
.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME,
MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME + ""));
restartActiveMasterSleepTime =
Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_ACTIVE_MASTER_SLEEP_TIME,
MonkeyConstants.DEFAULT_RESTART_ACTIVE_MASTER_SLEEP_TIME + ""));
rollingBatchRestartRSSleepTime = Long
.parseLong(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME,
MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + ""));
restartActiveNameNodeSleepTime =
Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_ACTIVE_NAMENODE_SLEEP_TIME,
MonkeyConstants.DEFAULT_RESTART_ACTIVE_NAMENODE_SLEEP_TIME + ""));
restartRandomDataNodeSleepTime =
Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_DATANODE_SLEEP_TIME,
MonkeyConstants.DEFAULT_RESTART_RANDOM_DATANODE_SLEEP_TIME + ""));
restartRandomJournalNodeSleepTime = Long
.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_JOURNALNODE_SLEEP_TIME,
MonkeyConstants.DEFAULT_RESTART_RANDOM_JOURNALNODE_SLEEP_TIME + ""));
restartRandomZKNodeSleepTime =
Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_ZKNODE_SLEEP_TIME,
MonkeyConstants.DEFAULT_RESTART_RANDOM_ZKNODE_SLEEP_TIME + ""));
gracefulRollingRestartTSSLeepTime =
Long.parseLong(this.properties.getProperty(MonkeyConstants.GRACEFUL_RESTART_RS_SLEEP_TIME,
MonkeyConstants.DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME + ""));
Expand All @@ -88,5 +117,8 @@ private void loadProperties() {
rollingBatchSuspendtRSRatio =
Float.parseFloat(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_RATIO,
MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO + ""));
action1Period =
Long.parseLong(this.properties.getProperty(MonkeyConstants.PERIODIC_ACTION1_PERIOD,
MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD + ""));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,13 @@
*/
public class ServerKillingMonkeyFactory extends MonkeyFactory {

private long restartRandomRsExceptMetaSleepTime;
private long restartActiveMasterSleepTime;
private long rollingBatchRestartRSSleepTime;
private long gracefulRollingRestartTSSLeepTime;
private long rollingBatchSuspendRSSleepTime;
private float rollingBatchSuspendtRSRatio;
private long action1Period;

@Override
public ChaosMonkey build() {
Expand All @@ -48,10 +52,10 @@ public ChaosMonkey build() {
// Destructive actions to mess things around. Cannot run batch restart
// @formatter:off
Action[] actions1 = new Action[] {
new RestartRandomRsExceptMetaAction(60000),
new RestartActiveMasterAction(5000),
new RestartRandomRsExceptMetaAction(restartRandomRsExceptMetaSleepTime),
new RestartActiveMasterAction(restartActiveMasterSleepTime),
// only allow 2 servers to be dead
new RollingBatchRestartRsAction(5000, 1.0f, 2, true),
new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime, 1.0f, 2, true),
new ForceBalancerAction(),
new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
Expand All @@ -63,12 +67,21 @@ public ChaosMonkey build() {
Action[] actions2 = new Action[] { new DumpClusterStatusAction() };

return new PolicyBasedChaosMonkey(properties, util,
new CompositeSequentialPolicy(new DoActionsOncePolicy(60 * 1000, actions1),
new PeriodicRandomActionPolicy(60 * 1000, actions1)),
new PeriodicRandomActionPolicy(60 * 1000, actions2));
new CompositeSequentialPolicy(new DoActionsOncePolicy(action1Period, actions1),
new PeriodicRandomActionPolicy(action1Period, actions1)),
new PeriodicRandomActionPolicy(action1Period, actions2));
}

private void loadProperties() {
restartRandomRsExceptMetaSleepTime = Long
.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME,
MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME + ""));
restartActiveMasterSleepTime =
Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_ACTIVE_MASTER_SLEEP_TIME,
MonkeyConstants.DEFAULT_RESTART_ACTIVE_MASTER_SLEEP_TIME + ""));
rollingBatchRestartRSSleepTime = Long
.parseLong(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME,
MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + ""));
gracefulRollingRestartTSSLeepTime =
Long.parseLong(this.properties.getProperty(MonkeyConstants.GRACEFUL_RESTART_RS_SLEEP_TIME,
MonkeyConstants.DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME + ""));
Expand All @@ -78,5 +91,8 @@ private void loadProperties() {
rollingBatchSuspendtRSRatio =
Float.parseFloat(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_RATIO,
MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO + ""));
action1Period =
Long.parseLong(this.properties.getProperty(MonkeyConstants.PERIODIC_ACTION1_PERIOD,
MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD + ""));
}
}