Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1966,6 +1966,8 @@ public List<RegionPlan> executeRegionPlansWithThrottling(List<RegionPlan> plans)
}
}
}
LOG.info("Balancer is going into sleep until next period in {}ms", getConfiguration()
.getInt(HConstants.HBASE_BALANCER_PERIOD, HConstants.DEFAULT_HBASE_BALANCER_PERIOD));
return successRegionPlans;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@
*
*/
@InterfaceAudience.Private
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="IS2_INCONSISTENT_SYNC",
justification="Complaint is about isByTable not being synchronized; we don't modify often")
public abstract class BaseLoadBalancer implements LoadBalancer {
protected static final int MIN_SERVER_BALANCE = 2;
private volatile boolean stopped = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {

private List<CandidateGenerator> candidateGenerators;
private List<CostFunction> costFunctions; // FindBugs: Wants this protected; IS2_INCONSISTENT_SYNC

// To save currently configed sum of multiplier. Defaulted at 1 for cases that carry high cost
private float sumMultiplier = 1.0f;
// to save and report costs to JMX
private double curOverallCost = 0d;
private double[] tempFunctionCosts;
Expand Down Expand Up @@ -197,7 +198,6 @@ public synchronized void setConf(Configuration conf) {
}
regionReplicaHostCostFunction = new RegionReplicaHostCostFunction(conf);
regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf);

costFunctions = new ArrayList<>();
addCostFunction(new RegionCountSkewCostFunction(conf));
addCostFunction(new PrimaryRegionCountSkewCostFunction(conf));
Expand Down Expand Up @@ -311,46 +311,61 @@ protected synchronized boolean areSomeRegionReplicasColocated(Cluster c) {
protected boolean needsBalance(TableName tableName, Cluster cluster) {
ClusterLoadState cs = new ClusterLoadState(cluster.clusterState);
if (cs.getNumServers() < MIN_SERVER_BALANCE) {
if (LOG.isDebugEnabled()) {
LOG.debug("Not running balancer because only " + cs.getNumServers()
+ " active regionserver(s)");
}
LOG.info("Not running balancer because only " + cs.getNumServers() +
" active regionserver(s)");
return false;
}
if (areSomeRegionReplicasColocated(cluster)) {
LOG.info("Running balancer because at least one server hosts replicas of the same region.");
return true;
}

if (idleRegionServerExist(cluster)){
LOG.info("Running balancer because cluster has idle server(s).");
return true;
}

sumMultiplier = 0.0f;
double total = 0.0;
float sumMultiplier = 0.0f;
for (CostFunction c : costFunctions) {
float multiplier = c.getMultiplier();
if (multiplier <= 0) {
LOG.trace("{} not needed because multiplier is <= 0", c.getClass().getSimpleName());
continue;
}
double cost = c.cost();
if (!c.isNeeded()) {
LOG.trace("{} not needed", c.getClass().getSimpleName());
continue;
}
total += cost * multiplier;
sumMultiplier += multiplier;
total += c.cost() * multiplier;
}
if (sumMultiplier <= 0) {
LOG.error("At least one cost function needs a multiplier > 0. For example, set "
+ "hbase.master.balancer.stochastic.regionCountCost to a positive value or default");
return false;
}

boolean balanced = total <= 0 || sumMultiplier <= 0 ||
(sumMultiplier > 0 && (total / sumMultiplier) < minCostNeedBalance);
if (LOG.isDebugEnabled()) {
LOG.debug("{} {}; total cost={}, sum multiplier={}; cost/multiplier to need a balance is {}",
balanced ? "Skipping load balancing because balanced" : "We need to load balance",
isByTable ? String.format("table (%s)", tableName) : "cluster",
total, sumMultiplier, minCostNeedBalance);
if (LOG.isTraceEnabled()) {
LOG.trace("Balance decision detailed function costs={}", functionCost());
boolean balanced = (total / sumMultiplier < minCostNeedBalance);
if (balanced) {
String reason = "";
if (total <= 0) {
reason = "(cost1*multiplier1)+(cost2*multiplier2)+...+(costn*multipliern) = " + total +
" <= 0";
} else if (sumMultiplier <= 0) {
reason = "sumMultiplier = " + sumMultiplier + " <= 0";
} else if ((total / sumMultiplier) < minCostNeedBalance) {
reason =
"[(cost1*multiplier1)+(cost2*multiplier2)+...+(costn*multipliern)]/sumMultiplier = " + (
total / sumMultiplier) + " <= minCostNeedBalance(" + minCostNeedBalance + ")";
}

LOG.info("{} - skipping load balancing because weighted average imbalance={} <= " +
"threshold({}). If you want more aggressive balancing, either lower " +
"hbase.master.balancer.stochastic.minCostNeedBalance from {} or increase the relative" +
" multiplier(s) of the specific cost function(s). functionCost={}",
isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", total / sumMultiplier,
minCostNeedBalance, minCostNeedBalance, functionCost());
} else {
LOG.info("{} - Calculating plan. may take up to {}ms to complete.",
isByTable ? "Table specific ("+tableName+")" : "Cluster wide", maxRunningTime);
}
return !balanced;
}
Expand Down Expand Up @@ -426,8 +441,9 @@ public synchronized List<RegionPlan> balanceTable(TableName tableName, Map<Serve
maxSteps);
}
}
LOG.info("start StochasticLoadBalancer.balancer, initCost=" + currentCost + ", functionCost="
+ functionCost() + " computedMaxSteps: " + computedMaxSteps);
LOG.info("Start StochasticLoadBalancer.balancer, initial weighted average imbalance={}, "
+ "functionCost={} computedMaxSteps={}",
currentCost / sumMultiplier, functionCost(), computedMaxSteps);

// Perform a stochastic walk to see if we can get a good fit.
long step;
Expand Down Expand Up @@ -472,16 +488,16 @@ public synchronized List<RegionPlan> balanceTable(TableName tableName, Map<Serve
updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
if (initCost > currentCost) {
plans = createRegionPlans(cluster);
LOG.info("Finished computing new load balance plan. Computation took {}" +
" to try {} different iterations. Found a solution that moves " +
"{} regions; Going from a computed cost of {}" +
" to a new cost of {}", java.time.Duration.ofMillis(endTime - startTime),
step, plans.size(), initCost, currentCost);
LOG.info("Finished computing new moving plan. Computation took {} ms" +
" to try {} different iterations. Found a solution that moves " +
"{} regions; Going from a computed imbalance of {}" + " to a new imbalance of {}. ",
endTime - startTime, step, plans.size(), initCost / sumMultiplier,
currentCost / sumMultiplier);
return plans;
}
LOG.info("Could not find a better load balance plan. Tried {} different configurations in " +
"{}, and did not find anything with a computed cost less than {}", step,
java.time.Duration.ofMillis(endTime - startTime), initCost);
LOG.info("Could not find a better moving plan. Tried {} different configurations in " +
"{} ms, and did not find anything with an imbalance score less than {}", step,
endTime - startTime, initCost / sumMultiplier);
return null;
}

Expand Down Expand Up @@ -513,7 +529,8 @@ private void updateStochasticCosts(TableName tableName, double overall, double[]
}

private void addCostFunction(CostFunction costFunction) {
if (costFunction.getMultiplier() > 0) {
float multiplier = costFunction.getMultiplier();
if (multiplier > 0) {
costFunctions.add(costFunction);
}
}
Expand All @@ -524,9 +541,13 @@ private String functionCost() {
builder.append(c.getClass().getSimpleName());
builder.append(" : (");
if (c.isNeeded()) {
builder.append(c.getMultiplier());
builder.append("multiplier=" + c.getMultiplier());
builder.append(", ");
builder.append(c.cost());
double cost = c.cost();
builder.append("imbalance=" + cost);
if (cost < minCostNeedBalance) {
builder.append(", balanced");
}
} else {
builder.append("not needed");
}
Expand All @@ -535,6 +556,28 @@ private String functionCost() {
return builder.toString();
}


private String totalCostsPerFunc() {
StringBuilder builder = new StringBuilder();
for (CostFunction c : costFunctions) {
if (!c.isNeeded()) {
continue;
}
double cost = c.getMultiplier() * c.cost();
if (cost > 0.0) {
builder.append(" ");
builder.append(c.getClass().getSimpleName());
builder.append(" : ");
builder.append(cost);
builder.append(";");
}
}
if (builder.length() > 0) {
builder.deleteCharAt(builder.length() - 1);
}
return builder.toString();
}

/**
* Create all of the RegionPlan's needed to move from the initial cluster state to the desired
* state.
Expand Down Expand Up @@ -601,7 +644,7 @@ void initCosts(Cluster cluster) {
allowedOnPath = ".*(/src/test/.*|StochasticLoadBalancer).java")
void updateCostsWithAction(Cluster cluster, Action action) {
for (CostFunction c : costFunctions) {
if (c.getMultiplier() > 0 && c.isNeeded()) {
if (c.isNeeded()) {
c.postAction(action);
}
}
Expand Down Expand Up @@ -640,7 +683,7 @@ String[] getCostFunctionNames() {
CostFunction c = costFunctions.get(i);
this.tempFunctionCosts[i] = 0.0;

if (c.getMultiplier() <= 0 || !c.isNeeded()) {
if (!c.isNeeded()) {
continue;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public class TestStochasticLoadBalancerBalanceCluster extends BalancerTestBase {
*/
@Test
public void testBalanceCluster() throws Exception {
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 3 * 60 * 1000); // 800 sec
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 3 * 60 * 1000); // 3 min
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 20000000L);
loadBalancer.setConf(conf);
Expand Down