26
26
import java .util .concurrent .ExecutorService ;
27
27
import java .util .concurrent .Executors ;
28
28
import java .util .concurrent .Future ;
29
+
29
30
import org .apache .helix .HelixRebalanceException ;
30
31
import org .apache .helix .controller .dataproviders .ResourceControllerDataProvider ;
31
32
import org .apache .helix .controller .rebalancer .util .WagedRebalanceUtil ;
44
45
import org .slf4j .Logger ;
45
46
import org .slf4j .LoggerFactory ;
46
47
47
-
48
48
/**
49
49
* Compute the best possible assignment based on the Baseline and the previous Best Possible assignment.
50
50
* The coordinator compares the previous Best Possible assignment with the current cluster state so as to derive a
@@ -68,10 +68,8 @@ class PartialRebalanceRunner implements AutoCloseable {
68
68
private Future <Boolean > _asyncPartialRebalanceResult ;
69
69
70
70
public PartialRebalanceRunner (AssignmentManager assignmentManager ,
71
- AssignmentMetadataStore assignmentMetadataStore ,
72
- MetricCollector metricCollector ,
73
- CountMetric rebalanceFailureCount ,
74
- boolean isAsyncPartialRebalanceEnabled ) {
71
+ AssignmentMetadataStore assignmentMetadataStore , MetricCollector metricCollector ,
72
+ CountMetric rebalanceFailureCount , boolean isAsyncPartialRebalanceEnabled ) {
75
73
_assignmentManager = assignmentManager ;
76
74
_assignmentMetadataStore = assignmentMetadataStore ;
77
75
_bestPossibleCalculateExecutor = Executors .newSingleThreadExecutor ();
@@ -82,16 +80,16 @@ public PartialRebalanceRunner(AssignmentManager assignmentManager,
82
80
WagedRebalancerMetricCollector .WagedRebalancerMetricNames .PartialRebalanceCounter .name (),
83
81
CountMetric .class );
84
82
_partialRebalanceLatency = metricCollector .getMetric (
85
- WagedRebalancerMetricCollector .WagedRebalancerMetricNames .PartialRebalanceLatencyGauge
86
- .name (),
83
+ WagedRebalancerMetricCollector .WagedRebalancerMetricNames .PartialRebalanceLatencyGauge .name (),
87
84
LatencyMetric .class );
88
85
_baselineDivergenceGauge = metricCollector .getMetric (
89
86
WagedRebalancerMetricCollector .WagedRebalancerMetricNames .BaselineDivergenceGauge .name (),
90
87
BaselineDivergenceGauge .class );
91
88
}
92
89
93
- public void partialRebalance (ResourceControllerDataProvider clusterData , Map <String , Resource > resourceMap ,
94
- Set <String > activeNodes , final CurrentStateOutput currentStateOutput , RebalanceAlgorithm algorithm )
90
+ public void partialRebalance (ResourceControllerDataProvider clusterData ,
91
+ Map <String , Resource > resourceMap , Set <String > activeNodes ,
92
+ final CurrentStateOutput currentStateOutput , RebalanceAlgorithm algorithm )
95
93
throws HelixRebalanceException {
96
94
// If partial rebalance is async and the previous result is not completed yet,
97
95
// do not start another partial rebalance.
@@ -100,19 +98,20 @@ public void partialRebalance(ResourceControllerDataProvider clusterData, Map<Str
100
98
return ;
101
99
}
102
100
103
- _asyncPartialRebalanceResult = _bestPossibleCalculateExecutor .submit (ExecutorTaskUtil .wrap (() -> {
104
- try {
105
- doPartialRebalance (clusterData , resourceMap , activeNodes , algorithm ,
106
- currentStateOutput );
107
- } catch (HelixRebalanceException e ) {
108
- if (_asyncPartialRebalanceEnabled ) {
109
- _rebalanceFailureCount .increment (1L );
110
- }
111
- LOG .error ("Failed to calculate best possible assignment!" , e );
112
- return false ;
113
- }
114
- return true ;
115
- }));
101
+ _asyncPartialRebalanceResult =
102
+ _bestPossibleCalculateExecutor .submit (ExecutorTaskUtil .wrap (() -> {
103
+ try {
104
+ doPartialRebalance (clusterData , resourceMap , activeNodes , algorithm ,
105
+ currentStateOutput );
106
+ } catch (HelixRebalanceException e ) {
107
+ if (_asyncPartialRebalanceEnabled ) {
108
+ _rebalanceFailureCount .increment (1L );
109
+ }
110
+ LOG .error ("Failed to calculate best possible assignment!" , e );
111
+ return false ;
112
+ }
113
+ return true ;
114
+ }));
116
115
if (!_asyncPartialRebalanceEnabled ) {
117
116
try {
118
117
if (!_asyncPartialRebalanceResult .get ()) {
@@ -131,9 +130,9 @@ public void partialRebalance(ResourceControllerDataProvider clusterData, Map<Str
131
130
* If the result differ from the persisted result, persist it to memory (only if the version is not stale);
132
131
* If persisted, trigger the pipeline so that main thread logic can run again.
133
132
*/
134
- private void doPartialRebalance (ResourceControllerDataProvider clusterData , Map < String , Resource > resourceMap ,
135
- Set <String > activeNodes , RebalanceAlgorithm algorithm , CurrentStateOutput currentStateOutput )
136
- throws HelixRebalanceException {
133
+ private void doPartialRebalance (ResourceControllerDataProvider clusterData ,
134
+ Map < String , Resource > resourceMap , Set <String > activeNodes , RebalanceAlgorithm algorithm ,
135
+ CurrentStateOutput currentStateOutput ) throws HelixRebalanceException {
137
136
LOG .info ("Start calculating the new best possible assignment." );
138
137
_partialRebalanceCounter .increment (1L );
139
138
_partialRebalanceLatency .startMeasuringLatency ();
@@ -142,27 +141,30 @@ private void doPartialRebalance(ResourceControllerDataProvider clusterData, Map<
142
141
if (_assignmentMetadataStore != null ) {
143
142
newBestPossibleAssignmentVersion = _assignmentMetadataStore .getBestPossibleVersion () + 1 ;
144
143
} else {
145
- LOG .debug ("Assignment Metadata Store is null. Skip getting best possible assignment version." );
144
+ LOG .debug (
145
+ "Assignment Metadata Store is null. Skip getting best possible assignment version." );
146
146
}
147
147
148
148
// Read the baseline from metadata store
149
149
Map <String , ResourceAssignment > currentBaseline =
150
- _assignmentManager .getBaselineAssignment (_assignmentMetadataStore , currentStateOutput , resourceMap .keySet ());
150
+ _assignmentManager .getBaselineAssignment (_assignmentMetadataStore , currentStateOutput ,
151
+ resourceMap .keySet ());
151
152
152
153
// Read the best possible assignment from metadata store
153
154
Map <String , ResourceAssignment > currentBestPossibleAssignment =
154
155
_assignmentManager .getBestPossibleAssignment (_assignmentMetadataStore , currentStateOutput ,
155
156
resourceMap .keySet ());
156
157
ClusterModel clusterModel ;
157
158
try {
158
- clusterModel = ClusterModelProvider
159
- .generateClusterModelForPartialRebalance (clusterData , resourceMap , activeNodes ,
160
- currentBaseline , currentBestPossibleAssignment );
159
+ clusterModel =
160
+ ClusterModelProvider .generateClusterModelForPartialRebalance (clusterData , resourceMap ,
161
+ activeNodes , currentBaseline , currentBestPossibleAssignment );
161
162
} catch (Exception ex ) {
162
163
throw new HelixRebalanceException ("Failed to generate cluster model for partial rebalance." ,
163
164
HelixRebalanceException .Type .INVALID_CLUSTER_STATUS , ex );
164
165
}
165
- Map <String , ResourceAssignment > newAssignment = WagedRebalanceUtil .calculateAssignment (clusterModel , algorithm );
166
+ Map <String , ResourceAssignment > newAssignment =
167
+ WagedRebalanceUtil .calculateAssignment (clusterModel , algorithm );
166
168
167
169
// Asynchronously report baseline divergence metric before persisting to metadata store,
168
170
// just in case if persisting fails, we still have the metric.
@@ -177,12 +179,14 @@ private void doPartialRebalance(ResourceControllerDataProvider clusterData, Map<
177
179
currentBaseline , newAssignmentCopy );
178
180
179
181
boolean bestPossibleUpdateSuccessful = false ;
180
- if (_assignmentMetadataStore != null && _assignmentMetadataStore .isBestPossibleChanged (newAssignment )) {
182
+ if (_assignmentMetadataStore != null && _assignmentMetadataStore .isBestPossibleChanged (
183
+ newAssignment )) {
181
184
// This will not persist the new Best Possible Assignment into ZK. It will only update the in-memory cache.
182
185
// If this is done successfully, the new Best Possible Assignment will be persisted into ZK the next time that
183
186
// the pipeline is triggered. We schedule the pipeline to run below.
184
- bestPossibleUpdateSuccessful = _assignmentMetadataStore .asyncUpdateBestPossibleAssignmentCache (newAssignment ,
185
- newBestPossibleAssignmentVersion );
187
+ bestPossibleUpdateSuccessful =
188
+ _assignmentMetadataStore .asyncUpdateBestPossibleAssignmentCache (newAssignment ,
189
+ newBestPossibleAssignmentVersion );
186
190
} else {
187
191
LOG .debug ("Assignment Metadata Store is null. Skip persisting the baseline assignment." );
188
192
}
0 commit comments