Skip to content

Commit 23ecc32

Browse files
authored
YARN-11525. [Federation] Router CLI Supports Save the SubClusterPolicyConfiguration Of Queues. (#5816)
1 parent 84dd624 commit 23ecc32

File tree

26 files changed

+1322
-11
lines changed

26 files changed

+1322
-11
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/ResourceManagerAdministrationProtocol.java

+16
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@
5858
import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceResponse;
5959
import org.apache.hadoop.yarn.server.api.protocolrecords.DeregisterSubClusterRequest;
6060
import org.apache.hadoop.yarn.server.api.protocolrecords.DeregisterSubClusterResponse;
61+
import org.apache.hadoop.yarn.server.api.protocolrecords.SaveFederationQueuePolicyRequest;
62+
import org.apache.hadoop.yarn.server.api.protocolrecords.SaveFederationQueuePolicyResponse;
63+
6164

6265
@Private
6366
public interface ResourceManagerAdministrationProtocol extends GetUserMappingsProtocol {
@@ -173,4 +176,17 @@ NodesToAttributesMappingResponse mapAttributesToNodes(
173176
@Idempotent
174177
DeregisterSubClusterResponse deregisterSubCluster(DeregisterSubClusterRequest request)
175178
throws YarnException, IOException;
179+
180+
/**
181+
* In YARN Federation mode, stores the policy information for queues.
182+
*
183+
* @param request saveFederationQueuePolicy Request
184+
* @return Response from saveFederationQueuePolicy.
185+
* @throws YarnException exceptions from yarn servers.
186+
* @throws IOException if an IO error occurred.
187+
*/
188+
@Private
189+
@Idempotent
190+
SaveFederationQueuePolicyResponse saveFederationQueuePolicy(
191+
SaveFederationQueuePolicyRequest request) throws YarnException, IOException;
176192
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.yarn.server.api.protocolrecords;
19+
20+
import org.apache.commons.lang3.StringUtils;
21+
import org.apache.commons.lang3.math.NumberUtils;
22+
import org.apache.hadoop.classification.InterfaceAudience.Private;
23+
import org.apache.hadoop.classification.InterfaceAudience.Public;
24+
import org.apache.hadoop.classification.InterfaceStability.Unstable;
25+
import org.apache.hadoop.yarn.exceptions.YarnException;
26+
import org.apache.hadoop.yarn.util.Records;
27+
28+
import java.util.LinkedHashMap;
29+
import java.util.Map;
30+
31+
/**
32+
* Queue weights for representing Federation.
33+
*/
34+
@Private
35+
@Unstable
36+
public abstract class FederationQueueWeight {
37+
38+
/**
39+
* The FederationQueueWeight object consists of three parts:
40+
* routerWeight, amrmWeight, and headRoomAlpha.
41+
*
42+
* @param routerWeight Weight for routing applications to different subclusters.
43+
* We will route the application to different subclusters based on the configured weights.
44+
* Assuming we have two subclusters, SC-1 and SC-2,
45+
* with a weight of 0.7 for SC-1 and 0.3 for SC-2,
46+
* the application will be allocated in such a way
47+
* that 70% of the applications will be assigned to SC-1 and 30% to SC-2.
48+
*
49+
* @param amrmWeight Weight for resource request from ApplicationMaster (AM) to
50+
* different subclusters' Resource Manager (RM).
51+
* Assuming we have two subclusters, SC-1 and SC-2,
52+
* with a weight of 0.6 for SC-1 and 0.4 for SC-2,
53+
* When AM requesting resources,
54+
* 60% of the requests will be made to the Resource Manager (RM) of SC-1
55+
* and 40% to the RM of SC-2.
56+
*
57+
* @param headRoomAlpha
58+
* used by policies that balance weight-based and load-based considerations in their decisions.
59+
* For policies that use this parameter,
60+
* values close to 1 indicate that most of the decision
61+
* should be based on currently observed headroom from various sub-clusters,
62+
* values close to zero, indicate that the decision should be
63+
* mostly based on weights and practically ignore current load.
64+
*
65+
* @return FederationQueueWeight
66+
*/
67+
@Private
68+
@Unstable
69+
public static FederationQueueWeight newInstance(String routerWeight,
70+
String amrmWeight, String headRoomAlpha) {
71+
FederationQueueWeight federationQueueWeight = Records.newRecord(FederationQueueWeight.class);
72+
federationQueueWeight.setRouterWeight(routerWeight);
73+
federationQueueWeight.setAmrmWeight(amrmWeight);
74+
federationQueueWeight.setHeadRoomAlpha(headRoomAlpha);
75+
return federationQueueWeight;
76+
}
77+
78+
@Public
79+
@Unstable
80+
public abstract String getRouterWeight();
81+
82+
@Public
83+
@Unstable
84+
public abstract void setRouterWeight(String routerWeight);
85+
86+
@Public
87+
@Unstable
88+
public abstract String getAmrmWeight();
89+
90+
@Public
91+
@Unstable
92+
public abstract void setAmrmWeight(String amrmWeight);
93+
94+
@Public
95+
@Unstable
96+
public abstract String getHeadRoomAlpha();
97+
98+
@Public
99+
@Unstable
100+
public abstract void setHeadRoomAlpha(String headRoomAlpha);
101+
102+
private static final String COMMA = ",";
103+
private static final String COLON = ":";
104+
105+
/**
106+
* Check if the subCluster Queue Weight Ratio are valid.
107+
*
108+
* This method can be used to validate RouterPolicyWeight and AMRMPolicyWeight.
109+
*
110+
* @param subClusterWeight the weight ratios of subClusters.
111+
* @throws YarnException exceptions from yarn servers.
112+
*/
113+
public static void checkSubClusterQueueWeightRatioValid(String subClusterWeight)
114+
throws YarnException {
115+
// The subClusterWeight cannot be empty.
116+
if (StringUtils.isBlank(subClusterWeight)) {
117+
throw new YarnException("subClusterWeight can't be empty!");
118+
}
119+
120+
// SC-1:0.7,SC-2:0.3 -> [SC-1:0.7,SC-2:0.3]
121+
String[] subClusterWeights = subClusterWeight.split(COMMA);
122+
Map<String, Double> subClusterWeightMap = new LinkedHashMap<>();
123+
for (String subClusterWeightItem : subClusterWeights) {
124+
// SC-1:0.7 -> [SC-1,0.7]
125+
// We require that the parsing result is not empty and must have a length of 2.
126+
String[] subClusterWeightItems = subClusterWeightItem.split(COLON);
127+
if (subClusterWeightItems == null || subClusterWeightItems.length != 2) {
128+
throw new YarnException("The subClusterWeight cannot be empty," +
129+
" and the subClusterWeight size must be 2. (eg.SC-1,0.2)");
130+
}
131+
subClusterWeightMap.put(subClusterWeightItems[0], Double.valueOf(subClusterWeightItems[1]));
132+
}
133+
134+
// The sum of weight ratios for subClusters must be equal to 1.
135+
double sum = subClusterWeightMap.values().stream().mapToDouble(Double::doubleValue).sum();
136+
boolean isValid = Math.abs(sum - 1.0) < 1e-6; // Comparing with a tolerance of 1e-6
137+
138+
if (!isValid) {
139+
throw new YarnException("The sum of ratios for all subClusters must be equal to 1.");
140+
}
141+
}
142+
143+
/**
144+
* Check if HeadRoomAlpha is a number and is between 0 and 1.
145+
*
146+
* @param headRoomAlpha headroomalpha.
147+
* @throws YarnException exceptions from yarn servers.
148+
*/
149+
public static void checkHeadRoomAlphaValid(String headRoomAlpha) throws YarnException {
150+
if (!isNumeric(headRoomAlpha)) {
151+
throw new YarnException("HeadRoomAlpha must be a number.");
152+
}
153+
154+
double dHeadRoomAlpha = Double.parseDouble(headRoomAlpha);
155+
if (!(dHeadRoomAlpha >= 0 && dHeadRoomAlpha <= 1)) {
156+
throw new YarnException("HeadRoomAlpha must be between 0-1.");
157+
}
158+
}
159+
160+
/**
161+
* Determines whether the given value is a number.
162+
*
163+
* @param value given value.
164+
* @return true, is a number, false, not a number.
165+
*/
166+
protected static boolean isNumeric(String value) {
167+
return NumberUtils.isCreatable(value);
168+
}
169+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.yarn.server.api.protocolrecords;
19+
20+
import org.apache.hadoop.classification.InterfaceAudience.Private;
21+
import org.apache.hadoop.classification.InterfaceAudience.Public;
22+
import org.apache.hadoop.classification.InterfaceStability.Unstable;
23+
import org.apache.hadoop.yarn.util.Records;
24+
25+
/**
26+
* In Yarn Federation mode, this class is used to save the queue policy interface.
27+
*
28+
* This class stores the queue, the weight of the queue,
29+
* and the PolicyManagerClassName information of the queue.
30+
*/
31+
@Private
32+
@Unstable
33+
public abstract class SaveFederationQueuePolicyRequest {
34+
35+
@Private
36+
@Unstable
37+
public static SaveFederationQueuePolicyRequest newInstance(
38+
String queue, FederationQueueWeight federationQueueWeight, String policyManagerClassName) {
39+
SaveFederationQueuePolicyRequest request =
40+
Records.newRecord(SaveFederationQueuePolicyRequest.class);
41+
request.setQueue(queue);
42+
request.setFederationQueueWeight(federationQueueWeight);
43+
request.setPolicyManagerClassName(policyManagerClassName);
44+
return request;
45+
}
46+
47+
@Public
48+
@Unstable
49+
public abstract FederationQueueWeight getFederationQueueWeight();
50+
51+
@Private
52+
@Unstable
53+
public abstract void setFederationQueueWeight(FederationQueueWeight federationQueueWeight);
54+
55+
@Public
56+
@Unstable
57+
public abstract String getQueue();
58+
59+
@Public
60+
@Unstable
61+
public abstract void setQueue(String queue);
62+
63+
@Public
64+
@Unstable
65+
public abstract String getPolicyManagerClassName();
66+
67+
@Public
68+
@Unstable
69+
public abstract void setPolicyManagerClassName(String className);
70+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.yarn.server.api.protocolrecords;
19+
20+
import org.apache.hadoop.classification.InterfaceAudience.Public;
21+
import org.apache.hadoop.classification.InterfaceAudience.Private;
22+
import org.apache.hadoop.classification.InterfaceStability.Unstable;
23+
import org.apache.hadoop.yarn.util.Records;
24+
25+
@Private
26+
@Unstable
27+
public abstract class SaveFederationQueuePolicyResponse {
28+
public static SaveFederationQueuePolicyResponse newInstance() {
29+
return Records.newRecord(SaveFederationQueuePolicyResponse.class);
30+
}
31+
32+
public static SaveFederationQueuePolicyResponse newInstance(String msg) {
33+
SaveFederationQueuePolicyResponse response =
34+
Records.newRecord(SaveFederationQueuePolicyResponse.class);
35+
response.setMessage(msg);
36+
return response;
37+
}
38+
39+
@Public
40+
@Unstable
41+
public abstract String getMessage();
42+
43+
@Public
44+
@Unstable
45+
public abstract void setMessage(String msg);
46+
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/resourcemanager_administration_protocol.proto

+1
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,5 @@ service ResourceManagerAdministrationProtocolService {
4848
rpc refreshClusterMaxPriority(RefreshClusterMaxPriorityRequestProto) returns (RefreshClusterMaxPriorityResponseProto);
4949
rpc mapAttributesToNodes(NodesToAttributesMappingRequestProto) returns (NodesToAttributesMappingResponseProto);
5050
rpc deregisterSubCluster(DeregisterSubClusterRequestProto) returns (DeregisterSubClusterResponseProto);
51+
rpc saveFederationQueuePolicy(SaveFederationQueuePolicyRequestProto) returns (SaveFederationQueuePolicyResponseProto);
5152
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto

+10
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,16 @@ message DeregisterSubClusterResponseProto {
170170
repeated DeregisterSubClustersProto deregisterSubClusters = 1;
171171
}
172172

173+
// Request of the saveFederationQueuePolicy rpc: the queue, its weights and,
// optionally, the policy manager class name.
message SaveFederationQueuePolicyRequestProto {
174+
// Queue the policy is saved for (required).
required string queue = 1;
175+
// Router weight, AMRM weight and headRoomAlpha of the queue (required).
required FederationQueueWeightProto federationQueueWeight = 2;
176+
// Policy manager class name of the queue (optional).
optional string policyManagerClassName = 3;
177+
}
178+
179+
// Response of the saveFederationQueuePolicy rpc.
message SaveFederationQueuePolicyResponseProto {
180+
// Message returned to the caller; declared required, so it must be set
// before the response is built.
required string message = 1;
181+
}
182+
173183
//////////////////////////////////////////////////////////////////
174184
///////////// RM Failover related records ////////////////////////
175185
//////////////////////////////////////////////////////////////////

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto

+6
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,12 @@ message DeregisterSubClustersProto {
440440
optional string subClusterState = 5;
441441
}
442442

443+
// Weights of a queue in YARN Federation; all three values are carried as strings.
message FederationQueueWeightProto {
444+
// Router weight string.
optional string routerWeight = 1;
445+
// AMRM weight string.
optional string amrmWeight = 2;
446+
// headRoomAlpha string.
optional string headRoomAlpha = 3;
447+
}
448+
443449
////////////////////////////////////////////////////////////////////////
444450
////// From AM_RM_Protocol /////////////////////////////////////////////
445451
////////////////////////////////////////////////////////////////////////

0 commit comments

Comments
 (0)