Skip to content

Commit d683b20

Browse files
author
David Roberts
authored
[ML] More accurate job memory overhead (#47516)
When an ML job runs the memory required can be broken down into: 1. Memory required to load the executable code 2. Instrumented model memory 3. Other memory used by the job's main process or ancillary processes that is not instrumented Previously we added a simple fixed overhead to account for 1 and 3. This was 100MB for anomaly detection jobs (large because of the completely uninstrumented categorization function and normalize process), and 20MB for data frame analytics jobs. However, this was an oversimplification because the executable code only needs to be loaded once per machine. Also the 100MB overhead for anomaly detection jobs was probably too high in most cases because categorization and normalization don't use _that_ much memory. This PR therefore changes the calculation of memory requirements as follows: 1. A per-node overhead of 30MB for _only_ the first job of any type to be run on a given node - this is to account for loading the executable code 2. The established model memory (if applicable) or model memory limit of the job 3. A per-job overhead of 10MB for anomaly detection jobs and 5MB for data frame analytics jobs, to account for the uninstrumented memory usage This change will enable more jobs to be run on the same node. It will be particularly beneficial when there are a large number of small jobs. It will have less of an effect when there are a small number of large jobs.
1 parent e036ac4 commit d683b20

File tree

5 files changed

+97
-15
lines changed

5 files changed

+97
-15
lines changed

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfig.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,11 @@ public class DataFrameAnalyticsConfig implements ToXContentObject, Writeable {
4141

4242
public static final ByteSizeValue DEFAULT_MODEL_MEMORY_LIMIT = new ByteSizeValue(1, ByteSizeUnit.GB);
4343
public static final ByteSizeValue MIN_MODEL_MEMORY_LIMIT = new ByteSizeValue(1, ByteSizeUnit.MB);
44-
public static final ByteSizeValue PROCESS_MEMORY_OVERHEAD = new ByteSizeValue(20, ByteSizeUnit.MB);
44+
/**
45+
* This includes the overhead of thread stacks and data structures that the program might use that
46+
* are not instrumented. But it does NOT include the memory used by loading the executable code.
47+
*/
48+
public static final ByteSizeValue PROCESS_MEMORY_OVERHEAD = new ByteSizeValue(5, ByteSizeUnit.MB);
4549

4650
public static final ParseField ID = new ParseField("id");
4751
public static final ParseField DESCRIPTION = new ParseField("description");

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/Job.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,14 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContentO
8686
public static final ObjectParser<Builder, Void> STRICT_PARSER = createParser(false);
8787

8888
public static final TimeValue MIN_BACKGROUND_PERSIST_INTERVAL = TimeValue.timeValueHours(1);
89-
public static final ByteSizeValue PROCESS_MEMORY_OVERHEAD = new ByteSizeValue(100, ByteSizeUnit.MB);
89+
90+
/**
91+
* This includes the overhead of thread stacks and data structures that the program might use that
92+
* are not instrumented. (For the <code>autodetect</code> process categorization is not instrumented,
93+
* and the <code>normalize</code> process is not instrumented at all.) But this overhead does NOT
94+
* include the memory used by loading the executable code.
95+
*/
96+
public static final ByteSizeValue PROCESS_MEMORY_OVERHEAD = new ByteSizeValue(10, ByteSizeUnit.MB);
9097

9198
public static final long DEFAULT_MODEL_SNAPSHOT_RETENTION_DAYS = 1;
9299

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,12 @@ public Set<DiscoveryNodeRole> getRoles() {
334334
public static final String MACHINE_MEMORY_NODE_ATTR = "ml.machine_memory";
335335
public static final Setting<Integer> CONCURRENT_JOB_ALLOCATIONS =
336336
Setting.intSetting("xpack.ml.node_concurrent_job_allocations", 2, 0, Property.Dynamic, Property.NodeScope);
337+
/**
338+
* The amount of memory needed to load the ML native code shared libraries. The assumption is that the first
339+
* ML job to run on a given node will do this, and then subsequent ML jobs on the same node will reuse the
340+
* same already-loaded code.
341+
*/
342+
public static final ByteSizeValue NATIVE_EXECUTABLE_CODE_OVERHEAD = new ByteSizeValue(30, ByteSizeUnit.MB);
337343
// Values higher than 100% are allowed to accommodate use cases where swapping has been determined to be acceptable.
338344
// Anomaly detector jobs only use their full model memory during background persistence, and this is deliberately
339345
// staggered, so with large numbers of jobs few will generally be persisting state at the same time.

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/JobNodeSelector.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ public PersistentTasksCustomMetaData.Assignment selectNode(int dynamicMaxOpenJob
110110
continue;
111111
}
112112

113-
// Assuming the node is elligible at all, check loading
113+
// Assuming the node is eligible at all, check loading
114114
CurrentLoad currentLoad = calculateCurrentLoadForNode(node, persistentTasks, allocateByMemory);
115115
allocateByMemory = currentLoad.allocateByMemory;
116116

@@ -170,6 +170,11 @@ public PersistentTasksCustomMetaData.Assignment selectNode(int dynamicMaxOpenJob
170170
long maxMlMemory = machineMemory * maxMachineMemoryPercent / 100;
171171
Long estimatedMemoryFootprint = memoryTracker.getJobMemoryRequirement(taskName, jobId);
172172
if (estimatedMemoryFootprint != null) {
173+
// If this will be the first job assigned to the node then it will need to
174+
// load the native code shared libraries, so add the overhead for this
175+
if (currentLoad.numberOfAssignedJobs == 0) {
176+
estimatedMemoryFootprint += MachineLearning.NATIVE_EXECUTABLE_CODE_OVERHEAD.getBytes();
177+
}
173178
long availableMemory = maxMlMemory - currentLoad.assignedJobMemory;
174179
if (estimatedMemoryFootprint > availableMemory) {
175180
reason = "Not opening job [" + jobId + "] on node [" + nodeNameAndMlAttributes(node)
@@ -283,6 +288,11 @@ private CurrentLoad calculateCurrentLoadForNode(DiscoveryNode node, PersistentTa
283288
}
284289
}
285290
}
291+
// if any jobs are running then the native code will be loaded, but shared between all jobs,
292+
// so increase the total memory usage of the assigned jobs to account for this
293+
if (result.numberOfAssignedJobs > 0) {
294+
result.assignedJobMemory += MachineLearning.NATIVE_EXECUTABLE_CODE_OVERHEAD.getBytes();
295+
}
286296
}
287297

288298
return result;

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/JobNodeSelectorTests.java

Lines changed: 67 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -175,9 +175,12 @@ public void testSelectLeastLoadedMlNodeForAnomalyDetectorJob_maxCapacityMemoryLi
175175
int currentlyRunningJobsPerNode = randomIntBetween(1, 100);
176176
int maxRunningJobsPerNode = currentlyRunningJobsPerNode + 1;
177177
// Be careful if changing this - in order for the error message to be exactly as expected
178-
// the value here must divide exactly into (JOB_MEMORY_REQUIREMENT.getBytes() * 100)
179-
int maxMachineMemoryPercent = 40;
180-
long machineMemory = currentlyRunningJobsPerNode * JOB_MEMORY_REQUIREMENT.getBytes() * 100 / maxMachineMemoryPercent;
178+
// the value here must divide exactly into both (JOB_MEMORY_REQUIREMENT.getBytes() * 100) and
179+
// MachineLearning.NATIVE_EXECUTABLE_CODE_OVERHEAD.getBytes()
180+
int maxMachineMemoryPercent = 20;
181+
long currentlyRunningJobMemory = MachineLearning.NATIVE_EXECUTABLE_CODE_OVERHEAD.getBytes() +
182+
currentlyRunningJobsPerNode * JOB_MEMORY_REQUIREMENT.getBytes();
183+
long machineMemory = currentlyRunningJobMemory * 100 / maxMachineMemoryPercent;
181184

182185
Map<String, String> nodeAttr = new HashMap<>();
183186
nodeAttr.put(MachineLearning.MAX_OPEN_JOBS_NODE_ATTR, Integer.toString(maxRunningJobsPerNode));
@@ -193,19 +196,46 @@ public void testSelectLeastLoadedMlNodeForAnomalyDetectorJob_maxCapacityMemoryLi
193196
jobNodeSelector.selectNode(maxRunningJobsPerNode, 2, maxMachineMemoryPercent, isMemoryTrackerRecentlyRefreshed);
194197
assertNull(result.getExecutorNode());
195198
assertThat(result.getExplanation(), containsString("because this node has insufficient available memory. "
196-
+ "Available memory for ML [" + (machineMemory * maxMachineMemoryPercent / 100) + "], memory required by existing jobs ["
197-
+ (JOB_MEMORY_REQUIREMENT.getBytes() * currentlyRunningJobsPerNode) + "], estimated memory required for this job ["
198-
+ JOB_MEMORY_REQUIREMENT.getBytes() + "]"));
199+
+ "Available memory for ML [" + currentlyRunningJobMemory + "], memory required by existing jobs ["
200+
+ currentlyRunningJobMemory + "], estimated memory required for this job [" + JOB_MEMORY_REQUIREMENT.getBytes() + "]"));
201+
}
202+
203+
public void testSelectLeastLoadedMlNodeForAnomalyDetectorJob_firstJobTooBigMemoryLimiting() {
204+
int numNodes = randomIntBetween(1, 10);
205+
int maxRunningJobsPerNode = randomIntBetween(1, 100);
206+
int maxMachineMemoryPercent = 20;
207+
long firstJobTotalMemory = MachineLearning.NATIVE_EXECUTABLE_CODE_OVERHEAD.getBytes() + JOB_MEMORY_REQUIREMENT.getBytes();
208+
long machineMemory = (firstJobTotalMemory - 1) * 100 / maxMachineMemoryPercent;
209+
210+
Map<String, String> nodeAttr = new HashMap<>();
211+
nodeAttr.put(MachineLearning.MAX_OPEN_JOBS_NODE_ATTR, Integer.toString(maxRunningJobsPerNode));
212+
nodeAttr.put(MachineLearning.MACHINE_MEMORY_NODE_ATTR, Long.toString(machineMemory));
213+
214+
ClusterState.Builder cs = fillNodesWithRunningJobs(nodeAttr, numNodes, 0);
215+
216+
Job job = BaseMlIntegTestCase.createFareQuoteJob("job_id1000", JOB_MEMORY_REQUIREMENT).build(new Date());
217+
218+
JobNodeSelector jobNodeSelector = new JobNodeSelector(cs.build(), job.getId(), MlTasks.JOB_TASK_NAME, memoryTracker,
219+
0, node -> TransportOpenJobAction.nodeFilter(node, job));
220+
PersistentTasksCustomMetaData.Assignment result =
221+
jobNodeSelector.selectNode(maxRunningJobsPerNode, 2, maxMachineMemoryPercent, isMemoryTrackerRecentlyRefreshed);
222+
assertNull(result.getExecutorNode());
223+
assertThat(result.getExplanation(), containsString("because this node has insufficient available memory. "
224+
+ "Available memory for ML [" + (firstJobTotalMemory - 1)
225+
+ "], memory required by existing jobs [0], estimated memory required for this job [" + firstJobTotalMemory + "]"));
199226
}
200227

201228
public void testSelectLeastLoadedMlNodeForDataFrameAnalyticsJob_maxCapacityMemoryLimiting() {
202229
int numNodes = randomIntBetween(1, 10);
203230
int currentlyRunningJobsPerNode = randomIntBetween(1, 100);
204231
int maxRunningJobsPerNode = currentlyRunningJobsPerNode + 1;
205232
// Be careful if changing this - in order for the error message to be exactly as expected
206-
// the value here must divide exactly into (JOB_MEMORY_REQUIREMENT.getBytes() * 100)
207-
int maxMachineMemoryPercent = 40;
208-
long machineMemory = currentlyRunningJobsPerNode * JOB_MEMORY_REQUIREMENT.getBytes() * 100 / maxMachineMemoryPercent;
233+
// the value here must divide exactly into both (JOB_MEMORY_REQUIREMENT.getBytes() * 100) and
234+
// MachineLearning.NATIVE_EXECUTABLE_CODE_OVERHEAD.getBytes()
235+
int maxMachineMemoryPercent = 20;
236+
long currentlyRunningJobMemory = MachineLearning.NATIVE_EXECUTABLE_CODE_OVERHEAD.getBytes() +
237+
currentlyRunningJobsPerNode * JOB_MEMORY_REQUIREMENT.getBytes();
238+
long machineMemory = currentlyRunningJobMemory * 100 / maxMachineMemoryPercent;
209239

210240
Map<String, String> nodeAttr = new HashMap<>();
211241
nodeAttr.put(MachineLearning.MAX_OPEN_JOBS_NODE_ATTR, Integer.toString(maxRunningJobsPerNode));
@@ -222,9 +252,34 @@ public void testSelectLeastLoadedMlNodeForDataFrameAnalyticsJob_maxCapacityMemor
222252
jobNodeSelector.selectNode(maxRunningJobsPerNode, 2, maxMachineMemoryPercent, isMemoryTrackerRecentlyRefreshed);
223253
assertNull(result.getExecutorNode());
224254
assertThat(result.getExplanation(), containsString("because this node has insufficient available memory. "
225-
+ "Available memory for ML [" + (machineMemory * maxMachineMemoryPercent / 100) + "], memory required by existing jobs ["
226-
+ (JOB_MEMORY_REQUIREMENT.getBytes() * currentlyRunningJobsPerNode) + "], estimated memory required for this job ["
227-
+ JOB_MEMORY_REQUIREMENT.getBytes() + "]"));
255+
+ "Available memory for ML [" + currentlyRunningJobMemory + "], memory required by existing jobs ["
256+
+ currentlyRunningJobMemory + "], estimated memory required for this job [" + JOB_MEMORY_REQUIREMENT.getBytes() + "]"));
257+
}
258+
259+
public void testSelectLeastLoadedMlNodeForDataFrameAnalyticsJob_firstJobTooBigMemoryLimiting() {
260+
int numNodes = randomIntBetween(1, 10);
261+
int maxRunningJobsPerNode = randomIntBetween(1, 100);
262+
int maxMachineMemoryPercent = 20;
263+
long firstJobTotalMemory = MachineLearning.NATIVE_EXECUTABLE_CODE_OVERHEAD.getBytes() + JOB_MEMORY_REQUIREMENT.getBytes();
264+
long machineMemory = (firstJobTotalMemory - 1) * 100 / maxMachineMemoryPercent;
265+
266+
Map<String, String> nodeAttr = new HashMap<>();
267+
nodeAttr.put(MachineLearning.MAX_OPEN_JOBS_NODE_ATTR, Integer.toString(maxRunningJobsPerNode));
268+
nodeAttr.put(MachineLearning.MACHINE_MEMORY_NODE_ATTR, Long.toString(machineMemory));
269+
270+
ClusterState.Builder cs = fillNodesWithRunningJobs(nodeAttr, numNodes, 0);
271+
272+
String dataFrameAnalyticsId = "data_frame_analytics_id1000";
273+
274+
JobNodeSelector jobNodeSelector = new JobNodeSelector(cs.build(), dataFrameAnalyticsId,
275+
MlTasks.DATA_FRAME_ANALYTICS_TASK_NAME, memoryTracker, 0,
276+
node -> TransportStartDataFrameAnalyticsAction.TaskExecutor.nodeFilter(node, dataFrameAnalyticsId));
277+
PersistentTasksCustomMetaData.Assignment result =
278+
jobNodeSelector.selectNode(maxRunningJobsPerNode, 2, maxMachineMemoryPercent, isMemoryTrackerRecentlyRefreshed);
279+
assertNull(result.getExecutorNode());
280+
assertThat(result.getExplanation(), containsString("because this node has insufficient available memory. "
281+
+ "Available memory for ML [" + (firstJobTotalMemory - 1)
282+
+ "], memory required by existing jobs [0], estimated memory required for this job [" + firstJobTotalMemory + "]"));
228283
}
229284

230285
public void testSelectLeastLoadedMlNode_noMlNodes() {

0 commit comments

Comments
 (0)