6
6
import * as assert from 'assert' ;
7
7
import { getLogger , Logger } from '../../common/log' ;
8
8
import { randomSelect } from '../../common/utils' ;
9
- import { GPUInfo } from '../common/gpuData' ;
10
- import {
11
- parseGpuIndices , RemoteMachineMeta , RemoteMachineScheduleResult , RemoteMachineTrialJobDetail , ScheduleResultType , ExecutorManager
12
- } from './remoteMachineData' ;
9
+ import { GPUInfo , parseGpuIndices , ScheduleResultType } from '../common/gpuData' ;
10
+ import { ExecutorManager , RemoteMachineMeta , RemoteMachineScheduleResult , RemoteMachineTrialJobDetail } from './remoteMachineData' ;
13
11
14
12
// Name of a schedule policy: restricted to the string literals 'random' and 'round-robin'.
// NOTE(review): how each policy picks a machine is not visible in this excerpt — confirm against the scheduler code.
type SCHEDULE_POLICY_NAME = 'random' | 'round-robin' ;
15
13
@@ -39,7 +37,7 @@ export class GPUScheduler {
39
37
* @param requiredGPUNum required GPU number
40
38
*/
41
39
public scheduleMachine ( requiredGPUNum : number | undefined , trialJobDetail : RemoteMachineTrialJobDetail ) : RemoteMachineScheduleResult {
42
- if ( requiredGPUNum === undefined ) {
40
+ if ( requiredGPUNum === undefined ) {
43
41
requiredGPUNum = 0 ;
44
42
}
45
43
assert ( requiredGPUNum >= 0 ) ;
@@ -48,7 +46,7 @@ export class GPUScheduler {
48
46
49
47
// Step 1: Keep only the machines whose total GPU number is not exceeded by the required GPU number
50
48
const eligibleRM : RemoteMachineMeta [ ] = allRMs . filter ( ( rmMeta : RemoteMachineMeta ) =>
51
- rmMeta . gpuSummary === undefined || requiredGPUNum === 0 || ( requiredGPUNum !== undefined && rmMeta . gpuSummary . gpuCount >= requiredGPUNum ) ) ;
49
+ rmMeta . gpuSummary === undefined || requiredGPUNum === 0 || ( requiredGPUNum !== undefined && rmMeta . gpuSummary . gpuCount >= requiredGPUNum ) ) ;
52
50
if ( eligibleRM . length === 0 ) {
53
51
// If the required GPU number exceeds the total GPU number of every machine
54
52
// Return REQUIRE_EXCEED_TOTAL directly
@@ -75,8 +73,8 @@ export class GPUScheduler {
75
73
this . log . warning ( `Scheduler: trialJob id ${ trialJobDetail . id } , no machine can be scheduled, return TMP_NO_AVAILABLE_GPU ` ) ;
76
74
77
75
return {
78
- resultType : ScheduleResultType . TMP_NO_AVAILABLE_GPU ,
79
- scheduleInfo : undefined
76
+ resultType : ScheduleResultType . TMP_NO_AVAILABLE_GPU ,
77
+ scheduleInfo : undefined
80
78
} ;
81
79
}
82
80
@@ -159,7 +157,7 @@ export class GPUScheduler {
159
157
const num : number | undefined = rmMeta . occupiedGpuIndexMap . get ( gpuInfo . index ) ;
160
158
const maxTrialNumPerGpu : number = rmMeta . maxTrialNumPerGpu ? rmMeta . maxTrialNumPerGpu : 1 ;
161
159
if ( ( num === undefined && ( ! rmMeta . useActiveGpu && gpuInfo . activeProcessNum === 0 || rmMeta . useActiveGpu ) ) ||
162
- ( num !== undefined && num < maxTrialNumPerGpu ) ) {
160
+ ( num !== undefined && num < maxTrialNumPerGpu ) ) {
163
161
availableGPUs . push ( gpuInfo ) ;
164
162
}
165
163
} else {
@@ -200,7 +198,7 @@ export class GPUScheduler {
200
198
}
201
199
202
200
private allocateHost ( requiredGPUNum : number , rmMeta : RemoteMachineMeta ,
203
- gpuInfos : GPUInfo [ ] , trialJobDetail : RemoteMachineTrialJobDetail ) : RemoteMachineScheduleResult {
201
+ gpuInfos : GPUInfo [ ] , trialJobDetail : RemoteMachineTrialJobDetail ) : RemoteMachineScheduleResult {
204
202
assert ( gpuInfos . length >= requiredGPUNum ) ;
205
203
const allocatedGPUs : GPUInfo [ ] = this . selectGPUsForTrial ( gpuInfos , requiredGPUNum ) ;
206
204
allocatedGPUs . forEach ( ( gpuInfo : GPUInfo ) => {
@@ -222,10 +220,10 @@ export class GPUScheduler {
222
220
scheduleInfo : {
223
221
rmMeta : rmMeta ,
224
222
cudaVisibleDevice : allocatedGPUs
225
- . map ( ( gpuInfo : GPUInfo ) => {
226
- return gpuInfo . index ;
227
- } )
228
- . join ( ',' )
223
+ . map ( ( gpuInfo : GPUInfo ) => {
224
+ return gpuInfo . index ;
225
+ } )
226
+ . join ( ',' )
229
227
}
230
228
} ;
231
229
}
0 commit comments