@@ -24,6 +24,7 @@ import (
24
24
"github.com/ovh/cds/sdk"
25
25
"github.com/ovh/cds/sdk/cdsclient"
26
26
"github.com/ovh/cds/sdk/hatchery"
27
+ "github.com/ovh/cds/sdk/slug"
27
28
)
28
29
29
30
// New instanciates a new hatchery local
@@ -99,8 +100,12 @@ func (h *HatcheryKubernetes) ApplyConfiguration(cfg interface{}) error {
99
100
// Status returns sdk.MonitoringStatus, implements interface service.Service
100
101
func (h * HatcheryKubernetes ) Status (ctx context.Context ) * sdk.MonitoringStatus {
101
102
m := h .NewMonitoringStatus ()
102
- m .AddLine (sdk.MonitoringStatusLine {Component : "Workers" , Value : fmt .Sprintf ("%d/%d" , len (h .WorkersStarted (ctx )), h .Config .Provision .MaxWorker ), Status : sdk .MonitoringStatusOK })
103
-
103
+ ws , err := h .WorkersStarted (ctx )
104
+ if err != nil {
105
+ ctx = log .ContextWithStackTrace (ctx , err )
106
+ log .Warn (ctx , err .Error ())
107
+ }
108
+ m .AddLine (sdk.MonitoringStatusLine {Component : "Workers" , Value : fmt .Sprintf ("%d/%d" , len (ws ), h .Config .Provision .MaxWorker ), Status : sdk .MonitoringStatusOK })
104
109
return m
105
110
}
106
111
@@ -171,11 +176,6 @@ func (h *HatcheryKubernetes) SpawnWorker(ctx context.Context, spawnArgs hatchery
171
176
return sdk .WithStack (fmt .Errorf ("no job ID and no register" ))
172
177
}
173
178
174
- label := "execution"
175
- if spawnArgs .RegisterOnly {
176
- label = "register"
177
- }
178
-
179
179
var logJob string
180
180
if spawnArgs .JobID > 0 {
181
181
logJob = fmt .Sprintf ("for workflow job %d," , spawnArgs .JobID )
@@ -221,7 +221,6 @@ func (h *HatcheryKubernetes) SpawnWorker(ctx context.Context, spawnArgs hatchery
221
221
envsWm := workerConfig .InjectEnvVars
222
222
envsWm ["CDS_MODEL_MEMORY" ] = fmt .Sprintf ("%d" , memory )
223
223
envsWm ["CDS_FROM_WORKER_IMAGE" ] = "true"
224
- envsWm ["CDS_CONFIG" ] = workerConfig .EncodeBase64 ()
225
224
226
225
for envName , envValue := range spawnArgs .Model .ModelDocker .Envs {
227
226
envsWm [envName ] = envValue
@@ -239,16 +238,33 @@ func (h *HatcheryKubernetes) SpawnWorker(ctx context.Context, spawnArgs hatchery
239
238
pullPolicy = "Always"
240
239
}
241
240
241
+ // Create secret for worker config
242
+ configSecretName , err := h .createConfigSecret (ctx , workerConfig )
243
+ if err != nil {
244
+ return sdk .WrapError (err , "cannot create secret for config %s" , workerConfig .Name )
245
+ }
246
+ envs = append (envs , apiv1.EnvVar {
247
+ Name : "CDS_CONFIG" ,
248
+ ValueFrom : & apiv1.EnvVarSource {
249
+ SecretKeyRef : & apiv1.SecretKeySelector {
250
+ LocalObjectReference : apiv1.LocalObjectReference {
251
+ Name : configSecretName ,
252
+ },
253
+ Key : "CDS_CONFIG" ,
254
+ },
255
+ },
256
+ })
257
+
242
258
var gracePeriodSecs int64
243
259
podSchema := apiv1.Pod {
244
260
ObjectMeta : metav1.ObjectMeta {
245
261
Name : spawnArgs .WorkerName ,
246
262
Namespace : h .Config .Namespace ,
247
263
DeletionGracePeriodSeconds : & gracePeriodSecs ,
248
264
Labels : map [string ]string {
249
- LABEL_WORKER : label ,
250
- LABEL_WORKER_MODEL : strings . ToLower ( spawnArgs . Model . Name ) ,
251
- LABEL_HATCHERY_NAME : h . Configuration (). Name ,
265
+ LABEL_HATCHERY_NAME : h . Configuration (). Name ,
266
+ LABEL_WORKER_NAME : workerConfig . Name ,
267
+ LABEL_WORKER_MODEL_PATH : slug . Convert ( spawnArgs . Model . Path ()) ,
252
268
},
253
269
Annotations : map [string ]string {},
254
270
},
@@ -273,6 +289,15 @@ func (h *HatcheryKubernetes) SpawnWorker(ctx context.Context, spawnArgs hatchery
273
289
},
274
290
}
275
291
292
+ // Check here to add secret if needed
293
+ if spawnArgs .Model .ModelDocker .Private {
294
+ secretRegistryName , err := h .createRegistrySecret (ctx , * spawnArgs .Model )
295
+ if err != nil {
296
+ return sdk .WrapError (err , "cannot create secret for model %s" , spawnArgs .Model .Path ())
297
+ }
298
+ podSchema .Spec .ImagePullSecrets = []apiv1.LocalObjectReference {{Name : secretRegistryName }}
299
+ }
300
+
276
301
var services []sdk.Requirement
277
302
for _ , req := range spawnArgs .Requirements {
278
303
if req .Type == sdk .ServiceRequirement {
@@ -286,16 +311,6 @@ func (h *HatcheryKubernetes) SpawnWorker(ctx context.Context, spawnArgs hatchery
286
311
podSchema .Spec .HostAliases [0 ].Hostnames [0 ] = "worker"
287
312
}
288
313
289
- // Check here to add secret if needed
290
- secretName := "cds-credreg-" + spawnArgs .Model .Name
291
- if spawnArgs .Model .ModelDocker .Private {
292
- if err := h .createSecret (ctx , secretName , * spawnArgs .Model ); err != nil {
293
- return sdk .WrapError (err , "cannot create secret for model %s" , spawnArgs .Model .Path ())
294
- }
295
- podSchema .Spec .ImagePullSecrets = []apiv1.LocalObjectReference {{Name : secretName }}
296
- podSchema .ObjectMeta .Labels [LABEL_SECRET ] = secretName
297
- }
298
-
299
314
for i , serv := range services {
300
315
//name= <alias> => the name of the host put in /etc/hosts of the worker
301
316
//value= "postgres:latest env_1=blabla env_2=blabla"" => we can add env variables in requirement name
@@ -345,7 +360,7 @@ func (h *HatcheryKubernetes) SpawnWorker(ctx context.Context, spawnArgs hatchery
345
360
podSchema .Spec .HostAliases [0 ].Hostnames [i + 1 ] = strings .ToLower (serv .Name )
346
361
}
347
362
348
- _ , err : = h .kubeClient .PodCreate (ctx , h .Config .Namespace , & podSchema , metav1.CreateOptions {})
363
+ _ , err = h .kubeClient .PodCreate (ctx , h .Config .Namespace , & podSchema , metav1.CreateOptions {})
349
364
log .Debug (ctx , "hatchery> kubernetes> SpawnWorker> %s > Pod created" , spawnArgs .WorkerName )
350
365
return sdk .WithStack (err )
351
366
}
@@ -356,20 +371,18 @@ func (h *HatcheryKubernetes) GetLogger() *logrus.Logger {
356
371
357
372
// WorkersStarted returns the number of instances started but
358
373
// not necessarily register on CDS yet
359
- func (h * HatcheryKubernetes ) WorkersStarted (ctx context.Context ) []string {
360
- list , err := h .kubeClient .PodList (ctx , h .Config .Namespace , metav1.ListOptions {LabelSelector : LABEL_HATCHERY_NAME })
374
+ func (h * HatcheryKubernetes ) WorkersStarted (ctx context.Context ) ([]string , error ) {
375
+ list , err := h .kubeClient .PodList (ctx , h .Config .Namespace , metav1.ListOptions {
376
+ LabelSelector : fmt .Sprintf ("%s=%s,%s" , LABEL_HATCHERY_NAME , h .Config .Name , LABEL_WORKER_NAME ),
377
+ })
361
378
if err != nil {
362
- log .Warn (ctx , "WorkersStarted> unable to list pods on namespace %s" , h .Config .Namespace )
363
- return nil
379
+ return nil , sdk .WrapError (err , "unable to list pods on namespace %s" , h .Config .Namespace )
364
380
}
365
381
workerNames := make ([]string , 0 , list .Size ())
366
382
for _ , pod := range list .Items {
367
- labels := pod .GetLabels ()
368
- if labels [LABEL_HATCHERY_NAME ] == h .Configuration ().Name {
369
- workerNames = append (workerNames , pod .GetName ())
370
- }
383
+ workerNames = append (workerNames , pod .GetName ())
371
384
}
372
- return workerNames
385
+ return workerNames , nil
373
386
}
374
387
375
388
// NeedRegistration return true if worker model need regsitration
@@ -381,31 +394,33 @@ func (h *HatcheryKubernetes) NeedRegistration(_ context.Context, m *sdk.Model) b
381
394
}
382
395
383
396
func (h * HatcheryKubernetes ) routines (ctx context.Context ) {
384
- ticker := time .NewTicker (10 * time .Minute )
397
+ ticker := time .NewTicker (10 * time .Second )
385
398
defer ticker .Stop ()
386
399
387
400
for {
388
401
select {
389
402
case <- ticker .C :
390
403
h .GoRoutines .Exec (ctx , "getCDNConfiguration" , func (ctx context.Context ) {
391
404
if err := h .Common .RefreshServiceLogger (ctx ); err != nil {
392
- log .Error (ctx , "hatchery> kubernetes> cannot get cdn configuration : %v" , err )
405
+ log .ErrorWithStackTrace (ctx , sdk . WrapError ( err , " cannot get cdn configuration" ) )
393
406
}
394
407
})
395
408
396
409
h .GoRoutines .Exec (ctx , "getServicesLogs" , func (ctx context.Context ) {
397
410
if err := h .getServicesLogs (ctx ); err != nil {
398
- log .Error (ctx , "Hatchery> Kubernetes> Cannot get service logs : %v" , err )
411
+ log .ErrorWithStackTrace (ctx , sdk . WrapError ( err , "cannot get service logs" ) )
399
412
}
400
413
})
401
414
402
415
h .GoRoutines .Exec (ctx , "killAwolWorker" , func (ctx context.Context ) {
403
- _ = h .killAwolWorkers (ctx )
416
+ if err := h .killAwolWorkers (ctx ); err != nil {
417
+ log .ErrorWithStackTrace (ctx , sdk .WrapError (err , "cannot delete awol worker" ))
418
+ }
404
419
})
405
420
406
421
h .GoRoutines .Exec (ctx , "deleteSecrets" , func (ctx context.Context ) {
407
422
if err := h .deleteSecrets (ctx ); err != nil {
408
- log .Error (ctx , "hatchery> kubernetes> cannot handle secrets : %v" , err )
423
+ log .ErrorWithStackTrace (ctx , sdk . WrapError ( err , " cannot delete secrets" ) )
409
424
}
410
425
})
411
426
case <- ctx .Done ():
0 commit comments