Skip to content

Commit 58036a9

Browse files
committed
Supporting startupProbe in operator
1 parent eb50a22 commit 58036a9

File tree

9 files changed

+371
-32
lines changed

9 files changed

+371
-32
lines changed

api/datadoghq/v2alpha1/datadogagent_types.go

+4
Original file line numberDiff line numberDiff line change
@@ -1600,6 +1600,10 @@ type DatadogAgentGenericContainer struct {
16001600
// +optional
16011601
LivenessProbe *corev1.Probe `json:"livenessProbe,omitempty"`
16021602

1603+
// Configure the Startup Probe of the container
1604+
// +optional
1605+
StartupProbe *corev1.Probe `json:"startupProbe,omitempty"`
1606+
16031607
// Container-level SecurityContext.
16041608
// +optional
16051609
SecurityContext *corev1.SecurityContext `json:"securityContext,omitempty"`

api/datadoghq/v2alpha1/zz_generated.deepcopy.go

+5
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/datadoghq/v2alpha1/zz_generated.openapi.go

+6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/v1/datadoghq.com_datadogagents.yaml

+177-32
Large diffs are not rendered by default.

docs/configuration.v2alpha1.md

+16
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,22 @@ In the table, `spec.override.nodeAgent.image.name` and `spec.override.nodeAgent.
341341
| [key].containers.[key].securityContext.windowsOptions.gmsaCredentialSpecName | GMSACredentialSpecName is the name of the GMSA credential spec to use. |
342342
| [key].containers.[key].securityContext.windowsOptions.hostProcess | HostProcess determines if a container should be run as a 'Host Process' container. All of a Pod's containers must have the same effective HostProcess value (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). In addition, if HostProcess is true then HostNetwork must also be set to true. |
343343
| [key].containers.[key].securityContext.windowsOptions.runAsUserName | The UserName in Windows to run the entrypoint of the container process. Defaults to the user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. |
344+
| [key].containers.[key].startupProbe.exec.command | Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy. |
345+
| [key].containers.[key].startupProbe.failureThreshold | Minimum consecutive failures for the probe to be considered failed after having succeeded. Defaults to 6. Minimum value is 1. |
346+
| [key].containers.[key].startupProbe.grpc.port | Port number of the gRPC service. Number must be in the range 1 to 65535. |
347+
| [key].containers.[key].startupProbe.grpc.service | Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). If this is not specified, the default behavior is defined by gRPC. |
348+
| [key].containers.[key].startupProbe.httpGet.host | Host name to connect to, defaults to the pod IP. You probably want to set "Host" in httpHeaders instead. |
349+
| [key].containers.[key].startupProbe.httpGet.httpHeaders | Custom headers to set in the request. HTTP allows repeated headers. |
350+
| [key].containers.[key].startupProbe.httpGet.path | Path to access on the HTTP server. |
351+
| [key].containers.[key].startupProbe.httpGet.port | Name or number of the port to access on the container. Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. |
352+
| [key].containers.[key].startupProbe.httpGet.scheme | Scheme to use for connecting to the host. Defaults to HTTP. |
353+
| [key].containers.[key].startupProbe.initialDelaySeconds | Number of seconds after the container has started before the startup probe is initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes |
354+
| [key].containers.[key].startupProbe.periodSeconds | How often (in seconds) to perform the probe. Defaults to 15 seconds. Minimum value is 1. |
355+
| [key].containers.[key].startupProbe.successThreshold | Minimum consecutive successes for the probe to be considered successful after having failed. Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. |
356+
| [key].containers.[key].startupProbe.tcpSocket.host | Optional: Host name to connect to, defaults to the pod IP. |
357+
| [key].containers.[key].startupProbe.tcpSocket.port | Number or name of the port to access on the container. Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. |
358+
| [key].containers.[key].startupProbe.terminationGracePeriodSeconds | Optional duration in seconds the pod needs to terminate gracefully upon probe failure. The grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal. Set this value longer than the expected cleanup time for your process. If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this value overrides the value provided by the pod spec. Value must be non-negative integer. The value zero indicates stop immediately via the kill signal (no opportunity to shut down). This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. |
359+
| [key].containers.[key].startupProbe.timeoutSeconds | Number of seconds after which the probe times out. Defaults to 5 seconds. Minimum value is 1. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes |
344360
| [key].containers.[key].volumeMounts `[]object` | Specify additional volume mounts in the container. |
345361
| [key].createRbac | Set CreateRbac to false to prevent automatic creation of Role/ClusterRole for this component |
346362
| [key].customConfigurations `map[string]object` | CustomConfiguration allows to specify custom configuration files for `datadog.yaml`, `datadog-cluster.yaml`, `security-agent.yaml`, and `system-probe.yaml`. The content is merged with configuration generated by the Datadog Operator, with priority given to custom configuration. WARNING: It is possible to override values set in the `DatadogAgent`. |

internal/controller/datadogagent/component/agent/default.go

+1
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ func agentSingleContainer(dda metav1.Object) []corev1.Container {
126126
VolumeMounts: volumeMountsForCoreAgent(),
127127
LivenessProbe: v2alpha1.GetDefaultLivenessProbe(),
128128
ReadinessProbe: v2alpha1.GetDefaultReadinessProbe(),
129+
StartupProbe: v2alpha1.GetDefaultStartupProbe(), // do we want this here?
129130
}
130131

131132
containers := []corev1.Container{

internal/controller/datadogagent/override/container.go

+14
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,10 @@ func overrideContainer(container *corev1.Container, override *v2alpha1.DatadogAg
140140
container.LivenessProbe = overrideLivenessProbe(override.LivenessProbe)
141141
}
142142

143+
if override.StartupProbe != nil {
144+
container.StartupProbe = overrideStartupProbe(override.StartupProbe)
145+
}
146+
143147
if override.SecurityContext != nil {
144148
container.SecurityContext = override.SecurityContext
145149
}
@@ -238,3 +242,13 @@ func overrideLivenessProbe(livenessProbeOverride *corev1.Probe) *corev1.Probe {
238242
}
239243
return livenessProbeOverride
240244
}
245+
246+
// overrideStartupProbe returns the startup probe to apply to a container from
// a user-supplied override. When the override does not configure a probe
// handler (as reported by hasProbeHandler), a default HTTPGet handler is
// filled in using v2alpha1.DefaultLivenessProbeHTTPPath and
// v2alpha1.DefaultAgentHealthPort. The passed-in probe is modified in place
// and the same pointer is returned.
// NOTE(review): the default handler reuses the liveness probe HTTP path —
// confirm whether a dedicated startup path is intended.
func overrideStartupProbe(startupProbeOverride *corev1.Probe) *corev1.Probe {
247+
// Add default httpGet probeHandler if probeHandler is not configured in startupProbe override
248+
if !hasProbeHandler(startupProbeOverride) {
249+
startupProbeOverride.HTTPGet = &corev1.HTTPGetAction{
250+
Path: v2alpha1.DefaultLivenessProbeHTTPPath,
251+
Port: intstr.IntOrString{IntVal: v2alpha1.DefaultAgentHealthPort}}
252+
}
253+
return startupProbeOverride
254+
}

internal/controller/datadogagent/override/container_test.go

+133
Original file line numberDiff line numberDiff line change
@@ -638,6 +638,139 @@ func TestContainer(t *testing.T) {
638638
})
639639
},
640640
},
641+
{
642+
name: "override liveness probe with default HTTPGet",
643+
containerName: common.CoreAgentContainerName,
644+
existingManager: func() *fake.PodTemplateManagers {
645+
return fake.NewPodTemplateManagers(t, corev1.PodTemplateSpec{
646+
Spec: corev1.PodSpec{
647+
Containers: []corev1.Container{*agentContainer},
648+
},
649+
})
650+
},
651+
override: v2alpha1.DatadogAgentGenericContainer{
652+
StartupProbe: &corev1.Probe{
653+
InitialDelaySeconds: 10,
654+
TimeoutSeconds: 5,
655+
PeriodSeconds: 30,
656+
SuccessThreshold: 1,
657+
FailureThreshold: 5,
658+
},
659+
},
660+
validateManager: func(t *testing.T, manager *fake.PodTemplateManagers, containerName string) {
661+
assertContainerMatch(t, manager.PodTemplateSpec().Spec.Containers, containerName, func(container corev1.Container) bool {
662+
return reflect.DeepEqual(
663+
&corev1.Probe{
664+
InitialDelaySeconds: 10,
665+
TimeoutSeconds: 5,
666+
PeriodSeconds: 30,
667+
SuccessThreshold: 1,
668+
FailureThreshold: 5,
669+
ProbeHandler: corev1.ProbeHandler{
670+
HTTPGet: &corev1.HTTPGetAction{
671+
Path: "/live",
672+
Port: intstr.IntOrString{
673+
IntVal: 5555,
674+
},
675+
},
676+
},
677+
},
678+
container.StartupProbe)
679+
})
680+
},
681+
},
682+
{
683+
name: "override liveness probe with non-HTTPGet handler",
684+
containerName: common.CoreAgentContainerName,
685+
existingManager: func() *fake.PodTemplateManagers {
686+
return fake.NewPodTemplateManagers(t, corev1.PodTemplateSpec{
687+
Spec: corev1.PodSpec{
688+
Containers: []corev1.Container{*agentContainer},
689+
},
690+
})
691+
},
692+
override: v2alpha1.DatadogAgentGenericContainer{
693+
StartupProbe: &corev1.Probe{
694+
InitialDelaySeconds: 10,
695+
TimeoutSeconds: 5,
696+
PeriodSeconds: 30,
697+
SuccessThreshold: 1,
698+
FailureThreshold: 5,
699+
ProbeHandler: corev1.ProbeHandler{
700+
Exec: &corev1.ExecAction{
701+
Command: []string{"echo", "foo", "bar"},
702+
},
703+
},
704+
},
705+
},
706+
validateManager: func(t *testing.T, manager *fake.PodTemplateManagers, containerName string) {
707+
assertContainerMatch(t, manager.PodTemplateSpec().Spec.Containers, containerName, func(container corev1.Container) bool {
708+
return reflect.DeepEqual(
709+
&corev1.Probe{
710+
InitialDelaySeconds: 10,
711+
TimeoutSeconds: 5,
712+
PeriodSeconds: 30,
713+
SuccessThreshold: 1,
714+
FailureThreshold: 5,
715+
ProbeHandler: corev1.ProbeHandler{
716+
Exec: &corev1.ExecAction{
717+
Command: []string{"echo", "foo", "bar"},
718+
},
719+
},
720+
},
721+
container.StartupProbe)
722+
})
723+
},
724+
},
725+
{
726+
name: "override liveness probe",
727+
containerName: common.CoreAgentContainerName,
728+
existingManager: func() *fake.PodTemplateManagers {
729+
return fake.NewPodTemplateManagers(t, corev1.PodTemplateSpec{
730+
Spec: corev1.PodSpec{
731+
Containers: []corev1.Container{*agentContainer},
732+
},
733+
})
734+
},
735+
override: v2alpha1.DatadogAgentGenericContainer{
736+
StartupProbe: &corev1.Probe{
737+
InitialDelaySeconds: 10,
738+
TimeoutSeconds: 5,
739+
PeriodSeconds: 30,
740+
SuccessThreshold: 1,
741+
FailureThreshold: 5,
742+
ProbeHandler: corev1.ProbeHandler{
743+
HTTPGet: &corev1.HTTPGetAction{
744+
Path: "/some/path",
745+
Port: intstr.IntOrString{
746+
IntVal: 1234,
747+
},
748+
},
749+
},
750+
},
751+
},
752+
validateManager: func(t *testing.T, manager *fake.PodTemplateManagers, containerName string) {
753+
assertContainerMatch(t, manager.PodTemplateSpec().Spec.Containers, containerName, func(container corev1.Container) bool {
754+
return reflect.DeepEqual(
755+
&corev1.Probe{
756+
InitialDelaySeconds: 10,
757+
TimeoutSeconds: 5,
758+
PeriodSeconds: 30,
759+
SuccessThreshold: 1,
760+
FailureThreshold: 5,
761+
ProbeHandler: corev1.ProbeHandler{
762+
HTTPGet: &corev1.HTTPGetAction{
763+
Path: "/some/path",
764+
Port: intstr.IntOrString{
765+
IntVal: 1234,
766+
},
767+
},
768+
},
769+
},
770+
container.StartupProbe)
771+
})
772+
},
773+
},
641774
{
642775
name: "override security context",
643776
containerName: common.CoreAgentContainerName,

internal/controller/testutils/agent.go

+15
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,21 @@ func NewDatadogAgentWithOverrides(namespace string, name string) v2alpha1.Datado
488488
SuccessThreshold: 1,
489489
FailureThreshold: 5,
490490
},
491+
StartupProbe: &v1.Probe{
492+
ProbeHandler: v1.ProbeHandler{
493+
HTTPGet: &v1.HTTPGetAction{
494+
Path: v2alpha1.DefaultLivenessProbeHTTPPath,
495+
Port: intstr.IntOrString{
496+
IntVal: v2alpha1.DefaultAgentHealthPort,
497+
},
498+
},
499+
},
500+
InitialDelaySeconds: 15,
501+
TimeoutSeconds: 5,
502+
PeriodSeconds: 15,
503+
SuccessThreshold: 1,
504+
FailureThreshold: 6,
505+
},
491506
SecurityContext: &v1.SecurityContext{
492507
RunAsUser: apiutils.NewInt64Pointer(12345),
493508
},

0 commit comments

Comments
 (0)