diff --git a/api/config/crd/bases/odigos.io_collectorsgroups.yaml b/api/config/crd/bases/odigos.io_collectorsgroups.yaml index db2de096c..b8dca11fc 100644 --- a/api/config/crd/bases/odigos.io_collectorsgroups.yaml +++ b/api/config/crd/bases/odigos.io_collectorsgroups.yaml @@ -62,6 +62,14 @@ spec: this is when go runtime will start garbage collection. it is recommended to be set to 80% of the hard limit of the memory limiter. type: integer + memoryLimitMiB: + description: |- + This option sets the limit on the memory usage of the collector. + since the memory limiter mechanism is heuristic, and operates on fixed intervals, + while it cannot fully prevent OOMs, it can help in reducing the chances of OOMs in edge cases. + the settings should prevent the collector from exceeding the memory request, + so one can set this to the same value as the memory request or higher to allow for some buffer for bursts. + type: integer memoryLimiterLimitMiB: description: |- this parameter sets the "limit_mib" parameter in the memory limiter configuration for the collector. @@ -86,6 +94,7 @@ spec: type: integer required: - gomemlimitMiB + - memoryLimitMiB - memoryLimiterLimitMiB - memoryLimiterSpikeLimitMiB - memoryRequestMiB diff --git a/api/generated/odigos/applyconfiguration/odigos/v1alpha1/collectorsgroupmemorysettings.go b/api/generated/odigos/applyconfiguration/odigos/v1alpha1/collectorsgroupmemorysettings.go index e0b50a76e..4bd5e45d6 100644 --- a/api/generated/odigos/applyconfiguration/odigos/v1alpha1/collectorsgroupmemorysettings.go +++ b/api/generated/odigos/applyconfiguration/odigos/v1alpha1/collectorsgroupmemorysettings.go @@ -21,6 +21,7 @@ package v1alpha1 // with apply. 
type CollectorsGroupMemorySettingsApplyConfiguration struct { MemoryRequestMiB *int `json:"memoryRequestMiB,omitempty"` + MemoryLimitMiB *int `json:"memoryLimitMiB,omitempty"` MemoryLimiterLimitMiB *int `json:"memoryLimiterLimitMiB,omitempty"` MemoryLimiterSpikeLimitMiB *int `json:"memoryLimiterSpikeLimitMiB,omitempty"` GomemlimitMiB *int `json:"gomemlimitMiB,omitempty"` @@ -40,6 +41,14 @@ func (b *CollectorsGroupMemorySettingsApplyConfiguration) WithMemoryRequestMiB(v return b } +// WithMemoryLimitMiB sets the MemoryLimitMiB field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the MemoryLimitMiB field is set to the value of the last call. +func (b *CollectorsGroupMemorySettingsApplyConfiguration) WithMemoryLimitMiB(value int) *CollectorsGroupMemorySettingsApplyConfiguration { + b.MemoryLimitMiB = &value + return b +} + // WithMemoryLimiterLimitMiB sets the MemoryLimiterLimitMiB field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the MemoryLimiterLimitMiB field is set to the value of the last call. diff --git a/api/odigos/v1alpha1/collectorsgroup_types.go b/api/odigos/v1alpha1/collectorsgroup_types.go index 26a688df3..a8187f53b 100644 --- a/api/odigos/v1alpha1/collectorsgroup_types.go +++ b/api/odigos/v1alpha1/collectorsgroup_types.go @@ -40,6 +40,13 @@ type CollectorsGroupMemorySettings struct { // it will be embedded in the as a resource request of the form "memory: Mi" MemoryRequestMiB int `json:"memoryRequestMiB"` + // This option sets the limit on the memory usage of the collector. + // since the memory limiter mechanism is heuristic, and operates on fixed intervals, + // while it cannot fully prevent OOMs, it can help in reducing the chances of OOMs in edge cases. 
+ // the settings should prevent the collector from exceeding the memory request, + // so one can set this to the same value as the memory request or higher to allow for some buffer for bursts. + MemoryLimitMiB int `json:"memoryLimitMiB"` + // this parameter sets the "limit_mib" parameter in the memory limiter configuration for the collector. // it is the hard limit after which a force garbage collection will be performed. // this value will end up comparing against the go runtime reported heap Alloc value. diff --git a/autoscaler/controllers/gateway/deployment.go b/autoscaler/controllers/gateway/deployment.go index 8e71814da..e15f98a45 100644 --- a/autoscaler/controllers/gateway/deployment.go +++ b/autoscaler/controllers/gateway/deployment.go @@ -91,6 +91,14 @@ func getDesiredDeployment(dests *odigosv1.DestinationList, configDataHash string gateway *odigosv1.CollectorsGroup, scheme *runtime.Scheme, imagePullSecrets []string, odigosVersion string) (*appsv1.Deployment, error) { requestMemoryQuantity := resource.MustParse(fmt.Sprintf("%dMi", gateway.Spec.MemorySettings.MemoryRequestMiB)) + // a collectors group stored before memoryLimitMiB existed decodes the field as 0, + // and kubernetes rejects a container whose memory request exceeds its memory limit, + // so the rendered limit is never allowed to drop below the request. + limitMemoryMiB := gateway.Spec.MemorySettings.MemoryLimitMiB + if limitMemoryMiB < gateway.Spec.MemorySettings.MemoryRequestMiB { + limitMemoryMiB = gateway.Spec.MemorySettings.MemoryRequestMiB + } + limitMemoryQuantity := resource.MustParse(fmt.Sprintf("%dMi", limitMemoryMiB)) desiredDeployment := &appsv1.Deployment{ ObjectMeta: v1.ObjectMeta{ @@ -190,6 +198,9 @@ func getDesiredDeployment(dests *odigosv1.DestinationList, configDataHash string Requests: corev1.ResourceList{ corev1.ResourceMemory: requestMemoryQuantity, }, + Limits: corev1.ResourceList{ + corev1.ResourceMemory: limitMemoryQuantity, + }, }, }, }, diff --git a/scheduler/controllers/clustercollectorsgroup/memory.go b/scheduler/controllers/clustercollectorsgroup/memory.go index 8f0a0dda6..7a8244a1a 100644 --- a/scheduler/controllers/clustercollectorsgroup/memory.go +++ b/scheduler/controllers/clustercollectorsgroup/memory.go @@ -19,6 +19,12 @@ const ( // the percentage out of the memory limiter hard limit, at which go runtime will start garbage collection. 
// it is used to calculate the GOMEMLIMIT environment variable value. defaultGoMemLimitPercentage = 80.0 + + // memoryLimitAboveRequestFactor is the multiplier applied to the memory request to compute the memory limit. + // the memory settings should keep the collector below its memory request, but the mechanism is heuristic and is not guaranteed to prevent OOMs. + // allowing the memory limit to be slightly above the memory request reduces the chances of OOMs in edge cases: + // instead of being killed, the process can use extra memory available on the node, without that memory being allocated preemptively. + memoryLimitAboveRequestFactor = 1.25 ) // process the memory settings from odigos config and return the memory settings for the collectors group. @@ -29,6 +35,8 @@ func getMemorySettings(odigosConfig *common.OdigosConfiguration) *odigosv1.Colle memoryRequestMiB = odigosConfig.CollectorGateway.RequestMemoryMiB } + memoryLimitMiB := int(float64(memoryRequestMiB) * memoryLimitAboveRequestFactor) + // the memory limiter hard limit is set as 50 MiB less than the memory request memoryLimiterLimitMiB := memoryRequestMiB - defaultMemoryLimiterLimitDiffMib if odigosConfig.CollectorGateway != nil && odigosConfig.CollectorGateway.MemoryLimiterLimitMiB > 0 { @@ -47,6 +55,7 @@ func getMemorySettings(odigosConfig *common.OdigosConfiguration) *odigosv1.Colle return &odigosv1.CollectorsGroupMemorySettings{ MemoryRequestMiB: memoryRequestMiB, + MemoryLimitMiB: memoryLimitMiB, MemoryLimiterLimitMiB: memoryLimiterLimitMiB, MemoryLimiterSpikeLimitMiB: memoryLimiterSpikeLimitMiB, GomemlimitMiB: gomemlimitMiB,