Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: enable gateway hpa based on memory #1689

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion autoscaler/controllers/collectorsgroup_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"

odigosv1 "github.com/odigos-io/odigos/api/odigos/v1alpha1"
controllerconfig "github.com/odigos-io/odigos/autoscaler/controllers/controller_config"
"github.com/odigos-io/odigos/autoscaler/controllers/datacollection"
"github.com/odigos-io/odigos/autoscaler/controllers/gateway"
"sigs.k8s.io/controller-runtime/pkg/predicate"
Expand All @@ -38,6 +39,7 @@ type CollectorsGroupReconciler struct {
ImagePullSecrets []string
OdigosVersion string
DisableNameProcessor bool
Config *controllerconfig.ControllerConfig
}

//+kubebuilder:rbac:groups=odigos.io,namespace=odigos-system,resources=collectorsgroups,verbs=get;list;watch;create;update;patch;delete
Expand All @@ -62,7 +64,7 @@ func (r *CollectorsGroupReconciler) Reconcile(ctx context.Context, req ctrl.Requ
logger := log.FromContext(ctx)
logger.V(0).Info("Reconciling CollectorsGroup")

err := gateway.Sync(ctx, r.Client, r.Scheme, r.ImagePullSecrets, r.OdigosVersion)
err := gateway.Sync(ctx, r.Client, r.Scheme, r.ImagePullSecrets, r.OdigosVersion, r.Config.MetricsServerEnabled)
if err != nil {
return ctrl.Result{}, err
}
Expand Down
5 changes: 5 additions & 0 deletions autoscaler/controllers/controller_config/controller_config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package controllerconfig

// ControllerConfig carries settings that are shared by the autoscaler's
// reconcilers. A single instance is built in main and handed to each
// reconciler through its Config field.
type ControllerConfig struct {
// MetricsServerEnabled records whether the metrics-server Deployment was
// found when the autoscaler started. Reconcilers forward it to gateway.Sync,
// where it decides whether the gateway HPA is synced.
MetricsServerEnabled bool
}
4 changes: 3 additions & 1 deletion autoscaler/controllers/destination_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package controllers
import (
"context"

controllerconfig "github.com/odigos-io/odigos/autoscaler/controllers/controller_config"
"github.com/odigos-io/odigos/autoscaler/controllers/gateway"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
Expand All @@ -34,6 +35,7 @@ type DestinationReconciler struct {
Scheme *runtime.Scheme
ImagePullSecrets []string
OdigosVersion string
Config *controllerconfig.ControllerConfig
}

//+kubebuilder:rbac:groups=odigos.io,namespace=odigos-system,resources=destinations,verbs=get;list;watch;create;update;patch;delete
Expand All @@ -51,7 +53,7 @@ type DestinationReconciler struct {
func (r *DestinationReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
logger := log.FromContext(ctx)
logger.V(0).Info("Reconciling Destination")
err := gateway.Sync(ctx, r.Client, r.Scheme, r.ImagePullSecrets, r.OdigosVersion)
err := gateway.Sync(ctx, r.Client, r.Scheme, r.ImagePullSecrets, r.OdigosVersion, r.Config.MetricsServerEnabled)
if err != nil {
return ctrl.Result{}, err
}
Expand Down
15 changes: 11 additions & 4 deletions autoscaler/controllers/gateway/hpa.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"

odigosv1 "github.com/odigos-io/odigos/api/odigos/v1alpha1"
autoscaling "k8s.io/api/autoscaling/v2beta2"
autoscaling "k8s.io/api/autoscaling/v2"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand All @@ -24,8 +24,9 @@ const (
)

var (
minReplicas = intPtr(1)
maxReplicas = int32(10)
minReplicas = intPtr(1)
maxReplicas = int32(10)
stabilizationWindowSeconds = intPtr(300) // cooldown period for scaling down
)

func syncHPA(gateway *odigosv1.CollectorsGroup, ctx context.Context, c client.Client, scheme *runtime.Scheme, memConfig *memoryConfigurations) error {
Expand All @@ -34,7 +35,7 @@ func syncHPA(gateway *odigosv1.CollectorsGroup, ctx context.Context, c client.Cl
metricQuantity := resource.MustParse(fmt.Sprintf("%dMi", memLimit))
hpa := &autoscaling.HorizontalPodAutoscaler{
TypeMeta: metav1.TypeMeta{
APIVersion: "autoscaling/v2beta2",
APIVersion: "autoscaling/v2",
Kind: "HorizontalPodAutoscaler",
},
ObjectMeta: metav1.ObjectMeta{
Expand All @@ -61,8 +62,14 @@ func syncHPA(gateway *odigosv1.CollectorsGroup, ctx context.Context, c client.Cl
},
},
},
Behavior: &autoscaling.HorizontalPodAutoscalerBehavior{
ScaleDown: &autoscaling.HPAScalingRules{
StabilizationWindowSeconds: stabilizationWindowSeconds,
},
},
},
}

if err := controllerutil.SetControllerReference(gateway, hpa, scheme); err != nil {
logger.Error(err, "Failed to set controller reference")
return err
Expand Down
32 changes: 7 additions & 25 deletions autoscaler/controllers/gateway/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@ package gateway
import (
"context"

appsv1 "k8s.io/api/apps/v1"

odigosv1 "github.com/odigos-io/odigos/api/odigos/v1alpha1"
commonconf "github.com/odigos-io/odigos/autoscaler/controllers/common"
odigoscommon "github.com/odigos-io/odigos/common"
Expand All @@ -23,7 +21,8 @@ var (
}
)

func Sync(ctx context.Context, k8sClient client.Client, scheme *runtime.Scheme, imagePullSecrets []string, odigosVersion string) error {
func Sync(ctx context.Context, k8sClient client.Client, scheme *runtime.Scheme, imagePullSecrets []string, odigosVersion string,
metricsServerExists bool) error {
logger := log.FromContext(ctx)

odigosNs := env.GetCurrentNamespace()
Expand Down Expand Up @@ -55,7 +54,8 @@ func Sync(ctx context.Context, k8sClient client.Client, scheme *runtime.Scheme,
return err
}

err = syncGateway(&dests, &processors, &gatewayCollectorGroup, ctx, k8sClient, scheme, imagePullSecrets, odigosVersion, &odigosConfig)
err = syncGateway(&dests, &processors, &gatewayCollectorGroup, ctx, k8sClient, scheme, imagePullSecrets, odigosVersion, &odigosConfig,
metricsServerExists)
statusPatchString := commonconf.GetCollectorsGroupDeployedConditionsPatch(err)
statusErr := k8sClient.Status().Patch(ctx, &gatewayCollectorGroup, client.RawPatch(types.MergePatchType, []byte(statusPatchString)))
if statusErr != nil {
Expand All @@ -67,7 +67,8 @@ func Sync(ctx context.Context, k8sClient client.Client, scheme *runtime.Scheme,

func syncGateway(dests *odigosv1.DestinationList, processors *odigosv1.ProcessorList,
gateway *odigosv1.CollectorsGroup, ctx context.Context,
c client.Client, scheme *runtime.Scheme, imagePullSecrets []string, odigosVersion string, odigosConfig *odigoscommon.OdigosConfiguration) error {
c client.Client, scheme *runtime.Scheme, imagePullSecrets []string, odigosVersion string, odigosConfig *odigoscommon.OdigosConfiguration,
metricsServerExists bool) error {
logger := log.FromContext(ctx)
logger.V(0).Info("Syncing gateway")

Expand Down Expand Up @@ -103,7 +104,7 @@ func syncGateway(dests *odigosv1.DestinationList, processors *odigosv1.Processor
return err
}

if isMetricsServerInstalled(ctx, c) {
if metricsServerExists {
err = syncHPA(gateway, ctx, c, scheme, memConfig)
if err != nil {
logger.Error(err, "Failed to sync HPA")
Expand All @@ -112,22 +113,3 @@ func syncGateway(dests *odigosv1.DestinationList, processors *odigosv1.Processor

return nil
}

// isMetricsServerInstalled reports whether a Deployment named "metrics-server"
// exists in the "kube-system" namespace, which is used as the signal that the
// Kubernetes metrics server is installed.
//
// It returns true only when the lookup succeeds. A not-found result is logged
// at info level and any other lookup error is logged as an error; both cases
// return false.
func isMetricsServerInstalled(ctx context.Context, c client.Client) bool {
	logger := log.FromContext(ctx)

	key := types.NamespacedName{Name: "metrics-server", Namespace: "kube-system"}
	deployment := &appsv1.Deployment{}
	lookupErr := c.Get(ctx, key, deployment)

	switch {
	case lookupErr == nil:
		logger.V(0).Info("Metrics server found, creating HPA for Gateway")
		return true
	case client.IgnoreNotFound(lookupErr) != nil:
		// Anything other than "not found" is a real failure worth surfacing.
		logger.Error(lookupErr, "Failed to get metrics-server deployment")
	default:
		logger.V(0).Info("Metrics server not found, skipping HPA creation")
	}
	return false
}
6 changes: 4 additions & 2 deletions autoscaler/controllers/odigosconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ package controllers
import (
"context"

controllerconfig "github.com/odigos-io/odigos/autoscaler/controllers/controller_config"
"github.com/odigos-io/odigos/autoscaler/controllers/gateway"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand All @@ -16,13 +17,14 @@ type OdigosConfigReconciler struct {
Scheme *runtime.Scheme
ImagePullSecrets []string
OdigosVersion string
Config *controllerconfig.ControllerConfig
}

func (r *OdigosConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
logger := log.FromContext(ctx)
logger.V(0).Info("Reconciling Odigos Configuration")

err := gateway.Sync(ctx, r.Client, r.Scheme, r.ImagePullSecrets, r.OdigosVersion)
err := gateway.Sync(ctx, r.Client, r.Scheme, r.ImagePullSecrets, r.OdigosVersion, r.Config.MetricsServerEnabled)
if err != nil {
return ctrl.Result{}, err
}
Expand Down
4 changes: 3 additions & 1 deletion autoscaler/controllers/processor_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"

v1 "github.com/odigos-io/odigos/api/odigos/v1alpha1"
controllerconfig "github.com/odigos-io/odigos/autoscaler/controllers/controller_config"
"github.com/odigos-io/odigos/autoscaler/controllers/datacollection"
"github.com/odigos-io/odigos/autoscaler/controllers/gateway"
"k8s.io/apimachinery/pkg/runtime"
Expand All @@ -18,14 +19,15 @@ type ProcessorReconciler struct {
ImagePullSecrets []string
OdigosVersion string
DisableNameProcessor bool
Config *controllerconfig.ControllerConfig
}

func (r *ProcessorReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {

logger := log.FromContext(ctx)
logger.V(0).Info("Reconciling Processor")

err := gateway.Sync(ctx, r.Client, r.Scheme, r.ImagePullSecrets, r.OdigosVersion)
err := gateway.Sync(ctx, r.Client, r.Scheme, r.ImagePullSecrets, r.OdigosVersion, r.Config.MetricsServerEnabled)
if err != nil {
return ctrl.Result{}, err
}
Expand Down
4 changes: 3 additions & 1 deletion autoscaler/controllers/secret_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package controllers
import (
"context"

controllerconfig "github.com/odigos-io/odigos/autoscaler/controllers/controller_config"
"github.com/odigos-io/odigos/autoscaler/controllers/gateway"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
Expand All @@ -19,6 +20,7 @@ type SecretReconciler struct {
Scheme *runtime.Scheme
ImagePullSecrets []string
OdigosVersion string
Config *controllerconfig.ControllerConfig
}

type secretPredicate struct {
Expand Down Expand Up @@ -52,7 +54,7 @@ func (r *SecretReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
logger := log.FromContext(ctx)
logger.V(0).Info("Reconciling Secret")

err := gateway.Sync(ctx, r.Client, r.Scheme, r.ImagePullSecrets, r.OdigosVersion)
err := gateway.Sync(ctx, r.Client, r.Scheme, r.ImagePullSecrets, r.OdigosVersion, r.Config.MetricsServerEnabled)
if err != nil {
return ctrl.Result{}, err
}
Expand Down
32 changes: 32 additions & 0 deletions autoscaler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (
"os"
"strings"

apierrors "k8s.io/apimachinery/pkg/api/errors"

"github.com/odigos-io/odigos/k8sutils/pkg/env"

corev1 "k8s.io/api/core/v1"
Expand All @@ -31,6 +33,7 @@ import (

"sigs.k8s.io/controller-runtime/pkg/cache"

"github.com/go-logr/logr"
"github.com/go-logr/zapr"
bridge "github.com/odigos-io/opentelemetry-zap-bridge"

Expand All @@ -40,6 +43,7 @@ import (

"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
ctrl "sigs.k8s.io/controller-runtime"
Expand All @@ -53,6 +57,7 @@ import (

"github.com/odigos-io/odigos/autoscaler/controllers"
"github.com/odigos-io/odigos/autoscaler/controllers/actions"
controllerconfig "github.com/odigos-io/odigos/autoscaler/controllers/controller_config"
"github.com/odigos-io/odigos/autoscaler/controllers/gateway"
nameutils "github.com/odigos-io/odigos/autoscaler/utils"

Expand Down Expand Up @@ -169,11 +174,16 @@ func main() {
// at the time of writing (2024-10-22) only dotnet and java native agent are using the name processor.
_, disableNameProcessor := os.LookupEnv("DISABLE_NAME_PROCESSOR")

config := &controllerconfig.ControllerConfig{
MetricsServerEnabled: isMetricsServerInstalled(mgr, setupLog),
}

if err = (&controllers.DestinationReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
ImagePullSecrets: imagePullSecrets,
OdigosVersion: odigosVersion,
Config: config,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "Destination")
os.Exit(1)
Expand All @@ -185,6 +195,7 @@ func main() {
ImagePullSecrets: imagePullSecrets,
OdigosVersion: odigosVersion,
DisableNameProcessor: disableNameProcessor,
Config: config,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "Processor")
os.Exit(1)
Expand All @@ -195,6 +206,7 @@ func main() {
ImagePullSecrets: imagePullSecrets,
OdigosVersion: odigosVersion,
DisableNameProcessor: disableNameProcessor,
Config: config,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "CollectorsGroup")
os.Exit(1)
Expand All @@ -214,6 +226,7 @@ func main() {
Scheme: mgr.GetScheme(),
ImagePullSecrets: imagePullSecrets,
OdigosVersion: odigosVersion,
Config: config,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "OdigosConfig")
os.Exit(1)
Expand All @@ -223,6 +236,7 @@ func main() {
Scheme: mgr.GetScheme(),
ImagePullSecrets: imagePullSecrets,
OdigosVersion: odigosVersion,
Config: config,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "Secret")
os.Exit(1)
Expand Down Expand Up @@ -262,3 +276,21 @@ func main() {
os.Exit(1)
}
}

// isMetricsServerInstalled reports whether a Deployment named "metrics-server"
// exists in the "kube-system" namespace.
//
// The lookup goes through mgr.GetAPIReader(), the uncached reader that talks
// to the API server directly, instead of the manager's cached client; the
// original author's note says the manager does not cache the metrics-server
// Deployment.
//
// It returns true only when the lookup succeeds. A not-found result is logged
// at info level and any other failure is logged as an error; both return false.
func isMetricsServerInstalled(mgr ctrl.Manager, logger logr.Logger) bool {
	key := types.NamespacedName{Name: "metrics-server", Namespace: "kube-system"}
	deployment := &appsv1.Deployment{}
	lookupErr := mgr.GetAPIReader().Get(context.TODO(), key, deployment)

	switch {
	case lookupErr == nil:
		logger.V(0).Info("Metrics server found")
		return true
	case apierrors.IsNotFound(lookupErr):
		logger.Info("Metrics-server deployment not found")
	default:
		logger.Error(lookupErr, "Failed to get metrics-server deployment")
	}
	return false
}
14 changes: 13 additions & 1 deletion tests/e2e/cli-upgrade/assert-instrumented-and-pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,12 @@ status:
(value != null): true # This will hold the version before the upgrade
- key: process.pid
(value != null): true
- key: k8s.namespace.name
(value != null): true
- key: k8s.container.name
(value != null): true
- key: k8s.pod.name
(value != null): true
---
apiVersion: odigos.io/v1alpha1
kind: InstrumentationInstance
Expand All @@ -324,7 +330,13 @@ status:
- key: process.pid
(value != null): true
- key: telemetry.sdk.language
value: python
value: python
- key: k8s.namespace.name
(value != null): true
- key: k8s.container.name
(value != null): true
- key: k8s.pod.name
(value != null): true
---
apiVersion: odigos.io/v1alpha1
kind: InstrumentationInstance
Expand Down
Loading