Skip to content

Commit

Permalink
feat: Add custom metric to count reconcile errors in a detailed manner
Browse files Browse the repository at this point in the history
This metric includes the resource name and namespace as labels, which the default controller-runtime reconcile error metric does not provide.
  • Loading branch information
oxyno-zeta committed Mar 3, 2024
1 parent d85d78b commit 3e9048b
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 13 deletions.
22 changes: 20 additions & 2 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,30 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
"sigs.k8s.io/controller-runtime/pkg/metrics"

postgresqlv1alpha1 "github.com/easymile/postgresql-operator/api/postgresql/v1alpha1"
postgresqlcontrollers "github.com/easymile/postgresql-operator/internal/controller/postgresql"
"github.com/prometheus/client_golang/prometheus"
//+kubebuilder:scaffold:imports
)

var (
scheme = runtime.NewScheme()
setupLog = ctrl.Log.WithName("setup")
scheme = runtime.NewScheme()
setupLog = ctrl.Log.WithName("setup")
controllerRuntimeDetailedErrorTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "controller_runtime_reconcile_detailed_errors_total",
Help: "Total number of reconciliation errors per controller detailed with resource namespace and name.",
},
[]string{"controller", "namespace", "name"},
)
)

func init() {
// Register custom metrics with the global prometheus registry
metrics.Registry.MustRegister(controllerRuntimeDetailedErrorTotal)

utilruntime.Must(clientgoscheme.AddToScheme(scheme))

utilruntime.Must(postgresqlv1alpha1.AddToScheme(scheme))
Expand Down Expand Up @@ -117,6 +129,8 @@ func main() {
"controllerGroup",
"postgresql.easymile.com",
),
ControllerRuntimeDetailedErrorTotal: controllerRuntimeDetailedErrorTotal,
ControllerName: "postgresqlengineconfiguration",
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "PostgresqlEngineConfiguration")
os.Exit(1)
Expand All @@ -134,6 +148,8 @@ func main() {
"controllerGroup",
"postgresql.easymile.com",
),
ControllerRuntimeDetailedErrorTotal: controllerRuntimeDetailedErrorTotal,
ControllerName: "postgresqldatabase",
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "PostgresqlDatabase")
os.Exit(1)
Expand All @@ -151,6 +167,8 @@ func main() {
"controllerGroup",
"postgresql.easymile.com",
),
ControllerRuntimeDetailedErrorTotal: controllerRuntimeDetailedErrorTotal,
ControllerName: "postgresqluserrole",
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "PostgresqlUserRole")
os.Exit(1)
Expand Down
17 changes: 13 additions & 4 deletions internal/controller/postgresql/postgresqldatabase_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
"github.com/easymile/postgresql-operator/internal/controller/postgresql/postgres"
"github.com/easymile/postgresql-operator/internal/controller/utils"
"github.com/go-logr/logr"
"github.com/prometheus/client_golang/prometheus"
"github.com/thoas/go-funk"
)

Expand All @@ -42,11 +43,13 @@ const (
)

// PostgresqlDatabaseReconciler reconciles a PostgresqlDatabase object.
type PostgresqlDatabaseReconciler struct { //nolint: golint // generated
client.Client
Scheme *runtime.Scheme
type PostgresqlDatabaseReconciler struct {
Recorder record.EventRecorder
Log logr.Logger
client.Client
Scheme *runtime.Scheme
ControllerRuntimeDetailedErrorTotal *prometheus.CounterVec
Log logr.Logger
ControllerName string
}

//+kubebuilder:rbac:groups=postgresql.easymile.com,resources=postgresqldatabases,verbs=get;list;watch;create;update;patch;delete
Expand Down Expand Up @@ -671,6 +674,9 @@ func (r *PostgresqlDatabaseReconciler) manageError(
instance.Status.Ready = false
instance.Status.Phase = postgresqlv1alpha1.DatabaseFailedPhase

// Increase fail counter
r.ControllerRuntimeDetailedErrorTotal.WithLabelValues(r.ControllerName, instance.Namespace, instance.Name).Inc()

// Patch status
err := r.Status().Patch(ctx, instance, originalPatch)
if err != nil {
Expand All @@ -695,6 +701,9 @@ func (r *PostgresqlDatabaseReconciler) manageSuccess(
// Patch status
err := r.Status().Patch(ctx, instance, originalPatch)
if err != nil {
// Increase fail counter
r.ControllerRuntimeDetailedErrorTotal.WithLabelValues(r.ControllerName, instance.Namespace, instance.Name).Inc()

logger.Error(err, "unable to update status")

// Return error
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
"github.com/easymile/postgresql-operator/internal/controller/postgresql/postgres"
"github.com/easymile/postgresql-operator/internal/controller/utils"
"github.com/go-logr/logr"
"github.com/prometheus/client_golang/prometheus"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
)

Expand All @@ -42,11 +43,13 @@ const (
)

// PostgresqlEngineConfigurationReconciler reconciles a PostgresqlEngineConfiguration object.
type PostgresqlEngineConfigurationReconciler struct { //nolint: golint // generated
client.Client
Scheme *runtime.Scheme
type PostgresqlEngineConfigurationReconciler struct {
Recorder record.EventRecorder
Log logr.Logger
client.Client
Scheme *runtime.Scheme
ControllerRuntimeDetailedErrorTotal *prometheus.CounterVec
Log logr.Logger
ControllerName string
}

//+kubebuilder:rbac:groups=postgresql.easymile.com,resources=postgresqlengineconfigurations,verbs=get;list;watch;create;update;patch;delete
Expand Down Expand Up @@ -332,6 +335,9 @@ func (r *PostgresqlEngineConfigurationReconciler) manageError(
instance.Status.Ready = false
instance.Status.Phase = postgresqlv1alpha1.EngineFailedPhase

// Increase fail counter
r.ControllerRuntimeDetailedErrorTotal.WithLabelValues(r.ControllerName, instance.Namespace, instance.Name).Inc()

// Patch status
err := r.Status().Patch(ctx, instance, originalPatch)
if err != nil {
Expand Down Expand Up @@ -363,6 +369,9 @@ func (r *PostgresqlEngineConfigurationReconciler) manageSuccess(
// Patch status
err = r.Status().Patch(ctx, instance, originalPatch)
if err != nil {
// Increase fail counter
r.ControllerRuntimeDetailedErrorTotal.WithLabelValues(r.ControllerName, instance.Namespace, instance.Name).Inc()

logger.Error(err, "unable to update status")

// Return error
Expand Down
15 changes: 12 additions & 3 deletions internal/controller/postgresql/postgresqluserrole_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import (
"github.com/easymile/postgresql-operator/internal/controller/postgresql/postgres"
"github.com/easymile/postgresql-operator/internal/controller/utils"
"github.com/go-logr/logr"
"github.com/prometheus/client_golang/prometheus"
"github.com/thoas/go-funk"
)

Expand All @@ -56,10 +57,12 @@ const (

// PostgresqlUserRoleReconciler reconciles a PostgresqlUserRole object.
type PostgresqlUserRoleReconciler struct {
client.Client
Scheme *runtime.Scheme
Recorder record.EventRecorder
Log logr.Logger
client.Client
Scheme *runtime.Scheme
ControllerRuntimeDetailedErrorTotal *prometheus.CounterVec
Log logr.Logger
ControllerName string
}

type dbPrivilegeCache struct {
Expand Down Expand Up @@ -1364,6 +1367,9 @@ func (r *PostgresqlUserRoleReconciler) manageError(
instance.Status.Ready = false
instance.Status.Phase = v1alpha1.UserRoleFailedPhase

// Increase fail counter
r.ControllerRuntimeDetailedErrorTotal.WithLabelValues(r.ControllerName, instance.Namespace, instance.Name).Inc()

// Patch status
err := r.Status().Patch(ctx, instance, originalPatch)
if err != nil {
Expand All @@ -1388,6 +1394,9 @@ func (r *PostgresqlUserRoleReconciler) manageSuccess(
// Patch status
err := r.Status().Patch(ctx, instance, originalPatch)
if err != nil {
// Increase fail counter
r.ControllerRuntimeDetailedErrorTotal.WithLabelValues(r.ControllerName, instance.Namespace, instance.Name).Inc()

logger.Error(err, "unable to update status")

// Return error
Expand Down

0 comments on commit 3e9048b

Please sign in to comment.