Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions api/v4/searchheadcluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,21 @@ type SearchHeadClusterStatus struct {

// Auxiliary message describing CR status
Message string `json:"message"`

UpgradePhase UpgradePhase `json:"upgradePhase"`

UpgradeStartTimestamp int64 `json:"upgradeStartTimestamp"`

UpgradeEndTimestamp int64 `json:"upgradeEndTimestamp"`
}

// UpgradePhase is a string naming a phase of an upgrade. It is the type of
// the UpgradePhase status field, which is serialized as "upgradePhase".
type UpgradePhase string

// Known UpgradePhase values.
const (
// UpgradePhaseUpgrading is the "Upgrading" phase value.
// NOTE(review): presumably set while a rolling upgrade is in progress — confirm against the controller.
UpgradePhaseUpgrading UpgradePhase = "Upgrading"
// UpgradePhaseUpgraded is the "Upgraded" phase value.
// NOTE(review): presumably set once the upgrade has been finalized — confirm against the controller.
UpgradePhaseUpgraded UpgradePhase = "Upgraded"
)

// SearchHeadCluster is the Schema for a Splunk Enterprise search head cluster
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// +k8s:openapi-gen=true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9470,6 +9470,14 @@ spec:
telAppInstalled:
description: Telemetry App installation flag
type: boolean
upgradeEndTimestamp:
format: int64
type: integer
upgradePhase:
type: string
upgradeStartTimestamp:
format: int64
type: integer
type: object
type: object
served: true
Expand Down
11 changes: 6 additions & 5 deletions internal/controller/clustermanager_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/splunk/splunk-operator/internal/controller/common"

"github.com/pkg/errors"
metrics "github.com/splunk/splunk-operator/pkg/splunk/client/metrics"
enterprise "github.com/splunk/splunk-operator/pkg/splunk/enterprise"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -71,7 +72,7 @@ type ClusterManagerReconciler struct {
// - https://pkg.go.dev/sigs.k8s.io/[email protected]/pkg/reconcile
func (r *ClusterManagerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
// your logic here
reconcileCounters.With(getPrometheusLabels(req, "ClusterManager")).Inc()
metrics.ReconcileCounters.With(metrics.GetPrometheusLabels(req, "ClusterManager")).Inc()
defer recordInstrumentionData(time.Now(), req, "controller", "ClusterManager")

reqLogger := log.FromContext(ctx)
Expand Down Expand Up @@ -160,9 +161,9 @@ func (r *ClusterManagerReconciler) SetupWithManager(mgr ctrl.Manager) error {

// recordInstrumentionData records API profiling information to Prometheus.
// It builds the namespace/name/kind label set for req and name, adds the
// module label (module) and the method label (name, the same value used as
// the kind), and sets the total-time gauge to the time elapsed since start.
func recordInstrumentionData(start time.Time, req ctrl.Request, module string, name string) {
metricLabels := getPrometheusLabels(req, name)
metricLabels[labelModuleName] = module
metricLabels[labelMethodName] = name
metricLabels := metrics.GetPrometheusLabels(req, name)
metricLabels[metrics.LabelModuleName] = module
metricLabels[metrics.LabelMethodName] = name
// Dividing one time.Duration by another is integer division, so the elapsed
// time is truncated to whole milliseconds before the float64 conversion.
value := float64(time.Since(start) / time.Millisecond)
apiTotalTimeMetricEvents.With(metricLabels).Set(value)
metrics.ApiTotalTimeMetricEvents.With(metricLabels).Set(value)
}
3 changes: 2 additions & 1 deletion internal/controller/clustermaster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (

"github.com/pkg/errors"
enterpriseApiV3 "github.com/splunk/splunk-operator/api/v3"
metrics "github.com/splunk/splunk-operator/pkg/splunk/client/metrics"
enterprise "github.com/splunk/splunk-operator/pkg/splunk/enterprise"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -72,7 +73,7 @@ type ClusterMasterReconciler struct {
// - https://pkg.go.dev/sigs.k8s.io/[email protected]/pkg/reconcile
func (r *ClusterMasterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
// your logic here
reconcileCounters.With(getPrometheusLabels(req, "ClusterMaster")).Inc()
metrics.ReconcileCounters.With(metrics.GetPrometheusLabels(req, "ClusterMaster")).Inc()
defer recordInstrumentionData(time.Now(), req, "controller", "ClusterMaster")

reqLogger := log.FromContext(ctx)
Expand Down
3 changes: 2 additions & 1 deletion internal/controller/indexercluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (

"github.com/pkg/errors"
enterpriseApiV3 "github.com/splunk/splunk-operator/api/v3"
metrics "github.com/splunk/splunk-operator/pkg/splunk/client/metrics"
enterprise "github.com/splunk/splunk-operator/pkg/splunk/enterprise"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -71,7 +72,7 @@ type IndexerClusterReconciler struct {
// For more details, check Reconcile and its Result here:
// - https://pkg.go.dev/sigs.k8s.io/[email protected]/pkg/reconcile
func (r *IndexerClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
reconcileCounters.With(getPrometheusLabels(req, "IndexerCluster")).Inc()
metrics.ReconcileCounters.With(metrics.GetPrometheusLabels(req, "IndexerCluster")).Inc()
defer recordInstrumentionData(time.Now(), req, "controller", "IndexerCluster")

reqLogger := log.FromContext(ctx)
Expand Down
3 changes: 2 additions & 1 deletion internal/controller/licensemanager_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/splunk/splunk-operator/internal/controller/common"

"github.com/pkg/errors"
metrics "github.com/splunk/splunk-operator/pkg/splunk/client/metrics"
enterprise "github.com/splunk/splunk-operator/pkg/splunk/enterprise"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -70,7 +71,7 @@ type LicenseManagerReconciler struct {
// For more details, check Reconcile and its Result here:
// - https://pkg.go.dev/sigs.k8s.io/[email protected]/pkg/reconcile
func (r *LicenseManagerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
reconcileCounters.With(getPrometheusLabels(req, "LicenseManager")).Inc()
metrics.ReconcileCounters.With(metrics.GetPrometheusLabels(req, "LicenseManager")).Inc()
defer recordInstrumentionData(time.Now(), req, "controller", "LicenseManager")

reqLogger := log.FromContext(ctx)
Expand Down
3 changes: 2 additions & 1 deletion internal/controller/licensemaster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"github.com/pkg/errors"
enterpriseApiV3 "github.com/splunk/splunk-operator/api/v3"
enterpriseApi "github.com/splunk/splunk-operator/api/v4"
metrics "github.com/splunk/splunk-operator/pkg/splunk/client/metrics"
enterprise "github.com/splunk/splunk-operator/pkg/splunk/enterprise"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -71,7 +72,7 @@ type LicenseMasterReconciler struct {
// For more details, check Reconcile and its Result here:
// - https://pkg.go.dev/sigs.k8s.io/[email protected]/pkg/reconcile
func (r *LicenseMasterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
reconcileCounters.With(getPrometheusLabels(req, "LicenseMaster")).Inc()
metrics.ReconcileCounters.With(metrics.GetPrometheusLabels(req, "LicenseMaster")).Inc()
defer recordInstrumentionData(time.Now(), req, "controller", "LicenseMaster")

reqLogger := log.FromContext(ctx)
Expand Down
54 changes: 0 additions & 54 deletions internal/controller/metric.go

This file was deleted.

3 changes: 2 additions & 1 deletion internal/controller/monitoringconsole_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (

"github.com/pkg/errors"
enterpriseApiV3 "github.com/splunk/splunk-operator/api/v3"
metrics "github.com/splunk/splunk-operator/pkg/splunk/client/metrics"
enterprise "github.com/splunk/splunk-operator/pkg/splunk/enterprise"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -71,7 +72,7 @@ type MonitoringConsoleReconciler struct {
// For more details, check Reconcile and its Result here:
// - https://pkg.go.dev/sigs.k8s.io/[email protected]/pkg/reconcile
func (r *MonitoringConsoleReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
reconcileCounters.With(getPrometheusLabels(req, "MonitoringConsole")).Inc()
metrics.ReconcileCounters.With(metrics.GetPrometheusLabels(req, "MonitoringConsole")).Inc()
defer recordInstrumentionData(time.Now(), req, "controller", "MonitoringConsole")
reqLogger := log.FromContext(ctx)
reqLogger = reqLogger.WithValues("monitoringconsole", req.NamespacedName)
Expand Down
3 changes: 2 additions & 1 deletion internal/controller/searchheadcluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/splunk/splunk-operator/internal/controller/common"

"github.com/pkg/errors"
metrics "github.com/splunk/splunk-operator/pkg/splunk/client/metrics"
enterprise "github.com/splunk/splunk-operator/pkg/splunk/enterprise"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -70,7 +71,7 @@ type SearchHeadClusterReconciler struct {
// For more details, check Reconcile and its Result here:
// - https://pkg.go.dev/sigs.k8s.io/[email protected]/pkg/reconcile
func (r *SearchHeadClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
reconcileCounters.With(getPrometheusLabels(req, "SearchHeadCluster")).Inc()
metrics.ReconcileCounters.With(metrics.GetPrometheusLabels(req, "SearchHeadCluster")).Inc()
defer recordInstrumentionData(time.Now(), req, "controller", "SearchHeadCluster")

reqLogger := log.FromContext(ctx)
Expand Down
3 changes: 2 additions & 1 deletion internal/controller/standalone_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/reconcile"

"github.com/pkg/errors"
metrics "github.com/splunk/splunk-operator/pkg/splunk/client/metrics"
enterprise "github.com/splunk/splunk-operator/pkg/splunk/enterprise"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
Expand Down Expand Up @@ -75,7 +76,7 @@ type StandaloneReconciler struct {
// For more details, check Reconcile and its Result here:
// - https://pkg.go.dev/sigs.k8s.io/[email protected]/pkg/reconcile
func (r *StandaloneReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
reconcileCounters.With(getPrometheusLabels(req, "Standalone")).Inc()
metrics.ReconcileCounters.With(metrics.GetPrometheusLabels(req, "Standalone")).Inc()
defer recordInstrumentionData(time.Now(), req, "controller", "Standalone")

reqLogger := log.FromContext(ctx)
Expand Down
24 changes: 24 additions & 0 deletions pkg/splunk/client/enterprise.go
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,30 @@ func (c *SplunkClient) SetSearchHeadDetention(detain bool) error {
return c.Do(request, expectedStatus, nil)
}

// InitiateUpgrade starts the rolling upgrade process for a search head cluster.
// It sends a body-less POST to the upgrade-init control endpoint under
// c.ManagementURI; the endpoint proxies the request to the cluster captain.
// The request is handed to c.Do with 200 as the only expected status, and the
// error from building or executing the request is returned unchanged.
func (c *SplunkClient) InitiateUpgrade() error {
	uri := fmt.Sprintf("%s/services/shcluster/captain/control/control/upgrade-init", c.ManagementURI)
	req, err := http.NewRequest("POST", uri, nil)
	if err != nil {
		return err
	}
	return c.Do(req, []int{200}, nil)
}

// FinalizeUpgrade completes the rolling upgrade process for a search head cluster.
// It sends a body-less POST to the upgrade-finalize control endpoint under
// c.ManagementURI; the endpoint proxies the request to the cluster captain.
// The request is handed to c.Do with 200 as the only expected status, and the
// error from building or executing the request is returned unchanged.
func (c *SplunkClient) FinalizeUpgrade() error {
	uri := fmt.Sprintf("%s/services/shcluster/captain/control/control/upgrade-finalize", c.ManagementURI)
	req, err := http.NewRequest("POST", uri, nil)
	if err != nil {
		return err
	}
	return c.Do(req, []int{200}, nil)
}

// RemoveSearchHeadClusterMember removes a search head cluster member.
// You can use this on any member of a search head cluster.
// See https://docs.splunk.com/Documentation/Splunk/latest/DistSearch/Removeaclustermember
Expand Down
97 changes: 97 additions & 0 deletions pkg/splunk/client/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package metrics

import (
"time"

"github.com/prometheus/client_golang/prometheus"
"sigs.k8s.io/controller-runtime/pkg/metrics"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

// Label names attached to the Prometheus metrics declared in this package.
const (
// LabelNamespace, LabelName and LabelKind are the labels filled in by
// GetPrometheusLabels and carried by ReconcileCounters and ApiTotalTimeMetricEvents.
LabelNamespace = "namespace"
LabelName = "name"
LabelKind = "kind"
// LabelErrorType is the single label of ActionFailureCounters.
LabelErrorType = "error_type"
// LabelMethodName (exposed as "api") and LabelModuleName are the extra
// labels of ApiTotalTimeMetricEvents.
LabelMethodName = "api"
LabelModuleName = "module"
// LabelResourceVersion is not used by any metric declared in this file.
LabelResourceVersion = "resource_version"
)

// Unix timestamps, in seconds, of the most recently recorded upgrade start and
// end. They are assigned by RecordUpgradeStartTime and RecordUpgradeEndTime;
// nothing in this file reads them back.
var (
upgradeStartTimestamp int64
upgradeEndTimestamp int64
)

// Prometheus collectors exported by this package. All of them are registered
// with the controller-runtime metrics registry in init.
var (
	// ReconcileCounters counts reconciles, labeled by namespace, name and kind.
	ReconcileCounters = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "splunk_operator_reconcile_total",
			Help: "The number of times reconciled by this controller",
		},
		[]string{LabelNamespace, LabelName, LabelKind},
	)

	// ReconcileErrorCounter is an unlabeled counter of failed reconciles.
	ReconcileErrorCounter = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "splunk_operator_reconcile_error_total",
			Help: "The number of times the operator has failed to reconcile",
		},
	)

	// ActionFailureCounters counts operator error states, labeled by error type.
	ActionFailureCounters = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "splunk_operator_error_total",
			Help: "The number of times operator has entered an error state",
		},
		[]string{LabelErrorType},
	)

	// ApiTotalTimeMetricEvents is a gauge of call durations in milliseconds,
	// labeled by namespace, name, kind, module and method ("api").
	ApiTotalTimeMetricEvents = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "splunk_operator_module_duration_in_milliseconds",
			Help: "The time it takes to complete each call in standalone (in milliseconds)",
		},
		[]string{LabelNamespace, LabelName, LabelKind, LabelModuleName, LabelMethodName},
	)

	// UpgradeStartTime is an unlabeled gauge holding the Unix timestamp set by
	// RecordUpgradeStartTime.
	UpgradeStartTime = prometheus.NewGauge(
		prometheus.GaugeOpts{
			Name: "splunk_upgrade_start_time",
			Help: "Unix timestamp when the SHC upgrade started",
		},
	)

	// UpgradeEndTime is an unlabeled gauge holding the Unix timestamp set by
	// RecordUpgradeEndTime.
	UpgradeEndTime = prometheus.NewGauge(
		prometheus.GaugeOpts{
			Name: "splunk_upgrade_end_time",
			Help: "Unix timestamp when the SHC upgrade ended",
		},
	)

	// ActiveHistoricalSearchCount is a gauge of active historical searches,
	// labeled by "sh_name".
	ActiveHistoricalSearchCount = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "splunk_active_historical_search_count",
			Help: "Total number of active historical search count",
		},
		[]string{"sh_name"},
	)

	// ActiveRealtimeSearchCount is a gauge of active realtime searches,
	// labeled by "sh_name".
	ActiveRealtimeSearchCount = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "splunk_active_realtime_search_count",
			Help: "Total number of active realtime search count",
		},
		[]string{"sh_name"},
	)
)

// GetPrometheusLabels returns a freshly allocated label set for a reconcile
// request: the request's namespace and name plus the supplied kind. Because a
// new map is built on every call, callers may add further labels to it.
func GetPrometheusLabels(request reconcile.Request, kind string) prometheus.Labels {
	labels := make(prometheus.Labels, 3)
	labels[LabelNamespace] = request.Namespace
	labels[LabelName] = request.Name
	labels[LabelKind] = kind
	return labels
}

// RecordUpgradeStartTime captures the current Unix time in seconds, stores it
// in upgradeStartTimestamp and publishes it through the UpgradeStartTime gauge.
func RecordUpgradeStartTime() {
	now := time.Now().Unix()
	upgradeStartTimestamp = now
	UpgradeStartTime.Set(float64(now))
}

// RecordUpgradeEndTime captures the current Unix time in seconds, stores it
// in upgradeEndTimestamp and publishes it through the UpgradeEndTime gauge.
func RecordUpgradeEndTime() {
	now := time.Now().Unix()
	upgradeEndTimestamp = now
	UpgradeEndTime.Set(float64(now))
}

func init() {
metrics.Registry.MustRegister(
ReconcileCounters,
ReconcileErrorCounter,
ActionFailureCounters,
ApiTotalTimeMetricEvents,
UpgradeStartTime,
UpgradeEndTime,
ActiveHistoricalSearchCount,
ActiveRealtimeSearchCount,
)
}
3 changes: 3 additions & 0 deletions pkg/splunk/common/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,7 @@ type StatefulSetPodManager interface {

// FinishRecycle completes recycle event for pod and returns true, or returns false if nothing to do
FinishRecycle(context.Context, int32) (bool, error)

// FinishUpgrade finishes rolling upgrade process; it returns an error if upgrade process can't be finished
FinishUpgrade(context.Context, int32) error
}
Loading
Loading