Skip to content

Commit bdc9931

Browse files
committed
MON-4115: expose label metrics for jobs and cronjobs
Adds `AdditionalLabelsAllowList` to KSM config. Signed-off-by: Pranshu Srivastava <[email protected]>
1 parent e7abc6a commit bdc9931

File tree

5 files changed

+103
-0
lines changed

5 files changed

+103
-0
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Note: This CHANGELOG is only for the monitoring team to track all monitoring related changes. Please see OpenShift release notes for official changes.
22

3+
## 4.21
4+
5+
- [#2553](https://github.com/openshift/cluster-monitoring-operator/pull/2553) Expose an `additionalLabelsAllowList` field in CMO's KubeStateMetrics configuration to allow users to specify which additional label metrics from Kubernetes objects should be exposed by KSM.
6+
37
## 4.20
48

59
- [#2595](https://github.com/openshift/cluster-monitoring-operator/pull/2595) Multi-tenant support for KSM's CRS feature-set downstream.

Documentation/api.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ The `KubeStateMetricsConfig` resource defines settings for the `kube-state-metri
174174
| resources | *[v1.ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.33/#resourcerequirements-v1-core) | Defines resource requests and limits for the KubeStateMetrics container. |
175175
| tolerations | [][v1.Toleration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.33/#toleration-v1-core) | Defines tolerations for the pods. |
176176
| topologySpreadConstraints | []v1.TopologySpreadConstraint | Defines a pod's topology spread constraints. |
177+
| additionalLabelsAllowList | *string | Defines label-metrics' allow list for resources in addition to the default one. Currently, this is only supported for `jobs` and `cronjobs`, due to cardinality concerns. This follows the format: resource1=[label1,label2,labelN...],...,resourceN=[...], which is then validated and appended to the default labels' allow list. |
177178

178179
[Back to TOC](#table-of-contents)
179180

Documentation/openshiftdocs/modules/kubestatemetricsconfig.adoc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ Appears in: link:clustermonitoringconfiguration.adoc[ClusterMonitoringConfigurat
2626

2727
|topologySpreadConstraints|[]v1.TopologySpreadConstraint|Defines a pod's topology spread constraints.
2828

29+
|additionalLabelsAllowList|*string|Defines label-metrics' allow list for resources in addition to the default one. Currently, this is only supported for `jobs` and `cronjobs`, due to cardinality concerns. This follows the format: resource1=[label1,label2,labelN...],...,resourceN=[...], which is then validated and appended to the default labels' allow list.
30+
2931
|===
3032

3133
link:../index.adoc[Back to TOC]

pkg/manifests/manifests.go

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ import (
4848
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
4949
"k8s.io/apimachinery/pkg/util/sets"
5050
auditv1 "k8s.io/apiserver/pkg/apis/audit/v1"
51+
"k8s.io/klog/v2"
5152
apiregistrationv1 "k8s.io/kube-aggregator/pkg/apis/apiregistration/v1"
5253
"k8s.io/utils/ptr"
5354
k8syaml "sigs.k8s.io/yaml"
@@ -760,6 +761,34 @@ func (f *Factory) KubeStateMetricsDeployment() (*appsv1.Deployment, error) {
760761
if f.config.ClusterMonitoringConfiguration.KubeStateMetricsConfig.Resources != nil {
761762
d.Spec.Template.Spec.Containers[i].Resources = *f.config.ClusterMonitoringConfiguration.KubeStateMetricsConfig.Resources
762763
}
764+
additionalAllowList := f.config.ClusterMonitoringConfiguration.KubeStateMetricsConfig.AdditionalLabelsAllowList
765+
if additionalAllowList != nil && *additionalAllowList != "" {
766+
err = validateLabelsAllowListFormat(*additionalAllowList)
767+
if err != nil {
768+
return nil, fmt.Errorf("error parsing allowlist: %w", err)
769+
}
770+
for i = range container.Args {
771+
if strings.HasPrefix(container.Args[i], "--metric-labels-allowlist=") {
772+
allowedResources := sets.New[string]("jobs", "cronjobs")
773+
gotResourcesKeyValues := strings.Split(*additionalAllowList, ",")
774+
acceptedResources := make([]string, 0, len(gotResourcesKeyValues))
775+
for _, keyValue := range gotResourcesKeyValues {
776+
parts := strings.SplitN(keyValue, "=", 2)
777+
if len(parts) != 2 {
778+
return nil, fmt.Errorf("error parsing allowlist: %w", ErrConfigValidation)
779+
}
780+
if allowedResources.Has(parts[0]) {
781+
acceptedResources = append(acceptedResources, keyValue)
782+
} else {
783+
klog.V(4).Infof("ignoring unsupported resource %q in additional labels allowlist", parts[0])
784+
}
785+
}
786+
if len(acceptedResources) > 0 {
787+
container.Args[i] += "," + strings.Join(acceptedResources, ",")
788+
}
789+
}
790+
}
791+
}
763792
}
764793
}
765794

@@ -3614,3 +3643,65 @@ func hashStringMap(m map[string]string) string {
36143643
}
36153644
return hashByteMap(byteMap)
36163645
}
3646+
3647+
// validateLabelsAllowListFormat checks that value follows the
// resource1=[label1,label2,labelN...],...,resourceN=[...] layout and returns
// an error when it does not.
//
// The check is purely positional: every '=', '[', ']' and ',' is validated
// against the characters immediately before and after it. Nothing is parsed
// or returned besides the error, and bracket balancing across the whole
// string is not verified.
func validateLabelsAllowListFormat(value string) error {
	var errLabelsAllowListFormat = errors.New("invalid format, should be: resource1=[label1,label2,labelN...],...,resourceN=[...]")

	// Sentinel for "no following character"; same value as text/scanner.EOF.
	const eof rune = -1

	// Convert once and index by rune position. Ranging over the string itself
	// yields byte offsets, which do not line up with rune indexes as soon as
	// the input contains a multi-byte character.
	runes := []rune(value)

	for i, v := range runes {
		next := eof
		if i+1 < len(runes) {
			next = runes[i+1]
		}
		// For the very first character, "previous" is the character itself,
		// so a leading ',' or '[' is rejected by the checks below.
		previous := v
		if i > 0 {
			previous = runes[i-1]
		}

		switch v {
		case '=':
			// A resource name must be followed directly by its '[' and must
			// not be empty right after a separator.
			if previous == ',' || next != '[' {
				return errLabelsAllowListFormat
			}
		case '[':
			if previous != '=' {
				return errLabelsAllowListFormat
			}
		case ']':
			// After a label group only a separator or the end may follow.
			if next != eof && next != ',' {
				return errLabelsAllowListFormat
			}
		case ',':
			// Reject a leading comma, consecutive commas, a trailing comma
			// and a comma directly before a closing bracket.
			if previous == v || next == eof || next == ']' {
				return errLabelsAllowListFormat
			}
		}
	}
	return nil
}

pkg/manifests/types.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,11 @@ type KubeStateMetricsConfig struct {
175175
Tolerations []v1.Toleration `json:"tolerations,omitempty"`
176176
// Defines a pod's topology spread constraints.
177177
TopologySpreadConstraints []v1.TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty"`
178+
// Defines label-metrics' allow list for resources in addition to the default one.
179+
// Currently, this is only supported for `jobs` and `cronjobs`, due to cardinality concerns.
180+
// This follows the format: resource1=[label1,label2,labelN...],...,resourceN=[...],
181+
// which is then validated and appended to the default labels' allow list.
182+
AdditionalLabelsAllowList *string `json:"additionalLabelsAllowList,omitempty"`
178183
}
179184

180185
// The `PrometheusK8sConfig` resource defines settings for the Prometheus

0 commit comments

Comments
 (0)