Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Note: This CHANGELOG is only for the monitoring team to track all monitoring related changes. Please see OpenShift release notes for official changes.

## 4.21

- [#2553](https://github.com/openshift/cluster-monitoring-operator/pull/2553) Expose an `additionalLabelsAllowList` field in CMO's KubeStateMetrics configuration to allow users to specify which additional label metrics from Kubernetes objects should be exposed by KSM.

## 4.20

- [#2595](https://github.com/openshift/cluster-monitoring-operator/pull/2595) Multi-tenant support for KSM's CRS feature-set downstream.
Expand Down
1 change: 1 addition & 0 deletions Documentation/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ The `KubeStateMetricsConfig` resource defines settings for the `kube-state-metri
| resources | *[v1.ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.33/#resourcerequirements-v1-core) | Defines resource requests and limits for the KubeStateMetrics container. |
| tolerations | [][v1.Toleration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.33/#toleration-v1-core) | Defines tolerations for the pods. |
| topologySpreadConstraints | []v1.TopologySpreadConstraint | Defines a pod's topology spread constraints. |
| additionalLabelsAllowList | *string | Defines label-metrics' allow list for resources in addition to the default one. Currently, this is only supported for `jobs` and `cronjobs`, due to cardinality concerns. This follows the format: resource1=[label1,label2,labelN...],...,resourceN=[...], which is then validated and appended to the default labels' allow list. |

[Back to TOC](#table-of-contents)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ Appears in: link:clustermonitoringconfiguration.adoc[ClusterMonitoringConfigurat

|topologySpreadConstraints|[]v1.TopologySpreadConstraint|Defines a pod's topology spread constraints.

|additionalLabelsAllowList|*string|Defines label-metrics' allow list for resources in addition to the default one. Currently, this is only supported for `jobs` and `cronjobs`, due to cardinality concerns. This follows the format: resource1=[label1,label2,labelN...],...,resourceN=[...], which is then validated and appended to the default labels' allow list.

|===

link:../index.adoc[Back to TOC]
91 changes: 91 additions & 0 deletions pkg/manifests/manifests.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
auditv1 "k8s.io/apiserver/pkg/apis/audit/v1"
"k8s.io/klog/v2"
apiregistrationv1 "k8s.io/kube-aggregator/pkg/apis/apiregistration/v1"
"k8s.io/utils/ptr"
k8syaml "sigs.k8s.io/yaml"
Expand Down Expand Up @@ -760,6 +761,34 @@ func (f *Factory) KubeStateMetricsDeployment() (*appsv1.Deployment, error) {
if f.config.ClusterMonitoringConfiguration.KubeStateMetricsConfig.Resources != nil {
d.Spec.Template.Spec.Containers[i].Resources = *f.config.ClusterMonitoringConfiguration.KubeStateMetricsConfig.Resources
}
additionalAllowList := f.config.ClusterMonitoringConfiguration.KubeStateMetricsConfig.AdditionalLabelsAllowList
if additionalAllowList != nil && *additionalAllowList != "" {
err = validateLabelsAllowListFormat(*additionalAllowList)
if err != nil {
return nil, fmt.Errorf("error parsing allowlist: %w", err)
}
for i = range container.Args {
if strings.HasPrefix(container.Args[i], "--metric-labels-allowlist=") {
allowedResources := sets.New[string]("jobs", "cronjobs")
gotResourcesKeyValues := strings.Split(*additionalAllowList, ",")
acceptedResources := make([]string, 0, len(gotResourcesKeyValues))
for _, keyValue := range gotResourcesKeyValues {
parts := strings.SplitN(keyValue, "=", 2)
if len(parts) != 2 {
return nil, fmt.Errorf("error parsing allowlist: %w", ErrConfigValidation)
}
if allowedResources.Has(parts[0]) {
acceptedResources = append(acceptedResources, keyValue)
} else {
klog.V(4).Infof("ignoring unsupported resource %q in additional labels allowlist", parts[0])
}
}
if len(acceptedResources) > 0 {
container.Args[i] += "," + strings.Join(acceptedResources, ",")
}
}
}
}
}
}

Expand Down Expand Up @@ -3614,3 +3643,65 @@ func hashStringMap(m map[string]string) string {
}
return hashByteMap(byteMap)
}

// validateLabelsAllowListFormat checks that value is syntactically shaped like
// resource1=[label1,label2,labelN...],...,resourceN=[...].
//
// Only the punctuation is validated; each of '=', '[', ']' and ',' is checked
// against the runes immediately before and after it:
//   - '=' must be followed by '[' and must not directly follow ','.
//   - '[' must directly follow '='.
//   - ']' must be the last rune or be followed by ','.
//   - ',' must not be the first or last rune, must not directly follow
//     another ',', and must not be directly followed by ']'.
//
// Resource and label names themselves are not inspected, so inputs that contain
// none of the four punctuation runes (including the empty string) are accepted.
//
// It returns nil when value passes these checks and a non-nil error describing
// the expected format otherwise.
func validateLabelsAllowListFormat(value string) error {
	errLabelsAllowListFormat := errors.New("invalid format, should be: resource1=[label1,label2,labelN...],...,resourceN=[...]")

	// Sentinel meaning "there is no following rune"; same value as the
	// text/scanner EOF constant.
	const eof rune = -1

	// Decode once and walk the rune slice. Ranging over the string directly
	// yields byte offsets, which do not line up with rune indices as soon as
	// the input contains a multi-byte character.
	runes := []rune(value)

	for i, v := range runes {
		next := eof
		if i+1 < len(runes) {
			next = runes[i+1]
		}
		// For the first rune there is no predecessor; use the rune itself so
		// that a leading ',' or '[' is rejected by the checks below.
		previous := v
		if i > 0 {
			previous = runes[i-1]
		}

		switch v {
		case '=':
			if previous == ',' || next != '[' {
				return errLabelsAllowListFormat
			}
		case '[':
			if previous != '=' {
				return errLabelsAllowListFormat
			}
		case ']':
			// After a label group only a separator or the end of input may follow.
			if next != eof && next != ',' {
				return errLabelsAllowListFormat
			}
		case ',':
			// Rejects a leading comma, a doubled comma, a trailing comma and
			// a comma right before a closing bracket.
			if previous == ',' || next == eof || next == ']' {
				return errLabelsAllowListFormat
			}
		}
	}
	return nil
}
5 changes: 5 additions & 0 deletions pkg/manifests/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,11 @@ type KubeStateMetricsConfig struct {
Tolerations []v1.Toleration `json:"tolerations,omitempty"`
// Defines a pod's topology spread constraints.
TopologySpreadConstraints []v1.TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty"`
// Defines label-metrics' allow list for resources in addition to the default one.
// Currently, this is only supported for `jobs` and `cronjobs`, due to cardinality concerns.
// This follows the format: resource1=[label1,label2,labelN...],...,resourceN=[...],
// which is then validated and appended to the default labels' allow list.
AdditionalLabelsAllowList *string `json:"additionalLabelsAllowList,omitempty"`
Comment on lines +178 to +182
Copy link
Member Author

@rexagod rexagod Nov 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@marioferh Following on @machine424's comment, could you affirm if this would be okay to port to the CRD as is, or if you believe there are any changes to be made here? Thanks!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's still a WIP with the api folks, so far I'd prefer to wait until we get an ack from them in Prometheus and PrometheusK8s resources before adding any additional fields. While it seems pretty much straightforward I'd hold this for now.

}

// The `PrometheusK8sConfig` resource defines settings for the Prometheus
Expand Down