diff --git a/.chloggen/kangyi_k8scluster-nil-pointer.yaml b/.chloggen/kangyi_k8scluster-nil-pointer.yaml new file mode 100644 index 0000000000000..496c183fe5a81 --- /dev/null +++ b/.chloggen/kangyi_k8scluster-nil-pointer.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: bug_fix + +# The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog) +component: receiver/k8s_cluster + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: fix nil dereference in k8s_cluster receiver + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [47538] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [user] diff --git a/receiver/k8sclusterreceiver/internal/container/containers.go b/receiver/k8sclusterreceiver/internal/container/containers.go index 3c44ee9ffee8f..a22aea3ec7caa 100644 --- a/receiver/k8sclusterreceiver/internal/container/containers.go +++ b/receiver/k8sclusterreceiver/internal/container/containers.go @@ -64,12 +64,14 @@ func RecordSpecMetrics(logger *zap.Logger, mb *metadata.MetricsBuilder, c corev1 if cs != nil && cs.LastTerminationState.Terminated != nil { e.SetK8sContainerStatusLastTerminatedReason(cs.LastTerminationState.Terminated.Reason) } - image, err := docker.ParseImageName(cs.Image) - if err != nil { - docker.LogParseError(err, cs.Image, logger) - } else { - e.SetContainerImageName(image.Repository) - e.SetContainerImageTag(image.Tag) + if cs != nil { + image, err := docker.ParseImageName(cs.Image) + if err != nil { + docker.LogParseError(err, cs.Image, logger) + } else { + e.SetContainerImageName(image.Repository) + e.SetContainerImageTag(image.Tag) + } } e.SetK8sPodName(pod.Name) diff --git a/receiver/k8sclusterreceiver/internal/container/containers_test.go b/receiver/k8sclusterreceiver/internal/container/containers_test.go index 2fb1ba11396a2..264bb4f7c4272 100644 --- a/receiver/k8sclusterreceiver/internal/container/containers_test.go +++ b/receiver/k8sclusterreceiver/internal/container/containers_test.go @@ -9,12 +9,273 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" + "go.opentelemetry.io/collector/receiver/receivertest" "go.uber.org/zap" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + + "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/metadata" ) +// metricValue finds a single data point value in m by metric name and data point attributes. +// Returns (value, found). Only handles Int and Double data points. +func metricValue(t *testing.T, m pmetric.Metrics, metricName string, dpAttrs map[string]string) (float64, bool) { + t.Helper() + for i := 0; i < m.ResourceMetrics().Len(); i++ { + for j := 0; j < m.ResourceMetrics().At(i).ScopeMetrics().Len(); j++ { + metrics := m.ResourceMetrics().At(i).ScopeMetrics().At(j).Metrics() + for k := 0; k < metrics.Len(); k++ { + metric := metrics.At(k) + if metric.Name() != metricName { + continue + } + var dps pmetric.NumberDataPointSlice + switch metric.Type() { + case pmetric.MetricTypeGauge: + dps = metric.Gauge().DataPoints() + case pmetric.MetricTypeSum: + dps = metric.Sum().DataPoints() + default: + continue + } + for l := 0; l < dps.Len(); l++ { + dp := dps.At(l) + match := true + for attrKey, attrVal := range dpAttrs { + v, ok := dp.Attributes().Get(attrKey) + if !ok || v.Str() != attrVal { + match = false + break + } + } + if match { + switch dp.ValueType() { + case pmetric.NumberDataPointValueTypeInt: + return float64(dp.IntValue()), true + case pmetric.NumberDataPointValueTypeDouble: + return dp.DoubleValue(), true + } + } + } + } + } + } + return 0, false +} + +func TestRecordSpecMetrics(t *testing.T) { + pod := &corev1.Pod{ + ObjectMeta: v1.ObjectMeta{ + Name: "test-pod", + Namespace: "test-namespace", + UID: types.UID("test-pod-uid"), + }, + Spec: corev1.PodSpec{ + NodeName: "test-node", + Containers: []corev1.Container{ + { + Name: "test-container", + Image: "docker/test-image:v1.0", + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("512Mi"), + }, + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("500m"), + corev1.ResourceMemory: resource.MustParse("256Mi"), + }, + }, + }, + }, + }, + } + + ts := pcommon.Timestamp(time.Now().UnixNano()) + + t.Run("running container", func(t *testing.T) { + pod := pod.DeepCopy() + pod.Status.ContainerStatuses = []corev1.ContainerStatus{ + { + Name: "test-container", + Image: "docker/test-image:v1.0", + ContainerID: "docker://abc123", + Ready: true, + RestartCount: 2, + State: corev1.ContainerState{ + Running: &corev1.ContainerStateRunning{}, + }, + }, + } + + mb := metadata.NewMetricsBuilder(metadata.DefaultMetricsBuilderConfig(), receivertest.NewNopSettings(metadata.Type)) + RecordSpecMetrics(zap.NewNop(), mb, pod.Spec.Containers[0], pod, ts) + m := mb.Emit() + + v, ok := metricValue(t, m, "k8s.container.cpu_request", nil) + require.True(t, ok) + assert.InDelta(t, 0.5, v, 0.001) + + v, ok = metricValue(t, m, "k8s.container.cpu_limit", nil) + require.True(t, ok) + assert.InDelta(t, 2.0, v, 0.001) + + v, ok = metricValue(t, m, "k8s.container.memory_request", nil) + require.True(t, ok) + assert.Equal(t, float64(256*1024*1024), v) + + v, ok = metricValue(t, m, "k8s.container.memory_limit", nil) + require.True(t, ok) + assert.Equal(t, float64(512*1024*1024), v) + + v, ok = metricValue(t, m, "k8s.container.restarts", nil) + require.True(t, ok) + assert.Equal(t, float64(2), v) + + v, ok = metricValue(t, m, "k8s.container.ready", nil) + require.True(t, ok) + assert.Equal(t, float64(1), v) + }) + + t.Run("terminated container records restarts and ready=0", func(t *testing.T) { + pod := pod.DeepCopy() + pod.Status.ContainerStatuses = []corev1.ContainerStatus{ + { + Name: "test-container", + Image: "docker/test-image:v1.0", + ContainerID: "docker://def456", + Ready: false, + RestartCount: 5, + State: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + Reason: "OOMKilled", + ExitCode: 137, + }, + }, + LastTerminationState: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + Reason: "OOMKilled", + }, + }, + }, + } + + mbc := metadata.DefaultMetricsBuilderConfig() + mbc.Metrics.K8sContainerStatusState.Enabled = true + mbc.Metrics.K8sContainerStatusReason.Enabled = true + mb := metadata.NewMetricsBuilder(mbc, receivertest.NewNopSettings(metadata.Type)) + RecordSpecMetrics(zap.NewNop(), mb, pod.Spec.Containers[0], pod, ts) + m := mb.Emit() + + v, ok := metricValue(t, m, "k8s.container.restarts", nil) + require.True(t, ok) + assert.Equal(t, float64(5), v) + + v, ok = metricValue(t, m, "k8s.container.ready", nil) + require.True(t, ok) + assert.Equal(t, float64(0), v) + + // Terminated state: terminated=1, others=0. + v, ok = metricValue(t, m, "k8s.container.status.state", map[string]string{"k8s.container.status.state": "terminated"}) + require.True(t, ok) + assert.Equal(t, float64(1), v) + + v, ok = metricValue(t, m, "k8s.container.status.state", map[string]string{"k8s.container.status.state": "running"}) + require.True(t, ok) + assert.Equal(t, float64(0), v) + + v, ok = metricValue(t, m, "k8s.container.status.state", map[string]string{"k8s.container.status.state": "waiting"}) + require.True(t, ok) + assert.Equal(t, float64(0), v) + + // OOMKilled reason=1, others=0. + v, ok = metricValue(t, m, "k8s.container.status.reason", map[string]string{"k8s.container.status.reason": "OOMKilled"}) + require.True(t, ok) + assert.Equal(t, float64(1), v) + + v, ok = metricValue(t, m, "k8s.container.status.reason", map[string]string{"k8s.container.status.reason": "CrashLoopBackOff"}) + require.True(t, ok) + assert.Equal(t, float64(0), v) + }) + + t.Run("waiting container", func(t *testing.T) { + pod := pod.DeepCopy() + pod.Status.ContainerStatuses = []corev1.ContainerStatus{ + { + Name: "test-container", + Image: "docker/test-image:v1.0", + ContainerID: "", + Ready: false, + RestartCount: 3, + State: corev1.ContainerState{ + Waiting: &corev1.ContainerStateWaiting{ + Reason: "CrashLoopBackOff", + }, + }, + }, + } + + mbc := metadata.DefaultMetricsBuilderConfig() + mbc.Metrics.K8sContainerStatusState.Enabled = true + mbc.Metrics.K8sContainerStatusReason.Enabled = true + mb := metadata.NewMetricsBuilder(mbc, receivertest.NewNopSettings(metadata.Type)) + RecordSpecMetrics(zap.NewNop(), mb, pod.Spec.Containers[0], pod, ts) + m := mb.Emit() + + // Waiting state: waiting=1, others=0. + v, ok := metricValue(t, m, "k8s.container.status.state", map[string]string{"k8s.container.status.state": "waiting"}) + require.True(t, ok) + assert.Equal(t, float64(1), v) + + v, ok = metricValue(t, m, "k8s.container.status.state", map[string]string{"k8s.container.status.state": "running"}) + require.True(t, ok) + assert.Equal(t, float64(0), v) + + v, ok = metricValue(t, m, "k8s.container.status.state", map[string]string{"k8s.container.status.state": "terminated"}) + require.True(t, ok) + assert.Equal(t, float64(0), v) + + // CrashLoopBackOff reason=1, others=0. + v, ok = metricValue(t, m, "k8s.container.status.reason", map[string]string{"k8s.container.status.reason": "CrashLoopBackOff"}) + require.True(t, ok) + assert.Equal(t, float64(1), v) + + v, ok = metricValue(t, m, "k8s.container.status.reason", map[string]string{"k8s.container.status.reason": "OOMKilled"}) + require.True(t, ok) + assert.Equal(t, float64(0), v) + }) + + t.Run("no matching container status", func(t *testing.T) { + // Pod has no ContainerStatuses, so cs is nil inside RecordSpecMetrics. + // This covers the nil guard added around docker.ParseImageName. + pod := pod.DeepCopy() + + mb := metadata.NewMetricsBuilder(metadata.DefaultMetricsBuilderConfig(), receivertest.NewNopSettings(metadata.Type)) + RecordSpecMetrics(zap.NewNop(), mb, pod.Spec.Containers[0], pod, ts) + m := mb.Emit() + + // Resource-level metrics from the spec are still recorded. + v, ok := metricValue(t, m, "k8s.container.cpu_request", nil) + require.True(t, ok) + assert.InDelta(t, 0.5, v, 0.001) + + v, ok = metricValue(t, m, "k8s.container.cpu_limit", nil) + require.True(t, ok) + assert.InDelta(t, 2.0, v, 0.001) + + // Per-status metrics (restarts, ready) are not emitted when cs is nil. + _, ok = metricValue(t, m, "k8s.container.restarts", nil) + assert.False(t, ok) + + _, ok = metricValue(t, m, "k8s.container.ready", nil) + assert.False(t, ok) + }) +} + func TestGetMetadata(t *testing.T) { refTime := v1.Now() pod := &corev1.Pod{