diff --git a/pkg/collector/container/container.go b/pkg/collector/container/container.go index 0a0ace077..aaa3361d4 100644 --- a/pkg/collector/container/container.go +++ b/pkg/collector/container/container.go @@ -3,6 +3,8 @@ package container import ( + "errors" + "fmt" "strings" "github.com/Microsoft/hcsshim" @@ -192,6 +194,7 @@ func (c *Collector) Build() error { []string{"container_id"}, nil, ) + return nil } @@ -200,17 +203,11 @@ func (c *Collector) Build() error { func (c *Collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric) error { if err := c.collect(ch); err != nil { _ = level.Error(c.logger).Log("msg", "failed collecting collector metrics", "err", err) + return err } - return nil -} -// containerClose closes the container resource. -func (c *Collector) containerClose(container hcsshim.Container) { - err := container.Close() - if err != nil { - _ = level.Error(c.logger).Log("err", err) - } + return nil } func (c *Collector) collect(ch chan<- prometheus.Metric) error { @@ -218,6 +215,7 @@ func (c *Collector) collect(ch chan<- prometheus.Metric) error { containers, err := hcsshim.GetContainers(hcsshim.ComputeSystemQuery{Types: []string{"Container"}}) if err != nil { _ = level.Error(c.logger).Log("msg", "Err in Getting containers", "err", err) + return err } @@ -228,102 +226,138 @@ func (c *Collector) collect(ch chan<- prometheus.Metric) error { prometheus.GaugeValue, float64(count), ) + if count == 0 { return nil } containerPrefixes := make(map[string]string) + hasErrors := false for _, containerDetails := range containers { - // https://stackoverflow.com/questions/45617758/proper-way-to-release-resources-with-defer-in-a-loop - func() { - container, err := hcsshim.OpenContainer(containerDetails.ID) - if container != nil { - defer c.containerClose(container) - } - if err != nil { - _ = level.Error(c.logger).Log("msg", "err in opening container", "containerId", containerDetails.ID, "err", err) - return + containerIdWithPrefix := 
getContainerIdWithPrefix(containerDetails) + + if err = c.collectContainer(ch, containerDetails, containerIdWithPrefix); err != nil { + if hcsshim.IsNotExist(err) { + _ = level.Debug(c.logger).Log("msg", "err in fetching container statistics", "containerId", containerDetails.ID, "err", err) + } else { + _ = level.Error(c.logger).Log("msg", "err in fetching container statistics", "containerId", containerDetails.ID, "err", err) + hasErrors = true } - cstats, err := container.Statistics() - if err != nil { - _ = level.Error(c.logger).Log("msg", "err in fetching container Statistics", "containerId", containerDetails.ID, "err", err) - return - } + continue + } - containerIdWithPrefix := getContainerIdWithPrefix(containerDetails) - containerPrefixes[containerDetails.ID] = containerIdWithPrefix + containerPrefixes[containerDetails.ID] = containerIdWithPrefix + } - ch <- prometheus.MustNewConstMetric( - c.containerAvailable, - prometheus.CounterValue, - 1, - containerIdWithPrefix, - ) - ch <- prometheus.MustNewConstMetric( - c.usageCommitBytes, - prometheus.GaugeValue, - float64(cstats.Memory.UsageCommitBytes), - containerIdWithPrefix, - ) - ch <- prometheus.MustNewConstMetric( - c.usageCommitPeakBytes, - prometheus.GaugeValue, - float64(cstats.Memory.UsageCommitPeakBytes), - containerIdWithPrefix, - ) - ch <- prometheus.MustNewConstMetric( - c.usagePrivateWorkingSetBytes, - prometheus.GaugeValue, - float64(cstats.Memory.UsagePrivateWorkingSetBytes), - containerIdWithPrefix, - ) - ch <- prometheus.MustNewConstMetric( - c.runtimeTotal, - prometheus.CounterValue, - float64(cstats.Processor.TotalRuntime100ns)*perflib.TicksToSecondScaleFactor, - containerIdWithPrefix, - ) - ch <- prometheus.MustNewConstMetric( - c.runtimeUser, - prometheus.CounterValue, - float64(cstats.Processor.RuntimeUser100ns)*perflib.TicksToSecondScaleFactor, - containerIdWithPrefix, - ) - ch <- prometheus.MustNewConstMetric( - c.runtimeKernel, - prometheus.CounterValue, - 
float64(cstats.Processor.RuntimeKernel100ns)*perflib.TicksToSecondScaleFactor, - containerIdWithPrefix, - ) - ch <- prometheus.MustNewConstMetric( - c.readCountNormalized, - prometheus.CounterValue, - float64(cstats.Storage.ReadCountNormalized), - containerIdWithPrefix, - ) - ch <- prometheus.MustNewConstMetric( - c.readSizeBytes, - prometheus.CounterValue, - float64(cstats.Storage.ReadSizeBytes), - containerIdWithPrefix, - ) - ch <- prometheus.MustNewConstMetric( - c.writeCountNormalized, - prometheus.CounterValue, - float64(cstats.Storage.WriteCountNormalized), - containerIdWithPrefix, - ) - ch <- prometheus.MustNewConstMetric( - c.writeSizeBytes, - prometheus.CounterValue, - float64(cstats.Storage.WriteSizeBytes), - containerIdWithPrefix, - ) - }() + if err = c.collectNetworkMetrics(ch, containerPrefixes); err != nil { + return fmt.Errorf("error in fetching container network statistics: %w", err) + } + + if hasErrors { + return errors.New("errors while fetching container statistics") } + return nil +} + +func (c *Collector) collectContainer(ch chan<- prometheus.Metric, containerDetails hcsshim.ContainerProperties, containerIdWithPrefix string) error { + container, err := hcsshim.OpenContainer(containerDetails.ID) + if err != nil { + return fmt.Errorf("error in opening container: %w", err) + } + + defer func() { + if container == nil { + return + } + + if err := container.Close(); err != nil { + _ = level.Error(c.logger).Log("err", fmt.Errorf("error in closing container: %w", err)) + } + }() + + containerStats, err := container.Statistics() + if err != nil { + return fmt.Errorf("error in fetching container statistics: %w", err) + } + + ch <- prometheus.MustNewConstMetric( + c.containerAvailable, + prometheus.CounterValue, + 1, + containerIdWithPrefix, + ) + ch <- prometheus.MustNewConstMetric( + c.usageCommitBytes, + prometheus.GaugeValue, + float64(containerStats.Memory.UsageCommitBytes), + containerIdWithPrefix, + ) + ch <- prometheus.MustNewConstMetric( + 
c.usageCommitPeakBytes, + prometheus.GaugeValue, + float64(containerStats.Memory.UsageCommitPeakBytes), + containerIdWithPrefix, + ) + ch <- prometheus.MustNewConstMetric( + c.usagePrivateWorkingSetBytes, + prometheus.GaugeValue, + float64(containerStats.Memory.UsagePrivateWorkingSetBytes), + containerIdWithPrefix, + ) + ch <- prometheus.MustNewConstMetric( + c.runtimeTotal, + prometheus.CounterValue, + float64(containerStats.Processor.TotalRuntime100ns)*perflib.TicksToSecondScaleFactor, + containerIdWithPrefix, + ) + ch <- prometheus.MustNewConstMetric( + c.runtimeUser, + prometheus.CounterValue, + float64(containerStats.Processor.RuntimeUser100ns)*perflib.TicksToSecondScaleFactor, + containerIdWithPrefix, + ) + ch <- prometheus.MustNewConstMetric( + c.runtimeKernel, + prometheus.CounterValue, + float64(containerStats.Processor.RuntimeKernel100ns)*perflib.TicksToSecondScaleFactor, + containerIdWithPrefix, + ) + ch <- prometheus.MustNewConstMetric( + c.readCountNormalized, + prometheus.CounterValue, + float64(containerStats.Storage.ReadCountNormalized), + containerIdWithPrefix, + ) + ch <- prometheus.MustNewConstMetric( + c.readSizeBytes, + prometheus.CounterValue, + float64(containerStats.Storage.ReadSizeBytes), + containerIdWithPrefix, + ) + ch <- prometheus.MustNewConstMetric( + c.writeCountNormalized, + prometheus.CounterValue, + float64(containerStats.Storage.WriteCountNormalized), + containerIdWithPrefix, + ) + ch <- prometheus.MustNewConstMetric( + c.writeSizeBytes, + prometheus.CounterValue, + float64(containerStats.Storage.WriteSizeBytes), + containerIdWithPrefix, + ) + + return nil +} + +// collectNetworkMetrics collects network metrics for containers. +// With HNSv2, the network stats must be collected from hcsshim.HNSListEndpointRequest. +// The network statistics returned by container.Statistics() only contain data if HNSv1 is used. 
+// Ref: https://github.com/prometheus-community/windows_exporter/pull/1218 +func (c *Collector) collectNetworkMetrics(ch chan<- prometheus.Metric, containerPrefixes map[string]string) error { hnsEndpoints, err := hcsshim.HNSListEndpointRequest() if err != nil { _ = level.Warn(c.logger).Log("msg", "Failed to collect network stats for containers") @@ -344,13 +378,14 @@ func (c *Collector) collect(ch chan<- prometheus.Metric) error { for _, containerId := range endpoint.SharedContainers { containerIdWithPrefix, ok := containerPrefixes[containerId] - endpointId := strings.ToUpper(endpoint.Id) if !ok { - _ = level.Warn(c.logger).Log("msg", "Failed to collect network stats for container "+containerId) + _ = level.Debug(c.logger).Log("msg", "Failed to collect network stats for container "+containerId) continue } + endpointId := strings.ToUpper(endpoint.Id) + ch <- prometheus.MustNewConstMetric( c.bytesReceived, prometheus.CounterValue,