diff --git a/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/LinuxUtilizationProvider.cs b/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/LinuxUtilizationProvider.cs
index 0df5425aae0..b61085a4ce2 100644
--- a/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/LinuxUtilizationProvider.cs
+++ b/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/LinuxUtilizationProvider.cs
@@ -14,6 +14,11 @@ internal sealed class LinuxUtilizationProvider : ISnapshotProvider
 {
     private const double One = 1.0;
     private const long Hundred = 100L;
+    private const double CpuLimitThreshold110Percent = 1.1;
+
+    // Counters that track how often CPU utilization exceeds 100% and 110% of the CPU limit
+    private readonly Counter<long>? _cpuUtilizationLimit100PercentExceededCounter;
+    private readonly Counter<long>? _cpuUtilizationLimit110PercentExceededCounter;
 
     private readonly object _cpuLocker = new();
     private readonly object _memoryLocker = new();
@@ -38,6 +43,8 @@ internal sealed class LinuxUtilizationProvider : ISnapshotProvider
     private double _memoryPercentage;
     private long _previousCgroupCpuTime;
     private long _previousHostCpuTime;
+    private long _cpuUtilizationLimit100PercentExceeded;
+    private long _cpuUtilizationLimit110PercentExceeded;
     public SystemResources Resources { get; }
 
     public LinuxUtilizationProvider(IOptions options, ILinuxUtilizationParser parser,
@@ -77,17 +84,21 @@ public LinuxUtilizationProvider(IOptions options, ILi
 
             // Try to get the CPU request from cgroup
             cpuRequest = _parser.GetCgroupRequestCpuV2();
-            _ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerCpuLimitUtilization, observeValue: () => CpuUtilizationWithoutHostDelta() / cpuLimit, unit: "1");
+
+            // Create the threshold counters under the names declared in ResourceUtilizationInstruments
+            _cpuUtilizationLimit100PercentExceededCounter = meter.CreateCounter<long>(ResourceUtilizationInstruments.CpuUtilizationLimit100PercentExceeded);
+            _cpuUtilizationLimit110PercentExceededCounter = meter.CreateCounter<long>(ResourceUtilizationInstruments.CpuUtilizationLimit110PercentExceeded);
+            _ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerCpuLimitUtilization, observeValue: () => CpuUtilizationLimit(cpuLimit), unit: "1");
             _ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerCpuRequestUtilization, observeValue: () => CpuUtilizationWithoutHostDelta() / cpuRequest, unit: "1");
         }
         else
         {
             _ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerCpuLimitUtilization, observeValue: () => CpuUtilization() * _scaleRelativeToCpuLimit, unit: "1");
             _ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerCpuRequestUtilization, observeValue: () => CpuUtilization() * _scaleRelativeToCpuRequest, unit: "1");
+            _ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ProcessCpuUtilization, observeValue: () => CpuUtilization() * _scaleRelativeToCpuRequest, unit: "1");
         }
 
         _ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerMemoryLimitUtilization, observeValue: MemoryUtilization, unit: "1");
-        _ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ProcessCpuUtilization, observeValue: () => CpuUtilization() * _scaleRelativeToCpuRequest, unit: "1");
         _ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ProcessMemoryUtilization, observeValue: MemoryUtilization, unit: "1");
 
         // cpuRequest is a CPU request (aka guaranteed number of CPU units) for pod, for host its 1 core
@@ -138,6 +149,34 @@ public double CpuUtilizationWithoutHostDelta()
         return _lastCpuCoresUsed;
     }
 
+    /// <summary>
+    /// Calculates CPU utilization relative to the CPU limit and counts observations above 100% and 110% of it.
+    /// </summary>
+    /// <param name="cpuLimit">The CPU limit to use for the calculation.</param>
+    /// <returns>CPU usage as a ratio of the limit.</returns>
+    public double CpuUtilizationLimit(float cpuLimit)
+    {
+        double utilization = CpuUtilizationWithoutHostDelta() / cpuLimit;
+
+        // Count observations above 100% of the limit; the atomic increment keeps the tally exact if gauge callbacks overlap
+        if (utilization > One)
+        {
+            _cpuUtilizationLimit100PercentExceededCounter?.Add(1);
+            long exceeded100 = System.Threading.Interlocked.Increment(ref _cpuUtilizationLimit100PercentExceeded);
+            Log.CounterMessage100(_logger, exceeded100);
+        }
+
+        // Count observations above 110% of the limit (these were also counted by the 100% check above)
+        if (utilization > CpuLimitThreshold110Percent)
+        {
+            _cpuUtilizationLimit110PercentExceededCounter?.Add(1);
+            long exceeded110 = System.Threading.Interlocked.Increment(ref _cpuUtilizationLimit110PercentExceeded);
+            Log.CounterMessage110(_logger, exceeded110);
+        }
+
+        return utilization;
+    }
+
     public double CpuUtilization()
     {
         DateTimeOffset now = _timeProvider.GetUtcNow();
diff --git a/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/Log.cs b/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/Log.cs
index 1b859d6d493..918087b1b78 100644
--- a/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/Log.cs
+++ b/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/Log.cs
@@ -44,4 +44,16 @@ public static partial void CpuUsageDataV2(
         long previousCgroupCpuTime,
         double actualElapsedNanoseconds,
         double cpuCores);
+
+    [LoggerMessage(5, LogLevel.Debug,
+        "CPU utilization exceeded 100%: Counter = {counterValue}")]
+    public static partial void CounterMessage100(
+        ILogger logger,
+        long counterValue);
+
+    [LoggerMessage(6, LogLevel.Debug,
+        "CPU utilization exceeded 110%: Counter = {counterValue}")]
+    public static partial void CounterMessage110(
+        ILogger logger,
+        long counterValue);
 }
diff --git a/src/Shared/Instruments/ResourceUtilizationInstruments.cs b/src/Shared/Instruments/ResourceUtilizationInstruments.cs
index a5a95a254c5..73b33b7b75b 100644
--- a/src/Shared/Instruments/ResourceUtilizationInstruments.cs
+++ b/src/Shared/Instruments/ResourceUtilizationInstruments.cs
@@ -65,6 +65,22 @@ internal static class ResourceUtilizationInstruments
     /// The type of an instrument is .
     ///
     public const string SystemNetworkConnections = "system.network.connections";
+
+    /// <summary>
+    /// The name of an instrument to count occurrences when CPU utilization exceeds 100% of the limit.
+    /// </summary>
+    /// <remarks>
+    /// The type of an instrument is <see cref="System.Diagnostics.Metrics.Counter{T}"/>.
+    /// </remarks>
+    public const string CpuUtilizationLimit100PercentExceeded = "cpu.utilization.limit.100percent.exceeded";
+
+    /// <summary>
+    /// The name of an instrument to count occurrences when CPU utilization exceeds 110% of the limit.
+    /// </summary>
+    /// <remarks>
+    /// The type of an instrument is <see cref="System.Diagnostics.Metrics.Counter{T}"/>.
+    /// </remarks>
+    public const string CpuUtilizationLimit110PercentExceeded = "cpu.utilization.limit.110percent.exceeded";
 }
 
 #pragma warning disable CS1574
diff --git a/test/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring.Tests/Linux/LinuxUtilizationProviderTests.cs b/test/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring.Tests/Linux/LinuxUtilizationProviderTests.cs
index d5b536d8386..97dd5345717 100644
--- a/test/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring.Tests/Linux/LinuxUtilizationProviderTests.cs
+++ b/test/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring.Tests/Linux/LinuxUtilizationProviderTests.cs
@@ -258,7 +258,7 @@ public void Provider_Registers_Instruments_CgroupV2_WithoutHostCpu()
         listener.Start();
         listener.RecordObservableInstruments();
 
-        Assert.Equal(5, samples.Count);
+        Assert.Equal(4, samples.Count);
 
         Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization);
         Assert.True(double.IsNaN(samples.Single(i => i.instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization).value));
@@ -269,9 +269,6 @@ public void Provider_Registers_Instruments_CgroupV2_WithoutHostCpu()
         Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerMemoryLimitUtilization);
         Assert.Equal(1, samples.Single(i => i.instrument.Name == ResourceUtilizationInstruments.ContainerMemoryLimitUtilization).value);
 
-        Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ProcessCpuUtilization);
-        Assert.True(double.IsNaN(samples.Single(i => i.instrument.Name == ResourceUtilizationInstruments.ProcessCpuUtilization).value));
-
         Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization);
         Assert.Equal(1, samples.Single(i => i.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization).value);
     }