Skip to content

Commit

Permalink
Merged PR 41659: Add metrics with correct names for Resource Monitori…
Browse files Browse the repository at this point in the history
…ng (#5341)

Add metrics with correct names for Resource Monitoring (#5341)

Fixes #5113
Previous art: #5309

Add new metrics with correct names. Old metrics will continue to be enabled by default.

### Existing metric setup

**Windows Snapshot provider class**
    `process.cpu.utilization`
    `dotnet.process.memory.virtual.utilization`

**Windows Container Snapshot provider class**
    `process.cpu.utilization`
    `dotnet.process.memory.virtual.utilization`

**Linux Utilization Provider class**
    `process.cpu.utilization`
    `dotnet.process.memory.virtual.utilization`

### New metric setup

**Windows Snapshot provider class**
    `process.cpu.utilization` - no changes
    `dotnet.process.memory.virtual.utilization` - no changes

**Windows Container Snapshot provider class**
    `process.cpu.utilization` - no changes
    `dotnet.process.memory.virtual.utilization` - calculates memory for the dotnet process only (instead of all processes)
    `container.cpu.limit.utilization` - new metric, same value as `process.cpu.utilization`
    `container.memory.limit.utilization` - new metric, calculates memory for all processes in the container

**Linux Utilization Provider class**
    `process.cpu.utilization` - fixed an incorrect scale calculation: it was `host CPUs / CPU limit / CPU request` and is now `host CPUs / CPU request`
    `dotnet.process.memory.virtual.utilization` - no changes
    `container.cpu.limit.utilization` - new metric, value is relative to CPU resource limit (aka maximum CPU units)
    `container.memory.limit.utilization` - new metric, calculates memory for all processes in the container
    `container.cpu.request.utilization` - new metric, same value as `process.cpu.utilization`

----
#### AI description  (iteration 1)
#### PR Classification
New feature: Added metrics with correct names for resource monitoring.

#### PR Summary
This pull request introduces new metrics for resource monitoring with correct naming conventions and updates the related tests and implementation.
- `LinuxUtilizationProvider.cs`: Added new metrics for container CPU and memory utilization, and updated existing metrics.
- `AcceptanceTest.cs`: Added new tests for verifying the new metrics and updated existing tests for better coverage.
- `ResourceUtilizationInstruments.cs`: Defined new constants for the new metrics.
- Removed `WindowsCounters.cs` as it is no longer needed.
  • Loading branch information
joperezr committed Aug 13, 2024
1 parent 62abfe3 commit fa86c9a
Show file tree
Hide file tree
Showing 16 changed files with 363 additions and 162 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public static ResourceUtilization CalculateUtilization(in Snapshot first, in Sna
long runtimeTickDelta = second.TotalTimeSinceStart.Ticks - first.TotalTimeSinceStart.Ticks;

// Compute the total number of ticks available on the machine during that interval
double totalSystemTicks = runtimeTickDelta * systemResources.GuaranteedCpuUnits;
double totalSystemTicks = runtimeTickDelta;

// fudge to avoid divide by zero
if (totalSystemTicks <= 0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ private static bool TryGetCpuUnitsFromCgroups(IFileSystem fileSystem, out float
/// <summary>
/// In cgroup v1 the CPU shares is used to determine the CPU allocation.
/// in cgroup v2 the CPU weight is used to determine the CPU allocation.
/// To calculete CPU request in cgroup v2 we need to read the CPU weight and convert it to CPU shares.
/// To calculate CPU request in cgroup v2 we need to read the CPU weight and convert it to CPU shares.
/// But for cgroup v1 we can read the CPU shares directly from the file.
/// 1024 equals 1 CPU core.
/// In cgroup v1 on some systems the location of the CPU shares file is different.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,13 @@ internal sealed class LinuxUtilizationProvider : ISnapshotProvider
private readonly object _cpuLocker = new();
private readonly object _memoryLocker = new();
private readonly ILinuxUtilizationParser _parser;
private readonly ulong _totalMemoryInBytes;
private readonly ulong _memoryLimit;
private readonly TimeSpan _cpuRefreshInterval;
private readonly TimeSpan _memoryRefreshInterval;
private readonly TimeProvider _timeProvider;
private readonly double _scale;
private readonly double _scaleForTrackerApi;
private readonly double _scaleRelativeToCpuLimit;
private readonly double _scaleRelativeToCpuRequest;
private readonly double _scaleRelativeToCpuRequestForTrackerApi;

private DateTimeOffset _refreshAfterCpu;
private DateTimeOffset _refreshAfterMemory;
Expand All @@ -37,73 +38,73 @@ public LinuxUtilizationProvider(IOptions<ResourceMonitoringOptions> options, ILi
{
_parser = parser;
_timeProvider = timeProvider ?? TimeProvider.System;
var now = _timeProvider.GetUtcNow();
DateTimeOffset now = _timeProvider.GetUtcNow();
_cpuRefreshInterval = options.Value.CpuConsumptionRefreshInterval;
_memoryRefreshInterval = options.Value.MemoryConsumptionRefreshInterval;
_refreshAfterCpu = now;
_refreshAfterMemory = now;
_totalMemoryInBytes = _parser.GetAvailableMemoryInBytes();
_memoryLimit = _parser.GetAvailableMemoryInBytes();
_previousHostCpuTime = _parser.GetHostCpuUsageInNanoseconds();
_previousCgroupCpuTime = _parser.GetCgroupCpuUsageInNanoseconds();

var hostMemory = _parser.GetHostAvailableMemory();
var hostCpus = _parser.GetHostCpuCount();
var availableCpus = _parser.GetCgroupLimitedCpus();
var cpuGuaranteedRequest = _parser.GetCgroupRequestCpu();
_scale = hostCpus / availableCpus;
_scaleForTrackerApi = hostCpus / availableCpus;
float hostCpus = _parser.GetHostCpuCount();
float cpuLimit = _parser.GetCgroupLimitedCpus();
float cpuRequest = _parser.GetCgroupRequestCpu();
_scaleRelativeToCpuLimit = hostCpus / cpuLimit;
_scaleRelativeToCpuRequest = hostCpus / cpuRequest;
_scaleRelativeToCpuRequestForTrackerApi = hostCpus; // the division by cpuRequest is performed later on in the ResourceUtilization class

#pragma warning disable CA2000 // Dispose objects before losing scope
// We don't dispose the meter because IMeterFactory handles that
// An issue on analyzer side: https://github.com/dotnet/roslyn-analyzers/issues/6912
// Related documentation: https://github.com/dotnet/docs/pull/37170
var meter = meterFactory.Create("Microsoft.Extensions.Diagnostics.ResourceMonitoring");
var meter = meterFactory.Create(nameof(Microsoft.Extensions.Diagnostics.ResourceMonitoring));
#pragma warning restore CA2000 // Dispose objects before losing scope

_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.CpuUtilization, observeValue: CpuUtilization, unit: "1");
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.MemoryUtilization, observeValue: MemoryUtilization, unit: "1");
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerCpuLimitUtilization, observeValue: () => CpuUtilization() * _scaleRelativeToCpuLimit, unit: "1");
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerMemoryLimitUtilization, observeValue: MemoryUtilization, unit: "1");
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerCpuRequestUtilization, observeValue: () => CpuUtilization() * _scaleRelativeToCpuRequest, unit: "1");

// cpuGuaranteedRequest is a CPU request for pod, for host its 1 core
// available CPUs is a CPU limit for a pod or for a host.
// _totalMemoryInBytes - Resource Memory Limit (in k8s terms)
// _totalMemoryInBytes - To keep the contract, this parameter will get the Host available memory
Resources = new SystemResources(cpuGuaranteedRequest, availableCpus, _totalMemoryInBytes, _totalMemoryInBytes);
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ProcessCpuUtilization, observeValue: () => CpuUtilization() * _scaleRelativeToCpuRequest, unit: "1");
_ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ProcessMemoryUtilization, observeValue: MemoryUtilization, unit: "1");

// cpuRequest is the CPU request (aka guaranteed number of CPU units) for a pod; for a host it's 1 core.
// cpuLimit is a CPU limit (aka max CPU units available) for a pod or for a host.
// _memoryLimit - Resource Memory Limit (in k8s terms)
// _memoryLimit - To keep the contract, this parameter will get the Host available memory
Resources = new SystemResources(cpuRequest, cpuLimit, _memoryLimit, _memoryLimit);
}

public double CpuUtilization()
{
var now = _timeProvider.GetUtcNow();
bool needUpdate = false;
DateTimeOffset now = _timeProvider.GetUtcNow();

lock (_cpuLocker)
{
if (now >= _refreshAfterCpu)
if (now < _refreshAfterCpu)
{
needUpdate = true;
return _cpuPercentage;
}
}

if (needUpdate)
{
var hostCpuTime = _parser.GetHostCpuUsageInNanoseconds();
var cgroupCpuTime = _parser.GetCgroupCpuUsageInNanoseconds();
long hostCpuTime = _parser.GetHostCpuUsageInNanoseconds();
long cgroupCpuTime = _parser.GetCgroupCpuUsageInNanoseconds();

lock (_cpuLocker)
lock (_cpuLocker)
{
if (now >= _refreshAfterCpu)
{
if (now >= _refreshAfterCpu)
double deltaHost = hostCpuTime - _previousHostCpuTime;
double deltaCgroup = cgroupCpuTime - _previousCgroupCpuTime;

if (deltaHost > 0 && deltaCgroup > 0)
{
var deltaHost = hostCpuTime - _previousHostCpuTime;
var deltaCgroup = cgroupCpuTime - _previousCgroupCpuTime;

if (deltaHost > 0 && deltaCgroup > 0)
{
var percentage = Math.Min(One, deltaCgroup / deltaHost * _scale);

_cpuPercentage = percentage;
_refreshAfterCpu = now.Add(_cpuRefreshInterval);
_previousCgroupCpuTime = cgroupCpuTime;
_previousHostCpuTime = hostCpuTime;
}
double percentage = Math.Min(One, deltaCgroup / deltaHost);

_cpuPercentage = percentage;
_refreshAfterCpu = now.Add(_cpuRefreshInterval);
_previousCgroupCpuTime = cgroupCpuTime;
_previousHostCpuTime = hostCpuTime;
}
}
}
Expand All @@ -113,30 +114,26 @@ public double CpuUtilization()

public double MemoryUtilization()
{
var now = _timeProvider.GetUtcNow();
bool needUpdate = false;
DateTimeOffset now = _timeProvider.GetUtcNow();

lock (_memoryLocker)
{
if (now >= _refreshAfterMemory)
if (now < _refreshAfterMemory)
{
needUpdate = true;
return _memoryPercentage;
}
}

if (needUpdate)
{
var memoryUsed = _parser.GetMemoryUsageInBytes();
ulong memoryUsed = _parser.GetMemoryUsageInBytes();

lock (_memoryLocker)
lock (_memoryLocker)
{
if (now >= _refreshAfterMemory)
{
if (now >= _refreshAfterMemory)
{
var memoryPercentage = Math.Min(One, (double)memoryUsed / _totalMemoryInBytes);
double memoryPercentage = Math.Min(One, (double)memoryUsed / _memoryLimit);

_memoryPercentage = memoryPercentage;
_refreshAfterMemory = now.Add(_memoryRefreshInterval);
}
_memoryPercentage = memoryPercentage;
_refreshAfterMemory = now.Add(_memoryRefreshInterval);
}
}

Expand All @@ -150,14 +147,14 @@ public double MemoryUtilization()
/// </remarks>
public Snapshot GetSnapshot()
{
var hostTime = _parser.GetHostCpuUsageInNanoseconds();
var cgroupTime = _parser.GetCgroupCpuUsageInNanoseconds();
var memoryUsed = _parser.GetMemoryUsageInBytes();
long hostTime = _parser.GetHostCpuUsageInNanoseconds();
long cgroupTime = _parser.GetCgroupCpuUsageInNanoseconds();
ulong memoryUsed = _parser.GetMemoryUsageInBytes();

return new Snapshot(
totalTimeSinceStart: TimeSpan.FromTicks(hostTime / Hundred),
kernelTimeSinceStart: TimeSpan.Zero,
userTimeSinceStart: TimeSpan.FromTicks((long)(cgroupTime / Hundred * _scaleForTrackerApi)),
userTimeSinceStart: TimeSpan.FromTicks((long)(cgroupTime / Hundred * _scaleRelativeToCpuRequestForTrackerApi)),
memoryUsageInBytes: memoryUsed);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ private static ResourceMonitorBuilder AddWindowsProvider(this ResourceMonitorBui
builder.PickWindowsSnapshotProvider();

_ = builder.Services
.AddActivatedSingleton<WindowsCounters>();
.AddActivatedSingleton<WindowsNetworkMetrics>();

_ = builder.Services
.AddActivatedSingleton<TcpTableInfo>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,16 @@ public readonly struct ResourceUtilization
/// <param name="systemResources">CPU and memory limits.</param>
public ResourceUtilization(double cpuUsedPercentage, ulong memoryUsedInBytes, SystemResources systemResources)
{
CpuUsedPercentage = Throw.IfLessThan(cpuUsedPercentage, 0.0);
double guaranteedCpuUnits = systemResources.GuaranteedCpuUnits;
if (guaranteedCpuUnits <= 0)
{
guaranteedCpuUnits = 1;
}

CpuUsedPercentage = Throw.IfLessThan(cpuUsedPercentage / guaranteedCpuUnits, 0.0);
MemoryUsedInBytes = Throw.IfLessThan(memoryUsedInBytes, 0);
SystemResources = systemResources;
MemoryUsedPercentage = Math.Min(Hundred, (double)MemoryUsedInBytes / SystemResources.GuaranteedMemoryInBytes * Hundred);
MemoryUsedPercentage = Math.Min(Hundred, (double)MemoryUsedInBytes / systemResources.GuaranteedMemoryInBytes * Hundred);
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;

namespace Microsoft.Extensions.Diagnostics.ResourceMonitoring;

/// <summary>
Expand All @@ -13,18 +15,42 @@ namespace Microsoft.Extensions.Diagnostics.ResourceMonitoring;
internal static class ResourceUtilizationInstruments
{
/// <summary>
/// Gets the CPU consumption of the running application in range <c>[0, 1]</c>.
/// The name of an instrument to retrieve CPU limit consumption of all processes running inside a container or control group in range <c>[0, 1]</c>.
/// </summary>
/// <remarks>
/// The type of an instrument is <see cref="System.Diagnostics.Metrics.ObservableGauge{T}"/>.
/// </remarks>
public const string ContainerCpuLimitUtilization = "container.cpu.limit.utilization";

/// <summary>
/// The name of an instrument to retrieve CPU request consumption of all processes running inside a container or control group in range <c>[0, 1]</c>.
/// </summary>
/// <remarks>
/// The type of an instrument is <see cref="System.Diagnostics.Metrics.ObservableGauge{T}"/>.
/// </remarks>
public const string ContainerCpuRequestUtilization = "container.cpu.request.utilization";

/// <summary>
/// The name of an instrument to retrieve memory limit consumption of all processes running inside a container or control group in range <c>[0, 1]</c>.
/// </summary>
/// <remarks>
/// The type of an instrument is <see cref="System.Diagnostics.Metrics.ObservableGauge{T}"/>.
/// </remarks>
public const string ContainerMemoryLimitUtilization = "container.memory.limit.utilization";

/// <summary>
/// The name of an instrument to retrieve CPU consumption share of the running process in range <c>[0, 1]</c>.
/// </summary>
/// <remarks>
/// The type of an instrument is <see cref="System.Diagnostics.Metrics.ObservableGauge{T}"/>.
/// </remarks>
public const string CpuUtilization = "process.cpu.utilization";
public const string ProcessCpuUtilization = "process.cpu.utilization";

/// <summary>
/// Gets the memory consumption of the running application in range <c>[0, 1]</c>.
/// The name of an instrument to retrieve memory consumption share of the running process in range <c>[0, 1]</c>.
/// </summary>
/// <remarks>
/// The type of an instrument is <see cref="System.Diagnostics.Metrics.ObservableGauge{T}"/>.
/// </remarks>
public const string MemoryUtilization = "dotnet.process.memory.virtual.utilization";
public const string ProcessMemoryUtilization = "dotnet.process.memory.virtual.utilization";
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,14 @@ namespace Microsoft.Extensions.Diagnostics.ResourceMonitoring.Windows.Interop;
internal interface IProcessInfo
{
/// <summary>
/// Retrieve the memory usage of a system.
/// Retrieves the amount of memory, in bytes, used by the current process.
/// </summary>
/// <returns>Memory usage amount in bytes.</returns>
/// <returns>The number of bytes allocated by the current process.</returns>
ulong GetCurrentProcessMemoryUsage();

/// <summary>
/// Retrieves the amount of memory, in bytes, used by the system.
/// </summary>
/// <returns>The number of bytes allocated by the system.</returns>
ulong GetMemoryUsage();
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,10 @@ public ulong GetMemoryUsage()

return memoryUsage;
}

/// <summary>
/// Retrieves the working set size, in bytes, of the current process.
/// </summary>
/// <returns>The current process's <see cref="Process.WorkingSet64"/> value converted to <see cref="ulong"/>.</returns>
public ulong GetCurrentProcessMemoryUsage()
{
    // Process is disposable; scope the handle tightly so it is released as soon as the value is read.
    using (Process currentProcess = Process.GetCurrentProcess())
    {
        long workingSetBytes = currentProcess.WorkingSet64;
        return (ulong)workingSetBytes;
    }
}
}
Loading

0 comments on commit fa86c9a

Please sign in to comment.