diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md b/sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md index d589309891ac..56200abdfc9d 100644 --- a/sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md +++ b/sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md @@ -6,6 +6,8 @@ - Add live metrics collection of requests/dependencies/exceptions ([#34673](https://github.com/Azure/azure-sdk-for-python/pull/34673)) +- Add live metrics collection of cpu time/process memory + ([#34735](https://github.com/Azure/azure-sdk-for-python/pull/34735)) ### Breaking Changes diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_quickpulse/_constants.py b/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_quickpulse/_constants.py index 94155b73f32c..b34228ce923d 100644 --- a/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_quickpulse/_constants.py +++ b/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_quickpulse/_constants.py @@ -49,4 +49,4 @@ class _DocumentIngressDocumentType(Enum): Event = "Event" Trace = "Trace" -# cSpell:disable +# cSpell:enable diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_quickpulse/_live_metrics.py b/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_quickpulse/_live_metrics.py index c84ae7b2a906..feb6922f4428 100644 --- a/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_quickpulse/_live_metrics.py +++ b/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_quickpulse/_live_metrics.py @@ -1,8 +1,13 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +# cSpell:disable + +from typing import Any, Iterable, Optional + import platform -from typing import Any, Optional +import psutil +from opentelemetry.metrics import CallbackOptions, Observation from opentelemetry.sdk._logs import LogData from opentelemetry.sdk.metrics import MeterProvider from opentelemetry.sdk.resources import Resource @@ -13,10 +18,12 @@ from azure.monitor.opentelemetry.exporter._generated.models import ContextTagKeys from azure.monitor.opentelemetry.exporter._quickpulse._constants import ( + _COMMITTED_BYTES_NAME, _DEPENDENCY_DURATION_NAME, _DEPENDENCY_FAILURE_RATE_NAME, _DEPENDENCY_RATE_NAME, _EXCEPTION_RATE_NAME, + _PROCESSOR_TIME_NAME, _REQUEST_DURATION_NAME, _REQUEST_FAILURE_RATE_NAME, _REQUEST_RATE_NAME, @@ -43,6 +50,8 @@ ) +PROCESS = psutil.Process() + def enable_live_metrics(**kwargs: Any) -> None: """Live metrics entry point. @@ -113,6 +122,14 @@ def __init__(self, connection_string: Optional[str], resource: Optional[Resource "exc/sec", "live metrics exception rate per second" ) + self._process_memory_gauge = self._meter.create_observable_gauge( + _COMMITTED_BYTES_NAME[0], + [_get_process_memory], + ) + self._processor_time_gauge = self._meter.create_observable_gauge( + _PROCESSOR_TIME_NAME[0], + [_get_processor_time], + ) def _record_span(self, span: ReadableSpan) -> None: # Only record if in post state @@ -150,3 +167,23 @@ def _record_log_record(self, log_data: LogData) -> None: exc_message = log_record.attributes.get(SpanAttributes.EXCEPTION_MESSAGE) if exc_type is not None or exc_message is not None: self._exception_rate_counter.add(1) + + +# pylint: disable=unused-argument +def _get_process_memory(options: CallbackOptions) -> Iterable[Observation]: + # rss is non-swapped physical memory a process has used + yield Observation( + PROCESS.memory_info().rss, + {}, + ) + + +# pylint: disable=unused-argument +def _get_processor_time(options: CallbackOptions) -> Iterable[Observation]: + # Processor time does not include idle time + yield Observation( + 100 - psutil.cpu_times_percent().idle, + {}, + ) + +# cSpell:enable diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/setup.py b/sdk/monitor/azure-monitor-opentelemetry-exporter/setup.py index 56f8392cef1c..7668667ac21c 100644 --- a/sdk/monitor/azure-monitor-opentelemetry-exporter/setup.py +++ b/sdk/monitor/azure-monitor-opentelemetry-exporter/setup.py @@ -6,6 +6,7 @@ # license information. # -------------------------------------------------------------------------- +# cSpell:disable import os import re @@ -86,6 +87,7 @@ "msrest>=0.6.10", "opentelemetry-api~=1.21", "opentelemetry-sdk~=1.21", + "psutil>=5.9.8", ], entry_points={ "opentelemetry_traces_exporter": [ @@ -102,3 +104,5 @@ ] } ) + +# cSpell:enable diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/tests/quickpulse/test_live_metrics.py b/sdk/monitor/azure-monitor-opentelemetry-exporter/tests/quickpulse/test_live_metrics.py index f1699bd6beb7..464d7162ee07 100644 --- a/sdk/monitor/azure-monitor-opentelemetry-exporter/tests/quickpulse/test_live_metrics.py +++ b/sdk/monitor/azure-monitor-opentelemetry-exporter/tests/quickpulse/test_live_metrics.py @@ -1,22 +1,44 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +# cSpell:disable + +import collections import platform import unittest from unittest import mock -from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics import ( + Counter, + Histogram, + Meter, + MeterProvider, + ObservableGauge, +) from opentelemetry.sdk.resources import Resource, ResourceAttributes from opentelemetry.semconv.trace import SpanAttributes from opentelemetry.trace import SpanKind from azure.monitor.opentelemetry.exporter._generated.models import ContextTagKeys +from azure.monitor.opentelemetry.exporter._quickpulse._constants import ( + _COMMITTED_BYTES_NAME, + _DEPENDENCY_DURATION_NAME, + _DEPENDENCY_FAILURE_RATE_NAME, + _DEPENDENCY_RATE_NAME, + _EXCEPTION_RATE_NAME, + _PROCESSOR_TIME_NAME, + _REQUEST_DURATION_NAME, + _REQUEST_FAILURE_RATE_NAME, + _REQUEST_RATE_NAME, +) from azure.monitor.opentelemetry.exporter._quickpulse._exporter import ( _QuickpulseExporter, _QuickpulseMetricReader, ) from azure.monitor.opentelemetry.exporter._quickpulse._live_metrics import ( enable_live_metrics, + _get_process_memory, + _get_processor_time, _QuickpulseManager, ) from azure.monitor.opentelemetry.exporter._quickpulse._state import ( @@ -92,6 +114,28 @@ def test_init(self, generator_mock): self.assertEqual(qpm._reader._base_monitoring_data_point, qpm._base_monitoring_data_point) self.assertTrue(isinstance(qpm._meter_provider, MeterProvider)) self.assertEqual(qpm._meter_provider._sdk_config.metric_readers, [qpm._reader]) + self.assertTrue(isinstance(qpm._meter, Meter)) + self.assertEqual(qpm._meter.name, "azure_monitor_live_metrics") + self.assertTrue(isinstance(qpm._request_duration, Histogram)) + self.assertEqual(qpm._request_duration.name, _REQUEST_DURATION_NAME[0]) + self.assertTrue(isinstance(qpm._dependency_duration, Histogram)) + self.assertEqual(qpm._dependency_duration.name, _DEPENDENCY_DURATION_NAME[0]) + self.assertTrue(isinstance(qpm._request_rate_counter, Counter)) + self.assertEqual(qpm._request_rate_counter.name, _REQUEST_RATE_NAME[0]) + self.assertTrue(isinstance(qpm._request_failed_rate_counter, Counter)) + self.assertEqual(qpm._request_failed_rate_counter.name, _REQUEST_FAILURE_RATE_NAME[0]) + self.assertTrue(isinstance(qpm._dependency_rate_counter, Counter)) + self.assertEqual(qpm._dependency_rate_counter.name, _DEPENDENCY_RATE_NAME[0]) + self.assertTrue(isinstance(qpm._dependency_failure_rate_counter, Counter)) + self.assertEqual(qpm._dependency_failure_rate_counter.name, _DEPENDENCY_FAILURE_RATE_NAME[0]) + self.assertTrue(isinstance(qpm._exception_rate_counter, Counter)) + self.assertEqual(qpm._exception_rate_counter.name, _EXCEPTION_RATE_NAME[0]) + self.assertTrue(isinstance(qpm._process_memory_gauge, ObservableGauge)) + self.assertEqual(qpm._process_memory_gauge.name, _COMMITTED_BYTES_NAME[0]) + self.assertEqual(qpm._process_memory_gauge._callbacks, [_get_process_memory]) + self.assertTrue(isinstance(qpm._processor_time_gauge, ObservableGauge)) + self.assertEqual(qpm._processor_time_gauge.name, _PROCESSOR_TIME_NAME[0]) + self.assertEqual(qpm._processor_time_gauge._callbacks, [_get_processor_time]) def test_singleton(self): @@ -247,3 +291,23 @@ def test_record_log_exception(self, post_state_mock, log_doc_mock, append_doc_mo qpm._record_log_record(log_data_mock) append_doc_mock.assert_called_once_with(log_record_doc) qpm._exception_rate_counter.add.assert_called_once_with(1) + + def test_process_memory(self): + with mock.patch("azure.monitor.opentelemetry.exporter._quickpulse._live_metrics.PROCESS") as process_mock: + memory = collections.namedtuple('memory', 'rss') + pmem = memory(rss=40) + process_mock.memory_info.return_value = pmem + mem = _get_process_memory(None) + obs = next(mem) + self.assertEqual(obs.value, 40) + + @mock.patch("psutil.cpu_times_percent") + def test_processor_time(self, processor_mock): + cpu = collections.namedtuple('cpu', 'idle') + cpu_times = cpu(idle=94.5) + processor_mock.return_value = cpu_times + time = _get_processor_time(None) + obs = next(time) + self.assertEqual(obs.value, 5.5) + +# cSpell:enable diff --git a/shared_requirements.txt b/shared_requirements.txt index 564822a591a2..0cfd9cefeea1 100644 --- a/shared_requirements.txt +++ b/shared_requirements.txt @@ -17,6 +17,7 @@ azureml-telemetry cryptography msrestazure requests +psutil opencensus opencensus-ext-azure opencensus-ext-threading