From 676a2cc774ef422eeef73dcddbf6059e7f3c64d3 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 23 Jun 2025 15:09:47 -0500 Subject: [PATCH 01/16] Refactor block metrics to use HS specific registry Fix `HomeserverMetricsManager` lints Fix `metrics_manager` missing in `Measure` `__slots__` --- synapse/metrics/homeserver_metrics_manager.py | 87 +++++++++++++++++++ synapse/server.py | 3 + synapse/util/metrics.py | 61 ++++++------- 3 files changed, 116 insertions(+), 35 deletions(-) create mode 100644 synapse/metrics/homeserver_metrics_manager.py diff --git a/synapse/metrics/homeserver_metrics_manager.py b/synapse/metrics/homeserver_metrics_manager.py new file mode 100644 index 00000000000..95a8104c62e --- /dev/null +++ b/synapse/metrics/homeserver_metrics_manager.py @@ -0,0 +1,87 @@ +# +# This file is licensed under the Affero General Public License (AGPL) version 3. +# +# Copyright (C) 2025 New Vector, Ltd +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# See the GNU Affero General Public License for more details: +# . +# + +from prometheus_client import CollectorRegistry, Counter + + +class BlockMetrics: + def __init__( + self, + metrics_collector_registry: CollectorRegistry, + ) -> None: + self.block_counter = Counter( + "synapse_util_metrics_block_count", + "", + ["block_name"], + registry=metrics_collector_registry, + ) + + self.block_timer = Counter( + "synapse_util_metrics_block_time_seconds", + "", + ["block_name"], + registry=metrics_collector_registry, + ) + + self.block_ru_utime = Counter( + "synapse_util_metrics_block_ru_utime_seconds", + "", + ["block_name"], + registry=metrics_collector_registry, + ) + + self.block_ru_stime = Counter( + "synapse_util_metrics_block_ru_stime_seconds", + "", + ["block_name"], + registry=metrics_collector_registry, + ) + + self.block_db_txn_count = Counter( + "synapse_util_metrics_block_db_txn_count", + "", + ["block_name"], + registry=metrics_collector_registry, + ) + + self.block_db_txn_duration = Counter( + "synapse_util_metrics_block_db_txn_duration_seconds", + "", + ["block_name"], + registry=metrics_collector_registry, + ) + """seconds spent waiting for db txns, excluding scheduling time, in this block""" + + self.block_db_sched_duration = Counter( + "synapse_util_metrics_block_db_sched_duration_seconds", + "", + ["block_name"], + registry=metrics_collector_registry, + ) + """seconds spent waiting for a db connection, in this block""" + + +class HomeserverMetricsManager: + """ + Homeserver-scoped metrics manager. + + This class serves as a container for the homeserver's global metrics objects. + """ + + def __init__(self) -> None: + self.metrics_collector_registry = CollectorRegistry(auto_describe=True) + + self.block_metrics = BlockMetrics( + metrics_collector_registry=self.metrics_collector_registry, + ) diff --git a/synapse/server.py b/synapse/server.py index fd16abb9ead..6b5302fa029 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -129,6 +129,7 @@ from synapse.media.media_repository import MediaRepository from synapse.metrics import register_threadpool from synapse.metrics.common_usage_metrics import CommonUsageMetricsManager +from synapse.metrics.homeserver_metrics_manager import HomeserverMetricsManager from synapse.module_api import ModuleApi from synapse.module_api.callbacks import ModuleApiCallbacks from synapse.notifier import Notifier, ReplicationNotifier @@ -310,6 +311,8 @@ def __init__( # This attribute is set by the free function `refresh_certificate`. self.tls_server_context_factory: Optional[IOpenSSLContextFactory] = None + self.metrics_manager = HomeserverMetricsManager() + def register_module_web_resource(self, path: str, resource: Resource) -> None: """Allows a module to register a web resource to be served at the given path. diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 6a389f7a7e8..98900df2ab7 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -33,7 +33,7 @@ TypeVar, ) -from prometheus_client import CollectorRegistry, Counter, Metric +from prometheus_client import CollectorRegistry, Metric from typing_extensions import Concatenate, ParamSpec from synapse.logging.context import ( @@ -42,36 +42,11 @@ current_context, ) from synapse.metrics import InFlightGauge +from synapse.metrics.homeserver_metrics_manager import HomeserverMetricsManager from synapse.util import Clock logger = logging.getLogger(__name__) -block_counter = Counter("synapse_util_metrics_block_count", "", ["block_name"]) - -block_timer = Counter("synapse_util_metrics_block_time_seconds", "", ["block_name"]) - -block_ru_utime = Counter( - "synapse_util_metrics_block_ru_utime_seconds", "", ["block_name"] -) - -block_ru_stime = Counter( - "synapse_util_metrics_block_ru_stime_seconds", "", ["block_name"] -) - -block_db_txn_count = Counter( - "synapse_util_metrics_block_db_txn_count", "", ["block_name"] -) - -# seconds spent waiting for db txns, excluding scheduling time, in this block -block_db_txn_duration = Counter( - "synapse_util_metrics_block_db_txn_duration_seconds", "", ["block_name"] -) - -# seconds spent waiting for a db connection, in this block -block_db_sched_duration = Counter( - "synapse_util_metrics_block_db_sched_duration_seconds", "", ["block_name"] -) - # This is dynamically created in InFlightGauge.__init__. class _InFlightMetric(Protocol): @@ -141,12 +116,15 @@ async def measured_func(self: HasClock, *args: P.args, **kwargs: P.kwargs) -> R: class Measure: __slots__ = [ "clock", + "metrics_manager", "name", "_logging_context", "start", ] - def __init__(self, clock: Clock, name: str) -> None: + def __init__( + self, clock: Clock, name: str, metrics_manager: HomeserverMetricsManager + ) -> None: """ Args: clock: An object with a "time()" method, which returns the current @@ -154,6 +132,7 @@ def __init__(self, clock: Clock, name: str) -> None: name: The name of the metric to report. """ self.clock = clock + self.metrics_manager = metrics_manager self.name = name curr_context = current_context() if not curr_context: @@ -198,13 +177,25 @@ def __exit__( self._logging_context.__exit__(exc_type, exc_val, exc_tb) try: - block_counter.labels(self.name).inc() - block_timer.labels(self.name).inc(duration) - block_ru_utime.labels(self.name).inc(usage.ru_utime) - block_ru_stime.labels(self.name).inc(usage.ru_stime) - block_db_txn_count.labels(self.name).inc(usage.db_txn_count) - block_db_txn_duration.labels(self.name).inc(usage.db_txn_duration_sec) - block_db_sched_duration.labels(self.name).inc(usage.db_sched_duration_sec) + self.metrics_manager.block_metrics.block_counter.labels(self.name).inc() + self.metrics_manager.block_metrics.block_timer.labels(self.name).inc( + duration + ) + self.metrics_manager.block_metrics.block_ru_utime.labels(self.name).inc( + usage.ru_utime + ) + self.metrics_manager.block_metrics.block_ru_stime.labels(self.name).inc( + usage.ru_stime + ) + self.metrics_manager.block_metrics.block_db_txn_count.labels(self.name).inc( + usage.db_txn_count + ) + self.metrics_manager.block_metrics.block_db_txn_duration.labels( + self.name + ).inc(usage.db_txn_duration_sec) + self.metrics_manager.block_metrics.block_db_sched_duration.labels( + self.name + ).inc(usage.db_sched_duration_sec) except ValueError: logger.warning("Failed to save metrics! Usage: %s", usage) From 420c83dbf95e49545000d436bb075028e660eb05 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 23 Jun 2025 15:11:26 -0500 Subject: [PATCH 02/16] Use global `REGISTRY` for now --- synapse/metrics/homeserver_metrics_manager.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/metrics/homeserver_metrics_manager.py b/synapse/metrics/homeserver_metrics_manager.py index 95a8104c62e..615a7d62653 100644 --- a/synapse/metrics/homeserver_metrics_manager.py +++ b/synapse/metrics/homeserver_metrics_manager.py @@ -12,7 +12,7 @@ # . # -from prometheus_client import CollectorRegistry, Counter +from prometheus_client import REGISTRY, CollectorRegistry, Counter class BlockMetrics: @@ -80,7 +80,9 @@ class HomeserverMetricsManager: """ def __init__(self) -> None: - self.metrics_collector_registry = CollectorRegistry(auto_describe=True) + # TODO: use `self.metrics_collector_registry = CollectorRegistry(auto_describe=True)` + # once we refactor our metrics endpoints to use the specified registry. + self.metrics_collector_registry = REGISTRY self.block_metrics = BlockMetrics( metrics_collector_registry=self.metrics_collector_registry, From 612cbd943934abce2f6e391766cc8b892706a958 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 23 Jun 2025 15:29:53 -0500 Subject: [PATCH 03/16] Refactor `InFlight` block metrics to be HS specific --- synapse/metrics/homeserver_metrics_manager.py | 25 +++++++++++++++ synapse/util/metrics.py | 31 +++++++------------ 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/synapse/metrics/homeserver_metrics_manager.py b/synapse/metrics/homeserver_metrics_manager.py index 615a7d62653..e67f13e05ac 100644 --- a/synapse/metrics/homeserver_metrics_manager.py +++ b/synapse/metrics/homeserver_metrics_manager.py @@ -12,8 +12,24 @@ # . # +from typing import Protocol + from prometheus_client import REGISTRY, CollectorRegistry, Counter +from synapse.metrics import InFlightGauge + + +# This is dynamically created in InFlightGauge.__init__. +class BlockInFlightMetric(Protocol): + """ + Sub-metrics used for the `InFlightGauge` for blocks. + """ + + real_time_max: float + """The longest observed duration of any single execution of this block, in seconds.""" + real_time_sum: float + """The cumulative time spent executing this block across all calls, in seconds.""" + class BlockMetrics: def __init__( @@ -71,6 +87,15 @@ def __init__( ) """seconds spent waiting for a db connection, in this block""" + self.in_flight: InFlightGauge[BlockInFlightMetric] = InFlightGauge( + "synapse_util_metrics_block_in_flight", + "", + labels=["block_name"], + # Matches the fields in the `BlockInFlightMetric` + sub_metrics=["real_time_max", "real_time_sum"], + ) + """Tracks the number of blocks currently active""" + class HomeserverMetricsManager: """ diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 98900df2ab7..a29afeedc31 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -41,28 +41,15 @@ LoggingContext, current_context, ) -from synapse.metrics import InFlightGauge -from synapse.metrics.homeserver_metrics_manager import HomeserverMetricsManager +from synapse.metrics.homeserver_metrics_manager import ( + BlockInFlightMetric, + HomeserverMetricsManager, +) from synapse.util import Clock logger = logging.getLogger(__name__) -# This is dynamically created in InFlightGauge.__init__. -class _InFlightMetric(Protocol): - real_time_max: float - real_time_sum: float - - -# Tracks the number of blocks currently active -in_flight: InFlightGauge[_InFlightMetric] = InFlightGauge( - "synapse_util_metrics_block_in_flight", - "", - labels=["block_name"], - sub_metrics=["real_time_max", "real_time_sum"], -) - - P = ParamSpec("P") R = TypeVar("R") @@ -153,7 +140,9 @@ def __enter__(self) -> "Measure": self.start = self.clock.time() self._logging_context.__enter__() - in_flight.register((self.name,), self._update_in_flight) + self.metrics_manager.block_metrics.in_flight.register( + (self.name,), self._update_in_flight + ) logger.debug("Entering block %s", self.name) @@ -173,7 +162,9 @@ def __exit__( duration = self.clock.time() - self.start usage = self.get_resource_usage() - in_flight.unregister((self.name,), self._update_in_flight) + self.metrics_manager.block_metrics.in_flight.unregister( + (self.name,), self._update_in_flight + ) self._logging_context.__exit__(exc_type, exc_val, exc_tb) try: @@ -206,7 +197,7 @@ def get_resource_usage(self) -> ContextResourceUsage: """ return self._logging_context.get_resource_usage() - def _update_in_flight(self, metrics: _InFlightMetric) -> None: + def _update_in_flight(self, metrics: BlockInFlightMetric) -> None: """Gets called when processing in flight metrics""" assert self.start is not None duration = self.clock.time() - self.start From c07e1f502c6446259f41b7026f3defdcbf9f15e6 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 23 Jun 2025 15:39:43 -0500 Subject: [PATCH 04/16] Add docstrings --- synapse/metrics/homeserver_metrics_manager.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/synapse/metrics/homeserver_metrics_manager.py b/synapse/metrics/homeserver_metrics_manager.py index e67f13e05ac..9e4cdf8034f 100644 --- a/synapse/metrics/homeserver_metrics_manager.py +++ b/synapse/metrics/homeserver_metrics_manager.py @@ -32,6 +32,10 @@ class BlockInFlightMetric(Protocol): class BlockMetrics: + """ + Metrics to see the number of and how much time is spend in various blocks of code. + """ + def __init__( self, metrics_collector_registry: CollectorRegistry, @@ -49,6 +53,7 @@ def __init__( ["block_name"], registry=metrics_collector_registry, ) + """The cumulative time spent executing this block across all calls, in seconds.""" self.block_ru_utime = Counter( "synapse_util_metrics_block_ru_utime_seconds", @@ -56,6 +61,7 @@ def __init__( ["block_name"], registry=metrics_collector_registry, ) + """Resource usage: user CPU time in seconds used in this block""" self.block_ru_stime = Counter( "synapse_util_metrics_block_ru_stime_seconds", @@ -63,6 +69,7 @@ def __init__( ["block_name"], registry=metrics_collector_registry, ) + """Resource usage: system CPU time in seconds used in this block""" self.block_db_txn_count = Counter( "synapse_util_metrics_block_db_txn_count", @@ -70,6 +77,7 @@ def __init__( ["block_name"], registry=metrics_collector_registry, ) + """Number of database transactions completed in this block""" self.block_db_txn_duration = Counter( "synapse_util_metrics_block_db_txn_duration_seconds", @@ -77,7 +85,7 @@ def __init__( ["block_name"], registry=metrics_collector_registry, ) - """seconds spent waiting for db txns, excluding scheduling time, in this block""" + """Seconds spent waiting for database txns, excluding scheduling time, in this block""" self.block_db_sched_duration = Counter( "synapse_util_metrics_block_db_sched_duration_seconds", @@ -85,7 +93,7 @@ def __init__( ["block_name"], registry=metrics_collector_registry, ) - """seconds spent waiting for a db connection, in this block""" + """Seconds spent waiting for a db connection, in this block""" self.in_flight: InFlightGauge[BlockInFlightMetric] = InFlightGauge( "synapse_util_metrics_block_in_flight", @@ -94,7 +102,7 @@ def __init__( # Matches the fields in the `BlockInFlightMetric` sub_metrics=["real_time_max", "real_time_sum"], ) - """Tracks the number of blocks currently active""" + """Tracks the number of blocks currently running and their real time usage.""" class HomeserverMetricsManager: From 019f0b7b4f6e549cea0f9a32a904c93176112328 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 24 Jun 2025 13:44:27 -0500 Subject: [PATCH 05/16] Refactor bulk `Measure` usage Not finished: - `synapse/http/federation/well_known_resolver.py` - `measure_func` --- synapse/federation/send_queue.py | 3 +- synapse/federation/sender/__init__.py | 5 +- synapse/handlers/appservice.py | 7 +- synapse/handlers/delayed_events.py | 3 +- synapse/handlers/presence.py | 8 +- synapse/handlers/sync.py | 9 ++- synapse/handlers/typing.py | 5 +- synapse/handlers/user_directory.py | 3 +- .../http/federation/well_known_resolver.py | 2 +- synapse/http/matrixfederationclient.py | 5 +- .../callbacks/media_repository_callbacks.py | 13 ++- .../callbacks/ratelimit_callbacks.py | 7 +- .../callbacks/spamchecker_callbacks.py | 79 ++++++++++++++++--- synapse/replication/http/federation.py | 3 +- synapse/replication/http/send_event.py | 3 +- synapse/replication/http/send_events.py | 3 +- synapse/replication/tcp/client.py | 5 +- synapse/replication/tcp/resource.py | 5 +- synapse/state/__init__.py | 7 +- synapse/storage/controllers/persist_events.py | 23 +++++- synapse/storage/controllers/state.py | 3 +- .../storage/databases/main/events_worker.py | 3 +- synapse/storage/databases/main/roommember.py | 5 +- synapse/util/metrics.py | 9 ++- 24 files changed, 168 insertions(+), 50 deletions(-) diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py index b5c9fcff7cf..7a7882d7a6c 100644 --- a/synapse/federation/send_queue.py +++ b/synapse/federation/send_queue.py @@ -73,6 +73,7 @@ class FederationRemoteSendQueue(AbstractFederationSender): def __init__(self, hs: "HomeServer"): self.server_name = hs.hostname self.clock = hs.get_clock() + self.metrics_manager = hs.metrics_manager self.notifier = hs.get_notifier() self.is_mine_id = hs.is_mine_id self.is_mine_server_name = hs.is_mine_server_name @@ -156,7 +157,7 @@ def _clear_queue(self) -> None: def _clear_queue_before_pos(self, position_to_delete: int) -> None: """Clear all the queues from before a given position""" - with Measure(self.clock, "send_queue._clear"): + with Measure(self.clock, self.metrics_manager, "send_queue._clear"): # Delete things out of presence maps keys = self.presence_destinations.keys() i = self.presence_destinations.bisect_left(position_to_delete) diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index 2eef7b707d7..4eeab926c90 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -378,6 +378,7 @@ def __init__(self, hs: "HomeServer"): self._storage_controllers = hs.get_storage_controllers() self.clock = hs.get_clock() + self.metrics_manager = hs.metrics_manager self.is_mine_id = hs.is_mine_id self.is_mine_server_name = hs.is_mine_server_name @@ -657,7 +658,9 @@ async def handle_room_events(events: List[EventBase]) -> None: logger.debug( "Handling %i events in room %s", len(events), events[0].room_id ) - with Measure(self.clock, "handle_room_events"): + with Measure( + self.clock, self.metrics_manager, "handle_room_events" + ): for event in events: await handle_event(event) diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index f3bbdb5a05b..483f824556b 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -79,6 +79,7 @@ def __init__(self, hs: "HomeServer"): self.scheduler = hs.get_application_service_scheduler() self.started_scheduler = False self.clock = hs.get_clock() + self.metrics_manager = hs.metrics_manager self.notify_appservices = hs.config.worker.should_notify_appservices self.event_sources = hs.get_event_sources() self._msc2409_to_device_messages_enabled = ( @@ -120,7 +121,7 @@ def notify_interested_services(self, max_token: RoomStreamToken) -> None: @wrap_as_background_process("notify_interested_services") async def _notify_interested_services(self, max_token: RoomStreamToken) -> None: - with Measure(self.clock, "notify_interested_services"): + with Measure(self.clock, self.metrics_manager, "notify_interested_services"): self.is_processing = True try: upper_bound = -1 @@ -329,7 +330,9 @@ async def _notify_interested_services_ephemeral( users: Collection[Union[str, UserID]], ) -> None: logger.debug("Checking interested services for %s", stream_key) - with Measure(self.clock, "notify_interested_services_ephemeral"): + with Measure( + self.clock, self.metrics_manager, "notify_interested_services_ephemeral" + ): for service in services: if stream_key == StreamKeyType.TYPING: # Note that we don't persist the token (via set_appservice_stream_type_pos) diff --git a/synapse/handlers/delayed_events.py b/synapse/handlers/delayed_events.py index 80cb1cec9b0..049dab82c03 100644 --- a/synapse/handlers/delayed_events.py +++ b/synapse/handlers/delayed_events.py @@ -58,6 +58,7 @@ def __init__(self, hs: "HomeServer"): self._storage_controllers = hs.get_storage_controllers() self._config = hs.config self._clock = hs.get_clock() + self.metrics_manager = hs.metrics_manager self._event_creation_handler = hs.get_event_creation_handler() self._room_member_handler = hs.get_room_member_handler() @@ -159,7 +160,7 @@ async def _unsafe_process_new_event(self) -> None: # Loop round handling deltas until we're up to date while True: - with Measure(self._clock, "delayed_events_delta"): + with Measure(self._clock, self.metrics_manager, "delayed_events_delta"): room_max_stream_ordering = self._store.get_room_max_stream_ordering() if self._event_pos == room_max_stream_ordering: return diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 390cafa8f63..a2d74b99213 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -749,6 +749,7 @@ def __init__(self, hs: "HomeServer"): super().__init__(hs) self.wheel_timer: WheelTimer[str] = WheelTimer() self.notifier = hs.get_notifier() + self.metrics_manager = hs.metrics_manager federation_registry = hs.get_federation_registry() @@ -941,7 +942,7 @@ async def _update_states( now = self.clock.time_msec() - with Measure(self.clock, "presence_update_states"): + with Measure(self.clock, self.metrics_manager, "presence_update_states"): # NOTE: We purposefully don't await between now and when we've # calculated what we want to do with the new states, to avoid races. @@ -1497,7 +1498,7 @@ async def _process_presence() -> None: async def _unsafe_process(self) -> None: # Loop round handling deltas until we're up to date while True: - with Measure(self.clock, "presence_delta"): + with Measure(self.clock, self.metrics_manager, "presence_delta"): room_max_stream_ordering = self.store.get_room_max_stream_ordering() if self._event_pos == room_max_stream_ordering: return @@ -1762,6 +1763,7 @@ def __init__(self, hs: "HomeServer"): self.get_presence_handler = hs.get_presence_handler self.get_presence_router = hs.get_presence_router self.clock = hs.get_clock() + self.metrics_manager = hs.metrics_manager self.store = hs.get_datastores().main async def get_new_events( @@ -1792,7 +1794,7 @@ async def get_new_events( user_id = user.to_string() stream_change_cache = self.store.presence_stream_cache - with Measure(self.clock, "presence.get_new_events"): + with Measure(self.clock, self.metrics_manager, "presence.get_new_events"): if from_key is not None: from_key = int(from_key) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index a400e63fd56..4f05d7ea7d7 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -337,6 +337,7 @@ def __init__(self, hs: "HomeServer"): self._push_rules_handler = hs.get_push_rules_handler() self.event_sources = hs.get_event_sources() self.clock = hs.get_clock() + self.metrics_manager = hs.metrics_manager self.state = hs.get_state_handler() self.auth_blocking = hs.get_auth_blocking() self._storage_controllers = hs.get_storage_controllers() @@ -710,7 +711,7 @@ async def ephemeral_by_room( sync_config = sync_result_builder.sync_config - with Measure(self.clock, "ephemeral_by_room"): + with Measure(self.clock, self.metrics_manager, "ephemeral_by_room"): typing_key = since_token.typing_key if since_token else 0 room_ids = sync_result_builder.joined_room_ids @@ -783,7 +784,7 @@ async def _load_filtered_recents( and current token to send down to clients. newly_joined_room """ - with Measure(self.clock, "load_filtered_recents"): + with Measure(self.clock, self.metrics_manager, "load_filtered_recents"): timeline_limit = sync_config.filter_collection.timeline_limit() block_all_timeline = ( sync_config.filter_collection.blocks_all_room_timeline() @@ -1174,7 +1175,7 @@ async def compute_state_delta( # updates even if they occurred logically before the previous event. # TODO(mjark) Check for new redactions in the state events. - with Measure(self.clock, "compute_state_delta"): + with Measure(self.clock, self.metrics_manager, "compute_state_delta"): # The memberships needed for events in the timeline. # Only calculated when `lazy_load_members` is on. members_to_fetch: Optional[Set[str]] = None @@ -1791,7 +1792,7 @@ async def unread_notifs_for_room_id( # the DB. return RoomNotifCounts.empty() - with Measure(self.clock, "unread_notifs_for_room_id"): + with Measure(self.clock, self.metrics_manager, "unread_notifs_for_room_id"): return await self.store.get_unread_event_push_actions_by_room_for_user( room_id, sync_config.user.to_string(), diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index 8d693fee304..89373f2bc1a 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -505,6 +505,7 @@ class TypingNotificationEventSource(EventSource[int, JsonMapping]): def __init__(self, hs: "HomeServer"): self._main_store = hs.get_datastores().main self.clock = hs.get_clock() + self.metrics_manager = hs.metrics_manager # We can't call get_typing_handler here because there's a cycle: # # Typing -> Notifier -> TypingNotificationEventSource -> Typing @@ -535,7 +536,7 @@ async def get_new_events_as( appservice may be interested in. * The latest known room serial. """ - with Measure(self.clock, "typing.get_new_events_as"): + with Measure(self.clock, self.metrics_manager, "typing.get_new_events_as"): handler = self.get_typing_handler() events = [] @@ -571,7 +572,7 @@ async def get_new_events( Find typing notifications for given rooms (> `from_token` and <= `to_token`) """ - with Measure(self.clock, "typing.get_new_events"): + with Measure(self.clock, self.metrics_manager, "typing.get_new_events"): from_key = int(from_key) handler = self.get_typing_handler() diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 1f692c79a02..b509934486e 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -104,6 +104,7 @@ def __init__(self, hs: "HomeServer"): self._storage_controllers = hs.get_storage_controllers() self.server_name = hs.hostname self.clock = hs.get_clock() + self.metrics_manager = hs.metrics_manager self.notifier = hs.get_notifier() self.is_mine_id = hs.is_mine_id self.update_user_directory = hs.config.worker.should_update_user_directory @@ -237,7 +238,7 @@ async def _unsafe_process(self) -> None: # Loop round handling deltas until we're up to date while True: - with Measure(self.clock, "user_dir_delta"): + with Measure(self.clock, self.metrics_manager, "user_dir_delta"): room_max_stream_ordering = self.store.get_room_max_stream_ordering() if self.pos == room_max_stream_ordering: return diff --git a/synapse/http/federation/well_known_resolver.py b/synapse/http/federation/well_known_resolver.py index 9a6bac7281a..e8761f0bb51 100644 --- a/synapse/http/federation/well_known_resolver.py +++ b/synapse/http/federation/well_known_resolver.py @@ -134,7 +134,7 @@ async def get_well_known(self, server_name: bytes) -> WellKnownLookupResult: # TODO: should we linearise so that we don't end up doing two .well-known # requests for the same server in parallel? try: - with Measure(self._clock, "get_well_known"): + with Measure(self._clock, self.metrics_manager, "get_well_known"): result: Optional[bytes] cache_period: float diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 97a863a1189..e5899977906 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -451,6 +451,7 @@ def __init__( ) self.clock = hs.get_clock() + self.metrics_manager = hs.metrics_manager self._store = hs.get_datastores().main self.version_string_bytes = hs.version_string.encode("ascii") self.default_timeout_seconds = hs.config.federation.client_timeout_ms / 1000 @@ -697,7 +698,9 @@ async def _send_request( outgoing_requests_counter.labels(request.method).inc() try: - with Measure(self.clock, "outbound_request"): + with Measure( + self.clock, self.metrics_manager, "outbound_request" + ): # we don't want all the fancy cookie and redirect handling # that treq.request gives: just use the raw Agent. diff --git a/synapse/module_api/callbacks/media_repository_callbacks.py b/synapse/module_api/callbacks/media_repository_callbacks.py index 6fa80a8eab1..0ac7d06c108 100644 --- a/synapse/module_api/callbacks/media_repository_callbacks.py +++ b/synapse/module_api/callbacks/media_repository_callbacks.py @@ -32,6 +32,7 @@ class MediaRepositoryModuleApiCallbacks: def __init__(self, hs: "HomeServer") -> None: self.clock = hs.get_clock() + self.metrics_manager = hs.metrics_manager self._get_media_config_for_user_callbacks: List[ GET_MEDIA_CONFIG_FOR_USER_CALLBACK ] = [] @@ -57,7 +58,11 @@ def register_callbacks( async def get_media_config_for_user(self, user_id: str) -> Optional[JsonDict]: for callback in self._get_media_config_for_user_callbacks: - with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"): + with Measure( + self.clock, + self.metrics_manager, + f"{callback.__module__}.{callback.__qualname__}", + ): res: Optional[JsonDict] = await delay_cancellation(callback(user_id)) if res: return res @@ -68,7 +73,11 @@ async def is_user_allowed_to_upload_media_of_size( self, user_id: str, size: int ) -> bool: for callback in self._is_user_allowed_to_upload_media_of_size_callbacks: - with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"): + with Measure( + self.clock, + self.metrics_manager, + f"{callback.__module__}.{callback.__qualname__}", + ): res: bool = await delay_cancellation(callback(user_id, size)) if not res: return res diff --git a/synapse/module_api/callbacks/ratelimit_callbacks.py b/synapse/module_api/callbacks/ratelimit_callbacks.py index 64f9cc81e8a..93259c214f5 100644 --- a/synapse/module_api/callbacks/ratelimit_callbacks.py +++ b/synapse/module_api/callbacks/ratelimit_callbacks.py @@ -44,6 +44,7 @@ class RatelimitOverride: class RatelimitModuleApiCallbacks: def __init__(self, hs: "HomeServer") -> None: self.clock = hs.get_clock() + self.metrics_manager = hs.metrics_manager self._get_ratelimit_override_for_user_callbacks: List[ GET_RATELIMIT_OVERRIDE_FOR_USER_CALLBACK ] = [] @@ -64,7 +65,11 @@ async def get_ratelimit_override_for_user( self, user_id: str, limiter_name: str ) -> Optional[RatelimitOverride]: for callback in self._get_ratelimit_override_for_user_callbacks: - with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"): + with Measure( + self.clock, + self.metrics_manager, + f"{callback.__module__}.{callback.__qualname__}", + ): res: Optional[RatelimitOverride] = await delay_cancellation( callback(user_id, limiter_name) ) diff --git a/synapse/module_api/callbacks/spamchecker_callbacks.py b/synapse/module_api/callbacks/spamchecker_callbacks.py index bea5c5badf0..10e617e567b 100644 --- a/synapse/module_api/callbacks/spamchecker_callbacks.py +++ b/synapse/module_api/callbacks/spamchecker_callbacks.py @@ -340,6 +340,7 @@ class SpamCheckerModuleApiCallbacks: def __init__(self, hs: "synapse.server.HomeServer") -> None: self.clock = hs.get_clock() + self.metrics_manager = hs.metrics_manager self._check_event_for_spam_callbacks: List[CHECK_EVENT_FOR_SPAM_CALLBACK] = [] self._should_drop_federated_event_callbacks: List[ @@ -464,7 +465,11 @@ async def check_event_for_spam( generally discouraged as it doesn't support internationalization. """ for callback in self._check_event_for_spam_callbacks: - with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"): + with Measure( + self.clock, + self.metrics_manager, + f"{callback.__module__}.{callback.__qualname__}", + ): res = await delay_cancellation(callback(event)) if res is False or res == self.NOT_SPAM: # This spam-checker accepts the event. @@ -517,7 +522,11 @@ async def should_drop_federated_event( True if the event should be silently dropped """ for callback in self._should_drop_federated_event_callbacks: - with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"): + with Measure( + self.clock, + self.metrics_manager, + f"{callback.__module__}.{callback.__qualname__}", + ): res: Union[bool, str] = await delay_cancellation(callback(event)) if res: return res @@ -539,7 +548,11 @@ async def user_may_join_room( NOT_SPAM if the operation is permitted, [Codes, Dict] otherwise. """ for callback in self._user_may_join_room_callbacks: - with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"): + with Measure( + self.clock, + self.metrics_manager, + f"{callback.__module__}.{callback.__qualname__}", + ): res = await delay_cancellation(callback(user_id, room_id, is_invited)) # Normalize return values to `Codes` or `"NOT_SPAM"`. if res is True or res is self.NOT_SPAM: @@ -578,7 +591,11 @@ async def user_may_invite( NOT_SPAM if the operation is permitted, Codes otherwise. """ for callback in self._user_may_invite_callbacks: - with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"): + with Measure( + self.clock, + self.metrics_manager, + f"{callback.__module__}.{callback.__qualname__}", + ): res = await delay_cancellation( callback(inviter_userid, invitee_userid, room_id) ) @@ -623,7 +640,11 @@ async def user_may_send_3pid_invite( NOT_SPAM if the operation is permitted, Codes otherwise. """ for callback in self._user_may_send_3pid_invite_callbacks: - with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"): + with Measure( + self.clock, + self.metrics_manager, + f"{callback.__module__}.{callback.__qualname__}", + ): res = await delay_cancellation( callback(inviter_userid, medium, address, room_id) ) @@ -659,7 +680,11 @@ async def user_may_create_room( room_config: The room creation configuration which is the body of the /createRoom request """ for callback in self._user_may_create_room_callbacks: - with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"): + with Measure( + self.clock, + self.metrics_manager, + f"{callback.__module__}.{callback.__qualname__}", + ): checker_args = inspect.signature(callback) # Also ensure backwards compatibility with spam checker callbacks # that don't expect the room_config argument. @@ -723,7 +748,11 @@ async def user_may_send_state_event( content: The content of the state event """ for callback in self._user_may_send_state_event_callbacks: - with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"): + with Measure( + self.clock, + self.metrics_manager, + f"{callback.__module__}.{callback.__qualname__}", + ): # We make a copy of the content to ensure that the spam checker cannot modify it. res = await delay_cancellation( callback(user_id, room_id, event_type, state_key, deepcopy(content)) @@ -751,7 +780,11 @@ async def user_may_create_room_alias( """ for callback in self._user_may_create_room_alias_callbacks: - with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"): + with Measure( + self.clock, + self.metrics_manager, + f"{callback.__module__}.{callback.__qualname__}", + ): res = await delay_cancellation(callback(userid, room_alias)) if res is True or res is self.NOT_SPAM: continue @@ -784,7 +817,11 @@ async def user_may_publish_room( room_id: The ID of the room that would be published """ for callback in self._user_may_publish_room_callbacks: - with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"): + with Measure( + self.clock, + self.metrics_manager, + f"{callback.__module__}.{callback.__qualname__}", + ): res = await delay_cancellation(callback(userid, room_id)) if res is True or res is self.NOT_SPAM: continue @@ -826,7 +863,11 @@ async def check_username_for_spam( True if the user is spammy. """ for callback in self._check_username_for_spam_callbacks: - with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"): + with Measure( + self.clock, + self.metrics_manager, + f"{callback.__module__}.{callback.__qualname__}", + ): checker_args = inspect.signature(callback) # Make a copy of the user profile object to ensure the spam checker cannot # modify it. @@ -875,7 +916,11 @@ async def check_registration_for_spam( """ for callback in self._check_registration_for_spam_callbacks: - with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"): + with Measure( + self.clock, + self.metrics_manager, + f"{callback.__module__}.{callback.__qualname__}", + ): behaviour = await delay_cancellation( callback(email_threepid, username, request_info, auth_provider_id) ) @@ -917,7 +962,11 @@ async def check_media_file_for_spam( """ for callback in self._check_media_file_for_spam_callbacks: - with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"): + with Measure( + self.clock, + self.metrics_manager, + f"{callback.__module__}.{callback.__qualname__}", + ): res = await delay_cancellation(callback(file_wrapper, file_info)) # Normalize return values to `Codes` or `"NOT_SPAM"`. if res is False or res is self.NOT_SPAM: @@ -964,7 +1013,11 @@ async def check_login_for_spam( """ for callback in self._check_login_for_spam_callbacks: - with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"): + with Measure( + self.clock, + self.metrics_manager, + f"{callback.__module__}.{callback.__qualname__}", + ): res = await delay_cancellation( callback( user_id, diff --git a/synapse/replication/http/federation.py b/synapse/replication/http/federation.py index 940f418396c..a0bf8d181a0 100644 --- a/synapse/replication/http/federation.py +++ b/synapse/replication/http/federation.py @@ -79,6 +79,7 @@ def __init__(self, hs: "HomeServer"): self.store = hs.get_datastores().main self._storage_controllers = hs.get_storage_controllers() self.clock = hs.get_clock() + self.metrics_manager = hs.metrics_manager self.federation_event_handler = hs.get_federation_event_handler() @staticmethod @@ -122,7 +123,7 @@ async def _serialize_payload( # type: ignore[override] async def _handle_request( # type: ignore[override] self, request: Request, content: JsonDict ) -> Tuple[int, JsonDict]: - with Measure(self.clock, "repl_fed_send_events_parse"): + with Measure(self.clock, self.metrics_manager, "repl_fed_send_events_parse"): room_id = content["room_id"] backfilled = content["backfilled"] diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 01952a8d592..f0ab5d2456b 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -80,6 +80,7 @@ def __init__(self, hs: "HomeServer"): self.store = hs.get_datastores().main self._storage_controllers = hs.get_storage_controllers() self.clock = hs.get_clock() + self.metrics_manager = hs.metrics_manager @staticmethod async def _serialize_payload( # type: ignore[override] @@ -121,7 +122,7 @@ async def _serialize_payload( # type: ignore[override] async def _handle_request( # type: ignore[override] self, request: Request, content: JsonDict, event_id: str ) -> Tuple[int, JsonDict]: - with Measure(self.clock, "repl_send_event_parse"): + with Measure(self.clock, self.metrics_manager, "repl_send_event_parse"): event_dict = content["event"] room_ver = KNOWN_ROOM_VERSIONS[content["room_version"]] internal_metadata = content["internal_metadata"] diff --git a/synapse/replication/http/send_events.py b/synapse/replication/http/send_events.py index d965ce54924..d38cec9de3d 100644 --- a/synapse/replication/http/send_events.py +++ b/synapse/replication/http/send_events.py @@ -81,6 +81,7 @@ def __init__(self, hs: "HomeServer"): self.store = hs.get_datastores().main self._storage_controllers = hs.get_storage_controllers() self.clock = hs.get_clock() + self.metrics_manager = hs.metrics_manager @staticmethod async def _serialize_payload( # type: ignore[override] @@ -122,7 +123,7 @@ async def _serialize_payload( # type: ignore[override] async def _handle_request( # type: ignore[override] self, request: Request, payload: JsonDict ) -> Tuple[int, JsonDict]: - with Measure(self.clock, "repl_send_events_parse"): + with Measure(self.clock, self.metrics_manager, "repl_send_events_parse"): events_and_context = [] events = payload["events"] rooms = set() diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 0bd5478cd35..64ebde0b246 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -79,6 +79,7 @@ def __init__(self, hs: "HomeServer"): self.notifier = hs.get_notifier() self._reactor = hs.get_reactor() self._clock = hs.get_clock() + self.metrics_manager = hs.metrics_manager self._streams = hs.get_replication_streams() self._instance_name = hs.get_instance_name() self._typing_handler = hs.get_typing_handler() @@ -342,7 +343,9 @@ async def wait_for_stream_position( waiting_list.add((position, deferred)) # We measure here to get in flight counts and average waiting time. - with Measure(self._clock, "repl.wait_for_stream_position"): + with Measure( + self._clock, self.metrics_manager, "repl.wait_for_stream_position" + ): logger.info( "Waiting for repl stream %r to reach %s (%s); currently at: %s", stream_name, diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py index d647a2b3326..485cc267a46 100644 --- a/synapse/replication/tcp/resource.py +++ b/synapse/replication/tcp/resource.py @@ -80,6 +80,7 @@ class ReplicationStreamer: def __init__(self, hs: "HomeServer"): self.store = hs.get_datastores().main self.clock = hs.get_clock() + self.metrics_manager = hs.metrics_manager self.notifier = hs.get_notifier() self._instance_name = hs.get_instance_name() @@ -155,7 +156,9 @@ async def _run_notifier_loop(self) -> None: while self.pending_updates: self.pending_updates = False - with Measure(self.clock, "repl.stream.get_updates"): + with Measure( + self.clock, self.metrics_manager, "repl.stream.get_updates" + ): all_streams = self.streams if self._replication_torture_level is not None: diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 1c3e5d00a92..3b24c25b810 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -632,6 +632,7 @@ class StateResolutionHandler: def __init__(self, hs: "HomeServer"): self.clock = hs.get_clock() + self.metrics_manager = hs.metrics_manager self.resolve_linearizer = Linearizer(name="state_resolve_lock") @@ -747,7 +748,7 @@ async def resolve_state_groups( # which will be used as a cache key for future resolutions, but # not get persisted. - with Measure(self.clock, "state.create_group_ids"): + with Measure(self.clock, self.metrics_manager, "state.create_group_ids"): cache = _make_state_cache_entry(new_state, state_groups_ids) self._state_cache[group_names] = cache @@ -785,7 +786,9 @@ async def resolve_events_with_store( a map from (type, state_key) to event_id. """ try: - with Measure(self.clock, "state._resolve_events") as m: + with Measure( + self.clock, self.metrics_manager, "state._resolve_events" + ) as m: room_version_obj = KNOWN_ROOM_VERSIONS[room_version] if room_version_obj.state_res == StateResolutionVersions.V1: return await v1.resolve_events_with_store( diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index f5131fe2915..ed311747dba 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -338,6 +338,7 @@ def __init__( self.persist_events_store = stores.persist_events self._clock = hs.get_clock() + self.metrics_manager = hs.metrics_manager self._instance_name = hs.get_instance_name() self.is_mine_id = hs.is_mine_id self._event_persist_queue = _EventPeristenceQueue( @@ -616,7 +617,9 @@ async def _persist_event_batch( state_delta_for_room = None if not backfilled: - with Measure(self._clock, "_calculate_state_and_extrem"): + with Measure( + self._clock, self.metrics_manager, "_calculate_state_and_extrem" + ): # Work out the new "current state" for the room. # We do this by working out what the new extremities are and then # calculating the state from that. @@ -627,7 +630,11 @@ async def _persist_event_batch( room_id, chunk ) - with Measure(self._clock, "calculate_chain_cover_index_for_events"): + with Measure( + self._clock, + self.metrics_manager, + "calculate_chain_cover_index_for_events", + ): # We now calculate chain ID/sequence numbers for any state events we're # persisting. We ignore out of band memberships as we're not in the room # and won't have their auth chain (we'll fix it up later if we join the @@ -719,7 +726,11 @@ async def _calculate_new_forward_extremities_and_state_delta( break logger.debug("Calculating state delta for room %s", room_id) - with Measure(self._clock, "persist_events.get_new_state_after_events"): + with Measure( + self._clock, + self.metrics_manager, + "persist_events.get_new_state_after_events", + ): res = await self._get_new_state_after_events( room_id, ev_ctx_rm, @@ -746,7 +757,11 @@ async def _calculate_new_forward_extremities_and_state_delta( # removed keys entirely. delta = DeltaState([], delta_ids) elif current_state is not None: - with Measure(self._clock, "persist_events.calculate_state_delta"): + with Measure( + self._clock, + self.metrics_manager, + "persist_events.calculate_state_delta", + ): delta = await self._calculate_state_delta(room_id, current_state) if delta: diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index f28f5d7e039..35831bc12a9 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -70,6 +70,7 @@ class StateStorageController: def __init__(self, hs: "HomeServer", stores: "Databases"): self._is_mine_id = hs.is_mine_id self._clock = hs.get_clock() + self.metrics_manager = hs.metrics_manager self.stores = stores self._partial_state_events_tracker = PartialStateEventsTracker(stores.main) self._partial_state_room_tracker = PartialCurrentStateTracker(stores.main) @@ -812,7 +813,7 @@ async def get_joined_hosts( state_group = object() assert state_group is not None - with Measure(self._clock, "get_joined_hosts"): + with Measure(self._clock, self.metrics_manager, "get_joined_hosts"): return await self._get_joined_hosts( room_id, state_group, state_entry=state_entry ) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 3db4460f571..5411142d1db 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -226,6 +226,7 @@ def __init__( hs: "HomeServer", ): super().__init__(database, db_conn, hs) + self.metrics_manager = hs.metrics_manager self._stream_id_gen: MultiWriterIdGenerator self._backfill_id_gen: MultiWriterIdGenerator @@ -1233,7 +1234,7 @@ def _fetch_event_list( to event row. Note that it may well contain additional events that were not part of this request. """ - with Measure(self._clock, "_fetch_event_list"): + with Measure(self._clock, self.metrics_manager, "_fetch_event_list"): try: events_to_fetch = { event_id for events, _ in event_list for event_id in events diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 7ca73abb835..fc33aef831b 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -100,6 +100,7 @@ def __init__( hs: "HomeServer", ): super().__init__(database, db_conn, hs) + self.metrics_manager = hs.metrics_manager self._server_notices_mxid = hs.config.servernotices.server_notices_mxid @@ -983,7 +984,9 @@ async def get_joined_user_ids_from_state( `_get_user_ids_from_membership_event_ids` for any uncached events. """ - with Measure(self._clock, "get_joined_user_ids_from_state"): + with Measure( + self._clock, self.metrics_manager, "get_joined_user_ids_from_state" + ): users_in_room = set() member_event_ids = [ e_id for key, e_id in state.items() if key[0] == EventTypes.Member diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index a29afeedc31..70d342f0f76 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -64,7 +64,7 @@ def measure_func( """Decorate an async method with a `Measure` context manager. The Measure is created using `self.clock`; it should only be used to decorate - methods in classes defining an instance-level `clock` attribute. + methods in classes defining an instance-level `clock` and `metrics_manager` attribute. Usage: @@ -87,7 +87,7 @@ def wrapper( @wraps(func) async def measured_func(self: HasClock, *args: P.args, **kwargs: P.kwargs) -> R: - with Measure(self.clock, block_name): + with Measure(self.clock, self.metrics_manager, block_name): r = await func(self, *args, **kwargs) return r @@ -110,7 +110,10 @@ class Measure: ] def __init__( - self, clock: Clock, name: str, metrics_manager: HomeserverMetricsManager + self, + clock: Clock, + metrics_manager: HomeserverMetricsManager, + name: str, ) -> None: """ Args: From c5296f5b6a9f9a4f938ad29356722fb8e2b01121 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 24 Jun 2025 13:55:15 -0500 Subject: [PATCH 06/16] Refactor `Measure` in `synapse/http/federation/well_known_resolver.py` --- .../federation/matrix_federation_agent.py | 3 +++ .../http/federation/well_known_resolver.py | 3 +++ synapse/http/matrixfederationclient.py | 2 ++ tests/handlers/test_typing.py | 20 +++++++++++-------- .../test_matrix_federation_agent.py | 7 +++++++ .../test_federation_sender_shard.py | 2 ++ 6 files changed, 29 insertions(+), 8 deletions(-) diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index a7742fcea85..09962563ffa 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -48,6 +48,7 @@ from synapse.http.federation.well_known_resolver import WellKnownResolver from synapse.http.proxyagent import ProxyAgent from synapse.logging.context import make_deferred_yieldable, run_in_background +from synapse.metrics.homeserver_metrics_manager import HomeserverMetricsManager from synapse.types import ISynapseReactor from synapse.util import Clock @@ -93,6 +94,7 @@ class MatrixFederationAgent: def __init__( self, reactor: ISynapseReactor, + metrics_manager: HomeserverMetricsManager, tls_client_options_factory: Optional[FederationPolicyForHTTPS], user_agent: bytes, ip_allowlist: Optional[IPSet], @@ -128,6 +130,7 @@ def __init__( if _well_known_resolver is None: _well_known_resolver = WellKnownResolver( reactor, + metrics_manager, agent=BlocklistingAgentWrapper( ProxyAgent( reactor, diff --git a/synapse/http/federation/well_known_resolver.py b/synapse/http/federation/well_known_resolver.py index e8761f0bb51..ecc66933ab5 100644 --- a/synapse/http/federation/well_known_resolver.py +++ b/synapse/http/federation/well_known_resolver.py @@ -35,6 +35,7 @@ from synapse.http.client import BodyExceededMaxSize, read_body_with_max_size from synapse.logging.context import make_deferred_yieldable +from synapse.metrics.homeserver_metrics_manager import HomeserverMetricsManager from synapse.util import Clock, json_decoder from synapse.util.caches.ttlcache import TTLCache from synapse.util.metrics import Measure @@ -92,6 +93,7 @@ class WellKnownResolver: def __init__( self, reactor: IReactorTime, + metrics_manager: HomeserverMetricsManager, agent: IAgent, user_agent: bytes, well_known_cache: Optional[TTLCache[bytes, Optional[bytes]]] = None, @@ -99,6 +101,7 @@ def __init__( ): self._reactor = reactor self._clock = Clock(reactor) + self.metrics_manager = metrics_manager if well_known_cache is None: well_known_cache = _well_known_cache diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index e5899977906..c5d1d3d3c80 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -406,6 +406,7 @@ def __init__( self.server_name = hs.hostname self.reactor = hs.get_reactor() + self.metrics_manager = hs.metrics_manager user_agent = hs.version_string if hs.config.server.user_agent_suffix: @@ -418,6 +419,7 @@ def __init__( # Talk to federation directly federation_agent: IAgent = MatrixFederationAgent( self.reactor, + self.metrics_manager, tls_client_options_factory, user_agent.encode("ascii"), hs.config.server.federation_ip_range_allowlist, diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index 9d8960315fe..0bc2c4933bb 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -84,14 +84,6 @@ def make_homeserver( # we mock out the federation client too self.mock_federation_client = AsyncMock(spec=["put_json"]) - self.mock_federation_client.put_json.return_value = (200, "OK") - self.mock_federation_client.agent = MatrixFederationAgent( - reactor, - tls_client_options_factory=None, - user_agent=b"SynapseInTrialTest/0.0.0", - ip_allowlist=None, - ip_blocklist=IPSet(), - ) # the tests assume that we are starting at unix time 1000 reactor.pump((1000,)) @@ -104,6 +96,18 @@ def make_homeserver( replication_streams={}, ) + # Finish off mocking the federation client + self.mock_federation_client.put_json.return_value = (200, "OK") + self.mock_federation_client.agent = MatrixFederationAgent( + reactor=reactor, + # After we get access to the `hs` homeserver instance, we can replace the federation agent + metrics_manager=hs.metrics_manager, + tls_client_options_factory=None, + user_agent=b"SynapseInTrialTest/0.0.0", + ip_allowlist=None, + ip_blocklist=IPSet(), + ) + return hs def create_resource_dict(self) -> Dict[str, Resource]: diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index 6b25e53c28e..519ef603ac2 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -59,6 +59,7 @@ LoggingContextOrSentinel, current_context, ) +from synapse.metrics.homeserver_metrics_manager import HomeserverMetricsManager from synapse.types import ISynapseReactor from synapse.util.caches.ttlcache import TTLCache @@ -82,6 +83,8 @@ def setUp(self) -> None: self._config = config = HomeServerConfig() config.parse_config_dict(config_dict, "", "") + self.metrics_manager = HomeserverMetricsManager() + self.tls_factory = FederationPolicyForHTTPS(config) self.well_known_cache: TTLCache[bytes, Optional[bytes]] = TTLCache( @@ -92,6 +95,7 @@ def setUp(self) -> None: ) self.well_known_resolver = WellKnownResolver( self.reactor, + self.metrics_manager, Agent(self.reactor, contextFactory=self.tls_factory), b"test-agent", well_known_cache=self.well_known_cache, @@ -270,6 +274,7 @@ def _make_agent(self) -> MatrixFederationAgent: """ return MatrixFederationAgent( reactor=cast(ISynapseReactor, self.reactor), + metrics_manager=self.metrics_manager, tls_client_options_factory=self.tls_factory, user_agent=b"test-agent", # Note that this is unused since _well_known_resolver is provided. ip_allowlist=IPSet(), @@ -1012,6 +1017,7 @@ def test_get_well_known_unsigned_cert(self) -> None: tls_factory = FederationPolicyForHTTPS(config) agent = MatrixFederationAgent( reactor=self.reactor, + metrics_manager=self.metrics_manager, tls_client_options_factory=tls_factory, user_agent=b"test-agent", # This is unused since _well_known_resolver is passed below. ip_allowlist=IPSet(), @@ -1019,6 +1025,7 @@ def test_get_well_known_unsigned_cert(self) -> None: _srv_resolver=self.mock_resolver, _well_known_resolver=WellKnownResolver( cast(ISynapseReactor, self.reactor), + self.metrics_manager, Agent(self.reactor, contextFactory=tls_factory), b"test-agent", well_known_cache=self.well_known_cache, diff --git a/tests/replication/test_federation_sender_shard.py b/tests/replication/test_federation_sender_shard.py index 58a7a9dc721..9acc772ab92 100644 --- a/tests/replication/test_federation_sender_shard.py +++ b/tests/replication/test_federation_sender_shard.py @@ -36,6 +36,7 @@ from synapse.events import EventBase, make_event_from_dict from synapse.handlers.typing import TypingWriterHandler from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent +from synapse.metrics.homeserver_metrics_manager import HomeserverMetricsManager from synapse.rest.admin import register_servlets_for_client_rest_resource from synapse.rest.client import login, room from synapse.server import HomeServer @@ -69,6 +70,7 @@ def setUp(self) -> None: reactor, _ = get_clock() self.matrix_federation_agent = MatrixFederationAgent( reactor, + metrics_manager=HomeserverMetricsManager(), tls_client_options_factory=None, user_agent=b"SynapseInTrialTest/0.0.0", ip_allowlist=None, From 41dfb20253dc5e4a3013e812a67a2dfa32f73dba Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 24 Jun 2025 13:58:32 -0500 Subject: [PATCH 07/16] Update `measure_func` to support `HasClockAndMetricsManager` --- synapse/util/metrics.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 70d342f0f76..898a3f3dd8b 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -54,8 +54,11 @@ R = TypeVar("R") -class HasClock(Protocol): +class HasClockAndMetricsManager(Protocol): + # Used to measure functions clock: Clock + # Used to namespace the metrics to the given homeserver + metrics_manager: HomeserverMetricsManager def measure_func( @@ -81,12 +84,14 @@ async def foo(...): """ def wrapper( - func: Callable[Concatenate[HasClock, P], Awaitable[R]], + func: Callable[Concatenate[HasClockAndMetricsManager, P], Awaitable[R]], ) -> Callable[P, Awaitable[R]]: block_name = func.__name__ if name is None else name @wraps(func) - async def measured_func(self: HasClock, *args: P.args, **kwargs: P.kwargs) -> R: + async def measured_func( + self: HasClockAndMetricsManager, *args: P.args, **kwargs: P.kwargs + ) -> R: with Measure(self.clock, self.metrics_manager, block_name): r = await func(self, *args, **kwargs) return r From f46904272fbfc9fd044f5fdb8e0ec445c79ba836 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 24 Jun 2025 14:02:32 -0500 Subject: [PATCH 08/16] Make sure `@measure_func` usages have the necessary properties --- synapse/federation/sender/transaction_manager.py | 3 +++ synapse/handlers/device.py | 11 ++++++++++- synapse/handlers/message.py | 5 ++++- synapse/push/bulk_push_rule_evaluator.py | 5 ++++- synapse/state/__init__.py | 7 +++++-- 5 files changed, 26 insertions(+), 5 deletions(-) diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py index d8a3eaa525a..ca82929c182 100644 --- a/synapse/federation/sender/transaction_manager.py +++ b/synapse/federation/sender/transaction_manager.py @@ -60,6 +60,9 @@ class TransactionManager: def __init__(self, hs: "synapse.server.HomeServer"): self._server_name = hs.hostname self.clock = hs.get_clock() # nb must be called this for @measure_func + self.metrics_manager = ( + hs.metrics_manager + ) # nb must be called this for @measure_func self._store = hs.get_datastores().main self._transaction_actions = TransactionActions(self._store) self._transport_layer = hs.get_federation_transport_client() diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 8f9bf92fda3..97e80d90816 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -526,6 +526,11 @@ class DeviceHandler(DeviceWorkerHandler): def __init__(self, hs: "HomeServer"): super().__init__(hs) + self.clock = hs.get_clock() # nb must be called this for @measure_func + self.metrics_manager = ( + hs.metrics_manager + ) # nb must be called this for @measure_func + self.federation_sender = hs.get_federation_sender() self._account_data_handler = hs.get_account_data_handler() self._storage_controllers = hs.get_storage_controllers() @@ -1214,10 +1219,14 @@ class DeviceListUpdater(DeviceListWorkerUpdater): def __init__(self, hs: "HomeServer", device_handler: DeviceHandler): self.store = hs.get_datastores().main self.federation = hs.get_federation_client() - self.clock = hs.get_clock() self.device_handler = device_handler self._notifier = hs.get_notifier() + self.clock = hs.get_clock() # nb must be called this for @measure_func + self.metrics_manager = ( + hs.metrics_manager + ) # nb must be called this for @measure_func + self._remote_edu_linearizer = Linearizer(name="remote_device_list") self._resync_linearizer = Linearizer(name="remote_device_resync") diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 5d6ee6996f7..f65e359ce32 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -481,7 +481,10 @@ def __init__(self, hs: "HomeServer"): self.store = hs.get_datastores().main self._storage_controllers = hs.get_storage_controllers() self.state = hs.get_state_handler() - self.clock = hs.get_clock() + self.clock = hs.get_clock() # nb must be called this for @measure_func + self.metrics_manager = ( + hs.metrics_manager + ) # nb must be called this for @measure_func self.validator = EventValidator() self.profile_handler = hs.get_profile_handler() self.event_builder_factory = hs.get_event_builder_factory() diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 1f4f5b90c3a..8e324a8ae3d 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -129,7 +129,10 @@ class BulkPushRuleEvaluator: def __init__(self, hs: "HomeServer"): self.hs = hs self.store = hs.get_datastores().main - self.clock = hs.get_clock() + self.clock = hs.get_clock() # nb must be called this for @measure_func + self.metrics_manager = ( + hs.metrics_manager + ) # nb must be called this for @measure_func self._event_auth_handler = hs.get_event_auth_handler() self.should_calculate_push_rules = self.hs.config.push.enable_push diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 3b24c25b810..a160634ae1c 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -189,10 +189,13 @@ class StateHandler: """ def __init__(self, hs: "HomeServer"): - self.clock = hs.get_clock() + self.hs = hs self.store = hs.get_datastores().main self._state_storage_controller = hs.get_storage_controllers().state - self.hs = hs + self.clock = hs.get_clock() # nb must be called this for @measure_func + self.metrics_manager = ( + hs.metrics_manager + ) # nb must be called this for @measure_func self._state_resolution_handler = hs.get_state_resolution_handler() self._storage_controllers = hs.get_storage_controllers() self._events_shard_config = hs.config.worker.events_shard_config From 6c39fc8a9a90155c3c55c0b082f534b33ba386c4 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 24 Jun 2025 16:31:50 -0500 Subject: [PATCH 09/16] Fix mypy complaining about unknown types by changing property order around Fix mypy complaints ``` synapse/handlers/delayed_events.py:266: error: Cannot determine type of "validator" [has-type] synapse/handlers/delayed_events.py:267: error: Cannot determine type of "event_builder_factory" [has-type] ``` --- synapse/handlers/message.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index f65e359ce32..5dfda6a0c38 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -481,13 +481,13 @@ def __init__(self, hs: "HomeServer"): self.store = hs.get_datastores().main self._storage_controllers = hs.get_storage_controllers() self.state = hs.get_state_handler() + self.validator = EventValidator() + self.event_builder_factory = hs.get_event_builder_factory() self.clock = hs.get_clock() # nb must be called this for @measure_func self.metrics_manager = ( hs.metrics_manager ) # nb must be called this for @measure_func - self.validator = EventValidator() self.profile_handler = hs.get_profile_handler() - self.event_builder_factory = hs.get_event_builder_factory() self.server_name = hs.hostname self.notifier = hs.get_notifier() self.config = hs.config From e46690c4180e40d5fa5522bc7679ef4f79301196 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 24 Jun 2025 16:33:53 -0500 Subject: [PATCH 10/16] Add changelog --- changelog.d/18591.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/18591.misc diff --git a/changelog.d/18591.misc b/changelog.d/18591.misc new file mode 100644 index 00000000000..c8893c2806b --- /dev/null +++ b/changelog.d/18591.misc @@ -0,0 +1 @@ +Refactor `Measure` block metrics to be homeserver-scoped. From 3277afd520aad448b0ec787d6b01aede6452e4b2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 24 Jun 2025 16:42:33 -0500 Subject: [PATCH 11/16] Refactor from `hs.metrics_manager` -> `hs.get_metrics_manager()` This matches the pattern of other homeserver attributes like `hs.get_clock()` --- synapse/federation/send_queue.py | 2 +- synapse/federation/sender/__init__.py | 2 +- synapse/federation/sender/transaction_manager.py | 2 +- synapse/handlers/appservice.py | 2 +- synapse/handlers/delayed_events.py | 2 +- synapse/handlers/device.py | 4 ++-- synapse/handlers/message.py | 2 +- synapse/handlers/presence.py | 4 ++-- synapse/handlers/sync.py | 2 +- synapse/handlers/typing.py | 2 +- synapse/handlers/user_directory.py | 2 +- synapse/http/matrixfederationclient.py | 4 ++-- synapse/module_api/callbacks/media_repository_callbacks.py | 2 +- synapse/module_api/callbacks/ratelimit_callbacks.py | 2 +- synapse/module_api/callbacks/spamchecker_callbacks.py | 2 +- synapse/push/bulk_push_rule_evaluator.py | 2 +- synapse/replication/http/federation.py | 2 +- synapse/replication/http/send_event.py | 2 +- synapse/replication/http/send_events.py | 2 +- synapse/replication/tcp/client.py | 2 +- synapse/replication/tcp/resource.py | 2 +- synapse/server.py | 6 ++++-- synapse/state/__init__.py | 4 ++-- synapse/storage/controllers/persist_events.py | 2 +- synapse/storage/controllers/state.py | 2 +- synapse/storage/databases/main/events_worker.py | 2 +- synapse/storage/databases/main/roommember.py | 2 +- tests/handlers/test_typing.py | 2 +- 28 files changed, 35 insertions(+), 33 deletions(-) diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py index 7a7882d7a6c..3393a06b45b 100644 --- a/synapse/federation/send_queue.py +++ b/synapse/federation/send_queue.py @@ -73,7 +73,7 @@ class FederationRemoteSendQueue(AbstractFederationSender): def __init__(self, hs: "HomeServer"): self.server_name = hs.hostname self.clock = hs.get_clock() - self.metrics_manager = hs.metrics_manager + self.metrics_manager = hs.get_metrics_manager() self.notifier = hs.get_notifier() self.is_mine_id = hs.is_mine_id self.is_mine_server_name = hs.is_mine_server_name diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index 4eeab926c90..156938b61b9 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -378,7 +378,7 @@ def __init__(self, hs: "HomeServer"): self._storage_controllers = hs.get_storage_controllers() self.clock = hs.get_clock() - self.metrics_manager = hs.metrics_manager + self.metrics_manager = hs.get_metrics_manager() self.is_mine_id = hs.is_mine_id self.is_mine_server_name = hs.is_mine_server_name diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py index ca82929c182..92d975c471a 100644 --- a/synapse/federation/sender/transaction_manager.py +++ b/synapse/federation/sender/transaction_manager.py @@ -61,7 +61,7 @@ def __init__(self, hs: "synapse.server.HomeServer"): self._server_name = hs.hostname self.clock = hs.get_clock() # nb must be called this for @measure_func self.metrics_manager = ( - hs.metrics_manager + hs.get_metrics_manager() ) # nb must be called this for @measure_func self._store = hs.get_datastores().main self._transaction_actions = TransactionActions(self._store) diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index 483f824556b..9ead045c6c5 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -79,7 +79,7 @@ def __init__(self, hs: "HomeServer"): self.scheduler = hs.get_application_service_scheduler() self.started_scheduler = False self.clock = hs.get_clock() - self.metrics_manager = hs.metrics_manager + self.metrics_manager = hs.get_metrics_manager() self.notify_appservices = hs.config.worker.should_notify_appservices self.event_sources = hs.get_event_sources() self._msc2409_to_device_messages_enabled = ( diff --git a/synapse/handlers/delayed_events.py b/synapse/handlers/delayed_events.py index 049dab82c03..e3d3bf75578 100644 --- a/synapse/handlers/delayed_events.py +++ b/synapse/handlers/delayed_events.py @@ -58,7 +58,7 @@ def __init__(self, hs: "HomeServer"): self._storage_controllers = hs.get_storage_controllers() self._config = hs.config self._clock = hs.get_clock() - self.metrics_manager = hs.metrics_manager + self.metrics_manager = hs.get_metrics_manager() self._event_creation_handler = hs.get_event_creation_handler() self._room_member_handler = hs.get_room_member_handler() diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 97e80d90816..4c88c75f817 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -528,7 +528,7 @@ def __init__(self, hs: "HomeServer"): self.clock = hs.get_clock() # nb must be called this for @measure_func self.metrics_manager = ( - hs.metrics_manager + hs.get_metrics_manager() ) # nb must be called this for @measure_func self.federation_sender = hs.get_federation_sender() @@ -1224,7 +1224,7 @@ def __init__(self, hs: "HomeServer", device_handler: DeviceHandler): self.clock = hs.get_clock() # nb must be called this for @measure_func self.metrics_manager = ( - hs.metrics_manager + hs.get_metrics_manager() ) # nb must be called this for @measure_func self._remote_edu_linearizer = Linearizer(name="remote_device_list") diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 5dfda6a0c38..f8512e9d111 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -485,7 +485,7 @@ def __init__(self, hs: "HomeServer"): self.event_builder_factory = hs.get_event_builder_factory() self.clock = hs.get_clock() # nb must be called this for @measure_func self.metrics_manager = ( - hs.metrics_manager + hs.get_metrics_manager() ) # nb must be called this for @measure_func self.profile_handler = hs.get_profile_handler() self.server_name = hs.hostname diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index a2d74b99213..4c8a8a61ddb 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -749,7 +749,7 @@ def __init__(self, hs: "HomeServer"): super().__init__(hs) self.wheel_timer: WheelTimer[str] = WheelTimer() self.notifier = hs.get_notifier() - self.metrics_manager = hs.metrics_manager + self.metrics_manager = hs.get_metrics_manager() federation_registry = hs.get_federation_registry() @@ -1763,7 +1763,7 @@ def __init__(self, hs: "HomeServer"): self.get_presence_handler = hs.get_presence_handler self.get_presence_router = hs.get_presence_router self.clock = hs.get_clock() - self.metrics_manager = hs.metrics_manager + self.metrics_manager = hs.get_metrics_manager() self.store = hs.get_datastores().main async def get_new_events( diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 4f05d7ea7d7..9bbca5537e3 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -337,7 +337,7 @@ def __init__(self, hs: "HomeServer"): self._push_rules_handler = hs.get_push_rules_handler() self.event_sources = hs.get_event_sources() self.clock = hs.get_clock() - self.metrics_manager = hs.metrics_manager + self.metrics_manager = hs.get_metrics_manager() self.state = hs.get_state_handler() self.auth_blocking = hs.get_auth_blocking() self._storage_controllers = hs.get_storage_controllers() diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index 89373f2bc1a..36f1a3a8e7e 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -505,7 +505,7 @@ class TypingNotificationEventSource(EventSource[int, JsonMapping]): def __init__(self, hs: "HomeServer"): self._main_store = hs.get_datastores().main self.clock = hs.get_clock() - self.metrics_manager = hs.metrics_manager + self.metrics_manager = hs.get_metrics_manager() # We can't call get_typing_handler here because there's a cycle: # # Typing -> Notifier -> TypingNotificationEventSource -> Typing diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index b509934486e..78bbe6de56c 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -104,7 +104,7 @@ def __init__(self, hs: "HomeServer"): self._storage_controllers = hs.get_storage_controllers() self.server_name = hs.hostname self.clock = hs.get_clock() - self.metrics_manager = hs.metrics_manager + self.metrics_manager = hs.get_metrics_manager() self.notifier = hs.get_notifier() self.is_mine_id = hs.is_mine_id self.update_user_directory = hs.config.worker.should_update_user_directory diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index c5d1d3d3c80..345bec6184e 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -406,7 +406,7 @@ def __init__( self.server_name = hs.hostname self.reactor = hs.get_reactor() - self.metrics_manager = hs.metrics_manager + self.metrics_manager = hs.get_metrics_manager() user_agent = hs.version_string if hs.config.server.user_agent_suffix: @@ -453,7 +453,7 @@ def __init__( ) self.clock = hs.get_clock() - self.metrics_manager = hs.metrics_manager + self.metrics_manager = hs.get_metrics_manager() self._store = hs.get_datastores().main self.version_string_bytes = hs.version_string.encode("ascii") self.default_timeout_seconds = hs.config.federation.client_timeout_ms / 1000 diff --git a/synapse/module_api/callbacks/media_repository_callbacks.py b/synapse/module_api/callbacks/media_repository_callbacks.py index 0ac7d06c108..abcb28159fc 100644 --- a/synapse/module_api/callbacks/media_repository_callbacks.py +++ b/synapse/module_api/callbacks/media_repository_callbacks.py @@ -32,7 +32,7 @@ class MediaRepositoryModuleApiCallbacks: def __init__(self, hs: "HomeServer") -> None: self.clock = hs.get_clock() - self.metrics_manager = hs.metrics_manager + self.metrics_manager = hs.get_metrics_manager() self._get_media_config_for_user_callbacks: List[ GET_MEDIA_CONFIG_FOR_USER_CALLBACK ] = [] diff --git a/synapse/module_api/callbacks/ratelimit_callbacks.py b/synapse/module_api/callbacks/ratelimit_callbacks.py index 93259c214f5..84c2ad93995 100644 --- a/synapse/module_api/callbacks/ratelimit_callbacks.py +++ b/synapse/module_api/callbacks/ratelimit_callbacks.py @@ -44,7 +44,7 @@ class RatelimitOverride: class RatelimitModuleApiCallbacks: def __init__(self, hs: "HomeServer") -> None: self.clock = hs.get_clock() - self.metrics_manager = hs.metrics_manager + self.metrics_manager = hs.get_metrics_manager() self._get_ratelimit_override_for_user_callbacks: List[ GET_RATELIMIT_OVERRIDE_FOR_USER_CALLBACK ] = [] diff --git a/synapse/module_api/callbacks/spamchecker_callbacks.py b/synapse/module_api/callbacks/spamchecker_callbacks.py index 10e617e567b..bf33faf9160 100644 --- a/synapse/module_api/callbacks/spamchecker_callbacks.py +++ b/synapse/module_api/callbacks/spamchecker_callbacks.py @@ -340,7 +340,7 @@ class SpamCheckerModuleApiCallbacks: def __init__(self, hs: "synapse.server.HomeServer") -> None: self.clock = hs.get_clock() - self.metrics_manager = hs.metrics_manager + self.metrics_manager = hs.get_metrics_manager() self._check_event_for_spam_callbacks: List[CHECK_EVENT_FOR_SPAM_CALLBACK] = [] self._should_drop_federated_event_callbacks: List[ diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 8e324a8ae3d..68e2bf20a9d 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -131,7 +131,7 @@ def __init__(self, hs: "HomeServer"): self.store = hs.get_datastores().main self.clock = hs.get_clock() # nb must be called this for @measure_func self.metrics_manager = ( - hs.metrics_manager + hs.get_metrics_manager() ) # nb must be called this for @measure_func self._event_auth_handler = hs.get_event_auth_handler() self.should_calculate_push_rules = self.hs.config.push.enable_push diff --git a/synapse/replication/http/federation.py b/synapse/replication/http/federation.py index a0bf8d181a0..6433ae68ed3 100644 --- a/synapse/replication/http/federation.py +++ b/synapse/replication/http/federation.py @@ -79,7 +79,7 @@ def __init__(self, hs: "HomeServer"): self.store = hs.get_datastores().main self._storage_controllers = hs.get_storage_controllers() self.clock = hs.get_clock() - self.metrics_manager = hs.metrics_manager + self.metrics_manager = hs.get_metrics_manager() self.federation_event_handler = hs.get_federation_event_handler() @staticmethod diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index f0ab5d2456b..0b14a3fd207 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -80,7 +80,7 @@ def __init__(self, hs: "HomeServer"): self.store = hs.get_datastores().main self._storage_controllers = hs.get_storage_controllers() self.clock = hs.get_clock() - self.metrics_manager = hs.metrics_manager + self.metrics_manager = hs.get_metrics_manager() @staticmethod async def _serialize_payload( # type: ignore[override] diff --git a/synapse/replication/http/send_events.py b/synapse/replication/http/send_events.py index d38cec9de3d..c574d8f4e48 100644 --- a/synapse/replication/http/send_events.py +++ b/synapse/replication/http/send_events.py @@ -81,7 +81,7 @@ def __init__(self, hs: "HomeServer"): self.store = hs.get_datastores().main self._storage_controllers = hs.get_storage_controllers() self.clock = hs.get_clock() - self.metrics_manager = hs.metrics_manager + self.metrics_manager = hs.get_metrics_manager() @staticmethod async def _serialize_payload( # type: ignore[override] diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 64ebde0b246..2a4c502b9e5 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -79,7 +79,7 @@ def __init__(self, hs: "HomeServer"): self.notifier = hs.get_notifier() self._reactor = hs.get_reactor() self._clock = hs.get_clock() - self.metrics_manager = hs.metrics_manager + self.metrics_manager = hs.get_metrics_manager() self._streams = hs.get_replication_streams() self._instance_name = hs.get_instance_name() self._typing_handler = hs.get_typing_handler() diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py index 485cc267a46..c21b9a66213 100644 --- a/synapse/replication/tcp/resource.py +++ b/synapse/replication/tcp/resource.py @@ -80,7 +80,7 @@ class ReplicationStreamer: def __init__(self, hs: "HomeServer"): self.store = hs.get_datastores().main self.clock = hs.get_clock() - self.metrics_manager = hs.metrics_manager + self.metrics_manager = hs.get_metrics_manager() self.notifier = hs.get_notifier() self._instance_name = hs.get_instance_name() diff --git a/synapse/server.py b/synapse/server.py index 6b5302fa029..466e07491fe 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -311,8 +311,6 @@ def __init__( # This attribute is set by the free function `refresh_certificate`. self.tls_server_context_factory: Optional[IOpenSSLContextFactory] = None - self.metrics_manager = HomeserverMetricsManager() - def register_module_web_resource(self, path: str, resource: Resource) -> None: """Allows a module to register a web resource to be served at the given path. @@ -417,6 +415,10 @@ def is_mine_server_name(self, server_name: str) -> bool: def get_clock(self) -> Clock: return Clock(self._reactor) + @cache_in_self + def get_metrics_manager(self) -> HomeserverMetricsManager: + return HomeserverMetricsManager() + def get_datastores(self) -> Databases: if not self.datastores: raise Exception("HomeServer.setup must be called before getting datastores") diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index a160634ae1c..0e3c23d7611 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -194,7 +194,7 @@ def __init__(self, hs: "HomeServer"): self._state_storage_controller = hs.get_storage_controllers().state self.clock = hs.get_clock() # nb must be called this for @measure_func self.metrics_manager = ( - hs.metrics_manager + hs.get_metrics_manager() ) # nb must be called this for @measure_func self._state_resolution_handler = hs.get_state_resolution_handler() self._storage_controllers = hs.get_storage_controllers() @@ -635,7 +635,7 @@ class StateResolutionHandler: def __init__(self, hs: "HomeServer"): self.clock = hs.get_clock() - self.metrics_manager = hs.metrics_manager + self.metrics_manager = hs.get_metrics_manager() self.resolve_linearizer = Linearizer(name="state_resolve_lock") diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index ed311747dba..fb7f085221c 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -338,7 +338,7 @@ def __init__( self.persist_events_store = stores.persist_events self._clock = hs.get_clock() - self.metrics_manager = hs.metrics_manager + self.metrics_manager = hs.get_metrics_manager() self._instance_name = hs.get_instance_name() self.is_mine_id = hs.is_mine_id self._event_persist_queue = _EventPeristenceQueue( diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index 35831bc12a9..40e4663e8e9 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -70,7 +70,7 @@ class StateStorageController: def __init__(self, hs: "HomeServer", stores: "Databases"): self._is_mine_id = hs.is_mine_id self._clock = hs.get_clock() - self.metrics_manager = hs.metrics_manager + self.metrics_manager = hs.get_metrics_manager() self.stores = stores self._partial_state_events_tracker = PartialStateEventsTracker(stores.main) self._partial_state_room_tracker = PartialCurrentStateTracker(stores.main) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 5411142d1db..33a16b1697f 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -226,7 +226,7 @@ def __init__( hs: "HomeServer", ): super().__init__(database, db_conn, hs) - self.metrics_manager = hs.metrics_manager + self.metrics_manager = hs.get_metrics_manager() self._stream_id_gen: MultiWriterIdGenerator self._backfill_id_gen: MultiWriterIdGenerator diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index fc33aef831b..a6a685a8041 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -100,7 +100,7 @@ def __init__( hs: "HomeServer", ): super().__init__(database, db_conn, hs) - self.metrics_manager = hs.metrics_manager + self.metrics_manager = hs.get_metrics_manager() self._server_notices_mxid = hs.config.servernotices.server_notices_mxid diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index 0bc2c4933bb..7db9d9c1f9c 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -101,7 +101,7 @@ def make_homeserver( self.mock_federation_client.agent = MatrixFederationAgent( reactor=reactor, # After we get access to the `hs` homeserver instance, we can replace the federation agent - metrics_manager=hs.metrics_manager, + metrics_manager=hs.get_metrics_manager(), tls_client_options_factory=None, user_agent=b"SynapseInTrialTest/0.0.0", ip_allowlist=None, From ee51a45c45a41533eae2becf35b1c331894e1953 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 24 Jun 2025 16:50:02 -0500 Subject: [PATCH 12/16] Update doc comment further --- synapse/util/metrics.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 898a3f3dd8b..381aec5c1d4 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -66,8 +66,9 @@ def measure_func( ) -> Callable[[Callable[P, Awaitable[R]]], Callable[P, Awaitable[R]]]: """Decorate an async method with a `Measure` context manager. - The Measure is created using `self.clock`; it should only be used to decorate - methods in classes defining an instance-level `clock` and `metrics_manager` attribute. + The Measure is created using `self.clock` and `self.metrics_manager`; it should only + be used to decorate methods in classes defining an instance-level `clock` and + `metrics_manager` attribute. Usage: From 554673dd30217038813cf4e6e1431f9129525712 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 24 Jun 2025 18:24:52 -0500 Subject: [PATCH 13/16] Use homeserver specific registry and combine in the metrics endpoint output --- synapse/app/_base.py | 29 ++++++++++++--- synapse/app/generic_worker.py | 7 ++-- synapse/app/homeserver.py | 7 ++-- synapse/metrics/__init__.py | 36 ++++++++++++------- synapse/metrics/_twisted_exposition.py | 22 +++++++++--- synapse/metrics/homeserver_metrics_manager.py | 6 ++-- 6 files changed, 79 insertions(+), 28 deletions(-) diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 49ab5d680d4..14cbd38ee80 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -76,6 +76,7 @@ from synapse.logging.opentracing import init_tracer from synapse.metrics import install_gc_manager, register_threadpool from synapse.metrics.background_process_metrics import wrap_as_background_process +from synapse.metrics.homeserver_metrics_manager import HomeserverMetricsManager from synapse.metrics.jemalloc import setup_jemalloc_stats from synapse.module_api.callbacks.spamchecker_callbacks import load_legacy_spam_checkers from synapse.module_api.callbacks.third_party_event_rules_callbacks import ( @@ -283,18 +284,38 @@ async def wrapper() -> None: reactor.callWhenRunning(lambda: defer.ensureDeferred(wrapper())) -def listen_metrics(bind_addresses: StrCollection, port: int) -> None: +def listen_metrics( + bind_addresses: StrCollection, port: int, metrics_manager: HomeserverMetricsManager +) -> None: """ Start Prometheus metrics server. """ - from prometheus_client import start_http_server as start_http_server_prometheus + from prometheus_client import ( + REGISTRY, + CollectorRegistry, + start_http_server as start_http_server_prometheus, + ) - from synapse.metrics import RegistryProxy + from synapse.metrics import CombinedRegistryProxy + + combined_registry_proxy = CombinedRegistryProxy( + [ + # TODO: Remove `REGISTRY` once all metrics have been migrated to the + # homeserver specific metrics collector registry, see + # https://github.com/element-hq/synapse/issues/18592 + REGISTRY, + metrics_manager.metrics_collector_registry, + ] + ) + # Cheeky cast but matches the signature of a `CollectorRegistry` instance enough + # for it to be usable in the contexts in which we use it. + # TODO Do something nicer about this. + registry = cast(CollectorRegistry, combined_registry_proxy) for host in bind_addresses: logger.info("Starting metrics listener on %s:%d", host, port) _set_prometheus_client_use_created_metrics(False) - start_http_server_prometheus(port, addr=host, registry=RegistryProxy) + start_http_server_prometheus(port, addr=host, registry=registry) def _set_prometheus_client_use_created_metrics(new_value: bool) -> None: diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 75c65ccc0d7..22df3667708 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -49,7 +49,7 @@ from synapse.federation.transport.server import TransportLayerServer from synapse.http.server import JsonResource, OptionsResource from synapse.logging.context import LoggingContext -from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy +from synapse.metrics import METRICS_PREFIX, MetricsResource from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource from synapse.rest import ClientRestResource, admin from synapse.rest.health import HealthResource @@ -186,7 +186,9 @@ def _listen_http(self, listener_config: ListenerConfig) -> None: for res in listener_config.http_options.resources: for name in res.names: if name == "metrics": - resources[METRICS_PREFIX] = MetricsResource(RegistryProxy) + resources[METRICS_PREFIX] = MetricsResource( + metrics_manager=self.get_metrics_manager() + ) elif name == "client": resource: Resource = ClientRestResource(self) @@ -294,6 +296,7 @@ def start_listening(self) -> None: _base.listen_metrics( listener.bind_addresses, listener.port, + self.get_metrics_manager(), ) else: raise ConfigError( diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index e027b5eaea2..26f4ad8eb5b 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -60,7 +60,7 @@ StaticResource, ) from synapse.logging.context import LoggingContext -from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy +from synapse.metrics import METRICS_PREFIX, MetricsResource from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource from synapse.rest import ClientRestResource, admin from synapse.rest.health import HealthResource @@ -252,7 +252,9 @@ def _configure_named_resource( resources[SERVER_KEY_PREFIX] = KeyResource(self) if name == "metrics" and self.config.metrics.enable_metrics: - metrics_resource: Resource = MetricsResource(RegistryProxy) + metrics_resource: Resource = MetricsResource( + metrics_manager=self.get_metrics_manager() + ) if compress: metrics_resource = gz_wrap(metrics_resource) resources[METRICS_PREFIX] = metrics_resource @@ -296,6 +298,7 @@ def start_listening(self) -> None: _base.listen_metrics( listener.bind_addresses, listener.port, + self.get_metrics_manager(), ) else: raise ConfigError( diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 86ac2c23959..634582749be 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -30,6 +30,7 @@ Dict, Generic, Iterable, + List, Mapping, Optional, Set, @@ -37,7 +38,6 @@ Type, TypeVar, Union, - cast, ) import attr @@ -67,19 +67,31 @@ HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat") -class _RegistryProxy: - @staticmethod - def collect() -> Iterable[Metric]: - for metric in REGISTRY.collect(): - if not metric.name.startswith("__"): - yield metric +class CombinedRegistryProxy: + """ + Wrapper around the global Prometheus metric registry so we can also include our + homeserver-specific metrics. + + Usage: + ```python + combined_registry_proxy = CombinedRegistryProxy([ + homeserver_metrics_collector_registry, + prometheus_client.REGISTRY + ]) + registry = cast(CollectorRegistry, combined_registry_proxy) + ``` + """ + def __init__( + self, + registry_list: List[CollectorRegistry], + ): + self.registry_list = registry_list -# A little bit nasty, but collect() above is static so a Protocol doesn't work. -# _RegistryProxy matches the signature of a CollectorRegistry instance enough -# for it to be usable in the contexts in which we use it. -# TODO Do something nicer about this. -RegistryProxy = cast(CollectorRegistry, _RegistryProxy) + def collect(self) -> Iterable[Metric]: + for registry in self.registry_list: + for metric in registry.collect(): + yield metric @attr.s(slots=True, hash=True, auto_attribs=True) diff --git a/synapse/metrics/_twisted_exposition.py b/synapse/metrics/_twisted_exposition.py index 9652ca83fb1..251b5e50316 100644 --- a/synapse/metrics/_twisted_exposition.py +++ b/synapse/metrics/_twisted_exposition.py @@ -20,11 +20,16 @@ # # -from prometheus_client import REGISTRY, CollectorRegistry, generate_latest +from typing import TYPE_CHECKING + +from prometheus_client import REGISTRY, generate_latest from twisted.web.resource import Resource from twisted.web.server import Request +if TYPE_CHECKING: + from synapse.metrics.homeserver_metrics_manager import HomeserverMetricsManager + CONTENT_TYPE_LATEST = "text/plain; version=0.0.4; charset=utf-8" @@ -35,11 +40,20 @@ class MetricsResource(Resource): isLeaf = True - def __init__(self, registry: CollectorRegistry = REGISTRY): - self.registry = registry + def __init__(self, metrics_manager: "HomeserverMetricsManager"): + self.metrics_manager = metrics_manager def render_GET(self, request: Request) -> bytes: request.setHeader(b"Content-Type", CONTENT_TYPE_LATEST.encode("ascii")) - response = generate_latest(self.registry) + # While we're in the middle of the refactor of metrics in Synapse, we need to + # merge the metrics from the global registry and the homeserver specific metrics + # collector registry. + # + # TODO: Remove `generate_latest(REGISTRY)` once all homeserver metrics have been + # migrated to the homeserver specific metrics collector registry, see + # https://github.com/element-hq/synapse/issues/18592 + response = generate_latest(REGISTRY) + generate_latest( + self.metrics_manager.metrics_collector_registry + ) request.setHeader(b"Content-Length", str(len(response))) return response diff --git a/synapse/metrics/homeserver_metrics_manager.py b/synapse/metrics/homeserver_metrics_manager.py index 9e4cdf8034f..e943d7e1f3f 100644 --- a/synapse/metrics/homeserver_metrics_manager.py +++ b/synapse/metrics/homeserver_metrics_manager.py @@ -14,7 +14,7 @@ from typing import Protocol -from prometheus_client import REGISTRY, CollectorRegistry, Counter +from prometheus_client import CollectorRegistry, Counter from synapse.metrics import InFlightGauge @@ -113,9 +113,7 @@ class HomeserverMetricsManager: """ def __init__(self) -> None: - # TODO: use `self.metrics_collector_registry = CollectorRegistry(auto_describe=True)` - # once we refactor our metrics endpoints to use the specified registry. - self.metrics_collector_registry = REGISTRY + self.metrics_collector_registry = CollectorRegistry(auto_describe=True) self.block_metrics = BlockMetrics( metrics_collector_registry=self.metrics_collector_registry, From f6f079be4151d24363665bc8020efb5ca44a9fa8 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 24 Jun 2025 18:36:16 -0500 Subject: [PATCH 14/16] Better docstring --- synapse/metrics/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 634582749be..cebc7136999 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -69,8 +69,8 @@ class CombinedRegistryProxy: """ - Wrapper around the global Prometheus metric registry so we can also include our - homeserver-specific metrics. + Wrapper that combines multiple Prometheus `CollectorRegistry` instances, presenting + them as a single unified registry when collecting metrics. Usage: ```python @@ -78,6 +78,9 @@ class CombinedRegistryProxy: homeserver_metrics_collector_registry, prometheus_client.REGISTRY ]) + # Cheeky cast but matches the signature of a `CollectorRegistry` instance enough + # for it to be usable in the contexts in which we use it. + # TODO Do something nicer about this. registry = cast(CollectorRegistry, combined_registry_proxy) ``` """ From 71f415dd49a0069d261d42a708c764ea87b31b81 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 24 Jun 2025 18:51:10 -0500 Subject: [PATCH 15/16] Fix `tests.test_state` missing `get_metrics_manager()` on homeserver mock --- tests/test_state.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_state.py b/tests/test_state.py index adb72b07304..c3780510df3 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -239,6 +239,7 @@ def setUp(self) -> None: "get_auth", "get_state_handler", "get_clock", + "get_metrics_manager", "get_state_resolution_handler", "get_account_validity_handler", "get_macaroon_generator", From 1a5e9f69f3b422ae6f2b20a5691b8b13e0b8c75a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 24 Jun 2025 19:06:22 -0500 Subject: [PATCH 16/16] Fill in `get_metrics_manager()` on port db `MockHomeserver` --- synapse/_scripts/synapse_port_db.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 573c70696e2..b94f1732c7e 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -58,6 +58,7 @@ make_deferred_yieldable, run_in_background, ) +from synapse.metrics.homeserver_metrics_manager import HomeserverMetricsManager from synapse.notifier import ReplicationNotifier from synapse.storage.database import DatabasePool, LoggingTransaction, make_conn from synapse.storage.databases.main import FilteringWorkerStore @@ -308,6 +309,7 @@ def set_room_is_public(self, room_id: str, is_public: bool) -> NoReturn: class MockHomeserver: def __init__(self, config: HomeServerConfig): self.clock = Clock(reactor) + self.metrics_manager = HomeserverMetricsManager() self.config = config self.hostname = config.server.server_name self.version_string = SYNAPSE_VERSION @@ -315,6 +317,9 @@ def __init__(self, config: HomeServerConfig): def get_clock(self) -> Clock: return self.clock + def get_metrics_manager(self) -> HomeserverMetricsManager: + return self.metrics_manager + def get_reactor(self) -> ISynapseReactor: return reactor