Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
02a7668
Add `instance` label to `Measure`
MadLittleMods Jun 26, 2025
65035b6
Refactor `@measure_func` decorator to include server name
MadLittleMods Jun 26, 2025
d05b6ca
Bulk refactor `Measure(...)` to add `server_name`
MadLittleMods Jun 26, 2025
6731c4b
Refactor `Measure` in `WellKnownResolver`
MadLittleMods Jun 26, 2025
c7d15db
Bulk refactor `@measure_func` decorator usage
MadLittleMods Jun 26, 2025
c232ec7
Fix mypy complaining about unknown types by changing property order a…
MadLittleMods Jun 26, 2025
521c68c
Add changelog
MadLittleMods Jun 26, 2025
652c34b
Better docstrings for `_InFlightMetric` -> `_BlockInFlightMetric`
MadLittleMods Jun 26, 2025
5ad555c
Add docstrings for block metrics
MadLittleMods Jun 26, 2025
06f9af1
Add introduction comment
MadLittleMods Jun 26, 2025
e0f8992
Fix failing to save metrics because of incorrect label names
MadLittleMods Jun 26, 2025
1cb0f77
Add metric docstring to metric description/documentation
MadLittleMods Jul 3, 2025
85293a0
`instance` -> `server_name` label
MadLittleMods Jul 3, 2025
9ef4607
Merge branch 'develop' into madlittlemods/per-hs-metrics-measure3
MadLittleMods Jul 3, 2025
c4abc83
Merge branch 'develop' into madlittlemods/per-hs-metrics-measure3
MadLittleMods Jul 4, 2025
820a8ef
Merge branch 'develop' into madlittlemods/per-hs-metrics-measure3
MadLittleMods Jul 9, 2025
76fbcb7
Merge branch 'develop' into madlittlemods/per-hs-metrics-measure3
MadLittleMods Jul 15, 2025
db172b6
Merge branch 'develop' into madlittlemods/per-hs-metrics-measure3
MadLittleMods Jul 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/18601.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Refactor `Measure` block metrics to be homeserver-scoped.
4 changes: 3 additions & 1 deletion synapse/federation/send_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,9 @@ def _clear_queue(self) -> None:

def _clear_queue_before_pos(self, position_to_delete: int) -> None:
"""Clear all the queues from before a given position"""
with Measure(self.clock, "send_queue._clear"):
with Measure(
self.clock, name="send_queue._clear", server_name=self.server_name
):
# Delete things out of presence maps
keys = self.presence_destinations.keys()
i = self.presence_destinations.bisect_left(position_to_delete)
Expand Down
6 changes: 5 additions & 1 deletion synapse/federation/sender/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -657,7 +657,11 @@ async def handle_room_events(events: List[EventBase]) -> None:
logger.debug(
"Handling %i events in room %s", len(events), events[0].room_id
)
with Measure(self.clock, "handle_room_events"):
with Measure(
self.clock,
name="handle_room_events",
server_name=self.server_name,
):
for event in events:
await handle_event(event)

Expand Down
4 changes: 2 additions & 2 deletions synapse/federation/sender/transaction_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class TransactionManager:
"""

def __init__(self, hs: "synapse.server.HomeServer"):
self._server_name = hs.hostname
self.server_name = hs.hostname # nb must be called this for @measure_func
self.clock = hs.get_clock() # nb must be called this for @measure_func
self._store = hs.get_datastores().main
self._transaction_actions = TransactionActions(self._store)
Expand Down Expand Up @@ -116,7 +116,7 @@ async def send_new_transaction(
transaction = Transaction(
origin_server_ts=int(self.clock.time_msec()),
transaction_id=txn_id,
origin=self._server_name,
origin=self.server_name,
destination=destination,
pdus=[p.get_pdu_json() for p in pdus],
edus=[edu.get_dict() for edu in edus],
Expand Down
11 changes: 9 additions & 2 deletions synapse/handlers/appservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@

class ApplicationServicesHandler:
def __init__(self, hs: "HomeServer"):
self.server_name = hs.hostname
self.store = hs.get_datastores().main
self.is_mine_id = hs.is_mine_id
self.appservice_api = hs.get_application_service_api()
Expand Down Expand Up @@ -120,7 +121,9 @@ def notify_interested_services(self, max_token: RoomStreamToken) -> None:

@wrap_as_background_process("notify_interested_services")
async def _notify_interested_services(self, max_token: RoomStreamToken) -> None:
with Measure(self.clock, "notify_interested_services"):
with Measure(
self.clock, name="notify_interested_services", server_name=self.server_name
):
self.is_processing = True
try:
upper_bound = -1
Expand Down Expand Up @@ -329,7 +332,11 @@ async def _notify_interested_services_ephemeral(
users: Collection[Union[str, UserID]],
) -> None:
logger.debug("Checking interested services for %s", stream_key)
with Measure(self.clock, "notify_interested_services_ephemeral"):
with Measure(
self.clock,
name="notify_interested_services_ephemeral",
server_name=self.server_name,
):
for service in services:
if stream_key == StreamKeyType.TYPING:
# Note that we don't persist the token (via set_appservice_stream_type_pos)
Expand Down
5 changes: 4 additions & 1 deletion synapse/handlers/delayed_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@

class DelayedEventsHandler:
def __init__(self, hs: "HomeServer"):
self.server_name = hs.hostname
self._store = hs.get_datastores().main
self._storage_controllers = hs.get_storage_controllers()
self._config = hs.config
Expand Down Expand Up @@ -159,7 +160,9 @@ async def _unsafe_process_new_event(self) -> None:

# Loop round handling deltas until we're up to date
while True:
with Measure(self._clock, "delayed_events_delta"):
with Measure(
self._clock, name="delayed_events_delta", server_name=self.server_name
):
room_max_stream_ordering = self._store.get_room_max_stream_ordering()
if self._event_pos == room_max_stream_ordering:
return
Expand Down
5 changes: 4 additions & 1 deletion synapse/handlers/device.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,8 @@ class DeviceHandler(DeviceWorkerHandler):
def __init__(self, hs: "HomeServer"):
super().__init__(hs)

self.server_name = hs.hostname # nb must be called this for @measure_func
self.clock = hs.get_clock() # nb must be called this for @measure_func
self.federation_sender = hs.get_federation_sender()
self._account_data_handler = hs.get_account_data_handler()
self._storage_controllers = hs.get_storage_controllers()
Expand Down Expand Up @@ -1214,7 +1216,8 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
def __init__(self, hs: "HomeServer", device_handler: DeviceHandler):
self.store = hs.get_datastores().main
self.federation = hs.get_federation_client()
self.clock = hs.get_clock()
self.server_name = hs.hostname # nb must be called this for @measure_func
self.clock = hs.get_clock() # nb must be called this for @measure_func
self.device_handler = device_handler
self._notifier = hs.get_notifier()

Expand Down
8 changes: 4 additions & 4 deletions synapse/handlers/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,16 +476,16 @@ async def _expire_event(self, event_id: str) -> None:
class EventCreationHandler:
def __init__(self, hs: "HomeServer"):
self.hs = hs
self.validator = EventValidator()
self.event_builder_factory = hs.get_event_builder_factory()
self.server_name = hs.hostname # nb must be called this for @measure_func
self.clock = hs.get_clock() # nb must be called this for @measure_func
self.auth_blocking = hs.get_auth_blocking()
self._event_auth_handler = hs.get_event_auth_handler()
self.store = hs.get_datastores().main
self._storage_controllers = hs.get_storage_controllers()
self.state = hs.get_state_handler()
self.clock = hs.get_clock()
self.validator = EventValidator()
self.profile_handler = hs.get_profile_handler()
self.event_builder_factory = hs.get_event_builder_factory()
self.server_name = hs.hostname
self.notifier = hs.get_notifier()
self.config = hs.config
self.require_membership_for_aliases = (
Expand Down
14 changes: 11 additions & 3 deletions synapse/handlers/presence.py
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,7 @@ async def bump_presence_active_time(
class PresenceHandler(BasePresenceHandler):
def __init__(self, hs: "HomeServer"):
super().__init__(hs)
self.server_name = hs.hostname
self.wheel_timer: WheelTimer[str] = WheelTimer()
self.notifier = hs.get_notifier()

Expand Down Expand Up @@ -941,7 +942,9 @@ async def _update_states(

now = self.clock.time_msec()

with Measure(self.clock, "presence_update_states"):
with Measure(
self.clock, name="presence_update_states", server_name=self.server_name
):
# NOTE: We purposefully don't await between now and when we've
# calculated what we want to do with the new states, to avoid races.

Expand Down Expand Up @@ -1497,7 +1500,9 @@ async def _process_presence() -> None:
async def _unsafe_process(self) -> None:
# Loop round handling deltas until we're up to date
while True:
with Measure(self.clock, "presence_delta"):
with Measure(
self.clock, name="presence_delta", server_name=self.server_name
):
room_max_stream_ordering = self.store.get_room_max_stream_ordering()
if self._event_pos == room_max_stream_ordering:
return
Expand Down Expand Up @@ -1759,6 +1764,7 @@ def __init__(self, hs: "HomeServer"):
# Same with get_presence_router:
#
# AuthHandler -> Notifier -> PresenceEventSource -> ModuleApi -> AuthHandler
self.server_name = hs.hostname
self.get_presence_handler = hs.get_presence_handler
self.get_presence_router = hs.get_presence_router
self.clock = hs.get_clock()
Expand Down Expand Up @@ -1792,7 +1798,9 @@ async def get_new_events(
user_id = user.to_string()
stream_change_cache = self.store.presence_stream_cache

with Measure(self.clock, "presence.get_new_events"):
with Measure(
self.clock, name="presence.get_new_events", server_name=self.server_name
):
if from_key is not None:
from_key = int(from_key)

Expand Down
17 changes: 13 additions & 4 deletions synapse/handlers/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ class E2eeSyncResult:

class SyncHandler:
def __init__(self, hs: "HomeServer"):
self.server_name = hs.hostname
self.hs_config = hs.config
self.store = hs.get_datastores().main
self.notifier = hs.get_notifier()
Expand Down Expand Up @@ -710,7 +711,9 @@ async def ephemeral_by_room(

sync_config = sync_result_builder.sync_config

with Measure(self.clock, "ephemeral_by_room"):
with Measure(
self.clock, name="ephemeral_by_room", server_name=self.server_name
):
typing_key = since_token.typing_key if since_token else 0

room_ids = sync_result_builder.joined_room_ids
Expand Down Expand Up @@ -783,7 +786,9 @@ async def _load_filtered_recents(
and current token to send down to clients.
newly_joined_room
"""
with Measure(self.clock, "load_filtered_recents"):
with Measure(
self.clock, name="load_filtered_recents", server_name=self.server_name
):
timeline_limit = sync_config.filter_collection.timeline_limit()
block_all_timeline = (
sync_config.filter_collection.blocks_all_room_timeline()
Expand Down Expand Up @@ -1174,7 +1179,9 @@ async def compute_state_delta(
# updates even if they occurred logically before the previous event.
# TODO(mjark) Check for new redactions in the state events.

with Measure(self.clock, "compute_state_delta"):
with Measure(
self.clock, name="compute_state_delta", server_name=self.server_name
):
# The memberships needed for events in the timeline.
# Only calculated when `lazy_load_members` is on.
members_to_fetch: Optional[Set[str]] = None
Expand Down Expand Up @@ -1791,7 +1798,9 @@ async def unread_notifs_for_room_id(
# the DB.
return RoomNotifCounts.empty()

with Measure(self.clock, "unread_notifs_for_room_id"):
with Measure(
self.clock, name="unread_notifs_for_room_id", server_name=self.server_name
):
return await self.store.get_unread_event_push_actions_by_room_for_user(
room_id,
sync_config.user.to_string(),
Expand Down
9 changes: 7 additions & 2 deletions synapse/handlers/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,7 @@ def process_replication_rows(

class TypingNotificationEventSource(EventSource[int, JsonMapping]):
def __init__(self, hs: "HomeServer"):
self.server_name = hs.hostname
self._main_store = hs.get_datastores().main
self.clock = hs.get_clock()
# We can't call get_typing_handler here because there's a cycle:
Expand Down Expand Up @@ -535,7 +536,9 @@ async def get_new_events_as(
appservice may be interested in.
* The latest known room serial.
"""
with Measure(self.clock, "typing.get_new_events_as"):
with Measure(
self.clock, name="typing.get_new_events_as", server_name=self.server_name
):
handler = self.get_typing_handler()

events = []
Expand Down Expand Up @@ -571,7 +574,9 @@ async def get_new_events(
Find typing notifications for given rooms (> `from_token` and <= `to_token`)
"""

with Measure(self.clock, "typing.get_new_events"):
with Measure(
self.clock, name="typing.get_new_events", server_name=self.server_name
):
from_key = int(from_key)
handler = self.get_typing_handler()

Expand Down
4 changes: 3 additions & 1 deletion synapse/handlers/user_directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,9 @@ async def _unsafe_process(self) -> None:

# Loop round handling deltas until we're up to date
while True:
with Measure(self.clock, "user_dir_delta"):
with Measure(
self.clock, name="user_dir_delta", server_name=self.server_name
):
room_max_stream_ordering = self.store.get_room_max_stream_ordering()
if self.pos == room_max_stream_ordering:
return
Expand Down
7 changes: 7 additions & 0 deletions synapse/http/federation/matrix_federation_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ class MatrixFederationAgent:

def __init__(
self,
server_name: str,
reactor: ISynapseReactor,
tls_client_options_factory: Optional[FederationPolicyForHTTPS],
user_agent: bytes,
Expand All @@ -100,6 +101,11 @@ def __init__(
_srv_resolver: Optional[SrvResolver] = None,
_well_known_resolver: Optional[WellKnownResolver] = None,
):
"""
Args:
server_name: Our homeserver name (used to label metrics) (`hs.hostname`).
"""

# proxy_reactor is not blocklisting reactor
proxy_reactor = reactor

Expand Down Expand Up @@ -127,6 +133,7 @@ def __init__(

if _well_known_resolver is None:
_well_known_resolver = WellKnownResolver(
server_name,
reactor,
agent=BlocklistingAgentWrapper(
ProxyAgent(
Expand Down
15 changes: 14 additions & 1 deletion synapse/http/federation/well_known_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,19 @@ class WellKnownResolver:

def __init__(
self,
server_name: str,
reactor: IReactorTime,
agent: IAgent,
user_agent: bytes,
well_known_cache: Optional[TTLCache[bytes, Optional[bytes]]] = None,
had_well_known_cache: Optional[TTLCache[bytes, bool]] = None,
):
"""
Args:
server_name: Our homeserver name (used to label metrics) (`hs.hostname`).
"""

self.server_name = server_name
self._reactor = reactor
self._clock = Clock(reactor)

Expand Down Expand Up @@ -134,7 +141,13 @@ async def get_well_known(self, server_name: bytes) -> WellKnownLookupResult:
# TODO: should we linearise so that we don't end up doing two .well-known
# requests for the same server in parallel?
try:
with Measure(self._clock, "get_well_known"):
with Measure(
self._clock,
name="get_well_known",
            # This should be our homeserver where the code is running (used to
            # label metrics)
server_name=self.server_name,
):
result: Optional[bytes]
cache_period: float

Expand Down
7 changes: 6 additions & 1 deletion synapse/http/matrixfederationclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,7 @@ def __init__(
if hs.get_instance_name() in outbound_federation_restricted_to:
# Talk to federation directly
federation_agent: IAgent = MatrixFederationAgent(
self.server_name,
self.reactor,
tls_client_options_factory,
user_agent.encode("ascii"),
Expand Down Expand Up @@ -697,7 +698,11 @@ async def _send_request(
outgoing_requests_counter.labels(request.method).inc()

try:
with Measure(self.clock, "outbound_request"):
with Measure(
self.clock,
name="outbound_request",
server_name=self.server_name,
):
# we don't want all the fancy cookie and redirect handling
# that treq.request gives: just use the raw Agent.

Expand Down
Loading
Loading