Merged
27 commits
53f9c4c
Add in base linting for metrics
MadLittleMods Jul 23, 2025
8d20965
Fill in `synapse/api/auth/msc3861_delegated.py`
MadLittleMods Jul 23, 2025
ee223c6
Fill in `synapse/federation/federation_server.py`
MadLittleMods Jul 23, 2025
b699e8b
Fill in `synapse/handlers/federation_event.py`
MadLittleMods Jul 23, 2025
04b95ac
Fill in `synapse/handlers/federation.py`
MadLittleMods Jul 23, 2025
68fa7ec
Fill in `synapse/handlers/sliding_sync/__init__.py`
MadLittleMods Jul 23, 2025
1f4cc8f
Fill in `synapse/http/request_metrics.py`
MadLittleMods Jul 23, 2025
fa739e2
Fill in `synapse/metrics/__init__.py`
MadLittleMods Jul 23, 2025
31bcbbd
Support `labelnames` argument being a Tuple expression
MadLittleMods Jul 23, 2025
8c5beee
Fill in `synapse/metrics/_gc.py`
MadLittleMods Jul 23, 2025
cba3c65
Fill in `synapse/metrics/_reactor_metrics.py`
MadLittleMods Jul 23, 2025
30f5008
Fill in `synapse/replication/tcp/external_cache.py`
MadLittleMods Jul 23, 2025
4d4ada2
Fill in `synapse/rest/client/room.py`
MadLittleMods Jul 23, 2025
8fc2944
Fill in `synapse/state/__init__.py`
MadLittleMods Jul 23, 2025
a1fb7d4
Fill in `synapse/storage/database.py`
MadLittleMods Jul 23, 2025
36155d5
Fill in `synapse/storage/controllers/persist_events.py`
MadLittleMods Jul 23, 2025
99b862e
Fill in `synapse/util/ratelimitutils.py`
MadLittleMods Jul 23, 2025
adf1028
Make `self.server_name` available for `synapse/storage/database.py`
MadLittleMods Jul 23, 2025
4b0a4bb
Add changelog
MadLittleMods Jul 23, 2025
4d63c55
Merge branch 'develop' into madlittlemods/18592-refactor-histogram
MadLittleMods Jul 24, 2025
4c84ffa
Remove debug log
MadLittleMods Jul 24, 2025
12a9a8c
Merge branch 'develop' into madlittlemods/18592-refactor-histogram
MadLittleMods Jul 25, 2025
bc8c770
Fix leftover missing `self.server_name` now that #18656 is merged
MadLittleMods Jul 25, 2025
3e3e11f
Fix `make_fake_db_pool`
MadLittleMods Jul 23, 2025
52b99ef
Merge branch 'develop' into madlittlemods/18592-refactor-histogram
MadLittleMods Jul 29, 2025
841ad19
Merge branch 'develop' into madlittlemods/18592-refactor-histogram
MadLittleMods Jul 29, 2025
f751976
Merge branch 'develop' into madlittlemods/18592-refactor-histogram
MadLittleMods Jul 29, 2025
1 change: 1 addition & 0 deletions changelog.d/18724.misc
@@ -0,0 +1 @@
Refactor `Histogram` metrics to be homeserver-scoped.
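The pattern applied throughout this PR: each `Histogram` names `SERVER_NAME_LABEL` in its `labelnames`, and every call site supplies the homeserver's name when recording. A minimal sketch of the shape, assuming only that `SERVER_NAME_LABEL` resolves to `"server_name"` (the metric and handler below are hypothetical, not from this diff):

```python
from prometheus_client import Histogram

from synapse.metrics import SERVER_NAME_LABEL

# Hypothetical metric, for illustration only -- not part of this PR.
example_timer = Histogram(
    "synapse_example_processing_time_seconds",
    "Time taken to process an example unit of work",
    labelnames=["result", SERVER_NAME_LABEL],
)


class ExampleHandler:
    def __init__(self, hs: "HomeServer"):
        # Call sites grab the homeserver name once at construction time.
        self.server_name = hs.hostname
        self.clock = hs.get_clock()

    def process(self) -> None:
        start_time = self.clock.time()
        # ... do the work being measured ...
        example_timer.labels(
            result="ok", **{SERVER_NAME_LABEL: self.server_name}
        ).observe(self.clock.time() - start_time)
```

Each exported series then carries a `server_name` label, so several homeservers sharing one Python process can be told apart at scrape time.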
1 change: 1 addition & 0 deletions scripts-dev/mypy_synapse_plugin.py
@@ -61,6 +61,7 @@ def get_function_signature_hook(
) -> Optional[Callable[[FunctionSigContext], FunctionLike]]:
if fullname in (
"prometheus_client.metrics.Counter",
"prometheus_client.metrics.Histogram",
"prometheus_client.metrics.Gauge",
# TODO: Add other prometheus_client metrics that need checking as we
# refactor, see https://github.com/element-hq/synapse/issues/18592
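The diff above only registers `Histogram` with the plugin's function-signature hook; the check itself is elsewhere in the file. A sketch of how such a hook could verify `labelnames`, assuming an error code matching the `type: ignore[missing-server-name-label]` comments used later in this PR (illustrative, not the actual plugin code):

```python
from mypy.errorcodes import ErrorCode
from mypy.nodes import ListExpr, StrExpr, TupleExpr
from mypy.plugin import FunctionSigContext
from mypy.types import FunctionLike

SERVER_NAME_LABEL = "server_name"  # assumed value of synapse.metrics.SERVER_NAME_LABEL

# Assumed to match the `type: ignore[missing-server-name-label]` comments
# used elsewhere in this PR to exempt process-level metrics.
MISSING_SERVER_NAME_LABEL = ErrorCode(
    "missing-server-name-label",
    "Metric must include the server name label",
    "per-homeserver-metrics",
)


def check_metric_has_server_name_label(ctx: FunctionSigContext) -> FunctionLike:
    # `ctx.args` groups the call's argument expressions per formal parameter,
    # so zipping with the formal names finds `labelnames` whether it was
    # passed positionally or by keyword.
    args_by_name = dict(zip(ctx.default_signature.arg_names, ctx.args))
    for expr in args_by_name.get("labelnames") or []:
        # `labelnames` may be a list *or* tuple literal (see the commit
        # "Support `labelnames` argument being a Tuple expression").
        items = expr.items if isinstance(expr, (ListExpr, TupleExpr)) else [expr]
        if any(
            isinstance(item, StrExpr) and item.value == SERVER_NAME_LABEL
            for item in items
        ):
            return ctx.default_signature

    ctx.api.fail(
        f'Metric is missing the "{SERVER_NAME_LABEL}" label',
        ctx.context,
        code=MISSING_SERVER_NAME_LABEL,
    )
    return ctx.default_signature
```

Process-level metrics, such as those in `synapse/metrics/_gc.py` below, opt out with the corresponding `type: ignore` comment instead of carrying the label.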
17 changes: 14 additions & 3 deletions synapse/_scripts/review_recent_signups.py
@@ -29,19 +29,21 @@

from synapse.config._base import (
Config,
ConfigError,
RootConfig,
find_config_files,
read_config_files,
)
from synapse.config.database import DatabaseConfig
from synapse.config.server import ServerConfig
from synapse.storage.database import DatabasePool, LoggingTransaction, make_conn
from synapse.storage.engines import create_engine


class ReviewConfig(RootConfig):
"A config class that just pulls out the database config"
"A config class that just pulls out the server and database config"

config_classes = [DatabaseConfig]
config_classes = [ServerConfig, DatabaseConfig]


@attr.s(auto_attribs=True)
@@ -148,6 +150,10 @@ def main() -> None:
config_dict = read_config_files(config_files)
config.parse_config_dict(config_dict, "", "")

server_name = config.server.server_name
if not isinstance(server_name, str):
raise ConfigError("Must be a string", ("server_name",))

since_ms = time.time() * 1000 - Config.parse_duration(config_args.since)
exclude_users_with_email = config_args.exclude_emails
exclude_users_with_appservice = config_args.exclude_app_service
@@ -159,7 +165,12 @@

engine = create_engine(database_config.config)

with make_conn(database_config, engine, "review_recent_signups") as db_conn:
with make_conn(
db_config=database_config,
engine=engine,
default_txn_name="review_recent_signups",
server_name=server_name,
) as db_conn:
# This generates a type of Cursor, not LoggingTransaction.
user_infos = get_recent_users(
db_conn.cursor(),
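Both standalone scripts now pass the homeserver name down to `make_conn`, switching to keyword arguments while doing so. A sketch of the assumed updated helper in `synapse/storage/database.py` (the real signature may differ in detail):

```python
from synapse.config.database import DatabaseConnectionConfig
from synapse.storage.database import LoggingDatabaseConnection
from synapse.storage.engines import BaseDatabaseEngine


def make_conn(
    db_config: DatabaseConnectionConfig,
    engine: BaseDatabaseEngine,
    default_txn_name: str,
    server_name: str,  # new parameter: lets database metrics carry SERVER_NAME_LABEL
) -> LoggingDatabaseConnection:
    ...
```

Calling it with keywords, as the diffs here do, keeps call sites unambiguous as parameters are added.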
10 changes: 8 additions & 2 deletions synapse/_scripts/synapse_port_db.py
@@ -672,8 +672,14 @@ def build_db_store(
engine = create_engine(db_config.config)

hs = MockHomeserver(self.hs_config)

with make_conn(db_config, engine, "portdb") as db_conn:
server_name = hs.hostname

with make_conn(
db_config=db_config,
engine=engine,
default_txn_name="portdb",
server_name=server_name,
) as db_conn:
engine.check_database(
db_conn, allow_outdated_version=allow_outdated_version
)
15 changes: 11 additions & 4 deletions synapse/api/auth/msc3861_delegated.py
@@ -47,6 +47,7 @@
inject_request_headers,
start_active_span,
)
from synapse.metrics import SERVER_NAME_LABEL
from synapse.synapse_rust.http_client import HttpClient
from synapse.types import Requester, UserID, create_requester
from synapse.util import json_decoder
@@ -62,7 +63,7 @@
introspection_response_timer = Histogram(
"synapse_api_auth_delegated_introspection_response",
"Time taken to get a response for an introspection request",
["code"],
labelnames=["code", SERVER_NAME_LABEL],
)


@@ -341,17 +342,23 @@ async def _introspect_token(
)
except HttpResponseException as e:
end_time = self._clock.time()
introspection_response_timer.labels(e.code).observe(end_time - start_time)
introspection_response_timer.labels(
code=e.code, **{SERVER_NAME_LABEL: self.server_name}
).observe(end_time - start_time)
raise
except Exception:
end_time = self._clock.time()
introspection_response_timer.labels("ERR").observe(end_time - start_time)
introspection_response_timer.labels(
code="ERR", **{SERVER_NAME_LABEL: self.server_name}
).observe(end_time - start_time)
raise

logger.debug("Fetched token from MAS")

end_time = self._clock.time()
introspection_response_timer.labels(200).observe(end_time - start_time)
introspection_response_timer.labels(
code=200, **{SERVER_NAME_LABEL: self.server_name}
).observe(end_time - start_time)

resp = json_decoder.decode(resp_body.decode("utf-8"))

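Two details of these call sites are worth noting: `prometheus_client` coerces every label value with `str()`, so passing the integer status code is fine, and unpacking `**{SERVER_NAME_LABEL: ...}` keeps call sites tied to the shared constant rather than hard-coding the label name. A standalone sketch (hypothetical metric name; assumes `SERVER_NAME_LABEL` resolves to `"server_name"`):

```python
from prometheus_client import Histogram

from synapse.metrics import SERVER_NAME_LABEL

# Hypothetical metric with the same shape as introspection_response_timer.
timer = Histogram(
    "example_introspection_response",
    "Time taken to get a response for an introspection request",
    labelnames=["code", SERVER_NAME_LABEL],
)

# prometheus_client stringifies label values, so the int 200 exports as the
# series {code="200", server_name="example.com"}.
timer.labels(code=200, **{SERVER_NAME_LABEL: "example.com"}).observe(0.05)

# Hard-coding `server_name=` would also work today, but would drift silently
# if the shared constant were ever renamed; the dict-unpacking keeps call
# sites and the label definition in lock-step.
```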
7 changes: 4 additions & 3 deletions synapse/federation/federation_server.py
@@ -122,6 +122,7 @@
pdu_process_time = Histogram(
"synapse_federation_server_pdu_process_time",
"Time taken to process an event",
labelnames=[SERVER_NAME_LABEL],
)

last_pdu_ts_metric = Gauge(
@@ -1324,9 +1325,9 @@ async def _process_incoming_pdus_in_room_inner(
origin, event.event_id
)
if received_ts is not None:
pdu_process_time.observe(
(self._clock.time_msec() - received_ts) / 1000
)
pdu_process_time.labels(
**{SERVER_NAME_LABEL: self.server_name}
).observe((self._clock.time_msec() - received_ts) / 1000)

next = await self._get_next_nonspam_staged_event_for_room(
room_id, room_version
3 changes: 2 additions & 1 deletion synapse/federation/sender/__init__.py
@@ -667,7 +667,8 @@ async def handle_event(event: EventBase) -> None:
ts = event_to_received_ts[event.event_id]
assert ts is not None
synapse.metrics.event_processing_lag_by_event.labels(
"federation_sender"
name="federation_sender",
**{SERVER_NAME_LABEL: self.server_name},
).observe((now - ts) / 1000)

async def handle_room_events(events: List[EventBase]) -> None:
3 changes: 2 additions & 1 deletion synapse/handlers/appservice.py
@@ -187,7 +187,8 @@ async def start_scheduler() -> None:
assert ts is not None

synapse.metrics.event_processing_lag_by_event.labels(
"appservice_sender"
name="appservice_sender",
**{SERVER_NAME_LABEL: self.server_name},
).observe((now - ts) / 1000)

async def handle_room_events(events: Iterable[EventBase]) -> None:
9 changes: 5 additions & 4 deletions synapse/handlers/federation.py
@@ -71,6 +71,7 @@
from synapse.http.servlet import assert_params_in_dict
from synapse.logging.context import nested_logging_context
from synapse.logging.opentracing import SynapseTags, set_tag, tag_args, trace
from synapse.metrics import SERVER_NAME_LABEL
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.module_api import NOT_SPAM
from synapse.storage.databases.main.events_worker import EventRedactBehaviour
@@ -90,7 +91,7 @@
backfill_processing_before_timer = Histogram(
"synapse_federation_backfill_processing_before_time_seconds",
"sec",
[],
labelnames=[SERVER_NAME_LABEL],
buckets=(
0.1,
0.5,
@@ -533,9 +534,9 @@ async def try_backfill(domains: StrCollection) -> bool:
# backfill points regardless of `current_depth`.
if processing_start_time is not None:
processing_end_time = self.clock.time_msec()
backfill_processing_before_timer.observe(
(processing_end_time - processing_start_time) / 1000
)
backfill_processing_before_timer.labels(
**{SERVER_NAME_LABEL: self.server_name}
).observe((processing_end_time - processing_start_time) / 1000)

success = await try_backfill(likely_domains)
if success:
6 changes: 4 additions & 2 deletions synapse/handlers/federation_event.py
@@ -113,7 +113,7 @@
backfill_processing_after_timer = Histogram(
"synapse_federation_backfill_processing_after_time_seconds",
"sec",
[],
labelnames=[SERVER_NAME_LABEL],
buckets=(
0.1,
0.25,
@@ -692,7 +692,9 @@ async def backfill(
if not events:
return

with backfill_processing_after_timer.time():
with backfill_processing_after_timer.labels(
**{SERVER_NAME_LABEL: self.server_name}
).time():
# if there are any events in the wrong room, the remote server is buggy and
# should not be trusted.
for ev in events:
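`backfill` above records through the `Timer` returned by `.labels(...).time()` rather than an explicit `observe()`; the two styles are equivalent. A minimal sketch with a hypothetical metric:

```python
import time

from prometheus_client import Histogram

from synapse.metrics import SERVER_NAME_LABEL


def do_work() -> None:
    """Placeholder for the work being measured."""


# Hypothetical metric with the same shape as backfill_processing_after_timer.
work_timer = Histogram(
    "example_backfill_processing_time_seconds",
    "sec",
    labelnames=[SERVER_NAME_LABEL],
)

server_name = "example.com"  # normally taken from `hs.hostname`

# Style 1: explicit observation, as in federation.py above.
start = time.time()
do_work()
work_timer.labels(**{SERVER_NAME_LABEL: server_name}).observe(time.time() - start)

# Style 2: the Timer context manager observes the elapsed time on exit.
with work_timer.labels(**{SERVER_NAME_LABEL: server_name}).time():
    do_work()
```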
10 changes: 6 additions & 4 deletions synapse/handlers/sliding_sync/__init__.py
@@ -38,6 +38,7 @@
tag_args,
trace,
)
from synapse.metrics import SERVER_NAME_LABEL
from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary
from synapse.storage.databases.main.state_deltas import StateDelta
from synapse.storage.databases.main.stream import PaginateFunction
@@ -79,7 +80,7 @@
sync_processing_time = Histogram(
"synapse_sliding_sync_processing_time",
"Time taken to generate a sliding sync response, ignoring wait times.",
["initial"],
labelnames=["initial", SERVER_NAME_LABEL],
)

# Limit the number of state_keys we should remember sending down the connection for each
@@ -94,6 +95,7 @@

class SlidingSyncHandler:
def __init__(self, hs: "HomeServer"):
self.server_name = hs.hostname
self.clock = hs.get_clock()
self.store = hs.get_datastores().main
self.storage_controllers = hs.get_storage_controllers()
@@ -368,9 +370,9 @@ async def handle_room(room_id: str) -> None:
set_tag(SynapseTags.FUNC_ARG_PREFIX + "sync_config.user", user_id)

end_time_s = self.clock.time()
sync_processing_time.labels(from_token is not None).observe(
end_time_s - start_time_s
)
sync_processing_time.labels(
initial=from_token is not None, **{SERVER_NAME_LABEL: self.server_name}
).observe(end_time_s - start_time_s)

return sliding_sync_result

8 changes: 5 additions & 3 deletions synapse/http/request_metrics.py
@@ -240,9 +240,11 @@ def stop(self, time_sec: float, response_code: int, sent_bytes: int) -> None:

response_count.labels(**response_base_labels).inc()

response_timer.labels(code=response_code_str, **response_base_labels).observe(
time_sec - self.start_ts
)
response_timer.labels(
code=response_code_str,
**response_base_labels,
**{SERVER_NAME_LABEL: self.our_server_name},
).observe(time_sec - self.start_ts)

resource_usage = context.get_resource_usage()

4 changes: 2 additions & 2 deletions synapse/metrics/__init__.py
@@ -583,7 +583,7 @@ def collect(self) -> Iterable[Metric]:
event_processing_lag_by_event = Histogram(
"synapse_event_processing_lag_by_event",
"Time between an event being persisted and it being queued up to be sent to the relevant remote servers",
["name"],
labelnames=["name", SERVER_NAME_LABEL],
)

# Build info of the running server.
@@ -607,7 +607,7 @@ def collect(self) -> Iterable[Metric]:
" there is a request with try count of 4, then there would have been one"
" each for 1, 2 and 3",
buckets=(1, 2, 3, 4, 5, 10),
labelnames=("type", "reason"),
labelnames=("type", "reason", SERVER_NAME_LABEL),
)

threadpool_total_threads = Gauge(
4 changes: 2 additions & 2 deletions synapse/metrics/_gc.py
@@ -54,9 +54,9 @@
# Python GC metrics
#

# This is a process-level metric, so it does not have the `SERVER_NAME_LABEL`.
# These are process-level metrics, so they do not have the `SERVER_NAME_LABEL`.
gc_unreachable = Gauge("python_gc_unreachable_total", "Unreachable GC objects", ["gen"]) # type: ignore[missing-server-name-label]
gc_time = Histogram(
gc_time = Histogram( # type: ignore[missing-server-name-label]
"python_gc_time",
"Time taken to GC (sec)",
["gen"],
3 changes: 2 additions & 1 deletion synapse/metrics/_reactor_metrics.py
@@ -62,7 +62,8 @@ class PollReactor: # type: ignore[no-redef]
# Twisted reactor metrics
#

tick_time = Histogram(
# This is a process-level metric, so it does not have the `SERVER_NAME_LABEL`.
tick_time = Histogram( # type: ignore[missing-server-name-label]
"python_twisted_reactor_tick_time",
"Tick time of the Twisted reactor (sec)",
buckets=[0.001, 0.002, 0.005, 0.01, 0.025, 0.05, 0.1, 0.2, 0.5, 1, 2, 5],
10 changes: 7 additions & 3 deletions synapse/replication/tcp/external_cache.py
@@ -49,7 +49,7 @@
response_timer = Histogram(
"synapse_external_cache_response_time_seconds",
"Time taken to get a response from Redis for a cache get/set request",
labelnames=["method"],
labelnames=["method", SERVER_NAME_LABEL],
buckets=(
0.001,
0.002,
@@ -110,7 +110,9 @@ async def set(self, cache_name: str, key: str, value: Any, expiry_ms: int) -> None:
"ExternalCache.set",
tags={opentracing.SynapseTags.CACHE_NAME: cache_name},
):
with response_timer.labels("set").time():
with response_timer.labels(
method="set", **{SERVER_NAME_LABEL: self.server_name}
).time():
return await make_deferred_yieldable(
self._redis_connection.set(
self._get_redis_key(cache_name, key),
@@ -129,7 +131,9 @@ async def get(self, cache_name: str, key: str) -> Optional[Any]:
"ExternalCache.get",
tags={opentracing.SynapseTags.CACHE_NAME: cache_name},
):
with response_timer.labels("get").time():
with response_timer.labels(
method="get", **{SERVER_NAME_LABEL: self.server_name}
).time():
result = await make_deferred_yieldable(
self._redis_connection.get(self._get_redis_key(cache_name, key))
)