From a4465a1e9e1ddba500b56cac08e628dba8364afe Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 25 Sep 2020 14:00:40 +0100 Subject: [PATCH 1/5] Add metrics to track success/otherwise of replication requests --- synapse/replication/http/_base.py | 38 +++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index b448da671038..6b1dfa70297d 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -20,18 +20,28 @@ from inspect import signature from typing import Dict, List, Tuple -from synapse.api.errors import ( - CodeMessageException, - HttpResponseException, - RequestSendFailed, - SynapseError, -) +from prometheus_client import Counter, Gauge + +from synapse.api.errors import HttpResponseException, RequestSendFailed, SynapseError +from synapse.http import RequestTimedOutError from synapse.logging.opentracing import inject_active_span_byte_dict, trace from synapse.util.caches.response_cache import ResponseCache from synapse.util.stringutils import random_string logger = logging.getLogger(__name__) +_pending_outgoing_requests = Gauge( + "synapse_pending_outgoing_replication_requests", + "Number of active outgoing replication requests, by replication method name", + ["name"], +) + +_outgoing_request_counter = Counter( + "synapse_outgoing_replication_requests", + "Number of outgoing replication requests, by replication method name and result", + ["name", "code"], +) + class ReplicationEndpoint(metaclass=abc.ABCMeta): """Helper base class for defining new replication HTTP endpoints. @@ -139,6 +149,7 @@ def make_client(cls, hs): instance_map = hs.config.worker.instance_map @trace(opname="outgoing_replication_request") + @_pending_outgoing_requests.track_inprogress() async def send_request(instance_name="master", **kwargs): if instance_name == local_instance_name: raise Exception("Trying to send HTTP request to self") @@ -193,23 +204,26 @@ async def send_request(instance_name="master", **kwargs): try: result = await request_func(uri, data, headers=headers) break - except CodeMessageException as e: - if e.code != 504 or not cls.RETRY_ON_TIMEOUT: + except RequestTimedOutError: + if not cls.RETRY_ON_TIMEOUT: raise - logger.warning("%s request timed out", cls.NAME) + logger.warning("%s request timed out; retrying", cls.NAME) # If we timed out we probably don't need to worry about backing # off too much, but lets just wait a little anyway. await clock.sleep(1) except HttpResponseException as e: # We convert to SynapseError as we know that it was a SynapseError - # on the master process that we should send to the client. (And + # on the main process that we should send to the client. (And # importantly, not stack traces everywhere) + _outgoing_request_counter.labels(cls.NAME, e.code).inc() raise e.to_synapse_error() - except RequestSendFailed as e: - raise SynapseError(502, "Failed to talk to master") from e + except Exception as e: + _outgoing_request_counter.labels(cls.NAME, "ERR").inc() + raise SynapseError(502, "Failed to talk to main process") from e + _outgoing_request_counter.labels(cls.NAME, 200).inc() return result return send_request From 49759691cd5853213ffee5ad3addbb8af165e948 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 25 Sep 2020 15:06:23 +0100 Subject: [PATCH 2/5] add missing label --- synapse/replication/http/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index 6b1dfa70297d..d785301958ec 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -149,7 +149,7 @@ def make_client(cls, hs): instance_map = hs.config.worker.instance_map @trace(opname="outgoing_replication_request") - @_pending_outgoing_requests.track_inprogress() + @_pending_outgoing_requests.labels(cls.NAME).track_inprogress() async def send_request(instance_name="master", **kwargs): if instance_name == local_instance_name: raise Exception("Trying to send HTTP request to self") From 4a48c4545175315a41e70707e86e87afff84789f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 25 Sep 2020 15:09:16 +0100 Subject: [PATCH 3/5] fix --- synapse/replication/http/_base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index d785301958ec..8b6459eed146 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -148,8 +148,10 @@ def make_client(cls, hs): instance_map = hs.config.worker.instance_map + outgoing_gauge = _pending_outgoing_requests.labels(cls.NAME) + @trace(opname="outgoing_replication_request") - @_pending_outgoing_requests.labels(cls.NAME).track_inprogress() + @outgoing_gauge.track_inprogress() async def send_request(instance_name="master", **kwargs): if instance_name == local_instance_name: raise Exception("Trying to send HTTP request to self") From b8d00ebafbd7e9509bacb4459dcf0e3c48274372 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 25 Sep 2020 15:37:54 +0100 Subject: [PATCH 4/5] changelog --- changelog.d/8406.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/8406.feature diff --git a/changelog.d/8406.feature b/changelog.d/8406.feature new file mode 100644 index 000000000000..1c6472ae7eae --- /dev/null +++ b/changelog.d/8406.feature @@ -0,0 +1 @@ +Add prometheus metrics for replication requests. From 243a88aa086a69176fa2a839512968ab1ee20216 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 25 Sep 2020 15:54:21 +0100 Subject: [PATCH 5/5] fix lint --- synapse/replication/http/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index 8b6459eed146..64edadb624c1 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -22,7 +22,7 @@ from prometheus_client import Counter, Gauge -from synapse.api.errors import HttpResponseException, RequestSendFailed, SynapseError +from synapse.api.errors import HttpResponseException, SynapseError from synapse.http import RequestTimedOutError from synapse.logging.opentracing import inject_active_span_byte_dict, trace from synapse.util.caches.response_cache import ResponseCache