Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Record more information into structured logs #9654

Merged
merged 13 commits into from
Apr 8, 2021
1 change: 1 addition & 0 deletions changelog.d/9654.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Include request information in structured logging output.
112 changes: 88 additions & 24 deletions synapse/http/site.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import contextlib
import logging
import time
from typing import Optional, Type, Union
from typing import Optional, Tuple, Type, Union

import attr
from zope.interface import implementer
Expand All @@ -26,7 +26,11 @@
from synapse.config.server import ListenerConfig
from synapse.http import get_request_user_agent, redact_uri
from synapse.http.request_metrics import RequestMetrics, requests_counter
from synapse.logging.context import LoggingContext, PreserveLoggingContext
from synapse.logging.context import (
ContextRequest,
LoggingContext,
PreserveLoggingContext,
)
from synapse.types import Requester

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -63,7 +67,7 @@ def __init__(self, channel, *args, **kw):

# The requester, if authenticated. For federation requests this is the
# server name, for client requests this is the Requester object.
self.requester = None # type: Optional[Union[Requester, str]]
self._requester = None # type: Optional[Union[Requester, str]]

# we can't yet create the logcontext, as we don't know the method.
self.logcontext = None # type: Optional[LoggingContext]
Expand Down Expand Up @@ -93,6 +97,31 @@ def __repr__(self):
self.site.site_tag,
)

@property
def requester(self) -> Optional[Union[Requester, str]]:
return self._requester

@requester.setter
def requester(self, value: Union[Requester, str]) -> None:
# Store the requester, and update some properties based on it.

# This should only be called once.
assert self._requester is None

self._requester = value

# A logging context should exist by now (and have a ContextRequest).
assert self.logcontext is not None
assert self.logcontext.request is not None

(
requester,
authenticated_entity,
) = self.get_authenticated_entity()
self.logcontext.request.requester = requester
# If there's no authenticated entity, it was the requester.
self.logcontext.request.authenticated_entity = authenticated_entity or requester

def get_request_id(self):
return "%s-%i" % (self.get_method(), self.request_seq)

Expand Down Expand Up @@ -126,13 +155,60 @@ def get_method(self) -> str:
return self.method.decode("ascii")
return method

def get_authenticated_entity(self) -> Tuple[Optional[str], Optional[str]]:
"""
Get the "authenticated" entity of the request, which might be the user
performing the action, or a user being puppeted by a server admin.

Returns:
A tuple:
The first item is a string representing the user making the request.

The second item is a string or None representing the user who
authenticated when making this request. See
Requester.authenticated_entity.
"""
# Convert the requester into a string that we can log
if isinstance(self._requester, str):
return self._requester, None
elif isinstance(self._requester, Requester):
requester = self._requester.user.to_string()
authenticated_entity = self._requester.authenticated_entity

# If this is a request where the target user doesn't match the user who
# authenticated (e.g. and admin is puppetting a user) then we return both.
if self._requester.user.to_string() != authenticated_entity:
return requester, authenticated_entity

return requester, None
elif self._requester is not None:
# This shouldn't happen, but we log it so we don't lose information
# and can see that we're doing something wrong.
return repr(self._requester), None # type: ignore[unreachable]

return None, None

def render(self, resrc):
# this is called once a Resource has been found to serve the request; in our
# case the Resource in question will normally be a JsonResource.

# create a LogContext for this request
request_id = self.get_request_id()
self.logcontext = LoggingContext(request_id, request=request_id)
self.logcontext = LoggingContext(
request_id,
request=ContextRequest(
self.get_request_id(),
self.getClientIP(),
self.site.site_tag,
# The requester is going to be unknown at this point.
None,
None,
self.get_method(),
self.get_redacted_uri(),
self.clientproto.decode("ascii", errors="replace"),
get_request_user_agent(self),
clokep marked this conversation as resolved.
Show resolved Hide resolved
),
)

# override the Server header which is set by twisted
self.setHeader("Server", self.site.server_version_string)
Expand Down Expand Up @@ -277,25 +353,6 @@ def _finished_processing(self):
# to the client (nb may be negative)
response_send_time = self.finish_time - self._processing_finished_time

# Convert the requester into a string that we can log
authenticated_entity = None
if isinstance(self.requester, str):
authenticated_entity = self.requester
elif isinstance(self.requester, Requester):
authenticated_entity = self.requester.authenticated_entity

# If this is a request where the target user doesn't match the user who
# authenticated (e.g. and admin is puppetting a user) then we log both.
if self.requester.user.to_string() != authenticated_entity:
authenticated_entity = "{},{}".format(
authenticated_entity,
self.requester.user.to_string(),
)
elif self.requester is not None:
# This shouldn't happen, but we log it so we don't lose information
# and can see that we're doing something wrong.
authenticated_entity = repr(self.requester) # type: ignore[unreachable]

user_agent = get_request_user_agent(self, "-")

code = str(self.code)
Expand All @@ -305,14 +362,21 @@ def _finished_processing(self):
code += "!"

log_level = logging.INFO if self._should_log_request() else logging.DEBUG

# If this is a request where the target user doesn't match the user who
# authenticated (e.g. and admin is puppetting a user) then we log both.
requester, authenticated_entity = self.get_authenticated_entity()
if authenticated_entity:
requester = "{}.{}".format(authenticated_entity, requester)

self.site.access_logger.log(
log_level,
"%s - %s - {%s}"
" Processed request: %.3fsec/%.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)"
' %sB %s "%s %s %s" "%s" [%d dbevts]',
self.getClientIP(),
self.site.site_tag,
authenticated_entity,
clokep marked this conversation as resolved.
Show resolved Hide resolved
requester,
processing_time,
response_send_time,
usage.ru_utime,
Expand Down
84 changes: 72 additions & 12 deletions synapse/logging/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@

See doc/log_contexts.rst for details on how this works.
"""

import inspect
import logging
import threading
Expand Down Expand Up @@ -181,6 +180,42 @@ def __sub__(self, other: "ContextResourceUsage") -> "ContextResourceUsage":
return res


class ContextRequest:
clokep marked this conversation as resolved.
Show resolved Hide resolved
clokep marked this conversation as resolved.
Show resolved Hide resolved
__slots__ = [
"request_id",
"ip_address",
"site_tag",
"requester",
"authenticated_entity",
"method",
"url",
"protocol",
"user_agent",
]

def __init__(
self,
request_id,
ip_address,
site_tag,
requester,
authenticated_entity,
method,
url,
protocol,
user_agent,
):
self.request_id = request_id
self.ip_address = ip_address
self.site_tag = site_tag
self.requester = requester
self.authenticated_entity = authenticated_entity
self.method = method
self.url = url
self.protocol = protocol
self.user_agent = user_agent


LoggingContextOrSentinel = Union["LoggingContext", "_Sentinel"]


Expand Down Expand Up @@ -256,7 +291,7 @@ def __init__(
self,
name: Optional[str] = None,
parent_context: "Optional[LoggingContext]" = None,
request: Optional[str] = None,
request: Optional[ContextRequest] = None,
) -> None:
self.previous_context = current_context()
self.name = name
Expand All @@ -281,15 +316,19 @@ def __init__(
self.parent_context = parent_context

if self.parent_context is not None:
self.parent_context.copy_to(self)
# we track the current request_id
self.request = self.parent_context.request

# we also track the current scope:
self.scope = self.parent_context.scope

if request is not None:
# the request param overrides the request from the parent context
# the request_id param overrides the request_id from the parent context
self.request = request

def __str__(self) -> str:
if self.request:
return str(self.request)
return self.request.request_id
return "%s@%x" % (self.name, id(self))

@classmethod
Expand Down Expand Up @@ -556,8 +595,23 @@ def filter(self, record: logging.LogRecord) -> Literal[True]:
# we end up in a death spiral of infinite loops, so let's check, for
# robustness' sake.
if context is not None:
# Logging is interested in the request.
record.request = context.request # type: ignore
# Logging is interested in the request ID. Note that for backwards
# compatibility this is stored as the "request" on the record.
record.request = str(context) # type: ignore

# Add some data from the HTTP request.
request = context.request
if request is None:
return True

record.ip_address = request.ip_address # type: ignore
record.site_tag = request.site_tag # type: ignore
record.requester = request.requester # type: ignore
record.authenticated_entity = request.authenticated_entity # type: ignore
record.method = request.method # type: ignore
record.url = request.url # type: ignore
record.protocol = request.protocol # type: ignore
record.user_agent = request.user_agent # type: ignore

return True

Expand Down Expand Up @@ -630,8 +684,8 @@ def set_current_context(context: LoggingContextOrSentinel) -> LoggingContextOrSe
def nested_logging_context(suffix: str) -> LoggingContext:
"""Creates a new logging context as a child of another.

The nested logging context will have a 'request' made up of the parent context's
request, plus the given suffix.
The nested logging context will have a 'request_id' made up of the parent context's
request_id, plus the given suffix.

CPU/db usage stats will be added to the parent context's on exit.

Expand All @@ -641,7 +695,7 @@ def nested_logging_context(suffix: str) -> LoggingContext:
# ... do stuff

Args:
suffix: suffix to add to the parent context's 'request'.
suffix: suffix to add to the parent context's 'request_id'.

Returns:
LoggingContext: new logging context.
Expand All @@ -653,11 +707,17 @@ def nested_logging_context(suffix: str) -> LoggingContext:
)
parent_context = None
prefix = ""
request = None
else:
assert isinstance(curr_context, LoggingContext)
parent_context = curr_context
prefix = str(parent_context.request)
return LoggingContext(parent_context=parent_context, request=prefix + "-" + suffix)
prefix = str(parent_context.name)
request = parent_context.request
return LoggingContext(
prefix + "-" + suffix,
parent_context=parent_context,
request=request,
)


def preserve_fn(f):
Expand Down
18 changes: 12 additions & 6 deletions synapse/metrics/background_process_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import logging
import threading
from functools import wraps
from typing import TYPE_CHECKING, Dict, Optional, Set
from typing import TYPE_CHECKING, Dict, Optional, Set, Union

from prometheus_client.core import REGISTRY, Counter, Gauge

Expand Down Expand Up @@ -199,11 +199,11 @@ async def run():
_background_process_start_count.labels(desc).inc()
_background_process_in_flight_count.labels(desc).inc()

with BackgroundProcessLoggingContext(desc, "%s-%i" % (desc, count)) as context:
with BackgroundProcessLoggingContext(desc, count) as context:
try:
ctx = noop_context_manager()
if bg_start_span:
ctx = start_active_span(desc, tags={"request_id": context.request})
ctx = start_active_span(desc, tags={"request_id": str(context)})
with ctx:
return await maybe_awaitable(func(*args, **kwargs))
except Exception:
Expand Down Expand Up @@ -242,13 +242,19 @@ class BackgroundProcessLoggingContext(LoggingContext):
processes.
"""

__slots__ = ["_proc"]
__slots__ = ["_id", "_proc"]

def __init__(self, name: str, request: Optional[str] = None):
super().__init__(name, request=request)
def __init__(self, name: str, id: Optional[Union[int, str]] = None):
super().__init__(name)
self._id = id

self._proc = _BackgroundProcess(name, self)

def __str__(self) -> str:
if self._id is not None:
return "%s-%s" % (self.name, self._id)
return "%s@%x" % (self.name, id(self))

def start(self, rusage: "Optional[resource._RUsage]"):
"""Log context has started running (again)."""

Expand Down
5 changes: 3 additions & 2 deletions synapse/replication/tcp/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,9 @@ def __init__(self, clock: Clock, handler: "ReplicationCommandHandler"):

# a logcontext which we use for processing incoming commands. We declare it as a
# background process so that the CPU stats get reported to prometheus.
ctx_name = "replication-conn-%s" % self.conn_id
self._logging_context = BackgroundProcessLoggingContext(ctx_name, ctx_name)
self._logging_context = BackgroundProcessLoggingContext(
"replication-conn", self.conn_id
)

def connectionMade(self):
logger.info("[%s] Connection established", self.id())
Expand Down
Loading