Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Keep track of user_ips and monthly_active_users when delegating a…
Browse files Browse the repository at this point in the history
…uth (#16672)

* Describe `insert_client_ip`
* Pull out client_ips and MAU tracking to BaseAuth
* Define HAS_AUTHLIB once in tests

sick of copypasting

* Track ips and token usage when delegating auth
* Test that we track MAU and user_ips
* Don't track `__oidc_admin`
  • Loading branch information
David Robertson authored Nov 23, 2023
1 parent 1a5f9bb commit 32a59a6
Show file tree
Hide file tree
Showing 11 changed files with 201 additions and 82 deletions.
1 change: 1 addition & 0 deletions changelog.d/16672.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Restore tracking of requests and monthly active users when delegating authentication to an [MSC3861](https://github.com/matrix-org/synapse/pull/16672) OIDC provider.
48 changes: 48 additions & 0 deletions synapse/api/auth/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
UnstableSpecAuthError,
)
from synapse.appservice import ApplicationService
from synapse.http import get_request_user_agent
from synapse.http.site import SynapseRequest
from synapse.logging.opentracing import trace
from synapse.types import Requester, create_requester
from synapse.util.cancellation import cancellable
Expand All @@ -45,6 +47,9 @@ def __init__(self, hs: "HomeServer"):
self.store = hs.get_datastores().main
self._storage_controllers = hs.get_storage_controllers()

self._track_appservice_user_ips = hs.config.appservice.track_appservice_user_ips
self._track_puppeted_user_ips = hs.config.api.track_puppeted_user_ips

async def check_user_in_room(
self,
room_id: str,
Expand Down Expand Up @@ -349,3 +354,46 @@ async def get_appservice_user(
return create_requester(
effective_user_id, app_service=app_service, device_id=effective_device_id
)

async def _record_request(
self, request: SynapseRequest, requester: Requester
) -> None:
"""Record that this request was made.
This updates the client_ips and monthly_active_user tables.
"""
ip_addr = request.get_client_ip_if_available()

if ip_addr and (not requester.app_service or self._track_appservice_user_ips):
user_agent = get_request_user_agent(request)
access_token = self.get_access_token_from_request(request)

# XXX(quenting): I'm 95% confident that we could skip setting the
# device_id to "dummy-device" for appservices, and that the only impact
# would be some rows which whould not deduplicate in the 'user_ips'
# table during the transition
recorded_device_id = (
"dummy-device"
if requester.device_id is None and requester.app_service is not None
else requester.device_id
)
await self.store.insert_client_ip(
user_id=requester.authenticated_entity,
access_token=access_token,
ip=ip_addr,
user_agent=user_agent,
device_id=recorded_device_id,
)

# Track also the puppeted user client IP if enabled and the user is puppeting
if (
requester.user.to_string() != requester.authenticated_entity
and self._track_puppeted_user_ips
):
await self.store.insert_client_ip(
user_id=requester.user.to_string(),
access_token=access_token,
ip=ip_addr,
user_agent=user_agent,
device_id=requester.device_id,
)
39 changes: 1 addition & 38 deletions synapse/api/auth/internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
InvalidClientTokenError,
MissingClientTokenError,
)
from synapse.http import get_request_user_agent
from synapse.http.site import SynapseRequest
from synapse.logging.opentracing import active_span, force_tracing, start_active_span
from synapse.types import Requester, create_requester
Expand All @@ -48,8 +47,6 @@ def __init__(self, hs: "HomeServer"):
self._account_validity_handler = hs.get_account_validity_handler()
self._macaroon_generator = hs.get_macaroon_generator()

self._track_appservice_user_ips = hs.config.appservice.track_appservice_user_ips
self._track_puppeted_user_ips = hs.config.api.track_puppeted_user_ips
self._force_tracing_for_users = hs.config.tracing.force_tracing_for_users

@cancellable
Expand Down Expand Up @@ -115,9 +112,6 @@ async def _wrapped_get_user_by_req(
Once get_user_by_req has set up the opentracing span, this does the actual work.
"""
try:
ip_addr = request.get_client_ip_if_available()
user_agent = get_request_user_agent(request)

access_token = self.get_access_token_from_request(request)

# First check if it could be a request from an appservice
Expand Down Expand Up @@ -154,38 +148,7 @@ async def _wrapped_get_user_by_req(
errcode=Codes.EXPIRED_ACCOUNT,
)

if ip_addr and (
not requester.app_service or self._track_appservice_user_ips
):
# XXX(quenting): I'm 95% confident that we could skip setting the
# device_id to "dummy-device" for appservices, and that the only impact
# would be some rows which whould not deduplicate in the 'user_ips'
# table during the transition
recorded_device_id = (
"dummy-device"
if requester.device_id is None and requester.app_service is not None
else requester.device_id
)
await self.store.insert_client_ip(
user_id=requester.authenticated_entity,
access_token=access_token,
ip=ip_addr,
user_agent=user_agent,
device_id=recorded_device_id,
)

# Track also the puppeted user client IP if enabled and the user is puppeting
if (
requester.user.to_string() != requester.authenticated_entity
and self._track_puppeted_user_ips
):
await self.store.insert_client_ip(
user_id=requester.user.to_string(),
access_token=access_token,
ip=ip_addr,
user_agent=user_agent,
device_id=requester.device_id,
)
await self._record_request(request, requester)

if requester.is_guest and not allow_guest:
raise AuthError(
Expand Down
4 changes: 4 additions & 0 deletions synapse/api/auth/msc3861_delegated.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,10 @@ async def get_user_by_req(
# so that we don't provision the user if they don't have enough permission:
requester = await self.get_user_by_access_token(access_token, allow_expired)

# Do not record requests from MAS using the virtual `__oidc_admin` user.
if access_token != self._admin_token:
await self._record_request(request, requester)

if not allow_guest and requester.is_guest:
raise OAuthInsufficientScopeError([SCOPE_MATRIX_API])

Expand Down
21 changes: 21 additions & 0 deletions synapse/storage/databases/main/client_ips.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,27 @@ async def insert_client_ip(
device_id: Optional[str],
now: Optional[int] = None,
) -> None:
"""Record that `user_id` used `access_token` from this `ip` address.
This method does two things.
1. It queues up a row to be upserted into the `client_ips` table. These happen
periodically; see _update_client_ips_batch.
2. It immediately records this user as having taken action for the purposes of
MAU tracking.
Any DB writes take place on the background tasks worker, falling back to the
main process. If we're not that worker, this method emits a replication payload
to run this logic on that worker.
Two caveats to note:
- We only take action once per LAST_SEEN_GRANULARITY, to avoid spamming the
DB with writes.
- Requests using the sliding-sync proxy's user agent are excluded, as its
requests are not directly driven by end-users. This is a hack and we're not
very proud of it.
"""
# The sync proxy continuously triggers /sync even if the user is not
# present so should be excluded from user_ips entries.
if user_agent == "sync-v3-proxy-":
Expand Down
10 changes: 1 addition & 9 deletions tests/config/test_oauth_delegation.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,7 @@

from tests.server import get_clock, setup_test_homeserver
from tests.unittest import TestCase, skip_unless
from tests.utils import default_config

try:
import authlib # noqa: F401

HAS_AUTHLIB = True
except ImportError:
HAS_AUTHLIB = False

from tests.utils import HAS_AUTHLIB, default_config

# These are a few constants that are used as config parameters in the tests.
SERVER_NAME = "test"
Expand Down
129 changes: 115 additions & 14 deletions tests/handlers/test_oauth_delegation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
# limitations under the License.

from http import HTTPStatus
from typing import Any, Dict, Union
from io import BytesIO
from typing import Any, Dict, Optional, Union
from unittest.mock import ANY, AsyncMock, Mock
from urllib.parse import parse_qs

Expand All @@ -25,6 +26,8 @@
from signedjson.sign import sign_json

from twisted.test.proto_helpers import MemoryReactor
from twisted.web.http_headers import Headers
from twisted.web.iweb import IResponse

from synapse.api.errors import (
AuthError,
Expand All @@ -33,23 +36,17 @@
OAuthInsufficientScopeError,
SynapseError,
)
from synapse.http.site import SynapseRequest
from synapse.rest import admin
from synapse.rest.client import account, devices, keys, login, logout, register
from synapse.server import HomeServer
from synapse.types import JsonDict
from synapse.types import JsonDict, UserID
from synapse.util import Clock

from tests.server import FakeChannel
from tests.test_utils import FakeResponse, get_awaitable_result
from tests.unittest import HomeserverTestCase, skip_unless
from tests.utils import mock_getRawHeaders

try:
import authlib # noqa: F401

HAS_AUTHLIB = True
except ImportError:
HAS_AUTHLIB = False

from tests.unittest import HomeserverTestCase, override_config, skip_unless
from tests.utils import HAS_AUTHLIB, checked_cast, mock_getRawHeaders

# These are a few constants that are used as config parameters in the tests.
SERVER_NAME = "test"
Expand All @@ -75,6 +72,7 @@
SUBJECT = "abc-def-ghi"
USERNAME = "test-user"
USER_ID = "@" + USERNAME + ":" + SERVER_NAME
OIDC_ADMIN_USERID = f"@__oidc_admin:{SERVER_NAME}"


async def get_json(url: str) -> JsonDict:
Expand Down Expand Up @@ -134,7 +132,10 @@ def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:

hs = self.setup_test_homeserver(proxied_http_client=self.http_client)

self.auth = hs.get_auth()
# Import this here so that we've checked that authlib is available.
from synapse.api.auth.msc3861_delegated import MSC3861DelegatedAuth

self.auth = checked_cast(MSC3861DelegatedAuth, hs.get_auth())

return hs

Expand Down Expand Up @@ -675,7 +676,8 @@ def test_admin_token(self) -> None:
request.requestHeaders.getRawHeaders = mock_getRawHeaders()
requester = self.get_success(self.auth.get_user_by_req(request))
self.assertEqual(
requester.user.to_string(), "@%s:%s" % ("__oidc_admin", SERVER_NAME)
requester.user.to_string(),
OIDC_ADMIN_USERID,
)
self.assertEqual(requester.is_guest, False)
self.assertEqual(requester.device_id, None)
Expand All @@ -685,3 +687,102 @@ def test_admin_token(self) -> None:

# There should be no call to the introspection endpoint
self.http_client.request.assert_not_called()

@override_config({"mau_stats_only": True})
def test_request_tracking(self) -> None:
"""Using an access token should update the client_ips and MAU tables."""
# To start, there are no MAU users.
store = self.hs.get_datastores().main
mau = self.get_success(store.get_monthly_active_count())
self.assertEqual(mau, 0)

known_token = "token-token-GOOD-:)"

async def mock_http_client_request(
method: str,
uri: str,
data: Optional[bytes] = None,
headers: Optional[Headers] = None,
) -> IResponse:
"""Mocked auth provider response."""
assert method == "POST"
token = parse_qs(data)[b"token"][0].decode("utf-8")
if token == known_token:
return FakeResponse.json(
code=200,
payload={
"active": True,
"scope": MATRIX_USER_SCOPE,
"sub": SUBJECT,
"username": USERNAME,
},
)

return FakeResponse.json(code=200, payload={"active": False})

self.http_client.request = mock_http_client_request

EXAMPLE_IPV4_ADDR = "123.123.123.123"
EXAMPLE_USER_AGENT = "httprettygood"

# First test a known access token
channel = FakeChannel(self.site, self.reactor)
# type-ignore: FakeChannel is a mock of an HTTPChannel, not a proper HTTPChannel
req = SynapseRequest(channel, self.site) # type: ignore[arg-type]
req.client.host = EXAMPLE_IPV4_ADDR
req.requestHeaders.addRawHeader("Authorization", f"Bearer {known_token}")
req.requestHeaders.addRawHeader("User-Agent", EXAMPLE_USER_AGENT)
req.content = BytesIO(b"")
req.requestReceived(
b"GET",
b"/_matrix/client/v3/account/whoami",
b"1.1",
)
channel.await_result()
self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body)
self.assertEqual(channel.json_body["user_id"], USER_ID, channel.json_body)

# Expect to see one MAU entry, from the first request
mau = self.get_success(store.get_monthly_active_count())
self.assertEqual(mau, 1)

conn_infos = self.get_success(
store.get_user_ip_and_agents(UserID.from_string(USER_ID))
)
self.assertEqual(len(conn_infos), 1, conn_infos)
conn_info = conn_infos[0]
self.assertEqual(conn_info["access_token"], known_token)
self.assertEqual(conn_info["ip"], EXAMPLE_IPV4_ADDR)
self.assertEqual(conn_info["user_agent"], EXAMPLE_USER_AGENT)

# Now test MAS making a request using the special __oidc_admin token
MAS_IPV4_ADDR = "127.0.0.1"
MAS_USER_AGENT = "masmasmas"

channel = FakeChannel(self.site, self.reactor)
req = SynapseRequest(channel, self.site) # type: ignore[arg-type]
req.client.host = MAS_IPV4_ADDR
req.requestHeaders.addRawHeader(
"Authorization", f"Bearer {self.auth._admin_token}"
)
req.requestHeaders.addRawHeader("User-Agent", MAS_USER_AGENT)
req.content = BytesIO(b"")
req.requestReceived(
b"GET",
b"/_matrix/client/v3/account/whoami",
b"1.1",
)
channel.await_result()
self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body)
self.assertEqual(
channel.json_body["user_id"], OIDC_ADMIN_USERID, channel.json_body
)

# Still expect to see one MAU entry, from the first request
mau = self.get_success(store.get_monthly_active_count())
self.assertEqual(mau, 1)

conn_infos = self.get_success(
store.get_user_ip_and_agents(UserID.from_string(OIDC_ADMIN_USERID))
)
self.assertEqual(conn_infos, [])
8 changes: 1 addition & 7 deletions tests/rest/admin/test_jwks.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,7 @@
from synapse.rest.synapse.client import build_synapse_client_resource_tree

from tests.unittest import HomeserverTestCase, override_config, skip_unless

try:
import authlib # noqa: F401

HAS_AUTHLIB = True
except ImportError:
HAS_AUTHLIB = False
from tests.utils import HAS_AUTHLIB


@skip_unless(HAS_AUTHLIB, "requires authlib")
Expand Down
Loading

0 comments on commit 32a59a6

Please sign in to comment.