Skip to content

Commit 1709234

Browse files
authored
Add an access token introspection cache to make Matrix Authentication Service integration (MSC3861) more efficient. (#18231)
Evolution of cd78f3d This cache does not have any explicit invalidation, but this is deemed acceptable (see code comment). We may still prefer to add it eventually, letting us bump up the Time-To-Live (TTL) on the cache as we currently set a 2 minute expiry to balance the fact that we have no explicit invalidation. This cache makes several things more efficient: - reduces number of outbound requests from Synapse, reducing CPU utilisation + network I/O - reduces request handling time in Synapse, which improves client-visible latency - reduces load on MAS and its database --- Other than that, this PR also introduces support for `expires_in` (seconds) on the introspection response. This lets the cached responses expire at the proper expiry time of the access token, whilst avoiding clock skew issues. Corresponds to: element-hq/matrix-authentication-service#4241 --------- Signed-off-by: Olivier 'reivilibre <[email protected]>
1 parent 80b62d7 commit 1709234

File tree

3 files changed

+135
-17
lines changed

3 files changed

+135
-17
lines changed

changelog.d/18231.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add an access token introspection cache to make Matrix Authentication Service integration (MSC3861) more efficient.

synapse/api/auth/msc3861_delegated.py

Lines changed: 96 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#
2020
#
2121
import logging
22+
from dataclasses import dataclass
2223
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional
2324
from urllib.parse import urlencode
2425

@@ -47,6 +48,7 @@
4748
from synapse.types import Requester, UserID, create_requester
4849
from synapse.util import json_decoder
4950
from synapse.util.caches.cached_call import RetryOnExceptionCachedCall
51+
from synapse.util.caches.response_cache import ResponseCache
5052

5153
if TYPE_CHECKING:
5254
from synapse.rest.admin.experimental_features import ExperimentalFeature
@@ -76,6 +78,61 @@ def scope_to_list(scope: str) -> List[str]:
7678
return scope.strip().split(" ")
7779

7880

81+
@dataclass
82+
class IntrospectionResult:
83+
_inner: IntrospectionToken
84+
85+
# when we retrieved this token,
86+
# in milliseconds since the Unix epoch
87+
retrieved_at_ms: int
88+
89+
def is_active(self, now_ms: int) -> bool:
90+
if not self._inner.get("active"):
91+
return False
92+
93+
expires_in = self._inner.get("expires_in")
94+
if expires_in is None:
95+
return True
96+
if not isinstance(expires_in, int):
97+
raise InvalidClientTokenError("token `expires_in` is not an int")
98+
99+
absolute_expiry_ms = expires_in * 1000 + self.retrieved_at_ms
100+
return now_ms < absolute_expiry_ms
101+
102+
def get_scope_list(self) -> List[str]:
103+
value = self._inner.get("scope")
104+
if not isinstance(value, str):
105+
return []
106+
return scope_to_list(value)
107+
108+
def get_sub(self) -> Optional[str]:
109+
value = self._inner.get("sub")
110+
if not isinstance(value, str):
111+
return None
112+
return value
113+
114+
def get_username(self) -> Optional[str]:
115+
value = self._inner.get("username")
116+
if not isinstance(value, str):
117+
return None
118+
return value
119+
120+
def get_name(self) -> Optional[str]:
121+
value = self._inner.get("name")
122+
if not isinstance(value, str):
123+
return None
124+
return value
125+
126+
def get_device_id(self) -> Optional[str]:
127+
value = self._inner.get("device_id")
128+
if value is not None and not isinstance(value, str):
129+
raise AuthError(
130+
500,
131+
"Invalid device ID in introspection result",
132+
)
133+
return value
134+
135+
79136
class PrivateKeyJWTWithKid(PrivateKeyJWT): # type: ignore[misc]
80137
"""An implementation of the private_key_jwt client auth method that includes a kid header.
81138
@@ -121,6 +178,31 @@ def __init__(self, hs: "HomeServer"):
121178
self._hostname = hs.hostname
122179
self._admin_token: Callable[[], Optional[str]] = self._config.admin_token
123180

181+
# # Token Introspection Cache
182+
# This remembers what users/devices are represented by which access tokens,
183+
# in order to reduce overall system load:
184+
# - on Synapse (as requests are relatively expensive)
185+
# - on the network
186+
# - on MAS
187+
#
188+
# Since there is no invalidation mechanism currently,
189+
# the entries expire after 2 minutes.
190+
# This does mean tokens can be treated as valid by Synapse
191+
# for longer than reality.
192+
#
193+
# Ideally, tokens should logically be invalidated in the following circumstances:
194+
# - If a session logout happens.
195+
# In this case, MAS will delete the device within Synapse
196+
# anyway and this is good enough as an invalidation.
197+
# - If the client refreshes their token in MAS.
198+
# In this case, the device still exists and it's not the end of the world for
199+
# the old access token to continue working for a short time.
200+
self._introspection_cache: ResponseCache[str] = ResponseCache(
201+
self._clock,
202+
"token_introspection",
203+
timeout_ms=120_000,
204+
)
205+
124206
self._issuer_metadata = RetryOnExceptionCachedCall[OpenIDProviderMetadata](
125207
self._load_metadata
126208
)
@@ -193,7 +275,7 @@ async def _introspection_endpoint(self) -> str:
193275
metadata = await self._issuer_metadata.get()
194276
return metadata.get("introspection_endpoint")
195277

196-
async def _introspect_token(self, token: str) -> IntrospectionToken:
278+
async def _introspect_token(self, token: str) -> IntrospectionResult:
197279
"""
198280
Send a token to the introspection endpoint and returns the introspection response
199281
@@ -266,7 +348,9 @@ async def _introspect_token(self, token: str) -> IntrospectionToken:
266348
"The introspection endpoint returned an invalid JSON response."
267349
)
268350

269-
return IntrospectionToken(**resp)
351+
return IntrospectionResult(
352+
IntrospectionToken(**resp), retrieved_at_ms=self._clock.time_msec()
353+
)
270354

271355
async def is_server_admin(self, requester: Requester) -> bool:
272356
return "urn:synapse:admin:*" in requester.scope
@@ -344,7 +428,9 @@ async def get_user_by_access_token(
344428
)
345429

346430
try:
347-
introspection_result = await self._introspect_token(token)
431+
introspection_result = await self._introspection_cache.wrap(
432+
token, self._introspect_token, token
433+
)
348434
except Exception:
349435
logger.exception("Failed to introspect token")
350436
raise SynapseError(503, "Unable to introspect the access token")
@@ -353,11 +439,11 @@ async def get_user_by_access_token(
353439

354440
# TODO: introspection verification should be more extensive, especially:
355441
# - verify the audience
356-
if not introspection_result.get("active"):
442+
if not introspection_result.is_active(self._clock.time_msec()):
357443
raise InvalidClientTokenError("Token is not active")
358444

359445
# Let's look at the scope
360-
scope: List[str] = scope_to_list(introspection_result.get("scope", ""))
446+
scope: List[str] = introspection_result.get_scope_list()
361447

362448
# Determine type of user based on presence of particular scopes
363449
has_user_scope = SCOPE_MATRIX_API in scope
@@ -367,7 +453,7 @@ async def get_user_by_access_token(
367453
raise InvalidClientTokenError("No scope in token granting user rights")
368454

369455
# Match via the sub claim
370-
sub: Optional[str] = introspection_result.get("sub")
456+
sub: Optional[str] = introspection_result.get_sub()
371457
if sub is None:
372458
raise InvalidClientTokenError(
373459
"Invalid sub claim in the introspection result"
@@ -381,7 +467,7 @@ async def get_user_by_access_token(
381467
# or the external_id was never recorded
382468

383469
# TODO: claim mapping should be configurable
384-
username: Optional[str] = introspection_result.get("username")
470+
username: Optional[str] = introspection_result.get_username()
385471
if username is None or not isinstance(username, str):
386472
raise AuthError(
387473
500,
@@ -399,7 +485,7 @@ async def get_user_by_access_token(
399485

400486
# TODO: claim mapping should be configurable
401487
# If present, use the name claim as the displayname
402-
name: Optional[str] = introspection_result.get("name")
488+
name: Optional[str] = introspection_result.get_name()
403489

404490
await self.store.register_user(
405491
user_id=user_id.to_string(), create_profile_with_displayname=name
@@ -414,15 +500,8 @@ async def get_user_by_access_token(
414500

415501
# MAS 0.15+ will give us the device ID as an explicit value for compatibility sessions
416502
# If present, we get it from here, if not we get it in thee scope
417-
device_id = introspection_result.get("device_id")
418-
if device_id is not None:
419-
# We got the device ID explicitly, just sanity check that it's a string
420-
if not isinstance(device_id, str):
421-
raise AuthError(
422-
500,
423-
"Invalid device ID in introspection result",
424-
)
425-
else:
503+
device_id = introspection_result.get_device_id()
504+
if device_id is None:
426505
# Find device_ids in scope
427506
# We only allow a single device_id in the scope, so we find them all in the
428507
# scope list, and raise if there are more than one. The OIDC server should be

tests/handlers/test_oauth_delegation.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,44 @@ def test_unavailable_introspection_endpoint(self) -> None:
539539
error = self.get_failure(self.auth.get_user_by_req(request), SynapseError)
540540
self.assertEqual(error.value.code, 503)
541541

542+
def test_cached_expired_introspection(self) -> None:
543+
"""The handler should raise an error if the introspection response gives
544+
an expiry time, the introspection response is cached and then the entry is
545+
re-requested after it has expired."""
546+
547+
self.http_client.request = introspection_mock = AsyncMock(
548+
return_value=FakeResponse.json(
549+
code=200,
550+
payload={
551+
"active": True,
552+
"sub": SUBJECT,
553+
"scope": " ".join(
554+
[
555+
MATRIX_USER_SCOPE,
556+
f"{MATRIX_DEVICE_SCOPE_PREFIX}AABBCC",
557+
]
558+
),
559+
"username": USERNAME,
560+
"expires_in": 60,
561+
},
562+
)
563+
)
564+
request = Mock(args={})
565+
request.args[b"access_token"] = [b"mockAccessToken"]
566+
request.requestHeaders.getRawHeaders = mock_getRawHeaders()
567+
568+
# The first CS-API request causes a successful introspection
569+
self.get_success(self.auth.get_user_by_req(request))
570+
self.assertEqual(introspection_mock.call_count, 1)
571+
572+
# Sleep for 60 seconds so the token expires.
573+
self.reactor.advance(60.0)
574+
575+
# Now the CS-API request fails because the token expired
576+
self.get_failure(self.auth.get_user_by_req(request), InvalidClientTokenError)
577+
# Ensure another introspection request was not sent
578+
self.assertEqual(introspection_mock.call_count, 1)
579+
542580
def make_device_keys(self, user_id: str, device_id: str) -> JsonDict:
543581
# We only generate a master key to simplify the test.
544582
master_signing_key = generate_signing_key(device_id)

0 commit comments

Comments
 (0)