From a8ac40445c98b9e1fc2538d7d4ec49c80b0298ac Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 13 Sep 2019 15:20:49 +0100 Subject: [PATCH 1/5] Record mappings from saml users in an external table We want to assign unique mxids to saml users based on an incrementing suffix. For that to work, we need to record the allocated mxid in a separate table. --- docs/sample_config.yaml | 26 +++++ synapse/config/saml2_config.py | 78 ++++++++++++- synapse/handlers/saml_handler.py | 103 ++++++++++++++++-- synapse/rest/client/v1/login.py | 14 +++ synapse/storage/registration.py | 41 +++++++ .../schema/delta/56/user_external_ids.sql | 24 ++++ 6 files changed, 276 insertions(+), 10 deletions(-) create mode 100644 synapse/storage/schema/delta/56/user_external_ids.sql diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 8cfc5c312a1d..9021fe2cb8ac 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1099,6 +1099,32 @@ saml2_config: # #saml_session_lifetime: 5m + # The SAML attribute (after mapping via the attribute maps) to use to derive + # the Matrix ID from. 'uid' by default. + # + #mxid_source_attribute: displayName + + # The mapping system to use for mapping the saml attribute onto a matrix ID. + # Options include: + # * 'hexencode' (which maps unpermitted characters to '=xx') + # * 'dotreplace' (which replaces unpermitted characters with '.'). + # The default is 'hexencode'. + # + #mxid_mapping: dotreplace + + # In previous versions of synapse, the mapping from SAML attribute to MXID was + # always calculated dynamically rather than stored in a table. For backwards- + # compatibility, we will look for user_ids matching such a pattern before + # creating a new account. + # + # This setting controls the SAML attribute which will be used for this + # backwards-compatibility lookup. Typically it should be 'uid', but if the + # attribute maps are changed, it may be necessary to change it. + # + # The default is 'uid'. + # + #grandfathered_mxid_source_attribute: upn + # Enable CAS for registration and login. diff --git a/synapse/config/saml2_config.py b/synapse/config/saml2_config.py index c46ac087db8d..a022470702c3 100644 --- a/synapse/config/saml2_config.py +++ b/synapse/config/saml2_config.py @@ -12,7 +12,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import re + from synapse.python_dependencies import DependencyException, check_requirements +from synapse.types import ( + map_username_to_mxid_localpart, + mxid_localpart_allowed_characters, +) from ._base import Config, ConfigError @@ -36,6 +42,14 @@ def read_config(self, config, **kwargs): self.saml2_enabled = True + self.saml2_mxid_source_attribute = saml2_config.get( + "mxid_source_attribute", "uid" + ) + + self.saml2_grandfathered_mxid_source_attribute = saml2_config.get( + "grandfathered_mxid_source_attribute", "uid" + ) + import saml2.config self.saml2_sp_config = saml2.config.SPConfig() @@ -51,6 +65,12 @@ def read_config(self, config, **kwargs): saml2_config.get("saml_session_lifetime", "5m") ) + mapping = saml2_config.get("mxid_mapping", "hexencode") + try: + self.saml2_mxid_mapper = MXID_MAPPER_MAP[mapping] + except KeyError: + raise ConfigError("%s is not a known mxid_mapping" % (mapping,)) + def _default_saml_config_dict(self): import saml2 @@ -58,6 +78,13 @@ def _default_saml_config_dict(self): if public_baseurl is None: raise ConfigError("saml2_config requires a public_baseurl to be set") + required_attributes = {"uid", self.saml2_mxid_source_attribute} + + optional_attributes = {"displayName"} + if self.saml2_grandfathered_mxid_source_attribute: + optional_attributes.add(self.saml2_grandfathered_mxid_source_attribute) + optional_attributes -= required_attributes + metadata_url = public_baseurl + "_matrix/saml2/metadata.xml" response_url = public_baseurl + "_matrix/saml2/authn_response" return { @@ -69,8 +96,9 @@ def _default_saml_config_dict(self): (response_url, saml2.BINDING_HTTP_POST) ] }, - "required_attributes": ["uid"], - "optional_attributes": ["mail", "surname", "givenname"], + "required_attributes": list(required_attributes), + "optional_attributes": list(optional_attributes), + # "name_id_format": saml2.saml.NAMEID_FORMAT_PERSISTENT, } }, } @@ -146,6 +174,52 @@ def generate_config_section(self, config_dir_path, server_name, **kwargs): # The default is 5 minutes. # #saml_session_lifetime: 5m + + # The SAML attribute (after mapping via the attribute maps) to use to derive + # the Matrix ID from. 'uid' by default. + # + #mxid_source_attribute: displayName + + # The mapping system to use for mapping the saml attribute onto a matrix ID. + # Options include: + # * 'hexencode' (which maps unpermitted characters to '=xx') + # * 'dotreplace' (which replaces unpermitted characters with '.'). + # The default is 'hexencode'. + # + #mxid_mapping: dotreplace + + # In previous versions of synapse, the mapping from SAML attribute to MXID was + # always calculated dynamically rather than stored in a table. For backwards- + # compatibility, we will look for user_ids matching such a pattern before + # creating a new account. + # + # This setting controls the SAML attribute which will be used for this + # backwards-compatibility lookup. Typically it should be 'uid', but if the + # attribute maps are changed, it may be necessary to change it. + # + # The default is 'uid'. + # + #grandfathered_mxid_source_attribute: upn """ % { "config_dir_path": config_dir_path } + + +DOT_REPLACE_PATTERN = re.compile( + ("[^%s]" % (re.escape("".join(mxid_localpart_allowed_characters)),)) +) + + +def dot_replace_for_mxid(username: str) -> str: + username = username.lower() + username = DOT_REPLACE_PATTERN.sub(".", username) + + # regular mxids aren't allowed to start with an underscore either + username = re.sub("^_", "", username) + return username + + +MXID_MAPPER_MAP = { + "hexencode": map_username_to_mxid_localpart, + "dotreplace": dot_replace_for_mxid, +} diff --git a/synapse/handlers/saml_handler.py b/synapse/handlers/saml_handler.py index a1ce6929cfee..5fa8272dc9b8 100644 --- a/synapse/handlers/saml_handler.py +++ b/synapse/handlers/saml_handler.py @@ -21,6 +21,8 @@ from synapse.api.errors import SynapseError from synapse.http.servlet import parse_string from synapse.rest.client.v1.login import SSOAuthHandler +from synapse.types import UserID, map_username_to_mxid_localpart +from synapse.util.async_helpers import Linearizer logger = logging.getLogger(__name__) @@ -29,12 +31,26 @@ class SamlHandler: def __init__(self, hs): self._saml_client = Saml2Client(hs.config.saml2_sp_config) self._sso_auth_handler = SSOAuthHandler(hs) + self._registration_handler = hs.get_registration_handler() + + self._clock = hs.get_clock() + self._datastore = hs.get_datastore() + self._hostname = hs.hostname + self._saml2_session_lifetime = hs.config.saml2_session_lifetime + self._mxid_source_attribute = hs.config.saml2_mxid_source_attribute + self._grandfathered_mxid_source_attribute = ( + hs.config.saml2_grandfathered_mxid_source_attribute + ) + self._mxid_mapper = hs.config.saml2_mxid_mapper + + # identifier for the external_ids table + self._auth_provider_id = "saml" # a map from saml session id to Saml2SessionData object self._outstanding_requests_dict = {} - self._clock = hs.get_clock() - self._saml2_session_lifetime = hs.config.saml2_session_lifetime + # a lock on the mappings + self._mapping_lock = Linearizer(name="saml_mapping", clock=self._clock) def handle_redirect_request(self, client_redirect_url): """Handle an incoming request to /login/sso/redirect @@ -60,7 +76,7 @@ def handle_redirect_request(self, client_redirect_url): # this shouldn't happen! raise Exception("prepare_for_authenticate didn't return a Location header") - def handle_saml_response(self, request): + async def handle_saml_response(self, request): """Handle an incoming request to /_matrix/saml2/authn_response Args: @@ -77,6 +93,10 @@ def handle_saml_response(self, request): # the dict. self.expire_sessions() + user_id = await self._map_saml_response_to_user(resp_bytes) + self._sso_auth_handler.complete_sso_login(user_id, request, relay_state) + + async def _map_saml_response_to_user(self, resp_bytes): try: saml2_auth = self._saml_client.parse_authn_request_response( resp_bytes, @@ -91,18 +111,85 @@ def handle_saml_response(self, request): logger.warning("SAML2 response was not signed") raise SynapseError(400, "SAML2 response was not signed") - if "uid" not in saml2_auth.ava: + try: + remote_user_id = saml2_auth.ava["uid"][0] + except KeyError: logger.warning("SAML2 response lacks a 'uid' attestation") raise SynapseError(400, "uid not in SAML2 response") + try: + mxid_source = saml2_auth.ava[self._mxid_source_attribute][0] + except KeyError: + logger.warning( + "SAML2 response lacks a '%s' attestation", self._mxid_source_attribute + ) + raise SynapseError( + 400, "%s not in SAML2 response" % (self._mxid_source_attribute,) + ) + self._outstanding_requests_dict.pop(saml2_auth.in_response_to, None) - username = saml2_auth.ava["uid"][0] displayName = saml2_auth.ava.get("displayName", [None])[0] - return self._sso_auth_handler.on_successful_auth( - username, request, relay_state, user_display_name=displayName - ) + with (await self._mapping_lock.queue(self._auth_provider_id)): + # first of all, check if we already have a mapping for this user + logger.info( + "Looking for existing mapping for user %s:%s", + self._auth_provider_id, + remote_user_id, + ) + registered_user_id = await self._datastore.get_user_by_external_id( + self._auth_provider_id, remote_user_id + ) + if registered_user_id is not None: + logger.info("Found existing mapping %s", registered_user_id) + return registered_user_id + + # backwards-compatibility hack: see if there is an existing user with a + # suitable mapping from the uid + if ( + self._grandfathered_mxid_source_attribute + and self._grandfathered_mxid_source_attribute in saml2_auth.ava + ): + attrval = saml2_auth.ava[self._grandfathered_mxid_source_attribute][0] + user_id = UserID( + map_username_to_mxid_localpart(attrval), self._hostname + ).to_string() + logger.info( + "Looking for existing account based on mapped %s %s", + self._grandfathered_mxid_source_attribute, + user_id, + ) + + users = await self._datastore.get_users_by_id_case_insensitive(user_id) + if users: + registered_user_id = list(users.keys())[0] + logger.info("Grandfathering mapping to %s", registered_user_id) + await self._datastore.record_user_external_id( + self._auth_provider_id, remote_user_id, registered_user_id + ) + return registered_user_id + + # figure out a new mxid for this user + base_mxid_localpart = self._mxid_mapper(mxid_source) + + suffix = 0 + while True: + localpart = base_mxid_localpart + (str(suffix) if suffix else "") + if not await self._datastore.get_users_by_id_case_insensitive( + UserID(localpart, self._hostname).to_string() + ): + break + suffix += 1 + logger.info("Allocating mxid for new user with localpart %s", localpart) + + registered_user_id = await self._registration_handler.register_user( + localpart=localpart, default_display_name=displayName + ) + await self._datastore.record_user_external_id( + self._auth_provider_id, remote_user_id, registered_user_id + ) + return registered_user_id def expire_sessions(self): expire_before = self._clock.time_msec() - self._saml2_session_lifetime diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index 5762b9fd0648..eeaa72b2053f 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -29,6 +29,7 @@ parse_json_object_from_request, parse_string, ) +from synapse.http.site import SynapseRequest from synapse.rest.client.v2_alpha._base import client_patterns from synapse.rest.well_known import WellKnownBuilder from synapse.types import UserID, map_username_to_mxid_localpart @@ -507,6 +508,19 @@ def on_successful_auth( localpart=localpart, default_display_name=user_display_name ) + self.complete_sso_login(registered_user_id, request, client_redirect_url) + + def complete_sso_login( + self, registered_user_id: str, request: SynapseRequest, client_redirect_url: str + ): + """Having figured out a mxid for this user, complete the HTTP request + + Args: + registered_user_id: + request: + client_redirect_url: + """ + login_token = self._macaroon_gen.generate_short_term_login_token( registered_user_id ) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 55e4e84d71bd..1e3c2148f68a 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -22,6 +22,7 @@ from six.moves import range from twisted.internet import defer +from twisted.internet.defer import Deferred from synapse.api.constants import UserTypes from synapse.api.errors import Codes, StoreError, ThreepidValidationError @@ -337,6 +338,26 @@ def f(txn): return self.runInteraction("get_users_by_id_case_insensitive", f) + async def get_user_by_external_id( + self, auth_provider: str, external_id: str + ) -> str: + """Look up a user by their external auth id + + Args: + auth_provider: identifier for the remote auth provider + external_id: id on that system + + Returns: + str|None: the mxid of the user, or None if they are not known + """ + return await self._simple_select_one_onecol( + table="user_external_ids", + keyvalues={"auth_provider": auth_provider, "external_id": external_id}, + retcol="user_id", + allow_none=True, + desc="get_user_by_external_id", + ) + @defer.inlineCallbacks def count_all_users(self): """Counts all users registered on the homeserver.""" @@ -848,6 +869,26 @@ def _register_user( self._invalidate_cache_and_stream(txn, self.get_user_by_id, (user_id,)) txn.call_after(self.is_guest.invalidate, (user_id,)) + def record_user_external_id( + self, auth_provider: str, external_id: str, user_id: str + ) -> Deferred: + """Record a mapping from an external user id to a mxid + + Args: + auth_provider: identifier for the remote auth provider + external_id: id on that system + user_id: complete mxid that it is mapped to + """ + return self._simple_insert( + table="user_external_ids", + values={ + "auth_provider": auth_provider, + "external_id": external_id, + "user_id": user_id, + }, + desc="record_user_external_id", + ) + def user_set_password_hash(self, user_id, password_hash): """ NB. This does *not* evict any cache because the one use for this diff --git a/synapse/storage/schema/delta/56/user_external_ids.sql b/synapse/storage/schema/delta/56/user_external_ids.sql new file mode 100644 index 000000000000..91390c452755 --- /dev/null +++ b/synapse/storage/schema/delta/56/user_external_ids.sql @@ -0,0 +1,24 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * a table which records mappings from external auth providers to mxids + */ +CREATE TABLE IF NOT EXISTS user_external_ids ( + auth_provider TEXT NOT NULL, + external_id TEXT NOT NULL, + user_id TEXT NOT NULL, + UNIQUE (auth_provider, external_id) +); From b9d57502da8ae4e11523a155e0fd608433e1025d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 13 Sep 2019 16:06:03 +0100 Subject: [PATCH 2/5] changelog --- changelog.d/6037.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6037.feature diff --git a/changelog.d/6037.feature b/changelog.d/6037.feature new file mode 100644 index 000000000000..95d82bd4d839 --- /dev/null +++ b/changelog.d/6037.feature @@ -0,0 +1 @@ +Handle userid clashes when authenticating via SAML by appending an integer suffix. \ No newline at end of file From 7423fade92d98d4246947bc8491af5827cd9e0dd Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 19 Sep 2019 17:16:50 +0100 Subject: [PATCH 3/5] better logging --- synapse/handlers/saml_handler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/handlers/saml_handler.py b/synapse/handlers/saml_handler.py index 5fa8272dc9b8..f000d2a00fc9 100644 --- a/synapse/handlers/saml_handler.py +++ b/synapse/handlers/saml_handler.py @@ -111,6 +111,8 @@ async def _map_saml_response_to_user(self, resp_bytes): logger.warning("SAML2 response was not signed") raise SynapseError(400, "SAML2 response was not signed") + logger.info("Got SAML2 reponse with attributes: %s", saml2_auth.ava) + try: remote_user_id = saml2_auth.ava["uid"][0] except KeyError: From 599f786e4ee98050f399ff7f530a7208ce14468d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 19 Sep 2019 18:52:17 +0100 Subject: [PATCH 4/5] Update 6037.feature --- changelog.d/6037.feature | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/6037.feature b/changelog.d/6037.feature index 95d82bd4d839..85553d2da0d8 100644 --- a/changelog.d/6037.feature +++ b/changelog.d/6037.feature @@ -1 +1 @@ -Handle userid clashes when authenticating via SAML by appending an integer suffix. \ No newline at end of file +Make the process for mapping SAML2 users to matrix IDs more flexible. From 33757bad19a19adaef211a0d58fc369c68dfeb3c Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 20 Sep 2019 11:15:14 +0100 Subject: [PATCH 5/5] More better logging --- synapse/handlers/saml_handler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/saml_handler.py b/synapse/handlers/saml_handler.py index f000d2a00fc9..cc9e6b9bd018 100644 --- a/synapse/handlers/saml_handler.py +++ b/synapse/handlers/saml_handler.py @@ -111,7 +111,8 @@ async def _map_saml_response_to_user(self, resp_bytes): logger.warning("SAML2 response was not signed") raise SynapseError(400, "SAML2 response was not signed") - logger.info("Got SAML2 reponse with attributes: %s", saml2_auth.ava) + logger.info("SAML2 response: %s", saml2_auth.origxml) + logger.info("SAML2 mapped attributes: %s", saml2_auth.ava) try: remote_user_id = saml2_auth.ava["uid"][0]