From ed0b629d85cf2f63692c6228d1bb3db696200c2c Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Mon, 23 Jan 2023 00:04:28 +0100 Subject: [PATCH 01/10] Add more data to export command --- synapse/app/admin_cmd.py | 28 ++++++++++++++++- synapse/handlers/admin.py | 43 ++++++++++++++++++++++++++ tests/handlers/test_admin.py | 60 ++++++++++++++++++++++++++++++++++++ 3 files changed, 130 insertions(+), 1 deletion(-) diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 165d1c5db06b..db5b86115978 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -54,7 +54,7 @@ from synapse.storage.databases.main.stream import StreamWorkerStore from synapse.storage.databases.main.tags import TagsWorkerStore from synapse.storage.databases.main.user_erasure_store import UserErasureWorkerStore -from synapse.types import StateMap +from synapse.types import JsonDict, StateMap from synapse.util import SYNAPSE_VERSION from synapse.util.logcontext import LoggingContext @@ -192,6 +192,32 @@ def write_knock( for event in state.values(): print(json.dumps(event), file=f) + def write_profile(self, profile: JsonDict) -> None: + user_directory = os.path.join(self.base_directory, "user_data") + os.makedirs(user_directory, exist_ok=True) + profile_file = os.path.join(user_directory, "profile") + + with open(profile_file, "a") as f: + print(json.dumps(profile), file=f) + + def write_devices(self, devices: List[JsonDict]) -> None: + user_directory = os.path.join(self.base_directory, "user_data") + os.makedirs(user_directory, exist_ok=True) + device_file = os.path.join(user_directory, "devices") + + for device in devices: + with open(device_file, "a") as f: + print(json.dumps(device), file=f) + + def write_connections(self, connections: List[JsonDict]) -> None: + user_directory = os.path.join(self.base_directory, "user_data") + os.makedirs(user_directory, exist_ok=True) + connection_file = os.path.join(user_directory, 
"connections") + + for connection in connections: + with open(connection_file, "a") as f: + print(json.dumps(connection), file=f) + def finished(self) -> str: return self.base_directory diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index 5bf8e863875b..3f7862f71e0d 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -30,6 +30,7 @@ class AdminHandler: def __init__(self, hs: "HomeServer"): self.store = hs.get_datastores().main + self._device_handler = hs.get_device_handler() self._storage_controllers = hs.get_storage_controllers() self._state_storage_controller = self._storage_controllers.state self._msc3866_enabled = hs.config.experimental.msc3866.enabled @@ -247,6 +248,21 @@ async def export_user_data(self, user_id: str, writer: "ExfiltrationWriter") -> ) writer.write_state(room_id, event_id, state) + # Get the user profile + profile = await self.get_user(UserID.from_string(user_id)) + assert profile + writer.write_profile(profile) + + # Get all devices the user has + devices = await self._device_handler.get_devices_by_user(user_id) + writer.write_devices(devices) + + # Get all connections the user has + connections = await self.get_whois(UserID.from_string(user_id)) + writer.write_connections( + connections["devices"][""]["sessions"][0]["connections"] + ) + return writer.finished() @@ -297,6 +313,33 @@ def write_knock( """ raise NotImplementedError() + @abc.abstractmethod + def write_profile(self, profile: JsonDict) -> None: + """Write the profile of a user. + + Args: + profile: The user profile. + """ + raise NotImplementedError() + + @abc.abstractmethod + def write_devices(self, devices: List[JsonDict]) -> None: + """Write the devices of a user. + + Args: + devices: The list of devices. + """ + raise NotImplementedError() + + @abc.abstractmethod + def write_connections(self, connections: List[JsonDict]) -> None: + """Write the connections of a user. + + Args: + connections: The list of connections / sessions. 
+ """ + raise NotImplementedError() + @abc.abstractmethod def finished(self) -> Any: """Called when all data has successfully been exported and written. diff --git a/tests/handlers/test_admin.py b/tests/handlers/test_admin.py index c1579dac610f..6f300b8e1119 100644 --- a/tests/handlers/test_admin.py +++ b/tests/handlers/test_admin.py @@ -38,6 +38,7 @@ class ExfiltrateData(unittest.HomeserverTestCase): def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.admin_handler = hs.get_admin_handler() + self._store = hs.get_datastores().main self.user1 = self.register_user("user1", "password") self.token1 = self.login("user1", "password") @@ -236,3 +237,62 @@ def test_knock(self) -> None: self.assertEqual(args[0], room_id) self.assertEqual(args[1].content["membership"], "knock") self.assertTrue(args[2]) # Assert there is at least one bit of state + + def test_profile(self) -> None: + """Tests that user profile get exported.""" + writer = Mock() + + self.get_success(self.admin_handler.export_user_data(self.user2, writer)) + + writer.write_events.assert_not_called() + writer.write_profile.assert_called_once() + + # check only a few values, not all available + args = writer.write_profile.call_args[0] + self.assertEqual(args[0]["name"], self.user2) + self.assertIn("displayname", args[0]) + self.assertIn("avatar_url", args[0]) + self.assertIn("threepids", args[0]) + self.assertIn("external_ids", args[0]) + self.assertIn("creation_ts", args[0]) + + def test_devices(self) -> None: + """Tests that user devices get exported.""" + writer = Mock() + + self.get_success(self.admin_handler.export_user_data(self.user2, writer)) + + writer.write_events.assert_not_called() + writer.write_devices.assert_called_once() + + args = writer.write_devices.call_args[0] + self.assertEqual(len(args[0]), 1) + self.assertEqual(args[0][0]["user_id"], self.user2) + self.assertIn("device_id", args[0][0]) + self.assertIsNone(args[0][0]["display_name"]) + 
self.assertIsNone(args[0][0]["last_seen_user_agent"]) + self.assertIsNone(args[0][0]["last_seen_ts"]) + self.assertIsNone(args[0][0]["last_seen_ip"]) + + def test_connections(self) -> None: + """Tests that user sessions / connections get exported.""" + # Insert a user IP + self.get_success( + self._store.insert_client_ip( + self.user2, "access_token", "ip", "user_agent", "MY_DEVICE" + ) + ) + + writer = Mock() + + self.get_success(self.admin_handler.export_user_data(self.user2, writer)) + + writer.write_events.assert_not_called() + writer.write_connections.assert_called_once() + + args = writer.write_connections.call_args[0] + self.assertEqual(len(args[0]), 1) + self.assertEqual(args[0][0]["ip"], "ip") + self.assertEqual(args[0][0]["user_agent"], "user_agent") + self.assertGreater(args[0][0]["last_seen"], 0) + self.assertNotIn("access_token", args[0][0]) From b87c0ea311172f4a448aac3d3a5f8c8f20d0b886 Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Mon, 23 Jan 2023 00:10:48 +0100 Subject: [PATCH 02/10] newsfile --- changelog.d/14894.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/14894.feature diff --git a/changelog.d/14894.feature b/changelog.d/14894.feature new file mode 100644 index 000000000000..3bd425a874a0 --- /dev/null +++ b/changelog.d/14894.feature @@ -0,0 +1 @@ +Adds profile information, devices and connections to the user data export via command line. 
\ No newline at end of file From 09e997b16ac2ac6ad78ea1a5cccf675ea1c2f611 Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Mon, 23 Jan 2023 00:13:31 +0100 Subject: [PATCH 03/10] remove space --- changelog.d/14894.feature | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/14894.feature b/changelog.d/14894.feature index 3bd425a874a0..d22741d079af 100644 --- a/changelog.d/14894.feature +++ b/changelog.d/14894.feature @@ -1 +1 @@ -Adds profile information, devices and connections to the user data export via command line. \ No newline at end of file +Adds profile information, devices and connections to the user data export via command line. \ No newline at end of file From 665e50a00d5de6ec892c2e341ab38b15421e891a Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Mon, 23 Jan 2023 08:12:10 +0100 Subject: [PATCH 04/10] add needed WorkerStores --- synapse/app/admin_cmd.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index db5b86115978..fe7afb94755e 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -35,6 +35,7 @@ ApplicationServiceTransactionWorkerStore, ApplicationServiceWorkerStore, ) +from synapse.storage.databases.main.client_ips import ClientIpWorkerStore from synapse.storage.databases.main.deviceinbox import DeviceInboxWorkerStore from synapse.storage.databases.main.devices import DeviceWorkerStore from synapse.storage.databases.main.event_federation import EventFederationWorkerStore @@ -43,6 +44,7 @@ ) from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.databases.main.filtering import FilteringWorkerStore +from synapse.storage.databases.main.profile import ProfileWorkerStore from synapse.storage.databases.main.push_rule import PushRulesWorkerStore from synapse.storage.databases.main.receipts import ReceiptsWorkerStore from 
synapse.storage.databases.main.registration import RegistrationWorkerStore @@ -63,6 +65,7 @@ class AdminCmdSlavedStore( FilteringWorkerStore, + ClientIpWorkerStore, DeviceWorkerStore, TagsWorkerStore, DeviceInboxWorkerStore, @@ -82,6 +85,7 @@ class AdminCmdSlavedStore( EventsWorkerStore, RegistrationWorkerStore, RoomWorkerStore, + ProfileWorkerStore, ): def __init__( self, From 1f873c42bf877491658064439834514501d4fcef Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Mon, 23 Jan 2023 08:17:38 +0100 Subject: [PATCH 05/10] update CI script --- .ci/scripts/test_export_data_command.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.ci/scripts/test_export_data_command.sh b/.ci/scripts/test_export_data_command.sh index 9f6c49acff73..36f836345cae 100755 --- a/.ci/scripts/test_export_data_command.sh +++ b/.ci/scripts/test_export_data_command.sh @@ -23,8 +23,9 @@ poetry run python -m synapse.app.admin_cmd -c .ci/sqlite-config.yaml export-dat --output-directory /tmp/export_data # Test that the output directory exists and contains the rooms directory -dir="/tmp/export_data/rooms" -if [ -d "$dir" ]; then +dir_r="/tmp/export_data/rooms" +dir_u="/tmp/export_data/user_data" +if [ -d "$dir_r" ] && [ -d "$dir_u" ]; then echo "Command successful, this test passes" else echo "No output directories found, the command fails against a sqlite database." @@ -43,8 +44,9 @@ poetry run python -m synapse.app.admin_cmd -c .ci/postgres-config.yaml export-d --output-directory /tmp/export_data2 # Test that the output directory exists and contains the rooms directory -dir2="/tmp/export_data2/rooms" -if [ -d "$dir2" ]; then +dir_r2="/tmp/export_data2/rooms" +dir_u2="/tmp/export_data2/user_data" +if [ -d "$dir_r2" ] && [ -d "$dir_u2" ]; then echo "Command successful, this test passes" else echo "No output directories found, the command fails against a postgres database." 
From 5f19a68e0d40d05cf98c0680fa281ea96abf0d5b Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Wed, 25 Jan 2023 22:30:53 +0100 Subject: [PATCH 06/10] skip if `profile` is `None` --- synapse/handlers/admin.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index 3f7862f71e0d..389ecc1dc960 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -250,8 +250,8 @@ async def export_user_data(self, user_id: str, writer: "ExfiltrationWriter") -> # Get the user profile profile = await self.get_user(UserID.from_string(user_id)) - assert profile - writer.write_profile(profile) + if profile is not None: + writer.write_profile(profile) # Get all devices the user has devices = await self._device_handler.get_devices_by_user(user_id) From cc3a7887cc1ecb851b6e5982ef666c2799175b4a Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Wed, 25 Jan 2023 22:41:41 +0100 Subject: [PATCH 07/10] prepare FAQ --- docs/usage/administration/admin_faq.md | 48 ++++++++++++++++++-------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/docs/usage/administration/admin_faq.md b/docs/usage/administration/admin_faq.md index 18ce6171dbba..e3e82a62ad02 100644 --- a/docs/usage/administration/admin_faq.md +++ b/docs/usage/administration/admin_faq.md @@ -2,13 +2,19 @@ How do I become a server admin? --- -If your server already has an admin account you should use the [User Admin API](../../admin_api/user_admin_api.md#change-whether-a-user-is-a-server-administrator-or-not) to promote other accounts to become admins. +If your server already has an admin account you should use the +[User Admin API](../../admin_api/user_admin_api.md#change-whether-a-user-is-a-server-administrator-or-not) +to promote other accounts to become admins. 
-If you don't have any admin accounts yet you won't be able to use the admin API, so you'll have to edit the database manually. Manually editing the database is generally not recommended so once you have an admin account: use the admin APIs to make further changes. +If you don't have any admin accounts yet you won't be able to use the admin API, +so you'll have to edit the database manually. Manually editing the database is +generally not recommended so once you have an admin account: use the admin APIs +to make further changes. ```sql UPDATE users SET admin = 1 WHERE name = '@foo:bar.com'; ``` + What servers are my server talking to? --- Run this sql query on your db: @@ -50,21 +56,29 @@ I have a problem with my server. Can I just delete my database and start again? --- Deleting your database is unlikely to make anything better. -It's easy to make the mistake of thinking that you can start again from a clean slate by dropping your database, but things don't work like that in a federated network: lots of other servers have information about your server. +It's easy to make the mistake of thinking that you can start again from a clean +slate by dropping your database, but things don't work like that in a federated +network: lots of other servers have information about your server. -For example: other servers might think that you are in a room, your server will think that you are not, and you'll probably be unable to interact with that room in a sensible way ever again. +For example: other servers might think that you are in a room, your server will +think that you are not, and you'll probably be unable to interact with that room +in a sensible way ever again. -In general, there are better solutions to any problem than dropping the database. Come and seek help in https://matrix.to/#/#synapse:matrix.org. +In general, there are better solutions to any problem than dropping the database. +Come and seek help in https://matrix.to/#/#synapse:matrix.org. 
There are two exceptions when it might be sensible to delete your database and start again: -* You have *never* joined any rooms which are federated with other servers. For instance, a local deployment which the outside world can't talk to. -* You are changing the `server_name` in the homeserver configuration. In effect this makes your server a completely new one from the point of view of the network, so in this case it makes sense to start with a clean database. +* You have *never* joined any rooms which are federated with other servers. For +instance, a local deployment which the outside world can't talk to. +* You are changing the `server_name` in the homeserver configuration. In effect +this makes your server a completely new one from the point of view of the network, +so in this case it makes sense to start with a clean database. (In both cases you probably also want to clear out the media_store.) I've stuffed up access to my room, how can I delete it to free up the alias? --- Using the following curl command: -``` +```console curl -H 'Authorization: Bearer ' -X DELETE https://matrix.org/_matrix/client/r0/directory/room/ ``` `` - can be obtained in riot by looking in the riot settings, down the bottom is: @@ -75,19 +89,25 @@ Access Token:\ How can I find the lines corresponding to a given HTTP request in my homeserver log? --- -Synapse tags each log line according to the HTTP request it is processing. When it finishes processing each request, it logs a line containing the words `Processed request: `. For example: +Synapse tags each log line according to the HTTP request it is processing. When +it finishes processing each request, it logs a line containing the words +`Processed request: `. 
For example: ``` 2019-02-14 22:35:08,196 - synapse.access.http.8008 - 302 - INFO - GET-37 - ::1 - 8008 - {@richvdh:localhost} Processed request: 0.173sec/0.001sec (0.002sec, 0.000sec) (0.027sec/0.026sec/2) 687B 200 "GET /_matrix/client/r0/sync HTTP/1.1" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36" [0 dbevts]" ``` -Here we can see that the request has been tagged with `GET-37`. (The tag depends on the method of the HTTP request, so might start with `GET-`, `PUT-`, `POST-`, `OPTIONS-` or `DELETE-`.) So to find all lines corresponding to this request, we can do: +Here we can see that the request has been tagged with `GET-37`. (The tag depends +on the method of the HTTP request, so might start with `GET-`, `PUT-`, `POST-`, +`OPTIONS-` or `DELETE-`.) So to find all lines corresponding to this request, we can do: -``` +```console grep 'GET-37' homeserver.log ``` -If you want to paste that output into a github issue or matrix room, please remember to surround it with triple-backticks (```) to make it legible (see [quoting code](https://help.github.com/en/articles/basic-writing-and-formatting-syntax#quoting-code)). +If you want to paste that output into a github issue or matrix room, please +remember to surround it with triple-backticks (```) to make it legible +(see [quoting code](https://help.github.com/en/articles/basic-writing-and-formatting-syntax#quoting-code)). What do all those fields in the 'Processed' line mean? @@ -127,7 +147,7 @@ This is normally caused by a misconfiguration in your reverse-proxy. See [the re Help!! Synapse is slow and eats all my RAM/CPU! ------------------------------------------------ +--- First, ensure you are running the latest version of Synapse, using Python 3 with a [PostgreSQL database](../../postgres.md). 
@@ -169,7 +189,7 @@ in the Synapse config file: [see here](../configuration/config_documentation.md# Running out of File Handles ---------------------------- +--- If Synapse runs out of file handles, it typically fails badly - live-locking at 100% CPU, and/or failing to accept new TCP connections (blocking the From 1c012e24eda67d138eb8829ad5d845153518a1c1 Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Wed, 25 Jan 2023 23:23:06 +0100 Subject: [PATCH 08/10] add docs --- docs/usage/administration/admin_faq.md | 32 +++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/docs/usage/administration/admin_faq.md b/docs/usage/administration/admin_faq.md index e3e82a62ad02..371c738ef262 100644 --- a/docs/usage/administration/admin_faq.md +++ b/docs/usage/administration/admin_faq.md @@ -42,8 +42,38 @@ How can I export user data? --- Synapse includes a Python command to export data for a specific user. It takes the homeserver configuration file and the full Matrix ID of the user to export: + +```console +python -m synapse.app.admin_cmd -c export-data --output-directory +``` + +If you use [Poetry](../../development/dependencies.md#managing-dependencies-with-poetry) +to run Synapse: + ```console -python -m synapse.app.admin_cmd -c export-data +poetry run python -m synapse.app.admin_cmd -c export-data +--output-directory +``` + +The directory to store the exported data must be empty. +It can be set with the optional parameter `--output-directory`. +Defaults to creating a temp directory. This is a subfolder that starts with +`synapse-exfiltrate` in `/tmp`, `/var/tmp`, or `/usr/tmp`, in that order. 
+ +The exported data has the following layout: + +``` +output-directory +├───rooms +│ └─── +│ ├───events +│ ├───state +│ ├───invite_state +│ └───knock_state +└───user_data + ├───connections + ├───devices + └───profile ``` Manually resetting passwords From e491677bf7941c1d3aa37d3bcb7b20d865f77ff7 Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Wed, 25 Jan 2023 23:25:42 +0100 Subject: [PATCH 09/10] fix line break --- docs/usage/administration/admin_faq.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/usage/administration/admin_faq.md b/docs/usage/administration/admin_faq.md index 371c738ef262..59414b038e9c 100644 --- a/docs/usage/administration/admin_faq.md +++ b/docs/usage/administration/admin_faq.md @@ -51,8 +51,7 @@ If you uses [Poetry](../../development/dependencies.md#managing-dependencies-wit to run Synapse: ```console -poetry run python -m synapse.app.admin_cmd -c export-data ---output-directory +poetry run python -m synapse.app.admin_cmd -c export-data --output-directory ``` The directory to store the exported data must be empty. From 3a372d45833b2ac40690d61c22dc7d58cef0e44a Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 1 Feb 2023 08:07:53 -0500 Subject: [PATCH 10/10] Clarify documentation. --- docs/usage/administration/admin_faq.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/usage/administration/admin_faq.md b/docs/usage/administration/admin_faq.md index 59414b038e9c..7a2774119964 100644 --- a/docs/usage/administration/admin_faq.md +++ b/docs/usage/administration/admin_faq.md @@ -54,10 +54,11 @@ to run Synapse: poetry run python -m synapse.app.admin_cmd -c export-data --output-directory ``` -The directory to store the exported data must be empty. -It can be set with the optional parameter `--output-directory`. -Defaults to creating a temp directory. 
This is a subfolder that starts with -`synapse-exfiltrate` in `/tmp`, `/var/tmp`, or `/usr/tmp`, in that order. +The directory to store the export data in can be customised with the +`--output-directory` parameter; ensure that the provided directory is +empty. If this parameter is not provided, Synapse defaults to creating +a temporary directory (which starts with "synapse-exfiltrate") in `/tmp`, +`/var/tmp`, or `/usr/tmp`, in that order. The exported data has the following layout: