Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Add more data to export command #14894

Merged
merged 11 commits into from
Feb 1, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions .ci/scripts/test_export_data_command.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@ poetry run python -m synapse.app.admin_cmd -c .ci/sqlite-config.yaml export-dat
--output-directory /tmp/export_data

# Test that the output directory exists and contains the rooms and user_data directories
dir="/tmp/export_data/rooms"
if [ -d "$dir" ]; then
dir_r="/tmp/export_data/rooms"
dir_u="/tmp/export_data/user_data"
if [ -d "$dir_r" ] && [ -d "$dir_u" ]; then
echo "Command successful, this test passes"
else
echo "No output directories found, the command fails against a sqlite database."
Expand All @@ -43,8 +44,9 @@ poetry run python -m synapse.app.admin_cmd -c .ci/postgres-config.yaml export-d
--output-directory /tmp/export_data2

# Test that the output directory exists and contains the rooms and user_data directories
dir2="/tmp/export_data2/rooms"
if [ -d "$dir2" ]; then
dir_r2="/tmp/export_data2/rooms"
dir_u2="/tmp/export_data2/user_data"
if [ -d "$dir_r2" ] && [ -d "$dir_u2" ]; then
echo "Command successful, this test passes"
else
echo "No output directories found, the command fails against a postgres database."
Expand Down
1 change: 1 addition & 0 deletions changelog.d/14894.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Adds profile information, devices and connections to the user data export via the command line.
32 changes: 31 additions & 1 deletion synapse/app/admin_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
ApplicationServiceTransactionWorkerStore,
ApplicationServiceWorkerStore,
)
from synapse.storage.databases.main.client_ips import ClientIpWorkerStore
from synapse.storage.databases.main.deviceinbox import DeviceInboxWorkerStore
from synapse.storage.databases.main.devices import DeviceWorkerStore
from synapse.storage.databases.main.event_federation import EventFederationWorkerStore
Expand All @@ -43,6 +44,7 @@
)
from synapse.storage.databases.main.events_worker import EventsWorkerStore
from synapse.storage.databases.main.filtering import FilteringWorkerStore
from synapse.storage.databases.main.profile import ProfileWorkerStore
from synapse.storage.databases.main.push_rule import PushRulesWorkerStore
from synapse.storage.databases.main.receipts import ReceiptsWorkerStore
from synapse.storage.databases.main.registration import RegistrationWorkerStore
Expand All @@ -54,7 +56,7 @@
from synapse.storage.databases.main.stream import StreamWorkerStore
from synapse.storage.databases.main.tags import TagsWorkerStore
from synapse.storage.databases.main.user_erasure_store import UserErasureWorkerStore
from synapse.types import StateMap
from synapse.types import JsonDict, StateMap
from synapse.util import SYNAPSE_VERSION
from synapse.util.logcontext import LoggingContext

Expand All @@ -63,6 +65,7 @@

class AdminCmdSlavedStore(
FilteringWorkerStore,
ClientIpWorkerStore,
DeviceWorkerStore,
TagsWorkerStore,
DeviceInboxWorkerStore,
Expand All @@ -82,6 +85,7 @@ class AdminCmdSlavedStore(
EventsWorkerStore,
RegistrationWorkerStore,
RoomWorkerStore,
ProfileWorkerStore,
):
def __init__(
self,
Expand Down Expand Up @@ -192,6 +196,32 @@ def write_knock(
for event in state.values():
print(json.dumps(event), file=f)

def write_profile(self, profile: JsonDict) -> None:
    """Append the user's profile to the export as a single JSON line.

    The record is written to the ``profile`` file inside the ``user_data``
    directory under ``self.base_directory``; the directory is created if it
    does not exist yet.

    Args:
        profile: The user profile; must be JSON-serialisable.
    """
    target_dir = os.path.join(self.base_directory, "user_data")
    os.makedirs(target_dir, exist_ok=True)

    # Append mode: earlier records in the file are kept.
    with open(os.path.join(target_dir, "profile"), "a") as out:
        out.write(json.dumps(profile) + "\n")

def write_devices(self, devices: List[JsonDict]) -> None:
    """Append the user's devices to the export, one JSON object per line.

    Records are written to the ``devices`` file inside the ``user_data``
    directory under ``self.base_directory``; the directory is created if it
    does not exist yet.

    Args:
        devices: The list of devices; each entry must be JSON-serialisable.
    """
    user_directory = os.path.join(self.base_directory, "user_data")
    os.makedirs(user_directory, exist_ok=True)
    device_file = os.path.join(user_directory, "devices")

    # Nothing to append: do not create an empty file.
    if not devices:
        return

    # Open the file once for the whole batch rather than once per device.
    with open(device_file, "a") as f:
        for device in devices:
            print(json.dumps(device), file=f)

def write_connections(self, connections: List[JsonDict]) -> None:
    """Append the user's connections to the export, one JSON object per line.

    Records are written to the ``connections`` file inside the ``user_data``
    directory under ``self.base_directory``; the directory is created if it
    does not exist yet.

    Args:
        connections: The list of connections / sessions; each entry must be
            JSON-serialisable.
    """
    user_directory = os.path.join(self.base_directory, "user_data")
    os.makedirs(user_directory, exist_ok=True)
    connection_file = os.path.join(user_directory, "connections")

    # Nothing to append: do not create an empty file.
    if not connections:
        return

    # Open the file once for the whole batch rather than once per connection.
    with open(connection_file, "a") as f:
        for connection in connections:
            print(json.dumps(connection), file=f)

def finished(self) -> str:
    """Return the base directory that the export was written under."""
    export_root = self.base_directory
    return export_root

Expand Down
43 changes: 43 additions & 0 deletions synapse/handlers/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
class AdminHandler:
def __init__(self, hs: "HomeServer"):
self.store = hs.get_datastores().main
self._device_handler = hs.get_device_handler()
self._storage_controllers = hs.get_storage_controllers()
self._state_storage_controller = self._storage_controllers.state
self._msc3866_enabled = hs.config.experimental.msc3866.enabled
Expand Down Expand Up @@ -247,6 +248,21 @@ async def export_user_data(self, user_id: str, writer: "ExfiltrationWriter") ->
)
writer.write_state(room_id, event_id, state)

# Get the user profile
profile = await self.get_user(UserID.from_string(user_id))
assert profile
clokep marked this conversation as resolved.
Show resolved Hide resolved
writer.write_profile(profile)

# Get all devices the user has
devices = await self._device_handler.get_devices_by_user(user_id)
writer.write_devices(devices)

# Get all connections the user has
connections = await self.get_whois(UserID.from_string(user_id))
writer.write_connections(
connections["devices"][""]["sessions"][0]["connections"]
)
clokep marked this conversation as resolved.
Show resolved Hide resolved

return writer.finished()


Expand Down Expand Up @@ -297,6 +313,33 @@ def write_knock(
"""
raise NotImplementedError()

@abc.abstractmethod
def write_profile(self, profile: JsonDict) -> None:
    """Export the profile of a user.

    Abstract hook: this base implementation only raises.

    Args:
        profile: The user's profile data.
    """
    raise NotImplementedError()

@abc.abstractmethod
def write_devices(self, devices: List[JsonDict]) -> None:
    """Export the devices of a user.

    Abstract hook: this base implementation only raises.

    Args:
        devices: The user's devices, one entry per device.
    """
    raise NotImplementedError()

@abc.abstractmethod
def write_connections(self, connections: List[JsonDict]) -> None:
    """Export the connections of a user.

    Abstract hook: this base implementation only raises.

    Args:
        connections: The user's connections / sessions, one entry each.
    """
    raise NotImplementedError()

@abc.abstractmethod
def finished(self) -> Any:
"""Called when all data has successfully been exported and written.
Expand Down
60 changes: 60 additions & 0 deletions tests/handlers/test_admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class ExfiltrateData(unittest.HomeserverTestCase):

def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
self.admin_handler = hs.get_admin_handler()
self._store = hs.get_datastores().main

self.user1 = self.register_user("user1", "password")
self.token1 = self.login("user1", "password")
Expand Down Expand Up @@ -236,3 +237,62 @@ def test_knock(self) -> None:
self.assertEqual(args[0], room_id)
self.assertEqual(args[1].content["membership"], "knock")
self.assertTrue(args[2]) # Assert there is at least one bit of state

def test_profile(self) -> None:
    """Check that the user's profile is handed to the writer on export."""
    exfiltration_writer = Mock()

    self.get_success(
        self.admin_handler.export_user_data(self.user2, exfiltration_writer)
    )

    exfiltration_writer.write_events.assert_not_called()
    exfiltration_writer.write_profile.assert_called_once()

    # Spot-check a handful of fields rather than the complete profile.
    profile = exfiltration_writer.write_profile.call_args[0][0]
    self.assertEqual(profile["name"], self.user2)
    for expected_key in (
        "displayname",
        "avatar_url",
        "threepids",
        "external_ids",
        "creation_ts",
    ):
        self.assertIn(expected_key, profile)

def test_devices(self) -> None:
    """Check that the user's devices are handed to the writer on export."""
    exfiltration_writer = Mock()

    self.get_success(
        self.admin_handler.export_user_data(self.user2, exfiltration_writer)
    )

    exfiltration_writer.write_events.assert_not_called()
    exfiltration_writer.write_devices.assert_called_once()

    exported_devices = exfiltration_writer.write_devices.call_args[0][0]
    self.assertEqual(len(exported_devices), 1)

    device = exported_devices[0]
    self.assertEqual(device["user_id"], self.user2)
    self.assertIn("device_id", device)
    # These fields are expected to be unset for the exported device.
    for unset_field in (
        "display_name",
        "last_seen_user_agent",
        "last_seen_ts",
        "last_seen_ip",
    ):
        self.assertIsNone(device[unset_field])

def test_connections(self) -> None:
    """Check that the user's sessions / connections are handed to the writer."""
    # Record a client IP for the user so there is a connection to export.
    insert_ip = self._store.insert_client_ip(
        self.user2, "access_token", "ip", "user_agent", "MY_DEVICE"
    )
    self.get_success(insert_ip)

    exfiltration_writer = Mock()

    self.get_success(
        self.admin_handler.export_user_data(self.user2, exfiltration_writer)
    )

    exfiltration_writer.write_events.assert_not_called()
    exfiltration_writer.write_connections.assert_called_once()

    exported_connections = exfiltration_writer.write_connections.call_args[0][0]
    self.assertEqual(len(exported_connections), 1)

    connection = exported_connections[0]
    self.assertEqual(connection["ip"], "ip")
    self.assertEqual(connection["user_agent"], "user_agent")
    self.assertGreater(connection["last_seen"], 0)
    # The exported connection must not include an access token field.
    self.assertNotIn("access_token", connection)