Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 5a60e07

Browse files
committed
Servlet to purge old rooms (#5845)
2 parents 51a0fa5 + 119aa31 commit 5a60e07

File tree

6 files changed

+232
-0
lines changed

6 files changed

+232
-0
lines changed

changelog.d/5845.feature

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add an admin API to purge old rooms from the database.

docs/admin_api/purge_room.md

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
Purge room API
2+
==============
3+
4+
This API will remove all trace of a room from your database.
5+
6+
All local users must have left the room before it can be removed.
7+
8+
The API is:
9+
10+
```
11+
POST /_synapse/admin/v1/purge_room
12+
13+
{
14+
"room_id": "!room:id"
15+
}
16+
```
17+
18+
You must authenticate using the access token of an admin user.

synapse/handlers/pagination.py

+17
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ def __init__(self, hs):
7373
self.auth = hs.get_auth()
7474
self.store = hs.get_datastore()
7575
self.clock = hs.get_clock()
76+
self._server_name = hs.hostname
7677

7778
self.pagination_lock = ReadWriteLock()
7879
self._purges_in_progress_by_room = set()
@@ -261,6 +262,22 @@ def get_purge_status(self, purge_id):
261262
"""
262263
return self._purges_by_id.get(purge_id)
263264

265+
async def purge_room(self, room_id):
    """Remove a room from the database once this server is no longer in it.

    The write side of the room's pagination lock is held for the whole
    operation.

    Args:
        room_id (str): ID of the room to purge.

    Raises:
        SynapseError: with code 400 if the store reports that this server
            is still joined to the room.
    """
    write_lock = await self.pagination_lock.write(room_id)
    with write_lock:
        # Look the room up first as a "do we know about this room" check.
        # NOTE(review): presumably get_room_version raises for an unknown
        # room - confirm against the store.
        await self.store.get_room_version(room_id)

        # Refuse to purge while any of our own users are still in the room.
        # maybeDeferred lets us await the result whether or not
        # is_host_joined hands back a Deferred.
        still_joined = await defer.maybeDeferred(
            self.store.is_host_joined, room_id, self._server_name
        )
        if still_joined:
            raise SynapseError(400, "Users are still joined to this room")

        await self.store.purge_room(room_id)
280+
264281
@defer.inlineCallbacks
265282
def get_messages(
266283
self,

synapse/rest/admin/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
historical_admin_path_patterns,
4343
)
4444
from synapse.rest.admin.media import register_servlets_for_media_repo
45+
from synapse.rest.admin.purge_room_servlet import PurgeRoomServlet
4546
from synapse.rest.admin.server_notice_servlet import SendServerNoticeServlet
4647
from synapse.types import UserID, create_requester
4748
from synapse.util.versionstring import get_version_string
@@ -738,6 +739,7 @@ def register_servlets(hs, http_server):
738739
Register all the admin servlets.
739740
"""
740741
register_servlets_for_client_rest_resource(hs, http_server)
742+
PurgeRoomServlet(hs).register(http_server)
741743
SendServerNoticeServlet(hs).register(http_server)
742744
VersionServlet(hs).register(http_server)
743745

+57
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# -*- coding: utf-8 -*-
2+
# Copyright 2019 The Matrix.org Foundation C.I.C.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
import re
16+
17+
from synapse.http.servlet import (
18+
RestServlet,
19+
assert_params_in_dict,
20+
parse_json_object_from_request,
21+
)
22+
from synapse.rest.admin import assert_requester_is_admin
23+
24+
25+
class PurgeRoomServlet(RestServlet):
    """Admin servlet that deletes every record of a room from the database.

    Request:

        POST /_synapse/admin/v1/purge_room
        {
            "room_id": "!room:id"
        }

    Response:

        {}
    """

    PATTERNS = (re.compile("^/_synapse/admin/v1/purge_room$"),)

    def __init__(self, hs):
        """
        Args:
            hs (synapse.server.HomeServer): server
        """
        self.hs = hs
        self.auth = hs.get_auth()
        self.pagination_handler = hs.get_pagination_handler()

    async def on_POST(self, request):
        """Handle a purge request.

        The requester must be a server admin and the JSON body must contain
        a "room_id" key. The purge itself is delegated to the pagination
        handler.

        Returns:
            tuple[int, dict]: (200, {}) once the purge has completed.
        """
        # Only server admins may purge rooms.
        await assert_requester_is_admin(self.auth, request)

        content = parse_json_object_from_request(request)
        assert_params_in_dict(content, ("room_id",))
        room_id = content["room_id"]

        await self.pagination_handler.purge_room(room_id)

        return 200, {}

synapse/storage/events.py

+137
Original file line numberDiff line numberDiff line change
@@ -2184,6 +2184,143 @@ def _find_unreferenced_groups_during_purge(self, txn, state_groups):
21842184

21852185
return to_delete, to_dedelta
21862186

2187+
def purge_room(self, room_id):
    """Delete every record of a room, as one "purge_room" interaction.

    Args:
        room_id (str): ID of the room to purge.

    Returns:
        Whatever runInteraction returns for the transaction
        (presumably a Deferred - confirm against runInteraction).
    """
    interaction = self.runInteraction("purge_room", self._purge_room_txn, room_id)
    return interaction
2195+
2196+
def _purge_room_txn(self, txn, room_id):
    """Delete the rows belonging to a room, one table at a time.

    The order matters: the state-group deletes look the room's state groups
    up via `events` and `event_to_state_groups`, and the event_id-keyed
    deletes look event IDs up via `events`, so those lookup tables are only
    cleared after everything that depends on them.

    Args:
        txn: database cursor to run the DELETE statements on.
        room_id (str): ID of the room to purge.
    """
    # Every statement below binds exactly one parameter: the room ID.
    params = (room_id,)

    # Tables that lack an index on room_id but have one on event_id.
    event_id_keyed_tables = (
        "event_auth",
        "event_edges",
        "event_push_actions_staging",
        "event_reference_hashes",
        "event_relations",
        "event_to_state_groups",
        "redactions",
        "rejections",
        "state_events",
    )

    # Tables with an index on room_id (or no useful index at all).
    room_id_keyed_tables = (
        "current_state_events",
        "event_backward_extremities",
        "event_forward_extremities",
        "event_json",
        "event_push_actions",
        "event_search",
        "events",
        "group_rooms",
        "public_room_list_stream",
        "receipts_graph",
        "receipts_linearized",
        "room_aliases",
        "room_depth",
        "room_memberships",
        "room_state",
        "room_stats",
        "room_stats_earliest_token",
        "rooms",
        "stream_ordering_to_exterm",
        "topics",
        "users_in_public_rooms",
        "users_who_share_private_rooms",
        # no useful index, but let's clear them anyway
        "appservice_room_list",
        "e2e_room_keys",
        "event_push_summary",
        "pusher_throttle",
        "group_summary_rooms",
        "local_invites",
        "room_account_data",
        "room_tags",
    )

    # State first: the contents of the room's state groups ...
    logger.info("[purge] removing %s from state_groups_state", room_id)
    txn.execute(
        """
        DELETE FROM state_groups_state WHERE state_group IN (
          SELECT state_group FROM events JOIN event_to_state_groups USING(event_id)
          WHERE events.room_id=?
        )
        """,
        params,
    )

    # ... then the edges between those state groups ...
    logger.info("[purge] removing %s from state_group_edges", room_id)
    txn.execute(
        """
        DELETE FROM state_group_edges WHERE state_group IN (
          SELECT state_group FROM events JOIN event_to_state_groups USING(event_id)
          WHERE events.room_id=?
        )
        """,
        params,
    )

    # ... and then the state groups themselves.
    logger.info("[purge] removing %s from state_groups", room_id)
    txn.execute(
        """
        DELETE FROM state_groups WHERE id IN (
          SELECT state_group FROM events JOIN event_to_state_groups USING(event_id)
          WHERE events.room_id=?
        )
        """,
        params,
    )

    # Next, everything that has to be matched through the room's event IDs.
    for table_name in event_id_keyed_tables:
        logger.info("[purge] removing %s from %s", room_id, table_name)

        delete_by_event_sql = (
            """
            DELETE FROM %s WHERE event_id IN (
              SELECT event_id FROM events WHERE room_id=?
            )
            """
            % (table_name,)
        )
        txn.execute(delete_by_event_sql, params)

    # Last, the tables that can be filtered on room_id directly.
    for table_name in room_id_keyed_tables:
        logger.info("[purge] removing %s from %s", room_id, table_name)
        delete_by_room_sql = "DELETE FROM %s WHERE room_id=?" % (table_name,)
        txn.execute(delete_by_room_sql, params)

    # Tables deliberately left alone:
    #
    #  - blocked_rooms
    #    Kept on purpose, so that we don't accidentally rejoin a blocked
    #    room after it was purged.
    #
    #  - user_directory
    #    Has a room_id column, but it is unused.
    #

    # Tables we might want to consider clearing out as well:
    #
    #  - event_reports
    #    These are intended for abuse management, so the initial
    #    inclination is to leave them in place.
    #
    #  - current_state_delta_stream
    #  - ex_outlier_stream
    #  - room_tags_revisions
    #    These are largeish and have no room_id index. In any case the
    #    'stream' tables should be cleared out periodically anyway (#5888).

    # TODO: we could probably usefully do a bunch of cache invalidation here

    logger.info("[purge] done")
2323+
21872324
@defer.inlineCallbacks
21882325
def is_event_after(self, event_id1, event_id2):
21892326
"""Returns True if event_id1 is after event_id2 in the stream

0 commit comments

Comments
 (0)