From 595ce25fcb7eddb8c947990c71575136f3b263c6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Apr 2021 17:46:58 +0100 Subject: [PATCH 1/3] Small speed up joining large remote rooms There are a couple of points in `persist_events` where we are doing a query per event in series, which we can replace with batched queries. --- changelog.d/9825.misc | 1 + synapse/storage/databases/main/events.py | 52 ++++++++++++++---------- 2 files changed, 32 insertions(+), 21 deletions(-) create mode 100644 changelog.d/9825.misc diff --git a/changelog.d/9825.misc b/changelog.d/9825.misc new file mode 100644 index 000000000000..38b68ca258e6 --- /dev/null +++ b/changelog.d/9825.misc @@ -0,0 +1 @@ +Small speed up joining large remote rooms. diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index bed4326d1137..cbd594081f10 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1378,17 +1378,21 @@ def get_internal_metadata(event): ], ) - for event, _ in events_and_contexts: - if not event.internal_metadata.is_redacted(): - # If we're persisting an unredacted event we go and ensure - # that we mark any redactions that reference this event as - # requiring censoring. - self.db_pool.simple_update_txn( - txn, - table="redactions", - keyvalues={"redacts": event.event_id}, - updatevalues={"have_censored": False}, + # If we're persisting an unredacted event we go and ensure + # that we mark any redactions that reference this event as + # requiring censoring. + sql = "UPDATE redactions SET have_censored = ? WHERE redacts = ?" 
+ txn.execute_batch( + sql, + ( + ( + False, + event.event_id, ) + for event, _ in events_and_contexts + if not event.internal_metadata.is_redacted() + ), + ) state_events_and_contexts = [ ec for ec in events_and_contexts if ec[0].is_state() @@ -1881,20 +1885,26 @@ def _set_push_actions_for_event_and_users_txn( ), ) - for event, _ in events_and_contexts: - user_ids = self.db_pool.simple_select_onecol_txn( - txn, - table="event_push_actions_staging", - keyvalues={"event_id": event.event_id}, - retcol="user_id", - ) + room_to_event_ids = {} # type: Dict[str, List[str]] + for e, _ in events_and_contexts: + room_to_event_ids.setdefault(e.room_id, []).append(e.event_id) - for uid in user_ids: - txn.call_after( - self.store.get_unread_event_push_actions_by_room_for_user.invalidate_many, - (event.room_id, uid), + for room_id, event_ids in room_to_event_ids.items(): + rows = self.db_pool.simple_select_many_txn( + txn, + table="event_push_actions_staging", + column="event_id", + iterable=event_ids, + keyvalues={}, + retcols=("user_id",), ) + for row in rows: + txn.call_after( + self.store.get_unread_event_push_actions_by_room_for_user.invalidate_many, + (room_id, row["user_id"]), + ) + # Now we delete the staging area for *all* events that were being # persisted. txn.execute_batch( From 684f52dcc68f03a0b9b69cad554d167c4659046e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Apr 2021 13:15:55 +0100 Subject: [PATCH 2/3] Update changelog.d/9825.misc Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- changelog.d/9825.misc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/9825.misc b/changelog.d/9825.misc index 38b68ca258e6..42f3f1561936 100644 --- a/changelog.d/9825.misc +++ b/changelog.d/9825.misc @@ -1 +1 @@ -Small speed up joining large remote rooms. +Small speed up for joining large remote rooms. 
From 5c9b41b020f8a7fc7229e548f30150fa5153ff4a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Apr 2021 13:23:13 +0100 Subject: [PATCH 3/3] Deduplicate user_ids before invalidating --- synapse/storage/databases/main/events.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index cbd594081f10..a362521e20e7 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1899,10 +1899,12 @@ def _set_push_actions_for_event_and_users_txn( retcols=("user_id",), ) - for row in rows: + user_ids = {row["user_id"] for row in rows} + + for user_id in user_ids: txn.call_after( self.store.get_unread_event_push_actions_by_room_for_user.invalidate_many, - (room_id, row["user_id"]), + (room_id, user_id), ) # Now we delete the staging area for *all* events that were being