From 8913ae6a96c81cf7884fe920f30b1287a7b579a7 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 20 Feb 2023 12:10:59 +0000 Subject: [PATCH 1/4] Sort BOOLEAN_COLUMNS and APPEND_ONLY_TABLES So I can see if a given table is present in logarithmic time, rather than linear. --- synapse/_scripts/synapse_port_db.py | 66 ++++++++++++++--------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 5e137dbbf711..4228b008756c 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -95,60 +95,60 @@ BOOLEAN_COLUMNS = { - "events": ["processed", "outlier", "contains_url"], - "rooms": ["is_public", "has_auth_chain_index"], + "access_tokens": ["used"], + "account_validity": ["email_sent"], + "device_lists_changes_in_room": ["converted_to_destinations"], + "device_lists_outbound_pokes": ["sent"], + "devices": ["hidden"], + "e2e_fallback_keys_json": ["used"], + "e2e_room_keys": ["is_verified"], "event_edges": ["is_state"], + "events": ["processed", "outlier", "contains_url"], + "local_media_repository": ["safe_from_quarantine"], "presence_list": ["accepted"], "presence_stream": ["currently_active"], "public_room_list_stream": ["visibility"], - "devices": ["hidden"], - "device_lists_outbound_pokes": ["sent"], - "users_who_share_rooms": ["share_private"], - "e2e_room_keys": ["is_verified"], - "account_validity": ["email_sent"], + "pushers": ["enabled"], "redactions": ["have_censored"], "room_stats_state": ["is_federatable"], - "local_media_repository": ["safe_from_quarantine"], + "rooms": ["is_public", "has_auth_chain_index"], "users": ["shadow_banned", "approved"], - "e2e_fallback_keys_json": ["used"], - "access_tokens": ["used"], - "device_lists_changes_in_room": ["converted_to_destinations"], - "pushers": ["enabled"], + "users_who_share_rooms": ["share_private"], } APPEND_ONLY_TABLES = [ + "cache_invalidation_stream_by_instance", + 
"event_auth", + "event_edges", + "event_json", "event_reference_hashes", + "event_search", + "event_to_state_groups", "events", - "event_json", - "state_events", - "room_memberships", - "topics", - "room_names", - "rooms", + "ex_outlier_stream", "local_media_repository", "local_media_repository_thumbnails", + "presence_stream", + "public_room_list_stream", + "push_rules_stream", + "received_transactions", + "redactions", + "rejections", "remote_media_cache", "remote_media_cache_thumbnails", - "redactions", - "event_edges", - "event_auth", - "received_transactions", + "room_memberships", + "room_names", + "rooms", "sent_transactions", - "transaction_id_to_pdu", - "users", + "state_events", + "state_group_edges", "state_groups", "state_groups_state", - "event_to_state_groups", - "rejections", - "event_search", - "presence_stream", - "push_rules_stream", - "ex_outlier_stream", - "cache_invalidation_stream_by_instance", - "public_room_list_stream", - "state_group_edges", "stream_ordering_to_exterm", + "topics", + "transaction_id_to_pdu", + "users", ] From f134685accf270eaec49878be456fafecd92fdda Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 20 Feb 2023 12:29:56 +0000 Subject: [PATCH 2/4] Teach portdb about `un_partial_stated_event_stream` --- synapse/_scripts/synapse_port_db.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 4228b008756c..41cf7be60a1a 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -113,6 +113,7 @@ "room_stats_state": ["is_federatable"], "rooms": ["is_public", "has_auth_chain_index"], "users": ["shadow_banned", "approved"], + "un_partial_stated_event_stream": ["rejection_status_changed"], "users_who_share_rooms": ["share_private"], } @@ -148,6 +149,7 @@ "stream_ordering_to_exterm", "topics", "transaction_id_to_pdu", + "un_partial_stated_event_stream", "users", ] From 90a086baed06b150e1d8851ede01160e708fc435 Mon
Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 20 Feb 2023 12:44:41 +0000 Subject: [PATCH 3/4] Comments comments comments --- synapse/_scripts/synapse_port_db.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 41cf7be60a1a..0d35e0af8fcd 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -94,6 +94,10 @@ logger = logging.getLogger("synapse_port_db") +# SQLite doesn't have a dedicated boolean type (it stores True/False as 1/0). This means +# portdb will read sqlite bools as integers, then try to insert them into postgres +# boolean columns---which fails. Lacking some Python-parseable metaschema, we must +# specify which integer columns should be inserted as booleans into postgres. BOOLEAN_COLUMNS = { "access_tokens": ["used"], "account_validity": ["email_sent"], @@ -118,6 +122,19 @@ } +# These tables are never deleted from in normal operation [*], so we can resume porting +# over rows from a previous attempt rather than starting from scratch. +# +# [*]: We do delete from many of these tables when purging a room, and +# presumably when purging old events. So we might e.g. +# +# 1. Run portdb and port half of some table. +# 2. Stop portdb. +# 3. Purge something, deleting some of the rows we've ported over. +# 4. Restart portdb. The rows deleted from sqlite are still present in postgres. +# +# But this isn't the end of the world: we should be able to repeat the purge +# on the postgres DB when porting completes. 
APPEND_ONLY_TABLES = [ "cache_invalidation_stream_by_instance", "event_auth", From ec1d97e46e4b5c6f421865ccf528ea48ddf007c8 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 20 Feb 2023 12:49:49 +0000 Subject: [PATCH 4/4] Changelog --- changelog.d/15108.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/15108.bugfix diff --git a/changelog.d/15108.bugfix b/changelog.d/15108.bugfix new file mode 100644 index 000000000000..30af8b439d1b --- /dev/null +++ b/changelog.d/15108.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.75 where the [portdb script](https://matrix-org.github.io/synapse/release-v1.78/postgres.html#porting-from-sqlite) would fail to run after a room had been faster-joined.