From 9eee30b152e4b8f4bcd9e25e9ce46a0ab2bf0ce6 Mon Sep 17 00:00:00 2001 From: Till Faelligen <2353100+S7evinK@users.noreply.github.com> Date: Tue, 21 May 2024 15:27:15 +0200 Subject: [PATCH 1/3] Add a materialized view for event_types and use it in an updated query --- state/event_table.go | 33 +++++++++++++++++++++------------ state/storage.go | 11 ++++++++++- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/state/event_table.go b/state/event_table.go index 48d81c58..bc7e59e3 100644 --- a/state/event_table.go +++ b/state/event_table.go @@ -141,6 +141,11 @@ func NewEventTable(db *sqlx.DB) *EventTable { CREATE INDEX IF NOT EXISTS syncv3_nid_room_state_idx ON syncv3_events(room_id, event_nid, is_state); CREATE UNIQUE INDEX IF NOT EXISTS syncv3_events_room_event_nid_type_skey_idx ON syncv3_events(event_nid, event_type, state_key); + + -- Create a materialized view for event_types (used on startup to get the latest events in each room) + CREATE MATERIALIZED VIEW IF NOT EXISTS event_types as + SELECT DISTINCT event_type + FROM syncv3_events; `) return &EventTable{db} } @@ -441,18 +446,22 @@ func (t *EventTable) SelectLatestEventsBetween(txn *sqlx.Tx, roomID string, lowe func (t *EventTable) selectLatestEventByTypeInAllRooms(txn *sqlx.Tx) ([]Event, error) { result := []Event{} - // TODO: this query ends up doing a sequential scan on the events table. We have - // an index on (event_type, room_id, event_nid) so I'm a little surprised that PG - // decides to do so. Can we do something better here? Ideas: - // - Find a better query for selecting the newest event of each type in a room. - // - At present we only care about the _timestamps_ of these events. Perhaps we - // could store those in the DB (and even in an index) as a column and select - // those, to avoid having to parse the event bodies. - // - We could have the application maintain a `latest_events` table so that the - // rows can be directly read. Assuming a mostly-static set of event types, reads - // are then linear in the number of rooms. - rows, err := txn.Query( - `SELECT room_id, event_nid, event FROM syncv3_events WHERE event_nid in (SELECT MAX(event_nid) FROM syncv3_events GROUP BY room_id, event_type)`, + // What the following query does: + // 1. Gets all event types from a materialized view (updated on startup in `PrepareSnapshot`) as the `event_types` CTE + // 2. Gets all rooms as the `room_ids` CTE + // 3. Gets the latest event_nid for each event_type and room as the `max_by_ev_type` CTE + // 4. Queries the required data using the event_nids provided by the `max_by_ev_type` CTE + rows, err := txn.Query(` +WITH event_types AS ( + SELECT * FROM event_types +), room_ids AS ( + SELECT DISTINCT room_id FROM syncv3_rooms +), max_by_ev_type AS ( + SELECT m.max FROM event_types, room_ids, + LATERAL ( SELECT max(event_nid) as max FROM syncv3_events e WHERE e.room_id = room_ids.room_id AND e.event_type = event_types.event_type ) AS m +) +SELECT room_id, event_nid, event FROM syncv3_events, max_by_ev_type WHERE event_nid = max_by_ev_type.max +`, ) if err != nil { return nil, err diff --git a/state/storage.go b/state/storage.go index 0d0faa74..148f76aa 100644 --- a/state/storage.go +++ b/state/storage.go @@ -171,7 +171,16 @@ func (s *Storage) PrepareSnapshot(txn *sqlx.Tx) (tableName string, err error) { `SELECT UNNEST(membership_events) AS membership_nid INTO TEMP ` + tempTableName + ` FROM syncv3_snapshots JOIN syncv3_rooms ON syncv3_snapshots.snapshot_id = syncv3_rooms.current_snapshot_id`, ) - return tempTableName, err + if err != nil { + return "", err + } + // Refresh the materialized view, so getting latest events by type per room + // can use a fresh view of the event_types. + _, err = txn.Exec("REFRESH MATERIALIZED VIEW event_types;") + if err != nil { + return "", err + } + return tempTableName, nil } // GlobalSnapshot snapshots the entire database for the purposes of initialising From ce15a2800cd7710b81b41a1b62561da9419c6509 Mon Sep 17 00:00:00 2001 From: Till Faelligen <2353100+S7evinK@users.noreply.github.com> Date: Tue, 21 May 2024 16:16:17 +0200 Subject: [PATCH 2/3] Remove materialized view and use a recursive CTE instead to get unique event_types --- state/event_table.go | 16 +++++++++------- state/storage.go | 9 --------- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/state/event_table.go b/state/event_table.go index bc7e59e3..33c5c6c5 100644 --- a/state/event_table.go +++ b/state/event_table.go @@ -141,11 +141,6 @@ func NewEventTable(db *sqlx.DB) *EventTable { CREATE INDEX IF NOT EXISTS syncv3_nid_room_state_idx ON syncv3_events(room_id, event_nid, is_state); CREATE UNIQUE INDEX IF NOT EXISTS syncv3_events_room_event_nid_type_skey_idx ON syncv3_events(event_nid, event_type, state_key); - - -- Create a materialized view for event_types (used on startup to get the latest events in each room) - CREATE MATERIALIZED VIEW IF NOT EXISTS event_types as - SELECT DISTINCT event_type - FROM syncv3_events; `) return &EventTable{db} } @@ -447,13 +442,20 @@ func (t *EventTable) SelectLatestEventsBetween(txn *sqlx.Tx, roomID string, lowe func (t *EventTable) selectLatestEventByTypeInAllRooms(txn *sqlx.Tx) ([]Event, error) { result := []Event{} // What the following query does: - // 1. Gets all event types from a materialized view (updated on startup in `PrepareSnapshot`) as the `event_types` CTE + // 1. Gets all event types from a recursive CTE as the `event_types` CTE // 2. Gets all rooms as the `room_ids` CTE // 3. Gets the latest event_nid for each event_type and room as the `max_by_ev_type` CTE // 4. Queries the required data using the event_nids provided by the `max_by_ev_type` CTE rows, err := txn.Query(` WITH event_types AS ( - SELECT * FROM event_types + WITH RECURSIVE t AS ( + (SELECT event_type FROM syncv3_events ORDER BY event_type LIMIT 1) -- parentheses required + UNION ALL + SELECT (SELECT event_type FROM syncv3_events WHERE event_type > t.event_type ORDER BY event_type LIMIT 1) + FROM t + WHERE t.event_type IS NOT NULL + ) + SELECT event_type FROM t WHERE event_type IS NOT NULL ), room_ids AS ( SELECT DISTINCT room_id FROM syncv3_rooms ), max_by_ev_type AS ( diff --git a/state/storage.go b/state/storage.go index 148f76aa..d8cb2e1c 100644 --- a/state/storage.go +++ b/state/storage.go @@ -171,15 +171,6 @@ func (s *Storage) PrepareSnapshot(txn *sqlx.Tx) (tableName string, err error) { `SELECT UNNEST(membership_events) AS membership_nid INTO TEMP ` + tempTableName + ` FROM syncv3_snapshots JOIN syncv3_rooms ON syncv3_snapshots.snapshot_id = syncv3_rooms.current_snapshot_id`, ) - if err != nil { - return "", err - } - // Refresh the materialized view, so getting latest events by type per room - // can use a fresh view of the event_types. - _, err = txn.Exec("REFRESH MATERIALIZED VIEW event_types;") - if err != nil { - return "", err - } return tempTableName, nil } From 4b70d7d55fa74c54fe4ebd3aa3be9087009ffe06 Mon Sep 17 00:00:00 2001 From: Till Faelligen <2353100+S7evinK@users.noreply.github.com> Date: Tue, 21 May 2024 16:17:06 +0200 Subject: [PATCH 3/3] Return err and not nil --- state/storage.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/state/storage.go b/state/storage.go index d8cb2e1c..0d0faa74 100644 --- a/state/storage.go +++ b/state/storage.go @@ -171,7 +171,7 @@ func (s *Storage) PrepareSnapshot(txn *sqlx.Tx) (tableName string, err error) { `SELECT UNNEST(membership_events) AS membership_nid INTO TEMP ` + tempTableName + ` FROM syncv3_snapshots JOIN syncv3_rooms ON syncv3_snapshots.snapshot_id = syncv3_rooms.current_snapshot_id`, ) - return tempTableName, nil + return tempTableName, err } // GlobalSnapshot snapshots the entire database for the purposes of initialising