Skip to content

Commit 11db575

Browse files
Sliding Sync: Use stream_ordering based timeline pagination for incremental sync (#17510)
Use `stream_ordering` based `timeline` pagination for incremental `/sync` in Sliding Sync. Previously, we were always using a `topological_ordering` but we should only be using that for historical scenarios (initial `/sync`, newly joined, or haven't sent the room down the connection before). This is slightly different than what the [spec suggests](https://spec.matrix.org/v1.10/client-server-api/#syncing) > Events are ordered in this API according to the arrival time of the event on the homeserver. This can conflict with other APIs which order events based on their partial ordering in the event graph. This can result in duplicate events being received (once per distinct API called). Clients SHOULD de-duplicate events based on the event ID when this happens. But we've had a [discussion below in this PR](#17510 (comment)) and this matches what Sync v2 already does and seems like it makes sense. Created a spec issue matrix-org/matrix-spec#1917 to clarify this. Related issues: - matrix-org/matrix-spec#1917 - matrix-org/matrix-spec#852 - matrix-org/matrix-spec-proposals#4033
1 parent 30e9f6e commit 11db575

File tree

9 files changed

+311
-126
lines changed

9 files changed

+311
-126
lines changed

changelog.d/17510.bugfix

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix timeline ordering (using `stream_ordering` instead of topological ordering) in experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint.

docs/development/room-dag-concepts.md

+4-2
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,10 @@ incrementing integer, but backfilled events start with `stream_ordering=-1` and
2121

2222
---
2323

24-
- `/sync` returns things in the order they arrive at the server (`stream_ordering`).
25-
- `/messages` (and `/backfill` in the federation API) return them in the order determined by the event graph `(topological_ordering, stream_ordering)`.
24+
- Incremental `/sync?since=xxx` returns things in the order they arrive at the server
25+
(`stream_ordering`).
26+
- Initial `/sync`, `/messages` (and `/backfill` in the federation API) return them in
27+
the order determined by the event graph `(topological_ordering, stream_ordering)`.
2628

2729
The general idea is that, if you're following a room in real-time (i.e.
2830
`/sync`), you probably want to see the messages as they arrive at your server,

synapse/handlers/admin.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -197,8 +197,14 @@ async def export_user_data(self, user_id: str, writer: "ExfiltrationWriter") ->
197197
# events that we have and then filtering, this isn't the most
198198
# efficient method perhaps but it does guarantee we get everything.
199199
while True:
200-
events, _ = await self._store.paginate_room_events(
201-
room_id, from_key, to_key, limit=100, direction=Direction.FORWARDS
200+
events, _ = (
201+
await self._store.paginate_room_events_by_topological_ordering(
202+
room_id=room_id,
203+
from_key=from_key,
204+
to_key=to_key,
205+
limit=100,
206+
direction=Direction.FORWARDS,
207+
)
202208
)
203209
if not events:
204210
break

synapse/handlers/pagination.py

+18-14
Original file line numberDiff line numberDiff line change
@@ -507,13 +507,15 @@ async def get_messages(
507507

508508
# Initially fetch the events from the database. With any luck, we can return
509509
# these without blocking on backfill (handled below).
510-
events, next_key = await self.store.paginate_room_events(
511-
room_id=room_id,
512-
from_key=from_token.room_key,
513-
to_key=to_room_key,
514-
direction=pagin_config.direction,
515-
limit=pagin_config.limit,
516-
event_filter=event_filter,
510+
events, next_key = (
511+
await self.store.paginate_room_events_by_topological_ordering(
512+
room_id=room_id,
513+
from_key=from_token.room_key,
514+
to_key=to_room_key,
515+
direction=pagin_config.direction,
516+
limit=pagin_config.limit,
517+
event_filter=event_filter,
518+
)
517519
)
518520

519521
if pagin_config.direction == Direction.BACKWARDS:
@@ -582,13 +584,15 @@ async def get_messages(
582584
# If we did backfill something, refetch the events from the database to
583585
# catch anything new that might have been added since we last fetched.
584586
if did_backfill:
585-
events, next_key = await self.store.paginate_room_events(
586-
room_id=room_id,
587-
from_key=from_token.room_key,
588-
to_key=to_room_key,
589-
direction=pagin_config.direction,
590-
limit=pagin_config.limit,
591-
event_filter=event_filter,
587+
events, next_key = (
588+
await self.store.paginate_room_events_by_topological_ordering(
589+
room_id=room_id,
590+
from_key=from_token.room_key,
591+
to_key=to_room_key,
592+
direction=pagin_config.direction,
593+
limit=pagin_config.limit,
594+
event_filter=event_filter,
595+
)
592596
)
593597
else:
594598
# Otherwise, we can backfill in the background for eventual

synapse/handlers/room.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1750,7 +1750,7 @@ async def get_new_events(
17501750
from_key=from_key,
17511751
to_key=to_key,
17521752
limit=limit or 10,
1753-
order="ASC",
1753+
direction=Direction.FORWARDS,
17541754
)
17551755

17561756
events = list(room_events)

synapse/handlers/sliding_sync.py

+37-3
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,10 @@
6464
ROOM_UNKNOWN_SENTINEL,
6565
Sentinel as StateSentinel,
6666
)
67-
from synapse.storage.databases.main.stream import CurrentStateDeltaMembership
67+
from synapse.storage.databases.main.stream import (
68+
CurrentStateDeltaMembership,
69+
PaginateFunction,
70+
)
6871
from synapse.storage.roommember import MemberSummary
6972
from synapse.types import (
7073
DeviceListUpdates,
@@ -1863,10 +1866,13 @@ async def get_room_sync_data(
18631866
# We should return historical messages (before token range) in the
18641867
# following cases because we want clients to be able to show a basic
18651868
# screen of information:
1869+
#
18661870
# - Initial sync (because no `from_token` to limit us anyway)
18671871
# - When users `newly_joined`
18681872
# - For an incremental sync where we haven't sent it down this
18691873
# connection before
1874+
#
1875+
# Relevant spec issue: https://github.com/matrix-org/matrix-spec/issues/1917
18701876
from_bound = None
18711877
initial = True
18721878
if from_token and not room_membership_for_user_at_to_token.newly_joined:
@@ -1927,7 +1933,36 @@ async def get_room_sync_data(
19271933
room_membership_for_user_at_to_token.event_pos.to_room_stream_token()
19281934
)
19291935

1930-
timeline_events, new_room_key = await self.store.paginate_room_events(
1936+
# For initial `/sync` (and other historical scenarios mentioned above), we
1937+
# want to view a historical section of the timeline; to fetch events by
1938+
# `topological_ordering` (best representation of the room DAG as others were
1939+
# seeing it at the time). This also aligns with the order that `/messages`
1940+
# returns events in.
1941+
#
1942+
# For incremental `/sync`, we want to get all updates for rooms since
1943+
# the last `/sync` (regardless if those updates arrived late or happened
1944+
# a while ago in the past); to fetch events by `stream_ordering` (in the
1945+
# order they were received by the server).
1946+
#
1947+
# Relevant spec issue: https://github.com/matrix-org/matrix-spec/issues/1917
1948+
#
1949+
# FIXME: Using workaround for mypy,
1950+
# https://github.com/python/mypy/issues/10740#issuecomment-1997047277 and
1951+
# https://github.com/python/mypy/issues/17479
1952+
paginate_room_events_by_topological_ordering: PaginateFunction = (
1953+
self.store.paginate_room_events_by_topological_ordering
1954+
)
1955+
paginate_room_events_by_stream_ordering: PaginateFunction = (
1956+
self.store.paginate_room_events_by_stream_ordering
1957+
)
1958+
pagination_method: PaginateFunction = (
1959+
# Use `topographical_ordering` for historical events
1960+
paginate_room_events_by_topological_ordering
1961+
if from_bound is None
1962+
# Use `stream_ordering` for updates
1963+
else paginate_room_events_by_stream_ordering
1964+
)
1965+
timeline_events, new_room_key = await pagination_method(
19311966
room_id=room_id,
19321967
# The bounds are reversed so we can paginate backwards
19331968
# (from newer to older events) starting at to_bound.
@@ -1938,7 +1973,6 @@ async def get_room_sync_data(
19381973
# We add one so we can determine if there are enough events to saturate
19391974
# the limit or not (see `limited`)
19401975
limit=room_sync_config.timeline_limit + 1,
1941-
event_filter=None,
19421976
)
19431977

19441978
# We want to return the events in ascending order (the last event is the

synapse/handlers/sync.py

+51-18
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343

4444
from synapse.api.constants import (
4545
AccountDataTypes,
46+
Direction,
4647
EventContentFields,
4748
EventTypes,
4849
JoinRules,
@@ -64,6 +65,7 @@
6465
)
6566
from synapse.storage.databases.main.event_push_actions import RoomNotifCounts
6667
from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary
68+
from synapse.storage.databases.main.stream import PaginateFunction
6769
from synapse.storage.roommember import MemberSummary
6870
from synapse.types import (
6971
DeviceListUpdates,
@@ -879,22 +881,49 @@ async def _load_filtered_recents(
879881
since_key = since_token.room_key
880882

881883
while limited and len(recents) < timeline_limit and max_repeat:
882-
# If we have a since_key then we are trying to get any events
883-
# that have happened since `since_key` up to `end_key`, so we
884-
# can just use `get_room_events_stream_for_room`.
885-
# Otherwise, we want to return the last N events in the room
886-
# in topological ordering.
887-
if since_key:
888-
events, end_key = await self.store.get_room_events_stream_for_room(
889-
room_id,
890-
limit=load_limit + 1,
891-
from_key=since_key,
892-
to_key=end_key,
893-
)
894-
else:
895-
events, end_key = await self.store.get_recent_events_for_room(
896-
room_id, limit=load_limit + 1, end_token=end_key
897-
)
884+
# For initial `/sync`, we want to view a historical section of the
885+
# timeline; to fetch events by `topological_ordering` (best
886+
# representation of the room DAG as others were seeing it at the time).
887+
# This also aligns with the order that `/messages` returns events in.
888+
#
889+
# For incremental `/sync`, we want to get all updates for rooms since
890+
# the last `/sync` (regardless if those updates arrived late or happened
891+
# a while ago in the past); to fetch events by `stream_ordering` (in the
892+
# order they were received by the server).
893+
#
894+
# Relevant spec issue: https://github.com/matrix-org/matrix-spec/issues/1917
895+
#
896+
# FIXME: Using workaround for mypy,
897+
# https://github.com/python/mypy/issues/10740#issuecomment-1997047277 and
898+
# https://github.com/python/mypy/issues/17479
899+
paginate_room_events_by_topological_ordering: PaginateFunction = (
900+
self.store.paginate_room_events_by_topological_ordering
901+
)
902+
paginate_room_events_by_stream_ordering: PaginateFunction = (
903+
self.store.paginate_room_events_by_stream_ordering
904+
)
905+
pagination_method: PaginateFunction = (
906+
# Use `topographical_ordering` for historical events
907+
paginate_room_events_by_topological_ordering
908+
if since_key is None
909+
# Use `stream_ordering` for updates
910+
else paginate_room_events_by_stream_ordering
911+
)
912+
events, end_key = await pagination_method(
913+
room_id=room_id,
914+
# The bounds are reversed so we can paginate backwards
915+
# (from newer to older events) starting at to_bound.
916+
# This ensures we fill the `limit` with the newest events first,
917+
from_key=end_key,
918+
to_key=since_key,
919+
direction=Direction.BACKWARDS,
920+
# We add one so we can determine if there are enough events to saturate
921+
# the limit or not (see `limited`)
922+
limit=load_limit + 1,
923+
)
924+
# We want to return the events in ascending order (the last event is the
925+
# most recent).
926+
events.reverse()
898927

899928
log_kv({"loaded_recents": len(events)})
900929

@@ -2641,9 +2670,10 @@ async def _get_room_changes_for_incremental_sync(
26412670
# a "gap" in the timeline, as described by the spec for /sync.
26422671
room_to_events = await self.store.get_room_events_stream_for_rooms(
26432672
room_ids=sync_result_builder.joined_room_ids,
2644-
from_key=since_token.room_key,
2645-
to_key=now_token.room_key,
2673+
from_key=now_token.room_key,
2674+
to_key=since_token.room_key,
26462675
limit=timeline_limit + 1,
2676+
direction=Direction.BACKWARDS,
26472677
)
26482678

26492679
# We loop through all room ids, even if there are no new events, in case
@@ -2654,6 +2684,9 @@ async def _get_room_changes_for_incremental_sync(
26542684
newly_joined = room_id in newly_joined_rooms
26552685
if room_entry:
26562686
events, start_key = room_entry
2687+
# We want to return the events in ascending order (the last event is the
2688+
# most recent).
2689+
events.reverse()
26572690

26582691
prev_batch_token = now_token.copy_and_replace(
26592692
StreamKeyType.ROOM, start_key

0 commit comments

Comments
 (0)