From 079194c54740e5046bb988a1b6d602bdd21044ec Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 17 Jun 2024 18:03:02 -0500 Subject: [PATCH 001/109] Return some room timeline data in Sliding Sync --- synapse/handlers/sliding_sync.py | 202 ++++++++++++++++++++++++-- synapse/rest/client/sync.py | 89 ++++++++++-- synapse/types/handlers/__init__.py | 7 +- synapse/types/rest/client/__init__.py | 7 - 4 files changed, 275 insertions(+), 30 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 16d94925f54..cf448fa3cdf 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -18,22 +18,25 @@ # # import logging -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple +from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple +import attr from immutabledict import immutabledict -from synapse.api.constants import AccountDataTypes, EventTypes, Membership +from synapse.api.constants import AccountDataTypes, Direction, EventTypes, Membership from synapse.events import EventBase from synapse.storage.roommember import RoomsForUser from synapse.types import ( PersistedEventPosition, Requester, RoomStreamToken, + StreamKeyType, StreamToken, UserID, ) from synapse.types.handlers import OperationType, SlidingSyncConfig, SlidingSyncResult -from synapse.types.state import StateFilter +from synapse.types.state import StateFilter, StateKey +from synapse.visibility import filter_events_for_client if TYPE_CHECKING: from synapse.server import HomeServer @@ -82,6 +85,18 @@ def filter_membership_for_sync(*, membership: str, user_id: str, sender: str) -> return membership != Membership.LEAVE or sender != user_id +# We can't freeze this class because we want to update it in place with the +# de-duplicated data. +@attr.s(slots=True, auto_attribs=True) +class RoomSyncConfig: + """ + Holds the config for what data we should fetch for a room in the sync response. 
+ """ + + timeline_limit: int + required_state: Set[StateKey] + + class SlidingSyncHandler: def __init__(self, hs: "HomeServer"): self.clock = hs.get_clock() @@ -201,6 +216,7 @@ async def current_sync_for_user( # Assemble sliding window lists lists: Dict[str, SlidingSyncResult.SlidingWindowList] = {} + relevant_room_map: Dict[str, RoomSyncConfig] = {} if sync_config.lists: # Get all of the room IDs that the user should be able to see in the sync # response @@ -225,29 +241,66 @@ async def current_sync_for_user( ops: List[SlidingSyncResult.SlidingWindowList.Operation] = [] if list_config.ranges: for range in list_config.ranges: + room_id_set = { + room_id + for room_id, _ in sorted_room_info[range[0] : range[1]] + } + ops.append( SlidingSyncResult.SlidingWindowList.Operation( op=OperationType.SYNC, range=range, - room_ids=[ - room_id - for room_id, _ in sorted_room_info[ - range[0] : range[1] - ] - ], + room_ids=list(room_id_set), ) ) + # Update the relevant room map + for room_id in room_id_set: + if relevant_room_map.get(room_id) is not None: + # Take the highest timeline limit + if ( + relevant_room_map[room_id].timeline_limit + < list_config.timeline_limit + ): + relevant_room_map[room_id].timeline_limit = ( + list_config.timeline_limit + ) + + # Union the required state + relevant_room_map[room_id].required_state.update( + list_config.required_state + ) + else: + relevant_room_map[room_id] = RoomSyncConfig( + timeline_limit=list_config.timeline_limit, + required_state=set(list_config.required_state), + ) + lists[list_key] = SlidingSyncResult.SlidingWindowList( count=len(sorted_room_info), ops=ops, ) + # TODO: if (sync_config.room_subscriptions): + + # Fetch room data + rooms: Dict[str, SlidingSyncResult.RoomResult] = {} + for room_id, room_sync_config in relevant_room_map.items(): + room_sync_result = await self.get_room_sync_data( + user=sync_config.user, + room_id=room_id, + room_sync_config=room_sync_config, + rooms_for_user_membership_at_to_token=sync_room_map[room_id], + from_token=from_token, + to_token=to_token, + ) + + rooms[room_id] = room_sync_result + return SlidingSyncResult( next_pos=to_token, lists=lists, - # TODO: Gather room data for rooms in lists and `sync_config.room_subscriptions` - rooms={}, + rooms=rooms, extensions={}, ) @@ -665,3 +718,130 @@ async def sort_rooms( # We want descending order reverse=True, ) + + async def get_room_sync_data( + self, + user: UserID, + room_id: str, + room_sync_config: RoomSyncConfig, + rooms_for_user_membership_at_to_token: RoomsForUser, + from_token: Optional[StreamToken], + to_token: StreamToken, + ) -> SlidingSyncResult.RoomResult: + """ + Fetch room data for a room. + + We fetch data according to the token range (> `from_token` and <= `to_token`). + + Args: + user: User to fetch data for + room_id: The room ID to fetch data for + room_sync_config: Config for what data we should fetch for a room in the + sync response. + rooms_for_user_membership_at_to_token: Membership information for the user + in the room at the time of `to_token`. + from_token: The point in the stream to sync from. + to_token: The point in the stream to sync up to. + """ + + timeline_events: List[EventBase] = [] + limited = False + # We want to use `to_token` (vs `from_token`) because we look backwards from the + # `to_token` up to the `timeline_limit` and we might not reach `from_token` + # before we hit the limit. We will update the room stream position once we've + # fetched the events. 
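+        #
+        # For example: with `timeline_limit=3` and events [e1, e2, e3, e4, e5]
+        # in the room at `to_token`, we paginate backwards, keep [e3, e4, e5],
+        # and point `prev_batch` just before e3 so that a later `/messages`
+        # call picks up again at e2.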
+ prev_batch_token = to_token + if room_sync_config.timeline_limit > 0: + timeline_events, new_room_key = await self.store.paginate_room_events( + room_id=room_id, + # We're going to paginate backwards from the `to_token` + from_key=to_token.room_key, + to_key=from_token.room_key if from_token is not None else None, + direction=Direction.BACKWARDS, + # We add one so we can determine if there are enough events to saturate + # the limit or not (see `limited`) + limit=room_sync_config.timeline_limit + 1, + event_filter=None, + ) + + # We want to return the events in ascending order (the last event is the + # most recent). + timeline_events.reverse() + + timeline_events = await filter_events_for_client( + self.storage_controllers, + user.to_string(), + timeline_events, + is_peeking=rooms_for_user_membership_at_to_token.membership + != Membership.JOIN, + filter_send_to_client=True, + ) + + # Determine our `limited` status + if len(timeline_events) > room_sync_config.timeline_limit: + limited = True + # Get rid of that extra "+ 1" event because we only used it to determine + # if we hit the limit or not + timeline_events = timeline_events[-room_sync_config.timeline_limit :] + assert timeline_events[0].internal_metadata.stream_ordering + new_room_key = RoomStreamToken( + stream=timeline_events[0].internal_metadata.stream_ordering - 1 + ) + + prev_batch_token = prev_batch_token.copy_and_replace( + StreamKeyType.ROOM, new_room_key + ) + + # Figure out any stripped state events for invite/knocks + stripped_state: List[EventBase] = [] + if rooms_for_user_membership_at_to_token.membership in { + Membership.INVITE, + Membership.KNOCK, + }: + invite_or_knock_event = await self.store.get_event( + rooms_for_user_membership_at_to_token.event_id + ) + + stripped_state = [] + if invite_or_knock_event.membership == Membership.INVITE: + stripped_state = invite_or_knock_event.unsigned.get( + "invite_room_state", [] + ) + elif invite_or_knock_event.membership == Membership.KNOCK: + stripped_state = invite_or_knock_event.unsigned.get( + "knock_room_state", [] + ) + + stripped_state.append(invite_or_knock_event) + + return SlidingSyncResult.RoomResult( + # TODO: Dummy value + name="TODO", + # TODO: Dummy value + avatar=None, + # TODO: Dummy value + heroes=None, + # Since we can't determine whether we've already sent a room down this + # Sliding Sync connection before (we plan to add this optimization in the + # future), we're always returning the requested room state instead of + # updates. + initial=True, + # TODO: Dummy value + required_state=[], + timeline=timeline_events, + # TODO: Dummy value + is_dm=False, + stripped_state=stripped_state, + prev_batch=prev_batch_token, + limited=limited, + # TODO: Dummy values + joined_count=0, + invited_count=0, + # TODO: These are just dummy values. We could potentially just remove these + # since notifications can only really be done correctly on the client anyway + # (encrypted rooms). 
+ notification_count=0, + highlight_count=0, + # TODO: Dummy value + num_live=0, + ) diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index 1b0ac20d94b..b261b2dd88c 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -761,7 +761,6 @@ class SlidingSyncRestServlet(RestServlet): "lists": { "foo-list": { "ranges": [ [0, 99] ], - "sort": [ "by_notification_level", "by_recency", "by_name" ], "required_state": [ ["m.room.join_rules", ""], ["m.room.history_visibility", ""], @@ -771,7 +770,6 @@ class SlidingSyncRestServlet(RestServlet): "filters": { "is_dm": true }, - "bump_event_types": [ "m.room.message", "m.room.encrypted" ], } }, // Room Subscriptions API @@ -779,10 +777,6 @@ class SlidingSyncRestServlet(RestServlet): "!sub1:bar": { "required_state": [ ["*","*"] ], "timeline_limit": 10, - "include_old_rooms": { - "timeline_limit": 1, - "required_state": [ ["m.room.tombstone", ""], ["m.room.create", ""] ], - } } }, // Extensions API @@ -871,10 +865,11 @@ def __init__(self, hs: "HomeServer"): super().__init__() self.auth = hs.get_auth() self.store = hs.get_datastores().main + self.clock = hs.get_clock() self.filtering = hs.get_filtering() self.sliding_sync_handler = hs.get_sliding_sync_handler() + self.event_serializer = hs.get_event_client_serializer() - # TODO: Update this to `on_GET` once we figure out how we want to handle params async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) user = requester.user @@ -920,13 +915,14 @@ async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: logger.info("Client has disconnected; not serializing response.") return 200, {} - response_content = await self.encode_response(sliding_sync_results) + response_content = await self.encode_response(requester, sliding_sync_results) return 200, response_content # TODO: Is there a better way to encode things? 
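     # (Walks the `SlidingSyncResult` and emits the top-level `next_pos`,
     # `lists`, `rooms` and `extensions` keys of the response.)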
async def encode_response(
         self,
+        requester: Requester,
         sliding_sync_result: SlidingSyncResult,
     ) -> JsonDict:
         response: JsonDict = defaultdict(dict)
@@ -935,7 +931,9 @@ async def encode_response(
         serialized_lists = self.encode_lists(sliding_sync_result.lists)
         if serialized_lists:
             response["lists"] = serialized_lists
-        response["rooms"] = {}  # TODO: sliding_sync_result.rooms
+        response["rooms"] = await self.encode_rooms(
+            requester, sliding_sync_result.rooms
+        )
         response["extensions"] = {}  # TODO: sliding_sync_result.extensions
 
         return response
@@ -961,6 +959,79 @@ def encode_operation(
 
         return serialized_lists
 
+    async def encode_rooms(
+        self,
+        requester: Requester,
+        rooms: Dict[str, SlidingSyncResult.RoomResult],
+    ) -> JsonDict:
+        time_now = self.clock.time_msec()
+
+        serialize_options = SerializeEventConfig(
+            event_format=format_event_for_client_v2_without_room_id,
+            requester=requester,
+        )
+
+        serialized_rooms = {}
+        for room_id, room_result in rooms.items():
+            serialized_timeline = await self.event_serializer.serialize_events(
+                room_result.timeline,
+                time_now,
+                config=serialize_options,
+                # TODO
+                # bundle_aggregations=room.timeline.bundled_aggregations,
+            )
+
+            serialized_required_state = await self.event_serializer.serialize_events(
+                room_result.required_state,
+                time_now,
+                config=serialize_options,
+            )
+
+            serialized_rooms[room_id] = {
+                "name": room_result.name,
+                "required_state": serialized_required_state,
+                "timeline": serialized_timeline,
+                "prev_batch": await room_result.prev_batch.to_string(self.store),
+                "limited": room_result.limited,
+                "joined_count": room_result.joined_count,
+                "invited_count": room_result.invited_count,
+                "notification_count": room_result.notification_count,
+                "highlight_count": room_result.highlight_count,
+                "num_live": room_result.num_live,
+            }
+
+            if room_result.avatar:
+                serialized_rooms[room_id]["avatar"] = room_result.avatar
+
+            if room_result.heroes:
+                serialized_rooms[room_id]["heroes"] = room_result.heroes
+
+            # We should only include the `initial` key if it's `True` to save bandwidth.
+            # The absence of this flag means `False`.
+            if room_result.initial:
+                serialized_rooms[room_id]["initial"] = room_result.initial
+
+            # Field should be absent on non-DM rooms
+            if room_result.is_dm:
+                serialized_rooms[room_id]["is_dm"] = room_result.is_dm
+
+            # Stripped state only applies to invite/knock rooms
+            if room_result.stripped_state:
+                serialized_stripped_state = (
+                    await self.event_serializer.serialize_events(
+                        room_result.stripped_state,
+                        time_now,
+                        config=serialize_options,
+                    )
+                )
+
+                # TODO: Would be good to rename this to `stripped_state` so it can be
+                # shared between invite and knock rooms, see
+                # https://github.com/matrix-org/matrix-spec-proposals/pull/3575#discussion_r1117629919
+                serialized_rooms[room_id]["invite_state"] = serialized_stripped_state
+
+        return serialized_rooms
+
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     SyncRestServlet(hs).register(http_server)
diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py
index 1d65551d5b5..b544398a35d 100644
--- a/synapse/types/handlers/__init__.py
+++ b/synapse/types/handlers/__init__.py
@@ -162,8 +162,9 @@ class RoomResult:
             timeline: Latest events in the room. The last event is the most recent
             is_dm: Flag to specify whether the room is a direct-message room (most likely
                 between two people).
-            invite_state: Stripped state events. Same as `rooms.invite.$room_id.invite_state`
-                in sync v2, absent on joined/left rooms
+            stripped_state: Stripped state events (for rooms where the user is
+                invited/knocked). Same as `rooms.invite.$room_id.invite_state` in sync v2,
+                absent on joined/left rooms
             prev_batch: A token that can be passed as a start parameter to the
                 `/rooms/<room_id>/messages` API to retrieve earlier messages.
             limited: True if there are more events than fit between the given position and now.
@@ -192,7 +193,7 @@ class RoomResult:
     required_state: List[EventBase]
     timeline: List[EventBase]
     is_dm: bool
-    invite_state: List[EventBase]
+    stripped_state: Optional[List[EventBase]]
     prev_batch: StreamToken
     limited: bool
    joined_count: int
diff --git a/synapse/types/rest/client/__init__.py b/synapse/types/rest/client/__init__.py
index e2c79c41064..25fbd772f6f 100644
--- a/synapse/types/rest/client/__init__.py
+++ b/synapse/types/rest/client/__init__.py
@@ -152,9 +152,6 @@ class CommonRoomParameters(RequestBodyModel):
             anyway.
         timeline_limit: The maximum number of timeline events to return per response.
             (Max 1000 messages)
-        include_old_rooms: Determines if `predecessor` rooms are included in the
-            `rooms` response. The user MUST be joined to old rooms for them to show up
-            in the response.
     """
 
     class IncludeOldRooms(RequestBodyModel):
@@ -167,7 +164,6 @@ class IncludeOldRooms(RequestBodyModel):
             timeline_limit: int
         else:
             timeline_limit: conint(le=1000, strict=True)  # type: ignore[valid-type]
-        include_old_rooms: Optional[IncludeOldRooms] = None
 
     class SlidingSyncList(CommonRoomParameters):
         """
@@ -208,9 +204,6 @@ class SlidingSyncList(CommonRoomParameters):
         }
 
         timeline_limit: The maximum number of timeline events to return per response.
-        include_old_rooms: Determines if `predecessor` rooms are included in the
-            `rooms` response. The user MUST be joined to old rooms for them to show up
-            in the response.
         include_heroes: Return a stripped variant of membership events (containing
             `user_id` and optionally `avatar_url` and `displayname`) for the users used
             to calculate the room name.

From 3e0f759dbc34cb3be0a1946cd36e617fc3c5a17c Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Mon, 17 Jun 2024 18:26:59 -0500
Subject: [PATCH 002/109] Strip invite/knock event itself and avoid mutating event `unsigned`

Make sure we don't run into https://github.com/element-hq/synapse/issues/14919
(https://github.com/matrix-org/synapse/issues/14919)

---
 synapse/events/utils.py                       | 18 ++++++++++++++++++
 synapse/handlers/sliding_sync.py              | 14 ++++++++------
 synapse/rest/client/sync.py                   | 10 +---------
 .../storage/databases/main/events_worker.py   | 12 ++----------
 synapse/types/handlers/__init__.py            |  4 ++--
 5 files changed, 31 insertions(+), 27 deletions(-)

diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index b997d82d71f..f937fd46980 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -836,3 +836,21 @@ def maybe_upsert_event_field(
             del container[key]
 
     return upsert_okay
+
+
+def strip_event(event: EventBase) -> JsonDict:
+    """
+    Used for "stripped state" events which provide a simplified view of the state of a
+    room intended to help a potential joiner identify the room (relevant when the user
+    is invited or knocked).
+
+    Stripped state events can only have the `sender`, `type`, `state_key` and `content`
+    properties present.
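+
+    For example, a stripped join-rules event looks like:
+    `{"type": "m.room.join_rules", "state_key": "", "content": {"join_rule": "public"}, "sender": "@user:example.com"}`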
+ """ + + return { + "type": event.type, + "state_key": event.state_key, + "content": event.content, + "sender": event.sender, + } diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index cf448fa3cdf..23f971c1f78 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -25,8 +25,10 @@ from synapse.api.constants import AccountDataTypes, Direction, EventTypes, Membership from synapse.events import EventBase +from synapse.events.utils import strip_event from synapse.storage.roommember import RoomsForUser from synapse.types import ( + JsonDict, PersistedEventPosition, Requester, RoomStreamToken, @@ -793,7 +795,7 @@ async def get_room_sync_data( ) # Figure out any stripped state events for invite/knocks - stripped_state: List[EventBase] = [] + stripped_state: List[JsonDict] = [] if rooms_for_user_membership_at_to_token.membership in { Membership.INVITE, Membership.KNOCK, @@ -804,15 +806,15 @@ async def get_room_sync_data( stripped_state = [] if invite_or_knock_event.membership == Membership.INVITE: - stripped_state = invite_or_knock_event.unsigned.get( - "invite_room_state", [] + stripped_state.extend( + invite_or_knock_event.unsigned.get("invite_room_state", []) ) elif invite_or_knock_event.membership == Membership.KNOCK: - stripped_state = invite_or_knock_event.unsigned.get( - "knock_room_state", [] + stripped_state.extend( + invite_or_knock_event.unsigned.get("knock_room_state", []) ) - stripped_state.append(invite_or_knock_event) + stripped_state.append(strip_event(invite_or_knock_event)) return SlidingSyncResult.RoomResult( # TODO: Dummy value diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index b261b2dd88c..a9be37bbf32 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -1017,18 +1017,10 @@ async def encode_rooms( # Stripped state only applies to invite/knock rooms if room_result.stripped_state: - serialized_stripped_state = ( - await self.event_serializer.serialize_events( - room_result.stripped_state, - time_now, - config=serialize_options, - ) - ) - # TODO: Would be good to rename this to `stripped_state` so it can be # shared between invite and knock rooms, see # https://github.com/matrix-org/matrix-spec-proposals/pull/3575#discussion_r1117629919 - serialized_rooms[room_id]["invite_state"] = serialized_stripped_state + serialized_rooms[room_id]["invite_state"] = room_result.stripped_state return serialized_rooms diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index e264d36f025..f0f390cec46 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -55,7 +55,7 @@ ) from synapse.events import EventBase, make_event_from_dict from synapse.events.snapshot import EventContext -from synapse.events.utils import prune_event +from synapse.events.utils import prune_event, strip_event from synapse.logging.context import ( PreserveLoggingContext, current_context, @@ -1025,15 +1025,7 @@ async def get_stripped_room_state_from_event_context( state_to_include = await self.get_events(selected_state_ids.values()) - return [ - { - "type": e.type, - "state_key": e.state_key, - "content": e.content, - "sender": e.sender, - } - for e in state_to_include.values() - ] + return [strip_event(e) for e in state_to_include.values()] def _maybe_start_fetch_thread(self) -> None: """Starts an event fetch thread if we are not yet at the maximum number.""" diff --git 
a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py index b544398a35d..04b0ab972b8 100644 --- a/synapse/types/handlers/__init__.py +++ b/synapse/types/handlers/__init__.py @@ -31,7 +31,7 @@ from pydantic import Extra from synapse.events import EventBase -from synapse.types import JsonMapping, StreamToken, UserID +from synapse.types import JsonDict, JsonMapping, StreamToken, UserID from synapse.types.rest.client import SlidingSyncBody @@ -193,7 +193,7 @@ class RoomResult: required_state: List[EventBase] timeline: List[EventBase] is_dm: bool - stripped_state: Optional[List[EventBase]] + stripped_state: Optional[List[JsonDict]] prev_batch: StreamToken limited: bool joined_count: int From 5e2fd4e93ca2084ee92533b59e6d45b3a914fa89 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 17 Jun 2024 18:29:44 -0500 Subject: [PATCH 003/109] Add changelog --- changelog.d/17320.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/17320.feature diff --git a/changelog.d/17320.feature b/changelog.d/17320.feature new file mode 100644 index 00000000000..1e524f3eca1 --- /dev/null +++ b/changelog.d/17320.feature @@ -0,0 +1 @@ +Add `rooms` data to experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. From 8ce06f145260540f0c81c1594a011556e90f32c8 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 17 Jun 2024 18:54:23 -0500 Subject: [PATCH 004/109] Fix sort being lost --- synapse/handlers/sliding_sync.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 23f971c1f78..e61b86d7792 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -243,21 +243,21 @@ async def current_sync_for_user( ops: List[SlidingSyncResult.SlidingWindowList.Operation] = [] if list_config.ranges: for range in list_config.ranges: - room_id_set = { + sliced_room_ids = [ room_id for room_id, _ in sorted_room_info[range[0] : range[1]] - } + ] ops.append( SlidingSyncResult.SlidingWindowList.Operation( op=OperationType.SYNC, range=range, - room_ids=list(room_id_set), + room_ids=sliced_room_ids, ) ) # Update the relevant room map - for room_id in room_id_set: + for room_id in sliced_room_ids: if relevant_room_map.get(room_id) is not None: # Take the highest timeline limit if ( From aa5f54aa135de8ae7fdc201792d548de494cbd40 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 08:26:10 -0500 Subject: [PATCH 005/109] Start on required_state --- synapse/handlers/sliding_sync.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index e61b86d7792..5b834fe9ef6 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -93,10 +93,16 @@ def filter_membership_for_sync(*, membership: str, user_id: str, sender: str) -> class RoomSyncConfig: """ Holds the config for what data we should fetch for a room in the sync response. + + Attributes: + timeline_limit: The maximum number of events to return in the timeline. + required_state: The minimum set of state events requested for the room. The + values are close to `StateKey` but actually use a syntax where you can provide + `*` and `$LAZY` as the state key part of the tuple (type, state_key). 
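+            For example, `{("m.room.topic", ""), ("m.room.member", "$LAZY")}` would
+            request the room's topic plus lazy-loaded membership events.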
""" timeline_limit: int - required_state: Set[StateKey] + required_state: Set[Tuple[str, str]] class SlidingSyncHandler: @@ -816,6 +822,14 @@ async def get_room_sync_data( stripped_state.append(strip_event(invite_or_knock_event)) + required_state = [] + if len(room_sync_config.required_state) > 0: + required_state = await self.storage_controllers.state.get_state_at( + room_id, + to_token, + state_filter=StateFilter.from_types(TODO), + ) + return SlidingSyncResult.RoomResult( # TODO: Dummy value name="TODO", From 5c175d5488ac7b700906a722ee16404527d8d711 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 09:35:20 -0500 Subject: [PATCH 006/109] Add some notes from pairing --- synapse/handlers/sliding_sync.py | 20 ++++++++++++++++++-- synapse/rest/client/sync.py | 1 + 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 5b834fe9ef6..f9ec4f79610 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -764,6 +764,13 @@ async def get_room_sync_data( room_id=room_id, # We're going to paginate backwards from the `to_token` from_key=to_token.room_key, + # We should always return historical messages (outside token range) in + # these cases because clients want to be able to show a basic screen of + # information: + # - Initial sync (because no `from_token`) + # - When users newly_join + # - TODO: For incremental sync where we haven't sent it down this + # connection before to_key=from_token.room_key if from_token is not None else None, direction=Direction.BACKWARDS, # We add one so we can determine if there are enough events to saturate @@ -824,14 +831,23 @@ async def get_room_sync_data( required_state = [] if len(room_sync_config.required_state) > 0: - required_state = await self.storage_controllers.state.get_state_at( + await self.storage_controllers.state.get_current_state( room_id, - to_token, state_filter=StateFilter.from_types(TODO), + await_full_state=False, ) + # TODO: rewind + + # required_state = await self.storage_controllers.state.get_state_at( + # room_id, + # to_token, + # state_filter=StateFilter.from_types(TODO), + # ) + return SlidingSyncResult.RoomResult( # TODO: Dummy value + # TODO: Make this optional because a computed name doesn't make sense for translated cases name="TODO", # TODO: Dummy value avatar=None, diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index a9be37bbf32..0ae31f23e96 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -1021,6 +1021,7 @@ async def encode_rooms( # shared between invite and knock rooms, see # https://github.com/matrix-org/matrix-spec-proposals/pull/3575#discussion_r1117629919 serialized_rooms[room_id]["invite_state"] = room_result.stripped_state + # TODO: `knocked_state` but that isn't specced yet return serialized_rooms From 9089bfe4dc505c02739968cdb1b67220e060580d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 10:06:29 -0500 Subject: [PATCH 007/109] Remove required_state for now --- synapse/handlers/sliding_sync.py | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index f9ec4f79610..f8fd2c6c5ea 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -96,9 +96,10 @@ class RoomSyncConfig: Attributes: timeline_limit: The maximum number of events to return in the timeline. 
- required_state: The minimum set of state events requested for the room. The - values are close to `StateKey` but actually use a syntax where you can provide - `*` and `$LAZY` as the state key part of the tuple (type, state_key). + required_state: The set of state events requested for the room. The + values are close to `StateKey` but actually use a syntax where you can + provide `*` wildcard and `$LAZY` for lazy room members as the `state_key` part + of the tuple (type, state_key). """ timeline_limit: int @@ -829,22 +830,6 @@ async def get_room_sync_data( stripped_state.append(strip_event(invite_or_knock_event)) - required_state = [] - if len(room_sync_config.required_state) > 0: - await self.storage_controllers.state.get_current_state( - room_id, - state_filter=StateFilter.from_types(TODO), - await_full_state=False, - ) - - # TODO: rewind - - # required_state = await self.storage_controllers.state.get_state_at( - # room_id, - # to_token, - # state_filter=StateFilter.from_types(TODO), - # ) - return SlidingSyncResult.RoomResult( # TODO: Dummy value # TODO: Make this optional because a computed name doesn't make sense for translated cases From 94279915d4432fefb87b2d210a8cd03fd633c002 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 10:09:33 -0500 Subject: [PATCH 008/109] Clean up knock_state comments --- synapse/rest/client/sync.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index 0ae31f23e96..db447738249 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -1017,11 +1017,13 @@ async def encode_rooms( # Stripped state only applies to invite/knock rooms if room_result.stripped_state: - # TODO: Would be good to rename this to `stripped_state` so it can be - # shared between invite and knock rooms, see + # TODO: `knocked_state` but that isn't specced yet. + # + # TODO: Instead of adding `knocked_state`, it would be good to rename + # this to `stripped_state` so it can be shared between invite and knock + # rooms, see # https://github.com/matrix-org/matrix-spec-proposals/pull/3575#discussion_r1117629919 serialized_rooms[room_id]["invite_state"] = room_result.stripped_state - # TODO: `knocked_state` but that isn't specced yet return serialized_rooms From 19b22971711da0c8bdbaebed0d2f7a7ccb01e2ae Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 10:36:50 -0500 Subject: [PATCH 009/109] Calculate `num_live` --- synapse/handlers/sliding_sync.py | 55 ++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 10 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index f8fd2c6c5ea..1d07e22c914 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -753,6 +753,7 @@ async def get_room_sync_data( to_token: The point in the stream to sync up to. """ + # Assemble the list of timeline events timeline_events: List[EventBase] = [] limited = False # We want to use `to_token` (vs `from_token`) because we look backwards from the @@ -761,18 +762,34 @@ async def get_room_sync_data( # fetched the events. 
prev_batch_token = to_token if room_sync_config.timeline_limit > 0: + newly_joined = False + if ( + from_token is not None + and rooms_for_user_membership_at_to_token.membership == Membership.JOIN + ): + newly_joined = ( + rooms_for_user_membership_at_to_token.event_pos.stream + > from_token.room_key.get_stream_pos_for_instance( + rooms_for_user_membership_at_to_token.event_pos.instance_name + ) + ) + timeline_events, new_room_key = await self.store.paginate_room_events( room_id=room_id, # We're going to paginate backwards from the `to_token` from_key=to_token.room_key, - # We should always return historical messages (outside token range) in - # these cases because clients want to be able to show a basic screen of - # information: - # - Initial sync (because no `from_token`) - # - When users newly_join - # - TODO: For incremental sync where we haven't sent it down this + # We should return historical messages (outside token range) in the + # following cases because we want clients to be able to show a basic + # screen of information: + # - Initial sync (because no `from_token` to limit us anyway) + # - When users newly_joined + # - TODO: For an incremental sync where we haven't sent it down this # connection before - to_key=from_token.room_key if from_token is not None else None, + to_key=( + from_token.room_key + if from_token is not None and not newly_joined + else None + ), direction=Direction.BACKWARDS, # We add one so we can determine if there are enough events to saturate # the limit or not (see `limited`) @@ -804,6 +821,25 @@ async def get_room_sync_data( stream=timeline_events[0].internal_metadata.stream_ordering - 1 ) + # Determine how many "live" events we have (events within the given token range). + # + # This is mostly useful to determine whether a given @mention event should + # make a noise or not. Clients cannot rely solely on the absence of + # `initial: true` to determine live events because if a room not in the + # sliding window bumps into the window because of an @mention it will have + # `initial: true` yet contain a single live event (with potentially other + # old events in the timeline) + num_live = 0 + if from_token is not None: + for timeline_event in timeline_events: + if ( + timeline_event.internal_metadata.stream_ordering + > from_token.room_key.get_stream_pos_for_instance( + timeline_event.internal_metadata.instance_name + ) + ): + num_live += 1 + prev_batch_token = prev_batch_token.copy_and_replace( StreamKeyType.ROOM, new_room_key ) @@ -838,7 +874,7 @@ async def get_room_sync_data( avatar=None, # TODO: Dummy value heroes=None, - # Since we can't determine whether we've already sent a room down this + # TODO: Since we can't determine whether we've already sent a room down this # Sliding Sync connection before (we plan to add this optimization in the # future), we're always returning the requested room state instead of # updates. @@ -859,6 +895,5 @@ async def get_room_sync_data( # (encrypted rooms). 
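             # (For example, in an encrypted room the server can't read message
             # content, so it can't evaluate mention/keyword push rules itself;
             # only the client can compute accurate counts there.)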
notification_count=0, highlight_count=0, - # TODO: Dummy value - num_live=0, + num_live=num_live, ) From 81d36f36c1731738b38f0b7842de1ce84a570d74 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 15:28:23 -0500 Subject: [PATCH 010/109] Add tests for `limited` --- synapse/handlers/sliding_sync.py | 22 +++-- tests/rest/client/test_sync.py | 140 ++++++++++++++++++++++++++++++- 2 files changed, 149 insertions(+), 13 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 1d07e22c914..90991031aa8 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -37,7 +37,7 @@ UserID, ) from synapse.types.handlers import OperationType, SlidingSyncConfig, SlidingSyncResult -from synapse.types.state import StateFilter, StateKey +from synapse.types.state import StateFilter from synapse.visibility import filter_events_for_client if TYPE_CHECKING: @@ -764,6 +764,7 @@ async def get_room_sync_data( if room_sync_config.timeline_limit > 0: newly_joined = False if ( + # We can only determine new-ness if we have a `from_token` to define our range from_token is not None and rooms_for_user_membership_at_to_token.membership == Membership.JOIN ): @@ -778,11 +779,11 @@ async def get_room_sync_data( room_id=room_id, # We're going to paginate backwards from the `to_token` from_key=to_token.room_key, - # We should return historical messages (outside token range) in the + # We should return historical messages (before token range) in the # following cases because we want clients to be able to show a basic # screen of information: # - Initial sync (because no `from_token` to limit us anyway) - # - When users newly_joined + # - When users `newly_joined` # - TODO: For an incremental sync where we haven't sent it down this # connection before to_key=( @@ -832,12 +833,15 @@ async def get_room_sync_data( num_live = 0 if from_token is not None: for timeline_event in timeline_events: - if ( - timeline_event.internal_metadata.stream_ordering - > from_token.room_key.get_stream_pos_for_instance( - timeline_event.internal_metadata.instance_name - ) - ): + # This fields should be present for all persisted events + assert timeline_event.internal_metadata.stream_ordering is not None + assert timeline_event.internal_metadata.instance_name is not None + + persisted_position = PersistedEventPosition( + instance_name=timeline_event.internal_metadata.instance_name, + stream=timeline_event.internal_metadata.stream_ordering, + ) + if persisted_position.persisted_after(from_token.room_key): num_live += 1 prev_batch_token = prev_batch_token.copy_and_replace( diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 2b06767b8a8..5b611cd0962 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -35,7 +35,7 @@ ) from synapse.rest.client import devices, knock, login, read_marker, receipts, room, sync from synapse.server import HomeServer -from synapse.types import JsonDict, RoomStreamToken, StreamKeyType +from synapse.types import JsonDict, RoomStreamToken, StreamKeyType, StreamToken from synapse.util import Clock from tests import unittest @@ -1282,7 +1282,7 @@ def _create_dm_room( def test_sync_list(self) -> None: """ - Test that room IDs show up in the Sliding Sync lists + Test that room IDs show up in the Sliding Sync `lists` """ alice_user_id = self.register_user("alice", "correcthorse") alice_access_token = self.login(alice_user_id, "correcthorse") @@ -1387,7 +1387,7 @@ def 
test_wait_for_sync_token(self) -> None: def test_filter_list(self) -> None: """ - Test that filters apply to lists + Test that filters apply to `lists` """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -1462,7 +1462,7 @@ def test_filter_list(self) -> None: def test_sort_list(self) -> None: """ - Test that the lists are sorted by `stream_ordering` + Test that the `lists` are sorted by `stream_ordering` """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -1516,3 +1516,135 @@ def test_sort_list(self) -> None: ], channel.json_body["lists"]["foo-list"], ) + + def test_rooms_limited_initial_sync(self) -> None: + """ + Test that we mark `rooms` as `limited=True` when we saturate the `timeline_limit` + on initial sync. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.send(room_id1, "activity1", tok=user2_tok) + self.helper.send(room_id1, "activity2", tok=user2_tok) + event_response3 = self.helper.send(room_id1, "activity3", tok=user2_tok) + event_pos3 = self.get_success( + self.store.get_position_for_event(event_response3["event_id"]) + ) + event_response4 = self.helper.send(room_id1, "activity4", tok=user2_tok) + event_pos4 = self.get_success( + self.store.get_position_for_event(event_response4["event_id"]) + ) + event_response5 = self.helper.send(room_id1, "activity5", tok=user2_tok) + user1_join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 3, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # We expect to saturate the `timeline_limit` (there are more than 3 messages in the room) + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + True, + channel.json_body["rooms"][room_id1], + ) + # Check to make sure the latest events are returned + self.assertEqual( + [ + event["event_id"] + for event in channel.json_body["rooms"][room_id1]["timeline"] + ], + [ + event_response4["event_id"], + event_response5["event_id"], + user1_join_response["event_id"], + ], + channel.json_body["rooms"][room_id1]["timeline"], + ) + + # Check to make sure the `prev_batch` points at the right place + prev_batch_token = self.get_success( + StreamToken.from_string( + self.store, channel.json_body["rooms"][room_id1]["prev_batch"] + ) + ) + prev_batch_room_stream_token_serialized = self.get_success( + prev_batch_token.room_key.to_string(self.store) + ) + # If we use the `prev_batch` token to look backwards, we should see `event3` + # next so make sure the token encompasses it + self.assertEqual( + event_pos3.persisted_after(prev_batch_token.room_key), + False, + f"`prev_batch` token {prev_batch_room_stream_token_serialized} should be >= event_pos3={self.get_success(event_pos3.to_room_stream_token().to_string(self.store))}", + ) + # If we use the `prev_batch` token to look backwards, we shouldn't see `event4` + # anymore since it was just returned in this response. 
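+        # (`persisted_after` returning True here means event4's position is
+        # strictly after the `prev_batch` token, i.e. paginating backwards from
+        # `prev_batch` would not return it again.)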
+ self.assertEqual( + event_pos4.persisted_after(prev_batch_token.room_key), + True, + f"`prev_batch` token {prev_batch_room_stream_token_serialized} should be < event_pos4={self.get_success(event_pos4.to_room_stream_token().to_string(self.store))}", + ) + + def test_not_limited_initial_sync(self) -> None: + """ + Test that we mark `rooms` as `limited=False` when there are no more events to + paginate to. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.send(room_id1, "activity1", tok=user2_tok) + self.helper.send(room_id1, "activity2", tok=user2_tok) + self.helper.send(room_id1, "activity3", tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 100, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # The timeline should be `limited=False` because we have all of the events (no + # more to paginate to) + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + False, + channel.json_body["rooms"][room_id1], + ) + # We're just looking to make sure we got all of the events before hitting the `timeline_limit` + self.assertEqual( + len(channel.json_body["rooms"][room_id1]["timeline"]), + 9, + channel.json_body["rooms"][room_id1]["timeline"], + ) From 9791209a3d5c82ad9975acea06aaacb55de2326a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 18:10:17 -0500 Subject: [PATCH 011/109] Add more tests --- synapse/handlers/sliding_sync.py | 33 ++-- synapse/rest/client/sync.py | 10 +- synapse/types/__init__.py | 3 + tests/rest/client/test_sync.py | 274 ++++++++++++++++++++++++++++++- 4 files changed, 296 insertions(+), 24 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 90991031aa8..c1b0b2153a1 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -769,26 +769,29 @@ async def get_room_sync_data( and rooms_for_user_membership_at_to_token.membership == Membership.JOIN ): newly_joined = ( - rooms_for_user_membership_at_to_token.event_pos.stream - > from_token.room_key.get_stream_pos_for_instance( - rooms_for_user_membership_at_to_token.event_pos.instance_name + rooms_for_user_membership_at_to_token.event_pos.persisted_after( + from_token.room_key ) ) + # We should return historical messages (before token range) in the + # following cases because we want clients to be able to show a basic + # screen of information: + # - Initial sync (because no `from_token` to limit us anyway) + # - When users `newly_joined` + # - TODO: For an incremental sync where we haven't sent it down this + # connection before + should_limit_timeline_to_token_range = ( + from_token is not None and not newly_joined + ) + timeline_events, new_room_key = await self.store.paginate_room_events( room_id=room_id, # We're going to paginate backwards from the `to_token` from_key=to_token.room_key, - # We should return historical messages (before token range) in the - # following cases because we want clients to be able to show a basic - # screen of information: - # - Initial sync (because no `from_token` to limit us anyway) - # - When users 
`newly_joined` - # - TODO: For an incremental sync where we haven't sent it down this - # connection before to_key=( from_token.room_key - if from_token is not None and not newly_joined + if should_limit_timeline_to_token_range else None ), direction=Direction.BACKWARDS, @@ -832,7 +835,7 @@ async def get_room_sync_data( # old events in the timeline) num_live = 0 if from_token is not None: - for timeline_event in timeline_events: + for timeline_event in reversed(timeline_events): # This fields should be present for all persisted events assert timeline_event.internal_metadata.stream_ordering is not None assert timeline_event.internal_metadata.instance_name is not None @@ -843,6 +846,12 @@ async def get_room_sync_data( ) if persisted_position.persisted_after(from_token.room_key): num_live += 1 + else: + # Since we're iterating over the timeline events in + # reverse-chronological order, we can break once we hit an event + # that's not live. In the future, we could potentially optimize + # this more with a binary search (bisect). + break prev_batch_token = prev_batch_token.copy_and_replace( StreamKeyType.ROOM, new_room_key diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index db447738249..434eaa4789a 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -785,7 +785,7 @@ class SlidingSyncRestServlet(RestServlet): Response JSON:: { - "next_pos": "s58_224_0_13_10_1_1_16_0_1", + "pos": "s58_224_0_13_10_1_1_16_0_1", "lists": { "foo-list": { "count": 1337, @@ -824,7 +824,8 @@ class SlidingSyncRestServlet(RestServlet): "joined_count": 41, "invited_count": 1, "notification_count": 1, - "highlight_count": 0 + "highlight_count": 0, + "num_live": 2" }, // rooms from list "!foo:bar": { @@ -849,7 +850,8 @@ class SlidingSyncRestServlet(RestServlet): "joined_count": 4, "invited_count": 0, "notification_count": 54, - "highlight_count": 3 + "highlight_count": 3, + "num_live": 1, }, // ... 
99 more items }, @@ -927,7 +929,7 @@ async def encode_response( ) -> JsonDict: response: JsonDict = defaultdict(dict) - response["next_pos"] = await sliding_sync_result.next_pos.to_string(self.store) + response["pos"] = await sliding_sync_result.next_pos.to_string(self.store) serialized_lists = self.encode_lists(sliding_sync_result.lists) if serialized_lists: response["lists"] = serialized_lists diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py index 151658df534..b52236d6029 100644 --- a/synapse/types/__init__.py +++ b/synapse/types/__init__.py @@ -1078,6 +1078,9 @@ class PersistedPosition: stream: int def persisted_after(self, token: AbstractMultiWriterStreamToken) -> bool: + """ + Checks whether this position happened after the token + """ return token.get_stream_pos_for_instance(self.instance_name) < self.stream diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 5b611cd0962..d538716e5ac 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -19,6 +19,7 @@ # # import json +import logging from typing import List from parameterized import parameterized, parameterized_class @@ -35,7 +36,7 @@ ) from synapse.rest.client import devices, knock, login, read_marker, receipts, room, sync from synapse.server import HomeServer -from synapse.types import JsonDict, RoomStreamToken, StreamKeyType, StreamToken +from synapse.types import JsonDict, RoomStreamToken, StreamKeyType, StreamToken, UserID from synapse.util import Clock from tests import unittest @@ -44,6 +45,8 @@ ) from tests.server import TimedOutException +logger = logging.getLogger(__name__) + class FilterTestCase(unittest.HomeserverTestCase): user_id = "@apple:test" @@ -1379,11 +1382,9 @@ def test_wait_for_sync_token(self) -> None: channel.await_result(timeout_ms=200) self.assertEqual(channel.code, 200, channel.json_body) - # We expect the `next_pos` in the result to be the same as what we requested + # We expect the next `pos` in the result to be the same as what we requested # with because we weren't able to find anything new yet. - self.assertEqual( - channel.json_body["next_pos"], future_position_token_serialized - ) + self.assertEqual(channel.json_body["pos"], future_position_token_serialized) def test_filter_list(self) -> None: """ @@ -1602,7 +1603,15 @@ def test_rooms_limited_initial_sync(self) -> None: f"`prev_batch` token {prev_batch_room_stream_token_serialized} should be < event_pos4={self.get_success(event_pos4.to_room_stream_token().to_string(self.store))}", ) - def test_not_limited_initial_sync(self) -> None: + # With no `from_token` (initial sync), it's all historical since there is no + # "current" range + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 0, + channel.json_body["rooms"][room_id1], + ) + + def test_rooms_not_limited_initial_sync(self) -> None: """ Test that we mark `rooms` as `limited=False` when there are no more events to paginate to. 
@@ -1619,6 +1628,7 @@ def test_not_limited_initial_sync(self) -> None: self.helper.join(room_id1, user1_id, tok=user1_tok) # Make the Sliding Sync request + timeline_limit = 100 channel = self.make_request( "POST", self.sync_endpoint, @@ -1627,7 +1637,7 @@ def test_not_limited_initial_sync(self) -> None: "foo-list": { "ranges": [[0, 1]], "required_state": [], - "timeline_limit": 100, + "timeline_limit": timeline_limit, } } }, @@ -1642,9 +1652,257 @@ def test_not_limited_initial_sync(self) -> None: False, channel.json_body["rooms"][room_id1], ) + expected_number_of_events = 9 # We're just looking to make sure we got all of the events before hitting the `timeline_limit` self.assertEqual( len(channel.json_body["rooms"][room_id1]["timeline"]), - 9, + expected_number_of_events, channel.json_body["rooms"][room_id1]["timeline"], ) + self.assertLessEqual(expected_number_of_events, timeline_limit) + + # With no `from_token` (initial sync), it's all historical since there is no + # "live" token range. + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 0, + channel.json_body["rooms"][room_id1], + ) + + def test_rooms_incremental_sync(self) -> None: + """ + Test that `rooms` data during an incremental sync after an initial sync. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + self.helper.send(room_id1, "activity before initial sync1", tok=user2_tok) + + # Make an initial Sliding Sync request to grab a token. This is also a sanity + # check that we can go from initial to incremental sync. + sync_params = { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 3, + } + } + } + channel = self.make_request( + "POST", + self.sync_endpoint, + sync_params, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + next_pos = channel.json_body["pos"] + + # Send some events but don't send enough to saturate the `timeline_limit`. + # We want to later test that we only get the new events since the `next_pos` + event_response2 = self.helper.send(room_id1, "activity after2", tok=user2_tok) + event_response3 = self.helper.send(room_id1, "activity after3", tok=user2_tok) + + # Make an incremental Sliding Sync request (what we're trying to test) + channel = self.make_request( + "POST", + self.sync_endpoint + f"?pos={next_pos}", + sync_params, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # We only expect to see the new events since the last sync which isn't enough to + # fill up the `timeline_limit`. + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + False, + f'Our `timeline_limit` was {sync_params["lists"]["foo-list"]["timeline_limit"]} ' + + f'and {len(channel.json_body["rooms"][room_id1]["timeline"])} events were returned in the timeline. 
' + + str(channel.json_body["rooms"][room_id1]), + ) + # Check to make sure the latest events are returned + self.assertEqual( + [ + event["event_id"] + for event in channel.json_body["rooms"][room_id1]["timeline"] + ], + [ + event_response2["event_id"], + event_response3["event_id"], + ], + channel.json_body["rooms"][room_id1]["timeline"], + ) + + # All events are "live" + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 2, + channel.json_body["rooms"][room_id1], + ) + + def test_rooms_newly_joined_incremental_sync(self) -> None: + """ + Test that when we make an incremental sync with a `newly_joined` `rooms`, we are + able to see some historical events before the `from_token`. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.send(room_id1, "activity before token1", tok=user2_tok) + event_response2 = self.helper.send( + room_id1, "activity before token2", tok=user2_tok + ) + + from_token = self.event_sources.get_current_token() + + # Join the room after the `from_token` which will make us consider this room as + # `newly_joined`. + user1_join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + + # Send some events but don't send enough to saturate the `timeline_limit`. + # We want to later test that we only get the new events since the `next_pos` + event_response3 = self.helper.send( + room_id1, "activity after token3", tok=user2_tok + ) + event_response4 = self.helper.send( + room_id1, "activity after token4", tok=user2_tok + ) + + # The `timeline_limit` is set to 4 so we can at least see one historical event + # before the `from_token`. We should see historical events because this is a + # `newly_joined` room. + timeline_limit = 4 + # Make an incremental Sliding Sync request (what we're trying to test) + channel = self.make_request( + "POST", + self.sync_endpoint + + f"?pos={self.get_success( + from_token.to_string(self.store) + )}", + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": timeline_limit, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # We should see the new events and the rest should be filled with historical + # events which will make us `limited=True` since there are more to paginate to. + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + True, + f"Our `timeline_limit` was {timeline_limit} " + + f'and {len(channel.json_body["rooms"][room_id1]["timeline"])} events were returned in the timeline. 
' + + str(channel.json_body["rooms"][room_id1]), + ) + # Check to make sure that the "live" and historical events are returned + self.assertEqual( + [ + event["event_id"] + for event in channel.json_body["rooms"][room_id1]["timeline"] + ], + [ + event_response2["event_id"], + user1_join_response["event_id"], + event_response3["event_id"], + event_response4["event_id"], + ], + channel.json_body["rooms"][room_id1]["timeline"], + ) + + # Only events after the `from_token` are "live" (join, event3, event4) + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 3, + channel.json_body["rooms"][room_id1], + ) + + def test_rooms_invite_sync(self) -> None: + """ + Test that `rooms` we are invited to have some stripped `invite_state` and that + we can't see any timeline events because we haven't joined the room yet. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user1 = UserID.from_string(user1_id) + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + user2 = UserID.from_string(user2_id) + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.send(room_id1, "activity before1", tok=user2_tok) + self.helper.send(room_id1, "activity before2", tok=user2_tok) + self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + self.helper.send(room_id1, "activity after3", tok=user2_tok) + self.helper.send(room_id1, "activity after4", tok=user2_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 3, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # Should not see anything (except maybe the invite event) because we haven't + # joined yet (`filter_events_for_client(...)` is doing the work here) + self.assertEqual( + channel.json_body["rooms"][room_id1]["timeline"], + [], + channel.json_body["rooms"][room_id1]["timeline"], + ) + # We should have some stripped state so the potential joiner can identify the + # room (we don't care about the order). 
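+        # (Per `strip_event`, each stripped event should only carry the
+        # `type`, `state_key`, `content` and `sender` fields.)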
+ self.assertCountEqual( + channel.json_body["rooms"][room_id1]["invite_state"], + [ + { + "content": {"creator": user2_id, "room_version": "10"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.create", + }, + { + "content": {"join_rule": "public"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.join_rules", + }, + { + "content": {"displayname": user2.localpart, "membership": "join"}, + "sender": user2_id, + "state_key": user2_id, + "type": "m.room.member", + }, + { + "content": {"displayname": user1.localpart, "membership": "invite"}, + "sender": user2_id, + "state_key": user1_id, + "type": "m.room.member", + }, + ], + channel.json_body["rooms"][room_id1]["invite_state"], + ) From 70ecd4d8d3646ddb1fb55b37cdf9a07612a59d2f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 19:38:35 -0500 Subject: [PATCH 012/109] Fix lint --- synapse/handlers/sliding_sync.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index c1b0b2153a1..7a6ef1a2d98 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -774,24 +774,22 @@ async def get_room_sync_data( ) ) - # We should return historical messages (before token range) in the - # following cases because we want clients to be able to show a basic - # screen of information: - # - Initial sync (because no `from_token` to limit us anyway) - # - When users `newly_joined` - # - TODO: For an incremental sync where we haven't sent it down this - # connection before - should_limit_timeline_to_token_range = ( - from_token is not None and not newly_joined - ) - timeline_events, new_room_key = await self.store.paginate_room_events( room_id=room_id, # We're going to paginate backwards from the `to_token` from_key=to_token.room_key, to_key=( + # Determine whether we should limit the timeline to the token range. 
+ # + # We should return historical messages (before token range) in the + # following cases because we want clients to be able to show a basic + # screen of information: + # - Initial sync (because no `from_token` to limit us anyway) + # - When users `newly_joined` + # - TODO: For an incremental sync where we haven't sent it down this + # connection before from_token.room_key - if should_limit_timeline_to_token_range + if from_token is not None and not newly_joined else None ), direction=Direction.BACKWARDS, From 71eabe5e63fc2d637785866c6e1f471fe67d0966 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 19:41:41 -0500 Subject: [PATCH 013/109] Make room name optional --- synapse/handlers/sliding_sync.py | 3 +-- synapse/rest/client/sync.py | 4 +++- synapse/types/handlers/__init__.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 7a6ef1a2d98..f2b29ce1d1f 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -879,8 +879,7 @@ async def get_room_sync_data( return SlidingSyncResult.RoomResult( # TODO: Dummy value - # TODO: Make this optional because a computed name doesn't make sense for translated cases - name="TODO", + name=None, # TODO: Dummy value avatar=None, # TODO: Dummy value diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index 434eaa4789a..da28c2b3a5b 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -990,7 +990,6 @@ async def encode_rooms( ) serialized_rooms[room_id] = { - "name": room_result.name, "required_state": serialized_required_state, "timeline": serialized_timeline, "prev_batch": await room_result.prev_batch.to_string(self.store), @@ -1002,6 +1001,9 @@ async def encode_rooms( "num_live": room_result.num_live, } + if room_result.name: + serialized_rooms[room_id]["name"] = room_result.name + if room_result.avatar: serialized_rooms[room_id]["avatar"] = room_result.avatar diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py index 04b0ab972b8..1b544456a65 100644 --- a/synapse/types/handlers/__init__.py +++ b/synapse/types/handlers/__init__.py @@ -186,7 +186,7 @@ class RoomResult: (with potentially other old events in the timeline). """ - name: str + name: Optional[str] avatar: Optional[str] heroes: Optional[List[EventBase]] initial: bool From 39b4f10533fded08647c198c80e6b185bc8558e0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 19:55:12 -0500 Subject: [PATCH 014/109] Update comments --- synapse/handlers/sliding_sync.py | 14 +++++++++----- tests/rest/client/test_sync.py | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index f2b29ce1d1f..cb5274d495f 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -756,10 +756,10 @@ async def get_room_sync_data( # Assemble the list of timeline events timeline_events: List[EventBase] = [] limited = False - # We want to use `to_token` (vs `from_token`) because we look backwards from the - # `to_token` up to the `timeline_limit` and we might not reach `from_token` - # before we hit the limit. We will update the room stream position once we've - # fetched the events. + # We want to start off using the `to_token` (vs `from_token`) because we look + # backwards from the `to_token` up to the `timeline_limit` and we might not + # reach the `from_token` before we hit the limit. 
We will update the room stream + # position once we've fetched the events. prev_batch_token = to_token if room_sync_config.timeline_limit > 0: newly_joined = False @@ -803,6 +803,7 @@ async def get_room_sync_data( # most recent). timeline_events.reverse() + # Make sure we don't expose any events that the client shouldn't see timeline_events = await filter_events_for_client( self.storage_controllers, user.to_string(), @@ -851,11 +852,14 @@ async def get_room_sync_data( # this more with a binary search (bisect). break + # Update the `prev_batch_token` to point to the position that allows us to + # keep paginating backwards from the oldest event we return in the timeline. prev_batch_token = prev_batch_token.copy_and_replace( StreamKeyType.ROOM, new_room_key ) - # Figure out any stripped state events for invite/knocks + # Figure out any stripped state events for invite/knocks. This allows the + # potential joiner to identify the room. stripped_state: List[JsonDict] = [] if rooms_for_user_membership_at_to_token.membership in { Membership.INVITE, diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index d538716e5ac..838ff6e2b41 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1874,7 +1874,7 @@ def test_rooms_invite_sync(self) -> None: [], channel.json_body["rooms"][room_id1]["timeline"], ) - # We should have some stripped state so the potential joiner can identify the + # We should have some `stripped_state` so the potential joiner can identify the # room (we don't care about the order). self.assertCountEqual( channel.json_body["rooms"][room_id1]["invite_state"], From 9883b0f63f87cf34b50e28390a0fa29d8e014443 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 21:00:26 -0500 Subject: [PATCH 015/109] Add bundled aggregations --- synapse/handlers/sliding_sync.py | 16 +++++++++++++++- synapse/rest/client/sync.py | 5 ++--- synapse/types/handlers/__init__.py | 10 ++++++++-- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index cb5274d495f..e418a6e0742 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -114,6 +114,7 @@ def __init__(self, hs: "HomeServer"): self.auth_blocking = hs.get_auth_blocking() self.notifier = hs.get_notifier() self.event_sources = hs.get_event_sources() + self.relations_handler = hs.get_relations_handler() self.rooms_to_exclude_globally = hs.config.server.rooms_to_exclude_from_sync async def wait_for_sync_for_user( @@ -881,6 +882,18 @@ async def get_room_sync_data( stripped_state.append(strip_event(invite_or_knock_event)) + # TODO: Handle timeline gaps (`get_timeline_gaps()`) + + # If the timeline is `limited=True`, the client does not have all events + # necessary to calculate aggregations themselves. 
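In other words, the server pre-computes relation summaries and attaches them to the serialized events so the client does not have to fetch the skipped-over relation events itself. A rough illustration of the wire shape a client might receive for a reaction summary — the exact payload is defined by the Matrix relations spec, not by this patch:

    serialized_event = {
        "event_id": "$abc",
        "type": "m.room.message",
        "content": {"msgtype": "m.text", "body": "hello"},
        "unsigned": {
            "m.relations": {
                # aggregated annotation (reaction) summary for this event
                "m.annotation": {
                    "chunk": [{"type": "m.reaction", "key": "👍", "count": 3}]
                }
            }
        },
    }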
+ bundled_aggregations = None
+ if limited:
+ bundled_aggregations = (
+ await self.relations_handler.get_bundled_aggregations(
+ timeline_events, user.to_string()
+ )
+ )
+
 return SlidingSyncResult.RoomResult(
 # TODO: Dummy value
 name=None,
@@ -895,7 +908,8 @@ async def get_room_sync_data(
 initial=True,
 # TODO: Dummy value
 required_state=[],
- timeline=timeline_events,
+ timeline_events=timeline_events,
+ bundled_aggregations=bundled_aggregations,
 # TODO: Dummy value
 is_dm=False,
 stripped_state=stripped_state,
diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py
index da28c2b3a5b..4333ee8c2b6 100644
--- a/synapse/rest/client/sync.py
+++ b/synapse/rest/client/sync.py
@@ -976,11 +976,10 @@ async def encode_rooms(
 serialized_rooms = {}
 for room_id, room_result in rooms.items():
 serialized_timeline = await self.event_serializer.serialize_events(
- room_result.timeline,
+ room_result.timeline_events,
 time_now,
 config=serialize_options,
- # TODO
- # bundle_aggregations=room.timeline.bundled_aggregations,
+ bundle_aggregations=room_result.bundled_aggregations,
 )
 
 serialized_required_state = await self.event_serializer.serialize_events(
diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py
index 1b544456a65..1ba5ea55c14 100644
--- a/synapse/types/handlers/__init__.py
+++ b/synapse/types/handlers/__init__.py
@@ -31,6 +31,7 @@
 from pydantic import Extra
 
 from synapse.events import EventBase
+from synapse.handlers.relations import BundledAggregations
 from synapse.types import JsonDict, JsonMapping, StreamToken, UserID
 from synapse.types.rest.client import SlidingSyncBody
 
@@ -159,7 +160,11 @@ class RoomResult:
 entirely and NOT send "initial":false as this is wasteful on bandwidth. The
 absence of this flag means 'false'.
 required_state: The current state of the room
- timeline: Latest events in the room. The last event is the most recent
+ timeline_events: Latest events in the room. The last event is the most recent.
+ bundled_aggregations: A mapping of event ID to the bundled aggregations for
+ the timeline events above. This allows clients to show accurate reaction
+ counts (or edits, threads), even if some of the reaction events were skipped
+ over in a gappy sync.
 is_dm: Flag to specify whether the room is a direct-message room (most likely
 between two people).
 stripped_state: Stripped state events (for rooms where the usre is
@@ -191,7 +196,8 @@ class RoomResult:
 heroes: Optional[List[EventBase]]
 initial: bool
 required_state: List[EventBase]
- timeline: List[EventBase]
+ timeline_events: List[EventBase]
+ bundled_aggregations: Optional[Dict[str, BundledAggregations]]
 is_dm: bool
 stripped_state: Optional[List[JsonDict]]
 prev_batch: StreamToken
From 1c06153a0d3c24039a70b0c770947874bc05c246 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Tue, 18 Jun 2024 21:22:40 -0500
Subject: [PATCH 016/109] Determine limited before filtering

---
 synapse/handlers/sliding_sync.py | 27 ++++++++++++++++-----------
 tests/rest/client/test_sync.py | 8 ++++++++
 2 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py
index e418a6e0742..fe369949c5e 100644
--- a/synapse/handlers/sliding_sync.py
+++ b/synapse/handlers/sliding_sync.py
@@ -804,17 +804,9 @@ async def get_room_sync_data(
 # most recent). 
timeline_events.reverse() - # Make sure we don't expose any events that the client shouldn't see - timeline_events = await filter_events_for_client( - self.storage_controllers, - user.to_string(), - timeline_events, - is_peeking=rooms_for_user_membership_at_to_token.membership - != Membership.JOIN, - filter_send_to_client=True, - ) - - # Determine our `limited` status + # Determine our `limited` status based on the timeline. We do this before + # filtering the events so we can accurately determine if there is more to + # paginate even if we filter out some/all events. if len(timeline_events) > room_sync_config.timeline_limit: limited = True # Get rid of that extra "+ 1" event because we only used it to determine @@ -825,6 +817,19 @@ async def get_room_sync_data( stream=timeline_events[0].internal_metadata.stream_ordering - 1 ) + # TODO: Does `newly_joined` affect `limited`? It does in sync v2 but I fail + # to understand why. + + # Make sure we don't expose any events that the client shouldn't see + timeline_events = await filter_events_for_client( + self.storage_controllers, + user.to_string(), + timeline_events, + is_peeking=rooms_for_user_membership_at_to_token.membership + != Membership.JOIN, + filter_send_to_client=True, + ) + # Determine how many "live" events we have (events within the given token range). # # This is mostly useful to determine whether a given @mention event should diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 838ff6e2b41..df85c94bd5a 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1874,6 +1874,13 @@ def test_rooms_invite_sync(self) -> None: [], channel.json_body["rooms"][room_id1]["timeline"], ) + # Even though we don't get any timeline events because they are filtered out, + # there is still more to paginate + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + True, + channel.json_body["rooms"][room_id1], + ) # We should have some `stripped_state` so the potential joiner can identify the # room (we don't care about the order). self.assertCountEqual( @@ -1906,3 +1913,4 @@ def test_rooms_invite_sync(self) -> None: ], channel.json_body["rooms"][room_id1]["invite_state"], ) + From c81f3006a5e768e0e3f099dd7e001a7f1768b2c6 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 19 Jun 2024 12:54:39 -0500 Subject: [PATCH 017/109] Add better support for leave/ban --- synapse/handlers/sliding_sync.py | 48 ++-- synapse/storage/databases/main/stream.py | 20 ++ tests/rest/client/test_sync.py | 350 ++++++++++++++++++++++- tests/rest/client/utils.py | 4 +- 4 files changed, 399 insertions(+), 23 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index fe369949c5e..0d2f4dbfffa 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -775,24 +775,36 @@ async def get_room_sync_data( ) ) + # We're going to paginate backwards from the `to_token` + from_bound = to_token.room_key + # People shouldn't see past their leave/ban event + if rooms_for_user_membership_at_to_token.membership in ( + Membership.LEAVE, + Membership.BAN, + ): + from_bound = ( + rooms_for_user_membership_at_to_token.event_pos.to_room_stream_token() + ) + + # Determine whether we should limit the timeline to the token range. 
+ # + # We should return historical messages (before token range) in the + # following cases because we want clients to be able to show a basic + # screen of information: + # - Initial sync (because no `from_token` to limit us anyway) + # - When users `newly_joined` + # - TODO: For an incremental sync where we haven't sent it down this + # connection before + to_bound = ( + from_token.room_key + if from_token is not None and not newly_joined + else None + ) + timeline_events, new_room_key = await self.store.paginate_room_events( room_id=room_id, - # We're going to paginate backwards from the `to_token` - from_key=to_token.room_key, - to_key=( - # Determine whether we should limit the timeline to the token range. - # - # We should return historical messages (before token range) in the - # following cases because we want clients to be able to show a basic - # screen of information: - # - Initial sync (because no `from_token` to limit us anyway) - # - When users `newly_joined` - # - TODO: For an incremental sync where we haven't sent it down this - # connection before - from_token.room_key - if from_token is not None and not newly_joined - else None - ), + from_key=from_bound, + to_key=to_bound, direction=Direction.BACKWARDS, # We add one so we can determine if there are enough events to saturate # the limit or not (see `limited`) @@ -867,10 +879,10 @@ async def get_room_sync_data( # Figure out any stripped state events for invite/knocks. This allows the # potential joiner to identify the room. stripped_state: List[JsonDict] = [] - if rooms_for_user_membership_at_to_token.membership in { + if rooms_for_user_membership_at_to_token.membership in ( Membership.INVITE, Membership.KNOCK, - }: + ): invite_or_knock_event = await self.store.get_event( rooms_for_user_membership_at_to_token.event_id ) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index ff0d723684d..c21e69ecdab 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -1551,6 +1551,9 @@ async def paginate_room_events( ) -> Tuple[List[EventBase], RoomStreamToken]: """Returns list of events before or after a given token. + When Direction.FORWARDS: from_key < x <= to_key + When Direction.BACKWARDS: from_key >= x > to_key + Args: room_id from_key: The token used to stream from @@ -1567,6 +1570,23 @@ async def paginate_room_events( and `to_key`). """ + # We can bail early if we're looking forwards, and our `to_key` is already + # before our `from_key`. + if ( + direction == Direction.FORWARDS + and to_key is not None + and to_key.is_before_or_eq(from_key) + ): + return [], from_key + # Or vice-versa, if we're looking backwards and our `from_key` is already before + # our `to_key`. 
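A toy restatement of these two early-return conditions, with plain integers standing in for `RoomStreamToken`s and `<=` playing the role of `is_before_or_eq`:

    def bail_early(direction: str, from_key: int, to_key: int) -> bool:
        # Forwards: nothing to return if the upper bound is at/before the lower
        if direction == "forwards" and to_key <= from_key:
            return True
        # Backwards: nothing to return if we'd start at/before where we stop
        if direction == "backwards" and from_key <= to_key:
            return True
        return False

    assert bail_early("forwards", from_key=10, to_key=5)
    assert bail_early("backwards", from_key=5, to_key=10)
    assert not bail_early("backwards", from_key=10, to_key=5)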
+ elif (
+ direction == Direction.BACKWARDS
+ and to_key is not None
+ and from_key.is_before_or_eq(to_key)
+ ):
+ return [], from_key
+
 rows, token = await self.db_pool.runInteraction(
 "paginate_room_events",
 self._paginate_room_events_txn,
diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py
index df85c94bd5a..32542a64e8b 100644
--- a/tests/rest/client/test_sync.py
+++ b/tests/rest/client/test_sync.py
@@ -31,6 +31,7 @@
 AccountDataTypes,
 EventContentFields,
 EventTypes,
+ HistoryVisibility,
 ReceiptTypes,
 RelationTypes,
 )
@@ -1831,10 +1832,11 @@ def test_rooms_newly_joined_incremental_sync(self) -> None:
 channel.json_body["rooms"][room_id1],
 )
 
- def test_rooms_invite_sync(self) -> None:
+ def test_rooms_invite_shared_history_initial_sync(self) -> None:
 """
 Test that `rooms` we are invited to have some stripped `invite_state` and that
- we can't see any timeline events because we haven't joined the room yet.
+ we can't see any timeline events because the history visibility is `shared` and
+ we haven't joined the room yet.
 """
 user1_id = self.register_user("user1", "pass")
 user1_tok = self.login(user1_id, "pass")
@@ -1844,6 +1846,16 @@ def test_rooms_invite_sync(self) -> None:
 user2 = UserID.from_string(user2_id)
 
 room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok)
+ # Ensure we're testing with a room with `shared` history visibility which means
+ # history only becomes visible once you actually join the room.
+ history_visibility_response = self.helper.get_state(
+ room_id1, EventTypes.RoomHistoryVisibility, tok=user2_tok
+ )
+ self.assertEqual(
+ history_visibility_response.get("history_visibility"),
+ HistoryVisibility.SHARED,
+ )
+
 self.helper.send(room_id1, "activity before1", tok=user2_tok)
 self.helper.send(room_id1, "activity before2", tok=user2_tok)
 self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok)
 self.helper.send(room_id1, "activity after3", tok=user2_tok)
 self.helper.send(room_id1, "activity after4", tok=user2_tok)
@@ -1868,12 +1880,21 @@ def test_rooms_invite_sync(self) -> None:
 self.assertEqual(channel.code, 200, channel.json_body)
 
 # Should not see anything (except maybe the invite event) because we haven't
- # joined yet (`filter_events_for_client(...)` is doing the work here)
+ # joined yet (history visibility is `shared`) (`filter_events_for_client(...)`
+ # is doing the work here)
 self.assertEqual(
 channel.json_body["rooms"][room_id1]["timeline"],
 [],
 channel.json_body["rooms"][room_id1]["timeline"],
 )
+ # No "live" events in a initial sync (no `from_token` to define the "live"
+ # range) and no events returned in the timeline anyway so nothing could be
+ # "live".
+ self.assertEqual(
+ channel.json_body["rooms"][room_id1]["num_live"],
+ 0,
+ channel.json_body["rooms"][room_id1],
+ )
 # Even though we don't get any timeline events because they are filtered out,
 # there is still more to paginate
 self.assertEqual(
 channel.json_body["rooms"][room_id1]["limited"],
 True,
 channel.json_body["rooms"][room_id1],
 )
@@ -1914,3 +1935,326 @@ def test_rooms_invite_sync(self) -> None:
 channel.json_body["rooms"][room_id1]["invite_state"],
 )
 
+ def test_rooms_invite_world_readable_history_initial_sync(self) -> None:
+ """
+ Test that `rooms` we are invited to have some stripped `invite_state` and that
+ we can still see timeline events because the history visibility is
+ `world_readable`, even though we haven't joined the room yet. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user1 = UserID.from_string(user1_id) + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + user2 = UserID.from_string(user2_id) + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, + extra_content={ + "preset": "public_chat", + "initial_state": [ + { + "content": {"history_visibility": HistoryVisibility.WORLD_READABLE}, + "state_key": "", + "type": EventTypes.RoomHistoryVisibility, + } + ], + },) + # Ensure we're testing with a room with `world_readable` history visibility + # which means events are visible to anyone even without membership. + history_visibility_response = self.helper.get_state( + room_id1, EventTypes.RoomHistoryVisibility, tok=user2_tok + ) + self.assertEqual( + history_visibility_response.get("history_visibility"), + HistoryVisibility.WORLD_READABLE, + ) + + self.helper.send(room_id1, "activity before1", tok=user2_tok) + event_response2 = self.helper.send(room_id1, "activity before2", tok=user2_tok) + use1_invite_response = self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + event_response3 = self.helper.send(room_id1, "activity after3", tok=user2_tok) + event_response4 = self.helper.send(room_id1, "activity after4", tok=user2_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + # Large enough to see the latest events and before the invite + "timeline_limit": 4, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # Should see the last 4 events in the room + self.assertEqual( + [ + event["event_id"] + for event in channel.json_body["rooms"][room_id1]["timeline"] + ], + [ + event_response2["event_id"], + use1_invite_response["event_id"], + event_response3["event_id"], + event_response4["event_id"], + ], + channel.json_body["rooms"][room_id1]["timeline"], + ) + # No "live" events in a initial sync (no `from_token` to define the "live" + # range) + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 0, + channel.json_body["rooms"][room_id1], + ) + # There is still more to paginate + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + True, + channel.json_body["rooms"][room_id1], + ) + # We should have some `stripped_state` so the potential joiner can identify the + # room (we don't care about the order). + self.assertCountEqual( + channel.json_body["rooms"][room_id1]["invite_state"], + [ + { + "content": {"creator": user2_id, "room_version": "10"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.create", + }, + { + "content": {"join_rule": "public"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.join_rules", + }, + { + "content": {"displayname": user2.localpart, "membership": "join"}, + "sender": user2_id, + "state_key": user2_id, + "type": "m.room.member", + }, + { + "content": {"displayname": user1.localpart, "membership": "invite"}, + "sender": user2_id, + "state_key": user1_id, + "type": "m.room.member", + }, + ], + channel.json_body["rooms"][room_id1]["invite_state"], + ) + + def test_rooms_ban_initial_sync(self) -> None: + """ + Test that `rooms` we are banned from in an intial sync only allows us to see + timeline events up to the ban event. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.send(room_id1, "activity before1", tok=user2_tok) + self.helper.send(room_id1, "activity before2", tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + + event_response3 = self.helper.send(room_id1, "activity after3", tok=user2_tok) + event_response4 = self.helper.send(room_id1, "activity after4", tok=user2_tok) + user1_ban_response = self.helper.ban( + room_id1, src=user2_id, targ=user1_id, tok=user2_tok + ) + + self.helper.send(room_id1, "activity after5", tok=user2_tok) + self.helper.send(room_id1, "activity after6", tok=user2_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 3, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # We should see events before the ban but not after + self.assertEqual( + [ + event["event_id"] + for event in channel.json_body["rooms"][room_id1]["timeline"] + ], + [ + event_response3["event_id"], + event_response4["event_id"], + user1_ban_response["event_id"], + ], + channel.json_body["rooms"][room_id1]["timeline"], + ) + # No "live" events in a initial sync (no `from_token` to define the "live" + # range) + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 0, + channel.json_body["rooms"][room_id1], + ) + # There are more events to paginate to + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + True, + channel.json_body["rooms"][room_id1], + ) + + def test_rooms_ban_incremental_sync1(self) -> None: + """ + Test that `rooms` we are banned from during the next incremental sync only + allows us to see timeline events up to the ban event. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.send(room_id1, "activity before1", tok=user2_tok) + self.helper.send(room_id1, "activity before2", tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + + from_token = self.event_sources.get_current_token() + + event_response3 = self.helper.send(room_id1, "activity after3", tok=user2_tok) + event_response4 = self.helper.send(room_id1, "activity after4", tok=user2_tok) + # The ban is within the token range (between the `from_token` and the sliding + # sync request) + user1_ban_response = self.helper.ban( + room_id1, src=user2_id, targ=user1_id, tok=user2_tok + ) + + self.helper.send(room_id1, "activity after5", tok=user2_tok) + self.helper.send(room_id1, "activity after6", tok=user2_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint + + f"?pos={self.get_success( + from_token.to_string(self.store) + )}", + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 4, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # We should see events before the ban but not after + self.assertEqual( + [ + event["event_id"] + for event in channel.json_body["rooms"][room_id1]["timeline"] + ], + [ + event_response3["event_id"], + event_response4["event_id"], + user1_ban_response["event_id"], + ], + channel.json_body["rooms"][room_id1]["timeline"], + ) + # All live events in the incremental sync + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 3, + channel.json_body["rooms"][room_id1], + ) + # There aren't anymore events to paginate to in this range + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + False, + channel.json_body["rooms"][room_id1], + ) + + def test_rooms_ban_incremental_sync2(self) -> None: + """ + Test that `rooms` we are banned from before the incremental sync doesn't return + any events in the timeline. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.send(room_id1, "activity before1", tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + + self.helper.send(room_id1, "activity after2", tok=user2_tok) + # The ban is before we get our `from_token` + self.helper.ban(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + + self.helper.send(room_id1, "activity after3", tok=user2_tok) + + from_token = self.event_sources.get_current_token() + + self.helper.send(room_id1, "activity after4", tok=user2_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint + + f"?pos={self.get_success( + from_token.to_string(self.store) + )}", + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 4, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # Nothing to see for this banned user in the room in the token range + self.assertEqual( + channel.json_body["rooms"][room_id1]["timeline"], + [], + channel.json_body["rooms"][room_id1]["timeline"], + ) + # No events returned in the timeline so nothing is "live" + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 0, + channel.json_body["rooms"][room_id1], + ) + # There aren't anymore events to paginate to in this range + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + False, + channel.json_body["rooms"][room_id1], + ) diff --git a/tests/rest/client/utils.py b/tests/rest/client/utils.py index f0ba40a1f13..e43140720db 100644 --- a/tests/rest/client/utils.py +++ b/tests/rest/client/utils.py @@ -261,9 +261,9 @@ def ban( targ: str, expect_code: int = HTTPStatus.OK, tok: Optional[str] = None, - ) -> None: + ) -> JsonDict: """A convenience helper: `change_membership` with `membership` preset to "ban".""" - self.change_membership( + return self.change_membership( room=room, src=src, targ=targ, From d801db0d96ef53e1eaa42c7540f744a56de90b59 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 19 Jun 2024 13:24:01 -0500 Subject: [PATCH 018/109] Fix lints --- tests/rest/client/test_sync.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 32542a64e8b..6db6f855baf 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1935,7 +1935,6 @@ def test_rooms_invite_shared_history_initial_sync(self) -> None: channel.json_body["rooms"][room_id1]["invite_state"], ) - def test_rooms_invite_world_readable_history_initial_sync(self) -> None: """ Test that `rooms` we are invited to have some stripped `invite_state` and that @@ -1949,17 +1948,22 @@ def test_rooms_invite_world_readable_history_initial_sync(self) -> None: user2_tok = self.login(user2_id, "pass") user2 = UserID.from_string(user2_id) - room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, + room_id1 = self.helper.create_room_as( + user2_id, + tok=user2_tok, extra_content={ "preset": "public_chat", "initial_state": [ { - "content": {"history_visibility": HistoryVisibility.WORLD_READABLE}, + "content": { + "history_visibility": HistoryVisibility.WORLD_READABLE + }, "state_key": "", "type": EventTypes.RoomHistoryVisibility, } ], - },) + }, + ) # Ensure we're testing with a 
room with `world_readable` history visibility # which means events are visible to anyone even without membership. history_visibility_response = self.helper.get_state( @@ -1972,7 +1976,9 @@ def test_rooms_invite_world_readable_history_initial_sync(self) -> None: self.helper.send(room_id1, "activity before1", tok=user2_tok) event_response2 = self.helper.send(room_id1, "activity before2", tok=user2_tok) - use1_invite_response = self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + use1_invite_response = self.helper.invite( + room_id1, src=user2_id, targ=user1_id, tok=user2_tok + ) event_response3 = self.helper.send(room_id1, "activity after3", tok=user2_tok) event_response4 = self.helper.send(room_id1, "activity after4", tok=user2_tok) From 884b44801253c6b97ae07f958744c8443649153e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 19 Jun 2024 13:50:28 -0500 Subject: [PATCH 019/109] Update some wording --- synapse/handlers/sliding_sync.py | 6 +++--- tests/rest/client/test_sync.py | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 0d2f4dbfffa..3e49054e430 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -264,7 +264,7 @@ async def current_sync_for_user( ) ) - # Update the relevant room map + # Take the superset of the `RoomSyncConfig` for each room for room_id in sliced_room_ids: if relevant_room_map.get(room_id) is not None: # Take the highest timeline limit @@ -739,7 +739,7 @@ async def get_room_sync_data( to_token: StreamToken, ) -> SlidingSyncResult.RoomResult: """ - Fetch room data for a room. + Fetch room data for the sync response. We fetch data according to the token range (> `from_token` and <= `to_token`). @@ -760,7 +760,7 @@ async def get_room_sync_data( # We want to start off using the `to_token` (vs `from_token`) because we look # backwards from the `to_token` up to the `timeline_limit` and we might not # reach the `from_token` before we hit the limit. We will update the room stream - # position once we've fetched the events. + # position once we've fetched the events to point to the earliest event fetched. prev_batch_token = to_token if room_sync_config.timeline_limit > 0: newly_joined = False diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 3213059a784..a55804c96ca 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1607,7 +1607,7 @@ def test_rooms_limited_initial_sync(self) -> None: ) # With no `from_token` (initial sync), it's all historical since there is no - # "current" range + # "live" range self.assertEqual( channel.json_body["rooms"][room_id1]["num_live"], 0, @@ -1674,7 +1674,7 @@ def test_rooms_not_limited_initial_sync(self) -> None: def test_rooms_incremental_sync(self) -> None: """ - Test that `rooms` data during an incremental sync after an initial sync. + Test `rooms` data during an incremental sync after an initial sync. """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -1889,7 +1889,7 @@ def test_rooms_invite_shared_history_initial_sync(self) -> None: [], channel.json_body["rooms"][room_id1]["timeline"], ) - # No "live" events in a initial sync (no `from_token` to define the "live" + # No "live" events in an initial sync (no `from_token` to define the "live" # range) and no events returned in the timeline anyway so nothing could be # "live". 
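The `num_live` semantics these assertions keep referring to can be restated in one line: of the events actually returned in `timeline`, how many fall strictly after the request's `from_token`? A toy illustration with integer stream positions standing in for tokens:

    timeline = [("$e1", 5), ("$e2", 11), ("$e3", 12)]  # (event_id, stream position)
    from_token_pos = 10
    num_live = sum(1 for _, pos in timeline if pos > from_token_pos)
    assert num_live == 2  # only $e2 and $e3 are "live"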
 self.assertEqual(
 channel.json_body["rooms"][room_id1]["num_live"],
 0,
 channel.json_body["rooms"][room_id1],
@@ -2016,7 +2016,7 @@ def test_rooms_invite_world_readable_history_initial_sync(self) -> None:
 ],
 channel.json_body["rooms"][room_id1]["timeline"],
 )
- # No "live" events in a initial sync (no `from_token` to define the "live"
+ # No "live" events in an initial sync (no `from_token` to define the "live"
 # range)
 self.assertEqual(
 channel.json_body["rooms"][room_id1]["num_live"],
 0,
 channel.json_body["rooms"][room_id1],
@@ -2116,7 +2116,7 @@ def test_rooms_ban_initial_sync(self) -> None:
 ],
 channel.json_body["rooms"][room_id1]["timeline"],
 )
- # No "live" events in a initial sync (no `from_token` to define the "live"
+ # No "live" events in an initial sync (no `from_token` to define the "live"
 # range)
 self.assertEqual(
 channel.json_body["rooms"][room_id1]["num_live"],
 0,
 channel.json_body["rooms"][room_id1],
@@ -2206,7 +2206,7 @@ def test_rooms_ban_incremental_sync1(self) -> None:
 
 def test_rooms_ban_incremental_sync2(self) -> None:
 """
- Test that `rooms` we are banned from before the incremental sync doesn't return
+ Test that `rooms` we are banned from before the incremental sync don't return
 any events in the timeline.
 """
From ef3f606cbff8bf6ec008ec795e0ae699eb1619cc Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Thu, 20 Jun 2024 14:06:00 -0500
Subject: [PATCH 020/109] Start of combining `RoomSyncConfig`

---
 synapse/handlers/sliding_sync.py | 116 +++++++++++++++++++++++++------
 1 file changed, 94 insertions(+), 22 deletions(-)

diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py
index 3e49054e430..283629abe68 100644
--- a/synapse/handlers/sliding_sync.py
+++ b/synapse/handlers/sliding_sync.py
@@ -18,7 +18,7 @@
 #
 #
 import logging
-from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple
+from typing import TYPE_CHECKING, Dict, Final, List, Optional, Set, Tuple
 
 import attr
 from immutabledict import immutabledict
@@ -96,14 +96,73 @@ class RoomSyncConfig:
 
 Attributes:
 timeline_limit: The maximum number of events to return in the timeline.
- required_state: The set of state events requested for the room. The
- values are close to `StateKey` but actually use a syntax where you can
+ required_state_map: Map from state type to a list of state events requested for the
+ room. The values are close to `StateKey` but actually use a syntax where you can
 provide `*` wildcard and `$LAZY` for lazy room members as the `state_key`
 part of the tuple (type, state_key).
 """
 
 timeline_limit: int
- required_state: Set[Tuple[str, str]]
+ required_state_map: Dict[str, Set[Tuple[str, str]]]
+
+ def from_list_config(list_config: SlidingSyncConfig.SlidingSyncList) -> "RoomSyncConfig":
+ """
+ Create a `RoomSyncConfig` from a `SlidingSyncList` config.
+ """
+ return RoomSyncConfig(
+ timeline_limit=list_config.timeline_limit,
+ required_state_map={
+ state_type: {(state_type, state_key) for state_key in state_keys}
+ for state_type, state_keys in list_config.required_state.items()
+ },
+ )
+
+ def combine_room_sync_config(
+ self, other_room_sync_config: "RoomSyncConfig"
+ ) -> None:
+ """
+ Combine this `RoomSyncConfig` with another `RoomSyncConfig` and take the
+ superset union of the two. 
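"Superset union" concretely means: keep the larger `timeline_limit` and merge the `required_state_map` values key by key. A worked example of the intended result (illustrative event types; this is the behaviour the `RoomSyncConfigTestCase` added later in the series pins down):

    a = RoomSyncConfig(
        timeline_limit=9,
        required_state_map={"m.room.name": {("m.room.name", "")}},
    )
    b = RoomSyncConfig(
        timeline_limit=10,
        required_state_map={"m.room.member": {("m.room.member", "@foo:test")}},
    )
    a.combine_room_sync_config(b)
    # a.timeline_limit is now 10, and a.required_state_map contains both the
    # m.room.name and m.room.member entries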
+ """ + # Take the highest timeline limit + if self.timeline_limit < other_room_sync_config.timeline_limit: + self.timeline_limit = other_room_sync_config.timeline_limit + + # Union the required state + for ( + state_type, + state_key_set, + ) in other_room_sync_config.required_state_map.items(): + # If we already have a wildcard, we don't need to add anything else + if ( + # This is just a tricky way to grab the first element of the set + next(iter(self.required_state_map.get(state_type) or []), None) + == (state_type, StateKeys.WILDCARD) + ): + continue + + for state_key in state_key_set: + # If we're getting a wildcard, that's all that matters so get rid of any + # other state keys + if state_key == StateKeys.WILDCARD: + self.required_state_map[state_type] = (state_type, state_key) + break + # Otherwise, just add it to the set + else: + self.required_state_map[state_type].add((state_type, state_key)) + + +class StateKeys: + """ + Understood values of the `state_key` part of the tuple (type, state_key) in + `required_state`. + """ + + # Include all state events of the given type + WILDCARD: Final = "*" + # Lazy-load room membership events (include room membership events for any event + # `sender` in the timeline) + LAZY: Final = "$LAZY" class SlidingSyncHandler: @@ -266,25 +325,22 @@ async def current_sync_for_user( # Take the superset of the `RoomSyncConfig` for each room for room_id in sliced_room_ids: - if relevant_room_map.get(room_id) is not None: - # Take the highest timeline limit - if ( - relevant_room_map[room_id].timeline_limit - < list_config.timeline_limit - ): - relevant_room_map[room_id].timeline_limit = ( - list_config.timeline_limit - ) - - # Union the required state - relevant_room_map[room_id].required_state.update( - list_config.required_state - ) - else: - relevant_room_map[room_id] = RoomSyncConfig( - timeline_limit=list_config.timeline_limit, - required_state=set(list_config.required_state), + room_sync_config = RoomSyncConfig( + timeline_limit=list_config.timeline_limit, + required_state_map={ + state_type: (state_type, state_key) + for state_type, state_key in list_config.required_state.items() + }, + ) + existing_room_sync_config = relevant_room_map.get(room_id) + + relevant_room_map[room_id] = ( + existing_room_sync_config.combine_room_sync_config( + room_sync_config ) + if existing_room_sync_config is not None + else room_sync_config + ) lists[list_key] = SlidingSyncResult.SlidingWindowList( count=len(sorted_room_info), @@ -911,6 +967,22 @@ async def get_room_sync_data( ) ) + room_sync_config.required_state + + room_state = await self._storage_controllers.state.get_current_state( + room_id, + StateFilter.from_types( + [ + (EventTypes.Member, user.to_string()), + (EventTypes.CanonicalAlias, ""), + (EventTypes.Name, ""), + (EventTypes.Create, ""), + (EventTypes.JoinRules, ""), + (EventTypes.RoomAvatar, ""), + ] + ), + ) + return SlidingSyncResult.RoomResult( # TODO: Dummy value name=None, From bbc3eb5f6fffdbc518005a09d3942d2a6ce95493 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 20 Jun 2024 14:33:17 -0500 Subject: [PATCH 021/109] Different better --- synapse/handlers/sliding_sync.py | 75 ++++++++++++++++++----------- tests/handlers/test_sliding_sync.py | 10 +++- 2 files changed, 55 insertions(+), 30 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 283629abe68..71cd9f7c5d3 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -105,16 +105,39 @@ class RoomSyncConfig: 
timeline_limit: int required_state_map: Dict[str, Set[Tuple[str, str]]] - def from_list_config(list_config: SlidingSyncConfig.SlidingSyncList) -> "RoomSyncConfig": + def from_room_parameters( + room_params: SlidingSyncConfig.CommonRoomParameters, + ) -> "RoomSyncConfig": """ - Create a `RoomSyncConfig` from a `SlidingSyncList` config. + Create a `RoomSyncConfig` from a `SlidingSyncList`/`RoomSubscription` config. + + Args: + room_params: `SlidingSyncConfig.SlidingSyncList` or `SlidingSyncConfig.RoomSubscription` """ + required_state_map: Dict[str, Set[Tuple[str, str]]] = {} + for ( + state_type, + state_key, + ) in room_params.required_state: + # If we already have a wildcard, we don't need to add anything else + if ( + # This is just a tricky way to grab the first element of the set + next(iter(required_state_map.get(state_type) or []), None) + == (state_type, StateKeys.WILDCARD) + ): + continue + + # If we're getting a wildcard, that's all that matters so get rid of any + # other state keys + if state_key == StateKeys.WILDCARD: + required_state_map[state_type] = (state_type, state_key) + # Otherwise, just add it to the set + else: + required_state_map[state_type].add((state_type, state_key)) + return RoomSyncConfig( - timeline_limit=list_config.timeline_limit, - required_state_map={ - state_type: {(state_type, state_key) for state_key in state_keys} - for state_type, state_keys in list_config.required_state.items() - }, + timeline_limit=room_params.timeline_limit, + required_state_map=required_state_map, ) def combine_room_sync_config( @@ -325,12 +348,8 @@ async def current_sync_for_user( # Take the superset of the `RoomSyncConfig` for each room for room_id in sliced_room_ids: - room_sync_config = RoomSyncConfig( - timeline_limit=list_config.timeline_limit, - required_state_map={ - state_type: (state_type, state_key) - for state_type, state_key in list_config.required_state.items() - }, + room_sync_config = RoomSyncConfig.from_room_parameters( + list_config ) existing_room_sync_config = relevant_room_map.get(room_id) @@ -967,21 +986,21 @@ async def get_room_sync_data( ) ) - room_sync_config.required_state - - room_state = await self._storage_controllers.state.get_current_state( - room_id, - StateFilter.from_types( - [ - (EventTypes.Member, user.to_string()), - (EventTypes.CanonicalAlias, ""), - (EventTypes.Name, ""), - (EventTypes.Create, ""), - (EventTypes.JoinRules, ""), - (EventTypes.RoomAvatar, ""), - ] - ), - ) + # TODO: room_sync_config.required_state + + # room_state = await self._storage_controllers.state.get_current_state( + # room_id, + # StateFilter.from_types( + # [ + # (EventTypes.Member, user.to_string()), + # (EventTypes.CanonicalAlias, ""), + # (EventTypes.Name, ""), + # (EventTypes.Create, ""), + # (EventTypes.JoinRules, ""), + # (EventTypes.RoomAvatar, ""), + # ] + # ), + # ) return SlidingSyncResult.RoomResult( # TODO: Dummy value diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index 0358239c7f4..5be9402c922 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -26,20 +26,26 @@ from synapse.api.constants import AccountDataTypes, EventTypes, JoinRules, Membership from synapse.api.room_versions import RoomVersions -from synapse.handlers.sliding_sync import SlidingSyncConfig +from synapse.handlers.sliding_sync import RoomSyncConfig from synapse.rest import admin from synapse.rest.client import knock, login, room from synapse.server import HomeServer from synapse.storage.util.id_generators 
import MultiWriterIdGenerator from synapse.types import JsonDict, UserID +from synapse.types.handlers import SlidingSyncConfig from synapse.util import Clock from tests.replication._base import BaseMultiWorkerStreamTestCase -from tests.unittest import HomeserverTestCase +from tests.unittest import HomeserverTestCase, TestCase logger = logging.getLogger(__name__) +class RoomSyncConfigTestCase(TestCase): + def test_from_list_config() -> None: + RoomSyncConfig + + class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): """ Tests Sliding Sync handler `get_sync_room_ids_for_user()` to make sure it returns From 0eb029472e5410b780156f12db13434b003f42ae Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 20 Jun 2024 14:34:10 -0500 Subject: [PATCH 022/109] Remove unused `IncludeOldRooms` class --- synapse/types/rest/client/__init__.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/synapse/types/rest/client/__init__.py b/synapse/types/rest/client/__init__.py index 25fbd772f6f..5d453769b5e 100644 --- a/synapse/types/rest/client/__init__.py +++ b/synapse/types/rest/client/__init__.py @@ -154,10 +154,6 @@ class CommonRoomParameters(RequestBodyModel): (Max 1000 messages) """ - class IncludeOldRooms(RequestBodyModel): - timeline_limit: StrictInt - required_state: List[Tuple[StrictStr, StrictStr]] - required_state: List[Tuple[StrictStr, StrictStr]] # mypy workaround via https://github.com/pydantic/pydantic/issues/156#issuecomment-1130883884 if TYPE_CHECKING: From 5b17d706df4ab5db825f42969a4a5f60af517b07 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 20 Jun 2024 15:35:26 -0500 Subject: [PATCH 023/109] Add `RoomSyncConfig` tests --- synapse/handlers/sliding_sync.py | 41 +++-- tests/handlers/test_sliding_sync.py | 256 +++++++++++++++++++++++++++- 2 files changed, 278 insertions(+), 19 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 71cd9f7c5d3..8c2da1264d2 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -18,6 +18,7 @@ # # import logging +from collections import defaultdict from typing import TYPE_CHECKING, Dict, Final, List, Optional, Set, Tuple import attr @@ -96,16 +97,19 @@ class RoomSyncConfig: Attributes: timeline_limit: The maximum number of events to return in the timeline. - required_state_map: Map from state type to a list of state events requested for the - room. The values are close to `StateKey` but actually use a syntax where you can - provide `*` wildcard and `$LAZY` for lazy room members as the `state_key` part - of the tuple (type, state_key). + + required_state_map: Map from state type to a set of state (type, state_key) + tuples requested for the room. The values are close to `StateKey` but actually + use a syntax where you can provide `*` wildcard and `$LAZY` for lazy room + members as the `state_key` part of the tuple (type, state_key). 
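A concrete example of the shape this docstring describes (illustrative values):

    required_state_map = {
        "m.room.name": {("m.room.name", "")},
        "m.room.member": {
            ("m.room.member", "@alice:test"),
            ("m.room.member", "$LAZY"),  # lazy-loaded room members marker
        },
        "m.room.topic": {("m.room.topic", "*")},  # wildcard: every state_key
    }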
""" timeline_limit: int required_state_map: Dict[str, Set[Tuple[str, str]]] - def from_room_parameters( + @classmethod + def from_room_config( + cls, room_params: SlidingSyncConfig.CommonRoomParameters, ) -> "RoomSyncConfig": """ @@ -130,12 +134,15 @@ def from_room_parameters( # If we're getting a wildcard, that's all that matters so get rid of any # other state keys if state_key == StateKeys.WILDCARD: - required_state_map[state_type] = (state_type, state_key) + required_state_map[state_type] = {(state_type, state_key)} # Otherwise, just add it to the set else: - required_state_map[state_type].add((state_type, state_key)) + if required_state_map.get(state_type) is None: + required_state_map[state_type] = {(state_type, state_key)} + else: + required_state_map[state_type].add((state_type, state_key)) - return RoomSyncConfig( + return cls( timeline_limit=room_params.timeline_limit, required_state_map=required_state_map, ) @@ -164,15 +171,18 @@ def combine_room_sync_config( ): continue - for state_key in state_key_set: + for _state_type, state_key in state_key_set: # If we're getting a wildcard, that's all that matters so get rid of any # other state keys if state_key == StateKeys.WILDCARD: - self.required_state_map[state_type] = (state_type, state_key) + self.required_state_map[state_type] = {(state_type, state_key)} break # Otherwise, just add it to the set else: - self.required_state_map[state_type].add((state_type, state_key)) + if self.required_state_map.get(state_type) is None: + self.required_state_map[state_type] = {(state_type, state_key)} + else: + self.required_state_map[state_type].add((state_type, state_key)) class StateKeys: @@ -348,18 +358,17 @@ async def current_sync_for_user( # Take the superset of the `RoomSyncConfig` for each room for room_id in sliced_room_ids: - room_sync_config = RoomSyncConfig.from_room_parameters( + room_sync_config = RoomSyncConfig.from_room_config( list_config ) existing_room_sync_config = relevant_room_map.get(room_id) - relevant_room_map[room_id] = ( + if existing_room_sync_config is not None: existing_room_sync_config.combine_room_sync_config( room_sync_config ) - if existing_room_sync_config is not None - else room_sync_config - ) + else: + relevant_room_map[room_id] = room_sync_config lists[list_key] = SlidingSyncResult.SlidingWindowList( count=len(sorted_room_info), diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index 5be9402c922..57dd8ea6a70 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -21,12 +21,13 @@ from unittest.mock import patch from parameterized import parameterized +from copy import deepcopy from twisted.test.proto_helpers import MemoryReactor from synapse.api.constants import AccountDataTypes, EventTypes, JoinRules, Membership from synapse.api.room_versions import RoomVersions -from synapse.handlers.sliding_sync import RoomSyncConfig +from synapse.handlers.sliding_sync import RoomSyncConfig, StateKeys from synapse.rest import admin from synapse.rest.client import knock, login, room from synapse.server import HomeServer @@ -42,8 +43,257 @@ class RoomSyncConfigTestCase(TestCase): - def test_from_list_config() -> None: - RoomSyncConfig + def test_from_list_config(self) -> None: + """ + Test that we can convert a `SlidingSyncConfig.SlidingSyncList` to a + `RoomSyncConfig`. 
+ """ + + list_config = SlidingSyncConfig.SlidingSyncList( + timeline_limit=10, + required_state=[ + (EventTypes.Name, ""), + (EventTypes.Member, "@foo"), + (EventTypes.Member, "@bar"), + (EventTypes.Member, "@baz"), + (EventTypes.CanonicalAlias, ""), + ], + ) + + room_sync_config = RoomSyncConfig.from_room_config(list_config) + + self.assertEqual(room_sync_config.timeline_limit, 10) + self.assertEqual( + room_sync_config.required_state_map, + { + EventTypes.Name: {(EventTypes.Name, "")}, + EventTypes.Member: { + (EventTypes.Member, "@foo"), + (EventTypes.Member, "@bar"), + (EventTypes.Member, "@baz"), + }, + EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")}, + }, + ) + + def test_from_room_subscription(self) -> None: + """ + Test that we can convert a `SlidingSyncConfig.RoomSubscription` to a + `RoomSyncConfig`. + """ + room_subscription_config = SlidingSyncConfig.RoomSubscription( + timeline_limit=10, + required_state=[ + (EventTypes.Name, ""), + (EventTypes.Member, "@foo"), + (EventTypes.Member, "@bar"), + (EventTypes.Member, "@baz"), + (EventTypes.CanonicalAlias, ""), + ], + ) + + room_sync_config = RoomSyncConfig.from_room_config(room_subscription_config) + + self.assertEqual(room_sync_config.timeline_limit, 10) + self.assertEqual( + room_sync_config.required_state_map, + { + EventTypes.Name: {(EventTypes.Name, "")}, + EventTypes.Member: { + (EventTypes.Member, "@foo"), + (EventTypes.Member, "@bar"), + (EventTypes.Member, "@baz"), + }, + EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")}, + }, + ) + + def test_from_room_config_wildcard(self) -> None: + """ + Test that a wildcard (*) will override all other values for the same event type. + """ + list_config = SlidingSyncConfig.SlidingSyncList( + timeline_limit=10, + required_state=[ + (EventTypes.Name, ""), + (EventTypes.Member, "@foo"), + (EventTypes.Member, StateKeys.WILDCARD), + (EventTypes.Member, "@bar"), + (EventTypes.Member, StateKeys.LAZY), + (EventTypes.Member, "@baz"), + (EventTypes.CanonicalAlias, ""), + ], + ) + + room_sync_config = RoomSyncConfig.from_room_config(list_config) + + self.assertEqual(room_sync_config.timeline_limit, 10) + self.assertEqual( + room_sync_config.required_state_map, + { + EventTypes.Name: {(EventTypes.Name, "")}, + EventTypes.Member: { + (EventTypes.Member, "*"), + }, + EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")}, + }, + ) + + def test_from_room_config_lazy_members(self) -> None: + """ + `$LAZY` room members should just be another additional key next to other + explicit keys. We will unroll the special `$LAZY` meaning later. 
+ """ + list_config = SlidingSyncConfig.SlidingSyncList( + timeline_limit=10, + required_state=[ + (EventTypes.Name, ""), + (EventTypes.Member, "@foo"), + (EventTypes.Member, "@bar"), + (EventTypes.Member, StateKeys.LAZY), + (EventTypes.Member, "@baz"), + (EventTypes.CanonicalAlias, ""), + ], + ) + + room_sync_config = RoomSyncConfig.from_room_config(list_config) + + self.assertEqual(room_sync_config.timeline_limit, 10) + self.assertEqual( + room_sync_config.required_state_map, + { + EventTypes.Name: {(EventTypes.Name, "")}, + EventTypes.Member: { + (EventTypes.Member, "@foo"), + (EventTypes.Member, "@bar"), + (EventTypes.Member, StateKeys.LAZY), + (EventTypes.Member, "@baz"), + }, + EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")}, + }, + ) + + @parameterized.expand( + [ + ( + "No direct overlap", + # A + RoomSyncConfig( + timeline_limit=9, + required_state_map={ + EventTypes.Name: {(EventTypes.Name, "")}, + EventTypes.Member: { + (EventTypes.Member, "@foo"), + (EventTypes.Member, "@bar"), + }, + }, + ), + # B + RoomSyncConfig( + timeline_limit=10, + required_state_map={ + EventTypes.Member: { + (EventTypes.Member, StateKeys.LAZY), + (EventTypes.Member, "@baz"), + }, + EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")}, + }, + ), + # Expected + RoomSyncConfig( + timeline_limit=10, + required_state_map={ + EventTypes.Name: {(EventTypes.Name, "")}, + EventTypes.Member: { + (EventTypes.Member, "@foo"), + (EventTypes.Member, "@bar"), + (EventTypes.Member, StateKeys.LAZY), + (EventTypes.Member, "@baz"), + }, + EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")}, + }, + ), + ), + ( + "Wildcard overlap", + # A + RoomSyncConfig( + timeline_limit=10, + required_state_map={ + EventTypes.Dummy: {(EventTypes.Dummy, "foo")}, + EventTypes.Member: { + (EventTypes.Member, "@foo"), + (EventTypes.Member, "@baz"), + (EventTypes.Member, "*"), + }, + "org.matrix.flowers": {("org.matrix.flowers", "*")}, + }, + ), + # B + RoomSyncConfig( + timeline_limit=9, + required_state_map={ + EventTypes.Dummy: {(EventTypes.Dummy, "*")}, + "org.matrix.flowers": {("org.matrix.flowers", "tulips")}, + }, + ), + # Expected + RoomSyncConfig( + timeline_limit=10, + required_state_map={ + EventTypes.Dummy: { + (EventTypes.Dummy, "*"), + }, + EventTypes.Member: { + (EventTypes.Member, "*"), + }, + "org.matrix.flowers": {("org.matrix.flowers", "*")}, + }, + ), + ), + ] + ) + def test_combine_room_sync_config( + self, + _test_label: str, + a: RoomSyncConfig, + b: RoomSyncConfig, + expected: RoomSyncConfig, + ) -> None: + """ + Combine A into B and B into A to make sure we get the same result. 
+ """ + # Since we're mutating these in place, make a copy for each of our trials + room_sync_config_a = deepcopy(a) + room_sync_config_b = deepcopy(b) + + # Combine B into A + room_sync_config_a.combine_room_sync_config(room_sync_config_b) + + self.assertEqual(room_sync_config_a.timeline_limit, expected.timeline_limit) + self.assertCountEqual( + room_sync_config_a.required_state_map, expected.required_state_map + ) + for event_type, expected_state_keys in expected.required_state_map.items(): + self.assertCountEqual( + room_sync_config_a.required_state_map[event_type], expected_state_keys + ) + + # Since we're mutating these in place, make a copy for each of our trials + room_sync_config_a = deepcopy(a) + room_sync_config_b = deepcopy(b) + + # Combine A into B + room_sync_config_b.combine_room_sync_config(room_sync_config_a) + + self.assertEqual(room_sync_config_b.timeline_limit, expected.timeline_limit) + self.assertCountEqual( + room_sync_config_b.required_state_map, expected.required_state_map + ) + for event_type, expected_state_keys in expected.required_state_map.items(): + self.assertCountEqual( + room_sync_config_b.required_state_map[event_type], expected_state_keys + ) class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): From b6edd6b43eeafce827048e1b07e62b9f24202164 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 20 Jun 2024 15:51:53 -0500 Subject: [PATCH 024/109] Better `RoomSyncConfig` tests --- synapse/handlers/sliding_sync.py | 1 - tests/handlers/test_sliding_sync.py | 129 +++++++++++++++------------- 2 files changed, 67 insertions(+), 63 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 8c2da1264d2..5b8494d2fc5 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -18,7 +18,6 @@ # # import logging -from collections import defaultdict from typing import TYPE_CHECKING, Dict, Final, List, Optional, Set, Tuple import attr diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index 57dd8ea6a70..4758715fe11 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -18,10 +18,10 @@ # # import logging +from copy import deepcopy from unittest.mock import patch from parameterized import parameterized -from copy import deepcopy from twisted.test.proto_helpers import MemoryReactor @@ -43,6 +43,19 @@ class RoomSyncConfigTestCase(TestCase): + def _assert_room_config_equal( + self, actual: RoomSyncConfig, expected: RoomSyncConfig + ) -> None: + self.assertEqual(actual.timeline_limit, expected.timeline_limit) + + # `self.assertEqual(...)` works fine to catch differences but the output is + # almost impossible to read because of the way it truncates the output + self.assertCountEqual(actual.required_state_map, expected.required_state_map) + for event_type, expected_state_keys in expected.required_state_map.items(): + self.assertCountEqual( + actual.required_state_map[event_type], expected_state_keys + ) + def test_from_list_config(self) -> None: """ Test that we can convert a `SlidingSyncConfig.SlidingSyncList` to a @@ -62,18 +75,20 @@ def test_from_list_config(self) -> None: room_sync_config = RoomSyncConfig.from_room_config(list_config) - self.assertEqual(room_sync_config.timeline_limit, 10) - self.assertEqual( - room_sync_config.required_state_map, - { - EventTypes.Name: {(EventTypes.Name, "")}, - EventTypes.Member: { - (EventTypes.Member, "@foo"), - (EventTypes.Member, "@bar"), - (EventTypes.Member, "@baz"), + 
self._assert_room_config_equal( + room_sync_config, + RoomSyncConfig( + timeline_limit=10, + required_state_map={ + EventTypes.Name: {(EventTypes.Name, "")}, + EventTypes.Member: { + (EventTypes.Member, "@foo"), + (EventTypes.Member, "@bar"), + (EventTypes.Member, "@baz"), + }, + EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")}, }, - EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")}, - }, + ), ) def test_from_room_subscription(self) -> None: @@ -94,18 +109,20 @@ def test_from_room_subscription(self) -> None: room_sync_config = RoomSyncConfig.from_room_config(room_subscription_config) - self.assertEqual(room_sync_config.timeline_limit, 10) - self.assertEqual( - room_sync_config.required_state_map, - { - EventTypes.Name: {(EventTypes.Name, "")}, - EventTypes.Member: { - (EventTypes.Member, "@foo"), - (EventTypes.Member, "@bar"), - (EventTypes.Member, "@baz"), + self._assert_room_config_equal( + room_sync_config, + RoomSyncConfig( + timeline_limit=10, + required_state_map={ + EventTypes.Name: {(EventTypes.Name, "")}, + EventTypes.Member: { + (EventTypes.Member, "@foo"), + (EventTypes.Member, "@bar"), + (EventTypes.Member, "@baz"), + }, + EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")}, }, - EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")}, - }, + ), ) def test_from_room_config_wildcard(self) -> None: @@ -127,16 +144,18 @@ def test_from_room_config_wildcard(self) -> None: room_sync_config = RoomSyncConfig.from_room_config(list_config) - self.assertEqual(room_sync_config.timeline_limit, 10) - self.assertEqual( - room_sync_config.required_state_map, - { - EventTypes.Name: {(EventTypes.Name, "")}, - EventTypes.Member: { - (EventTypes.Member, "*"), + self._assert_room_config_equal( + room_sync_config, + RoomSyncConfig( + timeline_limit=10, + required_state_map={ + EventTypes.Name: {(EventTypes.Name, "")}, + EventTypes.Member: { + (EventTypes.Member, "*"), + }, + EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")}, }, - EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")}, - }, + ), ) def test_from_room_config_lazy_members(self) -> None: @@ -158,19 +177,21 @@ def test_from_room_config_lazy_members(self) -> None: room_sync_config = RoomSyncConfig.from_room_config(list_config) - self.assertEqual(room_sync_config.timeline_limit, 10) - self.assertEqual( - room_sync_config.required_state_map, - { - EventTypes.Name: {(EventTypes.Name, "")}, - EventTypes.Member: { - (EventTypes.Member, "@foo"), - (EventTypes.Member, "@bar"), - (EventTypes.Member, StateKeys.LAZY), - (EventTypes.Member, "@baz"), + self._assert_room_config_equal( + room_sync_config, + RoomSyncConfig( + timeline_limit=10, + required_state_map={ + EventTypes.Name: {(EventTypes.Name, "")}, + EventTypes.Member: { + (EventTypes.Member, "@foo"), + (EventTypes.Member, "@bar"), + (EventTypes.Member, StateKeys.LAZY), + (EventTypes.Member, "@baz"), + }, + EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")}, }, - EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")}, - }, + ), ) @parameterized.expand( @@ -222,8 +243,6 @@ def test_from_room_config_lazy_members(self) -> None: required_state_map={ EventTypes.Dummy: {(EventTypes.Dummy, "foo")}, EventTypes.Member: { - (EventTypes.Member, "@foo"), - (EventTypes.Member, "@baz"), (EventTypes.Member, "*"), }, "org.matrix.flowers": {("org.matrix.flowers", "*")}, @@ -270,14 +289,7 @@ def test_combine_room_sync_config( # Combine B into A room_sync_config_a.combine_room_sync_config(room_sync_config_b) - 
self.assertEqual(room_sync_config_a.timeline_limit, expected.timeline_limit) - self.assertCountEqual( - room_sync_config_a.required_state_map, expected.required_state_map - ) - for event_type, expected_state_keys in expected.required_state_map.items(): - self.assertCountEqual( - room_sync_config_a.required_state_map[event_type], expected_state_keys - ) + self._assert_room_config_equal(room_sync_config_a, expected) # Since we're mutating these in place, make a copy for each of our trials room_sync_config_a = deepcopy(a) @@ -286,14 +298,7 @@ def test_combine_room_sync_config( # Combine A into B room_sync_config_b.combine_room_sync_config(room_sync_config_a) - self.assertEqual(room_sync_config_b.timeline_limit, expected.timeline_limit) - self.assertCountEqual( - room_sync_config_b.required_state_map, expected.required_state_map - ) - for event_type, expected_state_keys in expected.required_state_map.items(): - self.assertCountEqual( - room_sync_config_b.required_state_map[event_type], expected_state_keys - ) + self._assert_room_config_equal(room_sync_config_b, expected) class GetSyncRoomIdsForUserTestCase(HomeserverTestCase): From cddcf1f419b424f8a920dacc5e6a081bb37dc4d5 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 20 Jun 2024 16:20:18 -0500 Subject: [PATCH 025/109] Fetch required state --- synapse/handlers/sliding_sync.py | 42 ++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 5b8494d2fc5..a612beb83eb 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -96,7 +96,6 @@ class RoomSyncConfig: Attributes: timeline_limit: The maximum number of events to return in the timeline. - required_state_map: Map from state type to a set of state (type, state_key) tuples requested for the room. 
The values are close to `StateKey` but actually use a syntax where you can provide `*` wildcard and `$LAZY` for lazy room @@ -994,21 +993,29 @@ async def get_room_sync_data( ) ) - # TODO: room_sync_config.required_state - - # room_state = await self._storage_controllers.state.get_current_state( - # room_id, - # StateFilter.from_types( - # [ - # (EventTypes.Member, user.to_string()), - # (EventTypes.CanonicalAlias, ""), - # (EventTypes.Name, ""), - # (EventTypes.Create, ""), - # (EventTypes.JoinRules, ""), - # (EventTypes.RoomAvatar, ""), - # ] - # ), - # ) + # Fetch the required state for the room + required_state_types: List[Tuple[str, Optional[str]]] = [] + for state_type, state_key_set in room_sync_config.required_state_map.items(): + for _state_type, state_key in state_key_set: + if state_key == StateKeys.WILDCARD: + # `None` is a wildcard in the `StateFilter` + required_state_types.append((state_type, None)) + # We need to fetch all relevant people when we're lazy-loading membership + if state_type == EventTypes.Member and state_key == StateKeys.LAZY: + # Everyone in the timeline is relevant + timeline_membership: Set[str] = set() + for timeline_event in timeline_events: + timeline_membership.add(timeline_event.sender) + + for user_id in timeline_membership: + required_state_types.append((EventTypes.Member, user_id)) + else: + required_state_types.append((state_type, state_key)) + + room_state = await self.storage_controllers.state.get_current_state( + room_id, + StateFilter.from_types(required_state_types), + ) return SlidingSyncResult.RoomResult( # TODO: Dummy value @@ -1022,8 +1029,7 @@ async def get_room_sync_data( # future), we're always returning the requested room state instead of # updates. initial=True, - # TODO: Dummy value - required_state=[], + required_state=list(room_state.values()), timeline_events=timeline_events, bundled_aggregations=bundled_aggregations, # TODO: Dummy value From ddbcd786fa7f7a2deabc3838906c6ffd3fffe68a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 20 Jun 2024 16:36:08 -0500 Subject: [PATCH 026/109] Add future note about other people we might care about when lazy-loading membership --- synapse/handlers/sliding_sync.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index a612beb83eb..3fab6429b70 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1009,6 +1009,9 @@ async def get_room_sync_data( for user_id in timeline_membership: required_state_types.append((EventTypes.Member, user_id)) + + # TODO: We probably also care about invite, ban, kick, targets, etc + # but the spec only mentions "senders". 
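+ # Hypothetical example (values invented for illustration): with a
+ # `required_state_map` of {"m.room.member": {("m.room.member", StateKeys.LAZY)}}
+ # and timeline senders {"@alice:test", "@bob:test"}, the lazy branch above
+ # expands to [("m.room.member", "@alice:test"), ("m.room.member", "@bob:test")].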
else: required_state_types.append((state_type, state_key)) From 55ad5590bd15a65f54f4530e68d2cc97373fcd0a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 20 Jun 2024 17:24:32 -0500 Subject: [PATCH 027/109] Start of thinking to handle partial stated rooms --- synapse/handlers/sliding_sync.py | 80 +++++++++++++++++++++++++------- 1 file changed, 64 insertions(+), 16 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 3fab6429b70..d6fc79f7089 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -18,7 +18,17 @@ # # import logging -from typing import TYPE_CHECKING, Dict, Final, List, Optional, Set, Tuple +from typing import ( + TYPE_CHECKING, + Dict, + Final, + List, + Optional, + Set, + Tuple, + TypeVar, + AbstractSet, +) import attr from immutabledict import immutabledict @@ -87,6 +97,17 @@ def filter_membership_for_sync(*, membership: str, user_id: str, sender: str) -> return membership != Membership.LEAVE or sender != user_id +R = TypeVar("R") + + +def get_first_item_in_set(target_set: Optional[AbstractSet[R]]) -> R: + """ + Helper to grab the "first" item in a set. A set is an unordered collection so this + is just a way to grab some item in the set. + """ + return next(iter(target_set or []), None) + + # We can't freeze this class because we want to update it in place with the # de-duplicated data. @attr.s(slots=True, auto_attribs=True) @@ -123,8 +144,9 @@ def from_room_config( ) in room_params.required_state: # If we already have a wildcard, we don't need to add anything else if ( - # This is just a tricky way to grab the first element of the set - next(iter(required_state_map.get(state_type) or []), None) + # This is just a tricky way to grab the first element of the set. We + # assume that if a wildcard is present, it's the only thing in the set. + get_first_item_in_set(required_state_map.get(state_type)) == (state_type, StateKeys.WILDCARD) ): continue @@ -316,6 +338,8 @@ async def current_sync_for_user( # Assemble sliding window lists lists: Dict[str, SlidingSyncResult.SlidingWindowList] = {} + # Keep track of the rooms that we're going to display and need to fetch more + # info about relevant_room_map: Dict[str, RoomSyncConfig] = {} if sync_config.lists: # Get all of the room IDs that the user should be able to see in the sync @@ -359,8 +383,26 @@ async def current_sync_for_user( room_sync_config = RoomSyncConfig.from_room_config( list_config ) - existing_room_sync_config = relevant_room_map.get(room_id) + membership_state_keys = ( + room_sync_config.required_state_map.get( + EventTypes.Member + ) + ) + # Exclude partially stated rooms unless the `required_state` + # only has `["m.room.member", "$LAZY"]` for membership. + if ( + is_room_partial + and membership_state_keys is not None + and len(membership_state_keys) == 1 + and get_first_item_in_set(membership_state_keys) + == (EventTypes.Member, StateKeys.LAZY) + ): + continue + + # Update our `relevant_room_map` with the room we're going + # to display and need to fetch more info about. 
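+ # (As exercised by `test_combine_room_sync_config` above: a room that
+ # appears in two lists with timeline limits 9 and 10 keeps limit 10 and
+ # the union of both `required_state` maps, with a `*` wildcard
+ # swallowing any other state keys for the same event type.)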
+ existing_room_sync_config = relevant_room_map.get(room_id) if existing_room_sync_config is not None: existing_room_sync_config.combine_room_sync_config( room_sync_config @@ -678,9 +720,6 @@ async def filter_rooms( user_id = user.to_string() # TODO: Apply filters - # - # TODO: Exclude partially stated rooms unless the `required_state` has - # `["m.room.member", "$LAZY"]` filtered_room_id_set = set(sync_room_map.keys()) @@ -993,6 +1032,12 @@ async def get_room_sync_data( ) ) + # TODO: Since we can't determine whether we've already sent a room down this + # Sliding Sync connection before (we plan to add this optimization in the + # future), we're always returning the requested room state instead of + # updates. + initial = True + # Fetch the required state for the room required_state_types: List[Tuple[str, Optional[str]]] = [] for state_type, state_key_set in room_sync_config.required_state_map.items(): @@ -1015,10 +1060,17 @@ async def get_room_sync_data( else: required_state_types.append((state_type, state_key)) - room_state = await self.storage_controllers.state.get_current_state( - room_id, - StateFilter.from_types(required_state_types), - ) + if initial: + room_state = await self.storage_controllers.state.get_current_state( + room_id, + StateFilter.from_types(required_state_types), + await_full_state=False, + ) + # TODO: Query `current_state_delta_stream` and reverse/rewind back to the `to_token` + else: + # TODO: Once we can figure out if we've sent a room down this connection before, + # we can return updates instead of the full required state. + raise NotImplementedError() return SlidingSyncResult.RoomResult( # TODO: Dummy value @@ -1027,11 +1079,7 @@ async def get_room_sync_data( avatar=None, # TODO: Dummy value heroes=None, - # TODO: Since we can't determine whether we've already sent a room down this - # Sliding Sync connection before (we plan to add this optimization in the - # future), we're always returning the requested room state instead of - # updates. - initial=True, + initial=initial, required_state=list(room_state.values()), timeline_events=timeline_events, bundled_aggregations=bundled_aggregations, From 41988a73582de58fe4ca71f0755957b1e33b5a64 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 20 Jun 2024 23:33:11 -0500 Subject: [PATCH 028/109] Allow the next room to fill its place if we exclude a partially-stated room --- synapse/handlers/sliding_sync.py | 82 +++++++++++++++++++++++--------- 1 file changed, 59 insertions(+), 23 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index d6fc79f7089..a485c41ee38 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -20,6 +20,7 @@ import logging from typing import ( TYPE_CHECKING, + AbstractSet, Dict, Final, List, @@ -27,7 +28,6 @@ Set, Tuple, TypeVar, - AbstractSet, ) import attr @@ -100,7 +100,7 @@ def filter_membership_for_sync(*, membership: str, user_id: str, sender: str) -> R = TypeVar("R") -def get_first_item_in_set(target_set: Optional[AbstractSet[R]]) -> R: +def get_first_item_in_set(target_set: Optional[AbstractSet[R]]) -> Optional[R]: """ Helper to grab the "first" item in a set. A set is an unordered collection so this is just a way to grab some item in the set. 
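For reference, the helper tweaked in this hunk is small enough to exercise on its own. A minimal standalone sketch (the assertions are illustrative examples, not part of the patch):

```python
from typing import AbstractSet, Optional, Tuple, TypeVar

R = TypeVar("R")


def get_first_item_in_set(target_set: Optional[AbstractSet[R]]) -> Optional[R]:
    # `next()` with a default returns `None` for an empty set or `None` input
    # instead of raising `StopIteration`.
    return next(iter(target_set or []), None)


wildcard_only: AbstractSet[Tuple[str, str]] = {("m.room.member", "*")}
assert get_first_item_in_set(wildcard_only) == ("m.room.member", "*")
assert get_first_item_in_set(set()) is None
assert get_first_item_in_set(None) is None
```

The call sites lean on the "only item" pattern: when a set is known to hold a single entry, the "first" item is deterministic, which is how the wildcard and lazy-membership checks use it.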
@@ -167,6 +167,17 @@ def from_room_config( required_state_map=required_state_map, ) + def deep_copy(self) -> "RoomSyncConfig": + required_state_map: Dict[str, Set[Tuple[str, str]]] = { + state_type: state_key_set.copy() + for state_type, state_key_set in self.required_state_map.items() + } + + return RoomSyncConfig( + timeline_limit=self.timeline_limit, + required_state_map=required_state_map, + ) + def combine_room_sync_config( self, other_room_sync_config: "RoomSyncConfig" ) -> None: @@ -186,7 +197,7 @@ def combine_room_sync_config( # If we already have a wildcard, we don't need to add anything else if ( # This is just a tricky way to grab the first element of the set - next(iter(self.required_state_map.get(state_type) or []), None) + get_first_item_in_set(self.required_state_map.get(state_type)) == (state_type, StateKeys.WILDCARD) ): continue @@ -358,31 +369,36 @@ async def current_sync_for_user( sync_config.user, sync_room_map, list_config.filters, to_token ) + # Sort the list sorted_room_info = await self.sort_rooms( filtered_sync_room_map, to_token ) + # Find which rooms are partially stated and may need to be filtered out + # depending on the `required_state` requested (see below). + partial_state_room_map = await self.store.is_partial_state_room_batched( + filtered_sync_room_map.keys() + ) + + # Since creating the `RoomSyncConfig` takes some work, let's just do it + # once and make a copy whenever we need it. + room_sync_config = RoomSyncConfig.from_room_config(list_config) + ops: List[SlidingSyncResult.SlidingWindowList.Operation] = [] if list_config.ranges: for range in list_config.ranges: - sliced_room_ids = [ - room_id - for room_id, _ in sorted_room_info[range[0] : range[1]] - ] - - ops.append( - SlidingSyncResult.SlidingWindowList.Operation( - op=OperationType.SYNC, - range=range, - room_ids=sliced_room_ids, - ) - ) - - # Take the superset of the `RoomSyncConfig` for each room - for room_id in sliced_room_ids: - room_sync_config = RoomSyncConfig.from_room_config( - list_config - ) + room_ids_in_list = [] + + # We're going to loop through the sorted list of rooms starting + # at the range start index and keep adding rooms until we fill + # up the range or run out of rooms. + current_range_index = range[0] + range_end_index = range[1] + while ( + current_range_index < range_end_index + and current_range_index <= len(sorted_room_info) - 1 + ): + room_id, _ = sorted_room_info[current_range_index] membership_state_keys = ( room_sync_config.required_state_map.get( @@ -392,14 +408,19 @@ async def current_sync_for_user( # Exclude partially stated rooms unless the `required_state` # only has `["m.room.member", "$LAZY"]` for membership. if ( - is_room_partial + partial_state_room_map.get(room_id) and membership_state_keys is not None and len(membership_state_keys) == 1 and get_first_item_in_set(membership_state_keys) == (EventTypes.Member, StateKeys.LAZY) ): + # Since we're skipping this room, we need to allow + # for the next room to take its place in the list + range_end_index += 1 continue + # Take the superset of the `RoomSyncConfig` for each room. + # # Update our `relevant_room_map` with the room we're going # to display and need to fetch more info about. existing_room_sync_config = relevant_room_map.get(room_id) @@ -408,7 +429,22 @@ async def current_sync_for_user( room_sync_config ) else: - relevant_room_map[room_id] = room_sync_config + # Make a copy so if we modify it later, it doesn't + # affect all references. 
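+ # (Without the copy, every room in this list would share the
+ # single `room_sync_config` created above, and a later
+ # `combine_room_sync_config()` for one room would mutate the
+ # config of all of them.)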
+ relevant_room_map[room_id] = ( + room_sync_config.deep_copy() + ) + + room_ids_in_list.append(room_id) + current_range_index += 1 + + ops.append( + SlidingSyncResult.SlidingWindowList.Operation( + op=OperationType.SYNC, + range=range, + room_ids=room_ids_in_list, + ) + ) lists[list_key] = SlidingSyncResult.SlidingWindowList( count=len(sorted_room_info), From 8dc40098b809142c74d9d8fce1f8af7d2aace7c3 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 20 Jun 2024 23:48:06 -0500 Subject: [PATCH 029/109] Add a test --- tests/rest/client/test_sync.py | 58 ++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index a55804c96ca..77b34e8a261 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1237,6 +1237,7 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: ) self.store = hs.get_datastores().main self.event_sources = hs.get_event_sources() + self.storage_controllers = hs.get_storage_controllers() def _create_dm_room( self, @@ -2266,3 +2267,60 @@ def test_rooms_ban_incremental_sync2(self) -> None: False, channel.json_body["rooms"][room_id1], ) + + def test_rooms_required_state_initial_sync(self) -> None: + """ + Test `rooms.required_state` returns requested state events in the room during an + initial sync. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [ + [EventTypes.Create, ""], + [EventTypes.RoomHistoryVisibility, ""], + # This one doesn't exist in the room + [EventTypes.Tombstone, ""], + ], + "timeline_limit": 3, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state_ids(room_id1) + ) + + self.assertEqual( + [ + state_event["event_id"] + for state_event in channel.json_body["rooms"][room_id1][ + "required_state" + ] + ], + [ + state_map[(EventTypes.Create, "")], + state_map[(EventTypes.RoomHistoryVisibility, "")], + ], + channel.json_body["rooms"][room_id1]["required_state"], + ) + + # TODO: Add more `required_state` tests + + # TODO: Add tests for partially-stated rooms being excluded From 5fdbeff2154f96ff8a384d4388de19cd5a6e0856 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 20 Jun 2024 23:52:49 -0500 Subject: [PATCH 030/109] Make sure we add some specific tests --- tests/rest/client/test_sync.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 77b34e8a261..2d3b7357a4d 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -2322,5 +2322,7 @@ def test_rooms_required_state_initial_sync(self) -> None: ) # TODO: Add more `required_state` tests + # TODO: Add test for `"required_state": [ ["*","*"] ],` + # TODO: Add test for `"required_state": [ ["*","foobarbaz"] ],` # TODO: Add tests for partially-stated rooms being excluded From 87fac19fdebd070b09a7a7daae7217ccaa2f2d1e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 24 Jun 2024 10:15:15 -0500 Subject: [PATCH 
031/109] Fix lints See https://github.com/element-hq/synapse/pull/17320#discussion_r1647701997 ``` synapse/federation/federation_server.py:677: error: Cannot determine type of "_join_rate_per_room_limiter" [has-type] synapse/federation/federation_server.py:720: error: Cannot determine type of "_join_rate_per_room_limiter" [has-type] ``` --- synapse/types/handlers/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py index 1ba5ea55c14..8e097d8b486 100644 --- a/synapse/types/handlers/__init__.py +++ b/synapse/types/handlers/__init__.py @@ -31,10 +31,12 @@ from pydantic import Extra from synapse.events import EventBase -from synapse.handlers.relations import BundledAggregations from synapse.types import JsonDict, JsonMapping, StreamToken, UserID from synapse.types.rest.client import SlidingSyncBody +if TYPE_CHECKING: + from synapse.handlers.relations import BundledAggregations + class ShutdownRoomParams(TypedDict): """ @@ -197,7 +199,7 @@ class RoomResult: initial: bool required_state: List[EventBase] timeline_events: List[EventBase] - bundled_aggregations: Optional[Dict[str, BundledAggregations]] + bundled_aggregations: Optional[Dict[str, "BundledAggregations"]] is_dm: bool stripped_state: Optional[List[JsonDict]] prev_batch: StreamToken From 0e71a2f2d1231603d4643f9402dbd7b4f4df226b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 24 Jun 2024 15:56:27 -0500 Subject: [PATCH 032/109] Add TODO for filtering call invites in public rooms --- synapse/handlers/sliding_sync.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 3e49054e430..a6e84cb976e 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -841,6 +841,8 @@ async def get_room_sync_data( != Membership.JOIN, filter_send_to_client=True, ) + # TODO: Filter out `EventTypes.CallInvite` in public rooms, + # see https://github.com/element-hq/synapse/pull/16908#discussion_r1651598029 # Determine how many "live" events we have (events within the given token range). # From 21ca02c5ad2b030f3a3d76526690b23f40ef9412 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 24 Jun 2024 16:08:58 -0500 Subject: [PATCH 033/109] `newly_joined` vs `limited` already being tracked in a discussion See https://github.com/element-hq/synapse/pull/17320#discussion_r1646579623 if anything comes out of it. --- synapse/handlers/sliding_sync.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index a6e84cb976e..4d73134e7ff 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -829,9 +829,6 @@ async def get_room_sync_data( stream=timeline_events[0].internal_metadata.stream_ordering - 1 ) - # TODO: Does `newly_joined` affect `limited`? It does in sync v2 but I fail - # to understand why. 
- # Make sure we don't expose any events that the client shouldn't see timeline_events = await filter_events_for_client( self.storage_controllers, From 35683119890e06bb65bca24e303154acb4f62a1b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 24 Jun 2024 19:08:18 -0500 Subject: [PATCH 034/109] Fix spelling typo --- synapse/handlers/sliding_sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 4d73134e7ff..d5390e89459 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -535,7 +535,7 @@ async def get_sync_room_ids_for_user( # 2) ----------------------------------------------------- # We fix-up newly_left rooms after the first fixup because it may have removed - # some left rooms that we can figure out our newly_left in the following code + # some left rooms that we can figure out are newly_left in the following code # 2) Fetch membership changes that fall in the range from `from_token` up to `to_token` membership_change_events_in_from_to_range = [] From 7aea406c22066f061cf537ed25d0dbb00a107308 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 11:18:27 -0500 Subject: [PATCH 035/109] Just stripped_state for invite rooms --- synapse/handlers/sliding_sync.py | 27 +++-- synapse/rest/client/sync.py | 57 ++++++--- synapse/types/handlers/__init__.py | 15 ++- tests/rest/client/test_sync.py | 188 ++++++++++++++++++++++------- 4 files changed, 208 insertions(+), 79 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index d5390e89459..991d32356ee 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -755,14 +755,23 @@ async def get_room_sync_data( """ # Assemble the list of timeline events - timeline_events: List[EventBase] = [] - limited = False - # We want to start off using the `to_token` (vs `from_token`) because we look - # backwards from the `to_token` up to the `timeline_limit` and we might not - # reach the `from_token` before we hit the limit. We will update the room stream - # position once we've fetched the events to point to the earliest event fetched. - prev_batch_token = to_token - if room_sync_config.timeline_limit > 0: + timeline_events: Optional[List[EventBase]] = None + limited: Optional[bool] = None + prev_batch_token: Optional[StreamToken] = None + num_live: Optional[int] = None + if ( + room_sync_config.timeline_limit > 0 + # No timeline for invite/knock rooms (just `stripped_state`) + and rooms_for_user_membership_at_to_token.membership + not in (Membership.INVITE, Membership.KNOCK) + ): + limited = False + # We want to start off using the `to_token` (vs `from_token`) because we look + # backwards from the `to_token` up to the `timeline_limit` and we might not + # reach the `from_token` before we hit the limit. We will update the room stream + # position once we've fetched the events to point to the earliest event fetched. + prev_batch_token = to_token + newly_joined = False if ( # We can only determine new-ness if we have a `from_token` to define our range @@ -903,7 +912,7 @@ async def get_room_sync_data( # If the timeline is `limited=True`, the client does not have all events # necessary to calculate aggregations themselves. 
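 # (As of this patch, `timeline_events` can be `None` for invite/knock
 # rooms, which is why the guard below also checks for that.)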
bundled_aggregations = None
- if limited:
+ if limited and timeline_events is not None:
 bundled_aggregations = (
 await self.relations_handler.get_bundled_aggregations(
 timeline_events, user.to_string()
diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py
index b60af6356ab..1d955a2e893 100644
--- a/synapse/rest/client/sync.py
+++ b/synapse/rest/client/sync.py
@@ -973,31 +973,13 @@ async def encode_rooms(
 requester=requester,
 )
 
- serialized_rooms = {}
+ serialized_rooms: Dict[str, JsonDict] = {}
 for room_id, room_result in rooms.items():
- serialized_timeline = await self.event_serializer.serialize_events(
- room_result.timeline_events,
- time_now,
- config=serialize_options,
- bundle_aggregations=room_result.bundled_aggregations,
- )
-
- serialized_required_state = await self.event_serializer.serialize_events(
- room_result.required_state,
- time_now,
- config=serialize_options,
- )
-
 serialized_rooms[room_id] = {
- "required_state": serialized_required_state,
- "timeline": serialized_timeline,
- "prev_batch": await room_result.prev_batch.to_string(self.store),
- "limited": room_result.limited,
 "joined_count": room_result.joined_count,
 "invited_count": room_result.invited_count,
 "notification_count": room_result.notification_count,
 "highlight_count": room_result.highlight_count,
- "num_live": room_result.num_live,
 }
 
 if room_result.name:
@@ -1014,12 +996,47 @@
 if room_result.initial:
 serialized_rooms[room_id]["initial"] = room_result.initial
 
+ # This will be omitted for invite/knock rooms with `stripped_state`
+ if room_result.required_state is not None:
+ serialized_required_state = (
+ await self.event_serializer.serialize_events(
+ room_result.required_state,
+ time_now,
+ config=serialize_options,
+ )
+ )
+ serialized_rooms[room_id]["required_state"] = serialized_required_state
+
+ # This will be omitted for invite/knock rooms with `stripped_state`
+ if room_result.timeline_events is not None:
+ serialized_timeline = await self.event_serializer.serialize_events(
+ room_result.timeline_events,
+ time_now,
+ config=serialize_options,
+ bundle_aggregations=room_result.bundled_aggregations,
+ )
+ serialized_rooms[room_id]["timeline"] = serialized_timeline
+
+ # This will be omitted for invite/knock rooms with `stripped_state`
+ if room_result.limited is not None:
+ serialized_rooms[room_id]["limited"] = room_result.limited
+
+ # This will be omitted for invite/knock rooms with `stripped_state`
+ if room_result.prev_batch is not None:
+ serialized_rooms[room_id]["prev_batch"] = (
+ await room_result.prev_batch.to_string(self.store)
+ )
+
+ # This will be omitted for invite/knock rooms with `stripped_state`
+ if room_result.num_live is not None:
+ serialized_rooms[room_id]["num_live"] = room_result.num_live
+
 # Field should be absent on non-DM rooms
 if room_result.is_dm:
 serialized_rooms[room_id]["is_dm"] = room_result.is_dm
 
 # Stripped state only applies to invite/knock rooms
- if room_result.stripped_state:
+ if room_result.stripped_state is not None:
 # TODO: `knocked_state` but that isn't specced yet. 
# # TODO: Instead of adding `knocked_state`, it would be good to rename diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py index 8e097d8b486..d50d02bfc60 100644 --- a/synapse/types/handlers/__init__.py +++ b/synapse/types/handlers/__init__.py @@ -197,18 +197,23 @@ class RoomResult: avatar: Optional[str] heroes: Optional[List[EventBase]] initial: bool - required_state: List[EventBase] - timeline_events: List[EventBase] + # Only optional because it won't be included for invite/knock rooms with `stripped_state` + required_state: Optional[List[EventBase]] + # Only optional because it won't be included for invite/knock rooms with `stripped_state` + timeline_events: Optional[List[EventBase]] bundled_aggregations: Optional[Dict[str, "BundledAggregations"]] is_dm: bool stripped_state: Optional[List[JsonDict]] - prev_batch: StreamToken - limited: bool + # Only optional because it won't be included for invite/knock rooms with `stripped_state` + prev_batch: Optional[StreamToken] + # Only optional because it won't be included for invite/knock rooms with `stripped_state` + limited: Optional[bool] joined_count: int invited_count: int notification_count: int highlight_count: int - num_live: int + # Only optional because it won't be included for invite/knock rooms with `stripped_state` + num_live: Optional[int] @attr.s(slots=True, frozen=True, auto_attribs=True) class SlidingWindowList: diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index a55804c96ca..ad6b29b412a 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1881,27 +1881,134 @@ def test_rooms_invite_shared_history_initial_sync(self) -> None: ) self.assertEqual(channel.code, 200, channel.json_body) - # Should not see anything (except maybe the invite event) because we haven't - # joined yet (history visibility is `shared`) (`filter_events_for_client(...)` - # is doing the work here) - self.assertEqual( - channel.json_body["rooms"][room_id1]["timeline"], - [], - channel.json_body["rooms"][room_id1]["timeline"], + # `timeline` is omitted for `invite` rooms with `stripped_state` + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("timeline"), + channel.json_body["rooms"][room_id1], ) - # No "live" events in an initial sync (no `from_token` to define the "live" - # range) and no events returned in the timeline anyway so nothing could be - # "live". - self.assertEqual( - channel.json_body["rooms"][room_id1]["num_live"], - 0, + # `num_live` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("num_live"), + channel.json_body["rooms"][room_id1], + ) + # `limited` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("limited"), + channel.json_body["rooms"][room_id1], + ) + # `prev_batch` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("prev_batch"), channel.json_body["rooms"][room_id1], ) - # Even though we don't get any timeline events because they are filtered out, - # there is still more to paginate + # We should have some `stripped_state` so the potential joiner can identify the + # room (we don't care about the order). 
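+ # Each stripped event is cut down to just `type`, `state_key`, `sender`,
+ # and `content` (via `strip_event` in the handler), which are exactly the
+ # fields asserted on below.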
+ self.assertCountEqual( + channel.json_body["rooms"][room_id1]["invite_state"], + [ + { + "content": {"creator": user2_id, "room_version": "10"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.create", + }, + { + "content": {"join_rule": "public"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.join_rules", + }, + { + "content": {"displayname": user2.localpart, "membership": "join"}, + "sender": user2_id, + "state_key": user2_id, + "type": "m.room.member", + }, + { + "content": {"displayname": user1.localpart, "membership": "invite"}, + "sender": user2_id, + "state_key": user1_id, + "type": "m.room.member", + }, + ], + channel.json_body["rooms"][room_id1]["invite_state"], + ) + + def test_rooms_invite_shared_history_incremental_sync(self) -> None: + """ + Test that `rooms` we are invited to have some stripped `invite_state` + + This is an `invite` room so we should only have `stripped_state` (no timeline) + but we also shouldn't see any timeline events because the history visiblity is + `shared` and we haven't joined the room yet. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user1 = UserID.from_string(user1_id) + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + user2 = UserID.from_string(user2_id) + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + # Ensure we're testing with a room with `shared` history visibility which means + # history visible until you actually join the room. + history_visibility_response = self.helper.get_state( + room_id1, EventTypes.RoomHistoryVisibility, tok=user2_tok + ) self.assertEqual( - channel.json_body["rooms"][room_id1]["limited"], - True, + history_visibility_response.get("history_visibility"), + HistoryVisibility.SHARED, + ) + + self.helper.send(room_id1, "activity before invite1", tok=user2_tok) + self.helper.send(room_id1, "activity before invite2", tok=user2_tok) + self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + self.helper.send(room_id1, "activity after invite3", tok=user2_tok) + self.helper.send(room_id1, "activity after invite4", tok=user2_tok) + + from_token = self.event_sources.get_current_token() + + self.helper.send(room_id1, "activity after token5", tok=user2_tok) + self.helper.send(room_id1, "activity after toekn6", tok=user2_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint + + f"?pos={self.get_success( + from_token.to_string(self.store) + )}", + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 3, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # `timeline` is omitted for `invite` rooms with `stripped_state` + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("timeline"), + channel.json_body["rooms"][room_id1], + ) + # `num_live` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("num_live"), + channel.json_body["rooms"][room_id1], + ) + # `limited` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("limited"), + channel.json_body["rooms"][room_id1], + ) + # `prev_batch` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("prev_batch"), 
channel.json_body["rooms"][room_id1], ) # We should have some `stripped_state` so the potential joiner can identify the @@ -1977,12 +2084,10 @@ def test_rooms_invite_world_readable_history_initial_sync(self) -> None: ) self.helper.send(room_id1, "activity before1", tok=user2_tok) - event_response2 = self.helper.send(room_id1, "activity before2", tok=user2_tok) - use1_invite_response = self.helper.invite( - room_id1, src=user2_id, targ=user1_id, tok=user2_tok - ) - event_response3 = self.helper.send(room_id1, "activity after3", tok=user2_tok) - event_response4 = self.helper.send(room_id1, "activity after4", tok=user2_tok) + self.helper.send(room_id1, "activity before2", tok=user2_tok) + self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + self.helper.send(room_id1, "activity after3", tok=user2_tok) + self.helper.send(room_id1, "activity after4", tok=user2_tok) # Make the Sliding Sync request channel = self.make_request( @@ -2002,31 +2107,24 @@ def test_rooms_invite_world_readable_history_initial_sync(self) -> None: ) self.assertEqual(channel.code, 200, channel.json_body) - # Should see the last 4 events in the room - self.assertEqual( - [ - event["event_id"] - for event in channel.json_body["rooms"][room_id1]["timeline"] - ], - [ - event_response2["event_id"], - use1_invite_response["event_id"], - event_response3["event_id"], - event_response4["event_id"], - ], - channel.json_body["rooms"][room_id1]["timeline"], + # `timeline` is omitted for `invite` rooms with `stripped_state` + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("timeline"), + channel.json_body["rooms"][room_id1], ) - # No "live" events in an initial sync (no `from_token` to define the "live" - # range) - self.assertEqual( - channel.json_body["rooms"][room_id1]["num_live"], - 0, + # `num_live` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("num_live"), channel.json_body["rooms"][room_id1], ) - # There is still more to paginate - self.assertEqual( - channel.json_body["rooms"][room_id1]["limited"], - True, + # `limited` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("limited"), + channel.json_body["rooms"][room_id1], + ) + # `prev_batch` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("prev_batch"), channel.json_body["rooms"][room_id1], ) # We should have some `stripped_state` so the potential joiner can identify the From e3e431fab4ba821b62558ebdffb5bbad2fcc6da3 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 12:35:48 -0500 Subject: [PATCH 036/109] Finish up stripped_state for invite rooms See https://github.com/element-hq/synapse/pull/17320#discussion_r1646581077 --- synapse/handlers/sliding_sync.py | 27 ++--- synapse/types/handlers/__init__.py | 1 + tests/rest/client/test_sync.py | 156 +++++++++++++++++++++++++++-- 3 files changed, 162 insertions(+), 22 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 991d32356ee..e7810804707 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -26,6 +26,7 @@ from synapse.api.constants import AccountDataTypes, Direction, EventTypes, Membership from synapse.events import EventBase from synapse.events.utils import strip_event +from synapse.handlers.relations import BundledAggregations from 
synapse.storage.roommember import RoomsForUser from synapse.types import ( JsonDict, @@ -756,6 +757,7 @@ async def get_room_sync_data( # Assemble the list of timeline events timeline_events: Optional[List[EventBase]] = None + bundled_aggregations: Optional[Dict[str, BundledAggregations]] = None limited: Optional[bool] = None prev_batch_token: Optional[StreamToken] = None num_live: Optional[int] = None @@ -848,7 +850,9 @@ async def get_room_sync_data( filter_send_to_client=True, ) # TODO: Filter out `EventTypes.CallInvite` in public rooms, - # see https://github.com/element-hq/synapse/pull/16908#discussion_r1651598029 + # see https://github.com/element-hq/synapse/issues/17359 + + # TODO: Handle timeline gaps (`get_timeline_gaps()`) # Determine how many "live" events we have (events within the given token range). # @@ -878,6 +882,15 @@ async def get_room_sync_data( # this more with a binary search (bisect). break + # If the timeline is `limited=True`, the client does not have all events + # necessary to calculate aggregations themselves. + if limited: + bundled_aggregations = ( + await self.relations_handler.get_bundled_aggregations( + timeline_events, user.to_string() + ) + ) + # Update the `prev_batch_token` to point to the position that allows us to # keep paginating backwards from the oldest event we return in the timeline. prev_batch_token = prev_batch_token.copy_and_replace( @@ -907,18 +920,6 @@ async def get_room_sync_data( stripped_state.append(strip_event(invite_or_knock_event)) - # TODO: Handle timeline gaps (`get_timeline_gaps()`) - - # If the timeline is `limited=True`, the client does not have all events - # necessary to calculate aggregations themselves. - bundled_aggregations = None - if limited and timeline_events is not None: - bundled_aggregations = ( - await self.relations_handler.get_bundled_aggregations( - timeline_events, user.to_string() - ) - ) - return SlidingSyncResult.RoomResult( # TODO: Dummy value name=None, diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py index d50d02bfc60..3cd3c8fb0fa 100644 --- a/synapse/types/handlers/__init__.py +++ b/synapse/types/handlers/__init__.py @@ -203,6 +203,7 @@ class RoomResult: timeline_events: Optional[List[EventBase]] bundled_aggregations: Optional[Dict[str, "BundledAggregations"]] is_dm: bool + # Optional because it's only relevant to invite/knock rooms stripped_state: Optional[List[JsonDict]] # Only optional because it won't be included for invite/knock rooms with `stripped_state` prev_batch: Optional[StreamToken] diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index ad6b29b412a..ba7cae8645f 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1836,9 +1836,12 @@ def test_rooms_newly_joined_incremental_sync(self) -> None: def test_rooms_invite_shared_history_initial_sync(self) -> None: """ - Test that `rooms` we are invited to have some stripped `invite_state` and that - we can't see any timeline events because the history visiblity is `shared` and - we haven't joined the room yet. + Test that `rooms` we are invited to have some stripped `invite_state` during an + initial sync. + + This is an `invite` room so we should only have `stripped_state` (no `timeline`) + but we also shouldn't see any timeline events because the history visiblity is + `shared` and we haven't joined the room yet. 
""" user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -1936,9 +1939,10 @@ def test_rooms_invite_shared_history_initial_sync(self) -> None: def test_rooms_invite_shared_history_incremental_sync(self) -> None: """ - Test that `rooms` we are invited to have some stripped `invite_state` - - This is an `invite` room so we should only have `stripped_state` (no timeline) + Test that `rooms` we are invited to have some stripped `invite_state` during an + incremental sync. + + This is an `invite` room so we should only have `stripped_state` (no `timeline`) but we also shouldn't see any timeline events because the history visiblity is `shared` and we haven't joined the room yet. """ @@ -2046,9 +2050,14 @@ def test_rooms_invite_shared_history_incremental_sync(self) -> None: def test_rooms_invite_world_readable_history_initial_sync(self) -> None: """ - Test that `rooms` we are invited to have some stripped `invite_state` and that - we can't see any timeline events because the history visiblity is `shared` and - we haven't joined the room yet. + Test that `rooms` we are invited to have some stripped `invite_state` during an + initial sync. + + This is an `invite` room so we should only have `stripped_state` (no `timeline`) + but depending on the semantics we decide, we could potentially see some + historical events before/after the `from_token` because the history is + `world_readable`. Same situation for events after the `from_token` if the + history visibility was set to `invited`. """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -2160,6 +2169,135 @@ def test_rooms_invite_world_readable_history_initial_sync(self) -> None: channel.json_body["rooms"][room_id1]["invite_state"], ) + def test_rooms_invite_world_readable_history_incremental_sync(self) -> None: + """ + Test that `rooms` we are invited to have some stripped `invite_state` during an + incremental sync. + + This is an `invite` room so we should only have `stripped_state` (no `timeline`) + but depending on the semantics we decide, we could potentially see some + historical events before/after the `from_token` because the history is + `world_readable`. Same situation for events after the `from_token` if the + history visibility was set to `invited`. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user1 = UserID.from_string(user1_id) + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + user2 = UserID.from_string(user2_id) + + room_id1 = self.helper.create_room_as( + user2_id, + tok=user2_tok, + extra_content={ + "preset": "public_chat", + "initial_state": [ + { + "content": { + "history_visibility": HistoryVisibility.WORLD_READABLE + }, + "state_key": "", + "type": EventTypes.RoomHistoryVisibility, + } + ], + }, + ) + # Ensure we're testing with a room with `world_readable` history visibility + # which means events are visible to anyone even without membership. 
+ history_visibility_response = self.helper.get_state( + room_id1, EventTypes.RoomHistoryVisibility, tok=user2_tok + ) + self.assertEqual( + history_visibility_response.get("history_visibility"), + HistoryVisibility.WORLD_READABLE, + ) + + self.helper.send(room_id1, "activity before invite1", tok=user2_tok) + self.helper.send(room_id1, "activity before invite2", tok=user2_tok) + self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + self.helper.send(room_id1, "activity after invite3", tok=user2_tok) + self.helper.send(room_id1, "activity after invite4", tok=user2_tok) + + from_token = self.event_sources.get_current_token() + + self.helper.send(room_id1, "activity after token5", tok=user2_tok) + self.helper.send(room_id1, "activity after toekn6", tok=user2_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint + + f"?pos={self.get_success( + from_token.to_string(self.store) + )}", + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + # Large enough to see the latest events and before the invite + "timeline_limit": 4, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # `timeline` is omitted for `invite` rooms with `stripped_state` + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("timeline"), + channel.json_body["rooms"][room_id1], + ) + # `num_live` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("num_live"), + channel.json_body["rooms"][room_id1], + ) + # `limited` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("limited"), + channel.json_body["rooms"][room_id1], + ) + # `prev_batch` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("prev_batch"), + channel.json_body["rooms"][room_id1], + ) + # We should have some `stripped_state` so the potential joiner can identify the + # room (we don't care about the order). 
+ self.assertCountEqual( + channel.json_body["rooms"][room_id1]["invite_state"], + [ + { + "content": {"creator": user2_id, "room_version": "10"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.create", + }, + { + "content": {"join_rule": "public"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.join_rules", + }, + { + "content": {"displayname": user2.localpart, "membership": "join"}, + "sender": user2_id, + "state_key": user2_id, + "type": "m.room.member", + }, + { + "content": {"displayname": user1.localpart, "membership": "invite"}, + "sender": user2_id, + "state_key": user1_id, + "type": "m.room.member", + }, + ], + channel.json_body["rooms"][room_id1]["invite_state"], + ) + def test_rooms_ban_initial_sync(self) -> None: """ Test that `rooms` we are banned from in an intial sync only allows us to see From 303d834b78a7c93e390da3f426754cafff07c20f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 12:38:09 -0500 Subject: [PATCH 037/109] Add tracking discussion for not optional in the future --- synapse/handlers/sliding_sync.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index e7810804707..0538fddf845 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -756,6 +756,11 @@ async def get_room_sync_data( """ # Assemble the list of timeline events + # + # It would be nice to make the `rooms` response more uniform regardless of + # membership. Currently, we have to make all of these optional because + # `invite`/`knock` rooms only have `stripped_state`. See + # https://github.com/matrix-org/matrix-spec-proposals/pull/3575#discussion_r1653045932 timeline_events: Optional[List[EventBase]] = None bundled_aggregations: Optional[Dict[str, BundledAggregations]] = None limited: Optional[bool] = None From 4c2213144258cef2b2ac7960f290649a076d1927 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 15:27:43 -0500 Subject: [PATCH 038/109] Start testing for the correct room membership (failing) --- tests/handlers/test_sliding_sync.py | 477 +++++++++++++++++++++++++--- 1 file changed, 432 insertions(+), 45 deletions(-) diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index 0358239c7f4..df262400e4f 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -63,6 +63,7 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.sliding_sync_handler = self.hs.get_sliding_sync_handler() self.store = self.hs.get_datastores().main self.event_sources = hs.get_event_sources() + self.storage_controllers = hs.get_storage_controllers() def test_no_rooms(self) -> None: """ @@ -90,10 +91,13 @@ def test_get_newly_joined_room(self) -> None: """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") before_room_token = self.event_sources.get_current_token() - room_id = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) + room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response = self.helper.join(room_id, user1_id, tok=user1_tok) after_room_token = self.event_sources.get_current_token() @@ -106,6 +110,12 @@ def test_get_newly_joined_room(self) -> None: ) self.assertEqual(room_id_results.keys(), {room_id}) + # It should be pointing to the join event (latest membership event in the + # from/to 
range) + self.assertEqual( + room_id_results[room_id].event_id, + join_response["event_id"], + ) def test_get_already_joined_room(self) -> None: """ @@ -113,8 +123,11 @@ def test_get_already_joined_room(self) -> None: """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") - room_id = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) + room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response = self.helper.join(room_id, user1_id, tok=user1_tok) after_room_token = self.event_sources.get_current_token() @@ -127,6 +140,12 @@ def test_get_already_joined_room(self) -> None: ) self.assertEqual(room_id_results.keys(), {room_id}) + # It should be pointing to the join event (latest membership event in the + # from/to range) + self.assertEqual( + room_id_results[room_id].event_id, + join_response["event_id"], + ) def test_get_invited_banned_knocked_room(self) -> None: """ @@ -142,14 +161,18 @@ def test_get_invited_banned_knocked_room(self) -> None: # Setup the invited room (user2 invites user1 to the room) invited_room_id = self.helper.create_room_as(user2_id, tok=user2_tok) - self.helper.invite(invited_room_id, targ=user1_id, tok=user2_tok) + invite_response = self.helper.invite( + invited_room_id, targ=user1_id, tok=user2_tok + ) # Setup the ban room (user2 bans user1 from the room) ban_room_id = self.helper.create_room_as( user2_id, tok=user2_tok, is_public=True ) self.helper.join(ban_room_id, user1_id, tok=user1_tok) - self.helper.ban(ban_room_id, src=user2_id, targ=user1_id, tok=user2_tok) + ban_response = self.helper.ban( + ban_room_id, src=user2_id, targ=user1_id, tok=user2_tok + ) # Setup the knock room (user1 knocks on the room) knock_room_id = self.helper.create_room_as( @@ -162,13 +185,19 @@ def test_get_invited_banned_knocked_room(self) -> None: tok=user2_tok, ) # User1 knocks on the room - channel = self.make_request( + knock_channel = self.make_request( "POST", "/_matrix/client/r0/knock/%s" % (knock_room_id,), b"{}", user1_tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(knock_channel.code, 200, knock_channel.result) + knock_room_membership_state_event = self.get_success( + self.storage_controllers.state.get_current_state_event( + knock_room_id, EventTypes.Member, user1_id + ) + ) + assert knock_room_membership_state_event is not None after_room_token = self.event_sources.get_current_token() @@ -189,6 +218,20 @@ def test_get_invited_banned_knocked_room(self) -> None: knock_room_id, }, ) + # It should be pointing to the the respective membership event (latest + # membership event in the from/to range) + self.assertEqual( + room_id_results[invited_room_id].event_id, + invite_response["event_id"], + ) + self.assertEqual( + room_id_results[ban_room_id].event_id, + ban_response["event_id"], + ) + self.assertEqual( + room_id_results[knock_room_id].event_id, + knock_room_membership_state_event.event_id, + ) def test_get_kicked_room(self) -> None: """ @@ -206,7 +249,7 @@ def test_get_kicked_room(self) -> None: ) self.helper.join(kick_room_id, user1_id, tok=user1_tok) # Kick user1 from the room - self.helper.change_membership( + kick_response = self.helper.change_membership( room=kick_room_id, src=user2_id, targ=user1_id, @@ -229,6 +272,11 @@ def test_get_kicked_room(self) -> None: # The kicked room should show up self.assertEqual(room_id_results.keys(), {kick_room_id}) + # It should be pointing to 
the latest membership event in the from/to range + self.assertEqual( + room_id_results[kick_room_id].event_id, + kick_response["event_id"], + ) def test_forgotten_rooms(self) -> None: """ @@ -329,7 +377,7 @@ def test_only_newly_left_rooms_show_up(self) -> None: # Leave during the from_token/to_token range (newly_left) room_id2 = self.helper.create_room_as(user1_id, tok=user1_tok) - self.helper.leave(room_id2, user1_id, tok=user1_tok) + leave_response = self.helper.leave(room_id2, user1_id, tok=user1_tok) after_room2_token = self.event_sources.get_current_token() @@ -343,6 +391,11 @@ def test_only_newly_left_rooms_show_up(self) -> None: # Only the newly_left room should show up self.assertEqual(room_id_results.keys(), {room_id2}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id2].event_id, + leave_response["event_id"], + ) def test_no_joins_after_to_token(self) -> None: """ @@ -351,16 +404,19 @@ def test_no_joins_after_to_token(self) -> None: """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") before_room1_token = self.event_sources.get_current_token() - room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok) + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() - # Room join after after our `to_token` shouldn't show up - room_id2 = self.helper.create_room_as(user1_id, tok=user1_tok) - _ = room_id2 + # Room join after our `to_token` shouldn't show up + room_id2 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id2, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -371,6 +427,11 @@ def test_no_joins_after_to_token(self) -> None: ) self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response1["event_id"], + ) def test_join_during_range_and_left_room_after_to_token(self) -> None: """ @@ -380,15 +441,18 @@ def test_join_during_range_and_left_room_after_to_token(self) -> None: """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") before_room1_token = self.event_sources.get_current_token() - room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok) + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() # Leave the room after we already have our tokens - self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_response = self.helper.leave(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -401,6 +465,18 @@ def test_join_during_range_and_left_room_after_to_token(self) -> None: # We should still see the room because we were joined during the # from_token/to_token time period. 
self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response": join_response["event_id"], + "leave_response": leave_response["event_id"], + } + ), + ) def test_join_before_range_and_left_room_after_to_token(self) -> None: """ @@ -410,13 +486,16 @@ def test_join_before_range_and_left_room_after_to_token(self) -> None: """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") - room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok) + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() # Leave the room after we already have our tokens - self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_response = self.helper.leave(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -428,6 +507,18 @@ def test_join_before_range_and_left_room_after_to_token(self) -> None: # We should still see the room because we were joined before the `from_token` self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response": join_response["event_id"], + "leave_response": leave_response["event_id"], + } + ), + ) def test_kicked_before_range_and_left_after_to_token(self) -> None: """ @@ -444,9 +535,9 @@ def test_kicked_before_range_and_left_after_to_token(self) -> None: kick_room_id = self.helper.create_room_as( user2_id, tok=user2_tok, is_public=True ) - self.helper.join(kick_room_id, user1_id, tok=user1_tok) + join_response1 = self.helper.join(kick_room_id, user1_id, tok=user1_tok) # Kick user1 from the room - self.helper.change_membership( + kick_response = self.helper.change_membership( room=kick_room_id, src=user2_id, targ=user1_id, @@ -463,8 +554,8 @@ def test_kicked_before_range_and_left_after_to_token(self) -> None: # # We have to join before we can leave (leave -> leave isn't a valid transition # or at least it doesn't work in Synapse, 403 forbidden) - self.helper.join(kick_room_id, user1_id, tok=user1_tok) - self.helper.leave(kick_room_id, user1_id, tok=user1_tok) + join_response2 = self.helper.join(kick_room_id, user1_id, tok=user1_tok) + leave_response = self.helper.leave(kick_room_id, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -476,6 +567,20 @@ def test_kicked_before_range_and_left_after_to_token(self) -> None: # We shouldn't see the room because it was forgotten self.assertEqual(room_id_results.keys(), {kick_room_id}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[kick_room_id].event_id, + kick_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response1": join_response1["event_id"], + "kick_response": kick_response["event_id"], + "join_response2": join_response2["event_id"], + 
"leave_response": leave_response["event_id"], + } + ), + ) def test_newly_left_during_range_and_join_leave_after_to_token(self) -> None: """ @@ -494,14 +599,14 @@ def test_newly_left_during_range_and_join_leave_after_to_token(self) -> None: # leave and can still re-join. room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Join and leave the room during the from/to range - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() # Join and leave the room after we already have our tokens - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response2 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response2 = self.helper.leave(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -513,6 +618,20 @@ def test_newly_left_during_range_and_join_leave_after_to_token(self) -> None: # Room should still show up because it's newly_left during the from/to range self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + leave_response1["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response1": join_response1["event_id"], + "leave_response1": leave_response1["event_id"], + "join_response2": join_response2["event_id"], + "leave_response2": leave_response2["event_id"], + } + ), + ) def test_newly_left_during_range_and_join_after_to_token(self) -> None: """ @@ -531,13 +650,13 @@ def test_newly_left_during_range_and_join_after_to_token(self) -> None: # leave and can still re-join. room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Join and leave the room during the from/to range - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() # Join the room after we already have our tokens - self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response2 = self.helper.join(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -549,11 +668,24 @@ def test_newly_left_during_range_and_join_after_to_token(self) -> None: # Room should still show up because it's newly_left during the from/to range self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + leave_response1["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response1": join_response1["event_id"], + "leave_response1": leave_response1["event_id"], + "join_response2": join_response2["event_id"], + } + ), + ) def test_no_from_token(self) -> None: """ Test that if we don't provide a `from_token`, we get all the rooms that we we're - joined to up to the `to_token`. + joined up to the `to_token`. 
Providing `from_token` only really has the effect that it adds `newly_left` rooms to the response. @@ -569,7 +701,7 @@ def test_no_from_token(self) -> None: room_id2 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Join room1 - self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) # Join and leave the room2 before the `to_token` self.helper.join(room_id2, user1_id, tok=user1_tok) @@ -590,6 +722,11 @@ def test_no_from_token(self) -> None: # Only rooms we were joined to before the `to_token` should show up self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response1["event_id"], + ) def test_from_token_ahead_of_to_token(self) -> None: """ @@ -609,7 +746,7 @@ def test_from_token_ahead_of_to_token(self) -> None: room_id4 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Join room1 before `before_room_token` - self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) # Join and leave the room2 before `before_room_token` self.helper.join(room_id2, user1_id, tok=user1_tok) @@ -651,6 +788,11 @@ def test_from_token_ahead_of_to_token(self) -> None: # There won't be any newly_left rooms because the `from_token` is ahead of the # `to_token` and that range will give no membership changes to check. self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response1["event_id"], + ) def test_leave_before_range_and_join_leave_after_to_token(self) -> None: """ @@ -741,16 +883,16 @@ def test_join_leave_multiple_times_during_range_and_after_to_token( # leave and can still re-join. 
room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Join, leave, join back to the room before the from/to range - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) - self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response2 = self.helper.join(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() # Leave and Join the room multiple times after we already have our tokens - self.helper.leave(room_id1, user1_id, tok=user1_tok) - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_response2 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response3 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response3 = self.helper.leave(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -762,6 +904,22 @@ def test_join_leave_multiple_times_during_range_and_after_to_token( # Room should show up because it was newly_left and joined during the from/to range self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response2["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response1": join_response1["event_id"], + "leave_response1": leave_response1["event_id"], + "join_response2": join_response2["event_id"], + "leave_response2": leave_response2["event_id"], + "join_response3": join_response3["event_id"], + "leave_response3": leave_response3["event_id"], + } + ), + ) def test_join_leave_multiple_times_before_range_and_after_to_token( self, @@ -781,16 +939,16 @@ def test_join_leave_multiple_times_before_range_and_after_to_token( # leave and can still re-join. 
room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Join, leave, join back to the room before the from/to range - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) - self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response2 = self.helper.join(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() # Leave and Join the room multiple times after we already have our tokens - self.helper.leave(room_id1, user1_id, tok=user1_tok) - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_response2 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response3 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response3 = self.helper.leave(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -802,6 +960,22 @@ def test_join_leave_multiple_times_before_range_and_after_to_token( # Room should show up because we were joined before the from/to range self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response2["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response1": join_response1["event_id"], + "leave_response1": leave_response1["event_id"], + "join_response2": join_response2["event_id"], + "leave_response2": leave_response2["event_id"], + "join_response3": join_response3["event_id"], + "leave_response3": leave_response3["event_id"], + } + ), + ) def test_invite_before_range_and_join_leave_after_to_token( self, @@ -821,13 +995,15 @@ def test_invite_before_range_and_join_leave_after_to_token( room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Invited to the room before the token - self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + invite_response = self.helper.invite( + room_id1, src=user2_id, targ=user1_id, tok=user2_tok + ) after_room1_token = self.event_sources.get_current_token() # Join and leave the room after we already have our tokens - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_respsonse = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response = self.helper.leave(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -839,6 +1015,217 @@ def test_invite_before_range_and_join_leave_after_to_token( # Room should show up because we were invited before the from/to range self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + invite_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "invite_response": invite_response["event_id"], + "join_respsonse": join_respsonse["event_id"], + "leave_response": leave_response["event_id"], + } + ), + ) + + def test_display_name_changes( + self, + ) -> None: + """ + Test that we point to the correct membership event within the from/to range even + if there are 
multiple `join` membership events in a row indicating + `displayname`/`avatar_url` updates. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + # We create the room with user2 so the room isn't left with no members when we + # leave and can still re-join. + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + # Update the displayname during the token range + displayname_change_during_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname during token range", + }, + tok=user1_tok, + ) + + after_room1_token = self.event_sources.get_current_token() + + # Update the displayname after the token range + displayname_change_after_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname after token range", + }, + tok=user1_tok, + ) + + room_id_results = self.get_success( + self.sliding_sync_handler.get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=before_room1_token, + to_token=after_room1_token, + ) + ) + + # Room should show up because we were joined during the from/to range + self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + displayname_change_during_token_range_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response": join_response["event_id"], + "displayname_change_during_token_range_response": displayname_change_during_token_range_response[ + "event_id" + ], + "displayname_change_after_token_range_response": displayname_change_after_token_range_response[ + "event_id" + ], + } + ), + ) + + def test_display_name_changes_leave_after_token_range( + self, + ) -> None: + """ + Test that we point to the correct membership event within the from/to range even + if there are multiple `join` membership events in a row indicating + `displayname`/`avatar_url` updates and we leave after the `to_token`. + + See condition "1a)" comments in the `get_sync_room_ids_for_user()` method. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + # We create the room with user2 so the room isn't left with no members when we + # leave and can still re-join. 
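+        # The leave in this test happens after the `to_token`, so the rewind
+        # logic must step back past it (and past the post-token displayname
+        # change) to the last membership state within the range: the
+        # displayname change during the token range.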
+ room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + # Update the displayname during the token range + displayname_change_during_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname during token range", + }, + tok=user1_tok, + ) + + after_room1_token = self.event_sources.get_current_token() + + # Update the displayname after the token range + displayname_change_after_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname after token range", + }, + tok=user1_tok, + ) + + # Leave after the token + self.helper.leave(room_id1, user1_id, tok=user1_tok) + + room_id_results = self.get_success( + self.sliding_sync_handler.get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=before_room1_token, + to_token=after_room1_token, + ) + ) + + # Room should show up because we were joined during the from/to range + self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + displayname_change_during_token_range_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response": join_response["event_id"], + "displayname_change_during_token_range_response": displayname_change_during_token_range_response[ + "event_id" + ], + "displayname_change_after_token_range_response": displayname_change_after_token_range_response[ + "event_id" + ], + } + ), + ) + + def test_display_name_changes_join_after_token_range( + self, + ) -> None: + """ + Test that multiple `join` membership events (after the `to_token`) in a row + indicating `displayname`/`avatar_url` updates doesn't affect the results (we + joined after the token range so it shouldn't show up) + + See condition "1b)" comments in the `get_sync_room_ids_for_user()` method. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + # We create the room with user2 so the room isn't left with no members when we + # leave and can still re-join. 
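+        # Here both the join and the displayname change happen after the
+        # `to_token`, so rewinding them leaves no membership within the from/to
+        # range at all and the room should be dropped entirely.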
+ room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) + + after_room1_token = self.event_sources.get_current_token() + + self.helper.join(room_id1, user1_id, tok=user1_tok) + # Update the displayname after the token range + self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname after token range", + }, + tok=user1_tok, + ) + + room_id_results = self.get_success( + self.sliding_sync_handler.get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=before_room1_token, + to_token=after_room1_token, + ) + ) + + # Room shouldn't show up because we joined after the from/to range + self.assertEqual(room_id_results.keys(), set()) def test_multiple_rooms_are_not_confused( self, From 83d6f76606bb7d1eaba9d5e498efc9fa15d13957 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 15:27:49 -0500 Subject: [PATCH 039/109] Describe `current_state_delta_stream` better --- synapse/storage/schema/main/delta/42/current_state_delta.sql | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/storage/schema/main/delta/42/current_state_delta.sql b/synapse/storage/schema/main/delta/42/current_state_delta.sql index 876b61e6a51..3d2fd694803 100644 --- a/synapse/storage/schema/main/delta/42/current_state_delta.sql +++ b/synapse/storage/schema/main/delta/42/current_state_delta.sql @@ -32,7 +32,10 @@ * limitations under the License. */ - +-- Tracks what the server thinks is the current state of the room as time goes. It does +-- not track how state progresses from the beginning of the room. So for example, when +-- you remotely join a room, the first rows will just be the state when you joined and +-- progress from there. CREATE TABLE current_state_delta_stream ( stream_id BIGINT NOT NULL, room_id TEXT NOT NULL, From fbd92e1c9da2bc89a555f3fa609bba20a76e4440 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 20:16:50 -0500 Subject: [PATCH 040/109] Add `get_current_state_delta_membership_changes_for_user(...)` (using `current_state_delta_stream`) (still need to add newly_left rooms back) --- synapse/handlers/sliding_sync.py | 351 ++++++++++++++--------- synapse/storage/databases/main/stream.py | 151 +++++++++- tests/handlers/test_sliding_sync.py | 73 ++++- 3 files changed, 428 insertions(+), 147 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 0538fddf845..2e24b0c338c 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -27,6 +27,7 @@ from synapse.events import EventBase from synapse.events.utils import strip_event from synapse.handlers.relations import BundledAggregations +from synapse.storage.databases.main.stream import CurrentStateDeltaMembership from synapse.storage.roommember import RoomsForUser from synapse.types import ( JsonDict, @@ -369,6 +370,9 @@ async def get_sync_room_ids_for_user( # Our working list of rooms that can show up in the sync response sync_room_id_set = { + # Note: The `room_for_user` we're assigning here will need to be fixed up + # (below) because they are potentially from the current snapshot time + # instead from the time of the `to_token`. 
room_for_user.room_id: room_for_user for room_for_user in room_for_user_list if filter_membership_for_sync( @@ -404,33 +408,10 @@ async def get_sync_room_ids_for_user( instance_map=immutabledict(instance_to_max_stream_ordering_map), ) - # Since we fetched the users room list at some point in time after the from/to - # tokens, we need to revert/rewind some membership changes to match the point in - # time of the `to_token`. In particular, we need to make these fixups: - # - # - 1a) Remove rooms that the user joined after the `to_token` - # - 1b) Add back rooms that the user left after the `to_token` - # - 2) Add back newly_left rooms (> `from_token` and <= `to_token`) - # - # Below, we're doing two separate lookups for membership changes. We could - # request everything for both fixups in one range, [`from_token.room_key`, - # `membership_snapshot_token`), but we want to avoid raw `stream_ordering` - # comparison without `instance_name` (which is flawed). We could refactor - # `event.internal_metadata` to include `instance_name` but it might turn out a - # little difficult and a bigger, broader Synapse change than we want to make. - - # 1) ----------------------------------------------------- - - # 1) Fetch membership changes that fall in the range from `to_token` up to - # `membership_snapshot_token` - # - # If our `to_token` is already the same or ahead of the latest room membership - # for the user, we don't need to do any "2)" fix-ups and can just straight-up - # use the room list from the snapshot as a base (nothing has changed) - membership_change_events_after_to_token = [] + current_state_delta_membership_changes_after_to_token = [] if not membership_snapshot_token.is_before_or_eq(to_token.room_key): - membership_change_events_after_to_token = ( - await self.store.get_membership_changes_for_user( + current_state_delta_membership_changes_after_to_token = ( + await self.store.get_current_state_delta_membership_changes_for_user( user_id, from_key=to_token.room_key, to_key=membership_snapshot_token, @@ -438,138 +419,224 @@ async def get_sync_room_ids_for_user( ) ) - # 1) Assemble a list of the last membership events in some given ranges. Someone - # could have left and joined multiple times during the given range but we only - # care about end-result so we grab the last one. - last_membership_change_by_room_id_after_to_token: Dict[str, EventBase] = {} - # We also need the first membership event after the `to_token` so we can step + # We need the first membership event after the `to_token` so we can step # backward to the previous membership that would apply to the from/to range. - first_membership_change_by_room_id_after_to_token: Dict[str, EventBase] = {} - for event in membership_change_events_after_to_token: - last_membership_change_by_room_id_after_to_token[event.room_id] = event + first_membership_change_by_room_id_after_to_token: Dict[ + str, CurrentStateDeltaMembership + ] = {} + for membership_change in current_state_delta_membership_changes_after_to_token: # Only set if we haven't already set it first_membership_change_by_room_id_after_to_token.setdefault( - event.room_id, event + membership_change.room_id, membership_change ) - # 1) Fixup + # Since we fetched a snapshot of the users room list at some point in time after + # the from/to tokens, we need to revert/rewind some membership changes to match + # the point in time of the `to_token`. 
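+        # For example, if the first change after the `to_token` has no
+        # `prev_event_id` (e.g. the user's very first join), the user wasn't in
+        # the room at the `to_token` at all and we drop it. Otherwise, we step
+        # back to `prev_event_id` to recover the membership at the `to_token`.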
+ prev_event_ids_in_from_to_range = [] for ( - last_membership_change_after_to_token - ) in last_membership_change_by_room_id_after_to_token.values(): - room_id = last_membership_change_after_to_token.room_id - - # We want to find the first membership change after the `to_token` then step - # backward to know the membership in the from/to range. - first_membership_change_after_to_token = ( - first_membership_change_by_room_id_after_to_token.get(room_id) - ) - assert first_membership_change_after_to_token is not None, ( - "If there was a `last_membership_change_after_to_token` that we're iterating over, " - + "then there should be corresponding a first change. For example, even if there " - + "is only one event after the `to_token`, the first and last event will be same event. " - + "This is probably a mistake in assembling the `last_membership_change_by_room_id_after_to_token`" - + "/`first_membership_change_by_room_id_after_to_token` dicts above." - ) - # TODO: Instead of reading from `unsigned`, refactor this to use the - # `current_state_delta_stream` table in the future. Probably a new - # `get_membership_changes_for_user()` function that uses - # `current_state_delta_stream` with a join to `room_memberships`. This would - # help in state reset scenarios since `prev_content` is looking at the - # current branch vs the current room state. This is all just data given to - # the client so no real harm to data integrity, but we'd like to be nice to - # the client. Since the `current_state_delta_stream` table is new, it - # doesn't have all events in it. Since this is Sliding Sync, if we ever need - # to, we can signal the client to throw all of their state away by sending - # "operation: RESET". - prev_content = first_membership_change_after_to_token.unsigned.get( - "prev_content", {} - ) - prev_membership = prev_content.get("membership", None) - prev_sender = first_membership_change_after_to_token.unsigned.get( - "prev_sender", None + room_id, + first_membership_change_after_to_token, + ) in first_membership_change_by_room_id_after_to_token.items(): + # One of these should exist to be a valid row in `current_state_delta_stream` + assert ( + first_membership_change_after_to_token.event_id is not None + or first_membership_change_after_to_token.prev_event_id is not None ) - # Check if the previous membership (membership that applies to the from/to - # range) should be included in our `sync_room_id_set` - should_prev_membership_be_included = ( - prev_membership is not None - and prev_sender is not None - and filter_membership_for_sync( - membership=prev_membership, - user_id=user_id, - sender=prev_sender, + # If the membership change was added after the `to_token`, we need to remove + # it + if first_membership_change_after_to_token.prev_event_id is None: + sync_room_id_set.pop(room_id, None) + # From the first membership event after the `to_token`, we need to step + # backward to the previous membership that would apply to the from/to range. + else: + prev_event_ids_in_from_to_range.append( + first_membership_change_after_to_token.prev_event_id ) - ) - # Check if the last membership (membership that applies to our snapshot) was - # already included in our `sync_room_id_set` - was_last_membership_already_included = filter_membership_for_sync( - membership=last_membership_change_after_to_token.membership, + # Fetch the previous membership events that apply to the from/to range and fixup + # our working list. 
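+        # (`get_events` returns a `Dict[event_id, EventBase]` and silently
+        # omits any ID it can't find, so dangling `prev_event_id`s are skipped
+        # rather than raising.)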
+ prev_events_in_from_to_range = await self.store.get_events( + prev_event_ids_in_from_to_range + ) + for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): + # Update if the membership should be included + if filter_membership_for_sync( + membership=prev_event_in_from_to_range.membership, user_id=user_id, - sender=last_membership_change_after_to_token.sender, - ) - - # 1a) Add back rooms that the user left after the `to_token` - # - # For example, if the last membership event after the `to_token` is a leave - # event, then the room was excluded from `sync_room_id_set` when we first - # crafted it above. We should add these rooms back as long as the user also - # was part of the room before the `to_token`. - if ( - not was_last_membership_already_included - and should_prev_membership_be_included + sender=prev_event_in_from_to_range.sender, ): - sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - last_membership_change_after_to_token + sync_room_id_set[prev_event_in_from_to_range.room_id] = ( + convert_event_to_rooms_for_user(prev_event_in_from_to_range) ) - # 1b) Remove rooms that the user joined (hasn't left) after the `to_token` - # - # For example, if the last membership event after the `to_token` is a "join" - # event, then the room was included `sync_room_id_set` when we first crafted - # it above. We should remove these rooms as long as the user also wasn't - # part of the room before the `to_token`. - elif ( - was_last_membership_already_included - and not should_prev_membership_be_included - ): - del sync_room_id_set[room_id] - - # 2) ----------------------------------------------------- - # We fix-up newly_left rooms after the first fixup because it may have removed - # some left rooms that we can figure out are newly_left in the following code + # Otherwise, remove it + else: + sync_room_id_set.pop(prev_event_in_from_to_range.room_id, None) - # 2) Fetch membership changes that fall in the range from `from_token` up to `to_token` - membership_change_events_in_from_to_range = [] - if from_token: - membership_change_events_in_from_to_range = ( - await self.store.get_membership_changes_for_user( - user_id, - from_key=from_token.room_key, - to_key=to_token.room_key, - excluded_rooms=self.rooms_to_exclude_globally, - ) - ) + # TODO: Add back newly_left rooms - # 2) Assemble a list of the last membership events in some given ranges. Someone - # could have left and joined multiple times during the given range but we only - # care about end-result so we grab the last one. - last_membership_change_by_room_id_in_from_to_range: Dict[str, EventBase] = {} - for event in membership_change_events_in_from_to_range: - last_membership_change_by_room_id_in_from_to_range[event.room_id] = event + # Since we fetched the users room list at some point in time after the from/to + # tokens, we need to revert/rewind some membership changes to match the point in + # time of the `to_token`. In particular, we need to make these fixups: + # + # - 1a) Remove rooms that the user joined after the `to_token` + # - 1b) Add back rooms that the user left after the `to_token` + # - 2) Add back newly_left rooms (> `from_token` and <= `to_token`) - # 2) Fixup - for ( - last_membership_change_in_from_to_range - ) in last_membership_change_by_room_id_in_from_to_range.values(): - room_id = last_membership_change_in_from_to_range.room_id - - # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). 
We - # include newly_left rooms because the last event that the user should see - # is their own leave event - if last_membership_change_in_from_to_range.membership == Membership.LEAVE: - sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - last_membership_change_in_from_to_range - ) + # # 1) ----------------------------------------------------- + + # # 1) Fetch membership changes that fall in the range from `to_token` up to + # # `membership_snapshot_token` + # # + # # If our `to_token` is already the same or ahead of the latest room membership + # # for the user, we don't need to do any "2)" fix-ups and can just straight-up + # # use the room list from the snapshot as a base (nothing has changed) + # membership_change_events_after_to_token = [] + # if not membership_snapshot_token.is_before_or_eq(to_token.room_key): + # membership_change_events_after_to_token = ( + # await self.store.get_membership_changes_for_user( + # user_id, + # from_key=to_token.room_key, + # to_key=membership_snapshot_token, + # excluded_rooms=self.rooms_to_exclude_globally, + # ) + # ) + + # # 1) Assemble a list of the last membership events in some given ranges. Someone + # # could have left and joined multiple times during the given range but we only + # # care about end-result so we grab the last one. + # last_membership_change_by_room_id_after_to_token: Dict[str, EventBase] = {} + # # We also need the first membership event after the `to_token` so we can step + # # backward to the previous membership that would apply to the from/to range. + # first_membership_change_by_room_id_after_to_token: Dict[str, EventBase] = {} + # for event in membership_change_events_after_to_token: + # last_membership_change_by_room_id_after_to_token[event.room_id] = event + # # Only set if we haven't already set it + # first_membership_change_by_room_id_after_to_token.setdefault( + # event.room_id, event + # ) + + # # 1) Fixup + # for ( + # last_membership_change_after_to_token + # ) in last_membership_change_by_room_id_after_to_token.values(): + # room_id = last_membership_change_after_to_token.room_id + + # # We want to find the first membership change after the `to_token` then step + # # backward to know the membership in the from/to range. + # first_membership_change_after_to_token = ( + # first_membership_change_by_room_id_after_to_token.get(room_id) + # ) + # assert first_membership_change_after_to_token is not None, ( + # "If there was a `last_membership_change_after_to_token` that we're iterating over, " + # + "then there should be corresponding a first change. For example, even if there " + # + "is only one event after the `to_token`, the first and last event will be same event. " + # + "This is probably a mistake in assembling the `last_membership_change_by_room_id_after_to_token`" + # + "/`first_membership_change_by_room_id_after_to_token` dicts above." + # ) + # # TODO: Instead of reading from `unsigned`, refactor this to use the + # # `current_state_delta_stream` table in the future. Probably a new + # # `get_membership_changes_for_user()` function that uses + # # `current_state_delta_stream` with a join to `room_memberships`. This would + # # help in state reset scenarios since `prev_content` is looking at the + # # current branch vs the current room state. This is all just data given to + # # the client so no real harm to data integrity, but we'd like to be nice to + # # the client. Since the `current_state_delta_stream` table is new, it + # # doesn't have all events in it. 
Since this is Sliding Sync, if we ever need + # # to, we can signal the client to throw all of their state away by sending + # # "operation: RESET". + # prev_content = first_membership_change_after_to_token.unsigned.get( + # "prev_content", {} + # ) + # prev_membership = prev_content.get("membership", None) + # prev_sender = first_membership_change_after_to_token.unsigned.get( + # "prev_sender", None + # ) + + # # Check if the previous membership (membership that applies to the from/to + # # range) should be included in our `sync_room_id_set` + # should_prev_membership_be_included = ( + # prev_membership is not None + # and prev_sender is not None + # and filter_membership_for_sync( + # membership=prev_membership, + # user_id=user_id, + # sender=prev_sender, + # ) + # ) + + # # Check if the last membership (membership that applies to our snapshot) was + # # already included in our `sync_room_id_set` + # was_last_membership_already_included = filter_membership_for_sync( + # membership=last_membership_change_after_to_token.membership, + # user_id=user_id, + # sender=last_membership_change_after_to_token.sender, + # ) + + # # 1a) Add back rooms that the user left after the `to_token` + # # + # # For example, if the last membership event after the `to_token` is a leave + # # event, then the room was excluded from `sync_room_id_set` when we first + # # crafted it above. We should add these rooms back as long as the user also + # # was part of the room before the `to_token`. + # if ( + # not was_last_membership_already_included + # and should_prev_membership_be_included + # ): + # # TODO: Assign the correct membership event at the `to_token` here + # # (currently we're setting it as the last event after the `to_token`) + # sync_room_id_set[room_id] = convert_event_to_rooms_for_user( + # last_membership_change_after_to_token + # ) + # # 1b) Remove rooms that the user joined (hasn't left) after the `to_token` + # # + # # For example, if the last membership event after the `to_token` is a "join" + # # event, then the room was included `sync_room_id_set` when we first crafted + # # it above. We should remove these rooms as long as the user also wasn't + # # part of the room before the `to_token`. + # elif ( + # was_last_membership_already_included + # and not should_prev_membership_be_included + # ): + # del sync_room_id_set[room_id] + + # # 2) ----------------------------------------------------- + # # We fix-up newly_left rooms after the first fixup because it may have removed + # # some left rooms that we can figure out are newly_left in the following code + + # # 2) Fetch membership changes that fall in the range from `from_token` up to `to_token` + # membership_change_events_in_from_to_range = [] + # if from_token: + # membership_change_events_in_from_to_range = ( + # await self.store.get_membership_changes_for_user( + # user_id, + # from_key=from_token.room_key, + # to_key=to_token.room_key, + # excluded_rooms=self.rooms_to_exclude_globally, + # ) + # ) + + # # 2) Assemble a list of the last membership events in some given ranges. Someone + # # could have left and joined multiple times during the given range but we only + # # care about end-result so we grab the last one. 
+ # last_membership_change_by_room_id_in_from_to_range: Dict[str, EventBase] = {} + # for event in membership_change_events_in_from_to_range: + # last_membership_change_by_room_id_in_from_to_range[event.room_id] = event + + # # 2) Fixup + # for ( + # last_membership_change_in_from_to_range + # ) in last_membership_change_by_room_id_in_from_to_range.values(): + # room_id = last_membership_change_in_from_to_range.room_id + + # # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We + # # include newly_left rooms because the last event that the user should see + # # is their own leave event + # if last_membership_change_in_from_to_range.membership == Membership.LEAVE: + # sync_room_id_set[room_id] = convert_event_to_rooms_for_user( + # last_membership_change_in_from_to_range + # ) return sync_room_id_set diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index c21e69ecdab..f5de23080d9 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -44,6 +44,7 @@ import logging from typing import ( TYPE_CHECKING, + AbstractSet, Any, Collection, Dict, @@ -62,7 +63,7 @@ from twisted.internet import defer -from synapse.api.constants import Direction +from synapse.api.constants import Direction, EventTypes from synapse.api.filtering import Filter from synapse.events import EventBase from synapse.logging.context import make_deferred_yieldable, run_in_background @@ -111,6 +112,24 @@ class _EventsAround: end: RoomStreamToken +@attr.s(slots=True, frozen=True, auto_attribs=True) +class CurrentStateDeltaMembership: + """ + Attributes: + event_id: The "current" membership event ID in this room. May be `None` if the + server is no longer in the room or a state reset happened. + prev_event_id: The previous membership event in this room that was replaced by + the "current" one. May be `None` if there was no previous membership event. + room_id: The room ID of the membership event. + """ + + event_id: Optional[str] + prev_event_id: Optional[str] + room_id: str + # Could be useful but we're not using it yet. + # event_pos: PersistedEventPosition + + def generate_pagination_where_clause( direction: Direction, column_names: Tuple[str, str], @@ -390,6 +409,42 @@ def _filter_results( return True +def _filter_results_by_stream( + lower_token: Optional[RoomStreamToken], + upper_token: Optional[RoomStreamToken], + instance_name: str, + stream_ordering: int, +) -> bool: + """ + Note: This function only works with "live" tokens with `stream_ordering` only. + + Returns True if the event persisted by the given instance at the given + topological/stream_ordering falls between the two tokens (taking a None + token to mean unbounded). + + Used to filter results from fetching events in the DB against the given + tokens. This is necessary to handle the case where the tokens include + position maps, which we handle by fetching more than necessary from the DB + and then filtering (rather than attempting to construct a complicated SQL + query). + """ + if lower_token: + assert lower_token.topological is None + + # If these are live tokens we compare the stream ordering against the + # writers stream position. 
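+        # (`get_stream_pos_for_instance` falls back to the token's default
+        # `stream` position for any writer not present in the `instance_map`,
+        # so this works for both single- and multi-writer deployments.)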
+ if stream_ordering <= lower_token.get_stream_pos_for_instance(instance_name): + return False + + if upper_token: + assert upper_token.topological is None + + if upper_token.get_stream_pos_for_instance(instance_name) < stream_ordering: + return False + + return True + + def filter_to_clause(event_filter: Optional[Filter]) -> Tuple[str, List[str]]: # NB: This may create SQL clauses that don't optimise well (and we don't # have indices on all possible clauses). E.g. it may create @@ -731,6 +786,94 @@ def f(txn: LoggingTransaction) -> List[_EventDictReturn]: return ret, key + async def get_current_state_delta_membership_changes_for_user( + self, + user_id: str, + from_key: RoomStreamToken, + to_key: RoomStreamToken, + excluded_rooms: Optional[List[str]] = None, + ) -> List[CurrentStateDeltaMembership]: + """ + TODO + + Note: This function only works with "live" tokens with `stream_ordering` only. + + All such events whose stream ordering `s` lies in the range `from_key < s <= + to_key` are returned. Events are sorted by `stream_ordering` ascending. + """ + # Start by ruling out cases where a DB query is not necessary. + if from_key == to_key: + return [] + + if from_key: + has_changed = self._membership_stream_cache.has_entity_changed( + user_id, int(from_key.stream) + ) + if not has_changed: + return [] + + def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: + # To handle tokens with a non-empty instance_map we fetch more + # results than necessary and then filter down + min_from_id = from_key.stream + max_to_id = to_key.get_max_stream_pos() + + args: List[Any] = [EventTypes.Member, user_id, min_from_id, max_to_id] + + # TODO: It would be good to assert that the `to_token` is >= + # the first row in `current_state_delta_stream` for the rooms we're + # interested in. Otherwise, we will end up with empty results and not know + # it. + + # Note: There is no index for `(type, state_key)` in + # `current_state_delta_stream`. We also can't just add an index for + # `event_id` and join the `room_memberships` table by `event_id` because it + # may be `null` in `current_state_delta_stream` so nothing will match (it's + # `null` when the server is no longer in the room or a state reset happened + # and it was unset). + sql = """ + SELECT s.event_id, s.prev_event_id, s.room_id, s.instance_name, s.stream_id + FROM current_state_delta_stream AS s + WHERE s.type = ? AND s.state_key = ? + AND s.stream_id > ? AND s.stream_id <= ? 
+ ORDER BY s.stream_id ASC + """ + + txn.execute(sql, args) + + return [ + CurrentStateDeltaMembership( + event_id=event_id, + prev_event_id=prev_event_id, + room_id=room_id, + # event_pos=PersistedEventPosition( + # instance_name=instance_name, + # stream=stream_ordering, + # ), + ) + for event_id, prev_event_id, room_id, instance_name, stream_ordering in txn + if _filter_results_by_stream( + from_key, + to_key, + instance_name, + stream_ordering, + ) + ] + + current_state_delta_membership_changes = await self.db_pool.runInteraction( + "get_current_state_delta_membership_changes_for_user", f + ) + + rooms_to_exclude: AbstractSet[str] = set() + if excluded_rooms is not None: + rooms_to_exclude = set(excluded_rooms) + + return [ + membership_change + for membership_change in current_state_delta_membership_changes + if membership_change.room_id not in rooms_to_exclude + ] + @cancellable async def get_membership_changes_for_user( self, @@ -766,10 +909,10 @@ def f(txn: LoggingTransaction) -> List[_EventDictReturn]: ignore_room_clause = "" if excluded_rooms is not None and len(excluded_rooms) > 0: - ignore_room_clause = "AND e.room_id NOT IN (%s)" % ",".join( - "?" for _ in excluded_rooms + ignore_room_clause, ignore_room_args = make_in_list_sql_clause( + txn.database_engine, "e.room_id", excluded_rooms, negative=True ) - args = args + excluded_rooms + args += ignore_room_args sql = """ SELECT m.event_id, instance_name, topological_ordering, stream_ordering diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index df262400e4f..694fd17a023 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -1029,7 +1029,7 @@ def test_invite_before_range_and_join_leave_after_to_token( ), ) - def test_display_name_changes( + def test_display_name_changes_in_token_range( self, ) -> None: """ @@ -1102,6 +1102,77 @@ def test_display_name_changes( ), ) + def test_display_name_changes_before_and_after_token_range( + self, + ) -> None: + """ + Test that we point to the correct membership event even though there are no + membership events in the from/range but there are `displayname`/`avatar_url` + changes before/after the token range. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + # We create the room with user2 so the room isn't left with no members when we + # leave and can still re-join. 
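+        # Note: `from_token` and `to_token` are the same position below
+        # (`after_room1_token`), so the range itself is empty; only the
+        # membership state *at* the token matters.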
+ room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + # Update the displayname before the token range + displayname_change_before_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname during token range", + }, + tok=user1_tok, + ) + + after_room1_token = self.event_sources.get_current_token() + + # Update the displayname after the token range + displayname_change_after_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname after token range", + }, + tok=user1_tok, + ) + + room_id_results = self.get_success( + self.sliding_sync_handler.get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=after_room1_token, + to_token=after_room1_token, + ) + ) + + # Room should show up because we were joined before the from/to range + self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + displayname_change_before_token_range_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response": join_response["event_id"], + "displayname_change_before_token_range_response": displayname_change_before_token_range_response[ + "event_id" + ], + "displayname_change_after_token_range_response": displayname_change_after_token_range_response[ + "event_id" + ], + } + ), + ) + def test_display_name_changes_leave_after_token_range( self, ) -> None: From 6c791a88b34b5646324a22584d5f84d99501ff34 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 21:07:19 -0500 Subject: [PATCH 041/109] WIP: Add back `newly_left` --- synapse/handlers/sliding_sync.py | 107 ++++++++++++++++++----- synapse/storage/databases/main/stream.py | 18 +++- 2 files changed, 98 insertions(+), 27 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 2e24b0c338c..5603fdeb383 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -375,11 +375,6 @@ async def get_sync_room_ids_for_user( # instead from the time of the `to_token`. room_for_user.room_id: room_for_user for room_for_user in room_for_user_list - if filter_membership_for_sync( - membership=room_for_user.membership, - user_id=user_id, - sender=room_for_user.sender, - ) } # Get the `RoomStreamToken` that represents the spot we queried up to when we got @@ -408,6 +403,23 @@ async def get_sync_room_ids_for_user( instance_map=immutabledict(instance_to_max_stream_ordering_map), ) + # Since we fetched the users room list at some point in time after the from/to + # tokens, we need to revert/rewind some membership changes to match the point in + # time of the `to_token`. 
In particular, we need to make these fixups:
+        #
+        # - 1a) Remove rooms that the user joined after the `to_token`
+        # - 1b) Add back rooms that the user left after the `to_token`
+        # - 1c) Update room membership events to the point in time of the `to_token`
+        # - 2) Add back newly_left rooms (> `from_token` and <= `to_token`)

+        # 1) -----------------------------------------------------

+        # 1) Fetch membership changes that fall in the range from `to_token` up to
+        # `membership_snapshot_token`
+        #
+        # If our `to_token` is already the same or ahead of the latest room membership
+        # for the user, we don't need to do any "1)" fix-ups and can just straight-up
+        # use the room list from the snapshot as a base (nothing has changed)
         current_state_delta_membership_changes_after_to_token = []
         if not membership_snapshot_token.is_before_or_eq(to_token.room_key):
             current_state_delta_membership_changes_after_to_token = (
@@ -419,8 +431,9 @@ async def get_sync_room_ids_for_user(
                 )
             )
 
-        # We need the first membership event after the `to_token` so we can step
-        # backward to the previous membership that would apply to the from/to range.
+        # 1) Assemble a list of the first membership event after the `to_token` so we can
+        # step backward to the previous membership that would apply to the from/to
+        # range.
         first_membership_change_by_room_id_after_to_token: Dict[
             str, CurrentStateDeltaMembership
         ] = {}
@@ -430,6 +443,8 @@ async def get_sync_room_ids_for_user(
                 membership_change.room_id, membership_change
             )
 
+        # 1) Fixup part 1
+        #
         # Since we fetched a snapshot of the users room list at some point in time after
         # the from/to tokens, we need to revert/rewind some membership changes to match
         # the point in time of the `to_token`.
@@ -444,37 +459,81 @@ async def get_sync_room_ids_for_user(
             or first_membership_change_after_to_token.prev_event_id is not None
         )
 
-            # If the membership change was added after the `to_token`, we need to remove
-            # it
+            # 1a) Remove rooms that the user joined after the `to_token`
             if first_membership_change_after_to_token.prev_event_id is None:
                 sync_room_id_set.pop(room_id, None)
-            # From the first membership event after the `to_token`, we need to step
-            # backward to the previous membership that would apply to the from/to range.
+            # 1b) 1c) From the first membership event after the `to_token`, step backward to the
+            # previous membership that would apply to the from/to range.
             else:
                 prev_event_ids_in_from_to_range.append(
                     first_membership_change_after_to_token.prev_event_id
                 )
 
-        # Fetch the previous membership events that apply to the from/to range and fixup
-        # our working list.
+        # 1) Fixup part 2
+        #
+        # 1b) 1c) Fetch the previous membership events that apply to the from/to range
+        # and fixup our working list.
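The 1a/1b/1c rewind can be exercised on toy data to see the intended effect (a self-contained sketch: plain dicts stand in for the `CurrentStateDeltaMembership` rows, and the room/event IDs are invented):

    snapshot = {"!a": "$join_a_late", "!b": "$leave_b_late"}
    changes_after_to_token = [
        {"room_id": "!a", "event_id": "$join_a_late", "prev_event_id": None},
        {"room_id": "!b", "event_id": "$leave_b_late", "prev_event_id": "$join_b"},
    ]

    first_change_by_room: dict = {}
    for change in changes_after_to_token:  # ordered by stream position
        first_change_by_room.setdefault(change["room_id"], change)

    for room_id, change in first_change_by_room.items():
        if change["prev_event_id"] is None:
            snapshot.pop(room_id, None)  # 1a) joined after `to_token`: drop it
        else:
            snapshot[room_id] = change["prev_event_id"]  # 1b/1c) rewind

    assert snapshot == {"!b": "$join_b"}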
prev_events_in_from_to_range = await self.store.get_events( prev_event_ids_in_from_to_range ) for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): - # Update if the membership should be included + # 1b) 1c) Update the membership with what we found + sync_room_id_set[prev_event_in_from_to_range.room_id] = ( + convert_event_to_rooms_for_user(prev_event_in_from_to_range) + ) + + filtered_sync_room_id_set = { + room_id: room_for_user + for room_id, room_for_user in sync_room_id_set.items() if filter_membership_for_sync( - membership=prev_event_in_from_to_range.membership, + membership=room_for_user.membership, user_id=user_id, - sender=prev_event_in_from_to_range.sender, - ): - sync_room_id_set[prev_event_in_from_to_range.room_id] = ( - convert_event_to_rooms_for_user(prev_event_in_from_to_range) + sender=room_for_user.sender, + ) + } + + # 2) ----------------------------------------------------- + # We fix-up newly_left rooms after the first fixup because it may have removed + # some left rooms that we can figure out are newly_left in the following code + + # 2) Fetch membership changes that fall in the range from `from_token` up to `to_token` + current_state_delta_membership_changes_in_from_to_range = [] + if from_token: + current_state_delta_membership_changes_in_from_to_range = ( + await self.store.get_current_state_delta_membership_changes_for_user( + user_id, + from_key=from_token.room_key, + to_key=to_token.room_key, + excluded_rooms=self.rooms_to_exclude_globally, ) - # Otherwise, remove it - else: - sync_room_id_set.pop(prev_event_in_from_to_range.room_id, None) + ) - # TODO: Add back newly_left rooms + # 2) Assemble a list of the last membership events in some given ranges. Someone + # could have left and joined multiple times during the given range but we only + # care about end-result so we grab the last one. + last_membership_change_by_room_id_in_from_to_range: Dict[ + str, CurrentStateDeltaMembership + ] = {} + for ( + membership_change + ) in current_state_delta_membership_changes_in_from_to_range: + last_membership_change_by_room_id_in_from_to_range[ + membership_change.room_id + ] = membership_change + + # 2) Fixup + for ( + last_membership_change_in_from_to_range + ) in last_membership_change_by_room_id_in_from_to_range.values(): + room_id = last_membership_change_in_from_to_range.room_id + + # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). 
We + # include newly_left rooms because the last event that the user should see + # is their own leave event + if last_membership_change_in_from_to_range.membership == Membership.LEAVE: + filtered_sync_room_id_set[room_id] = convert_event_to_rooms_for_user( + last_membership_change_in_from_to_range + ) # Since we fetched the users room list at some point in time after the from/to # tokens, we need to revert/rewind some membership changes to match the point in @@ -638,7 +697,7 @@ async def get_sync_room_ids_for_user( # last_membership_change_in_from_to_range # ) - return sync_room_id_set + return filtered_sync_room_id_set async def filter_rooms( self, diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index f5de23080d9..595245e70ec 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -63,7 +63,7 @@ from twisted.internet import defer -from synapse.api.constants import Direction, EventTypes +from synapse.api.constants import Direction, EventTypes, Membership from synapse.api.filtering import Filter from synapse.events import EventBase from synapse.logging.context import make_deferred_yieldable, run_in_background @@ -126,6 +126,7 @@ class CurrentStateDeltaMembership: event_id: Optional[str] prev_event_id: Optional[str] room_id: str + membership: str # Could be useful but we're not using it yet. # event_pos: PersistedEventPosition @@ -832,7 +833,13 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: # `null` when the server is no longer in the room or a state reset happened # and it was unset). sql = """ - SELECT s.event_id, s.prev_event_id, s.room_id, s.instance_name, s.stream_id + SELECT + s.event_id, + s.prev_event_id, + s.room_id, + s.instance_name, + s.stream_id, + m.membership FROM current_state_delta_stream AS s WHERE s.type = ? AND s.state_key = ? AND s.stream_id > ? AND s.stream_id <= ? @@ -846,12 +853,17 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: event_id=event_id, prev_event_id=prev_event_id, room_id=room_id, + # We can assume that the membership is `LEAVE` as a default. This + # will happen when `current_state_delta_stream.event_id` is null + # because it was unset due to a state reset or the server is no + # longer in the room (everyone on our local server left). 
+ membership=membership if membership else Membership.LEAVE, # event_pos=PersistedEventPosition( # instance_name=instance_name, # stream=stream_ordering, # ), ) - for event_id, prev_event_id, room_id, instance_name, stream_ordering in txn + for event_id, prev_event_id, room_id, instance_name, stream_ordering, membership in txn if _filter_results_by_stream( from_key, to_key, From 27d74b023e1a5679b4fbe6a5b4f6efaada8ec3b0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 22:20:27 -0500 Subject: [PATCH 042/109] Iterate --- synapse/handlers/sliding_sync.py | 33 ++++--- synapse/storage/databases/main/stream.py | 115 +++++++++-------------- 2 files changed, 68 insertions(+), 80 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 5603fdeb383..dbbbbc66bfa 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -443,22 +443,16 @@ async def get_sync_room_ids_for_user( membership_change.room_id, membership_change ) - # 1) Fixup part 1 + # 1) Fixup # # Since we fetched a snapshot of the users room list at some point in time after # the from/to tokens, we need to revert/rewind some membership changes to match # the point in time of the `to_token`. - prev_event_ids_in_from_to_range = [] + prev_event_ids_in_from_to_range: List[str] = [] for ( room_id, first_membership_change_after_to_token, ) in first_membership_change_by_room_id_after_to_token.items(): - # One of these should exist to be a valid row in `current_state_delta_stream` - assert ( - first_membership_change_after_to_token.event_id is not None - or first_membership_change_after_to_token.prev_event_id is not None - ) - # 1a) Remove rooms that the user joined after the `to_token` if first_membership_change_after_to_token.prev_event_id is None: sync_room_id_set.pop(room_id, None) @@ -469,7 +463,7 @@ async def get_sync_room_ids_for_user( first_membership_change_after_to_token.prev_event_id ) - # 1) Fixup part 2 + # 1) Fixup (more) # # 1b) 1c) Fetch the previous membership events that apply to the from/to range # and fixup our working list. @@ -522,18 +516,33 @@ async def get_sync_room_ids_for_user( ] = membership_change # 2) Fixup + last_membership_event_ids_to_include_in_from_to_range: List[str] = [] for ( last_membership_change_in_from_to_range ) in last_membership_change_by_room_id_in_from_to_range.values(): room_id = last_membership_change_in_from_to_range.room_id + sync_room_id_set[room_id] + # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). 
We
+            # include newly_left rooms because the last event that the user should see
+            # is their own leave event
             if last_membership_change_in_from_to_range.membership == Membership.LEAVE:
+                # Save the look-up if we already have the `leave` event
+                if sync_room_id_set[room_id].event_id == last_membership_change_in_from_to_range.prev_event_id:
+                    filtered_sync_room_id_set[room_id] = sync_room_id_set[room_id]
+                else:
+                    last_membership_event_ids_to_include_in_from_to_range.append(last_membership_change_in_from_to_range.event_id)
+
+            # TODO
+            # last_membership_events_to_include_in_from_to_range = await self.store.get_events(
+            #     last_membership_event_ids_to_include_in_from_to_range
+            # )
+            # for prev_event_in_from_to_range in prev_events_in_from_to_range.values():
+            #     # 1b) 1c) Update the membership with what we found
+            #     sync_room_id_set[prev_event_in_from_to_range.room_id] = (
+            #         convert_event_to_rooms_for_user(prev_event_in_from_to_range)
+            #     )
 
         # Since we fetched the users room list at some point in time after the from/to
         # tokens, we need to revert/rewind some membership changes to match the point in
diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
index 595245e70ec..ed571b0de7f 100644
--- a/synapse/storage/databases/main/stream.py
+++ b/synapse/storage/databases/main/stream.py
@@ -63,7 +63,7 @@
 
 from twisted.internet import defer
 
-from synapse.api.constants import Direction, EventTypes, Membership
+from synapse.api.constants import Direction
 from synapse.api.filtering import Filter
 from synapse.events import EventBase
 from synapse.logging.context import make_deferred_yieldable, run_in_background
@@ -116,14 +116,13 @@ class _EventsAround:
 class CurrentStateDeltaMembership:
     """
     Attributes:
-        event_id: The "current" membership event ID in this room. May be `None` if the
-            server is no longer in the room or a state reset happened.
+        event_id: The "current" membership event ID in this room.
         prev_event_id: The previous membership event in this room that was replaced by
             the "current" one. May be `None` if there was no previous membership event.
         room_id: The room ID of the membership event.
     """
 
-    event_id: Optional[str]
+    event_id: str
     prev_event_id: Optional[str]
     room_id: str
     membership: str
@@ -410,42 +409,6 @@ def _filter_results(
     return True
 
 
-def _filter_results_by_stream(
-    lower_token: Optional[RoomStreamToken],
-    upper_token: Optional[RoomStreamToken],
-    instance_name: str,
-    stream_ordering: int,
-) -> bool:
-    """
-    Note: This function only works with "live" tokens with `stream_ordering` only.
-
-    Returns True if the event persisted by the given instance at the given
-    topological/stream_ordering falls between the two tokens (taking a None
-    token to mean unbounded).
-
-    Used to filter results from fetching events in the DB against the given
-    tokens. This is necessary to handle the case where the tokens include
-    position maps, which we handle by fetching more than necessary from the DB
-    and then filtering (rather than attempting to construct a complicated SQL
-    query).
-    """
-    if lower_token:
-        assert lower_token.topological is None
-
-        # If these are live tokens we compare the stream ordering against the
-        # writers stream position.
- if stream_ordering <= lower_token.get_stream_pos_for_instance(instance_name): - return False - - if upper_token: - assert upper_token.topological is None - - if upper_token.get_stream_pos_for_instance(instance_name) < stream_ordering: - return False - - return True - - def filter_to_clause(event_filter: Optional[Filter]) -> Tuple[str, List[str]]: # NB: This may create SQL clauses that don't optimise well (and we don't # have indices on all possible clauses). E.g. it may create @@ -819,58 +782,74 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: min_from_id = from_key.stream max_to_id = to_key.get_max_stream_pos() - args: List[Any] = [EventTypes.Member, user_id, min_from_id, max_to_id] + args: List[Any] = [user_id, min_from_id, max_to_id] # TODO: It would be good to assert that the `to_token` is >= # the first row in `current_state_delta_stream` for the rooms we're # interested in. Otherwise, we will end up with empty results and not know # it. - # Note: There is no index for `(type, state_key)` in - # `current_state_delta_stream`. We also can't just add an index for - # `event_id` and join the `room_memberships` table by `event_id` because it - # may be `null` in `current_state_delta_stream` so nothing will match (it's - # `null` when the server is no longer in the room or a state reset happened - # and it was unset). + # We have to look-up events by `stream_ordering` because + # `current_state_delta_stream.event_id` can be `null` if the server is no + # longer in the room or a state reset happened and it was unset. + # `stream_ordering` is unique across the Synapse instance so this should + # work fine. sql = """ SELECT - s.event_id, + e.event_id, s.prev_event_id, s.room_id, s.instance_name, s.stream_id, + e.topological_ordering, m.membership FROM current_state_delta_stream AS s - WHERE s.type = ? AND s.state_key = ? + INNER JOIN events AS e ON e.stream_ordering = s.stream_id + INNER JOIN room_memberships AS m ON m.event_id = e.event_id + WHERE m.user_id = ? AND s.stream_id > ? AND s.stream_id <= ? ORDER BY s.stream_id ASC """ txn.execute(sql, args) - return [ - CurrentStateDeltaMembership( - event_id=event_id, - prev_event_id=prev_event_id, - room_id=room_id, - # We can assume that the membership is `LEAVE` as a default. This - # will happen when `current_state_delta_stream.event_id` is null - # because it was unset due to a state reset or the server is no - # longer in the room (everyone on our local server left). 
- membership=membership if membership else Membership.LEAVE, - # event_pos=PersistedEventPosition( - # instance_name=instance_name, - # stream=stream_ordering, - # ), - ) - for event_id, prev_event_id, room_id, instance_name, stream_ordering, membership in txn - if _filter_results_by_stream( + membership_changes: List[CurrentStateDeltaMembership] = [] + for ( + event_id, + prev_event_id, + room_id, + instance_name, + stream_ordering, + topological_ordering, + membership, + ) in txn: + assert event_id is not None + # `prev_event_id` can be `None` + assert room_id is not None + assert instance_name is not None + assert stream_ordering is not None + assert topological_ordering is not None + assert membership is not None + + if _filter_results( from_key, to_key, instance_name, + topological_ordering, stream_ordering, - ) - ] + ): + membership_changes.append( + CurrentStateDeltaMembership( + event_id=event_id, + prev_event_id=prev_event_id, + room_id=room_id, + membership=membership, + # event_pos=PersistedEventPosition( + # instance_name=instance_name, + # stream=stream_ordering, + # ), + ) + ) current_state_delta_membership_changes = await self.db_pool.runInteraction( "get_current_state_delta_membership_changes_for_user", f From fb8fbd489cb920b6d29282e3b2912a311bade162 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 22:37:45 -0500 Subject: [PATCH 043/109] Just fetch full events for `get_current_state_delta_membership_changes_for_user(...)` Makes downstream logic simpler and although we may look-up some events we don't use, the lookup is all done in one go instead of fetching events from event_ids in a couple different places. --- synapse/handlers/sliding_sync.py | 203 +---------------------- synapse/storage/databases/main/stream.py | 54 +++++- 2 files changed, 51 insertions(+), 206 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index dbbbbc66bfa..5d63099499f 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -448,34 +448,20 @@ async def get_sync_room_ids_for_user( # Since we fetched a snapshot of the users room list at some point in time after # the from/to tokens, we need to revert/rewind some membership changes to match # the point in time of the `to_token`. - prev_event_ids_in_from_to_range: List[str] = [] for ( room_id, first_membership_change_after_to_token, ) in first_membership_change_by_room_id_after_to_token.items(): # 1a) Remove rooms that the user joined after the `to_token` - if first_membership_change_after_to_token.prev_event_id is None: + if first_membership_change_after_to_token.prev_event is None: sync_room_id_set.pop(room_id, None) # 1b) 1c) From the first membership event after the `to_token`, step backward to the # previous membership that would apply to the from/to range. else: - prev_event_ids_in_from_to_range.append( - first_membership_change_after_to_token.prev_event_id + sync_room_id_set[room_id] = convert_event_to_rooms_for_user( + first_membership_change_after_to_token.prev_event ) - # 1) Fixup (more) - # - # 1b) 1c) Fetch the previous membership events that apply to the from/to range - # and fixup our working list. 
- prev_events_in_from_to_range = await self.store.get_events( - prev_event_ids_in_from_to_range - ) - for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): - # 1b) 1c) Update the membership with what we found - sync_room_id_set[prev_event_in_from_to_range.room_id] = ( - convert_event_to_rooms_for_user(prev_event_in_from_to_range) - ) - filtered_sync_room_id_set = { room_id: room_for_user for room_id, room_for_user in sync_room_id_set.items() @@ -516,195 +502,18 @@ async def get_sync_room_ids_for_user( ] = membership_change # 2) Fixup - last_membership_event_ids_to_include_in_from_to_range: List[str] = [] for ( last_membership_change_in_from_to_range ) in last_membership_change_by_room_id_in_from_to_range.values(): room_id = last_membership_change_in_from_to_range.room_id - sync_room_id_set[room_id] - # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We # include newly_left rooms because the last event that the user should see # is their own leave event if last_membership_change_in_from_to_range.membership == Membership.LEAVE: - # Save the look-up if we already have the `leave` event - if sync_room_id_set[room_id].event_id == last_membership_change_in_from_to_range.prev_event_id:: - filtered_sync_room_id_set[room_id] = sync_room_id_set[room_id] - else: - last_membership_event_ids_to_include_in_from_to_range.append(last_membership_change_in_from_to_range.event_id) - - # TODO - # last_membership_events_to_include_in_from_to_range = await self.store.get_events( - # last_membership_event_ids_to_include_in_from_to_range - # ) - # for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): - # # 1b) 1c) Update the membership with what we found - # sync_room_id_set[prev_event_in_from_to_range.room_id] = ( - # convert_event_to_rooms_for_user(prev_event_in_from_to_range) - # ) - - # Since we fetched the users room list at some point in time after the from/to - # tokens, we need to revert/rewind some membership changes to match the point in - # time of the `to_token`. In particular, we need to make these fixups: - # - # - 1a) Remove rooms that the user joined after the `to_token` - # - 1b) Add back rooms that the user left after the `to_token` - # - 2) Add back newly_left rooms (> `from_token` and <= `to_token`) - - # # 1) ----------------------------------------------------- - - # # 1) Fetch membership changes that fall in the range from `to_token` up to - # # `membership_snapshot_token` - # # - # # If our `to_token` is already the same or ahead of the latest room membership - # # for the user, we don't need to do any "2)" fix-ups and can just straight-up - # # use the room list from the snapshot as a base (nothing has changed) - # membership_change_events_after_to_token = [] - # if not membership_snapshot_token.is_before_or_eq(to_token.room_key): - # membership_change_events_after_to_token = ( - # await self.store.get_membership_changes_for_user( - # user_id, - # from_key=to_token.room_key, - # to_key=membership_snapshot_token, - # excluded_rooms=self.rooms_to_exclude_globally, - # ) - # ) - - # # 1) Assemble a list of the last membership events in some given ranges. Someone - # # could have left and joined multiple times during the given range but we only - # # care about end-result so we grab the last one. - # last_membership_change_by_room_id_after_to_token: Dict[str, EventBase] = {} - # # We also need the first membership event after the `to_token` so we can step - # # backward to the previous membership that would apply to the from/to range. 
- # first_membership_change_by_room_id_after_to_token: Dict[str, EventBase] = {} - # for event in membership_change_events_after_to_token: - # last_membership_change_by_room_id_after_to_token[event.room_id] = event - # # Only set if we haven't already set it - # first_membership_change_by_room_id_after_to_token.setdefault( - # event.room_id, event - # ) - - # # 1) Fixup - # for ( - # last_membership_change_after_to_token - # ) in last_membership_change_by_room_id_after_to_token.values(): - # room_id = last_membership_change_after_to_token.room_id - - # # We want to find the first membership change after the `to_token` then step - # # backward to know the membership in the from/to range. - # first_membership_change_after_to_token = ( - # first_membership_change_by_room_id_after_to_token.get(room_id) - # ) - # assert first_membership_change_after_to_token is not None, ( - # "If there was a `last_membership_change_after_to_token` that we're iterating over, " - # + "then there should be corresponding a first change. For example, even if there " - # + "is only one event after the `to_token`, the first and last event will be same event. " - # + "This is probably a mistake in assembling the `last_membership_change_by_room_id_after_to_token`" - # + "/`first_membership_change_by_room_id_after_to_token` dicts above." - # ) - # # TODO: Instead of reading from `unsigned`, refactor this to use the - # # `current_state_delta_stream` table in the future. Probably a new - # # `get_membership_changes_for_user()` function that uses - # # `current_state_delta_stream` with a join to `room_memberships`. This would - # # help in state reset scenarios since `prev_content` is looking at the - # # current branch vs the current room state. This is all just data given to - # # the client so no real harm to data integrity, but we'd like to be nice to - # # the client. Since the `current_state_delta_stream` table is new, it - # # doesn't have all events in it. Since this is Sliding Sync, if we ever need - # # to, we can signal the client to throw all of their state away by sending - # # "operation: RESET". - # prev_content = first_membership_change_after_to_token.unsigned.get( - # "prev_content", {} - # ) - # prev_membership = prev_content.get("membership", None) - # prev_sender = first_membership_change_after_to_token.unsigned.get( - # "prev_sender", None - # ) - - # # Check if the previous membership (membership that applies to the from/to - # # range) should be included in our `sync_room_id_set` - # should_prev_membership_be_included = ( - # prev_membership is not None - # and prev_sender is not None - # and filter_membership_for_sync( - # membership=prev_membership, - # user_id=user_id, - # sender=prev_sender, - # ) - # ) - - # # Check if the last membership (membership that applies to our snapshot) was - # # already included in our `sync_room_id_set` - # was_last_membership_already_included = filter_membership_for_sync( - # membership=last_membership_change_after_to_token.membership, - # user_id=user_id, - # sender=last_membership_change_after_to_token.sender, - # ) - - # # 1a) Add back rooms that the user left after the `to_token` - # # - # # For example, if the last membership event after the `to_token` is a leave - # # event, then the room was excluded from `sync_room_id_set` when we first - # # crafted it above. We should add these rooms back as long as the user also - # # was part of the room before the `to_token`. 
- # if ( - # not was_last_membership_already_included - # and should_prev_membership_be_included - # ): - # # TODO: Assign the correct membership event at the `to_token` here - # # (currently we're setting it as the last event after the `to_token`) - # sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - # last_membership_change_after_to_token - # ) - # # 1b) Remove rooms that the user joined (hasn't left) after the `to_token` - # # - # # For example, if the last membership event after the `to_token` is a "join" - # # event, then the room was included `sync_room_id_set` when we first crafted - # # it above. We should remove these rooms as long as the user also wasn't - # # part of the room before the `to_token`. - # elif ( - # was_last_membership_already_included - # and not should_prev_membership_be_included - # ): - # del sync_room_id_set[room_id] - - # # 2) ----------------------------------------------------- - # # We fix-up newly_left rooms after the first fixup because it may have removed - # # some left rooms that we can figure out are newly_left in the following code - - # # 2) Fetch membership changes that fall in the range from `from_token` up to `to_token` - # membership_change_events_in_from_to_range = [] - # if from_token: - # membership_change_events_in_from_to_range = ( - # await self.store.get_membership_changes_for_user( - # user_id, - # from_key=from_token.room_key, - # to_key=to_token.room_key, - # excluded_rooms=self.rooms_to_exclude_globally, - # ) - # ) - - # # 2) Assemble a list of the last membership events in some given ranges. Someone - # # could have left and joined multiple times during the given range but we only - # # care about end-result so we grab the last one. - # last_membership_change_by_room_id_in_from_to_range: Dict[str, EventBase] = {} - # for event in membership_change_events_in_from_to_range: - # last_membership_change_by_room_id_in_from_to_range[event.room_id] = event - - # # 2) Fixup - # for ( - # last_membership_change_in_from_to_range - # ) in last_membership_change_by_room_id_in_from_to_range.values(): - # room_id = last_membership_change_in_from_to_range.room_id - - # # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We - # # include newly_left rooms because the last event that the user should see - # # is their own leave event - # if last_membership_change_in_from_to_range.membership == Membership.LEAVE: - # sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - # last_membership_change_in_from_to_range - # ) + filtered_sync_room_id_set[room_id] = convert_event_to_rooms_for_user( + last_membership_change_in_from_to_range.event + ) return filtered_sync_room_id_set diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index ed571b0de7f..ce135ededc9 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -113,21 +113,37 @@ class _EventsAround: @attr.s(slots=True, frozen=True, auto_attribs=True) -class CurrentStateDeltaMembership: +class _CurrentStateDeltaMembershipReturn: """ Attributes: event_id: The "current" membership event ID in this room. prev_event_id: The previous membership event in this room that was replaced by the "current" one. May be `None` if there was no previous membership event. room_id: The room ID of the membership event. + membership: The membership state of the user in the room. """ event_id: str prev_event_id: Optional[str] room_id: str membership: str - # Could be useful but we're not using it yet. 
- # event_pos: PersistedEventPosition + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class CurrentStateDeltaMembership: + """ + Attributes: + event: The "current" membership event in this room. + prev_event: The previous membership event in this room that was replaced by + the "current" one. May be `None` if there was no previous membership event. + room_id: The room ID of the membership event. + membership: The membership state of the user in the room. + """ + + event: EventBase + prev_event: Optional[EventBase] + room_id: str + membership: str def generate_pagination_where_clause( @@ -776,7 +792,7 @@ async def get_current_state_delta_membership_changes_for_user( if not has_changed: return [] - def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: + def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: # To handle tokens with a non-empty instance_map we fetch more # results than necessary and then filter down min_from_id = from_key.stream @@ -813,7 +829,7 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: txn.execute(sql, args) - membership_changes: List[CurrentStateDeltaMembership] = [] + membership_changes: List[_CurrentStateDeltaMembershipReturn] = [] for ( event_id, prev_event_id, @@ -839,7 +855,7 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: stream_ordering, ): membership_changes.append( - CurrentStateDeltaMembership( + _CurrentStateDeltaMembershipReturn( event_id=event_id, prev_event_id=prev_event_id, room_id=room_id, @@ -851,17 +867,37 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: ) ) - current_state_delta_membership_changes = await self.db_pool.runInteraction( + return membership_changes + + membership_changes = await self.db_pool.runInteraction( "get_current_state_delta_membership_changes_for_user", f ) + # Fetch all events in one go + event_ids = [] + for m in membership_changes: + event_ids.append(m.event_id) + if m.prev_event_id is not None: + event_ids.append(m.prev_event_id) + + events = await self.get_events(event_ids, get_prev_content=False) + rooms_to_exclude: AbstractSet[str] = set() if excluded_rooms is not None: rooms_to_exclude = set(excluded_rooms) return [ - membership_change - for membership_change in current_state_delta_membership_changes + CurrentStateDeltaMembership( + event=events[membership_change.event_id], + prev_event=( + events[membership_change.prev_event_id] + if membership_change.prev_event_id + else None + ), + room_id=membership_change.room_id, + membership=membership_change.membership, + ) + for membership_change in membership_changes if membership_change.room_id not in rooms_to_exclude ] From d91aa0018ca082cc88a3b3bfb7e06d1becb74227 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 22:45:59 -0500 Subject: [PATCH 044/109] Remove extras --- synapse/handlers/sliding_sync.py | 15 +++++---- synapse/storage/databases/main/stream.py | 40 +++++++----------------- 2 files changed, 21 insertions(+), 34 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 5d63099499f..fed663ac364 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -427,7 +427,7 @@ async def get_sync_room_ids_for_user( user_id, from_key=to_token.room_key, to_key=membership_snapshot_token, - excluded_rooms=self.rooms_to_exclude_globally, + excluded_room_ids=self.rooms_to_exclude_globally, ) ) @@ -440,7 +440,7 @@ async def get_sync_room_ids_for_user( for membership_change in 
current_state_delta_membership_changes_after_to_token: # Only set if we haven't already set it first_membership_change_by_room_id_after_to_token.setdefault( - membership_change.room_id, membership_change + membership_change.event.room_id, membership_change ) # 1) Fixup @@ -484,7 +484,7 @@ async def get_sync_room_ids_for_user( user_id, from_key=from_token.room_key, to_key=to_token.room_key, - excluded_rooms=self.rooms_to_exclude_globally, + excluded_room_ids=self.rooms_to_exclude_globally, ) ) @@ -498,19 +498,22 @@ async def get_sync_room_ids_for_user( membership_change ) in current_state_delta_membership_changes_in_from_to_range: last_membership_change_by_room_id_in_from_to_range[ - membership_change.room_id + membership_change.event.room_id ] = membership_change # 2) Fixup for ( last_membership_change_in_from_to_range ) in last_membership_change_by_room_id_in_from_to_range.values(): - room_id = last_membership_change_in_from_to_range.room_id + room_id = last_membership_change_in_from_to_range.event.room_id # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We # include newly_left rooms because the last event that the user should see # is their own leave event - if last_membership_change_in_from_to_range.membership == Membership.LEAVE: + if ( + last_membership_change_in_from_to_range.event.membership + == Membership.LEAVE + ): filtered_sync_room_id_set[room_id] = convert_event_to_rooms_for_user( last_membership_change_in_from_to_range.event ) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index ce135ededc9..efc0b88797e 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -120,13 +120,11 @@ class _CurrentStateDeltaMembershipReturn: prev_event_id: The previous membership event in this room that was replaced by the "current" one. May be `None` if there was no previous membership event. room_id: The room ID of the membership event. - membership: The membership state of the user in the room. """ event_id: str prev_event_id: Optional[str] room_id: str - membership: str @attr.s(slots=True, frozen=True, auto_attribs=True) @@ -136,14 +134,10 @@ class CurrentStateDeltaMembership: event: The "current" membership event in this room. prev_event: The previous membership event in this room that was replaced by the "current" one. May be `None` if there was no previous membership event. - room_id: The room ID of the membership event. - membership: The membership state of the user in the room. 
""" event: EventBase prev_event: Optional[EventBase] - room_id: str - membership: str def generate_pagination_where_clause( @@ -771,7 +765,7 @@ async def get_current_state_delta_membership_changes_for_user( user_id: str, from_key: RoomStreamToken, to_key: RoomStreamToken, - excluded_rooms: Optional[List[str]] = None, + excluded_room_ids: Optional[List[str]] = None, ) -> List[CurrentStateDeltaMembership]: """ TODO @@ -817,8 +811,7 @@ def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: s.room_id, s.instance_name, s.stream_id, - e.topological_ordering, - m.membership + e.topological_ordering FROM current_state_delta_stream AS s INNER JOIN events AS e ON e.stream_ordering = s.stream_id INNER JOIN room_memberships AS m ON m.event_id = e.event_id @@ -837,7 +830,6 @@ def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: instance_name, stream_ordering, topological_ordering, - membership, ) in txn: assert event_id is not None # `prev_event_id` can be `None` @@ -845,7 +837,6 @@ def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: assert instance_name is not None assert stream_ordering is not None assert topological_ordering is not None - assert membership is not None if _filter_results( from_key, @@ -859,46 +850,39 @@ def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: event_id=event_id, prev_event_id=prev_event_id, room_id=room_id, - membership=membership, - # event_pos=PersistedEventPosition( - # instance_name=instance_name, - # stream=stream_ordering, - # ), ) ) return membership_changes - membership_changes = await self.db_pool.runInteraction( + raw_membership_changes = await self.db_pool.runInteraction( "get_current_state_delta_membership_changes_for_user", f ) # Fetch all events in one go event_ids = [] - for m in membership_changes: + for m in raw_membership_changes: event_ids.append(m.event_id) if m.prev_event_id is not None: event_ids.append(m.prev_event_id) events = await self.get_events(event_ids, get_prev_content=False) - rooms_to_exclude: AbstractSet[str] = set() - if excluded_rooms is not None: - rooms_to_exclude = set(excluded_rooms) + room_ids_to_exclude: AbstractSet[str] = set() + if excluded_room_ids is not None: + room_ids_to_exclude = set(excluded_room_ids) return [ CurrentStateDeltaMembership( - event=events[membership_change.event_id], + event=events[raw_membership_change.event_id], prev_event=( - events[membership_change.prev_event_id] - if membership_change.prev_event_id + events[raw_membership_change.prev_event_id] + if raw_membership_change.prev_event_id else None ), - room_id=membership_change.room_id, - membership=membership_change.membership, ) - for membership_change in membership_changes - if membership_change.room_id not in rooms_to_exclude + for raw_membership_change in raw_membership_changes + if raw_membership_change.room_id not in room_ids_to_exclude ] @cancellable From daa7e3691aa73f2d8a81de1823a0a44b54fe838f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 23:01:28 -0500 Subject: [PATCH 045/109] Add docstring --- synapse/storage/databases/main/stream.py | 32 ++++++++++++++++++++---- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index efc0b88797e..730e55d135c 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -768,12 +768,34 @@ async def get_current_state_delta_membership_changes_for_user( 
excluded_room_ids: Optional[List[str]] = None, ) -> List[CurrentStateDeltaMembership]: """ - TODO + Fetch membership events (and the previous event that was replaced by that one) + for a given user. - Note: This function only works with "live" tokens with `stream_ordering` only. + We're looking for membership changes in the token range (> `from_key` and <= + `to_key`). - All such events whose stream ordering `s` lies in the range `from_key < s <= - to_key` are returned. Events are sorted by `stream_ordering` ascending. + Please be mindful to only use this with `from_key` and `to_key` tokens that are + recent enough to be after when the first local user joined the room. Otherwise, + the results may be incomplete or too greedy. For example, if you use a token + range before the first local user joined the room, you will see 0 events since + `current_state_delta_stream` tracks what the server thinks is the current state + of the room as time goes. It does not track how state progresses from the + beginning of the room. So for example, when you remotely join a room, the first + rows will just be the state when you joined and progress from there. + + You can probably reasonably use this with `/sync` because the `to_key` passed in + will be the "current" now token and the range will cover when the user joined + the room. + + Args: + user_id: The user ID to fetch membership events for. + from_key: The point in the stream to sync from (fetching events > this point). + to_key: The token to fetch rooms up to (fetching events <= this point). + excluded_room_ids: Optional list of room IDs to exclude from the results. + + Returns: + All membership changes to the current state in the token range. Events are + sorted by `stream_ordering` ascending. """ # Start by ruling out cases where a DB query is not necessary. if from_key == to_key: @@ -794,7 +816,7 @@ def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: args: List[Any] = [user_id, min_from_id, max_to_id] - # TODO: It would be good to assert that the `to_token` is >= + # TODO: It would be good to assert that the `from_token`/`to_token` is >= # the first row in `current_state_delta_stream` for the rooms we're # interested in. Otherwise, we will end up with empty results and not know # it. From cccbd15e7ece55ec8aab2632fcb7099215b29c86 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 23:40:55 -0500 Subject: [PATCH 046/109] Refactor back to not pulling out full events --- synapse/handlers/sliding_sync.py | 159 +++++++++++++---------- synapse/storage/databases/main/stream.py | 65 ++++----- 2 files changed, 120 insertions(+), 104 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index fed663ac364..c1cfec50008 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -28,7 +28,6 @@ from synapse.events.utils import strip_event from synapse.handlers.relations import BundledAggregations from synapse.storage.databases.main.stream import CurrentStateDeltaMembership -from synapse.storage.roommember import RoomsForUser from synapse.types import ( JsonDict, PersistedEventPosition, @@ -48,27 +47,6 @@ logger = logging.getLogger(__name__) -def convert_event_to_rooms_for_user(event: EventBase) -> RoomsForUser: - """ - Quick helper to convert an event to a `RoomsForUser` object. 
- """ - # These fields should be present for all persisted events - assert event.internal_metadata.stream_ordering is not None - assert event.internal_metadata.instance_name is not None - - return RoomsForUser( - room_id=event.room_id, - sender=event.sender, - membership=event.membership, - event_id=event.event_id, - event_pos=PersistedEventPosition( - event.internal_metadata.instance_name, - event.internal_metadata.stream_ordering, - ), - room_version_id=event.room_version.identifier, - ) - - def filter_membership_for_sync(*, membership: str, user_id: str, sender: str) -> bool: """ Returns True if the membership event should be included in the sync response, @@ -108,6 +86,25 @@ class RoomSyncConfig: required_state: Set[Tuple[str, str]] +@attr.s(slots=True, frozen=True, auto_attribs=True) +class _RoomMembershipForUser: + """ + Attributes: + event_id: The event ID of the membership event + event_pos: The stream position of the membership event + membership: The membership state of the user in the room + sender: The person who sent the membership event + newly_joined: Whether the user newly joined the room during the given token + range + """ + + event_id: str + event_pos: PersistedEventPosition + membership: str + sender: str + newly_joined: bool + + class SlidingSyncHandler: def __init__(self, hs: "HomeServer"): self.clock = hs.get_clock() @@ -302,7 +299,7 @@ async def current_sync_for_user( user=sync_config.user, room_id=room_id, room_sync_config=room_sync_config, - rooms_for_user_membership_at_to_token=sync_room_map[room_id], + rooms_membership_for_user_at_to_token=sync_room_map[room_id], from_token=from_token, to_token=to_token, ) @@ -321,7 +318,7 @@ async def get_sync_room_ids_for_user( user: UserID, to_token: StreamToken, from_token: Optional[StreamToken] = None, - ) -> Dict[str, RoomsForUser]: + ) -> Dict[str, _RoomMembershipForUser]: """ Fetch room IDs that should be listed for this user in the sync response (the full room list that will be filtered, sorted, and sliced). @@ -373,7 +370,13 @@ async def get_sync_room_ids_for_user( # Note: The `room_for_user` we're assigning here will need to be fixed up # (below) because they are potentially from the current snapshot time # instead from the time of the `to_token`. - room_for_user.room_id: room_for_user + room_for_user.room_id: _RoomMembershipForUser( + event_id=room_for_user.event_id, + event_pos=room_for_user.event_pos, + membership=room_for_user.membership, + sender=room_for_user.sender, + newly_joined=False, + ) for room_for_user in room_for_user_list } @@ -440,7 +443,7 @@ async def get_sync_room_ids_for_user( for membership_change in current_state_delta_membership_changes_after_to_token: # Only set if we haven't already set it first_membership_change_by_room_id_after_to_token.setdefault( - membership_change.event.room_id, membership_change + membership_change.room_id, membership_change ) # 1) Fixup @@ -448,27 +451,59 @@ async def get_sync_room_ids_for_user( # Since we fetched a snapshot of the users room list at some point in time after # the from/to tokens, we need to revert/rewind some membership changes to match # the point in time of the `to_token`. 
+ prev_event_ids_in_from_to_range: List[str] = [] for ( room_id, first_membership_change_after_to_token, ) in first_membership_change_by_room_id_after_to_token.items(): # 1a) Remove rooms that the user joined after the `to_token` - if first_membership_change_after_to_token.prev_event is None: + if first_membership_change_after_to_token.prev_event_id is None: sync_room_id_set.pop(room_id, None) # 1b) 1c) From the first membership event after the `to_token`, step backward to the # previous membership that would apply to the from/to range. else: - sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - first_membership_change_after_to_token.prev_event + prev_event_ids_in_from_to_range.append( + first_membership_change_after_to_token.prev_event_id ) + # 1) Fixup (more) + # + # 1b) 1c) Fetch the previous membership events that apply to the from/to range + # and fixup our working list. + prev_events_in_from_to_range = await self.store.get_events( + prev_event_ids_in_from_to_range + ) + for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): + # These fields should be present for all persisted events + assert ( + prev_event_in_from_to_range.internal_metadata.instance_name is not None + ) + assert ( + prev_event_in_from_to_range.internal_metadata.stream_ordering + is not None + ) + + # 1b) 1c) Update the membership with what we found + sync_room_id_set[prev_event_in_from_to_range.room_id] = ( + _RoomMembershipForUser( + event_id=prev_event_in_from_to_range.event_id, + event_pos=PersistedEventPosition( + instance_name=prev_event_in_from_to_range.internal_metadata.instance_name, + stream=prev_event_in_from_to_range.internal_metadata.stream_ordering, + ), + membership=prev_event_in_from_to_range.membership, + sender=prev_event_in_from_to_range.sender, + newly_joined=False, + ) + ) + filtered_sync_room_id_set = { - room_id: room_for_user - for room_id, room_for_user in sync_room_id_set.items() + room_id: room_membership_for_user + for room_id, room_membership_for_user in sync_room_id_set.items() if filter_membership_for_sync( - membership=room_for_user.membership, + membership=room_membership_for_user.membership, user_id=user_id, - sender=room_for_user.sender, + sender=room_membership_for_user.sender, ) } @@ -498,35 +533,38 @@ async def get_sync_room_ids_for_user( membership_change ) in current_state_delta_membership_changes_in_from_to_range: last_membership_change_by_room_id_in_from_to_range[ - membership_change.event.room_id + membership_change.room_id ] = membership_change # 2) Fixup for ( last_membership_change_in_from_to_range ) in last_membership_change_by_room_id_in_from_to_range.values(): - room_id = last_membership_change_in_from_to_range.event.room_id + room_id = last_membership_change_in_from_to_range.room_id # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). 
We # include newly_left rooms because the last event that the user should see # is their own leave event - if ( - last_membership_change_in_from_to_range.event.membership - == Membership.LEAVE - ): - filtered_sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - last_membership_change_in_from_to_range.event + if last_membership_change_in_from_to_range.membership == Membership.LEAVE: + filtered_sync_room_id_set[room_id] = _RoomMembershipForUser( + event_id=last_membership_change_in_from_to_range.event_id, + event_pos=last_membership_change_in_from_to_range.event_pos, + membership=last_membership_change_in_from_to_range.membership, + sender=last_membership_change_in_from_to_range.sender, + newly_joined=False, ) + # TODO: Figure out `newly_joined` + return filtered_sync_room_id_set async def filter_rooms( self, user: UserID, - sync_room_map: Dict[str, RoomsForUser], + sync_room_map: Dict[str, _RoomMembershipForUser], filters: SlidingSyncConfig.SlidingSyncList.Filters, to_token: StreamToken, - ) -> Dict[str, RoomsForUser]: + ) -> Dict[str, _RoomMembershipForUser]: """ Filter rooms based on the sync request. @@ -627,9 +665,9 @@ async def filter_rooms( async def sort_rooms( self, - sync_room_map: Dict[str, RoomsForUser], + sync_room_map: Dict[str, _RoomMembershipForUser], to_token: StreamToken, - ) -> List[Tuple[str, RoomsForUser]]: + ) -> List[Tuple[str, _RoomMembershipForUser]]: """ Sort by `stream_ordering` of the last event that the user should see in the room. `stream_ordering` is unique so we get a stable sort. @@ -682,7 +720,7 @@ async def get_room_sync_data( user: UserID, room_id: str, room_sync_config: RoomSyncConfig, - rooms_for_user_membership_at_to_token: RoomsForUser, + rooms_membership_for_user_at_to_token: _RoomMembershipForUser, from_token: Optional[StreamToken], to_token: StreamToken, ) -> SlidingSyncResult.RoomResult: @@ -696,7 +734,7 @@ async def get_room_sync_data( room_id: The room ID to fetch data for room_sync_config: Config for what data we should fetch for a room in the sync response. - rooms_for_user_membership_at_to_token: Membership information for the user + rooms_membership_for_user_at_to_token: Membership information for the user in the room at the time of `to_token`. from_token: The point in the stream to sync from. to_token: The point in the stream to sync up to. @@ -716,7 +754,7 @@ async def get_room_sync_data( if ( room_sync_config.timeline_limit > 0 # No timeline for invite/knock rooms (just `stripped_state`) - and rooms_for_user_membership_at_to_token.membership + and rooms_membership_for_user_at_to_token.membership not in (Membership.INVITE, Membership.KNOCK) ): limited = False @@ -726,27 +764,15 @@ async def get_room_sync_data( # position once we've fetched the events to point to the earliest event fetched. 
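The bound selection that follows can be condensed into a small pure function (a sketch under simplifying assumptions: tokens are opaque stand-in values and `leave_pos` abbreviates the position of the user's own leave/ban event):

    from typing import Optional, Tuple

    def timeline_bounds(
        membership: str,
        leave_pos: str,
        to_token: str,
        from_token: Optional[str],
        newly_joined: bool,
    ) -> Tuple[str, Optional[str]]:
        # Paginate backwards from `to_token`, but never past the user's own
        # leave/ban event.
        from_bound = leave_pos if membership in ("leave", "ban") else to_token
        # Only clamp to the token range on an incremental sync for a room the
        # user was already in; otherwise keep paginating backwards until the
        # timeline limit is hit.
        to_bound = from_token if from_token is not None and not newly_joined else None
        return from_bound, to_bound

    assert timeline_bounds("join", "$leave", "T2", "T1", newly_joined=False) == ("T2", "T1")
    assert timeline_bounds("leave", "$leave", "T2", "T1", newly_joined=False) == ("$leave", "T1")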
prev_batch_token = to_token - newly_joined = False - if ( - # We can only determine new-ness if we have a `from_token` to define our range - from_token is not None - and rooms_for_user_membership_at_to_token.membership == Membership.JOIN - ): - newly_joined = ( - rooms_for_user_membership_at_to_token.event_pos.persisted_after( - from_token.room_key - ) - ) - # We're going to paginate backwards from the `to_token` from_bound = to_token.room_key # People shouldn't see past their leave/ban event - if rooms_for_user_membership_at_to_token.membership in ( + if rooms_membership_for_user_at_to_token.membership in ( Membership.LEAVE, Membership.BAN, ): from_bound = ( - rooms_for_user_membership_at_to_token.event_pos.to_room_stream_token() + rooms_membership_for_user_at_to_token.event_pos.to_room_stream_token() ) # Determine whether we should limit the timeline to the token range. @@ -760,7 +786,8 @@ async def get_room_sync_data( # connection before to_bound = ( from_token.room_key - if from_token is not None and not newly_joined + if from_token is not None + and not rooms_membership_for_user_at_to_token.newly_joined else None ) @@ -797,7 +824,7 @@ async def get_room_sync_data( self.storage_controllers, user.to_string(), timeline_events, - is_peeking=rooms_for_user_membership_at_to_token.membership + is_peeking=rooms_membership_for_user_at_to_token.membership != Membership.JOIN, filter_send_to_client=True, ) @@ -852,12 +879,12 @@ async def get_room_sync_data( # Figure out any stripped state events for invite/knocks. This allows the # potential joiner to identify the room. stripped_state: List[JsonDict] = [] - if rooms_for_user_membership_at_to_token.membership in ( + if rooms_membership_for_user_at_to_token.membership in ( Membership.INVITE, Membership.KNOCK, ): invite_or_knock_event = await self.store.get_event( - rooms_for_user_membership_at_to_token.event_id + rooms_membership_for_user_at_to_token.event_id ) stripped_state = [] diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 730e55d135c..c5e65379806 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -113,31 +113,24 @@ class _EventsAround: @attr.s(slots=True, frozen=True, auto_attribs=True) -class _CurrentStateDeltaMembershipReturn: +class CurrentStateDeltaMembership: """ Attributes: event_id: The "current" membership event ID in this room. + event_pos: The position of the "current" membership event in the event stream. prev_event_id: The previous membership event in this room that was replaced by the "current" one. May be `None` if there was no previous membership event. room_id: The room ID of the membership event. + membership: The membership state of the user in the room + sender: The person who sent the membership event """ event_id: str + event_pos: PersistedEventPosition prev_event_id: Optional[str] room_id: str - - -@attr.s(slots=True, frozen=True, auto_attribs=True) -class CurrentStateDeltaMembership: - """ - Attributes: - event: The "current" membership event in this room. - prev_event: The previous membership event in this room that was replaced by - the "current" one. May be `None` if there was no previous membership event. 
- """ - - event: EventBase - prev_event: Optional[EventBase] + membership: str + sender: str def generate_pagination_where_clause( @@ -808,7 +801,7 @@ async def get_current_state_delta_membership_changes_for_user( if not has_changed: return [] - def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: + def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: # To handle tokens with a non-empty instance_map we fetch more # results than necessary and then filter down min_from_id = from_key.stream @@ -833,7 +826,9 @@ def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: s.room_id, s.instance_name, s.stream_id, - e.topological_ordering + e.topological_ordering, + m.membership, + e.sender FROM current_state_delta_stream AS s INNER JOIN events AS e ON e.stream_ordering = s.stream_id INNER JOIN room_memberships AS m ON m.event_id = e.event_id @@ -844,7 +839,7 @@ def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: txn.execute(sql, args) - membership_changes: List[_CurrentStateDeltaMembershipReturn] = [] + membership_changes: List[CurrentStateDeltaMembership] = [] for ( event_id, prev_event_id, @@ -852,6 +847,8 @@ def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: instance_name, stream_ordering, topological_ordering, + membership, + sender, ) in txn: assert event_id is not None # `prev_event_id` can be `None` @@ -859,6 +856,8 @@ def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: assert instance_name is not None assert stream_ordering is not None assert topological_ordering is not None + assert membership is not None + assert sender is not None if _filter_results( from_key, @@ -868,43 +867,33 @@ def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: stream_ordering, ): membership_changes.append( - _CurrentStateDeltaMembershipReturn( + CurrentStateDeltaMembership( event_id=event_id, + event_pos=PersistedEventPosition( + instance_name=instance_name, + stream=stream_ordering, + ), prev_event_id=prev_event_id, room_id=room_id, + membership=membership, + sender=sender, ) ) return membership_changes - raw_membership_changes = await self.db_pool.runInteraction( + membership_changes = await self.db_pool.runInteraction( "get_current_state_delta_membership_changes_for_user", f ) - # Fetch all events in one go - event_ids = [] - for m in raw_membership_changes: - event_ids.append(m.event_id) - if m.prev_event_id is not None: - event_ids.append(m.prev_event_id) - - events = await self.get_events(event_ids, get_prev_content=False) - room_ids_to_exclude: AbstractSet[str] = set() if excluded_room_ids is not None: room_ids_to_exclude = set(excluded_room_ids) return [ - CurrentStateDeltaMembership( - event=events[raw_membership_change.event_id], - prev_event=( - events[raw_membership_change.prev_event_id] - if raw_membership_change.prev_event_id - else None - ), - ) - for raw_membership_change in raw_membership_changes - if raw_membership_change.room_id not in room_ids_to_exclude + membership_change + for membership_change in membership_changes + if membership_change.room_id not in room_ids_to_exclude ] @cancellable From 62c6a4e8609f5d563b85f576d0a4d5b764c1f9c2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 01:10:00 -0500 Subject: [PATCH 047/109] Add `newly_joined` support to `get_sync_room_ids_for_user(...)` --- synapse/handlers/sliding_sync.py | 82 +++++++++- tests/handlers/test_sliding_sync.py | 224 +++++++++++++++++++++++++++- 2 files changed, 
300 insertions(+), 6 deletions(-)
diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py
index c1cfec50008..97b04698b2d 100644
--- a/synapse/handlers/sliding_sync.py
+++ b/synapse/handlers/sliding_sync.py
@@ -18,7 +18,8 @@
#
#
import logging
-from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple
+from collections import defaultdict
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple
import attr
from immutabledict import immutabledict
@@ -104,6 +105,9 @@ class _RoomMembershipForUser:
sender: str
newly_joined: bool
+ def copy_and_replace(self, **kwds: Any) -> "_RoomMembershipForUser":
+ return attr.evolve(self, **kwds)
+
class SlidingSyncHandler:
def __init__(self, hs: "HomeServer"):
@@ -414,6 +418,7 @@ async def get_sync_room_ids_for_user(
# - 1b) Add back rooms that the user left after the `to_token`
# - 1c) Update room membership events to the point in time of the `to_token`
# - 2) Add back newly_left rooms (> `from_token` and <= `to_token`)
+ # - 3) Figure out which rooms are `newly_joined`
# 1) -----------------------------------------------------
@@ -529,19 +534,49 @@ async def get_sync_room_ids_for_user(
last_membership_change_by_room_id_in_from_to_range: Dict[
str, CurrentStateDeltaMembership
] = {}
+ # We also want to assemble a list of the first membership events during the token
+ # range so we can step backward to the previous membership that applied just
+ # before the token range to see if we have `newly_joined` the room.
+ first_membership_change_by_room_id_in_from_to_range: Dict[
+ str, CurrentStateDeltaMembership
+ ] = {}
+ non_join_event_ids_by_room_id_in_from_to_range: Dict[str, List[str]] = (
+ defaultdict(list)
+ )
for (
membership_change
) in current_state_delta_membership_changes_in_from_to_range:
- last_membership_change_by_room_id_in_from_to_range[
- membership_change.room_id
- ] = membership_change
+ room_id = membership_change.room_id
+
+ last_membership_change_by_room_id_in_from_to_range[room_id] = (
+ membership_change
+ )
+
+ # Only set if we haven't already set it
+ first_membership_change_by_room_id_in_from_to_range.setdefault(
+ room_id, membership_change
+ )
+
+ if membership_change.membership != Membership.JOIN:
+ non_join_event_ids_by_room_id_in_from_to_range[room_id].append(
+ membership_change.event_id
+ )
# 2) Fixup
+ #
+ # 3) We also want to assemble a list of possibly newly joined rooms. Someone
+ # could have left and joined multiple times during the given range but we only
+ # care about whether they are joined at the end of the token range, so we are
+ # working with the last membership event in the token range.
+ possibly_newly_joined_room_ids = set()
for (
last_membership_change_in_from_to_range
) in last_membership_change_by_room_id_in_from_to_range.values():
room_id = last_membership_change_in_from_to_range.room_id
+ if last_membership_change_in_from_to_range.membership == Membership.JOIN:
+ possibly_newly_joined_room_ids.add(room_id)
+
# 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We
# include newly_left rooms because the last event that the user should see
# is their own leave event
@@ -554,7 +589,44 @@ async def get_sync_room_ids_for_user(
newly_joined=False,
)
- # TODO: Figure out `newly_joined`
+ # 3) Figure out `newly_joined`
+ prev_event_ids_before_token_range: List[str] = []
+ for possibly_newly_joined_room_id in possibly_newly_joined_room_ids:
+ non_joins_for_room = non_join_event_ids_by_room_id_in_from_to_range[
+ possibly_newly_joined_room_id
+ ]
+ if len(non_joins_for_room) > 0:
+ # We found a `newly_joined` room (we left and joined within the token range)
+ filtered_sync_room_id_set[possibly_newly_joined_room_id] = filtered_sync_room_id_set[
+ possibly_newly_joined_room_id
+ ].copy_and_replace(newly_joined=True)
+ else:
+ prev_event_id = first_membership_change_by_room_id_in_from_to_range[
+ possibly_newly_joined_room_id
+ ].prev_event_id
+
+ if prev_event_id is None:
+ # We found a `newly_joined` room (we are joining the room for the
+ # first time within the token range)
+ filtered_sync_room_id_set[possibly_newly_joined_room_id] = filtered_sync_room_id_set[
+ possibly_newly_joined_room_id
+ ].copy_and_replace(newly_joined=True)
+ else:
+ # Last resort, we need to step back to the previous membership event
+ # just before the token range to see if we're joined then or not.
+ prev_event_ids_before_token_range.append(prev_event_id)
+
+ # 3) Figure out `newly_joined` (continued): fall back to the membership just
+ # before the token range for the rooms we couldn't decide above.
+ prev_events_before_token_range = await self.store.get_events(
+ prev_event_ids_before_token_range
+ )
+ for prev_event_before_token_range in prev_events_before_token_range.values():
+ if prev_event_before_token_range.membership != Membership.JOIN:
+ # We found a `newly_joined` room (we left before the token range
+ # and joined within the token range)
+ room_id = prev_event_before_token_range.room_id
+ filtered_sync_room_id_set[room_id] = filtered_sync_room_id_set[
+ room_id
+ ].copy_and_replace(newly_joined=True)
return filtered_sync_room_id_set
diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py
index 694fd17a023..c25ca41098d 100644
--- a/tests/handlers/test_sliding_sync.py
+++ b/tests/handlers/test_sliding_sync.py
@@ -116,6 +116,9 @@ def test_get_newly_joined_room(self) -> None:
room_id_results[room_id].event_id,
join_response["event_id"],
)
+ # We should be considered `newly_joined` because we joined during the token
+ # range
+ self.assertEqual(room_id_results[room_id].newly_joined, True)
def test_get_already_joined_room(self) -> None:
"""
@@ -146,6 +149,8 @@ def test_get_already_joined_room(self) -> None:
room_id_results[room_id].event_id,
join_response["event_id"],
)
+ # We should *NOT* be `newly_joined` because we joined before the token range
+ self.assertEqual(room_id_results[room_id].newly_joined, False)
def test_get_invited_banned_knocked_room(self) -> None:
"""
@@ -232,6 +237,11 @@ def test_get_invited_banned_knocked_room(self) -> None:
room_id_results[knock_room_id].event_id,
knock_room_membership_state_event.event_id,
)
+ # We should *NOT* be `newly_joined` because we were not joined at the time
+ # of the `to_token`.
+ self.assertEqual(room_id_results[invited_room_id].newly_joined, False)
+ self.assertEqual(room_id_results[ban_room_id].newly_joined, False)
+ self.assertEqual(room_id_results[knock_room_id].newly_joined, False)
def test_get_kicked_room(self) -> None:
"""
@@ -277,6 +287,9 @@ def test_get_kicked_room(self) -> None:
room_id_results[kick_room_id].event_id,
kick_response["event_id"],
)
+ # We should *NOT* be `newly_joined` because we were not joined at the time
+ # of the `to_token`.
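+ # (A kick is just a leave event sent by someone else, so it behaves the same
+ # as a leave here.)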
+ self.assertEqual(room_id_results[kick_room_id].newly_joined, False) def test_forgotten_rooms(self) -> None: """ @@ -396,6 +409,8 @@ def test_only_newly_left_rooms_show_up(self) -> None: room_id_results[room_id2].event_id, leave_response["event_id"], ) + # We should *NOT* be `newly_joined` because we are instead `newly_left` + self.assertEqual(room_id_results[room_id2].newly_joined, False) def test_no_joins_after_to_token(self) -> None: """ @@ -432,6 +447,8 @@ def test_no_joins_after_to_token(self) -> None: room_id_results[room_id1].event_id, join_response1["event_id"], ) + # We should be `newly_joined` because we joined during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, True) def test_join_during_range_and_left_room_after_to_token(self) -> None: """ @@ -477,6 +494,8 @@ def test_join_during_range_and_left_room_after_to_token(self) -> None: } ), ) + # We should be `newly_joined` because we joined during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, True) def test_join_before_range_and_left_room_after_to_token(self) -> None: """ @@ -519,6 +538,8 @@ def test_join_before_range_and_left_room_after_to_token(self) -> None: } ), ) + # We should *NOT* be `newly_joined` because we joined before the token range + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_kicked_before_range_and_left_after_to_token(self) -> None: """ @@ -581,6 +602,8 @@ def test_kicked_before_range_and_left_after_to_token(self) -> None: } ), ) + # We should *NOT* be `newly_joined` because we were kicked + self.assertEqual(room_id_results[kick_room_id].newly_joined, False) def test_newly_left_during_range_and_join_leave_after_to_token(self) -> None: """ @@ -632,6 +655,8 @@ def test_newly_left_during_range_and_join_leave_after_to_token(self) -> None: } ), ) + # We should *NOT* be `newly_joined` because we left during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_newly_left_during_range_and_join_after_to_token(self) -> None: """ @@ -681,6 +706,8 @@ def test_newly_left_during_range_and_join_after_to_token(self) -> None: } ), ) + # We should *NOT* be `newly_joined` because we left during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_no_from_token(self) -> None: """ @@ -727,6 +754,9 @@ def test_no_from_token(self) -> None: room_id_results[room_id1].event_id, join_response1["event_id"], ) + # We should *NOT* be `newly_joined` because there is no `from_token` to + # define a "live" range to compare against + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_from_token_ahead_of_to_token(self) -> None: """ @@ -793,6 +823,8 @@ def test_from_token_ahead_of_to_token(self) -> None: room_id_results[room_id1].event_id, join_response1["event_id"], ) + # We should *NOT* be `newly_joined` because we joined `room1` before either of the tokens + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_leave_before_range_and_join_leave_after_to_token(self) -> None: """ @@ -920,6 +952,8 @@ def test_join_leave_multiple_times_during_range_and_after_to_token( } ), ) + # We should be `newly_joined` because we joined during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, True) def test_join_leave_multiple_times_before_range_and_after_to_token( self, @@ -976,6 +1010,8 @@ def test_join_leave_multiple_times_before_range_and_after_to_token( } ), ) + # We should *NOT* be `newly_joined` because we joined before the token 
range
+ self.assertEqual(room_id_results[room_id1].newly_joined, False)
def test_invite_before_range_and_join_leave_after_to_token(
self,
) -> None:
@@ -1028,8 +1064,11 @@ def test_invite_before_range_and_join_leave_after_to_token(
}
),
)
+ # We should *NOT* be `newly_joined` because we were only invited before the
+ # token range
+ self.assertEqual(room_id_results[room_id1].newly_joined, False)
- def test_display_name_changes_in_token_range(
+ def test_join_and_display_name_changes_in_token_range(
self,
) -> None:
"""
@@ -1101,6 +1140,68 @@ def test_display_name_changes_in_token_range(
}
),
)
+ # We should be `newly_joined` because we joined during the token range
+ self.assertEqual(room_id_results[room_id1].newly_joined, True)
+
+ def test_display_name_changes_in_token_range(
+ self,
+ ) -> None:
+ """
+ Test that we point to the correct membership event within the from/to range even
+ if there are `displayname`/`avatar_url` updates.
+ """
+ user1_id = self.register_user("user1", "pass")
+ user1_tok = self.login(user1_id, "pass")
+ user2_id = self.register_user("user2", "pass")
+ user2_tok = self.login(user2_id, "pass")
+
+ # We create the room with user2 so the room isn't left with no members when we
+ # leave and can still re-join.
+ room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True)
+ join_response = self.helper.join(room_id1, user1_id, tok=user1_tok)
+
+ after_room1_token = self.event_sources.get_current_token()
+
+ # Update the displayname during the token range
+ displayname_change_during_token_range_response = self.helper.send_state(
+ room_id1,
+ event_type=EventTypes.Member,
+ state_key=user1_id,
+ body={
+ "membership": Membership.JOIN,
+ "displayname": "displayname during token range",
+ },
+ tok=user1_tok,
+ )
+
+ after_change1_token = self.event_sources.get_current_token()
+
+ room_id_results = self.get_success(
+ self.sliding_sync_handler.get_sync_room_ids_for_user(
+ UserID.from_string(user1_id),
+ from_token=after_room1_token,
+ to_token=after_change1_token,
+ )
+ )
+
+ # Room should show up because we were joined during the from/to range
+ self.assertEqual(room_id_results.keys(), {room_id1})
+ # It should be pointing to the latest membership event in the from/to range
+ self.assertEqual(
+ room_id_results[room_id1].event_id,
+ displayname_change_during_token_range_response["event_id"],
+ "Corresponding map to disambiguate the opaque event IDs: "
+ + str(
+ {
+ "join_response": join_response["event_id"],
+ "displayname_change_during_token_range_response": displayname_change_during_token_range_response[
+ "event_id"
+ ],
+ }
+ ),
+ )
+ # We should *NOT* be `newly_joined` because we joined before the token range
+ self.assertEqual(room_id_results[room_id1].newly_joined, False)
def test_display_name_changes_before_and_after_token_range(
self,
) -> None:
@@ -1172,6 +1273,8 @@ def test_display_name_changes_before_and_after_token_range(
}
),
)
+ # We should *NOT* be `newly_joined` because we joined before the token range
+ self.assertEqual(room_id_results[room_id1].newly_joined, False)
def test_display_name_changes_leave_after_token_range(
self,
) -> None:
@@ -1250,6 +1353,8 @@ def test_display_name_changes_leave_after_token_range(
}
),
)
+ # We should be `newly_joined` because we joined during the token range
+ self.assertEqual(room_id_results[room_id1].newly_joined, True)
def test_display_name_changes_join_after_token_range(
self,
) -> None:
@@ -1298,6 +1403,123 @@ def test_display_name_changes_join_after_token_range(
# Room shouldn't show up because we joined after the from/to range
self.assertEqual(room_id_results.keys(), set())
+ def test_newly_joined_with_leave_join_in_token_range(
+ self,
+ ) -> None:
+ """
+ Test that we are still considered `newly_joined` if we leave and join back
+ within the token range, even though we were already joined before the range.
+ """
+ user1_id = self.register_user("user1", "pass")
+ user1_tok = self.login(user1_id, "pass")
+ user2_id = self.register_user("user2", "pass")
+ user2_tok = self.login(user2_id, "pass")
+
+ # We create the room with user2 so the room isn't left with no members when we
+ # leave and can still re-join.
+ room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True)
+ self.helper.join(room_id1, user1_id, tok=user1_tok)
+
+ after_room1_token = self.event_sources.get_current_token()
+
+ # Leave and join back during the token range
+ self.helper.leave(room_id1, user1_id, tok=user1_tok)
+ join_response2 = self.helper.join(room_id1, user1_id, tok=user1_tok)
+
+ after_more_changes_token = self.event_sources.get_current_token()
+
+ room_id_results = self.get_success(
+ self.sliding_sync_handler.get_sync_room_ids_for_user(
+ UserID.from_string(user1_id),
+ from_token=after_room1_token,
+ to_token=after_more_changes_token,
+ )
+ )
+
+ # Room should show up because we were joined during the from/to range
+ self.assertEqual(room_id_results.keys(), {room_id1})
+ # It should be pointing to the latest membership event in the from/to range
+ self.assertEqual(
+ room_id_results[room_id1].event_id,
+ join_response2["event_id"],
+ )
+ # We should be considered `newly_joined` because there is a non-join event (our
+ # leave) before our latest join within the token range.
+ self.assertEqual(room_id_results[room_id1].newly_joined, True)
+
+ def test_newly_joined_only_joins_during_token_range(
+ self,
+ ) -> None:
+ """
+ Test that a join and more joins caused by display name changes, all during the
+ token range, still count as `newly_joined`.
+ """
+ user1_id = self.register_user("user1", "pass")
+ user1_tok = self.login(user1_id, "pass")
+ user2_id = self.register_user("user2", "pass")
+ user2_tok = self.login(user2_id, "pass")
+
+ before_room1_token = self.event_sources.get_current_token()
+
+ # We create the room with user2 so the room isn't left with no members when we
+ # leave and can still re-join.
+ room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True)
+ # Join the room during the token range (this is our first join)
+ join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok)
+ # Update the displayname during the token range (looks like another join)
+ displayname_change_during_token_range_response1 = self.helper.send_state(
+ room_id1,
+ event_type=EventTypes.Member,
+ state_key=user1_id,
+ body={
+ "membership": Membership.JOIN,
+ "displayname": "displayname during token range",
+ },
+ tok=user1_tok,
+ )
+ # Update the displayname during the token range (looks like another join)
+ displayname_change_during_token_range_response2 = self.helper.send_state(
+ room_id1,
+ event_type=EventTypes.Member,
+ state_key=user1_id,
+ body={
+ "membership": Membership.JOIN,
+ "displayname": "displayname during token range",
+ },
+ tok=user1_tok,
+ )
+
+ after_room1_token = self.event_sources.get_current_token()
+
+ room_id_results = self.get_success(
+ self.sliding_sync_handler.get_sync_room_ids_for_user(
+ UserID.from_string(user1_id),
+ from_token=before_room1_token,
+ to_token=after_room1_token,
+ )
+ )
+
+ # Room should show up because we joined during the from/to range
+ self.assertEqual(room_id_results.keys(), {room_id1})
+ # It should be pointing to the latest membership event in the from/to range
+ self.assertEqual(
+ room_id_results[room_id1].event_id,
+ displayname_change_during_token_range_response2["event_id"],
+ "Corresponding map to disambiguate the opaque event IDs: "
+ + str(
+ {
+ "join_response1": join_response1["event_id"],
+ "displayname_change_during_token_range_response1": displayname_change_during_token_range_response1[
+ "event_id"
+ ],
+ "displayname_change_during_token_range_response2": displayname_change_during_token_range_response2[
+ "event_id"
+ ],
+ }
+ ),
+ )
+ # We should be `newly_joined` because we first joined during the token range
+ self.assertEqual(room_id_results[room_id1].newly_joined, True)
+
def test_multiple_rooms_are_not_confused(
self,
) -> None:
From 39259f66fa8ccd13818b8a5681b81fa020a8d4d2 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Wed, 26 Jun 2024 01:16:46 -0500
Subject: [PATCH 048/109] Join both tables with stream_ordering
---
synapse/storage/databases/main/stream.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
index c5e65379806..2646dfd9cb9 100644
--- a/synapse/storage/databases/main/stream.py
+++ b/synapse/storage/databases/main/stream.py
@@ -831,7 +831,7 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]:
e.sender
FROM current_state_delta_stream AS s
INNER JOIN events AS e ON e.stream_ordering = s.stream_id
- INNER JOIN room_memberships AS m ON m.event_id = e.event_id
+ INNER JOIN room_memberships AS m ON m.event_stream_ordering = s.stream_id
WHERE m.user_id = ?
AND s.stream_id > ? AND s.stream_id <= ?
ORDER BY s.stream_id ASC From c60aca755b35f9e655b2f2c71367ba5806db64e5 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 02:02:34 -0500 Subject: [PATCH 049/109] Fix clause change --- synapse/storage/databases/main/stream.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 2646dfd9cb9..562dc6eacf7 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -934,6 +934,7 @@ def f(txn: LoggingTransaction) -> List[_EventDictReturn]: ignore_room_clause, ignore_room_args = make_in_list_sql_clause( txn.database_engine, "e.room_id", excluded_rooms, negative=True ) + ignore_room_clause = f"AND {ignore_room_clause}" args += ignore_room_args sql = """ @@ -948,6 +949,8 @@ def f(txn: LoggingTransaction) -> List[_EventDictReturn]: ignore_room_clause, ) + logger.info("get_membership_changes_for_user: %s", sql) + txn.execute(sql, args) rows = [ From 11db1befa2845f89d09be78e32d53b9b4b9bbad4 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 02:05:25 -0500 Subject: [PATCH 050/109] Remove debug log --- synapse/storage/databases/main/stream.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 562dc6eacf7..f6be97698ea 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -949,8 +949,6 @@ def f(txn: LoggingTransaction) -> List[_EventDictReturn]: ignore_room_clause, ) - logger.info("get_membership_changes_for_user: %s", sql) - txn.execute(sql, args) rows = [ From 7395e1042072b3ab9f04898afa3989fda55a0978 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 02:19:31 -0500 Subject: [PATCH 051/109] Fix `builtins.SyntaxError: EOL while scanning string literal (test_sync.py, line 1885)` See https://github.com/element-hq/synapse/actions/runs/9675073109/job/26692003103?pr=17320#step:9:5552 Worked fine locally but failed in CI with Python 3.8 --- tests/rest/client/test_sync.py | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 338149f09a0..bd1e7d521b7 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1882,9 +1882,7 @@ def test_rooms_newly_joined_incremental_sync(self) -> None: channel = self.make_request( "POST", self.sync_endpoint - + f"?pos={self.get_success( - from_token.to_string(self.store) - )}", + + f"?pos={self.get_success(from_token.to_string(self.store))}", { "lists": { "foo-list": { @@ -2074,9 +2072,7 @@ def test_rooms_invite_shared_history_incremental_sync(self) -> None: channel = self.make_request( "POST", self.sync_endpoint - + f"?pos={self.get_success( - from_token.to_string(self.store) - )}", + + f"?pos={self.get_success(from_token.to_string(self.store))}", { "lists": { "foo-list": { @@ -2323,9 +2319,7 @@ def test_rooms_invite_world_readable_history_incremental_sync(self) -> None: channel = self.make_request( "POST", self.sync_endpoint - + f"?pos={self.get_success( - from_token.to_string(self.store) - )}", + + f"?pos={self.get_success(from_token.to_string(self.store))}", { "lists": { "foo-list": { @@ -2493,9 +2487,7 @@ def test_rooms_ban_incremental_sync1(self) -> None: channel = self.make_request( "POST", self.sync_endpoint - + f"?pos={self.get_success( - from_token.to_string(self.store) - )}", + + 
f"?pos={self.get_success(from_token.to_string(self.store))}", { "lists": { "foo-list": { @@ -2563,9 +2555,7 @@ def test_rooms_ban_incremental_sync2(self) -> None: channel = self.make_request( "POST", self.sync_endpoint - + f"?pos={self.get_success( - from_token.to_string(self.store) - )}", + + f"?pos={self.get_success(from_token.to_string(self.store))}", { "lists": { "foo-list": { From 2bf39231ede3a9bcad65ad3f1321e788acfdcd15 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 18:40:36 -0500 Subject: [PATCH 052/109] Add some tests for `get_current_state_delta_membership_changes_for_user(...)` --- synapse/storage/databases/main/stream.py | 14 +- tests/storage/test_stream.py | 515 +++++++++++++++++++++++ 2 files changed, 523 insertions(+), 6 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index f6be97698ea..e222f36bab7 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -63,7 +63,7 @@ from twisted.internet import defer -from synapse.api.constants import Direction +from synapse.api.constants import Direction, EventTypes from synapse.api.filtering import Filter from synapse.events import EventBase from synapse.logging.context import make_deferred_yieldable, run_in_background @@ -807,7 +807,7 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: min_from_id = from_key.stream max_to_id = to_key.get_max_stream_pos() - args: List[Any] = [user_id, min_from_id, max_to_id] + args: List[Any] = [min_from_id, max_to_id, user_id, EventTypes.Member] # TODO: It would be good to assert that the `from_token`/`to_token` is >= # the first row in `current_state_delta_stream` for the rooms we're @@ -824,16 +824,18 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: e.event_id, s.prev_event_id, s.room_id, - s.instance_name, - s.stream_id, + e.instance_name, + e.stream_ordering, e.topological_ordering, m.membership, e.sender FROM current_state_delta_stream AS s INNER JOIN events AS e ON e.stream_ordering = s.stream_id INNER JOIN room_memberships AS m ON m.event_stream_ordering = s.stream_id - WHERE m.user_id = ? - AND s.stream_id > ? AND s.stream_id <= ? + WHERE s.stream_id > ? AND s.stream_id <= ? + AND m.user_id = ? + AND s.state_key = m.user_id + AND s.type = ? 
ORDER BY s.stream_id ASC """ diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index fe1e873e154..64f123987af 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -28,9 +28,12 @@ from synapse.api.constants import Direction, EventTypes, RelationTypes from synapse.api.filtering import Filter +from synapse.api.room_versions import RoomVersions +from synapse.events import make_event_from_dict from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer +from synapse.storage.databases.main.stream import CurrentStateDeltaMembership from synapse.types import JsonDict, PersistedEventPosition, RoomStreamToken from synapse.util import Clock @@ -543,3 +546,515 @@ def test_last_event_before_sharded_token(self) -> None: } ), ) + + +class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): + """ + Test `get_current_state_delta_membership_changes_for_user(...)` + """ + + servlets = [ + admin.register_servlets, + room.register_servlets, + login.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.store = hs.get_datastores().main + self.event_sources = hs.get_event_sources() + self.state_handler = self.hs.get_state_handler() + persistence = hs.get_storage_controllers().persistence + assert persistence is not None + self.persistence = persistence + + def test_returns_membership_events(self) -> None: + """ + A basic test that a membership event in the token range is returned for the user. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + join_pos = self.get_success( + self.store.get_position_for_event(join_response["event_id"]) + ) + + after_room1_token = self.event_sources.get_current_token() + + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=join_response["event_id"], + event_pos=join_pos, + prev_event_id=None, + room_id=room_id1, + membership="join", + sender=user1_id, + ) + ], + ) + + def test_server_left_after_us_room(self) -> None: + """ + Test that when probing over part of the DAG where the server left the room *after + us*, we still see the join and leave changes. + + This is to make sure we play nicely with this behavior: When the server leaves a + room, it will insert new rows with `event_id = null` into the + `current_state_delta_stream` table for all current state. 
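+ Those null rows must not be misinterpreted as membership changes for the user.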
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + room_id1 = self.helper.create_room_as( + user2_id, + tok=user2_tok, + extra_content={ + "power_level_content_override": { + "users": { + user2_id: 100, + # Allow user1 to send state in the room + user1_id: 100, + } + } + }, + ) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + join_pos1 = self.get_success( + self.store.get_position_for_event(join_response1["event_id"]) + ) + # Make sure random other non-member state that happens to have a state_key + # matching the user ID doesn't mess with things. + self.helper.send_state( + room_id1, + event_type="foobarbazdummy", + state_key=user1_id, + body={"foo": "bar"}, + tok=user1_tok, + ) + # User1 should leave the room first + leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_pos1 = self.get_success( + self.store.get_position_for_event(leave_response1["event_id"]) + ) + + # User2 should also leave the room (everyone has left the room which means the + # server is no longer in the room). + self.helper.leave(room_id1, user2_id, tok=user2_tok) + + after_room1_token = self.event_sources.get_current_token() + + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=join_response1["event_id"], + event_pos=join_pos1, + prev_event_id=None, + room_id=room_id1, + membership="join", + sender=user1_id, + ), + CurrentStateDeltaMembership( + event_id=leave_response1["event_id"], + event_pos=leave_pos1, + prev_event_id=join_response1["event_id"], + room_id=room_id1, + membership="leave", + sender=user1_id, + ), + ], + ) + + def test_server_left_room(self) -> None: + """ + Test that when probing over part of the DAG where we leave the room causing the + server to leave the room (because we were the last local user in the room), we + still see the join and leave changes. + + This is to make sure we play nicely with this behavior: When the server leaves a + room, it will insert new rows with `event_id = null` into the + `current_state_delta_stream` table for all current state. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + room_id1 = self.helper.create_room_as( + user2_id, + tok=user2_tok, + extra_content={ + "power_level_content_override": { + "users": { + user2_id: 100, + # Allow user1 to send state in the room + user1_id: 100, + } + } + }, + ) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + join_pos1 = self.get_success( + self.store.get_position_for_event(join_response1["event_id"]) + ) + # Make sure random other non-member state that happens to have a state_key + # matching the user ID doesn't mess with things. + self.helper.send_state( + room_id1, + event_type="foobarbazdummy", + state_key=user1_id, + body={"foo": "bar"}, + tok=user1_tok, + ) + + # User2 should leave the room first. 
+ self.helper.leave(room_id1, user2_id, tok=user2_tok) + + # User1 (the person we're testing with) should also leave the room (everyone has + # left the room which means the server is no longer in the room). + leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_pos1 = self.get_success( + self.store.get_position_for_event(leave_response1["event_id"]) + ) + + after_room1_token = self.event_sources.get_current_token() + + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=join_response1["event_id"], + event_pos=join_pos1, + prev_event_id=None, + room_id=room_id1, + membership="join", + sender=user1_id, + ), + CurrentStateDeltaMembership( + event_id=leave_response1["event_id"], + event_pos=leave_pos1, + prev_event_id=join_response1["event_id"], + room_id=room_id1, + membership="leave", + sender=user1_id, + ), + ], + ) + + def test_membership_persisted_in_same_batch(self) -> None: + """ + Test batch of membership events being processed at once. This will result in all + of the memberships being stored in the `current_state_delta_stream` table with + the same `stream_ordering` even though the individual events have different + `stream_ordering`s. + """ + user1_id = self.register_user("user1", "pass") + _user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + user3_id = self.register_user("user3", "pass") + _user3_tok = self.login(user3_id, "pass") + user4_id = self.register_user("user4", "pass") + _user4_tok = self.login(user4_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + # User2 is just the designated person to create the room (we do this across the + # tests to be consistent) + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + + # Persist the user1, user3, and user4 join events in the same batch so they all + # end up in the `current_state_delta_stream` table with the same + # stream_ordering. 
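+ # For example (illustrative numbers): if the three joins get stream orderings
+ # 5, 6, and 7, the `current_state_delta_stream` rows for all three are written
+ # with `stream_id=7`, while `events.stream_ordering` keeps 5, 6, and 7.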
+ join_event1 = make_event_from_dict(
+ {
+ "sender": user1_id,
+ "type": EventTypes.Member,
+ "state_key": user1_id,
+ "content": {"membership": "join"},
+ "room_id": room_id1,
+ "depth": 0,
+ "origin_server_ts": 0,
+ "prev_events": [],
+ "auth_events": [],
+ },
+ room_version=RoomVersions.V10,
+ )
+ join_event_context1 = self.get_success(
+ self.state_handler.compute_event_context(join_event1)
+ )
+ join_event3 = make_event_from_dict(
+ {
+ "sender": user3_id,
+ "type": EventTypes.Member,
+ "state_key": user3_id,
+ "content": {"membership": "join"},
+ "room_id": room_id1,
+ "depth": 1,
+ "origin_server_ts": 1,
+ "prev_events": [],
+ "auth_events": [],
+ },
+ room_version=RoomVersions.V10,
+ )
+ join_event_context3 = self.get_success(
+ self.state_handler.compute_event_context(join_event3)
+ )
+ join_event4 = make_event_from_dict(
+ {
+ "sender": user4_id,
+ "type": EventTypes.Member,
+ "state_key": user4_id,
+ "content": {"membership": "join"},
+ "room_id": room_id1,
+ "depth": 2,
+ "origin_server_ts": 2,
+ "prev_events": [],
+ "auth_events": [],
+ },
+ room_version=RoomVersions.V10,
+ )
+ join_event_context4 = self.get_success(
+ self.state_handler.compute_event_context(join_event4)
+ )
+ self.get_success(
+ self.persistence.persist_events(
+ [
+ (join_event1, join_event_context1),
+ (join_event3, join_event_context3),
+ (join_event4, join_event_context4),
+ ]
+ )
+ )
+
+ after_room1_token = self.event_sources.get_current_token()
+
+ # Let's get membership changes from user3's perspective because it was in the
+ # middle of the batch. This way, if rows in `current_state_delta_stream` are
+ # stored with the first or last event's `stream_ordering`, we will still catch
+ # bugs.
+ membership_changes = self.get_success(
+ self.store.get_current_state_delta_membership_changes_for_user(
+ user3_id,
+ from_key=before_room1_token.room_key,
+ to_key=after_room1_token.room_key,
+ )
+ )
+
+ join_pos3 = self.get_success(
+ self.store.get_position_for_event(join_event3.event_id)
+ )
+
+ # Let the whole diff show on failure
+ self.maxDiff = None
+ self.assertEqual(
+ membership_changes,
+ [
+ CurrentStateDeltaMembership(
+ event_id=join_event3.event_id,
+ event_pos=join_pos3,
+ prev_event_id=None,
+ room_id=room_id1,
+ membership="join",
+ sender=user1_id,
+ ),
+ ],
+ )
+
+ # TODO: Test remote join where the first rows will just be the state when you joined
+
+ # TODO: Test state reset where the user gets removed from the room (when there is no
+ # corresponding leave event)
+
+ def test_excluded_room_ids(self) -> None:
+ """
+ Test that the `excluded_room_ids` option excludes changes from the specified rooms.
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + join_pos1 = self.get_success( + self.store.get_position_for_event(join_response1["event_id"]) + ) + + room_id2 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response2 = self.helper.join(room_id2, user1_id, tok=user1_tok) + join_pos2 = self.get_success( + self.store.get_position_for_event(join_response2["event_id"]) + ) + + after_room1_token = self.event_sources.get_current_token() + + # First test the the room is returned without the `excluded_room_ids` option + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=join_response1["event_id"], + event_pos=join_pos1, + prev_event_id=None, + room_id=room_id1, + membership="join", + sender=user1_id, + ), + CurrentStateDeltaMembership( + event_id=join_response2["event_id"], + event_pos=join_pos2, + prev_event_id=None, + room_id=room_id2, + membership="join", + sender=user1_id, + ), + ], + ) + + # The test that `excluded_room_ids` excludes room2 as expected + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + excluded_room_ids=[room_id2], + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=join_response1["event_id"], + event_pos=join_pos1, + prev_event_id=None, + room_id=room_id1, + membership="join", + sender=user1_id, + ) + ], + ) + + +# class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase(BaseMultiWorkerStreamTestCase): +# """ +# TODO +# """ + +# servlets = [ +# admin.register_servlets_for_client_rest_resource, +# room.register_servlets, +# login.register_servlets, +# ] + +# def default_config(self) -> dict: +# conf = super().default_config() +# conf["federation_custom_ca_list"] = [get_test_ca_cert_file()] +# return conf + +# def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: +# self.sliding_sync_handler = self.hs.get_sliding_sync_handler() +# self.store = self.hs.get_datastores().main +# self.event_sources = hs.get_event_sources() + + +# def test_sharded_event_persisters(self) -> None: +# """ +# TODO +# """ +# user1_id = self.register_user("user1", "pass") +# user1_tok = self.login(user1_id, "pass") +# user2_id = self.register_user("user2", "pass") +# user2_tok = self.login(user2_id, "pass") + +# remote_hs = self.make_worker_hs("synapse.app.generic_worker") + +# channel = make_request( +# self.reactor, +# self._hs_to_site[hs], +# "GET", +# f"/_matrix/media/r0/download/{target}/{media_id}", +# shorthand=False, +# access_token=self.access_token, +# await_result=False, +# ) + +# remote_hs + +# worker_store2 = worker_hs2.get_datastores().main +# assert isinstance(worker_store2._stream_id_gen, MultiWriterIdGenerator) +# actx = 
worker_store2._stream_id_gen.get_next() + +# self.assertEqual( +# room_id_results.keys(), +# { +# room_id1, +# # room_id2 shouldn't show up because we left before the from/to range +# # and the join event during the range happened while worker2 was stuck. +# # This means that from the perspective of the master, where the +# # `stuck_activity_token` is generated, the stream position for worker2 +# # wasn't advanced to the join yet. Looking at the `instance_map`, the +# # join technically comes after `stuck_activity_token``. +# # +# # room_id2, +# room_id3, +# }, +# ) From ec2d8dc1e3c602dadb4fac289bcd38b211f6b34d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 18:52:24 -0500 Subject: [PATCH 053/109] Create events using helper --- tests/storage/test_stream.py | 76 +++++++++++++----------------------- 1 file changed, 28 insertions(+), 48 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 64f123987af..39cb5a25c54 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -37,6 +37,7 @@ from synapse.types import JsonDict, PersistedEventPosition, RoomStreamToken from synapse.util import Clock +from tests.test_utils.event_injection import create_event from tests.unittest import HomeserverTestCase logger = logging.getLogger(__name__) @@ -809,56 +810,35 @@ def test_membership_persisted_in_same_batch(self) -> None: # Persist the user1, user3, and user4 join events in the same batch so they all # end up in the `current_state_delta_stream` table with the same # stream_ordering. - join_event1 = make_event_from_dict( - { - "sender": user1_id, - "type": EventTypes.Member, - "state_key": user1_id, - "content": {"membership": "join"}, - "room_id": room_id1, - "depth": 0, - "origin_server_ts": 0, - "prev_events": [], - "auth_events": [], - }, - room_version=RoomVersions.V10, - ) - join_event_context1 = self.get_success( - self.state_handler.compute_event_context(join_event1) - ) - join_event3 = make_event_from_dict( - { - "sender": user3_id, - "type": EventTypes.Member, - "state_key": user3_id, - "content": {"membership": "join"}, - "room_id": room_id1, - "depth": 1, - "origin_server_ts": 1, - "prev_events": [], - "auth_events": [], - }, - room_version=RoomVersions.V10, - ) - join_event_context3 = self.get_success( - self.state_handler.compute_event_context(join_event3) + join_event1, join_event_context1 = self.get_success( + create_event( + self.hs, + sender=user1_id, + type=EventTypes.Member, + state_key=user1_id, + content={"membership": "join"}, + room_id=room_id1, + ) ) - join_event4 = make_event_from_dict( - { - "sender": user4_id, - "type": EventTypes.Member, - "state_key": user4_id, - "content": {"membership": "join"}, - "room_id": room_id1, - "depth": 2, - "origin_server_ts": 2, - "prev_events": [], - "auth_events": [], - }, - room_version=RoomVersions.V10, + join_event3, join_event_context3 = self.get_success( + create_event( + self.hs, + sender=user3_id, + type=EventTypes.Member, + state_key=user3_id, + content={"membership": "join"}, + room_id=room_id1, + ) ) - join_event_context4 = self.get_success( - self.state_handler.compute_event_context(join_event4) + join_event4, join_event_context4 = self.get_success( + create_event( + self.hs, + sender=user4_id, + type=EventTypes.Member, + state_key=user4_id, + content={"membership": "join"}, + room_id=room_id1, + ) ) self.get_success( self.persistence.persist_events( From 0b9a903ca12831e431b596daacf127e53ecbd050 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 
Jun 2024 19:35:18 -0500 Subject: [PATCH 054/109] Add test that remotely joins room --- tests/storage/test_stream.py | 259 +++++++++++++++++++++++++---------- 1 file changed, 188 insertions(+), 71 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 39cb5a25c54..3b825dbbbef 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -21,24 +21,32 @@ import logging from typing import List, Tuple +from unittest.mock import AsyncMock, patch from immutabledict import immutabledict from twisted.test.proto_helpers import MemoryReactor -from synapse.api.constants import Direction, EventTypes, RelationTypes +from synapse.api.constants import Direction, EventTypes, Membership, RelationTypes from synapse.api.filtering import Filter -from synapse.api.room_versions import RoomVersions -from synapse.events import make_event_from_dict +from synapse.crypto.event_signing import add_hashes_and_signatures +from synapse.events import FrozenEventV3 +from synapse.federation.federation_client import SendJoinResult from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer from synapse.storage.databases.main.stream import CurrentStateDeltaMembership -from synapse.types import JsonDict, PersistedEventPosition, RoomStreamToken +from synapse.types import ( + JsonDict, + PersistedEventPosition, + RoomStreamToken, + UserID, + create_requester, +) from synapse.util import Clock from tests.test_utils.event_injection import create_event -from tests.unittest import HomeserverTestCase +from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase logger = logging.getLogger(__name__) @@ -884,8 +892,6 @@ def test_membership_persisted_in_same_batch(self) -> None: ], ) - # TODO: Test remote join where the first rows will just be the state when you joined - # TODO: Test state reset where the user gets removed from the room (when there is no # corresponding leave event) @@ -974,67 +980,178 @@ def test_excluded_room_ids(self) -> None: ) -# class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase(BaseMultiWorkerStreamTestCase): -# """ -# TODO -# """ - -# servlets = [ -# admin.register_servlets_for_client_rest_resource, -# room.register_servlets, -# login.register_servlets, -# ] - -# def default_config(self) -> dict: -# conf = super().default_config() -# conf["federation_custom_ca_list"] = [get_test_ca_cert_file()] -# return conf - -# def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: -# self.sliding_sync_handler = self.hs.get_sliding_sync_handler() -# self.store = self.hs.get_datastores().main -# self.event_sources = hs.get_event_sources() - - -# def test_sharded_event_persisters(self) -> None: -# """ -# TODO -# """ -# user1_id = self.register_user("user1", "pass") -# user1_tok = self.login(user1_id, "pass") -# user2_id = self.register_user("user2", "pass") -# user2_tok = self.login(user2_id, "pass") - -# remote_hs = self.make_worker_hs("synapse.app.generic_worker") - -# channel = make_request( -# self.reactor, -# self._hs_to_site[hs], -# "GET", -# f"/_matrix/media/r0/download/{target}/{media_id}", -# shorthand=False, -# access_token=self.access_token, -# await_result=False, -# ) - -# remote_hs - -# worker_store2 = worker_hs2.get_datastores().main -# assert isinstance(worker_store2._stream_id_gen, MultiWriterIdGenerator) -# actx = worker_store2._stream_id_gen.get_next() - -# self.assertEqual( -# room_id_results.keys(), -# { -# room_id1, -# # room_id2 shouldn't show 
up because we left before the from/to range -# # and the join event during the range happened while worker2 was stuck. -# # This means that from the perspective of the master, where the -# # `stuck_activity_token` is generated, the stream position for worker2 -# # wasn't advanced to the join yet. Looking at the `instance_map`, the -# # join technically comes after `stuck_activity_token``. -# # -# # room_id2, -# room_id3, -# }, -# ) +class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase( + FederatingHomeserverTestCase +): + """ + Test `get_current_state_delta_membership_changes_for_user(...)` when joining remote federated rooms. + """ + + servlets = [ + admin.register_servlets_for_client_rest_resource, + room.register_servlets, + login.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.sliding_sync_handler = self.hs.get_sliding_sync_handler() + self.store = self.hs.get_datastores().main + self.event_sources = hs.get_event_sources() + self.room_member_handler = hs.get_room_member_handler() + + def test_remote_join(self) -> None: + """ + Test remote join where the first rows will just be the state when you joined + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + intially_unjoined_room_id = f"!example:{self.OTHER_SERVER_NAME}" + + # Remotely join a room on another homeserver. + # + # To do this we have to mock the responses from the remote homeserver. We also + # patch out a bunch of event checks on our end. + create_event_source = { + "auth_events": [], + "content": { + "creator": f"@creator:{self.OTHER_SERVER_NAME}", + "room_version": self.hs.config.server.default_room_version.identifier, + }, + "depth": 0, + "origin_server_ts": 0, + "prev_events": [], + "room_id": intially_unjoined_room_id, + "sender": f"@creator:{self.OTHER_SERVER_NAME}", + "state_key": "", + "type": EventTypes.Create, + } + self.add_hashes_and_signatures_from_other_server( + create_event_source, + self.hs.config.server.default_room_version, + ) + create_event = FrozenEventV3( + create_event_source, + self.hs.config.server.default_room_version, + {}, + None, + ) + creator_join_event_source = { + "auth_events": [create_event.event_id], + "content": { + "membership": "join", + }, + "depth": 1, + "origin_server_ts": 1, + "prev_events": [], + "room_id": intially_unjoined_room_id, + "sender": f"@creator:{self.OTHER_SERVER_NAME}", + "state_key": f"@creator:{self.OTHER_SERVER_NAME}", + "type": EventTypes.Member, + } + self.add_hashes_and_signatures_from_other_server( + creator_join_event_source, + self.hs.config.server.default_room_version, + ) + creator_join_event = FrozenEventV3( + creator_join_event_source, + self.hs.config.server.default_room_version, + {}, + None, + ) + + # Our local user is going to remote join the room + join_event_source = { + "auth_events": [create_event.event_id], + "content": {"membership": "join"}, + "depth": 1, + "origin_server_ts": 100, + "prev_events": [creator_join_event.event_id], + "sender": user1_id, + "state_key": user1_id, + "room_id": intially_unjoined_room_id, + "type": EventTypes.Member, + } + add_hashes_and_signatures( + self.hs.config.server.default_room_version, + join_event_source, + self.hs.hostname, + self.hs.signing_key, + ) + join_event = FrozenEventV3( + join_event_source, + self.hs.config.server.default_room_version, + {}, + None, + ) + + mock_make_membership_event = AsyncMock( + return_value=( + self.OTHER_SERVER_NAME, + join_event, + 
self.hs.config.server.default_room_version, + ) + ) + mock_send_join = AsyncMock( + return_value=SendJoinResult( + join_event, + self.OTHER_SERVER_NAME, + state=[create_event, creator_join_event], + auth_chain=[create_event, creator_join_event], + partial_state=False, + servers_in_room=frozenset(), + ) + ) + + with patch.object( + self.room_member_handler.federation_handler.federation_client, + "make_membership_event", + mock_make_membership_event, + ), patch.object( + self.room_member_handler.federation_handler.federation_client, + "send_join", + mock_send_join, + ), patch( + "synapse.event_auth._is_membership_change_allowed", + return_value=None, + ), patch( + "synapse.handlers.federation_event.check_state_dependent_auth_rules", + return_value=None, + ): + self.get_success( + self.room_member_handler.update_membership( + requester=create_requester(user1_id), + target=UserID.from_string(user1_id), + room_id=intially_unjoined_room_id, + action=Membership.JOIN, + remote_room_hosts=[self.OTHER_SERVER_NAME], + ) + ) + + events_db_dump = self.get_success( + self.store.db_pool.simple_select_list( + table="events", + keyvalues={}, + retcols=[ + "*", + ], + desc="debug dump events", + ) + ) + + logger.info("events_db_dump: %s", events_db_dump) + + current_state_delta_stream_db_dump = self.get_success( + self.store.db_pool.simple_select_list( + table="current_state_delta_stream", + keyvalues={}, + retcols=[ + "*", + ], + desc="debug dump current_state_delta_stream", + ) + ) + + logger.info( + "current_state_delta_stream_db_dump: %s", current_state_delta_stream_db_dump + ) From 48d0acfbcda30f956d79cef873fa762f88530341 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 19:52:12 -0500 Subject: [PATCH 055/109] Actually test `get_current_state_delta_membership_changes_for_user(...)` in remote join test --- tests/storage/test_stream.py | 59 ++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 3b825dbbbef..dfca17db64b 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -1001,10 +1001,13 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: def test_remote_join(self) -> None: """ - Test remote join where the first rows will just be the state when you joined + Test remote join where the first rows in `current_state_delta_stream` will just + be the state when you joined the remote room. 
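+ (All of those rows share the stream position of the remote join.)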
""" user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") + _user1_tok = self.login(user1_id, "pass") + + before_join_token = self.event_sources.get_current_token() intially_unjoined_room_id = f"!example:{self.OTHER_SERVER_NAME}" @@ -1128,30 +1131,40 @@ def test_remote_join(self) -> None: ) ) - events_db_dump = self.get_success( - self.store.db_pool.simple_select_list( - table="events", - keyvalues={}, - retcols=[ - "*", - ], - desc="debug dump events", + after_join_token = self.event_sources.get_current_token() + + # Get the membership changes for the user at this point, the + # `current_state_delta_stream` table should look like: + # + # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | + # |-----------|------------------------------|-----------------|------------------------------|----------|---------------|----------------| + # | 2 | '!example:other.example.com' | 'm.room.member' | '@user1:test' | '$xxx' | None | 'master' | + # | 2 | '!example:other.example.com' | 'm.room.create' | '' | '$xxx' | None | 'master' | + # | 2 | '!example:other.example.com' | 'm.room.member' | '@creator:other.example.com' | '$xxx' | None | 'master' | + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_join_token.room_key, + to_key=after_join_token.room_key, ) ) - logger.info("events_db_dump: %s", events_db_dump) - - current_state_delta_stream_db_dump = self.get_success( - self.store.db_pool.simple_select_list( - table="current_state_delta_stream", - keyvalues={}, - retcols=[ - "*", - ], - desc="debug dump current_state_delta_stream", - ) + join_pos = self.get_success( + self.store.get_position_for_event(join_event.event_id) ) - logger.info( - "current_state_delta_stream_db_dump: %s", current_state_delta_stream_db_dump + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=join_event.event_id, + event_pos=join_pos, + prev_event_id=None, + room_id=intially_unjoined_room_id, + membership="join", + sender=user1_id, + ), + ], ) From 2a944ffcef16744ade6b0172fcb98c7eeb281766 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 20:20:17 -0500 Subject: [PATCH 056/109] Add state of the db in each situation --- tests/storage/test_stream.py | 68 ++++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 2 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index dfca17db64b..2ac88f18eaf 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -673,6 +673,29 @@ def test_server_left_after_us_room(self) -> None: after_room1_token = self.event_sources.get_current_token() + # Get the membership changes for the user. + # + # At this point, the `current_state_delta_stream` table should look like the + # following. When the server leaves a room, it will insert new rows with + # `event_id = null` for all current state. 
+ # + # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | + # |-----------|----------|-----------------------------|----------------|----------|---------------|---------------| + # | 2 | !x:test | 'm.room.create' | '' | $xxx | None | 'master' | + # | 3 | !x:test | 'm.room.member' | '@user2:test' | $aaa | None | 'master' | + # | 4 | !x:test | 'm.room.history_visibility' | '' | $xxx | None | 'master' | + # | 4 | !x:test | 'm.room.join_rules' | '' | $xxx | None | 'master' | + # | 4 | !x:test | 'm.room.power_levels' | '' | $xxx | None | 'master' | + # | 7 | !x:test | 'm.room.member' | '@user1:test' | $ooo | None | 'master' | + # | 8 | !x:test | 'foobarbazdummy' | '@user1:test' | $xxx | None | 'master' | + # | 9 | !x:test | 'm.room.member' | '@user1:test' | $ppp | $ooo | 'master' | + # | 10 | !x:test | 'foobarbazdummy' | '@user1:test' | None | $xxx | 'master' | + # | 10 | !x:test | 'm.room.create' | '' | None | $xxx | 'master' | + # | 10 | !x:test | 'm.room.history_visibility' | '' | None | $xxx | 'master' | + # | 10 | !x:test | 'm.room.join_rules' | '' | None | $xxx | 'master' | + # | 10 | !x:test | 'm.room.member' | '@user1:test' | None | $ppp | 'master' | + # | 10 | !x:test | 'm.room.member' | '@user2:test' | None | $aaa | 'master' | + # | 10 | !x:test | 'm.room.power_levels' | | None | $xxx | 'master' | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user1_id, @@ -761,6 +784,29 @@ def test_server_left_room(self) -> None: after_room1_token = self.event_sources.get_current_token() + # Get the membership changes for the user. + # + # At this point, the `current_state_delta_stream` table should look like the + # following. When the server leaves a room, it will insert new rows with + # `event_id = null` for all current state. + # + # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | + # |-----------|-----------|-----------------------------|---------------|----------|---------------|---------------| + # | 2 | '!x:test' | 'm.room.create' | '' | '$xxx' | None | 'master' | + # | 3 | '!x:test' | 'm.room.member' | '@user2:test' | '$aaa' | None | 'master' | + # | 4 | '!x:test' | 'm.room.history_visibility' | '' | '$xxx' | None | 'master' | + # | 4 | '!x:test' | 'm.room.join_rules' | '' | '$xxx' | None | 'master' | + # | 4 | '!x:test' | 'm.room.power_levels' | '' | '$xxx' | None | 'master' | + # | 7 | '!x:test' | 'm.room.member' | '@user1:test' | '$ooo' | None | 'master' | + # | 8 | '!x:test' | 'foobarbazdummy' | '@user1:test' | '$xxx' | None | 'master' | + # | 9 | '!x:test' | 'm.room.member' | '@user2:test' | '$bbb' | '$aaa' | 'master' | + # | 10 | '!x:test' | 'foobarbazdummy' | '@user1:test' | None | '$xxx' | 'master' | + # | 10 | '!x:test' | 'm.room.create' | '' | None | '$xxx' | 'master' | + # | 10 | '!x:test' | 'm.room.history_visibility' | '' | None | '$xxx' | 'master' | + # | 10 | '!x:test' | 'm.room.join_rules' | '' | None | '$xxx' | 'master' | + # | 10 | '!x:test' | 'm.room.member' | '@user1:test' | None | '$ooo' | 'master' | + # | 10 | '!x:test' | 'm.room.member' | '@user2:test' | None | '$bbb' | 'master' | + # | 10 | '!x:test' | 'm.room.power_levels' | '' | None | '$xxx' | 'master' | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user1_id, @@ -864,6 +910,21 @@ def test_membership_persisted_in_same_batch(self) -> None: # middle of the batch. 
This way, if rows in` current_state_delta_stream` are # stored with the first or last event's `stream_ordering`, we will still catch # bugs. + # + # At this point, the `current_state_delta_stream` table should look like (notice + # those three memberships at the end with `stream_id=7` because we persisted + # them in the same batch): + # + # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | + # |-----------|-----------|----------------------------|------------------|----------|---------------|---------------| + # | 2 | '!x:test' | 'm.room.create' | '' | '$xxx' | None | 'master' | + # | 3 | '!x:test' | 'm.room.member' | '@user2:test' | '$xxx' | None | 'master' | + # | 4 | '!x:test' | 'm.room.history_visibility'| '' | '$xxx' | None | 'master' | + # | 4 | '!x:test' | 'm.room.join_rules' | '' | '$xxx' | None | 'master' | + # | 4 | '!x:test' | 'm.room.power_levels' | '' | '$xxx' | None | 'master' | + # | 7 | '!x:test' | 'm.room.member' | '@user3:test' | '$xxx' | None | 'master' | + # | 7 | '!x:test' | 'm.room.member' | '@user1:test' | '$xxx' | None | 'master' | + # | 7 | '!x:test' | 'm.room.member' | '@user4:test' | '$xxx' | None | 'master' | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user3_id, @@ -1133,8 +1194,11 @@ def test_remote_join(self) -> None: after_join_token = self.event_sources.get_current_token() - # Get the membership changes for the user at this point, the - # `current_state_delta_stream` table should look like: + # Get the membership changes for the user. + # + # At this point, the `current_state_delta_stream` table should look like the + # following. Notice that all of the events are at the same `stream_id` because + # the current state starts out where we remotely joined: # # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | # |-----------|------------------------------|-----------------|------------------------------|----------|---------------|----------------| From 8df39d1baff8cac6aa446c8b71b3a64a8bf29a1e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 20:22:03 -0500 Subject: [PATCH 057/109] Remove redundant `instance_name` column --- tests/storage/test_stream.py | 98 ++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 2ac88f18eaf..840f9803440 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -679,23 +679,23 @@ def test_server_left_after_us_room(self) -> None: # following. When the server leaves a room, it will insert new rows with # `event_id = null` for all current state. 
         #
-        # | stream_id | room_id  | type                        | state_key      | event_id | prev_event_id | instance_name |
-        # |-----------|----------|-----------------------------|----------------|----------|---------------|---------------|
-        # | 2         | !x:test  | 'm.room.create'             | ''             | $xxx     | None          | 'master'      |
-        # | 3         | !x:test  | 'm.room.member'             | '@user2:test'  | $aaa     | None          | 'master'      |
-        # | 4         | !x:test  | 'm.room.history_visibility' | ''             | $xxx     | None          | 'master'      |
-        # | 4         | !x:test  | 'm.room.join_rules'         | ''             | $xxx     | None          | 'master'      |
-        # | 4         | !x:test  | 'm.room.power_levels'       | ''             | $xxx     | None          | 'master'      |
-        # | 7         | !x:test  | 'm.room.member'             | '@user1:test'  | $ooo     | None          | 'master'      |
-        # | 8         | !x:test  | 'foobarbazdummy'            | '@user1:test'  | $xxx     | None          | 'master'      |
-        # | 9         | !x:test  | 'm.room.member'             | '@user1:test'  | $ppp     | $ooo          | 'master'      |
-        # | 10        | !x:test  | 'foobarbazdummy'            | '@user1:test'  | None     | $xxx          | 'master'      |
-        # | 10        | !x:test  | 'm.room.create'             | ''             | None     | $xxx          | 'master'      |
-        # | 10        | !x:test  | 'm.room.history_visibility' | ''             | None     | $xxx          | 'master'      |
-        # | 10        | !x:test  | 'm.room.join_rules'         | ''             | None     | $xxx          | 'master'      |
-        # | 10        | !x:test  | 'm.room.member'             | '@user1:test'  | None     | $ppp          | 'master'      |
-        # | 10        | !x:test  | 'm.room.member'             | '@user2:test'  | None     | $aaa          | 'master'      |
-        # | 10        | !x:test  | 'm.room.power_levels'       | ''             | None     | $xxx          | 'master'      |
+        # | stream_id | room_id  | type                        | state_key      | event_id | prev_event_id |
+        # |-----------|----------|-----------------------------|----------------|----------|---------------|
+        # | 2         | !x:test  | 'm.room.create'             | ''             | $xxx     | None          |
+        # | 3         | !x:test  | 'm.room.member'             | '@user2:test'  | $aaa     | None          |
+        # | 4         | !x:test  | 'm.room.history_visibility' | ''             | $xxx     | None          |
+        # | 4         | !x:test  | 'm.room.join_rules'         | ''             | $xxx     | None          |
+        # | 4         | !x:test  | 'm.room.power_levels'       | ''             | $xxx     | None          |
+        # | 7         | !x:test  | 'm.room.member'             | '@user1:test'  | $ooo     | None          |
+        # | 8         | !x:test  | 'foobarbazdummy'            | '@user1:test'  | $xxx     | None          |
+        # | 9         | !x:test  | 'm.room.member'             | '@user1:test'  | $ppp     | $ooo          |
+        # | 10        | !x:test  | 'foobarbazdummy'            | '@user1:test'  | None     | $xxx          |
+        # | 10        | !x:test  | 'm.room.create'             | ''             | None     | $xxx          |
+        # | 10        | !x:test  | 'm.room.history_visibility' | ''             | None     | $xxx          |
+        # | 10        | !x:test  | 'm.room.join_rules'         | ''             | None     | $xxx          |
+        # | 10        | !x:test  | 'm.room.member'             | '@user1:test'  | None     | $ppp          |
+        # | 10        | !x:test  | 'm.room.member'             | '@user2:test'  | None     | $aaa          |
+        # | 10        | !x:test  | 'm.room.power_levels'       | ''             | None     | $xxx          |
         membership_changes = self.get_success(
             self.store.get_current_state_delta_membership_changes_for_user(
                 user1_id,
@@ -790,23 +790,23 @@ def test_server_left_room(self) -> None:
         # following. When the server leaves a room, it will insert new rows with
         # `event_id = null` for all current state.
# - # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | - # |-----------|-----------|-----------------------------|---------------|----------|---------------|---------------| - # | 2 | '!x:test' | 'm.room.create' | '' | '$xxx' | None | 'master' | - # | 3 | '!x:test' | 'm.room.member' | '@user2:test' | '$aaa' | None | 'master' | - # | 4 | '!x:test' | 'm.room.history_visibility' | '' | '$xxx' | None | 'master' | - # | 4 | '!x:test' | 'm.room.join_rules' | '' | '$xxx' | None | 'master' | - # | 4 | '!x:test' | 'm.room.power_levels' | '' | '$xxx' | None | 'master' | - # | 7 | '!x:test' | 'm.room.member' | '@user1:test' | '$ooo' | None | 'master' | - # | 8 | '!x:test' | 'foobarbazdummy' | '@user1:test' | '$xxx' | None | 'master' | - # | 9 | '!x:test' | 'm.room.member' | '@user2:test' | '$bbb' | '$aaa' | 'master' | - # | 10 | '!x:test' | 'foobarbazdummy' | '@user1:test' | None | '$xxx' | 'master' | - # | 10 | '!x:test' | 'm.room.create' | '' | None | '$xxx' | 'master' | - # | 10 | '!x:test' | 'm.room.history_visibility' | '' | None | '$xxx' | 'master' | - # | 10 | '!x:test' | 'm.room.join_rules' | '' | None | '$xxx' | 'master' | - # | 10 | '!x:test' | 'm.room.member' | '@user1:test' | None | '$ooo' | 'master' | - # | 10 | '!x:test' | 'm.room.member' | '@user2:test' | None | '$bbb' | 'master' | - # | 10 | '!x:test' | 'm.room.power_levels' | '' | None | '$xxx' | 'master' | + # | stream_id | room_id | type | state_key | event_id | prev_event_id | + # |-----------|-----------|-----------------------------|---------------|----------|---------------| + # | 2 | '!x:test' | 'm.room.create' | '' | '$xxx' | None | + # | 3 | '!x:test' | 'm.room.member' | '@user2:test' | '$aaa' | None | + # | 4 | '!x:test' | 'm.room.history_visibility' | '' | '$xxx' | None | + # | 4 | '!x:test' | 'm.room.join_rules' | '' | '$xxx' | None | + # | 4 | '!x:test' | 'm.room.power_levels' | '' | '$xxx' | None | + # | 7 | '!x:test' | 'm.room.member' | '@user1:test' | '$ooo' | None | + # | 8 | '!x:test' | 'foobarbazdummy' | '@user1:test' | '$xxx' | None | + # | 9 | '!x:test' | 'm.room.member' | '@user2:test' | '$bbb' | '$aaa' | + # | 10 | '!x:test' | 'foobarbazdummy' | '@user1:test' | None | '$xxx' | + # | 10 | '!x:test' | 'm.room.create' | '' | None | '$xxx' | + # | 10 | '!x:test' | 'm.room.history_visibility' | '' | None | '$xxx' | + # | 10 | '!x:test' | 'm.room.join_rules' | '' | None | '$xxx' | + # | 10 | '!x:test' | 'm.room.member' | '@user1:test' | None | '$ooo' | + # | 10 | '!x:test' | 'm.room.member' | '@user2:test' | None | '$bbb' | + # | 10 | '!x:test' | 'm.room.power_levels' | '' | None | '$xxx' | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user1_id, @@ -915,16 +915,16 @@ def test_membership_persisted_in_same_batch(self) -> None: # those three memberships at the end with `stream_id=7` because we persisted # them in the same batch): # - # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | - # |-----------|-----------|----------------------------|------------------|----------|---------------|---------------| - # | 2 | '!x:test' | 'm.room.create' | '' | '$xxx' | None | 'master' | - # | 3 | '!x:test' | 'm.room.member' | '@user2:test' | '$xxx' | None | 'master' | - # | 4 | '!x:test' | 'm.room.history_visibility'| '' | '$xxx' | None | 'master' | - # | 4 | '!x:test' | 'm.room.join_rules' | '' | '$xxx' | None | 'master' | - # | 4 | '!x:test' | 'm.room.power_levels' | '' | '$xxx' | None | 'master' | - # 
| 7 | '!x:test' | 'm.room.member' | '@user3:test' | '$xxx' | None | 'master' | - # | 7 | '!x:test' | 'm.room.member' | '@user1:test' | '$xxx' | None | 'master' | - # | 7 | '!x:test' | 'm.room.member' | '@user4:test' | '$xxx' | None | 'master' | + # | stream_id | room_id | type | state_key | event_id | prev_event_id | + # |-----------|-----------|----------------------------|------------------|----------|---------------| + # | 2 | '!x:test' | 'm.room.create' | '' | '$xxx' | None | + # | 3 | '!x:test' | 'm.room.member' | '@user2:test' | '$xxx' | None | + # | 4 | '!x:test' | 'm.room.history_visibility'| '' | '$xxx' | None | + # | 4 | '!x:test' | 'm.room.join_rules' | '' | '$xxx' | None | + # | 4 | '!x:test' | 'm.room.power_levels' | '' | '$xxx' | None | + # | 7 | '!x:test' | 'm.room.member' | '@user3:test' | '$xxx' | None | + # | 7 | '!x:test' | 'm.room.member' | '@user1:test' | '$xxx' | None | + # | 7 | '!x:test' | 'm.room.member' | '@user4:test' | '$xxx' | None | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user3_id, @@ -1200,11 +1200,11 @@ def test_remote_join(self) -> None: # following. Notice that all of the events are at the same `stream_id` because # the current state starts out where we remotely joined: # - # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | - # |-----------|------------------------------|-----------------|------------------------------|----------|---------------|----------------| - # | 2 | '!example:other.example.com' | 'm.room.member' | '@user1:test' | '$xxx' | None | 'master' | - # | 2 | '!example:other.example.com' | 'm.room.create' | '' | '$xxx' | None | 'master' | - # | 2 | '!example:other.example.com' | 'm.room.member' | '@creator:other.example.com' | '$xxx' | None | 'master' | + # | stream_id | room_id | type | state_key | event_id | prev_event_id | + # |-----------|------------------------------|-----------------|------------------------------|----------|---------------| + # | 2 | '!example:other.example.com' | 'm.room.member' | '@user1:test' | '$xxx' | None | + # | 2 | '!example:other.example.com' | 'm.room.create' | '' | '$xxx' | None | + # | 2 | '!example:other.example.com' | 'm.room.member' | '@creator:other.example.com' | '$xxx' | None | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user1_id, From b7914e76769ea330cdfa99e18fd7695f8301b02b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 21:01:54 -0500 Subject: [PATCH 058/109] Add skipped test for state resets --- tests/storage/test_stream.py | 92 ++++++++++++++++++++++++++++++++++-- 1 file changed, 88 insertions(+), 4 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 840f9803440..04a0e24154d 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -46,7 +46,7 @@ from synapse.util import Clock from tests.test_utils.event_injection import create_event -from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase +from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase, skip_unless logger = logging.getLogger(__name__) @@ -839,6 +839,7 @@ def test_server_left_room(self) -> None: ], ) + @skip_unless(False, "We don't support this yet") def test_membership_persisted_in_same_batch(self) -> None: """ Test batch of membership events being processed at once. 
This will result in all @@ -948,13 +949,96 @@ def test_membership_persisted_in_same_batch(self) -> None: prev_event_id=None, room_id=room_id1, membership="join", - sender=user1_id, + sender=user3_id, ), ], ) - # TODO: Test state reset where the user gets removed from the room (when there is no - # corresponding leave event) + @skip_unless(False, "We don't support this yet") + def test_state_reset(self) -> None: + """ + Test a state reset scenario where the user gets removed from the room (when + there is no corresponding leave event) + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + join_pos1 = self.get_success( + self.store.get_position_for_event(join_response1["event_id"]) + ) + + before_reset_token = self.event_sources.get_current_token() + + # Send another state event which we will cause the reset at + dummy_state_response = self.helper.send_state( + room_id1, + event_type="foobarbaz", + state_key="", + body={"foo": "bar"}, + tok=user2_tok, + ) + dummy_state_pos = self.get_success( + self.store.get_position_for_event(dummy_state_response["event_id"]) + ) + + # Mock a state reset removing the membership for user1 in the current state + self.get_success( + self.store.db_pool.simple_delete( + table="current_state_events", + keyvalues={ + "room_id": room_id1, + "type": EventTypes.Member, + "state_key": user1_id, + }, + desc="state reset user in current_state_delta_stream", + ) + ) + self.get_success( + self.store.db_pool.simple_insert( + table="current_state_delta_stream", + values={ + "stream_id": dummy_state_pos.stream, + "room_id": room_id1, + "type": EventTypes.Member, + "state_key": user1_id, + "event_id": None, + # FIXME: I'm not sure if a state reset should have a prev_event_id + "prev_event_id": None, + "instance_name": dummy_state_pos.instance_name, + }, + desc="state reset user in current_state_delta_stream", + ) + ) + + after_reset_token = self.event_sources.get_current_token() + + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_reset_token.room_key, + to_key=after_reset_token.room_key, + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=TODO, + event_pos=TODO, + prev_event_id=None, + room_id=room_id1, + membership="leave", + sender=user1_id, + ), + ], + ) def test_excluded_room_ids(self) -> None: """ From 7eb1806ee3279f6581996b029f80251f8aaf3d69 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 21:06:05 -0500 Subject: [PATCH 059/109] Fix lints --- tests/storage/test_stream.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 04a0e24154d..5b30d7106f7 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -966,10 +966,7 @@ def test_state_reset(self) -> None: user2_tok = self.login(user2_id, "pass") room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) - join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) - join_pos1 = self.get_success( - self.store.get_position_for_event(join_response1["event_id"]) - ) + self.helper.join(room_id1, 
user1_id, tok=user1_tok) before_reset_token = self.event_sources.get_current_token() @@ -1028,16 +1025,19 @@ def test_state_reset(self) -> None: self.maxDiff = None self.assertEqual( membership_changes, - [ - CurrentStateDeltaMembership( - event_id=TODO, - event_pos=TODO, - prev_event_id=None, - room_id=room_id1, - membership="leave", - sender=user1_id, - ), - ], + # TODO: Uncomment the expected membership. We just have a `False` value + # here so the test expectation fails and you look here. + False, + # [ + # CurrentStateDeltaMembership( + # event_id=TODO, + # event_pos=TODO, + # prev_event_id=None, + # room_id=room_id1, + # membership="leave", + # sender=user1_id, + # ), + # ], ) def test_excluded_room_ids(self) -> None: From 935b98c474f030f92bdd28cd69fcf20f3d6045fd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 00:48:17 -0500 Subject: [PATCH 060/109] All `get_current_state_delta_membership_changes_for_user(...)` tests passing --- synapse/storage/databases/main/stream.py | 80 ++++++++++++++++-------- tests/storage/test_stream.py | 39 ++++++------ 2 files changed, 75 insertions(+), 44 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index e222f36bab7..9ae1fe6c152 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -63,7 +63,7 @@ from twisted.internet import defer -from synapse.api.constants import Direction, EventTypes +from synapse.api.constants import Direction, EventTypes, Membership from synapse.api.filtering import Filter from synapse.events import EventBase from synapse.logging.context import make_deferred_yieldable, run_in_background @@ -125,12 +125,12 @@ class CurrentStateDeltaMembership: sender: The person who sent the membership event """ - event_id: str + event_id: Optional[str] event_pos: PersistedEventPosition prev_event_id: Optional[str] room_id: str membership: str - sender: str + sender: Optional[str] def generate_pagination_where_clause( @@ -819,22 +819,32 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: # longer in the room or a state reset happened and it was unset. # `stream_ordering` is unique across the Synapse instance so this should # work fine. + # + # We `COALESCE` the `instance_name` and `stream_ordering` because we prefer + # the source of truth from the events table. This gives slightly more + # accurate results when available since `current_state_delta_stream` only + # tracks that the current state is at this stream position (not what stream + # position the state event was added) and batches events at the same + # `stream_id` in certain cases. + # + # TODO: We need to add indexes for `current_state_delta_stream.event_id` and + # `current_state_delta_stream.state_key`/`current_state_delta_stream.type` + # for this to be efficient. sql = """ SELECT e.event_id, s.prev_event_id, s.room_id, - e.instance_name, - e.stream_ordering, + COALESCE(e.instance_name, s.instance_name), + COALESCE(e.stream_ordering, s.stream_id), e.topological_ordering, m.membership, e.sender FROM current_state_delta_stream AS s - INNER JOIN events AS e ON e.stream_ordering = s.stream_id - INNER JOIN room_memberships AS m ON m.event_stream_ordering = s.stream_id + LEFT JOIN events AS e ON e.event_id = s.event_id + LEFT JOIN room_memberships AS m ON m.event_id = s.event_id WHERE s.stream_id > ? AND s.stream_id <= ? - AND m.user_id = ? - AND s.state_key = m.user_id + AND s.state_key = ? AND s.type = ? 
ORDER BY s.stream_id ASC """ @@ -842,6 +852,7 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: txn.execute(sql, args) membership_changes: List[CurrentStateDeltaMembership] = [] + membership_change_map: Dict[str, CurrentStateDeltaMembership] = {} for ( event_id, prev_event_id, @@ -852,36 +863,55 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: membership, sender, ) in txn: - assert event_id is not None - # `prev_event_id` can be `None` assert room_id is not None assert instance_name is not None assert stream_ordering is not None - assert topological_ordering is not None - assert membership is not None - assert sender is not None if _filter_results( from_key, to_key, instance_name, + # TODO: This isn't always filled now topological_ordering, stream_ordering, ): - membership_changes.append( - CurrentStateDeltaMembership( - event_id=event_id, - event_pos=PersistedEventPosition( - instance_name=instance_name, - stream=stream_ordering, - ), - prev_event_id=prev_event_id, - room_id=room_id, - membership=membership, - sender=sender, + # When the server leaves a room, it will insert new rows with + # `event_id = null` for all current state. This means we might + # already have a row for the leave event and then another for the + # same leave where the `event_id=null` but the `prev_event_id` is + # pointing back at the earlier leave event. Since we're assuming the + # `event_id = null` row is a `leave` and we don't want duplicate + # membership changes in our results, let's get rid of those + # (deduplicate) (see `test_server_left_after_us_room`). + if event_id is None: + already_tracked_membership_change = membership_change_map.get( + prev_event_id ) + if ( + already_tracked_membership_change is not None + and already_tracked_membership_change.membership + == Membership.LEAVE + ): + continue + + membership_change = CurrentStateDeltaMembership( + event_id=event_id, + event_pos=PersistedEventPosition( + instance_name=instance_name, + stream=stream_ordering, + ), + prev_event_id=prev_event_id, + room_id=room_id, + membership=( + membership if membership is not None else Membership.LEAVE + ), + sender=sender, ) + membership_changes.append(membership_change) + if event_id: + membership_change_map[event_id] = membership_change + return membership_changes membership_changes = await self.db_pool.runInteraction( diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 5b30d7106f7..ffa763bff2f 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -46,7 +46,7 @@ from synapse.util import Clock from tests.test_utils.event_injection import create_event -from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase, skip_unless +from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase logger = logging.getLogger(__name__) @@ -829,17 +829,16 @@ def test_server_left_room(self) -> None: sender=user1_id, ), CurrentStateDeltaMembership( - event_id=leave_response1["event_id"], + event_id=None, # leave_response1["event_id"], event_pos=leave_pos1, prev_event_id=join_response1["event_id"], room_id=room_id1, membership="leave", - sender=user1_id, + sender=None, # user1_id, ), ], ) - @skip_unless(False, "We don't support this yet") def test_membership_persisted_in_same_batch(self) -> None: """ Test batch of membership events being processed at once. 
This will result in all
@@ -954,7 +953,6 @@ def test_membership_persisted_in_same_batch(self) -> None:
             ],
         )
 
-    @skip_unless(False, "We don't support this yet")
     def test_state_reset(self) -> None:
         """
         Test a state reset scenario where the user gets removed from the room (when
@@ -970,7 +968,7 @@ def test_state_reset(self) -> None:
 
         before_reset_token = self.event_sources.get_current_token()
 
-        # Send another state event which we will cause the reset at
+        # Send another state event to make a position for the state reset to happen at
         dummy_state_response = self.helper.send_state(
             room_id1,
             event_type="foobarbaz",
@@ -1011,6 +1009,12 @@ def test_state_reset(self) -> None:
             )
         )
 
+        # Manually bust the cache since we're just manually messing with the database
+        # and not causing an actual state reset.
+        self.store._membership_stream_cache.entity_has_changed(
+            user1_id, dummy_state_pos.stream
+        )
+
         after_reset_token = self.event_sources.get_current_token()
 
         membership_changes = self.get_success(
@@ -1025,19 +1029,16 @@ def test_state_reset(self) -> None:
         self.maxDiff = None
         self.assertEqual(
             membership_changes,
-            # TODO: Uncomment the expected membership. We just have a `False` value
-            # here so the test expectation fails and you look here.
-            False,
-            # [
-            #     CurrentStateDeltaMembership(
-            #         event_id=TODO,
-            #         event_pos=TODO,
-            #         prev_event_id=None,
-            #         room_id=room_id1,
-            #         membership="leave",
-            #         sender=user1_id,
-            #     ),
-            # ],
+            [
+                CurrentStateDeltaMembership(
+                    event_id=None,
+                    event_pos=dummy_state_pos,
+                    prev_event_id=None,
+                    room_id=room_id1,
+                    membership="leave",
+                    sender=None,  # user1_id,
+                ),
+            ],
         )
 
     def test_excluded_room_ids(self) -> None:

From f163fcf08a435ea96de334b1f88bd99a0ccbcc25 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Thu, 27 Jun 2024 01:20:42 -0500
Subject: [PATCH 061/109] Remove need for topological_ordering

---
 synapse/storage/databases/main/stream.py | 45 +++++++++++++++++++++---
 1 file changed, 40 insertions(+), 5 deletions(-)

diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
index 9ae1fe6c152..9e94cb08f63 100644
--- a/synapse/storage/databases/main/stream.py
+++ b/synapse/storage/databases/main/stream.py
@@ -412,6 +412,43 @@ def _filter_results(
     return True
 
 
+def _filter_results_by_stream(
+    lower_token: Optional[RoomStreamToken],
+    upper_token: Optional[RoomStreamToken],
+    instance_name: str,
+    stream_ordering: int,
+) -> bool:
+    """
+    This function only works with "live" tokens with `stream_ordering` only. See
+    `_filter_results(...)` if you want to work with all tokens.
+
+    Returns True if the event persisted by the given instance at the given
+    stream_ordering falls between the two tokens (taking a None
+    token to mean unbounded).
+
+    Used to filter results from fetching events in the DB against the given
+    tokens. This is necessary to handle the case where the tokens include
+    position maps, which we handle by fetching more than necessary from the DB
+    and then filtering (rather than attempting to construct a complicated SQL
+    query).
+    """
+    if lower_token:
+        assert lower_token.topological is None
+
+        # If these are live tokens we compare the stream ordering against the
+        # writer's stream position.
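+        #
+        # (Illustrative example, not part of the original patch: if this
+        # writer's position in `lower_token` is stream 5, an event persisted
+        # at stream_ordering 5 is excluded, because the lower bound is
+        # exclusive, while one at stream_ordering 6 is included.)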
+        if stream_ordering <= lower_token.get_stream_pos_for_instance(instance_name):
+            return False
+
+    if upper_token:
+        assert upper_token.topological is None
+
+        if upper_token.get_stream_pos_for_instance(instance_name) < stream_ordering:
+            return False
+
+    return True
+
+
 def filter_to_clause(event_filter: Optional[Filter]) -> Tuple[str, List[str]]:
     # NB: This may create SQL clauses that don't optimise well (and we don't
     # have indices on all possible clauses). E.g. it may create
@@ -764,6 +801,8 @@ async def get_current_state_delta_membership_changes_for_user(
         Fetch membership events (and the previous event that was replaced by that one)
         for a given user.
 
+        Note: This function only works with "live" tokens with `stream_ordering` only.
+
         We're looking for membership changes in the token range (> `from_key` and <=
         `to_key`).
 
@@ -837,7 +876,6 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]:
                     s.room_id,
                     COALESCE(e.instance_name, s.instance_name),
                     COALESCE(e.stream_ordering, s.stream_id),
-                    e.topological_ordering,
                     m.membership,
                     e.sender
@@ -859,7 +897,6 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]:
                     room_id,
                     instance_name,
                     stream_ordering,
-                    topological_ordering,
                     membership,
                     sender,
                 ) in txn:
@@ -867,12 +904,10 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]:
                     assert instance_name is not None
                     assert stream_ordering is not None
 
-                    if _filter_results(
+                    if _filter_results_by_stream(
                         from_key,
                         to_key,
                         instance_name,
-                        # TODO: This isn't always filled now
-                        topological_ordering,
                         stream_ordering,
                     ):
                         # When the server leaves a room, it will insert new rows with

From 956f20ef748b6e3caf76f91623e72b9a617ae235 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Thu, 27 Jun 2024 01:24:15 -0500
Subject: [PATCH 062/109] (currently failing) Add test to make sure membership
 changes don't re-appear if the server leaves the room later

---
 tests/storage/test_stream.py | 63 +++++++++++++++++++++++++++++++++---
 1 file changed, 59 insertions(+), 4 deletions(-)

diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py
index ffa763bff2f..00821324744 100644
--- a/tests/storage/test_stream.py
+++ b/tests/storage/test_stream.py
@@ -619,7 +619,7 @@ def test_returns_membership_events(self) -> None:
             ],
         )
 
-    def test_server_left_after_us_room(self) -> None:
+    def test_server_left_room_after_us(self) -> None:
         """
         Test that when probing over part of the DAG where the server left the room
         *after us*, we still see the join and leave changes.
@@ -652,7 +652,7 @@ def test_server_left_room_after_us(self) -> None:
         join_pos1 = self.get_success(
             self.store.get_position_for_event(join_response1["event_id"])
         )
-        # Make sure random other non-member state that happens to have a state_key
+        # Make sure that random other non-member state that happens to have a `state_key`
         # matching the user ID doesn't mess with things.
         self.helper.send_state(
             room_id1,
@@ -728,7 +728,62 @@ def test_server_left_room_after_us(self) -> None:
             ],
         )
 
-    def test_server_left_room(self) -> None:
+    def test_server_left_room_after_us_later(self) -> None:
+        """
+        Test that when the user leaves the room and, sometime later, everyone else
+        leaves the room (causing the server to leave the room), we shouldn't see any
+        membership changes.
+
+        This is to make sure we play nicely with this behavior: When the server leaves a
+        room, it will insert new rows with `event_id = null` into the
+        `current_state_delta_stream` table for all current state.
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + # User1 should leave the room first + self.helper.leave(room_id1, user1_id, tok=user1_tok) + + after_user1_leave_token = self.event_sources.get_current_token() + + # User2 should also leave the room (everyone has left the room which means the + # server is no longer in the room). + self.helper.leave(room_id1, user2_id, tok=user2_tok) + + after_server_leave_token = self.event_sources.get_current_token() + + # Join another room as user1 just to advance the stream_ordering and bust + # `_membership_stream_cache` + room_id2 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id2, user1_id, tok=user1_tok) + + # Get the membership changes for the user. + # + # At this point, the `current_state_delta_stream` table should look like the + # following. When the server leaves a room, it will insert new rows with + # `event_id = null` for all current state. + # + # TODO: Add DB rows to better see what's going on. + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=after_user1_leave_token.room_key, + to_key=after_server_leave_token.room_key, + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [], + ) + + def test_we_cause_server_left_room(self) -> None: """ Test that when probing over part of the DAG where we leave the room causing the server to leave the room (because we were the last local user in the room), we @@ -762,7 +817,7 @@ def test_server_left_room(self) -> None: join_pos1 = self.get_success( self.store.get_position_for_event(join_response1["event_id"]) ) - # Make sure random other non-member state that happens to have a state_key + # Make sure that random other non-member state that happens to have a `state_key` # matching the user ID doesn't mess with things. self.helper.send_state( room_id1, From 830e09d2defc6ae742dce30bdc822dcaf9a74092 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 10:13:02 -0500 Subject: [PATCH 063/109] Grab `prev_membership` to see whether the server left the room (fixes tests) See https://github.com/element-hq/synapse/pull/17320#discussion_r1657170493 `prev_membership` helps determine whether we should include the `event_id=null` row because we can check whether we have already left. 
- When we leave the room causing the server to leave the room, the `prev_event_id` will be our join event
- When the server leaves the room after us, the `prev_event_id` will be the leave event
- In the state reset case, `prev_event_id` will be our join event

---
 synapse/storage/databases/main/stream.py | 20 ++++++--------------
 tests/storage/test_stream.py             |  7 +++----
 2 files changed, 9 insertions(+), 18 deletions(-)

diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
index 9e94cb08f63..d94b9366ab7 100644
--- a/synapse/storage/databases/main/stream.py
+++ b/synapse/storage/databases/main/stream.py
@@ -877,10 +877,12 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]:
                     COALESCE(e.instance_name, s.instance_name),
                     COALESCE(e.stream_ordering, s.stream_id),
                     m.membership,
-                    e.sender
+                    e.sender,
+                    m_prev.membership AS prev_membership
                 FROM current_state_delta_stream AS s
                 LEFT JOIN events AS e ON e.event_id = s.event_id
                 LEFT JOIN room_memberships AS m ON m.event_id = s.event_id
+                LEFT JOIN room_memberships AS m_prev ON s.prev_event_id = m_prev.event_id
                 WHERE s.stream_id > ? AND s.stream_id <= ?
                     AND s.state_key = ?
                     AND s.type = ?
@@ -890,7 +892,6 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]:
             txn.execute(sql, args)
 
             membership_changes: List[CurrentStateDeltaMembership] = []
-            membership_change_map: Dict[str, CurrentStateDeltaMembership] = {}
             for (
                 event_id,
                 prev_event_id,
@@ -899,6 +900,7 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]:
                 stream_ordering,
                 membership,
                 sender,
+                prev_membership,
             ) in txn:
                 assert room_id is not None
                 assert instance_name is not None
@@ -918,16 +920,8 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]:
                     # `event_id = null` row is a `leave` and we don't want duplicate
                     # membership changes in our results, let's get rid of those
                     # (deduplicate) (see `test_server_left_after_us_room`).
- if event_id is None: - already_tracked_membership_change = membership_change_map.get( - prev_event_id - ) - if ( - already_tracked_membership_change is not None - and already_tracked_membership_change.membership - == Membership.LEAVE - ): - continue + if event_id is None and prev_membership == Membership.LEAVE: + continue membership_change = CurrentStateDeltaMembership( event_id=event_id, @@ -944,8 +938,6 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: ) membership_changes.append(membership_change) - if event_id: - membership_change_map[event_id] = membership_change return membership_changes diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 00821324744..1342794d377 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -1019,7 +1019,7 @@ def test_state_reset(self) -> None: user2_tok = self.login(user2_id, "pass") room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) - self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) before_reset_token = self.event_sources.get_current_token() @@ -1056,8 +1056,7 @@ def test_state_reset(self) -> None: "type": EventTypes.Member, "state_key": user1_id, "event_id": None, - # FIXME: I'm not sure if a state reset should have a prev_event_id - "prev_event_id": None, + "prev_event_id": join_response1["event_id"], "instance_name": dummy_state_pos.instance_name, }, desc="state reset user in current_state_delta_stream", @@ -1088,7 +1087,7 @@ def test_state_reset(self) -> None: CurrentStateDeltaMembership( event_id=None, event_pos=dummy_state_pos, - prev_event_id=None, + prev_event_id=join_response1["event_id"], room_id=room_id1, membership="leave", sender=None, # user1_id, From 15fcead2a5df17ee10278f1c0cdd16dbba26c76d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 11:33:41 -0500 Subject: [PATCH 064/109] Slight clean-up --- synapse/storage/databases/main/stream.py | 12 +++++------- tests/storage/test_stream.py | 6 +++--- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index d94b9366ab7..ab592dcf150 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -846,7 +846,7 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: min_from_id = from_key.stream max_to_id = to_key.get_max_stream_pos() - args: List[Any] = [min_from_id, max_to_id, user_id, EventTypes.Member] + args: List[Any] = [min_from_id, max_to_id, EventTypes.Member, user_id] # TODO: It would be good to assert that the `from_token`/`to_token` is >= # the first row in `current_state_delta_stream` for the rooms we're @@ -874,7 +874,7 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: e.event_id, s.prev_event_id, s.room_id, - COALESCE(e.instance_name, s.instance_name), + s.instance_name, COALESCE(e.stream_ordering, s.stream_id), m.membership, e.sender, @@ -884,8 +884,8 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: LEFT JOIN room_memberships AS m ON m.event_id = s.event_id LEFT JOIN room_memberships AS m_prev ON s.prev_event_id = m_prev.event_id WHERE s.stream_id > ? AND s.stream_id <= ? - AND s.state_key = ? AND s.type = ? + AND s.state_key = ? ORDER BY s.stream_id ASC """ @@ -916,10 +916,8 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: # `event_id = null` for all current state. 
This means we might # already have a row for the leave event and then another for the # same leave where the `event_id=null` but the `prev_event_id` is - # pointing back at the earlier leave event. Since we're assuming the - # `event_id = null` row is a `leave` and we don't want duplicate - # membership changes in our results, let's get rid of those - # (deduplicate) (see `test_server_left_after_us_room`). + # pointing back at the earlier leave event. We don't want to report + # the leave, if we already have a leave event. if event_id is None and prev_membership == Membership.LEAVE: continue diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 1342794d377..5a054d7f2ed 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -785,9 +785,9 @@ def test_server_left_room_after_us_later(self) -> None: def test_we_cause_server_left_room(self) -> None: """ - Test that when probing over part of the DAG where we leave the room causing the - server to leave the room (because we were the last local user in the room), we - still see the join and leave changes. + Test that when probing over part of the DAG where the user leaves the room + causing the server to leave the room (because we were the last local user in the + room), we still see the join and leave changes. This is to make sure we play nicely with this behavior: When the server leaves a room, it will insert new rows with `event_id = null` into the From 81c06bec20d2f6732100672853a140a6e19ff67d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 11:50:18 -0500 Subject: [PATCH 065/109] Detect state resets --- synapse/storage/databases/main/stream.py | 51 +++++++++++++++++------- tests/storage/test_stream.py | 15 ++++++- 2 files changed, 50 insertions(+), 16 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index ab592dcf150..19dba00a0fa 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -123,6 +123,8 @@ class CurrentStateDeltaMembership: room_id: The room ID of the membership event. membership: The membership state of the user in the room sender: The person who sent the membership event + state_reset: Whether the membership in the room was changed without a + corresponding event (state reset). """ event_id: Optional[str] @@ -131,6 +133,7 @@ class CurrentStateDeltaMembership: room_id: str membership: str sender: Optional[str] + state_reset: bool def generate_pagination_where_clause( @@ -846,7 +849,15 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: min_from_id = from_key.stream max_to_id = to_key.get_max_stream_pos() - args: List[Any] = [min_from_id, max_to_id, EventTypes.Member, user_id] + args: List[Any] = [ + EventTypes.Member, + user_id, + user_id, + min_from_id, + max_to_id, + EventTypes.Member, + user_id, + ] # TODO: It would be good to assert that the `from_token`/`to_token` is >= # the first row in `current_state_delta_stream` for the rooms we're @@ -859,30 +870,35 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: # `stream_ordering` is unique across the Synapse instance so this should # work fine. # - # We `COALESCE` the `instance_name` and `stream_ordering` because we prefer - # the source of truth from the events table. 
This gives slightly more - # accurate results when available since `current_state_delta_stream` only - # tracks that the current state is at this stream position (not what stream - # position the state event was added) and batches events at the same - # `stream_id` in certain cases. + # We `COALESCE` the `stream_ordering` because we prefer the source of truth + # from the `events` table. This gives slightly more accurate results when + # available since `current_state_delta_stream` only tracks that the current + # state is at this stream position (not what stream position the state event + # was added) and uses the *minimum* stream position for batches of events. # - # TODO: We need to add indexes for `current_state_delta_stream.event_id` and - # `current_state_delta_stream.state_key`/`current_state_delta_stream.type` - # for this to be efficient. + # The extra `LEFT JOIN` by stream position are only needed to tell a state + # reset from the server leaving the room. Both cases have `event_id = null` + # but if we can find a corresponding event at that stream position, then we + # know it was just the server leaving the room. sql = """ SELECT - e.event_id, + COALESCE(e.event_id, e_by_stream.event_id) AS event_id, s.prev_event_id, s.room_id, s.instance_name, - COALESCE(e.stream_ordering, s.stream_id), - m.membership, - e.sender, + COALESCE(e.stream_ordering, e_by_stream.stream_ordering, s.stream_id) AS stream_ordering, + COALESCE(m.membership, m_by_stream.membership) AS membership, + COALESCE(e.sender, e_by_stream.sender) AS sender, m_prev.membership AS prev_membership FROM current_state_delta_stream AS s LEFT JOIN events AS e ON e.event_id = s.event_id LEFT JOIN room_memberships AS m ON m.event_id = s.event_id LEFT JOIN room_memberships AS m_prev ON s.prev_event_id = m_prev.event_id + LEFT JOIN events AS e_by_stream ON e_by_stream.stream_ordering = s.stream_id + AND e_by_stream.type = ? + AND e_by_stream.state_key = ? + LEFT JOIN room_memberships AS m_by_stream ON m_by_stream.event_stream_ordering = s.stream_id + AND m_by_stream.user_id = ? WHERE s.stream_id > ? AND s.stream_id <= ? AND s.type = ? AND s.state_key = ? @@ -921,6 +937,12 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: if event_id is None and prev_membership == Membership.LEAVE: continue + # We can detect a state reset if there was a membership change + # without a corresponding event. 
+ state_reset = False + if event_id is None and membership != prev_membership: + state_reset = True + membership_change = CurrentStateDeltaMembership( event_id=event_id, event_pos=PersistedEventPosition( @@ -933,6 +955,7 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: membership if membership is not None else Membership.LEAVE ), sender=sender, + state_reset=state_reset, ) membership_changes.append(membership_change) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 5a054d7f2ed..acb2f0e429b 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -615,6 +615,7 @@ def test_returns_membership_events(self) -> None: room_id=room_id1, membership="join", sender=user1_id, + state_reset=False, ) ], ) @@ -716,6 +717,7 @@ def test_server_left_room_after_us(self) -> None: room_id=room_id1, membership="join", sender=user1_id, + state_reset=False, ), CurrentStateDeltaMembership( event_id=leave_response1["event_id"], @@ -724,6 +726,7 @@ def test_server_left_room_after_us(self) -> None: room_id=room_id1, membership="leave", sender=user1_id, + state_reset=False, ), ], ) @@ -882,14 +885,16 @@ def test_we_cause_server_left_room(self) -> None: room_id=room_id1, membership="join", sender=user1_id, + state_reset=False, ), CurrentStateDeltaMembership( - event_id=None, # leave_response1["event_id"], + event_id=leave_response1["event_id"], event_pos=leave_pos1, prev_event_id=join_response1["event_id"], room_id=room_id1, membership="leave", - sender=None, # user1_id, + sender=user1_id, + state_reset=False, ), ], ) @@ -1004,6 +1009,7 @@ def test_membership_persisted_in_same_batch(self) -> None: room_id=room_id1, membership="join", sender=user3_id, + state_reset=False, ), ], ) @@ -1091,6 +1097,7 @@ def test_state_reset(self) -> None: room_id=room_id1, membership="leave", sender=None, # user1_id, + state_reset=True, ), ], ) @@ -1141,6 +1148,7 @@ def test_excluded_room_ids(self) -> None: room_id=room_id1, membership="join", sender=user1_id, + state_reset=False, ), CurrentStateDeltaMembership( event_id=join_response2["event_id"], @@ -1149,6 +1157,7 @@ def test_excluded_room_ids(self) -> None: room_id=room_id2, membership="join", sender=user1_id, + state_reset=False, ), ], ) @@ -1175,6 +1184,7 @@ def test_excluded_room_ids(self) -> None: room_id=room_id1, membership="join", sender=user1_id, + state_reset=False, ) ], ) @@ -1368,6 +1378,7 @@ def test_remote_join(self) -> None: room_id=intially_unjoined_room_id, membership="join", sender=user1_id, + state_reset=False, ), ], ) From eb159c11cd7bcc0a72983da46a728282fdbed8e7 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 14:38:55 -0500 Subject: [PATCH 066/109] Don't worry about `state_reset` for now See: - Why no `COALESCE` https://github.com/element-hq/synapse/pull/17320#discussion_r1657435662 - Don't worry about `state_reset` for now, https://github.com/element-hq/synapse/pull/17320#discussion_r1657562645 --- synapse/storage/databases/main/stream.py | 53 ++++++------------------ tests/storage/test_stream.py | 47 +++++++++------------ 2 files changed, 32 insertions(+), 68 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 19dba00a0fa..c128eb5d5b4 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -123,8 +123,6 @@ class CurrentStateDeltaMembership: room_id: The room ID of the membership event. 
membership: The membership state of the user in the room sender: The person who sent the membership event - state_reset: Whether the membership in the room was changed without a - corresponding event (state reset). """ event_id: Optional[str] @@ -133,7 +131,6 @@ class CurrentStateDeltaMembership: room_id: str membership: str sender: Optional[str] - state_reset: bool def generate_pagination_where_clause( @@ -849,56 +846,37 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: min_from_id = from_key.stream max_to_id = to_key.get_max_stream_pos() - args: List[Any] = [ - EventTypes.Member, - user_id, - user_id, - min_from_id, - max_to_id, - EventTypes.Member, - user_id, - ] + args: List[Any] = [min_from_id, max_to_id, EventTypes.Member, user_id] # TODO: It would be good to assert that the `from_token`/`to_token` is >= # the first row in `current_state_delta_stream` for the rooms we're # interested in. Otherwise, we will end up with empty results and not know # it. - # We have to look-up events by `stream_ordering` because - # `current_state_delta_stream.event_id` can be `null` if the server is no - # longer in the room or a state reset happened and it was unset. - # `stream_ordering` is unique across the Synapse instance so this should - # work fine. + # We could `COALESCE(e.stream_ordering, s.stream_id)` to get more accurate + # stream positioning when available but given our usages, we can avoid the + # complexity. Between two (valid) stream tokens, we will still get all of + # the state changes. Since those events are persisted in a batch, valid + # tokens will either be before or after the batch of events. # - # We `COALESCE` the `stream_ordering` because we prefer the source of truth - # from the `events` table. This gives slightly more accurate results when - # available since `current_state_delta_stream` only tracks that the current + # `stream_ordering` from the `events` table is more accurate when available + # since the `current_state_delta_stream` table only tracks that the current # state is at this stream position (not what stream position the state event # was added) and uses the *minimum* stream position for batches of events. - # - # The extra `LEFT JOIN` by stream position are only needed to tell a state - # reset from the server leaving the room. Both cases have `event_id = null` - # but if we can find a corresponding event at that stream position, then we - # know it was just the server leaving the room. sql = """ SELECT - COALESCE(e.event_id, e_by_stream.event_id) AS event_id, + e.event_id, s.prev_event_id, s.room_id, s.instance_name, - COALESCE(e.stream_ordering, e_by_stream.stream_ordering, s.stream_id) AS stream_ordering, - COALESCE(m.membership, m_by_stream.membership) AS membership, - COALESCE(e.sender, e_by_stream.sender) AS sender, + s.stream_id, + m.membership, + e.sender, m_prev.membership AS prev_membership FROM current_state_delta_stream AS s LEFT JOIN events AS e ON e.event_id = s.event_id LEFT JOIN room_memberships AS m ON m.event_id = s.event_id LEFT JOIN room_memberships AS m_prev ON s.prev_event_id = m_prev.event_id - LEFT JOIN events AS e_by_stream ON e_by_stream.stream_ordering = s.stream_id - AND e_by_stream.type = ? - AND e_by_stream.state_key = ? - LEFT JOIN room_memberships AS m_by_stream ON m_by_stream.event_stream_ordering = s.stream_id - AND m_by_stream.user_id = ? WHERE s.stream_id > ? AND s.stream_id <= ? AND s.type = ? AND s.state_key = ? 
@@ -937,12 +915,6 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: if event_id is None and prev_membership == Membership.LEAVE: continue - # We can detect a state reset if there was a membership change - # without a corresponding event. - state_reset = False - if event_id is None and membership != prev_membership: - state_reset = True - membership_change = CurrentStateDeltaMembership( event_id=event_id, event_pos=PersistedEventPosition( @@ -955,7 +927,6 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: membership if membership is not None else Membership.LEAVE ), sender=sender, - state_reset=state_reset, ) membership_changes.append(membership_change) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index acb2f0e429b..4f8f919a24e 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -615,7 +615,6 @@ def test_returns_membership_events(self) -> None: room_id=room_id1, membership="join", sender=user1_id, - state_reset=False, ) ], ) @@ -717,7 +716,6 @@ def test_server_left_room_after_us(self) -> None: room_id=room_id1, membership="join", sender=user1_id, - state_reset=False, ), CurrentStateDeltaMembership( event_id=leave_response1["event_id"], @@ -726,7 +724,6 @@ def test_server_left_room_after_us(self) -> None: room_id=room_id1, membership="leave", sender=user1_id, - state_reset=False, ), ], ) @@ -885,16 +882,14 @@ def test_we_cause_server_left_room(self) -> None: room_id=room_id1, membership="join", sender=user1_id, - state_reset=False, ), CurrentStateDeltaMembership( - event_id=leave_response1["event_id"], + event_id=None, # leave_response1["event_id"], event_pos=leave_pos1, prev_event_id=join_response1["event_id"], room_id=room_id1, membership="leave", - sender=user1_id, - state_reset=False, + sender=None, # user1_id, ), ], ) @@ -924,22 +919,25 @@ def test_membership_persisted_in_same_batch(self) -> None: # Persist the user1, user3, and user4 join events in the same batch so they all # end up in the `current_state_delta_stream` table with the same # stream_ordering. - join_event1, join_event_context1 = self.get_success( + join_event3, join_event_context3 = self.get_success( create_event( self.hs, - sender=user1_id, + sender=user3_id, type=EventTypes.Member, - state_key=user1_id, + state_key=user3_id, content={"membership": "join"}, room_id=room_id1, ) ) - join_event3, join_event_context3 = self.get_success( + # We want to put user1 in the middle of the batch. This way, regardless of the + # implementation that inserts rows into current_state_delta_stream` (whether it + # be minimum/maximum of stream position of the batch), we will still catch bugs. + join_event1, join_event_context1 = self.get_success( create_event( self.hs, - sender=user3_id, + sender=user1_id, type=EventTypes.Member, - state_key=user3_id, + state_key=user1_id, content={"membership": "join"}, room_id=room_id1, ) @@ -957,8 +955,8 @@ def test_membership_persisted_in_same_batch(self) -> None: self.get_success( self.persistence.persist_events( [ - (join_event1, join_event_context1), (join_event3, join_event_context3), + (join_event1, join_event_context1), (join_event4, join_event_context4), ] ) @@ -966,10 +964,7 @@ def test_membership_persisted_in_same_batch(self) -> None: after_room1_token = self.event_sources.get_current_token() - # Let's get membership changes from user3's perspective because it was in the - # middle of the batch. 
This way, if rows in` current_state_delta_stream` are - # stored with the first or last event's `stream_ordering`, we will still catch - # bugs. + # Get the membership changes for the user. # # At this point, the `current_state_delta_stream` table should look like (notice # those three memberships at the end with `stream_id=7` because we persisted @@ -987,7 +982,7 @@ def test_membership_persisted_in_same_batch(self) -> None: # | 7 | '!x:test' | 'm.room.member' | '@user4:test' | '$xxx' | None | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( - user3_id, + user1_id, from_key=before_room1_token.room_key, to_key=after_room1_token.room_key, ) @@ -1003,13 +998,16 @@ def test_membership_persisted_in_same_batch(self) -> None: membership_changes, [ CurrentStateDeltaMembership( - event_id=join_event3.event_id, + event_id=join_event1.event_id, + # Ideally, this would be `join_pos1` (to match the `event_id`) but + # when events are persisted in a batch, they are all stored in the + # `current_state_delta_stream` table with the minimum + # `stream_ordering` from the batch. event_pos=join_pos3, prev_event_id=None, room_id=room_id1, membership="join", - sender=user3_id, - state_reset=False, + sender=user1_id, ), ], ) @@ -1097,7 +1095,6 @@ def test_state_reset(self) -> None: room_id=room_id1, membership="leave", sender=None, # user1_id, - state_reset=True, ), ], ) @@ -1148,7 +1145,6 @@ def test_excluded_room_ids(self) -> None: room_id=room_id1, membership="join", sender=user1_id, - state_reset=False, ), CurrentStateDeltaMembership( event_id=join_response2["event_id"], @@ -1157,7 +1153,6 @@ def test_excluded_room_ids(self) -> None: room_id=room_id2, membership="join", sender=user1_id, - state_reset=False, ), ], ) @@ -1184,7 +1179,6 @@ def test_excluded_room_ids(self) -> None: room_id=room_id1, membership="join", sender=user1_id, - state_reset=False, ) ], ) @@ -1378,7 +1372,6 @@ def test_remote_join(self) -> None: room_id=intially_unjoined_room_id, membership="join", sender=user1_id, - state_reset=False, ), ], ) From ba56350642d33332d5ab3f3a94005e408cb9f433 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 15:31:18 -0500 Subject: [PATCH 067/109] Passing current tests --- synapse/handlers/sliding_sync.py | 44 +++++++++++++++++++---------- tests/handlers/test_sliding_sync.py | 9 ++++-- 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 3ce10d3ea70..b327e340ff1 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -18,7 +18,6 @@ # # import logging -from collections import defaultdict from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple import attr @@ -48,7 +47,9 @@ logger = logging.getLogger(__name__) -def filter_membership_for_sync(*, membership: str, user_id: str, sender: str) -> bool: +def filter_membership_for_sync( + *, membership: str, user_id: str, sender: Optional[str] +) -> bool: """ Returns True if the membership event should be included in the sync response, otherwise False. @@ -65,6 +66,11 @@ def filter_membership_for_sync(*, membership: str, user_id: str, sender: str) -> # # This logic includes kicks (leave events where the sender is not the same user) and # can be read as "anything that isn't a leave or a leave with a different sender". 
+ # + # When `sender=None` and `membership=Membership.LEAVE`, it means that a state reset + # happened that removed the user from the room, or the user was the last person + # locally to leave the room which caused the server to leave the room. In both + # cases, TODO return membership != Membership.LEAVE or sender != user_id @@ -99,10 +105,10 @@ class _RoomMembershipForUser: range """ - event_id: str + event_id: Optional[str] event_pos: PersistedEventPosition membership: str - sender: str + sender: Optional[str] newly_joined: bool def copy_and_replace(self, **kwds: Any) -> "_RoomMembershipForUser": @@ -540,9 +546,11 @@ async def get_sync_room_ids_for_user( first_membership_change_by_room_id_in_from_to_range: Dict[ str, CurrentStateDeltaMembership ] = {} - non_join_event_ids_by_room_id_in_from_to_range: Dict[str, List[str]] = ( - defaultdict(list) - ) + # Keep track if the room has a non-join event in the token range so we can later + # tell if it was a `newly_joined` room. If the last membership event in the + # token range is a join and there is also some non-join in the range, we know + # they `newly_joined`. + has_non_join_event_by_room_id_in_from_to_range: Dict[str, bool] = {} for ( membership_change ) in current_state_delta_membership_changes_in_from_to_range: @@ -551,16 +559,13 @@ async def get_sync_room_ids_for_user( last_membership_change_by_room_id_in_from_to_range[room_id] = ( membership_change ) - # Only set if we haven't already set it first_membership_change_by_room_id_in_from_to_range.setdefault( room_id, membership_change ) if membership_change.membership != Membership.JOIN: - non_join_event_ids_by_room_id_in_from_to_range[room_id].append( - membership_change.event_id - ) + has_non_join_event_by_room_id_in_from_to_range[room_id] = True # 2) Fixup # @@ -574,6 +579,7 @@ async def get_sync_room_ids_for_user( ) in last_membership_change_by_room_id_in_from_to_range.values(): room_id = last_membership_change_in_from_to_range.room_id + # 3) if last_membership_change_in_from_to_range.membership == Membership.JOIN: possibly_newly_joined_room_ids.add(room_id) @@ -592,10 +598,14 @@ async def get_sync_room_ids_for_user( # 3) Figure out `newly_joined` prev_event_ids_before_token_range: List[str] = [] for possibly_newly_joined_room_id in possibly_newly_joined_room_ids: - non_joins_for_room = non_join_event_ids_by_room_id_in_from_to_range[ - possibly_newly_joined_room_id - ] - if len(non_joins_for_room) > 0: + has_non_join_in_from_to_range = ( + has_non_join_event_by_room_id_in_from_to_range.get( + possibly_newly_joined_room_id, False + ) + ) + # If the last membership event in the token range is a join and there is + # also some non-join in the range, we know they `newly_joined`. + if has_non_join_in_from_to_range: # We found a `newly_joined` room (we left and joined within the token range) filtered_sync_room_id_set[room_id] = filtered_sync_room_id_set[ room_id @@ -968,6 +978,10 @@ async def get_room_sync_data( Membership.INVITE, Membership.KNOCK, ): + # This should never happen. If someone is invited/knocked on room, then + # there should be an event for it. 
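+            # (the assert below also narrows `event_id` from `Optional[str]` to
+            # `str` for the type checker)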
+ assert rooms_membership_for_user_at_to_token.event_id is not None + invite_or_knock_event = await self.store.get_event( rooms_membership_for_user_at_to_token.event_id ) diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index 7339cb460e0..a751fef1df5 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -390,7 +390,7 @@ def test_only_newly_left_rooms_show_up(self) -> None: # Leave during the from_token/to_token range (newly_left) room_id2 = self.helper.create_room_as(user1_id, tok=user1_tok) - leave_response = self.helper.leave(room_id2, user1_id, tok=user1_tok) + _leave_response2 = self.helper.leave(room_id2, user1_id, tok=user1_tok) after_room2_token = self.event_sources.get_current_token() @@ -404,10 +404,13 @@ def test_only_newly_left_rooms_show_up(self) -> None: # Only the newly_left room should show up self.assertEqual(room_id_results.keys(), {room_id2}) - # It should be pointing to the latest membership event in the from/to range + # It should be pointing to the latest membership event in the from/to range but + # the `event_id` is `None` because we left the room causing the server to leave + # the room because no other local users are in it (quirk of the + # `current_state_delta_stream` table that we source things from) self.assertEqual( room_id_results[room_id2].event_id, - leave_response["event_id"], + None, # _leave_response2["event_id"], ) # We should *NOT* be `newly_joined` because we are instead `newly_left` self.assertEqual(room_id_results[room_id2].newly_joined, False) From f77403251cd2faf65689b785eba0a6af5366b5bd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 15:39:43 -0500 Subject: [PATCH 068/109] Add better comments --- synapse/handlers/sliding_sync.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index b327e340ff1..3dd32ae1f15 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -70,8 +70,9 @@ def filter_membership_for_sync( # When `sender=None` and `membership=Membership.LEAVE`, it means that a state reset # happened that removed the user from the room, or the user was the last person # locally to leave the room which caused the server to leave the room. In both - # cases, TODO - return membership != Membership.LEAVE or sender != user_id + # cases, we can just remove the rooms since they are no longer relevant to the user. + # They could still be added back later if they are `newly_left`. 
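+    #
+    # Illustrative truth table for the check below (`@user` is the syncing user):
+    #   join  from @user  -> included
+    #   leave from @other -> included (kick)
+    #   leave from @user  -> excluded (own leave)
+    #   leave from None   -> excluded (state reset / server left the room)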
+    return membership != Membership.LEAVE or sender not in (user_id, None)
 
 
 # We can't freeze this class because we want to update it in place with the
@@ -508,6 +509,8 @@ async def get_sync_room_ids_for_user(
             )
         )
 
+        # Filter to the rooms whose membership events we have updated to the point in
+        # time of the `to_token` (from the "1)" fixups)
         filtered_sync_room_id_set = {
             room_id: room_membership_for_user
             for room_id, room_membership_for_user in sync_room_id_set.items()

From 325856e14b97aa6eca59d4d5d3b4145d050adfe0 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Thu, 27 Jun 2024 15:57:01 -0500
Subject: [PATCH 069/109] Inclusive ranges

---
 synapse/handlers/sliding_sync.py |  3 +-
 tests/rest/client/test_sync.py   | 92 ++++++++++++++++++++++++++++++++
 2 files changed, 94 insertions(+), 1 deletion(-)

diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py
index 3dd32ae1f15..db5dd75d044 100644
--- a/synapse/handlers/sliding_sync.py
+++ b/synapse/handlers/sliding_sync.py
@@ -263,7 +263,8 @@ async def current_sync_for_user(
                 for range in list_config.ranges:
                     sliced_room_ids = [
                         room_id
-                        for room_id, _ in sorted_room_info[range[0] : range[1]]
+                        # Both sides of range are inclusive
+                        for room_id, _ in sorted_room_info[range[0] : range[1] + 1]
                     ]
 
                     ops.append(
diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py
index bd1e7d521b7..3f4f88c3d14 100644
--- a/tests/rest/client/test_sync.py
+++ b/tests/rest/client/test_sync.py
@@ -1616,6 +1616,98 @@ def test_sort_list(self) -> None:
             channel.json_body["lists"]["foo-list"],
         )
 
+    def test_sliced_windows(self) -> None:
+        """
+        Test that the `lists` `ranges` are sliced correctly. Both sides of each range
+        are inclusive.
+        """
+        user1_id = self.register_user("user1", "pass")
+        user1_tok = self.login(user1_id, "pass")
+
+        room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True)
+        room_id2 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True)
+        room_id3 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True)
+
+        # Make the Sliding Sync request for a single room
+        channel = self.make_request(
+            "POST",
+            self.sync_endpoint,
+            {
+                "lists": {
+                    "foo-list": {
+                        "ranges": [[0, 0]],
+                        "required_state": [
+                            ["m.room.join_rules", ""],
+                            ["m.room.history_visibility", ""],
+                            ["m.space.child", "*"],
+                        ],
+                        "timeline_limit": 1,
+                    }
+                }
+            },
+            access_token=user1_tok,
+        )
+        self.assertEqual(channel.code, 200, channel.json_body)
+
+        # Make sure it has the foo-list we requested
+        self.assertListEqual(
+            list(channel.json_body["lists"].keys()),
+            ["foo-list"],
+            channel.json_body["lists"].keys(),
+        )
+        # Make sure the list is sorted in the way we expect
+        self.assertListEqual(
+            list(channel.json_body["lists"]["foo-list"]["ops"]),
+            [
+                {
+                    "op": "SYNC",
+                    "range": [0, 0],
+                    "room_ids": [room_id3],
+                }
+            ],
+            channel.json_body["lists"]["foo-list"],
+        )
+
+        # Make the Sliding Sync request for the first two rooms
+        channel = self.make_request(
+            "POST",
+            self.sync_endpoint,
+            {
+                "lists": {
+                    "foo-list": {
+                        "ranges": [[0, 1]],
+                        "required_state": [
+                            ["m.room.join_rules", ""],
+                            ["m.room.history_visibility", ""],
+                            ["m.space.child", "*"],
+                        ],
+                        "timeline_limit": 1,
+                    }
+                }
+            },
+            access_token=user1_tok,
+        )
+        self.assertEqual(channel.code, 200, channel.json_body)
+
+        # Make sure it has the foo-list we requested
+        self.assertListEqual(
+            list(channel.json_body["lists"].keys()),
+            ["foo-list"],
+            channel.json_body["lists"].keys(),
+        )
+        # Make sure the list is sorted in the way we 
expect + self.assertListEqual( + list(channel.json_body["lists"]["foo-list"]["ops"]), + [ + { + "op": "SYNC", + "range": [0, 1], + "room_ids": [room_id3, room_id2], + } + ], + channel.json_body["lists"]["foo-list"], + ) + def test_rooms_limited_initial_sync(self) -> None: """ Test that we mark `rooms` as `limited=True` when we saturate the `timeline_limit` From 63c7b5017ad82ee20bc2ae5898b051a2660cf188 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 16:34:41 -0500 Subject: [PATCH 070/109] (doesn't work) Add test for batch persisting multiple member events for the same user --- tests/storage/test_stream.py | 121 +++++++++++++++++++++++++++++++++-- 1 file changed, 115 insertions(+), 6 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 4f8f919a24e..53a58bd82a9 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -46,7 +46,7 @@ from synapse.util import Clock from tests.test_utils.event_injection import create_event -from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase +from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase, skip_unless logger = logging.getLogger(__name__) @@ -894,12 +894,12 @@ def test_we_cause_server_left_room(self) -> None: ], ) - def test_membership_persisted_in_same_batch(self) -> None: + def test_different_user_membership_persisted_in_same_batch(self) -> None: """ - Test batch of membership events being processed at once. This will result in all - of the memberships being stored in the `current_state_delta_stream` table with - the same `stream_ordering` even though the individual events have different - `stream_ordering`s. + Test batch of membership events from different users being processed at once. + This will result in all of the memberships being stored in the + `current_state_delta_stream` table with the same `stream_ordering` even though + the individual events have different `stream_ordering`s. """ user1_id = self.register_user("user1", "pass") _user1_tok = self.login(user1_id, "pass") @@ -1012,6 +1012,115 @@ def test_membership_persisted_in_same_batch(self) -> None: ], ) + @skip_unless( + False, + "persist code does not support multiple membership events for the same user in the same batch", + ) + def test_membership_persisted_in_same_batch(self) -> None: + """ + Test batch of membership events for the same user being processed at once. + + This *should* (doesn't happen currently) result in all of the memberships being + stored in the `current_state_delta_stream` table with the same `stream_ordering` + even though the individual events have different `stream_ordering`s. + + FIXME: Currently, only the `join_event` is recorded in the `current_state_delta_stream` + table. + """ + user1_id = self.register_user("user1", "pass") + _user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + # User2 is just the designated person to create the room (we do this across the + # tests to be consistent) + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + + # Persist a timeline event sandwiched between two membership events so they end + # up in the `current_state_delta_stream` table with the same `stream_id`. 
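+        # (join -> message -> leave for the same user, persisted as one batch below)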
+ join_event, join_event_context = self.get_success( + create_event( + self.hs, + sender=user1_id, + type=EventTypes.Member, + state_key=user1_id, + content={"membership": "join"}, + room_id=room_id1, + ) + ) + timeline_event, timeline_event_context = self.get_success( + create_event( + self.hs, + sender=user1_id, + type=EventTypes.Message, + state_key=user1_id, + content={"body": "foo bar", "msgtype": "m.text"}, + room_id=room_id1, + ) + ) + leave_event, leave_event_context = self.get_success( + create_event( + self.hs, + sender=user1_id, + type=EventTypes.Member, + state_key=user1_id, + content={"membership": "leave"}, + room_id=room_id1, + ) + ) + self.get_success( + self.persistence.persist_events( + [ + (join_event, join_event_context), + (timeline_event, timeline_event_context), + (leave_event, leave_event_context), + ] + ) + ) + + after_room1_token = self.event_sources.get_current_token() + + # Get the membership changes for the user. + # + # At this point, the `current_state_delta_stream` table should look like (notice + # those three memberships at the end with `stream_id=7` because we persisted + # them in the same batch): + # + # TODO: DB rows to better see what's going on. + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + ) + ) + + join_pos = self.get_success( + self.store.get_position_for_event(join_event.event_id) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=leave_event.event_id, + # Ideally, this would be `leave_pos` (to match the `event_id`) but + # when events are persisted in a batch, they are all stored in the + # `current_state_delta_stream` table with the minimum + # `stream_ordering` from the batch. 
+ event_pos=join_pos, # leave_pos, + prev_event_id=None, + room_id=room_id1, + membership="leave", + sender=user1_id, + ), + ], + ) + def test_state_reset(self) -> None: """ Test a state reset scenario where the user gets removed from the room (when From 1158058aa52e47d0463b44f115222e0e122e045e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 17:58:31 -0500 Subject: [PATCH 071/109] Opt for tackling more batch scenarios in future PRs --- tests/rest/client/test_sync.py | 2 +- tests/storage/test_stream.py | 111 +-------------------------------- 2 files changed, 2 insertions(+), 111 deletions(-) diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 3f4f88c3d14..766c8850d0c 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1624,7 +1624,7 @@ def test_sliced_windows(self) -> None: user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") - room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) + _room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) room_id2 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) room_id3 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 53a58bd82a9..e420e680e27 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -46,7 +46,7 @@ from synapse.util import Clock from tests.test_utils.event_injection import create_event -from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase, skip_unless +from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase logger = logging.getLogger(__name__) @@ -1012,115 +1012,6 @@ def test_different_user_membership_persisted_in_same_batch(self) -> None: ], ) - @skip_unless( - False, - "persist code does not support multiple membership events for the same user in the same batch", - ) - def test_membership_persisted_in_same_batch(self) -> None: - """ - Test batch of membership events for the same user being processed at once. - - This *should* (doesn't happen currently) result in all of the memberships being - stored in the `current_state_delta_stream` table with the same `stream_ordering` - even though the individual events have different `stream_ordering`s. - - FIXME: Currently, only the `join_event` is recorded in the `current_state_delta_stream` - table. - """ - user1_id = self.register_user("user1", "pass") - _user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - - before_room1_token = self.event_sources.get_current_token() - - # User2 is just the designated person to create the room (we do this across the - # tests to be consistent) - room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) - - # Persist a timeline event sandwiched between two membership events so they end - # up in the `current_state_delta_stream` table with the same `stream_id`. 
- join_event, join_event_context = self.get_success( - create_event( - self.hs, - sender=user1_id, - type=EventTypes.Member, - state_key=user1_id, - content={"membership": "join"}, - room_id=room_id1, - ) - ) - timeline_event, timeline_event_context = self.get_success( - create_event( - self.hs, - sender=user1_id, - type=EventTypes.Message, - state_key=user1_id, - content={"body": "foo bar", "msgtype": "m.text"}, - room_id=room_id1, - ) - ) - leave_event, leave_event_context = self.get_success( - create_event( - self.hs, - sender=user1_id, - type=EventTypes.Member, - state_key=user1_id, - content={"membership": "leave"}, - room_id=room_id1, - ) - ) - self.get_success( - self.persistence.persist_events( - [ - (join_event, join_event_context), - (timeline_event, timeline_event_context), - (leave_event, leave_event_context), - ] - ) - ) - - after_room1_token = self.event_sources.get_current_token() - - # Get the membership changes for the user. - # - # At this point, the `current_state_delta_stream` table should look like (notice - # those three memberships at the end with `stream_id=7` because we persisted - # them in the same batch): - # - # TODO: DB rows to better see what's going on. - membership_changes = self.get_success( - self.store.get_current_state_delta_membership_changes_for_user( - user1_id, - from_key=before_room1_token.room_key, - to_key=after_room1_token.room_key, - ) - ) - - join_pos = self.get_success( - self.store.get_position_for_event(join_event.event_id) - ) - - # Let the whole diff show on failure - self.maxDiff = None - self.assertEqual( - membership_changes, - [ - CurrentStateDeltaMembership( - event_id=leave_event.event_id, - # Ideally, this would be `leave_pos` (to match the `event_id`) but - # when events are persisted in a batch, they are all stored in the - # `current_state_delta_stream` table with the minimum - # `stream_ordering` from the batch. - event_pos=join_pos, # leave_pos, - prev_event_id=None, - room_id=room_id1, - membership="leave", - sender=user1_id, - ), - ], - ) - def test_state_reset(self) -> None: """ Test a state reset scenario where the user gets removed from the room (when From 32b8b68df67c6ef4a11921704c570236d2d08592 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 18:13:34 -0500 Subject: [PATCH 072/109] Add TODO to handle state resets See https://github.com/element-hq/synapse/pull/17320#discussion_r1656548733 --- synapse/handlers/sliding_sync.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index db5dd75d044..5dc98679071 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1002,6 +1002,12 @@ async def get_room_sync_data( stripped_state.append(strip_event(invite_or_knock_event)) + # TODO: Handle state resets. For example, if we see + # `rooms_membership_for_user_at_to_token.membership = Membership.LEAVE` but + # `required_state` doesn't include it, we should indicate to the client that a + # state reset happened. Perhaps we should indicate this by setting `initial: + # True` and empty `required_state`. 
+ return SlidingSyncResult.RoomResult( # TODO: Dummy value name=None, From 9e53336a71f3567c451456d778e0606cef19cde1 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 1 Jul 2024 13:44:00 -0500 Subject: [PATCH 073/109] Avoid fetching full events for `prev_event_ids` See https://github.com/element-hq/synapse/pull/17320#discussion_r1658832755 --- synapse/handlers/sliding_sync.py | 72 ++++++------------------ synapse/storage/databases/main/stream.py | 51 +++++++++++++---- 2 files changed, 59 insertions(+), 64 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 5dc98679071..c9285d23c02 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -464,7 +464,6 @@ async def get_sync_room_ids_for_user( # Since we fetched a snapshot of the users room list at some point in time after # the from/to tokens, we need to revert/rewind some membership changes to match # the point in time of the `to_token`. - prev_event_ids_in_from_to_range: List[str] = [] for ( room_id, first_membership_change_after_to_token, @@ -475,40 +474,13 @@ async def get_sync_room_ids_for_user( # 1b) 1c) From the first membership event after the `to_token`, step backward to the # previous membership that would apply to the from/to range. else: - prev_event_ids_in_from_to_range.append( - first_membership_change_after_to_token.prev_event_id - ) - - # 1) Fixup (more) - # - # 1b) 1c) Fetch the previous membership events that apply to the from/to range - # and fixup our working list. - prev_events_in_from_to_range = await self.store.get_events( - prev_event_ids_in_from_to_range - ) - for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): - # These fields should be present for all persisted events - assert ( - prev_event_in_from_to_range.internal_metadata.instance_name is not None - ) - assert ( - prev_event_in_from_to_range.internal_metadata.stream_ordering - is not None - ) - - # 1b) 1c) Update the membership with what we found - sync_room_id_set[prev_event_in_from_to_range.room_id] = ( - _RoomMembershipForUser( - event_id=prev_event_in_from_to_range.event_id, - event_pos=PersistedEventPosition( - instance_name=prev_event_in_from_to_range.internal_metadata.instance_name, - stream=prev_event_in_from_to_range.internal_metadata.stream_ordering, - ), - membership=prev_event_in_from_to_range.membership, - sender=prev_event_in_from_to_range.sender, + sync_room_id_set[room_id] = _RoomMembershipForUser( + event_id=first_membership_change_after_to_token.prev_event_id, + event_pos=first_membership_change_after_to_token.prev_event_pos, + membership=first_membership_change_after_to_token.prev_membership, + sender=first_membership_change_after_to_token.prev_sender, newly_joined=False, ) - ) # Filter the rooms that that we have updated room membership events to the point # in time of the `to_token` (from the "1)" fixups) @@ -600,12 +572,9 @@ async def get_sync_room_ids_for_user( ) # 3) Figure out `newly_joined` - prev_event_ids_before_token_range: List[str] = [] - for possibly_newly_joined_room_id in possibly_newly_joined_room_ids: + for room_id in possibly_newly_joined_room_ids: has_non_join_in_from_to_range = ( - has_non_join_event_by_room_id_in_from_to_range.get( - possibly_newly_joined_room_id, False - ) + has_non_join_event_by_room_id_in_from_to_range.get(room_id, False) ) # If the last membership event in the token range is a join and there is # also some non-join in the range, we know they `newly_joined`. 
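
Taken together, these hunks reduce the `newly_joined` check to a boolean
expression over data the query already returns. A sketch with abbreviated
names (not the literal patch code):

    newly_joined = last_membership_in_range == Membership.JOIN and (
        has_non_join_in_from_to_range          # left/banned and re-joined within the range
        or prev_event_id is None               # first join ever
        or prev_membership != Membership.JOIN  # not joined just before the range
    )
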
@@ -618,6 +587,9 @@ async def get_sync_room_ids_for_user( prev_event_id = first_membership_change_by_room_id_in_from_to_range[ room_id ].prev_event_id + prev_membership = first_membership_change_by_room_id_in_from_to_range[ + room_id + ].prev_membership if prev_event_id is None: # We found a `newly_joined` room (we are joining the room for the @@ -625,22 +597,14 @@ async def get_sync_room_ids_for_user( filtered_sync_room_id_set[room_id] = filtered_sync_room_id_set[ room_id ].copy_and_replace(newly_joined=True) - else: - # Last resort, we need to step back to the previous membership event - # just before the token range to see if we're joined then or not. - prev_event_ids_before_token_range.append(prev_event_id) - - # 3) more - prev_events_before_token_range = await self.store.get_events( - prev_event_ids_before_token_range - ) - for prev_event_before_token_range in prev_events_before_token_range.values(): - if prev_event_before_token_range.membership != Membership.JOIN: - # We found a `newly_joined` room (we left before the token range - # and joined within the token range) - filtered_sync_room_id_set[room_id] = filtered_sync_room_id_set[ - room_id - ].copy_and_replace(newly_joined=True) + # Last resort, we need to step back to the previous membership event + # just before the token range to see if we're joined then or not. + elif prev_membership != Membership.JOIN: + # We found a `newly_joined` room (we left before the token range + # and joined within the token range) + filtered_sync_room_id_set[room_id] = filtered_sync_room_id_set[ + room_id + ].copy_and_replace(newly_joined=True) return filtered_sync_room_id_set diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index c128eb5d5b4..29f675ae441 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -125,12 +125,17 @@ class CurrentStateDeltaMembership: sender: The person who sent the membership event """ + room_id: str + # Event event_id: Optional[str] event_pos: PersistedEventPosition - prev_event_id: Optional[str] - room_id: str membership: str sender: Optional[str] + # Prev event + prev_event_id: Optional[str] + prev_event_pos: Optional[PersistedEventPosition] + prev_membership: Optional[str] + prev_sender: Optional[str] def generate_pagination_where_clause( @@ -865,18 +870,22 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: # was added) and uses the *minimum* stream position for batches of events. sql = """ SELECT - e.event_id, - s.prev_event_id, s.room_id, + e.event_id, s.instance_name, s.stream_id, m.membership, e.sender, - m_prev.membership AS prev_membership + s.prev_event_id, + e_prev.instance_name AS prev_instance_name, + e_prev.stream_ordering AS prev_stream_ordering, + m_prev.membership AS prev_membership, + e_prev.sender AS prev_sender FROM current_state_delta_stream AS s LEFT JOIN events AS e ON e.event_id = s.event_id LEFT JOIN room_memberships AS m ON m.event_id = s.event_id - LEFT JOIN room_memberships AS m_prev ON s.prev_event_id = m_prev.event_id + LEFT JOIN events AS e_prev ON e_prev.event_id = s.prev_event_id + LEFT JOIN room_memberships AS m_prev ON m_prev.event_id = s.prev_event_id WHERE s.stream_id > ? AND s.stream_id <= ? AND s.type = ? AND s.state_key = ? 
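
With the extra `LEFT JOIN`s in the query above, each delta row now carries the
previous membership alongside the current one, so the handler can rewind a
membership without a second `get_events` round-trip. For a user who joins and
then leaves a room, the rows come back shaped roughly like this (illustrative
values):

    (room_id='!r:test', event_id='$join',  membership='join',  prev_event_id=None,    prev_membership=None)
    (room_id='!r:test', event_id='$leave', membership='leave', prev_event_id='$join', prev_membership='join')
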
@@ -887,14 +896,17 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: membership_changes: List[CurrentStateDeltaMembership] = [] for ( - event_id, - prev_event_id, room_id, + event_id, instance_name, stream_ordering, membership, sender, + prev_event_id, + prev_instance_name, + prev_stream_ordering, prev_membership, + prev_sender, ) in txn: assert room_id is not None assert instance_name is not None @@ -916,17 +928,36 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: continue membership_change = CurrentStateDeltaMembership( + room_id=room_id, + # Event event_id=event_id, event_pos=PersistedEventPosition( instance_name=instance_name, stream=stream_ordering, ), - prev_event_id=prev_event_id, - room_id=room_id, membership=( membership if membership is not None else Membership.LEAVE ), sender=sender, + # Prev event + prev_event_id=prev_event_id, + prev_event_pos=( + PersistedEventPosition( + instance_name=prev_instance_name, + stream=prev_stream_ordering, + ) + if ( + prev_instance_name is not None + and prev_stream_ordering is not None + ) + else None + ), + prev_membership=( + prev_membership + if prev_membership is not None + else Membership.LEAVE + ), + prev_sender=prev_sender, ) membership_changes.append(membership_change) From a4263bf92513ecb395fc646dd783badecd2b0c3a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 1 Jul 2024 18:56:45 -0500 Subject: [PATCH 074/109] Update stream tests with prev event info --- synapse/storage/databases/main/stream.py | 24 +++---- tests/storage/test_stream.py | 80 +++++++++++++++++------- 2 files changed, 71 insertions(+), 33 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 29f675ae441..7e6beb52395 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -918,12 +918,13 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: instance_name, stream_ordering, ): - # When the server leaves a room, it will insert new rows with - # `event_id = null` for all current state. This means we might - # already have a row for the leave event and then another for the - # same leave where the `event_id=null` but the `prev_event_id` is - # pointing back at the earlier leave event. We don't want to report - # the leave, if we already have a leave event. + # When the server leaves a room, it will insert new rows into the + # `current_state_delta_stream` table with `event_id = null` for all + # current state. This means we might already have a row for the + # leave event and then another for the same leave where the + # `event_id=null` but the `prev_event_id` is pointing back at the + # earlier leave event. We don't want to report the leave, if we + # already have a leave event. if event_id is None and prev_membership == Membership.LEAVE: continue @@ -935,6 +936,11 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: instance_name=instance_name, stream=stream_ordering, ), + # When `s.event_id = null`, we won't be able to get respective + # `room_membership` but can assume the user has left the room + # because this only happens when the server leaves a room + # (meaning everyone locally left) or a state reset which removed + # the person from the room. 
membership=( membership if membership is not None else Membership.LEAVE ), @@ -952,11 +958,7 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: ) else None ), - prev_membership=( - prev_membership - if prev_membership is not None - else Membership.LEAVE - ), + prev_membership=prev_membership, prev_sender=prev_sender, ) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index e420e680e27..aad46b1b445 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -609,12 +609,15 @@ def test_returns_membership_events(self) -> None: membership_changes, [ CurrentStateDeltaMembership( + room_id=room_id1, event_id=join_response["event_id"], event_pos=join_pos, - prev_event_id=None, - room_id=room_id1, membership="join", sender=user1_id, + prev_event_id=None, + prev_event_pos=None, + prev_membership=None, + prev_sender=None, ) ], ) @@ -710,20 +713,26 @@ def test_server_left_room_after_us(self) -> None: membership_changes, [ CurrentStateDeltaMembership( + room_id=room_id1, event_id=join_response1["event_id"], event_pos=join_pos1, - prev_event_id=None, - room_id=room_id1, membership="join", sender=user1_id, + prev_event_id=None, + prev_event_pos=None, + prev_membership=None, + prev_sender=None, ), CurrentStateDeltaMembership( + room_id=room_id1, event_id=leave_response1["event_id"], event_pos=leave_pos1, - prev_event_id=join_response1["event_id"], - room_id=room_id1, membership="leave", sender=user1_id, + prev_event_id=join_response1["event_id"], + prev_event_pos=join_pos1, + prev_membership="join", + prev_sender=user1_id, ), ], ) @@ -876,20 +885,26 @@ def test_we_cause_server_left_room(self) -> None: membership_changes, [ CurrentStateDeltaMembership( + room_id=room_id1, event_id=join_response1["event_id"], event_pos=join_pos1, - prev_event_id=None, - room_id=room_id1, membership="join", sender=user1_id, + prev_event_id=None, + prev_event_pos=None, + prev_membership=None, + prev_sender=None, ), CurrentStateDeltaMembership( + room_id=room_id1, event_id=None, # leave_response1["event_id"], event_pos=leave_pos1, - prev_event_id=join_response1["event_id"], - room_id=room_id1, membership="leave", sender=None, # user1_id, + prev_event_id=join_response1["event_id"], + prev_event_pos=join_pos1, + prev_membership="join", + prev_sender=user1_id, ), ], ) @@ -998,16 +1013,19 @@ def test_different_user_membership_persisted_in_same_batch(self) -> None: membership_changes, [ CurrentStateDeltaMembership( + room_id=room_id1, event_id=join_event1.event_id, # Ideally, this would be `join_pos1` (to match the `event_id`) but # when events are persisted in a batch, they are all stored in the # `current_state_delta_stream` table with the minimum # `stream_ordering` from the batch. 
event_pos=join_pos3, - prev_event_id=None, - room_id=room_id1, membership="join", sender=user1_id, + prev_event_id=None, + prev_event_pos=None, + prev_membership=None, + prev_sender=None, ), ], ) @@ -1024,6 +1042,9 @@ def test_state_reset(self) -> None: room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + join_pos1 = self.get_success( + self.store.get_position_for_event(join_response1["event_id"]) + ) before_reset_token = self.event_sources.get_current_token() @@ -1089,12 +1110,15 @@ def test_state_reset(self) -> None: membership_changes, [ CurrentStateDeltaMembership( + room_id=room_id1, event_id=None, event_pos=dummy_state_pos, - prev_event_id=join_response1["event_id"], - room_id=room_id1, membership="leave", sender=None, # user1_id, + prev_event_id=join_response1["event_id"], + prev_event_pos=join_pos1, + prev_membership="join", + prev_sender=user1_id, ), ], ) @@ -1139,20 +1163,26 @@ def test_excluded_room_ids(self) -> None: membership_changes, [ CurrentStateDeltaMembership( + room_id=room_id1, event_id=join_response1["event_id"], event_pos=join_pos1, - prev_event_id=None, - room_id=room_id1, membership="join", sender=user1_id, + prev_event_id=None, + prev_event_pos=None, + prev_membership=None, + prev_sender=None, ), CurrentStateDeltaMembership( + room_id=room_id2, event_id=join_response2["event_id"], event_pos=join_pos2, - prev_event_id=None, - room_id=room_id2, membership="join", sender=user1_id, + prev_event_id=None, + prev_event_pos=None, + prev_membership=None, + prev_sender=None, ), ], ) @@ -1173,12 +1203,15 @@ def test_excluded_room_ids(self) -> None: membership_changes, [ CurrentStateDeltaMembership( + room_id=room_id1, event_id=join_response1["event_id"], event_pos=join_pos1, - prev_event_id=None, - room_id=room_id1, membership="join", sender=user1_id, + prev_event_id=None, + prev_event_pos=None, + prev_membership=None, + prev_sender=None, ) ], ) @@ -1366,12 +1399,15 @@ def test_remote_join(self) -> None: membership_changes, [ CurrentStateDeltaMembership( + room_id=intially_unjoined_room_id, event_id=join_event.event_id, event_pos=join_pos, - prev_event_id=None, - room_id=intially_unjoined_room_id, membership="join", sender=user1_id, + prev_event_id=None, + prev_event_pos=None, + prev_membership=None, + prev_sender=None, ), ], ) From 10d78d66b7f2e28c8391da7fc479b329eeddf3cd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 1 Jul 2024 19:04:46 -0500 Subject: [PATCH 075/109] Protect for non-existent prev events --- synapse/handlers/sliding_sync.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index c9285d23c02..8622ef84726 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -474,13 +474,26 @@ async def get_sync_room_ids_for_user( # 1b) 1c) From the first membership event after the `to_token`, step backward to the # previous membership that would apply to the from/to range. 
else: - sync_room_id_set[room_id] = _RoomMembershipForUser( - event_id=first_membership_change_after_to_token.prev_event_id, - event_pos=first_membership_change_after_to_token.prev_event_pos, - membership=first_membership_change_after_to_token.prev_membership, - sender=first_membership_change_after_to_token.prev_sender, - newly_joined=False, - ) + # We don't expect these fields to be `None` if we have a `prev_event_id` + # but we're being defensive since it's possible that the prev event was + # culled from the database. + if ( + first_membership_change_after_to_token.prev_event_pos is not None + and first_membership_change_after_to_token.prev_membership + is not None + ): + sync_room_id_set[room_id] = _RoomMembershipForUser( + event_id=first_membership_change_after_to_token.prev_event_id, + event_pos=first_membership_change_after_to_token.prev_event_pos, + membership=first_membership_change_after_to_token.prev_membership, + sender=first_membership_change_after_to_token.prev_sender, + newly_joined=False, + ) + else: + # If we can't find the previous membership event, we shouldn't + # include the room in the sync response since we can't determine the + # exact membership state and shouldn't rely on the current snapshot. + sync_room_id_set.pop(room_id, None) # Filter the rooms that that we have updated room membership events to the point # in time of the `to_token` (from the "1)" fixups) From e4195875b93f7f865cf4a26fc49d2f388a86eb54 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 1 Jul 2024 21:21:38 -0500 Subject: [PATCH 076/109] Protect from no timeline_events --- synapse/handlers/sliding_sync.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index cbd7cfd57b0..ca7d31276dd 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1161,8 +1161,9 @@ async def get_room_sync_data( if state_type == EventTypes.Member and state_key == StateKeys.LAZY: # Everyone in the timeline is relevant timeline_membership: Set[str] = set() - for timeline_event in timeline_events: - timeline_membership.add(timeline_event.sender) + if timeline_events is not None: + for timeline_event in timeline_events: + timeline_membership.add(timeline_event.sender) for user_id in timeline_membership: required_state_types.append((EventTypes.Member, user_id)) From 2bd6cba76dc4f0788745c9f41680994be50f19bd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 1 Jul 2024 21:42:11 -0500 Subject: [PATCH 077/109] Select `to_key if to_key else from_key` See https://github.com/element-hq/synapse/pull/17320#discussion_r1646591886 --- synapse/storage/databases/main/stream.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 7e6beb52395..f96032c9533 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -1827,7 +1827,9 @@ async def paginate_room_events( and to_key is not None and to_key.is_before_or_eq(from_key) ): - return [], from_key + # Token selection matches what we do in `_paginate_room_events_txn` if there + # are no rows + return [], to_key if to_key else from_key # Or vice-versa, if we're looking backwards and our `from_key` is already before # our `to_key`. 
         elif (
             order == "b"
             and to_key is not None
             and from_key.is_before_or_eq(to_key)
         ):
-            return [], from_key
+            # Token selection matches what we do in `_paginate_room_events_txn` if there
+            # are no rows
+            return [], to_key if to_key else from_key
 
         rows, token = await self.db_pool.runInteraction(
             "paginate_room_events",

From 7c9513ccb1d6e4ab296395fe171318a9a128d052 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Mon, 1 Jul 2024 21:49:41 -0500
Subject: [PATCH 079/109] Add missing test description

---
 tests/handlers/test_sliding_sync.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py
index a751fef1df5..3d37a696d5b 100644
--- a/tests/handlers/test_sliding_sync.py
+++ b/tests/handlers/test_sliding_sync.py
@@ -1410,7 +1410,8 @@ def test_newly_joined_with_leave_join_in_token_range(
         self,
     ) -> None:
         """
-        Test that `newly_joined` TODO
+        Test that even though we're joined before the token range, if we leave and join
+        within the token range, it's still counted as `newly_joined`.
         """
         user1_id = self.register_user("user1", "pass")
         user1_tok = self.login(user1_id, "pass")

From 78df54af2df466232483af9a1e8faeedb5c4c401 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Mon, 1 Jul 2024 22:40:18 -0500
Subject: [PATCH 080/109] Get state at leave/ban

---
 synapse/handlers/sliding_sync.py | 112 ++++++++++++++++++++-----------
 1 file changed, 73 insertions(+), 39 deletions(-)

diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py
index ca7d31276dd..0d04ba0d640 100644
--- a/synapse/handlers/sliding_sync.py
+++ b/synapse/handlers/sliding_sync.py
@@ -44,6 +44,7 @@
     PersistedEventPosition,
     Requester,
     RoomStreamToken,
+    StateMap,
     StreamKeyType,
     StreamToken,
     UserID,
@@ -133,8 +134,8 @@ def from_room_config(
             ) in room_params.required_state:
                 # If we already have a wildcard, we don't need to add anything else
                 if (
-                    # This is just a tricky way to grab the first element of the set. We
-                    # assume that if a wildcard is present, it's the only thing in the set.
+                    # We assume that if a wildcard is present, it's the only thing in the
+                    # set. 
get_first_item_in_set(required_state_map.get(state_type)) == (state_type, StateKeys.WILDCARD) ): @@ -184,10 +185,9 @@ def combine_room_sync_config( state_key_set, ) in other_room_sync_config.required_state_map.items(): # If we already have a wildcard, we don't need to add anything else - if ( - # This is just a tricky way to grab the first element of the set - get_first_item_in_set(self.required_state_map.get(state_type)) - == (state_type, StateKeys.WILDCARD) + if get_first_item_in_set(self.required_state_map.get(state_type)) == ( + state_type, + StateKeys.WILDCARD, ): continue @@ -977,7 +977,7 @@ async def get_room_sync_data( # Assemble the list of timeline events # - # It would be nice to make the `rooms` response more uniform regardless of + # FIXME: It would be nice to make the `rooms` response more uniform regardless of # membership. Currently, we have to make all of these optional because # `invite`/`knock` rooms only have `stripped_state`. See # https://github.com/matrix-org/matrix-spec-proposals/pull/3575#discussion_r1653045932 @@ -1151,39 +1151,73 @@ async def get_room_sync_data( initial = True # Fetch the required state for the room - required_state_types: List[Tuple[str, Optional[str]]] = [] - for state_type, state_key_set in room_sync_config.required_state_map.items(): - for _state_type, state_key in state_key_set: - if state_key == StateKeys.WILDCARD: - # `None` is a wildcard in the `StateFilter` - required_state_types.append((state_type, None)) - # We need to fetch all relevant people when we're lazy-loading membership - if state_type == EventTypes.Member and state_key == StateKeys.LAZY: - # Everyone in the timeline is relevant - timeline_membership: Set[str] = set() - if timeline_events is not None: - for timeline_event in timeline_events: - timeline_membership.add(timeline_event.sender) - - for user_id in timeline_membership: - required_state_types.append((EventTypes.Member, user_id)) - - # TODO: We probably also care about invite, ban, kick, targets, etc - # but the spec only mentions "senders". - else: - required_state_types.append((state_type, state_key)) + # + # No `required_state` for invite/knock rooms (just `stripped_state`) + # + # FIXME: It would be nice to make the `rooms` response more uniform regardless + # of membership. Currently, we have to make this optional because + # `invite`/`knock` rooms only have `stripped_state`. 
See + # https://github.com/matrix-org/matrix-spec-proposals/pull/3575#discussion_r1653045932 + room_state: Optional[StateMap[EventBase]] = None + if rooms_membership_for_user_at_to_token.membership not in ( + Membership.INVITE, + Membership.KNOCK, + ): + # Calculate the required state for the room and make it into the form of a + # `StateFilter` + required_state_types: List[Tuple[str, Optional[str]]] = [] + for ( + state_type, + state_key_set, + ) in room_sync_config.required_state_map.items(): + for _state_type, state_key in state_key_set: + if state_key == StateKeys.WILDCARD: + # `None` is a wildcard in the `StateFilter` + required_state_types.append((state_type, None)) + # We need to fetch all relevant people when we're lazy-loading membership + if state_type == EventTypes.Member and state_key == StateKeys.LAZY: + # Everyone in the timeline is relevant + timeline_membership: Set[str] = set() + if timeline_events is not None: + for timeline_event in timeline_events: + timeline_membership.add(timeline_event.sender) + + for user_id in timeline_membership: + required_state_types.append((EventTypes.Member, user_id)) + + # TODO: We probably also care about invite, ban, kick, targets, etc + # but the spec only mentions "senders". + else: + required_state_types.append((state_type, state_key)) - if initial: - room_state = await self.storage_controllers.state.get_current_state( - room_id, - StateFilter.from_types(required_state_types), - await_full_state=False, - ) - # TODO: Query `current_state_delta_stream` and reverse/rewind back to the `to_token` - else: - # TODO: Once we can figure out if we've sent a room down this connection before, - # we can return updates instead of the full required state. - raise NotImplementedError() + state_filter = StateFilter.from_types(required_state_types) + + # We can return the full state that was requested if we're doing an initial + # sync + if initial: + # People shouldn't see past their leave/ban event + if rooms_membership_for_user_at_to_token.membership in ( + Membership.LEAVE, + Membership.BAN, + ): + room_state = await self.storage_controllers.state.get_state_at( + room_id, + stream_position=rooms_membership_for_user_at_to_token.event_pos.to_room_stream_token(), + state_filter=state_filter, + await_full_state=False, + ) + else: + # Otherwise, we can get the latest current state in the room + room_state = await self.storage_controllers.state.get_current_state( + room_id, + state_filter, + await_full_state=False, + ) + # TODO: Query `current_state_delta_stream` and reverse/rewind back to the `to_token` + else: + # TODO: Once we can figure out if we've sent a room down this connection before, + # we can return updates instead of the full required state. 
+ raise NotImplementedError() return SlidingSyncResult.RoomResult( # TODO: Dummy value From 8b73185094657beb02c8e3d1c678fb8832ad53c6 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 1 Jul 2024 22:41:08 -0500 Subject: [PATCH 081/109] Trigger CI again From 939b9ceffdf70ea81c5395516489f6cbbe1b45d3 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 1 Jul 2024 23:17:58 -0500 Subject: [PATCH 082/109] room_state can be None --- synapse/handlers/sliding_sync.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 0d04ba0d640..2f8deae3351 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1192,8 +1192,8 @@ async def get_room_sync_data( state_filter = StateFilter.from_types(required_state_types) - # We can return the full state that was requested if we're doing an initial - # sync + # We can return all of the state that was requested if we're doing an + # initial sync if initial: # People shouldn't see past their leave/ban event if rooms_membership_for_user_at_to_token.membership in ( @@ -1227,7 +1227,7 @@ async def get_room_sync_data( # TODO: Dummy value heroes=None, initial=initial, - required_state=list(room_state.values()), + required_state=list(room_state.values()) if room_state else None, timeline_events=timeline_events, bundled_aggregations=bundled_aggregations, # TODO: Dummy value From 01a4b4345cd484044645e22c548c61ae7a03c28c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 2 Jul 2024 11:58:03 -0500 Subject: [PATCH 083/109] Rename `get_state_at` -> `get_state_ids_at` --- synapse/handlers/sliding_sync.py | 16 +++++++++------- synapse/handlers/sync.py | 22 ++++++++++++---------- synapse/storage/controllers/state.py | 2 +- 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 2f8deae3351..3abd58790d8 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -848,12 +848,14 @@ async def filter_rooms( # Make a copy so we don't run into an error: `Set changed size during # iteration`, when we filter out and remove items for room_id in list(filtered_room_id_set): - state_at_to_token = await self.storage_controllers.state.get_state_at( - room_id, - to_token, - state_filter=StateFilter.from_types( - [(EventTypes.RoomEncryption, "")] - ), + state_at_to_token = ( + await self.storage_controllers.state.get_state_ids_at( + room_id, + to_token, + state_filter=StateFilter.from_types( + [(EventTypes.RoomEncryption, "")] + ), + ) ) is_encrypted = state_at_to_token.get((EventTypes.RoomEncryption, "")) @@ -1200,7 +1202,7 @@ async def get_room_sync_data( Membership.LEAVE, Membership.BAN, ): - room_state = await self.storage_controllers.state.get_state_at( + room_state = await self.storage_controllers.state.get_state_ids_at( room_id, stream_position=rooms_membership_for_user_at_to_token.event_pos.to_room_stream_token(), state_filter=state_filter, diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index e2563428d2e..de227faec3f 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1352,7 +1352,7 @@ async def _compute_state_delta_for_full_sync( await_full_state = True lazy_load_members = False - state_at_timeline_end = await self._state_storage_controller.get_state_at( + state_at_timeline_end = await self._state_storage_controller.get_state_ids_at( room_id, stream_position=end_token, state_filter=state_filter, @@ -1480,11 
+1480,13 @@ async def _compute_state_delta_for_incremental_sync(
         else:
             # We can get here if the user has ignored the senders of all
             # the recent events.
-            state_at_timeline_start = await self._state_storage_controller.get_state_at(
-                room_id,
-                stream_position=end_token,
-                state_filter=state_filter,
-                await_full_state=await_full_state,
+            state_at_timeline_start = (
+                await self._state_storage_controller.get_state_ids_at(
+                    room_id,
+                    stream_position=end_token,
+                    state_filter=state_filter,
+                    await_full_state=await_full_state,
+                )
             )
 
         if batch.limited:
@@ -1502,14 +1504,14 @@ async def _compute_state_delta_for_incremental_sync(
             # about them).
             state_filter = StateFilter.all()
 
-        state_at_previous_sync = await self._state_storage_controller.get_state_at(
+        state_at_previous_sync = await self._state_storage_controller.get_state_ids_at(
            room_id,
            stream_position=since_token,
            state_filter=state_filter,
            await_full_state=await_full_state,
        )
 
-        state_at_timeline_end = await self._state_storage_controller.get_state_at(
+        state_at_timeline_end = await self._state_storage_controller.get_state_ids_at(
            room_id,
            stream_position=end_token,
            state_filter=state_filter,
@@ -2508,7 +2510,7 @@ async def _get_room_changes_for_incremental_sync(
                 continue
 
             if room_id in sync_result_builder.joined_room_ids or has_join:
-                old_state_ids = await self._state_storage_controller.get_state_at(
+                old_state_ids = await self._state_storage_controller.get_state_ids_at(
                    room_id,
                    since_token,
                    state_filter=StateFilter.from_types([(EventTypes.Member, user_id)]),
@@ -2539,7 +2541,7 @@ async def _get_room_changes_for_incremental_sync(
             else:
                 if not old_state_ids:
                     old_state_ids = (
-                        await self._state_storage_controller.get_state_at(
+                        await self._state_storage_controller.get_state_ids_at(
                            room_id,
                            since_token,
                            state_filter=StateFilter.from_types(
diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py
index cc9b162ae40..a5966e4b930 100644
--- a/synapse/storage/controllers/state.py
+++ b/synapse/storage/controllers/state.py
@@ -409,7 +409,7 @@ async def get_state_after_event(
 
         return state_ids
 
-    async def get_state_at(
+    async def get_state_ids_at(
         self,
         room_id: str,
         stream_position: StreamToken,

From 77b8d9cdbfb99bf4d38189e6084c35115dcda030 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Tue, 2 Jul 2024 12:31:47 -0500
Subject: [PATCH 084/109] Add `get_state_at(...)` that returns full events

---
 synapse/handlers/sliding_sync.py     |  9 ++++++---
 synapse/storage/controllers/state.py | 24 ++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py
index 3abd58790d8..c198a8da580 100644
--- a/synapse/handlers/sliding_sync.py
+++ b/synapse/handlers/sliding_sync.py
@@ -1202,14 +1202,17 @@ async def get_room_sync_data(
                 Membership.LEAVE,
                 Membership.BAN,
             ):
-                room_state = await self.storage_controllers.state.get_state_ids_at(
+                room_state = await self.storage_controllers.state.get_state_at(
                     room_id,
-                    stream_position=rooms_membership_for_user_at_to_token.event_pos.to_room_stream_token(),
+                    stream_position=to_token.copy_and_replace(
+                        StreamKeyType.ROOM,
+                        rooms_membership_for_user_at_to_token.event_pos.to_room_stream_token(),
+                    ),
                     state_filter=state_filter,
                     await_full_state=False,
                 )
+            # Otherwise, we can get the latest current state in the room
             else:
-                # Otherwise, we can get the latest current state in the room
                 room_state = await self.storage_controllers.state.get_current_state(
                     room_id,
                     state_filter,
diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py
index a5966e4b930..3782cfa553d 100644
--- a/synapse/storage/controllers/state.py
+++ b/synapse/storage/controllers/state.py
@@ -457,6 +457,30 @@ async def get_state_ids_at(
         )
         return state
 
+    @trace
+    @tag_args
+    async def get_state_at(
+        self,
+        room_id: str,
+        stream_position: StreamToken,
+        state_filter: Optional[StateFilter] = None,
+        await_full_state: bool = True,
+    ) -> StateMap[EventBase]:
+        """Same as `get_state_ids_at` but also fetches the events"""
+        state_map_ids = await self.get_state_ids_at(
+            room_id, stream_position, state_filter, await_full_state
+        )
+
+        event_map = await self.stores.main.get_events(list(state_map_ids.values()))
+
+        state_map = {}
+        for key, event_id in state_map_ids.items():
+            event = event_map.get(event_id)
+            if event:
+                state_map[key] = event
+
+        return state_map
+
     @trace
     @tag_args
     async def get_state_for_groups(

From a3ac7186aa62e7f6cbe30379b736f08441c4c808 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Tue, 2 Jul 2024 13:42:07 -0500
Subject: [PATCH 085/109] Some clean-up

---
 synapse/handlers/sliding_sync.py   |  6 +--
 synapse/rest/client/sync.py        | 10 ++---
 synapse/types/handlers/__init__.py | 24 +++++------
 tests/rest/client/test_sync.py     | 65 ++++++++++++++++++++++++++++--
 4 files changed, 82 insertions(+), 23 deletions(-)

diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py
index c198a8da580..04f9fbc858e 100644
--- a/synapse/handlers/sliding_sync.py
+++ b/synapse/handlers/sliding_sync.py
@@ -1231,15 +1231,16 @@ async def get_room_sync_data(
             avatar=None,
             # TODO: Dummy value
             heroes=None,
+            # TODO: Dummy value
+            is_dm=False,
             initial=initial,
             required_state=list(room_state.values()) if room_state else None,
             timeline_events=timeline_events,
             bundled_aggregations=bundled_aggregations,
-            # TODO: Dummy value
-            is_dm=False,
             stripped_state=stripped_state,
             prev_batch=prev_batch_token,
             limited=limited,
+            num_live=num_live,
             # TODO: Dummy values
             joined_count=0,
             invited_count=0,
            # TODO: These are just dummy values. We could potentially just remove these
            # (encrypted rooms).
             notification_count=0,
             highlight_count=0,
-            num_live=num_live,
         )
diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py
index 1d955a2e893..e52e771538b 100644
--- a/synapse/rest/client/sync.py
+++ b/synapse/rest/client/sync.py
@@ -996,7 +996,7 @@ async def encode_rooms(
             if room_result.initial:
                 serialized_rooms[room_id]["initial"] = room_result.initial
 
-            # This will be omitted for invite/knock rooms with `stripped_state`
+            # This will be omitted for invite/knock rooms with `stripped_state`
             if room_result.required_state is not None:
                 serialized_required_state = (
                     await self.event_serializer.serialize_events(
@@ -1007,7 +1007,7 @@ async def encode_rooms(
                 )
                 serialized_rooms[room_id]["required_state"] = serialized_required_state
 
-            # This will omitted for invite/knock rooms with `stripped_state`
+            # This will be omitted for invite/knock rooms with `stripped_state`
             if room_result.timeline_events is not None:
                 serialized_timeline = await self.event_serializer.serialize_events(
                     room_result.timeline_events,
@@ -1017,17 +1017,17 @@ async def encode_rooms(
                 )
                 serialized_rooms[room_id]["timeline"] = serialized_timeline
 
-            # This will omitted for invite/knock rooms with `stripped_state`
+            # This will be omitted for invite/knock rooms with `stripped_state`
             if room_result.limited is not None:
                 serialized_rooms[room_id]["limited"] = room_result.limited
 
-            # This will omitted for invite/knock rooms with `stripped_state`
+            # This will be omitted for invite/knock rooms with `stripped_state`
             if room_result.prev_batch is not None:
                 serialized_rooms[room_id]["prev_batch"] = (
                     await room_result.prev_batch.to_string(self.store)
                 )
 
-            # This will omitted for invite/knock rooms with `stripped_state`
+            # This will be omitted for invite/knock rooms with `stripped_state`
             if room_result.num_live is not None:
                 serialized_rooms[room_id]["num_live"] = room_result.num_live
 
diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py
index 3cd3c8fb0fa..3bd3268e595 100644
--- a/synapse/types/handlers/__init__.py
+++ b/synapse/types/handlers/__init__.py
@@ -156,6 +156,8 @@ class RoomResult:
         avatar: Room avatar
         heroes: List of stripped membership events (containing `user_id` and optionally
             `avatar_url` and `displayname`) for the users used to calculate the room name.
+        is_dm: Flag to specify whether the room is a direct-message room (most likely
+            between two people).
         initial: Flag which is set when this is the first time the server is sending this
             data on this connection. Clients can use this flag to replace or update
             their local state. When there is an update, servers MUST omit this flag
            entirely
        required_state: The current state of the room
        timeline: Latest events in the room. The last event is the most recent.
        bundled_aggregations: A mapping of event ID to the bundled aggregations for
            the timeline events above. This allows clients to show accurate reaction
            counts (or edits, threads), even if some of the reaction events were skipped
            over in a gappy sync.
-        is_dm: Flag to specify whether the room is a direct-message room (most likely
-            between two people).
         stripped_state: Stripped state events (for rooms where the user is
             invited/knocked). Same as `rooms.invite.$room_id.invite_state` in sync v2,
             absent on joined/left rooms
         prev_batch: A token that can be passed as a start parameter to the
             `/rooms//messages` API to retrieve earlier messages.
         limited: True if there are more events than fit between the given position and now.
             Sync again to get more.
+        num_live: The number of timeline events which have just occurred and are not historical.
+            The last N events are 'live' and should be treated as such. This is mostly
+            useful to determine whether a given @mention event should make a noise or not.
+            Clients cannot rely solely on the absence of `initial: true` to determine live
+            events because if a room not in the sliding window bumps into the window because
+            of an @mention it will have `initial: true` yet contain a single live event
+            (with potentially other old events in the timeline).
         joined_count: The number of users with membership of join, including the client's
             own user ID. (same as sync v2 `m.joined_member_count`)
         invited_count: The number of users with membership of invite. (same as sync v2
             m.invited_member_count)
         notification_count: The number of unread notifications for this room. (same
             as sync v2)
         highlight_count: The number of unread notifications for this room with the highlight
             flag set. (same as sync v2)
-        num_live: The number of timeline events which have just occurred and are not historical.
-            The last N events are 'live' and should be treated as such. This is mostly
-            useful to determine whether a given @mention event should make a noise or not.
-            Clients cannot rely solely on the absence of `initial: true` to determine live
-            events because if a room not in the sliding window bumps into the window because
-            of an @mention it will have `initial: true` yet contain a single live event
-            (with potentially other old events in the timeline).
     """

     name: Optional[str]
     avatar: Optional[str]
     heroes: Optional[List[EventBase]]
+    is_dm: bool
     initial: bool
     # Only optional because it won't be included for invite/knock rooms with `stripped_state`
     required_state: Optional[List[EventBase]]
     # Only optional because it won't be included for invite/knock rooms with `stripped_state`
     timeline_events: Optional[List[EventBase]]
     bundled_aggregations: Optional[Dict[str, "BundledAggregations"]]
-    is_dm: bool
     # Optional because it's only relevant to invite/knock rooms
     stripped_state: Optional[List[JsonDict]]
     # Only optional because it won't be included for invite/knock rooms with `stripped_state`
     prev_batch: Optional[StreamToken]
     # Only optional because it won't be included for invite/knock rooms with `stripped_state`
     limited: Optional[bool]
+    # Only optional because it won't be included for invite/knock rooms with `stripped_state`
+    num_live: Optional[int]
     joined_count: int
     invited_count: int
     notification_count: int
     highlight_count: int
-    # Only optional because it won't be included for invite/knock rooms with `stripped_state`
-    num_live: Optional[int]

 @attr.s(slots=True, frozen=True, auto_attribs=True)
 class SlidingWindowList:
diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py
index 8f77381dcc9..0302cae84ce 100644
--- a/tests/rest/client/test_sync.py
+++ b/tests/rest/client/test_sync.py
@@ -2718,7 +2718,7 @@ def test_rooms_required_state_initial_sync(self) -> None:
         )
         self.assertEqual(channel.code, 200, channel.json_body)
 
-        state_map = self.get_success(
+        state_ids_map = self.get_success(
             self.storage_controllers.state.get_current_state_ids(room_id1)
         )
 
@@ -2730,8 +2730,67 @@ def test_rooms_required_state_initial_sync(self) -> None:
                 ]
             ],
             [
-                state_map[(EventTypes.Create, "")],
-                state_map[(EventTypes.RoomHistoryVisibility, "")],
+                state_ids_map[(EventTypes.Create, "")],
+                state_ids_map[(EventTypes.RoomHistoryVisibility, "")],
             ],
             channel.json_body["rooms"][room_id1]["required_state"],
         )
 
+    def test_rooms_required_state_incremental_sync(self) -> None:
+        """
+        Test `rooms.required_state` returns requested state events in the room during an
+        incremental sync.
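+
+        For now, the returned state matches what an initial sync would return; see the
+        comment in the test body below about only returning state updates in the future.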
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + + after_room_token = self.event_sources.get_current_token() + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint + + f"?pos={self.get_success(after_room_token.to_string(self.store))}", + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [ + [EventTypes.Create, ""], + [EventTypes.RoomHistoryVisibility, ""], + # This one doesn't exist in the room + [EventTypes.Tombstone, ""], + ], + "timeline_limit": 3, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + state_ids_map = self.get_success( + self.storage_controllers.state.get_current_state_ids(room_id1) + ) + + # The returned state doesn't change from initial to incremental sync. In the + # future, we will only return updates but only if we've sent the room down the + # connection before. + self.assertEqual( + [ + state_event["event_id"] + for state_event in channel.json_body["rooms"][room_id1][ + "required_state" + ] + ], + [ + state_ids_map[(EventTypes.Create, "")], + state_ids_map[(EventTypes.RoomHistoryVisibility, "")], ], channel.json_body["rooms"][room_id1]["required_state"], ) From c71cf86f100006a6b002b790599beca9ecc524e1 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 2 Jul 2024 15:04:36 -0500 Subject: [PATCH 086/109] Handle wildcard merges in from_config --- synapse/handlers/sliding_sync.py | 55 +++++++++-- tests/handlers/test_sliding_sync.py | 145 +++++++++++++++++++++++++++- 2 files changed, 191 insertions(+), 9 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 707ff59d2ac..b8c39905234 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -138,7 +138,20 @@ def from_room_config( state_type, state_key, ) in room_params.required_state: - # If we already have a wildcard, we don't need to add anything else + # We assume that if a wildcard is present, it's the only thing in the set. + wildcard_type_entry = get_first_item_in_set( + required_state_map.get(StateKeys.WILDCARD) + ) + # If we already have a wildcard for *any* `state_key` or this specific + # `state_key`, we don't need to add anything else + if wildcard_type_entry == ( + StateKeys.WILDCARD, + StateKeys.WILDCARD, + ) or wildcard_type_entry == (StateKeys.WILDCARD, state_key): + continue + + # If we already have a wildcard `state_key` for this `state_type`, we don't need + # to add anything else if ( # We assume that if a wildcard is present, it's the only thing in the # set. @@ -147,8 +160,38 @@ def from_room_config( ): continue - # If we're getting a wildcard, that's all that matters so get rid of any - # other state keys + # If we're getting wildcards for the `state_type` and `state_key`, that's + # all that matters so get rid of any other entries + if state_type == StateKeys.WILDCARD and state_key == StateKeys.WILDCARD: + required_state_map = {state_type: {(state_type, state_key)}} + # If we're getting a wildcard for the `state_type`, get rid of any other + # entries with the same `state_key`, since the wildcard will cover it already. 
+            elif state_type == StateKeys.WILDCARD:
+                # Get rid of any entries that match the `state_key`
+                for (
+                    existing_state_type,
+                    existing_state_key_set,
+                ) in list(required_state_map.items()):
+                    # Make a copy so we don't run into an error: `Set changed size during
+                    # iteration`, when we filter out and remove items
+                    for (
+                        _existing_state_type,
+                        existing_state_key,
+                    ) in existing_state_key_set.copy():
+                        if existing_state_key == state_key:
+                            existing_state_key_set.remove(
+                                (existing_state_type, state_key)
+                            )
+
+                    if existing_state_key_set == set():
+                        required_state_map.pop(existing_state_type, None)
+
+                # Add our wildcard entry to the map after we remove things so we don't
+                # have to iterate over it and accidentally remove it.
+                required_state_map[state_type] = {(state_type, state_key)}
+
+            # If we're getting a wildcard `state_key`, get rid of any other state_keys
+            # for this `state_type` since the wildcard will cover it already.
             if state_key == StateKeys.WILDCARD:
                 required_state_map[state_type] = {(state_type, state_key)}
             # Otherwise, just add it to the set
@@ -853,7 +896,7 @@ async def filter_rooms(
         if filters.is_encrypted is not None:
             # Make a copy so we don't run into an error: `Set changed size during
             # iteration`, when we filter out and remove items
-            for room_id in list(filtered_room_id_set):
+            for room_id in filtered_room_id_set.copy():
                 state_at_to_token = await self.storage_controllers.state.get_state_at(
                     room_id,
                     to_token,
@@ -880,7 +923,7 @@ async def filter_rooms(
         if filters.is_invite is not None:
             # Make a copy so we don't run into an error: `Set changed size during
             # iteration`, when we filter out and remove items
-            for room_id in list(filtered_room_id_set):
+            for room_id in filtered_room_id_set.copy():
                 room_for_user = sync_room_map[room_id]
                 # If we're looking for invite rooms, filter out rooms that the user is
                 # not invited to and vice versa
@@ -898,7 +941,7 @@ async def filter_rooms(
         if filters.room_types is not None or filters.not_room_types is not None:
             # Make a copy so we don't run into an error: `Set changed size during
             # iteration`, when we filter out and remove items
-            for room_id in list(filtered_room_id_set):
+            for room_id in filtered_room_id_set.copy():
                 create_event = await self.store.get_create_event_for_room(room_id)
                 room_type = create_event.content.get(EventContentFields.ROOM_TYPE)
                 if (
diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py
index 93b509acc10..82cdef65a5f 100644
--- a/tests/handlers/test_sliding_sync.py
+++ b/tests/handlers/test_sliding_sync.py
@@ -56,11 +56,14 @@ def _assert_room_config_equal(
         self.assertEqual(actual.timeline_limit, expected.timeline_limit)
 
         # `self.assertEqual(...)` works fine to catch differences but the output is
-        # almost impossible to read because of the way it truncates the output
+        # almost impossible to read because of the way it truncates the output and the
+        # order doesn't actually matter.
         self.assertCountEqual(actual.required_state_map, expected.required_state_map)
         for event_type, expected_state_keys in expected.required_state_map.items():
             self.assertCountEqual(
-                actual.required_state_map[event_type], expected_state_keys
+                actual.required_state_map[event_type],
+                expected_state_keys,
+                f"Mismatch for {event_type}",
             )
 
     def test_from_list_config(self) -> None:
@@ -134,7 +137,81 @@ def test_from_room_subscription(self) -> None:
 
     def test_from_room_config_wildcard(self) -> None:
         """
-        Test that a wildcard (*) will override all other values for the same event type.
+        Test that a wildcard (*) for both the `event_type` and `state_key` will override
+        all other values.
+
+        Note: MSC3575 describes different behavior to how we're handling things here but
+        since it's not wrong to return more state than requested (`required_state` is
+        just the minimum requested), it doesn't matter if we include things that the
+        client wanted excluded. This complexity is also under scrutiny, see
+        https://github.com/matrix-org/matrix-spec-proposals/pull/3575#discussion_r1185109050
+
+        > One unique exception is when you request all state events via ["*", "*"]. When used,
+        > all state events are returned by default, and additional entries FILTER OUT the returned set
+        > of state events. These additional entries cannot use '*' themselves.
+        > For example, ["*", "*"], ["m.room.member", "@alice:example.com"] will _exclude_ every m.room.member
+        > event _except_ for @alice:example.com, and include every other state event.
+        > In addition, ["*", "*"], ["m.space.child", "*"] is an error, the m.space.child filter is not
+        > required as it would have been returned anyway.
+        >
+        > -- MSC3575 (https://github.com/matrix-org/matrix-spec-proposals/pull/3575)
+        """
+        list_config = SlidingSyncConfig.SlidingSyncList(
+            timeline_limit=10,
+            required_state=[
+                (EventTypes.Name, ""),
+                (StateKeys.WILDCARD, StateKeys.WILDCARD),
+                (EventTypes.Member, "@foo"),
+                (EventTypes.CanonicalAlias, ""),
+            ],
+        )
+
+        room_sync_config = RoomSyncConfig.from_room_config(list_config)
+
+        self._assert_room_config_equal(
+            room_sync_config,
+            RoomSyncConfig(
+                timeline_limit=10,
+                required_state_map={
+                    StateKeys.WILDCARD: {(StateKeys.WILDCARD, StateKeys.WILDCARD)},
+                },
+            ),
+        )
+
+    def test_from_room_config_wildcard_type(self) -> None:
+        """
+        Test that a wildcard (*) as an `event_type` will override all other values for the
+        same `state_key`.
+        """
+        list_config = SlidingSyncConfig.SlidingSyncList(
+            timeline_limit=10,
+            required_state=[
+                (EventTypes.Name, ""),
+                (StateKeys.WILDCARD, ""),
+                (EventTypes.Member, "@foo"),
+                (EventTypes.CanonicalAlias, ""),
+            ],
+        )
+
+        room_sync_config = RoomSyncConfig.from_room_config(list_config)
+
+        self._assert_room_config_equal(
+            room_sync_config,
+            RoomSyncConfig(
+                timeline_limit=10,
+                required_state_map={
+                    StateKeys.WILDCARD: {(StateKeys.WILDCARD, "")},
+                    EventTypes.Member: {
+                        (EventTypes.Member, "@foo"),
+                    },
+                },
+            ),
+        )
+
+    def test_from_room_config_wildcard_state_key(self) -> None:
+        """
+        Test that a wildcard (*) as a `state_key` will override all other values for the
+        same `event_type`.
         """
         list_config = SlidingSyncConfig.SlidingSyncList(
             timeline_limit=10,
@@ -165,6 +242,68 @@ def test_from_room_config_wildcard(self) -> None:
             ),
         )
 
+    def test_from_room_config_wildcard_merge(self) -> None:
+        """
+        Test that wildcard (*) entries for the `event_type` and another one for the
+        `state_key` will play together.
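+
+        e.g. ("*", "") and ("m.room.member", "*") should both survive in the resulting
+        map since neither wildcard fully covers the other.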
+ """ + list_config = SlidingSyncConfig.SlidingSyncList( + timeline_limit=10, + required_state=[ + (EventTypes.Name, ""), + (StateKeys.WILDCARD, ""), + (EventTypes.Member, "@foo"), + (EventTypes.Member, StateKeys.WILDCARD), + (EventTypes.Member, "@bar"), + (EventTypes.CanonicalAlias, ""), + ], + ) + + room_sync_config = RoomSyncConfig.from_room_config(list_config) + + self._assert_room_config_equal( + room_sync_config, + RoomSyncConfig( + timeline_limit=10, + required_state_map={ + StateKeys.WILDCARD: {(StateKeys.WILDCARD, "")}, + EventTypes.Member: { + (EventTypes.Member, StateKeys.WILDCARD), + }, + }, + ), + ) + + def test_from_room_config_wildcard_merge2(self) -> None: + """ + Test that an all wildcard ("*", "*") entry will override any other values (including other wildcards). + """ + list_config = SlidingSyncConfig.SlidingSyncList( + timeline_limit=10, + required_state=[ + (EventTypes.Name, ""), + (StateKeys.WILDCARD, ""), + (EventTypes.Member, StateKeys.WILDCARD), + (EventTypes.Member, "@foo"), + # One of these should take precedence over everything else + (StateKeys.WILDCARD, StateKeys.WILDCARD), + (StateKeys.WILDCARD, StateKeys.WILDCARD), + (EventTypes.CanonicalAlias, ""), + ], + ) + + room_sync_config = RoomSyncConfig.from_room_config(list_config) + + self._assert_room_config_equal( + room_sync_config, + RoomSyncConfig( + timeline_limit=10, + required_state_map={ + StateKeys.WILDCARD: {(StateKeys.WILDCARD, StateKeys.WILDCARD)}, + }, + ), + ) + def test_from_room_config_lazy_members(self) -> None: """ `$LAZY` room members should just be another additional key next to other From dcad14bf60ee505478443c17d2da12aeee4e3214 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 2 Jul 2024 15:24:36 -0500 Subject: [PATCH 087/109] Support multiple wildcard types --- synapse/handlers/sliding_sync.py | 26 +++++++++----------- tests/handlers/test_sliding_sync.py | 38 ++++++++++++++++++++++++++++- 2 files changed, 49 insertions(+), 15 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index b8c39905234..791b7cce358 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -138,16 +138,15 @@ def from_room_config( state_type, state_key, ) in room_params.required_state: - # We assume that if a wildcard is present, it's the only thing in the set. - wildcard_type_entry = get_first_item_in_set( - required_state_map.get(StateKeys.WILDCARD) - ) - # If we already have a wildcard for *any* `state_key` or this specific - # `state_key`, we don't need to add anything else - if wildcard_type_entry == ( - StateKeys.WILDCARD, - StateKeys.WILDCARD, - ) or wildcard_type_entry == (StateKeys.WILDCARD, state_key): + # If we already have a wildcard for *any* `state_type`, we don't need to add + # anything else + wildcard_set = required_state_map.get(StateKeys.WILDCARD, {}) + if (StateKeys.WILDCARD, StateKeys.WILDCARD) in wildcard_set: + break + + # If we already have a wildcard for this specific `state_type`, we don't need + # to add it since the wildcard already covers it. 
+            if (StateKeys.WILDCARD, state_key) in wildcard_set:
                 continue
 
             # If we already have a wildcard `state_key` for this `state_type`, we don't need
@@ -164,6 +163,8 @@ def from_room_config(
             # all that matters so get rid of any other entries
             if state_type == StateKeys.WILDCARD and state_key == StateKeys.WILDCARD:
                 required_state_map = {state_type: {(state_type, state_key)}}
+                # We can break, since we don't need to add anything else
+                break
             # If we're getting a wildcard for the `state_type`, get rid of any other
             # entries with the same `state_key`, since the wildcard will cover it already.
             elif state_type == StateKeys.WILDCARD:
                 # Get rid of any entries that match the `state_key`
@@ -183,13 +184,10 @@ def from_room_config(
                             (existing_state_type, state_key)
                         )
 
+                    # If we've left the `set()` empty, remove it from the map
                     if existing_state_key_set == set():
                         required_state_map.pop(existing_state_type, None)
 
-                # Add our wildcard entry to the map after we remove things so we don't
-                # have to iterate over it and accidentally remove it.
-                required_state_map[state_type] = {(state_type, state_key)}
-
             # If we're getting a wildcard `state_key`, get rid of any other state_keys
             # for this `state_type` since the wildcard will cover it already.
             if state_key == StateKeys.WILDCARD:
                 required_state_map[state_type] = {(state_type, state_key)}
diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py
index 82cdef65a5f..37ec41ad683 100644
--- a/tests/handlers/test_sliding_sync.py
+++ b/tests/handlers/test_sliding_sync.py
@@ -208,6 +208,42 @@ def test_from_room_config_wildcard_type(self) -> None:
             ),
         )
 
+    def test_from_room_config_multiple_wildcard_type(self) -> None:
+        """
+        Test that multiple wildcard (*) entries as an `event_type` will override all
+        other values for the same `state_key`.
+        """
+        list_config = SlidingSyncConfig.SlidingSyncList(
+            timeline_limit=10,
+            required_state=[
+                (EventTypes.Name, ""),
+                (StateKeys.WILDCARD, ""),
+                (EventTypes.Member, "@foo"),
+                (StateKeys.WILDCARD, "@foo"),
+                ("org.matrix.personal_count", "@foo"),
+                (EventTypes.Member, "@bar"),
+                (EventTypes.CanonicalAlias, ""),
+            ],
+        )
+
+        room_sync_config = RoomSyncConfig.from_room_config(list_config)
+
+        self._assert_room_config_equal(
+            room_sync_config,
+            RoomSyncConfig(
+                timeline_limit=10,
+                required_state_map={
+                    StateKeys.WILDCARD: {
+                        (StateKeys.WILDCARD, ""),
+                        (StateKeys.WILDCARD, "@foo"),
+                    },
+                    EventTypes.Member: {
+                        (EventTypes.Member, "@bar"),
+                    },
+                },
+            ),
+        )
+
     def test_from_room_config_wildcard_state_key(self) -> None:
         """
         Test that a wildcard (*) as a `state_key` will override all other values for the
@@ -235,7 +271,7 @@ def test_from_room_config_wildcard_state_key(self) -> None:
             required_state_map={
                 EventTypes.Name: {(EventTypes.Name, "")},
                 EventTypes.Member: {
-                    (EventTypes.Member, "*"),
+                    (EventTypes.Member, StateKeys.WILDCARD),
                 },
                 EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")},
             },

From ce503fcd14fdca16a18a843649250a04e4161a25 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Tue, 2 Jul 2024 15:42:53 -0500
Subject: [PATCH 088/109] Parameterize tests

---
 synapse/handlers/sliding_sync.py    |   7 +-
 tests/handlers/test_sliding_sync.py | 546 ++++++++++++++--------------
 2 files changed, 275 insertions(+), 278 deletions(-)

diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py
index 791b7cce358..ab9addb7dff 100644
--- a/synapse/handlers/sliding_sync.py
+++ b/synapse/handlers/sliding_sync.py
@@ -138,13 +138,13 @@ def from_room_config(
             state_type,
             state_key,
         ) in room_params.required_state:
-            # If we already have a wildcard for *any* `state_type`, we don't need to add
+            # If we already have a wildcard for everything, we don't need to add
             # anything else
             wildcard_set = required_state_map.get(StateKeys.WILDCARD, {})
             if (StateKeys.WILDCARD, StateKeys.WILDCARD) in wildcard_set:
                 break
 
-            # If we already have a wildcard for this specific `state_type`, we don't need
+            # If we already have a wildcard for this specific `state_key`, we don't need
             # to add it since the wildcard already covers it.
             if (StateKeys.WILDCARD, state_key) in wildcard_set:
                 continue
@@ -164,6 +163,8 @@ def from_room_config(
             if state_type == StateKeys.WILDCARD and state_key == StateKeys.WILDCARD:
                 required_state_map = {state_type: {(state_type, state_key)}}
+                # We can break, since we don't need to add anything else
+                break
             # If we're getting a wildcard for the `state_type`, get rid of any other
             # entries with the same `state_key`, since the wildcard will cover it already.
             elif state_type == StateKeys.WILDCARD:
                 # Get rid of any entries that match the `state_key`
+                #
+                # Make a copy so we don't run into an error: `dictionary changed size
+                # during iteration`, when we remove items
                 for (
                     existing_state_type,
                     existing_state_key_set,
diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py
index 37ec41ad683..1014449ebd2 100644
--- a/tests/handlers/test_sliding_sync.py
+++ b/tests/handlers/test_sliding_sync.py
@@ -66,314 +66,308 @@ def _assert_room_config_equal(
             f"Mismatch for {event_type}",
         )
 
-    def test_from_list_config(self) -> None:
-        """
-        Test that we can convert a `SlidingSyncConfig.SlidingSyncList` to a
-        `RoomSyncConfig`.
-        """
-
-        list_config = SlidingSyncConfig.SlidingSyncList(
-            timeline_limit=10,
-            required_state=[
-                (EventTypes.Name, ""),
-                (EventTypes.Member, "@foo"),
-                (EventTypes.Member, "@bar"),
-                (EventTypes.Member, "@baz"),
-                (EventTypes.CanonicalAlias, ""),
-            ],
-        )
-
-        room_sync_config = RoomSyncConfig.from_room_config(list_config)
-
-        self._assert_room_config_equal(
-            room_sync_config,
-            RoomSyncConfig(
-                timeline_limit=10,
-                required_state_map={
-                    EventTypes.Name: {(EventTypes.Name, "")},
-                    EventTypes.Member: {
+    @parameterized.expand(
+        [
+            (
+                "from_list_config",
+                """
+                Test that we can convert a `SlidingSyncConfig.SlidingSyncList` to a
+                `RoomSyncConfig`.
+                """,
+                # Input
+                SlidingSyncConfig.SlidingSyncList(
+                    timeline_limit=10,
+                    required_state=[
+                        (EventTypes.Name, ""),
                         (EventTypes.Member, "@foo"),
                         (EventTypes.Member, "@bar"),
                         (EventTypes.Member, "@baz"),
+                        (EventTypes.CanonicalAlias, ""),
+                    ],
+                ),
+                # Expected
+                RoomSyncConfig(
+                    timeline_limit=10,
+                    required_state_map={
+                        EventTypes.Name: {(EventTypes.Name, "")},
+                        EventTypes.Member: {
+                            (EventTypes.Member, "@foo"),
+                            (EventTypes.Member, "@bar"),
+                            (EventTypes.Member, "@baz"),
+                        },
+                        EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")},
                     },
-                    EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")},
-                },
+                ),
             ),
-        )
-
-    def test_from_room_subscription(self) -> None:
-        """
-        Test that we can convert a `SlidingSyncConfig.RoomSubscription` to a
-        `RoomSyncConfig`.
-        """
-        room_subscription_config = SlidingSyncConfig.RoomSubscription(
-            timeline_limit=10,
-            required_state=[
-                (EventTypes.Name, ""),
-                (EventTypes.Member, "@foo"),
-                (EventTypes.Member, "@bar"),
-                (EventTypes.Member, "@baz"),
-                (EventTypes.CanonicalAlias, ""),
-            ],
-        )
-
-        room_sync_config = RoomSyncConfig.from_room_config(room_subscription_config)
-
-        self._assert_room_config_equal(
-            room_sync_config,
-            RoomSyncConfig(
-                timeline_limit=10,
-                required_state_map={
-                    EventTypes.Name: {(EventTypes.Name, "")},
-                    EventTypes.Member: {
+            (
+                "from_room_subscription",
+                """
+                Test that we can convert a `SlidingSyncConfig.RoomSubscription` to a
+                `RoomSyncConfig`.
+                """,
+                # Input
+                SlidingSyncConfig.RoomSubscription(
+                    timeline_limit=10,
+                    required_state=[
+                        (EventTypes.Name, ""),
                         (EventTypes.Member, "@foo"),
                         (EventTypes.Member, "@bar"),
                         (EventTypes.Member, "@baz"),
+                        (EventTypes.CanonicalAlias, ""),
+                    ],
+                ),
+                # Expected
+                RoomSyncConfig(
+                    timeline_limit=10,
+                    required_state_map={
+                        EventTypes.Name: {(EventTypes.Name, "")},
+                        EventTypes.Member: {
+                            (EventTypes.Member, "@foo"),
+                            (EventTypes.Member, "@bar"),
+                            (EventTypes.Member, "@baz"),
+                        },
+                        EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")},
                     },
-                    EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")},
-                },
+                ),
             ),
-        )
-
-    def test_from_room_config_wildcard(self) -> None:
-        """
-        Test that a wildcard (*) for both the `event_type` and `state_key` will override
-        all other values.
-
-        Note: MSC3575 describes different behavior to how we're handling things here but
-        since it's not wrong to return more state than requested (`required_state` is
-        just the minimum requested), it doesn't matter if we include things that the
-        client wanted excluded. This complexity is also under scrutiny, see
-        https://github.com/matrix-org/matrix-spec-proposals/pull/3575#discussion_r1185109050
-
-        > One unique exception is when you request all state events via ["*", "*"]. When used,
-        > all state events are returned by default, and additional entries FILTER OUT the returned set
-        > of state events. These additional entries cannot use '*' themselves.
-        > For example, ["*", "*"], ["m.room.member", "@alice:example.com"] will _exclude_ every m.room.member
-        > event _except_ for @alice:example.com, and include every other state event.
-        > In addition, ["*", "*"], ["m.space.child", "*"] is an error, the m.space.child filter is not
-        > required as it would have been returned anyway.
-        >
-        > -- MSC3575 (https://github.com/matrix-org/matrix-spec-proposals/pull/3575)
-        """
-        list_config = SlidingSyncConfig.SlidingSyncList(
-            timeline_limit=10,
-            required_state=[
-                (EventTypes.Name, ""),
-                (StateKeys.WILDCARD, StateKeys.WILDCARD),
-                (EventTypes.Member, "@foo"),
-                (EventTypes.CanonicalAlias, ""),
-            ],
-        )
-
-        room_sync_config = RoomSyncConfig.from_room_config(list_config)
-
-        self._assert_room_config_equal(
-            room_sync_config,
-            RoomSyncConfig(
-                timeline_limit=10,
-                required_state_map={
-                    StateKeys.WILDCARD: {(StateKeys.WILDCARD, StateKeys.WILDCARD)},
-                },
+            (
+                "wildcard",
+                """
+                Test that a wildcard (*) for both the `event_type` and `state_key` will override
+                all other values.
+
+                Note: MSC3575 describes different behavior to how we're handling things here but
+                since it's not wrong to return more state than requested (`required_state` is
+                just the minimum requested), it doesn't matter if we include things that the
+                client wanted excluded. This complexity is also under scrutiny, see
+                https://github.com/matrix-org/matrix-spec-proposals/pull/3575#discussion_r1185109050
+
+                > One unique exception is when you request all state events via ["*", "*"]. When used,
+                > all state events are returned by default, and additional entries FILTER OUT the returned set
+                > of state events. These additional entries cannot use '*' themselves.
+                > For example, ["*", "*"], ["m.room.member", "@alice:example.com"] will _exclude_ every m.room.member
+                > event _except_ for @alice:example.com, and include every other state event.
+                > In addition, ["*", "*"], ["m.space.child", "*"] is an error, the m.space.child filter is not
+                > required as it would have been returned anyway.
+                >
+                > -- MSC3575 (https://github.com/matrix-org/matrix-spec-proposals/pull/3575)
+                """,
+                # Input
+                SlidingSyncConfig.SlidingSyncList(
+                    timeline_limit=10,
+                    required_state=[
+                        (EventTypes.Name, ""),
+                        (StateKeys.WILDCARD, StateKeys.WILDCARD),
+                        (EventTypes.Member, "@foo"),
+                        (EventTypes.CanonicalAlias, ""),
+                    ],
+                ),
+                # Expected
+                RoomSyncConfig(
+                    timeline_limit=10,
+                    required_state_map={
+                        StateKeys.WILDCARD: {(StateKeys.WILDCARD, StateKeys.WILDCARD)},
+                    },
+                ),
             ),
-        )
-
-    def test_from_room_config_wildcard_type(self) -> None:
-        """
-        Test that a wildcard (*) as an `event_type` will override all other values for the
-        same `state_key`.
-        """
-        list_config = SlidingSyncConfig.SlidingSyncList(
-            timeline_limit=10,
-            required_state=[
-                (EventTypes.Name, ""),
-                (StateKeys.WILDCARD, ""),
-                (EventTypes.Member, "@foo"),
-                (EventTypes.CanonicalAlias, ""),
-            ],
-        )
-
-        room_sync_config = RoomSyncConfig.from_room_config(list_config)
-
-        self._assert_room_config_equal(
-            room_sync_config,
-            RoomSyncConfig(
-                timeline_limit=10,
-                required_state_map={
-                    StateKeys.WILDCARD: {(StateKeys.WILDCARD, "")},
-                    EventTypes.Member: {
+            (
+                "wildcard_type",
+                """
+                Test that a wildcard (*) as an `event_type` will override all other values for the
+                same `state_key`.
+                """,
+                # Input
+                SlidingSyncConfig.SlidingSyncList(
+                    timeline_limit=10,
+                    required_state=[
+                        (EventTypes.Name, ""),
+                        (StateKeys.WILDCARD, ""),
                         (EventTypes.Member, "@foo"),
+                        (EventTypes.CanonicalAlias, ""),
+                    ],
+                ),
+                # Expected
+                RoomSyncConfig(
+                    timeline_limit=10,
+                    required_state_map={
+                        StateKeys.WILDCARD: {(StateKeys.WILDCARD, "")},
+                        EventTypes.Member: {
+                            (EventTypes.Member, "@foo"),
+                        },
                     },
-                },
+                ),
             ),
-        )
-
-    def test_from_room_config_multiple_wildcard_type(self) -> None:
-        """
-        Test that multiple wildcard (*) entries as an `event_type` will override all
-        other values for the same `state_key`.
-        """
-        list_config = SlidingSyncConfig.SlidingSyncList(
-            timeline_limit=10,
-            required_state=[
-                (EventTypes.Name, ""),
-                (StateKeys.WILDCARD, ""),
-                (EventTypes.Member, "@foo"),
-                (StateKeys.WILDCARD, "@foo"),
-                ("org.matrix.personal_count", "@foo"),
-                (EventTypes.Member, "@bar"),
-                (EventTypes.CanonicalAlias, ""),
-            ],
-        )
-
-        room_sync_config = RoomSyncConfig.from_room_config(list_config)
-
-        self._assert_room_config_equal(
-            room_sync_config,
-            RoomSyncConfig(
-                timeline_limit=10,
-                required_state_map={
-                    StateKeys.WILDCARD: {
+            (
+                "multiple_wildcard_type",
+                """
+                Test that multiple wildcard (*) entries as an `event_type` will override all
+                other values for the same `state_key`.
+                """,
+                # Input
+                SlidingSyncConfig.SlidingSyncList(
+                    timeline_limit=10,
+                    required_state=[
+                        (EventTypes.Name, ""),
                         (StateKeys.WILDCARD, ""),
+                        (EventTypes.Member, "@foo"),
                         (StateKeys.WILDCARD, "@foo"),
-                    },
-                    EventTypes.Member: {
+                        ("org.matrix.personal_count", "@foo"),
                         (EventTypes.Member, "@bar"),
+                        (EventTypes.CanonicalAlias, ""),
+                    ],
+                ),
+                # Expected
+                RoomSyncConfig(
+                    timeline_limit=10,
+                    required_state_map={
+                        StateKeys.WILDCARD: {
+                            (StateKeys.WILDCARD, ""),
+                            (StateKeys.WILDCARD, "@foo"),
+                        },
+                        EventTypes.Member: {
+                            (EventTypes.Member, "@bar"),
+                        },
                     },
-                },
+                ),
             ),
-        )
-
-    def test_from_room_config_wildcard_state_key(self) -> None:
-        """
-        Test that a wildcard (*) as a `state_key` will override all other values for the
-        same `event_type`.
-        """
-        list_config = SlidingSyncConfig.SlidingSyncList(
-            timeline_limit=10,
-            required_state=[
-                (EventTypes.Name, ""),
-                (EventTypes.Member, "@foo"),
-                (EventTypes.Member, StateKeys.WILDCARD),
-                (EventTypes.Member, "@bar"),
-                (EventTypes.Member, StateKeys.LAZY),
-                (EventTypes.Member, "@baz"),
-                (EventTypes.CanonicalAlias, ""),
-            ],
-        )
-
-        room_sync_config = RoomSyncConfig.from_room_config(list_config)
-
-        self._assert_room_config_equal(
-            room_sync_config,
-            RoomSyncConfig(
-                timeline_limit=10,
-                required_state_map={
-                    EventTypes.Name: {(EventTypes.Name, "")},
-                    EventTypes.Member: {
+            (
+                "wildcard_state_key",
+                """
+                Test that a wildcard (*) as a `state_key` will override all other values for the
+                same `event_type`.
+                """,
+                # Input
+                SlidingSyncConfig.SlidingSyncList(
+                    timeline_limit=10,
+                    required_state=[
+                        (EventTypes.Name, ""),
+                        (EventTypes.Member, "@foo"),
                         (EventTypes.Member, StateKeys.WILDCARD),
+                        (EventTypes.Member, "@bar"),
+                        (EventTypes.Member, StateKeys.LAZY),
+                        (EventTypes.Member, "@baz"),
+                        (EventTypes.CanonicalAlias, ""),
+                    ],
+                ),
+                # Expected
+                RoomSyncConfig(
+                    timeline_limit=10,
+                    required_state_map={
+                        EventTypes.Name: {(EventTypes.Name, "")},
+                        EventTypes.Member: {
+                            (EventTypes.Member, StateKeys.WILDCARD),
+                        },
+                        EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")},
                     },
-                    EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")},
-                },
+                ),
             ),
-        )
-
-    def test_from_room_config_wildcard_merge(self) -> None:
-        """
-        Test that wildcard (*) entries for the `event_type` and another one for
-        `state_key` will play together.
-        """
-        list_config = SlidingSyncConfig.SlidingSyncList(
-            timeline_limit=10,
-            required_state=[
-                (EventTypes.Name, ""),
-                (StateKeys.WILDCARD, ""),
-                (EventTypes.Member, "@foo"),
-                (EventTypes.Member, StateKeys.WILDCARD),
-                (EventTypes.Member, "@bar"),
-                (EventTypes.CanonicalAlias, ""),
-            ],
-        )
-
-        room_sync_config = RoomSyncConfig.from_room_config(list_config)
-
-        self._assert_room_config_equal(
-            room_sync_config,
-            RoomSyncConfig(
-                timeline_limit=10,
-                required_state_map={
-                    StateKeys.WILDCARD: {(StateKeys.WILDCARD, "")},
-                    EventTypes.Member: {
+            (
+                "wildcard_merge",
+                """
+                Test that wildcard (*) entries for the `event_type` and another one for
+                `state_key` will play together.
+                """,
+                # Input
+                SlidingSyncConfig.SlidingSyncList(
+                    timeline_limit=10,
+                    required_state=[
+                        (EventTypes.Name, ""),
+                        (StateKeys.WILDCARD, ""),
+                        (EventTypes.Member, "@foo"),
                         (EventTypes.Member, StateKeys.WILDCARD),
+                        (EventTypes.Member, "@bar"),
+                        (EventTypes.CanonicalAlias, ""),
+                    ],
+                ),
+                # Expected
+                RoomSyncConfig(
+                    timeline_limit=10,
+                    required_state_map={
+                        StateKeys.WILDCARD: {(StateKeys.WILDCARD, "")},
+                        EventTypes.Member: {
+                            (EventTypes.Member, StateKeys.WILDCARD),
+                        },
                     },
-                },
+                ),
             ),
-        )
-
-    def test_from_room_config_wildcard_merge2(self) -> None:
-        """
-        Test that an all wildcard ("*", "*") entry will override any other values (including other wildcards).
-        """
-        list_config = SlidingSyncConfig.SlidingSyncList(
-            timeline_limit=10,
-            required_state=[
-                (EventTypes.Name, ""),
-                (StateKeys.WILDCARD, ""),
-                (EventTypes.Member, StateKeys.WILDCARD),
-                (EventTypes.Member, "@foo"),
-                # One of these should take precedence over everything else
-                (StateKeys.WILDCARD, StateKeys.WILDCARD),
-                (StateKeys.WILDCARD, StateKeys.WILDCARD),
-                (EventTypes.CanonicalAlias, ""),
-            ],
-        )
-
-        room_sync_config = RoomSyncConfig.from_room_config(list_config)
-
-        self._assert_room_config_equal(
-            room_sync_config,
-            RoomSyncConfig(
-                timeline_limit=10,
-                required_state_map={
-                    StateKeys.WILDCARD: {(StateKeys.WILDCARD, StateKeys.WILDCARD)},
-                },
+            (
+                "wildcard_merge2",
+                """
+                Test that an all wildcard ("*", "*") entry will override any other
+                values (including other wildcards).
+                """,
+                # Input
+                SlidingSyncConfig.SlidingSyncList(
+                    timeline_limit=10,
+                    required_state=[
+                        (EventTypes.Name, ""),
+                        (StateKeys.WILDCARD, ""),
+                        (EventTypes.Member, StateKeys.WILDCARD),
+                        (EventTypes.Member, "@foo"),
+                        # One of these should take precedence over everything else
+                        (StateKeys.WILDCARD, StateKeys.WILDCARD),
+                        (StateKeys.WILDCARD, StateKeys.WILDCARD),
+                        (EventTypes.CanonicalAlias, ""),
+                    ],
+                ),
+                # Expected
+                RoomSyncConfig(
+                    timeline_limit=10,
+                    required_state_map={
+                        StateKeys.WILDCARD: {(StateKeys.WILDCARD, StateKeys.WILDCARD)},
+                    },
+                ),
             ),
-        )
-
-    def test_from_room_config_lazy_members(self) -> None:
-        """
-        `$LAZY` room members should just be another additional key next to other
-        explicit keys. We will unroll the special `$LAZY` meaning later.
-        """
-        list_config = SlidingSyncConfig.SlidingSyncList(
-            timeline_limit=10,
-            required_state=[
-                (EventTypes.Name, ""),
-                (EventTypes.Member, "@foo"),
-                (EventTypes.Member, "@bar"),
-                (EventTypes.Member, StateKeys.LAZY),
-                (EventTypes.Member, "@baz"),
-                (EventTypes.CanonicalAlias, ""),
-            ],
-        )
-
-        room_sync_config = RoomSyncConfig.from_room_config(list_config)
-
-        self._assert_room_config_equal(
-            room_sync_config,
-            RoomSyncConfig(
-                timeline_limit=10,
-                required_state_map={
-                    EventTypes.Name: {(EventTypes.Name, "")},
-                    EventTypes.Member: {
+            (
+                "lazy_members",
+                """
+                `$LAZY` room members should just be another additional key next to other
+                explicit keys. We will unroll the special `$LAZY` meaning later.
+                """,
+                # Input
+                SlidingSyncConfig.SlidingSyncList(
+                    timeline_limit=10,
+                    required_state=[
+                        (EventTypes.Name, ""),
                         (EventTypes.Member, "@foo"),
                         (EventTypes.Member, "@bar"),
                         (EventTypes.Member, StateKeys.LAZY),
                         (EventTypes.Member, "@baz"),
+                        (EventTypes.CanonicalAlias, ""),
+                    ],
+                ),
+                # Expected
+                RoomSyncConfig(
+                    timeline_limit=10,
+                    required_state_map={
+                        EventTypes.Name: {(EventTypes.Name, "")},
+                        EventTypes.Member: {
+                            (EventTypes.Member, "@foo"),
+                            (EventTypes.Member, "@bar"),
+                            (EventTypes.Member, StateKeys.LAZY),
+                            (EventTypes.Member, "@baz"),
+                        },
+                        EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")},
                     },
-                    EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")},
-                },
+                ),
             ),
+        ]
+    )
+    def test_from_room_config(
+        self,
+        _test_label: str,
+        _test_description: str,
+        room_params: SlidingSyncConfig.CommonRoomParameters,
+        expected_room_sync_config: RoomSyncConfig,
+    ) -> None:
+        """
+        Test `RoomSyncConfig.from_room_config(room_params)` will result in the `expected_room_sync_config`.
+        """
+        room_sync_config = RoomSyncConfig.from_room_config(room_params)
+
+        self._assert_room_config_equal(
+            room_sync_config,
+            expected_room_sync_config,
         )

From 4eb82e35d3b542172c86029f012136e1aac45f07 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Tue, 2 Jul 2024 16:31:39 -0500
Subject: [PATCH 089/109] Combine wildcard configs

---
 synapse/handlers/sliding_sync.py    |  71 ++++++++++--
 tests/handlers/test_sliding_sync.py | 168 +++++++++++++++++++++++++---
 2 files changed, 217 insertions(+), 22 deletions(-)

diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py
index ab9addb7dff..0e307346087 100644
--- a/synapse/handlers/sliding_sync.py
+++ b/synapse/handlers/sliding_sync.py
@@ -162,7 +162,9 @@ def from_room_config(
             # If we're getting wildcards for the `state_type` and `state_key`, that's
             # all that matters so get rid of any other entries
             if state_type == StateKeys.WILDCARD and state_key == StateKeys.WILDCARD:
-                required_state_map = {state_type: {(state_type, state_key)}}
+                required_state_map = {
+                    StateKeys.WILDCARD: {(StateKeys.WILDCARD, StateKeys.WILDCARD)}
+                }
                 # We can break, since we don't need to add anything else
                 break
@@ -234,16 +236,71 @@ def combine_room_sync_config(
             state_type,
             state_key_set,
         ) in other_room_sync_config.required_state_map.items():
-            # If we already have a wildcard, we don't need to add anything else
-            if get_first_item_in_set(self.required_state_map.get(state_type)) == (
-                state_type,
-                StateKeys.WILDCARD,
+            # If we already have a wildcard for everything, we don't need to add
+            # anything else
+            if (StateKeys.WILDCARD, StateKeys.WILDCARD) in self.required_state_map.get(
+                StateKeys.WILDCARD, {}
+            ):
+                break
+
+            # If we already have a wildcard `state_key` for this `state_type`, we don't need
+            # to add anything else
+            if (
+                # We assume that if a wildcard is present, it's the only thing in the
+                # set.
+                get_first_item_in_set(self.required_state_map.get(state_type))
+                == (state_type, StateKeys.WILDCARD)
             ):
                 continue
 
+            # If we're getting wildcards for the `state_type` and `state_key`, that's
+            # all that matters so get rid of any other entries
+            if (
+                state_type == StateKeys.WILDCARD
+                and (StateKeys.WILDCARD, StateKeys.WILDCARD) in state_key_set
+            ):
+                self.required_state_map = {
+                    state_type: {(StateKeys.WILDCARD, StateKeys.WILDCARD)}
+                }
+                # We can break, since we don't need to add anything else
+                break
+
             for _state_type, state_key in state_key_set:
-                # If we're getting a wildcard, that's all that matters so get rid of any
-                # other state keys
+                # If we already have a wildcard for this specific `state_key`, we don't need
+                # to add it since the wildcard already covers it.
+                if (StateKeys.WILDCARD, state_key) in self.required_state_map.get(
+                    StateKeys.WILDCARD, {}
+                ):
+                    continue
+
+                # If we're getting a wildcard for the `state_type`, get rid of any other
+                # entries with the same `state_key`, since the wildcard will cover it already.
+                if state_type == StateKeys.WILDCARD:
+                    # Get rid of any entries that match the `state_key`
+                    #
+                    # Make a copy so we don't run into an error: `dictionary changed size
+                    # during iteration`, when we remove items
+                    for (
+                        existing_state_type,
+                        existing_state_key_set,
+                    ) in list(self.required_state_map.items()):
+                        # Make a copy so we don't run into an error: `Set changed size during
+                        # iteration`, when we filter out and remove items
+                        for (
+                            _existing_state_type,
+                            existing_state_key,
+                        ) in existing_state_key_set.copy():
+                            if existing_state_key == state_key:
+                                existing_state_key_set.remove(
+                                    (existing_state_type, state_key)
+                                )
+
+                        # If we've left the `set()` empty, remove it from the map
+                        if existing_state_key_set == set():
+                            self.required_state_map.pop(existing_state_type, None)
+
+                # If we're getting a wildcard `state_key`, get rid of any other state_keys
+                # for this `state_type` since the wildcard will cover it already.
                 if state_key == StateKeys.WILDCARD:
                     self.required_state_map[state_type] = {(state_type, state_key)}
                     break
diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py
index 1014449ebd2..949e1353f75 100644
--- a/tests/handlers/test_sliding_sync.py
+++ b/tests/handlers/test_sliding_sync.py
@@ -22,6 +22,7 @@
 from unittest.mock import patch
 
 from parameterized import parameterized
+from typing import Optional
 
 from twisted.test.proto_helpers import MemoryReactor
 
@@ -51,19 +52,24 @@ class RoomSyncConfigTestCase(TestCase):
     def _assert_room_config_equal(
-        self, actual: RoomSyncConfig, expected: RoomSyncConfig
+        self,
+        actual: RoomSyncConfig,
+        expected: RoomSyncConfig,
+        message_prefix: Optional[str] = None,
     ) -> None:
-        self.assertEqual(actual.timeline_limit, expected.timeline_limit)
+        self.assertEqual(actual.timeline_limit, expected.timeline_limit, message_prefix)
 
         # `self.assertEqual(...)` works fine to catch differences but the output is
         # almost impossible to read because of the way it truncates the output and the
         # order doesn't actually matter.
-        self.assertCountEqual(actual.required_state_map, expected.required_state_map)
+        self.assertCountEqual(
+            actual.required_state_map, expected.required_state_map, message_prefix
+        )
         for event_type, expected_state_keys in expected.required_state_map.items():
             self.assertCountEqual(
                 actual.required_state_map[event_type],
                 expected_state_keys,
-                f"Mismatch for {event_type}",
+                f"{message_prefix}: Mismatch for {event_type}",
             )
 
     @parameterized.expand(
         [
             (
-                "No direct overlap",
+                "no_direct_overlap",
                 # A
                 RoomSyncConfig(
                     timeline_limit=9,
@@ -412,23 +418,106 @@ def test_from_room_config(
                 ),
             ),
             (
-                "Wildcard overlap",
+                "wildcard_overlap",
+                # A
+                RoomSyncConfig(
+                    timeline_limit=10,
+                    required_state_map={
+                        StateKeys.WILDCARD: {
+                            (StateKeys.WILDCARD, StateKeys.WILDCARD),
+                        },
+                    },
+                ),
+                # B
+                RoomSyncConfig(
+                    timeline_limit=9,
+                    required_state_map={
+                        EventTypes.Dummy: {(EventTypes.Dummy, StateKeys.WILDCARD)},
+                        StateKeys.WILDCARD: {(StateKeys.WILDCARD, "@bar")},
+                        EventTypes.Member: {
+                            (EventTypes.Member, "@foo"),
+                        },
+                    },
+                ),
+                # Expected
+                RoomSyncConfig(
+                    timeline_limit=10,
+                    required_state_map={
+                        StateKeys.WILDCARD: {
+                            (StateKeys.WILDCARD, StateKeys.WILDCARD),
+                        },
+                    },
+                ),
+            ),
+            (
+                "state_type_wildcard_overlap",
+                # A
+                RoomSyncConfig(
+                    timeline_limit=10,
+                    required_state_map={
+                        EventTypes.Dummy: {(EventTypes.Dummy, "dummy")},
+                        StateKeys.WILDCARD: {
+                            (StateKeys.WILDCARD, ""),
+                            (StateKeys.WILDCARD, "@foo"),
+                        },
+                        EventTypes.Member: {
+                            (EventTypes.Member, "@bar"),
+                        },
+                    },
+                ),
+                # B
+                RoomSyncConfig(
+                    timeline_limit=9,
+                    required_state_map={
+                        EventTypes.Dummy: {(EventTypes.Dummy, "dummy2")},
+                        StateKeys.WILDCARD: {
+                            (StateKeys.WILDCARD, ""),
+                            (StateKeys.WILDCARD, "@bar"),
+                        },
+                        EventTypes.Member: {
+                            (EventTypes.Member, "@foo"),
+                        },
+                    },
+                ),
+                # Expected
+                RoomSyncConfig(
+                    timeline_limit=10,
+                    required_state_map={
+                        EventTypes.Dummy: {
+                            (EventTypes.Dummy, "dummy"),
+                            (EventTypes.Dummy, "dummy2"),
+                        },
+                        StateKeys.WILDCARD: {
+                            (StateKeys.WILDCARD, ""),
+                            (StateKeys.WILDCARD, "@foo"),
+                            (StateKeys.WILDCARD, "@bar"),
+                        },
+                    },
+                ),
+            ),
+            (
+                "state_key_wildcard_overlap",
                 # A
                 RoomSyncConfig(
                     timeline_limit=10,
                     required_state_map={
-                        EventTypes.Dummy: {(EventTypes.Dummy, "foo")},
+                        EventTypes.Dummy: {(EventTypes.Dummy, "dummy")},
                         EventTypes.Member: {
-                            (EventTypes.Member, "*"),
+                            (EventTypes.Member, StateKeys.WILDCARD),
+                        },
+                        "org.matrix.flowers": {
+                            ("org.matrix.flowers", StateKeys.WILDCARD)
                         },
-                        "org.matrix.flowers": {("org.matrix.flowers", "*")},
                     },
                 ),
                 # B
                 RoomSyncConfig(
                     timeline_limit=9,
                     required_state_map={
-                        EventTypes.Dummy: {(EventTypes.Dummy, "*")},
+                        EventTypes.Dummy: {(EventTypes.Dummy, StateKeys.WILDCARD)},
+                        EventTypes.Member: {
+                            (EventTypes.Member, StateKeys.WILDCARD),
+                        },
                         "org.matrix.flowers": {("org.matrix.flowers", "tulips")},
                     },
                 ),
                 # Expected
                 RoomSyncConfig(
                     timeline_limit=10,
                     required_state_map={
                         EventTypes.Dummy: {
-                            (EventTypes.Dummy, "*"),
+                            (EventTypes.Dummy, StateKeys.WILDCARD),
                         },
                         EventTypes.Member: {
-                            (EventTypes.Member, "*"),
+                            (EventTypes.Member, StateKeys.WILDCARD),
+                        },
+                        "org.matrix.flowers": {
+                            ("org.matrix.flowers", StateKeys.WILDCARD)
+                        },
+                    },
+                ),
+            ),
+            (
+                "state_type_and_state_key_wildcard_merge",
+                # A
+                RoomSyncConfig(
+                    timeline_limit=10,
+                    required_state_map={
+                        EventTypes.Dummy: {(EventTypes.Dummy, "dummy")},
+                        StateKeys.WILDCARD: {
+                            (StateKeys.WILDCARD, ""),
+                            (StateKeys.WILDCARD, "@foo"),
+                        },
+                        EventTypes.Member: {
+                            (EventTypes.Member, "@bar"),
+                        },
+                    },
+                ),
+                # B
+                RoomSyncConfig(
+                    timeline_limit=9,
+                    required_state_map={
+                        EventTypes.Dummy: {(EventTypes.Dummy, "dummy2")},
+                        StateKeys.WILDCARD: {
+                            (StateKeys.WILDCARD, ""),
+                        },
+                        EventTypes.Member: {
+                            (EventTypes.Member, StateKeys.WILDCARD),
+                        },
+                    },
+                ),
+                # Expected
+                RoomSyncConfig(
+                    timeline_limit=10,
+                    required_state_map={
+                        EventTypes.Dummy: {
+                            (EventTypes.Dummy, "dummy"),
+                            (EventTypes.Dummy, "dummy2"),
+                        },
+                        StateKeys.WILDCARD: {
+                            (StateKeys.WILDCARD, ""),
+                            (StateKeys.WILDCARD, "@foo"),
+                        },
+                        EventTypes.Member: {
+                            (EventTypes.Member, StateKeys.WILDCARD),
                         },
-                        "org.matrix.flowers": {("org.matrix.flowers", "*")},
                     },
                 ),
             ),
@@ -465,7 +603,7 @@ def test_combine_room_sync_config(
         # Combine B into A
         room_sync_config_a.combine_room_sync_config(room_sync_config_b)
 
-        self._assert_room_config_equal(room_sync_config_a, expected)
+        self._assert_room_config_equal(room_sync_config_a, expected, "B into A")
 
         # Since we're mutating these in place, make a copy for each of our trials
         room_sync_config_a = deepcopy(a)
@@ -474,7 +612,7 @@ def test_combine_room_sync_config(
         # Combine A into B
         room_sync_config_b.combine_room_sync_config(room_sync_config_a)
 
-        self._assert_room_config_equal(room_sync_config_b, expected)
+        self._assert_room_config_equal(room_sync_config_b, expected, "A into B")

 class GetSyncRoomIdsForUserTestCase(HomeserverTestCase):

From 646b3282c46537588ff7566c1b0770fd1e7761c5 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Tue, 2 Jul 2024 16:36:20 -0500
Subject: [PATCH 090/109] No need for `get_first_item_in_set(...)`

---
 synapse/handlers/sliding_sync.py    | 50 +++++++----------------------
 tests/handlers/test_sliding_sync.py |  2 +-
 2 files changed, 13 insertions(+), 39 deletions(-)

diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py
index 0e307346087..47be0f93d4f 100644
--- a/synapse/handlers/sliding_sync.py
+++ b/synapse/handlers/sliding_sync.py
@@ -18,18 +18,7 @@
 #
 #
 import logging
-from typing import (
-    TYPE_CHECKING,
-    AbstractSet,
-    Any,
-    Dict,
-    Final,
-    List,
-    Optional,
-    Set,
-    Tuple,
-    TypeVar,
-)
+from typing import TYPE_CHECKING, Any, Dict, Final, List, Optional, Set, Tuple
 
 import attr
 from immutabledict import immutabledict
@@ -93,17 +82,6 @@ def filter_membership_for_sync(
     return membership != Membership.LEAVE or sender not in (user_id, None)
 
 
-R = TypeVar("R")
-
-
-def get_first_item_in_set(target_set: Optional[AbstractSet[R]]) -> Optional[R]:
-    """
-    Helper to grab the "first" item in a set. A set is an unordered collection so this
-    is just a way to grab some item in the set.
-    """
-    return next(iter(target_set or []), None)
-
-
 # We can't freeze this class because we want to update it in place with the
 # de-duplicated data.
 @attr.s(slots=True, auto_attribs=True)
@@ -140,7 +118,9 @@ def from_room_config(
         ) in room_params.required_state:
             # If we already have a wildcard for everything, we don't need to add
             # anything else
-            wildcard_set = required_state_map.get(StateKeys.WILDCARD, {})
+            wildcard_set: Set[Tuple[str, str]] = required_state_map.get(
+                StateKeys.WILDCARD, set()
+            )
             if (StateKeys.WILDCARD, StateKeys.WILDCARD) in wildcard_set:
                 break
 
@@ -151,11 +131,8 @@ def from_room_config(
 
             # If we already have a wildcard `state_key` for this `state_type`, we don't need
             # to add anything else
-            if (
-                # We assume that if a wildcard is present, it's the only thing in the
-                # set.
-                get_first_item_in_set(required_state_map.get(state_type))
-                == (state_type, StateKeys.WILDCARD)
+            if (state_type, StateKeys.WILDCARD) in required_state_map.get(
+                state_type, set()
             ):
                 continue
 
@@ -239,17 +216,14 @@ def combine_room_sync_config(
             # If we already have a wildcard for everything, we don't need to add
             # anything else
             if (StateKeys.WILDCARD, StateKeys.WILDCARD) in self.required_state_map.get(
-                StateKeys.WILDCARD, {}
+                StateKeys.WILDCARD, set()
             ):
                 break
 
             # If we already have a wildcard `state_key` for this `state_type`, we don't need
             # to add anything else
-            if (
-                # We assume that if a wildcard is present, it's the only thing in the
-                # set.
-                get_first_item_in_set(self.required_state_map.get(state_type))
-                == (state_type, StateKeys.WILDCARD)
+            if (state_type, StateKeys.WILDCARD) in self.required_state_map.get(
+                state_type, set()
             ):
                 continue
 
@@ -269,7 +243,7 @@ def combine_room_sync_config(
                 # If we already have a wildcard for this specific `state_key`, we don't need
                 # to add it since the wildcard already covers it.
                 if (StateKeys.WILDCARD, state_key) in self.required_state_map.get(
-                    StateKeys.WILDCARD, {}
+                    StateKeys.WILDCARD, set()
                 ):
                     continue
 
@@ -531,8 +505,8 @@ async def current_sync_for_user(
                             partial_state_room_map.get(room_id)
                             and membership_state_keys is not None
                             and len(membership_state_keys) == 1
-                            and get_first_item_in_set(membership_state_keys)
-                            == (EventTypes.Member, StateKeys.LAZY)
+                            and (EventTypes.Member, StateKeys.LAZY)
+                            in membership_state_keys
                         ):
                             # Since we're skipping this room, we need to allow
                             # for the next room to take its place in the list
diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py
index 949e1353f75..c23c4ad4682 100644
--- a/tests/handlers/test_sliding_sync.py
+++ b/tests/handlers/test_sliding_sync.py
@@ -19,10 +19,10 @@
 #
 import logging
 from copy import deepcopy
+from typing import Optional
 from unittest.mock import patch
 
 from parameterized import parameterized
-from typing import Optional
 
 from twisted.test.proto_helpers import MemoryReactor

From f2e0c5e7213728a1e54919a38b48691df01f1398 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Tue, 2 Jul 2024 17:08:53 -0500
Subject: [PATCH 091/109] Workaround wildcard event types

---
 synapse/handlers/sliding_sync.py | 159 ++++++++++++++++++-------------
 1 file changed, 91 insertions(+), 68 deletions(-)

diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py
index 47be0f93d4f..b6be2c95f0e 100644
--- a/synapse/handlers/sliding_sync.py
+++ b/synapse/handlers/sliding_sync.py
@@ -1265,76 +1265,99 @@ async def get_room_sync_data(
             Membership.INVITE,
             Membership.KNOCK,
         ):
-            # Calculate the required state for the room and make it into the form of a
-            # `StateFilter`
-            required_state_types: List[Tuple[str, Optional[str]]] = []
-            for (
-                state_type,
-                state_key_set,
-            ) in room_sync_config.required_state_map.items():
-                for _state_type, state_key in state_key_set:
-                    if state_key == StateKeys.WILDCARD:
-                        # `None` is a wildcard in the `StateFilter`
-                        required_state_types.append((state_type, None))
-                    # We need to fetch all relevant people when we're lazy-loading membership
-                    if state_type == EventTypes.Member and state_key == StateKeys.LAZY:
-                        # Everyone in the timeline is relevant
-                        timeline_membership: Set[str] = set()
-                        if timeline_events is not None:
-                            for timeline_event in timeline_events:
-                                timeline_membership.add(timeline_event.sender)
-
-                        for user_id in timeline_membership:
-                            required_state_types.append((EventTypes.Member, user_id))
-
-                        # TODO: We probably also care about invite, ban, kick, targets, etc
-                        # but the spec only mentions "senders".
-                    else:
-                        required_state_types.append((state_type, state_key))
-
-            state_filter = StateFilter.from_types(required_state_types)
+            # Calculate the `StateFilter` based on the `required_state` for the room
+            state_filter: Optional[StateFilter] = StateFilter.none()
+            # If we have a double wildcard ("*", "*") in the `required_state`, we need
+            # to fetch all state for the room
+            if (
+                StateKeys.WILDCARD,
+                StateKeys.WILDCARD,
+            ) in room_sync_config.required_state_map.get(StateKeys.WILDCARD, set()):
+                state_filter = StateFilter.all()
+            # TODO: `StateFilter` currently doesn't support wildcard event types. We're
+            # currently working around this by returning all state to the client but it
+            # would be nice to fetch less from the database and return just what the
+            # client wanted.
+            elif (
+                room_sync_config.required_state_map.get(StateKeys.WILDCARD) is not None
+            ):
+                state_filter = StateFilter.all()
+            else:
+                required_state_types: List[Tuple[str, Optional[str]]] = []
+                for (
+                    state_type,
+                    state_key_set,
+                ) in room_sync_config.required_state_map.items():
+                    for _state_type, state_key in state_key_set:
+                        if state_key == StateKeys.WILDCARD:
+                            # `None` is a wildcard in the `StateFilter`
+                            required_state_types.append((state_type, None))
+                        # We need to fetch all relevant people when we're lazy-loading membership
+                        if (
+                            state_type == EventTypes.Member
+                            and state_key == StateKeys.LAZY
+                        ):
+                            # Everyone in the timeline is relevant
+                            timeline_membership: Set[str] = set()
+                            if timeline_events is not None:
+                                for timeline_event in timeline_events:
+                                    timeline_membership.add(timeline_event.sender)
+
+                            for user_id in timeline_membership:
+                                required_state_types.append(
+                                    (EventTypes.Member, user_id)
+                                )
+
+                            # TODO: We probably also care about invite, ban, kick, targets, etc
+                            # but the spec only mentions "senders".
+                        else:
+                            required_state_types.append((state_type, state_key))
+
+                state_filter = StateFilter.from_types(required_state_types)
+
+            # We can skip fetching state if we don't need any
+            if state_filter != StateFilter.none():
+                # We can return all of the state that was requested if we're doing an
+                # initial sync
+                if initial:
+                    # People shouldn't see past their leave/ban event
+                    if rooms_membership_for_user_at_to_token.membership in (
+                        Membership.LEAVE,
+                        Membership.BAN,
+                    ):
+                        room_state = await self.storage_controllers.state.get_state_at(
+                            room_id,
+                            stream_position=to_token.copy_and_replace(
+                                StreamKeyType.ROOM,
+                                rooms_membership_for_user_at_to_token.event_pos.to_room_stream_token(),
+                            ),
+                            state_filter=state_filter,
+                            # Partially-stated rooms should have all state events except for
+                            # the membership events and since we've already excluded
+                            # partially-stated rooms unless `required_state` only has
+                            # `["m.room.member", "$LAZY"]` for membership, we should be able
+                            # to retrieve everything requested. Plus we don't want to block
+                            # the whole sync waiting for this one room.
+                            await_full_state=False,
+                        )
+                    # Otherwise, we can get the latest current state in the room
+                    else:
+                        room_state = await self.storage_controllers.state.get_current_state(
+                            room_id,
+                            state_filter,
+                            # Partially-stated rooms should have all state events except for
+                            # the membership events and since we've already excluded
+                            # partially-stated rooms unless `required_state` only has
+                            # `["m.room.member", "$LAZY"]` for membership, we should be able
+                            # to retrieve everything requested. Plus we don't want to block
+                            # the whole sync waiting for this one room.
+                            await_full_state=False,
+                        )
+                    # TODO: Query `current_state_delta_stream` and reverse/rewind back to the `to_token`
+                else:
+                    # TODO: Once we can figure out if we've sent a room down this connection before,
+                    # we can return updates instead of the full required state.
+                    raise NotImplementedError()
 
-            # We can return all of the state that was requested if we're doing an
-            # initial sync
-            if initial:
-                # People shouldn't see past their leave/ban event
-                if rooms_membership_for_user_at_to_token.membership in (
-                    Membership.LEAVE,
-                    Membership.BAN,
-                ):
-                    room_state = await self.storage_controllers.state.get_state_at(
-                        room_id,
-                        stream_position=to_token.copy_and_replace(
-                            StreamKeyType.ROOM,
-                            rooms_membership_for_user_at_to_token.event_pos.to_room_stream_token(),
-                        ),
-                        state_filter=state_filter,
-                        # Partially-stated rooms should have all state events except for
-                        # the membership events and since we've already excluded
-                        # partially-stated rooms unless `required_state` only has
-                        # `["m.room.member", "$LAZY"]` for membership, we should be able
-                        # to retrieve everything requested. Plus we don't want to block
-                        # the whole sync waiting for this one room.
-                        await_full_state=False,
-                    )
-                # Otherwise, we can get the latest current state in the room
-                else:
-                    room_state = await self.storage_controllers.state.get_current_state(
-                        room_id,
-                        state_filter,
-                        # Partially-stated rooms should have all state events except for
-                        # the membership events and since we've already excluded
-                        # partially-stated rooms unless `required_state` only has
-                        # `["m.room.member", "$LAZY"]` for membership, we should be able
-                        # to retrieve everything requested. Plus we don't want to block
-                        # the whole sync waiting for this one room.
-                        await_full_state=False,
-                    )
-                # TODO: Query `current_state_delta_stream` and reverse/rewind back to the `to_token`
-            else:
-                # TODO: Once we can figure out if we've sent a room down this connection before,
-                # we can return updates instead of the full required state.
-                raise NotImplementedError()

From 859f9b0e9631fce49e93e658ecf64872e316feb6 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Tue, 2 Jul 2024 17:10:36 -0500
Subject: [PATCH 092/109] Rename to `StateValues` because it can be used in
 the event_type or state_key

---
 synapse/handlers/sliding_sync.py    |  62 +++++++-------
 tests/handlers/test_sliding_sync.py | 122 ++++++++++++++--------------
 2 files changed, 95 insertions(+), 89 deletions(-)

diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py
index b6be2c95f0e..b7ab56ac8b2 100644
--- a/synapse/handlers/sliding_sync.py
+++ b/synapse/handlers/sliding_sync.py
@@ -119,34 +119,34 @@ def from_room_config(
             # If we already have a wildcard for everything, we don't need to add
             # anything else
             wildcard_set: Set[Tuple[str, str]] = required_state_map.get(
-                StateKeys.WILDCARD, set()
+                StateValues.WILDCARD, set()
             )
-            if (StateKeys.WILDCARD, StateKeys.WILDCARD) in wildcard_set:
+            if (StateValues.WILDCARD, StateValues.WILDCARD) in wildcard_set:
                 break
 
             # If we already have a wildcard for this specific `state_key`, we don't need
             # to add it since the wildcard already covers it.
-            if (StateKeys.WILDCARD, state_key) in wildcard_set:
+            if (StateValues.WILDCARD, state_key) in wildcard_set:
                 continue
 
             # If we already have a wildcard `state_key` for this `state_type`, we don't need
             # to add anything else
-            if (state_type, StateKeys.WILDCARD) in required_state_map.get(
+            if (state_type, StateValues.WILDCARD) in required_state_map.get(
                 state_type, set()
             ):
                 continue
 
             # If we're getting wildcards for the `state_type` and `state_key`, that's
             # all that matters so get rid of any other entries
-            if state_type == StateKeys.WILDCARD and state_key == StateKeys.WILDCARD:
+            if state_type == StateValues.WILDCARD and state_key == StateValues.WILDCARD:
                 required_state_map = {
-                    StateKeys.WILDCARD: {(StateKeys.WILDCARD, StateKeys.WILDCARD)}
+                    StateValues.WILDCARD: {(StateValues.WILDCARD, StateValues.WILDCARD)}
                 }
                 # We can break, since we don't need to add anything else
                 break
             # If we're getting a wildcard for the `state_type`, get rid of any other
             # entries with the same `state_key`, since the wildcard will cover it already.
-            elif state_type == StateKeys.WILDCARD:
+            elif state_type == StateValues.WILDCARD:
                 # Get rid of any entries that match the `state_key`
                 #
                 # Make a copy so we don't run into an error: `dictionary changed size
                 # during iteration`, when we remove items
                 for (
                     existing_state_type,
                     existing_state_key_set,
                 ) in list(required_state_map.items()):
                     # Make a copy so we don't run into an error: `Set changed size during
                     # iteration`, when we filter out and remove items
                     for (
                         _existing_state_type,
                         existing_state_key,
                     ) in existing_state_key_set.copy():
                         if existing_state_key == state_key:
                             existing_state_key_set.remove(
                                 (existing_state_type, state_key)
                             )
 
                     # If we've left the `set()` empty, remove it from the map
                     if existing_state_key_set == set():
                         required_state_map.pop(existing_state_type, None)
 
             # If we're getting a wildcard `state_key`, get rid of any other state_keys
             # for this `state_type` since the wildcard will cover it already.
- if state_key == StateKeys.WILDCARD: + if state_key == StateValues.WILDCARD: required_state_map[state_type] = {(state_type, state_key)} # Otherwise, just add it to the set else: @@ -215,14 +215,15 @@ def combine_room_sync_config( ) in other_room_sync_config.required_state_map.items(): # If we already have a wildcard for everything, we don't need to add # anything else - if (StateKeys.WILDCARD, StateKeys.WILDCARD) in self.required_state_map.get( - StateKeys.WILDCARD, set() - ): + if ( + StateValues.WILDCARD, + StateValues.WILDCARD, + ) in self.required_state_map.get(StateValues.WILDCARD, set()): break # If we already have a wildcard `state_key` for this `state_type`, we don't need # to add anything else - if (state_type, StateKeys.WILDCARD) in self.required_state_map.get( + if (state_type, StateValues.WILDCARD) in self.required_state_map.get( state_type, set() ): continue @@ -230,11 +231,11 @@ def combine_room_sync_config( # If we're getting wildcards for the `state_type` and `state_key`, that's # all that matters so get rid of any other entries if ( - state_type == StateKeys.WILDCARD - and (StateKeys.WILDCARD, StateKeys.WILDCARD) in state_key_set + state_type == StateValues.WILDCARD + and (StateValues.WILDCARD, StateValues.WILDCARD) in state_key_set ): self.required_state_map = { - state_type: {(StateKeys.WILDCARD, StateKeys.WILDCARD)} + state_type: {(StateValues.WILDCARD, StateValues.WILDCARD)} } # We can break, since we don't need to add anything else break @@ -242,14 +243,14 @@ def combine_room_sync_config( for _state_type, state_key in state_key_set: # If we already have a wildcard for this specific `state_key`, we don't need # to add it since the wildcard already covers it. - if (StateKeys.WILDCARD, state_key) in self.required_state_map.get( - StateKeys.WILDCARD, set() + if (StateValues.WILDCARD, state_key) in self.required_state_map.get( + StateValues.WILDCARD, set() ): continue # If we're getting a wildcard for the `state_type`, get rid of any other # entries with the same `state_key`, since the wildcard will cover it already. - if state_type == StateKeys.WILDCARD: + if state_type == StateValues.WILDCARD: # Get rid of any entries that match the `state_key` # # Make a copy so we don't run into an error: `dictionary changed size @@ -275,7 +276,7 @@ def combine_room_sync_config( # If we're getting a wildcard `state_key`, get rid of any other state_keys # for this `state_type` since the wildcard will cover it already. - if state_key == StateKeys.WILDCARD: + if state_key == StateValues.WILDCARD: self.required_state_map[state_type] = {(state_type, state_key)} break # Otherwise, just add it to the set @@ -286,16 +287,16 @@ def combine_room_sync_config( self.required_state_map[state_type].add((state_type, state_key)) -class StateKeys: +class StateValues: """ - Understood values of the `state_key` part of the tuple (type, state_key) in - `required_state`. + Understood values of the (type, state_key) tuple in `required_state`. """ # Include all state events of the given type WILDCARD: Final = "*" # Lazy-load room membership events (include room membership events for any event - # `sender` in the timeline) + # `sender` in the timeline). We only give special meaning to this value when it's a + # `state_key`. 
LAZY: Final = "$LAZY" @@ -505,7 +506,7 @@ async def current_sync_for_user( partial_state_room_map.get(room_id) and membership_state_keys is not None and len(membership_state_keys) == 1 - and (EventTypes.Member, StateKeys.LAZY) + and (EventTypes.Member, StateValues.LAZY) in membership_state_keys ): # Since we're skipping this room, we need to allow @@ -1270,16 +1271,17 @@ async def get_room_sync_data( # If we have a double wildcard ("*", "*") in the `required_state`, we need # to fetch all state for the room if ( - StateKeys.WILDCARD, - StateKeys.WILDCARD, - ) in room_sync_config.required_state_map.get(StateKeys.WILDCARD, set()): + StateValues.WILDCARD, + StateValues.WILDCARD, + ) in room_sync_config.required_state_map.get(StateValues.WILDCARD, set()): state_filter = StateFilter.all() # TODO: `StateFilter` currently doesn't support wildcard event types. We're # currently working around this by returning all state to the client but it # would be nice to fetch less from the database and return just what the # client wanted. elif ( - room_sync_config.required_state_map.get(StateKeys.WILDCARD) is not None + room_sync_config.required_state_map.get(StateValues.WILDCARD) + is not None ): state_filter = StateFilter.all() else: @@ -1289,13 +1291,13 @@ async def get_room_sync_data( state_key_set, ) in room_sync_config.required_state_map.items(): for _state_type, state_key in state_key_set: - if state_key == StateKeys.WILDCARD: + if state_key == StateValues.WILDCARD: # `None` is a wildcard in the `StateFilter` required_state_types.append((state_type, None)) # We need to fetch all relevant people when we're lazy-loading membership if ( state_type == EventTypes.Member - and state_key == StateKeys.LAZY + and state_key == StateValues.LAZY ): # Everyone in the timeline is relevant timeline_membership: Set[str] = set() diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index c23c4ad4682..c1a1d72a195 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -35,7 +35,7 @@ RoomTypes, ) from synapse.api.room_versions import RoomVersions -from synapse.handlers.sliding_sync import RoomSyncConfig, StateKeys +from synapse.handlers.sliding_sync import RoomSyncConfig, StateValues from synapse.rest import admin from synapse.rest.client import knock, login, room from synapse.server import HomeServer @@ -163,7 +163,7 @@ def _assert_room_config_equal( timeline_limit=10, required_state=[ (EventTypes.Name, ""), - (StateKeys.WILDCARD, StateKeys.WILDCARD), + (StateValues.WILDCARD, StateValues.WILDCARD), (EventTypes.Member, "@foo"), (EventTypes.CanonicalAlias, ""), ], @@ -172,7 +172,9 @@ def _assert_room_config_equal( RoomSyncConfig( timeline_limit=10, required_state_map={ - StateKeys.WILDCARD: {(StateKeys.WILDCARD, StateKeys.WILDCARD)}, + StateValues.WILDCARD: { + (StateValues.WILDCARD, StateValues.WILDCARD) + }, }, ), ), @@ -187,7 +189,7 @@ def _assert_room_config_equal( timeline_limit=10, required_state=[ (EventTypes.Name, ""), - (StateKeys.WILDCARD, ""), + (StateValues.WILDCARD, ""), (EventTypes.Member, "@foo"), (EventTypes.CanonicalAlias, ""), ], @@ -196,7 +198,7 @@ def _assert_room_config_equal( RoomSyncConfig( timeline_limit=10, required_state_map={ - StateKeys.WILDCARD: {(StateKeys.WILDCARD, "")}, + StateValues.WILDCARD: {(StateValues.WILDCARD, "")}, EventTypes.Member: { (EventTypes.Member, "@foo"), }, @@ -214,9 +216,9 @@ def _assert_room_config_equal( timeline_limit=10, required_state=[ (EventTypes.Name, ""), - (StateKeys.WILDCARD, ""), + 
(StateValues.WILDCARD, ""), (EventTypes.Member, "@foo"), - (StateKeys.WILDCARD, "@foo"), + (StateValues.WILDCARD, "@foo"), ("org.matrix.personal_count", "@foo"), (EventTypes.Member, "@bar"), (EventTypes.CanonicalAlias, ""), @@ -226,9 +228,9 @@ def _assert_room_config_equal( RoomSyncConfig( timeline_limit=10, required_state_map={ - StateKeys.WILDCARD: { - (StateKeys.WILDCARD, ""), - (StateKeys.WILDCARD, "@foo"), + StateValues.WILDCARD: { + (StateValues.WILDCARD, ""), + (StateValues.WILDCARD, "@foo"), }, EventTypes.Member: { (EventTypes.Member, "@bar"), @@ -248,9 +250,9 @@ def _assert_room_config_equal( required_state=[ (EventTypes.Name, ""), (EventTypes.Member, "@foo"), - (EventTypes.Member, StateKeys.WILDCARD), + (EventTypes.Member, StateValues.WILDCARD), (EventTypes.Member, "@bar"), - (EventTypes.Member, StateKeys.LAZY), + (EventTypes.Member, StateValues.LAZY), (EventTypes.Member, "@baz"), (EventTypes.CanonicalAlias, ""), ], @@ -261,7 +263,7 @@ def _assert_room_config_equal( required_state_map={ EventTypes.Name: {(EventTypes.Name, "")}, EventTypes.Member: { - (EventTypes.Member, StateKeys.WILDCARD), + (EventTypes.Member, StateValues.WILDCARD), }, EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")}, }, @@ -278,9 +280,9 @@ def _assert_room_config_equal( timeline_limit=10, required_state=[ (EventTypes.Name, ""), - (StateKeys.WILDCARD, ""), + (StateValues.WILDCARD, ""), (EventTypes.Member, "@foo"), - (EventTypes.Member, StateKeys.WILDCARD), + (EventTypes.Member, StateValues.WILDCARD), (EventTypes.Member, "@bar"), (EventTypes.CanonicalAlias, ""), ], @@ -289,9 +291,9 @@ def _assert_room_config_equal( RoomSyncConfig( timeline_limit=10, required_state_map={ - StateKeys.WILDCARD: {(StateKeys.WILDCARD, "")}, + StateValues.WILDCARD: {(StateValues.WILDCARD, "")}, EventTypes.Member: { - (EventTypes.Member, StateKeys.WILDCARD), + (EventTypes.Member, StateValues.WILDCARD), }, }, ), @@ -307,12 +309,12 @@ def _assert_room_config_equal( timeline_limit=10, required_state=[ (EventTypes.Name, ""), - (StateKeys.WILDCARD, ""), - (EventTypes.Member, StateKeys.WILDCARD), + (StateValues.WILDCARD, ""), + (EventTypes.Member, StateValues.WILDCARD), (EventTypes.Member, "@foo"), # One of these should take precedence over everything else - (StateKeys.WILDCARD, StateKeys.WILDCARD), - (StateKeys.WILDCARD, StateKeys.WILDCARD), + (StateValues.WILDCARD, StateValues.WILDCARD), + (StateValues.WILDCARD, StateValues.WILDCARD), (EventTypes.CanonicalAlias, ""), ], ), @@ -320,7 +322,9 @@ def _assert_room_config_equal( RoomSyncConfig( timeline_limit=10, required_state_map={ - StateKeys.WILDCARD: {(StateKeys.WILDCARD, StateKeys.WILDCARD)}, + StateValues.WILDCARD: { + (StateValues.WILDCARD, StateValues.WILDCARD) + }, }, ), ), @@ -337,7 +341,7 @@ def _assert_room_config_equal( (EventTypes.Name, ""), (EventTypes.Member, "@foo"), (EventTypes.Member, "@bar"), - (EventTypes.Member, StateKeys.LAZY), + (EventTypes.Member, StateValues.LAZY), (EventTypes.Member, "@baz"), (EventTypes.CanonicalAlias, ""), ], @@ -350,7 +354,7 @@ def _assert_room_config_equal( EventTypes.Member: { (EventTypes.Member, "@foo"), (EventTypes.Member, "@bar"), - (EventTypes.Member, StateKeys.LAZY), + (EventTypes.Member, StateValues.LAZY), (EventTypes.Member, "@baz"), }, EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")}, @@ -396,7 +400,7 @@ def test_from_room_config( timeline_limit=10, required_state_map={ EventTypes.Member: { - (EventTypes.Member, StateKeys.LAZY), + (EventTypes.Member, StateValues.LAZY), (EventTypes.Member, "@baz"), }, 
EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")}, @@ -410,7 +414,7 @@ def test_from_room_config( EventTypes.Member: { (EventTypes.Member, "@foo"), (EventTypes.Member, "@bar"), - (EventTypes.Member, StateKeys.LAZY), + (EventTypes.Member, StateValues.LAZY), (EventTypes.Member, "@baz"), }, EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")}, @@ -423,8 +427,8 @@ def test_from_room_config( RoomSyncConfig( timeline_limit=10, required_state_map={ - StateKeys.WILDCARD: { - (StateKeys.WILDCARD, StateKeys.WILDCARD), + StateValues.WILDCARD: { + (StateValues.WILDCARD, StateValues.WILDCARD), }, }, ), @@ -432,8 +436,8 @@ def test_from_room_config( RoomSyncConfig( timeline_limit=9, required_state_map={ - EventTypes.Dummy: {(EventTypes.Dummy, StateKeys.WILDCARD)}, - StateKeys.WILDCARD: {(StateKeys.WILDCARD, "@bar")}, + EventTypes.Dummy: {(EventTypes.Dummy, StateValues.WILDCARD)}, + StateValues.WILDCARD: {(StateValues.WILDCARD, "@bar")}, EventTypes.Member: { (EventTypes.Member, "@foo"), }, @@ -443,8 +447,8 @@ def test_from_room_config( RoomSyncConfig( timeline_limit=10, required_state_map={ - StateKeys.WILDCARD: { - (StateKeys.WILDCARD, StateKeys.WILDCARD), + StateValues.WILDCARD: { + (StateValues.WILDCARD, StateValues.WILDCARD), }, }, ), @@ -456,9 +460,9 @@ def test_from_room_config( timeline_limit=10, required_state_map={ EventTypes.Dummy: {(EventTypes.Dummy, "dummy")}, - StateKeys.WILDCARD: { - (StateKeys.WILDCARD, ""), - (StateKeys.WILDCARD, "@foo"), + StateValues.WILDCARD: { + (StateValues.WILDCARD, ""), + (StateValues.WILDCARD, "@foo"), }, EventTypes.Member: { (EventTypes.Member, "@bar"), @@ -470,9 +474,9 @@ def test_from_room_config( timeline_limit=9, required_state_map={ EventTypes.Dummy: {(EventTypes.Dummy, "dummy2")}, - StateKeys.WILDCARD: { - (StateKeys.WILDCARD, ""), - (StateKeys.WILDCARD, "@bar"), + StateValues.WILDCARD: { + (StateValues.WILDCARD, ""), + (StateValues.WILDCARD, "@bar"), }, EventTypes.Member: { (EventTypes.Member, "@foo"), @@ -487,10 +491,10 @@ def test_from_room_config( (EventTypes.Dummy, "dummy"), (EventTypes.Dummy, "dummy2"), }, - StateKeys.WILDCARD: { - (StateKeys.WILDCARD, ""), - (StateKeys.WILDCARD, "@foo"), - (StateKeys.WILDCARD, "@bar"), + StateValues.WILDCARD: { + (StateValues.WILDCARD, ""), + (StateValues.WILDCARD, "@foo"), + (StateValues.WILDCARD, "@bar"), }, }, ), @@ -503,10 +507,10 @@ def test_from_room_config( required_state_map={ EventTypes.Dummy: {(EventTypes.Dummy, "dummy")}, EventTypes.Member: { - (EventTypes.Member, StateKeys.WILDCARD), + (EventTypes.Member, StateValues.WILDCARD), }, "org.matrix.flowers": { - ("org.matrix.flowers", StateKeys.WILDCARD) + ("org.matrix.flowers", StateValues.WILDCARD) }, }, ), @@ -514,9 +518,9 @@ def test_from_room_config( RoomSyncConfig( timeline_limit=9, required_state_map={ - EventTypes.Dummy: {(EventTypes.Dummy, StateKeys.WILDCARD)}, + EventTypes.Dummy: {(EventTypes.Dummy, StateValues.WILDCARD)}, EventTypes.Member: { - (EventTypes.Member, StateKeys.WILDCARD), + (EventTypes.Member, StateValues.WILDCARD), }, "org.matrix.flowers": {("org.matrix.flowers", "tulips")}, }, @@ -526,13 +530,13 @@ def test_from_room_config( timeline_limit=10, required_state_map={ EventTypes.Dummy: { - (EventTypes.Dummy, StateKeys.WILDCARD), + (EventTypes.Dummy, StateValues.WILDCARD), }, EventTypes.Member: { - (EventTypes.Member, StateKeys.WILDCARD), + (EventTypes.Member, StateValues.WILDCARD), }, "org.matrix.flowers": { - ("org.matrix.flowers", StateKeys.WILDCARD) + ("org.matrix.flowers", StateValues.WILDCARD) }, }, ), @@ -544,9 
+548,9 @@ def test_from_room_config( timeline_limit=10, required_state_map={ EventTypes.Dummy: {(EventTypes.Dummy, "dummy")}, - StateKeys.WILDCARD: { - (StateKeys.WILDCARD, ""), - (StateKeys.WILDCARD, "@foo"), + StateValues.WILDCARD: { + (StateValues.WILDCARD, ""), + (StateValues.WILDCARD, "@foo"), }, EventTypes.Member: { (EventTypes.Member, "@bar"), @@ -558,11 +562,11 @@ def test_from_room_config( timeline_limit=9, required_state_map={ EventTypes.Dummy: {(EventTypes.Dummy, "dummy2")}, - StateKeys.WILDCARD: { - (StateKeys.WILDCARD, ""), + StateValues.WILDCARD: { + (StateValues.WILDCARD, ""), }, EventTypes.Member: { - (EventTypes.Member, StateKeys.WILDCARD), + (EventTypes.Member, StateValues.WILDCARD), }, }, ), @@ -574,12 +578,12 @@ def test_from_room_config( (EventTypes.Dummy, "dummy"), (EventTypes.Dummy, "dummy2"), }, - StateKeys.WILDCARD: { - (StateKeys.WILDCARD, ""), - (StateKeys.WILDCARD, "@foo"), + StateValues.WILDCARD: { + (StateValues.WILDCARD, ""), + (StateValues.WILDCARD, "@foo"), }, EventTypes.Member: { - (EventTypes.Member, StateKeys.WILDCARD), + (EventTypes.Member, StateValues.WILDCARD), }, }, ), From 2c4eea70a2952eef5141d0c02b6a3c95a33a8063 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 2 Jul 2024 21:06:56 -0500 Subject: [PATCH 093/109] Add `_assertIncludes(...)` and `_assertRequiredStateIncludes(...)` --- tests/rest/client/test_sync.py | 227 ++++++++++++++++++++++++++++----- 1 file changed, 197 insertions(+), 30 deletions(-) diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 0302cae84ce..81dcc5b24cf 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -20,7 +20,7 @@ # import json import logging -from typing import Dict, List +from typing import AbstractSet, Dict, List, Optional, Iterable from parameterized import parameterized, parameterized_class @@ -35,6 +35,7 @@ ReceiptTypes, RelationTypes, ) +from synapse.events import EventBase from synapse.rest.client import devices, knock, login, read_marker, receipts, room, sync from synapse.server import HomeServer from synapse.types import JsonDict, RoomStreamToken, StreamKeyType, StreamToken, UserID @@ -2710,7 +2711,7 @@ def test_rooms_required_state_initial_sync(self) -> None: # This one doesn't exist in the room [EventTypes.Tombstone, ""], ], - "timeline_limit": 3, + "timeline_limit": 0, } } }, @@ -2718,22 +2719,16 @@ def test_rooms_required_state_initial_sync(self) -> None: ) self.assertEqual(channel.code, 200, channel.json_body) - state_ids_map = self.get_success( - self.storage_controllers.state.get_current_state_ids(room_id1) + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id1) ) - self.assertEqual( - [ - state_event["event_id"] - for state_event in channel.json_body["rooms"][room_id1][ - "required_state" - ] - ], - [ - state_ids_map[(EventTypes.Create, "")], - state_ids_map[(EventTypes.RoomHistoryVisibility, "")], - ], + self._assertRequiredStateIncludes( channel.json_body["rooms"][room_id1]["required_state"], + { + state_map[(EventTypes.Create, "")], + state_map[(EventTypes.RoomHistoryVisibility, "")], + }, ) def test_rooms_required_state_incremental_sync(self) -> None: @@ -2766,7 +2761,7 @@ def test_rooms_required_state_incremental_sync(self) -> None: # This one doesn't exist in the room [EventTypes.Tombstone, ""], ], - "timeline_limit": 3, + "timeline_limit": 0, } } }, @@ -2774,29 +2769,201 @@ def test_rooms_required_state_incremental_sync(self) -> None: ) self.assertEqual(channel.code, 200, channel.json_body) - 
state_ids_map = self.get_success( - self.storage_controllers.state.get_current_state_ids(room_id1) + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id1) ) # The returned state doesn't change from initial to incremental sync. In the # future, we will only return updates but only if we've sent the room down the # connection before. - self.assertEqual( - [ - state_event["event_id"] - for state_event in channel.json_body["rooms"][room_id1][ - "required_state" - ] - ], - [ - state_ids_map[(EventTypes.Create, "")], - state_ids_map[(EventTypes.RoomHistoryVisibility, "")], - ], + self._assertRequiredStateIncludes( + channel.json_body["rooms"][room_id1]["required_state"], + { + state_map[(EventTypes.Create, "")], + state_map[(EventTypes.RoomHistoryVisibility, "")], + }, + ) + + def test_rooms_required_state_wildcard(self) -> None: + """ + Test `rooms.required_state` returns all state events when using wildcard `["*", "*"]`. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + + self.helper.send_state( + room_id1, + event_type="org.matrix.foo_state", + state_key="", + body={"foo": "bar"}, + tok=user2_tok, + ) + self.helper.send_state( + room_id1, + event_type="org.matrix.foo_state", + state_key="namespaced", + body={"foo": "bar"}, + tok=user2_tok, + ) + + # Make the Sliding Sync request with wildcards for the `event_type` and `state_key` + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [ + ["*", "*"], + ], + "timeline_limit": 0, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id1) + ) + + self._assertRequiredStateIncludes( + channel.json_body["rooms"][room_id1]["required_state"], + # We should see all the state events in the room + state_map.values(), + ) + + def test_rooms_required_state_wildcard_event_type(self) -> None: + """ + Test `rooms.required_state` returns all state events when using wildcard in the + event type `["*", "foobarbaz"]`. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + + self.helper.send_state( + room_id1, + event_type="org.matrix.foo_state", + state_key="", + body={"foo": "bar"}, + tok=user2_tok, + ) + self.helper.send_state( + room_id1, + event_type="org.matrix.foo_state", + state_key="namespaced", + body={"foo": "bar"}, + tok=user2_tok, + ) + + # Make the Sliding Sync request with wildcards for the `event_type` and `state_key` + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [ + ["*", user1_id], + ], + "timeline_limit": 0, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id1) + ) + + # We expect at-least any state event with the `user1_id` as the `state_key` + self._assertRequiredStateIncludes( channel.json_body["rooms"][room_id1]["required_state"], + { + state_map[(EventTypes.Member, user1_id)], + }, ) + def _assertRequiredStateIncludes( + self, + actual_required_state: JsonDict, + expected_state_events: Iterable[EventBase], + ) -> None: + """ + Wrapper around `_assertIncludes` to give slightly better looking diff error + messages that include some context "$event_id (type, state_key)". + """ + + self._assertIncludes( + { + f'{event["event_id"]} ("{event["type"]}", "{event["state_key"]}")' + for event in actual_required_state + }, + { + f'{event.event_id} ("{event.type}", "{event.state_key}")' + for event in expected_state_events + }, + # Message to help understand the diff in context + actual_required_state, + ) + + def _assertIncludes( + self, + actual_items: AbstractSet[str], + expected_items: AbstractSet[str], + msg: Optional[str] = None, + ) -> None: + """ + Assert that all of the `expected_items` are included in the `actual_items`. + + This assert could also be called `assertContains`, `assertItemsInSet` + """ + # Check for a superset + if actual_items >= expected_items: + return + + expected_lines: List[str] = [] + for expected_item in expected_items: + is_expected_in_actual = expected_item in actual_items + expected_lines.append( + "{} {}".format(" " if is_expected_in_actual else "?", expected_item) + ) + + actual_lines: List[str] = [] + for actual_item in actual_items: + is_actual_in_expected = actual_item in expected_items + actual_lines.append( + "{} {}".format("+" if is_actual_in_expected else " ", actual_item) + ) + + expected_string = f"Expected items to be in actual ('?' 
= missing expected items):\n {{\n{'\n'.join(expected_lines)}\n }}" + actual_string = ( + f"Actual ('+' = found expected items):\n {{\n{'\n'.join(actual_lines)}\n }}" + ) + diff_message = ( + f"Some expected items are missing.\n{expected_string}\n{actual_string}" + ) + + self.fail(f"{diff_message}\n{msg}") + # TODO: Add more `required_state` tests - # TODO: Add test for `"required_state": [ ["*","*"] ],` # TODO: Add test for `"required_state": [ ["*","foobarbaz"] ],` + # TODO: Add test for `"required_state": [ ["foobarbaz","*"] ],` + # TODO: Add test for `"required_state": [ ["m.room.member","$LAZY"] ],` # TODO: Add tests for partially-stated rooms being excluded From c1222c69a6dbd530fe2f5499a539d09c36d4ddd9 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 3 Jul 2024 00:18:53 -0500 Subject: [PATCH 094/109] Add exact option --- synapse/handlers/sliding_sync.py | 16 +++ tests/rest/client/test_sync.py | 208 ++++++++++++++++++++++--------- 2 files changed, 164 insertions(+), 60 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index b7ab56ac8b2..fdaedb6adf4 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1279,6 +1279,22 @@ async def get_room_sync_data( # currently working around this by returning all state to the client but it # would be nice to fetch less from the database and return just what the # client wanted. + # + # Note: MSC3575 describes different behavior to how we're handling things here but + # since it's not wrong to return more state than requested (`required_state` is + # just the minimum requested), it doesn't matter if we include things that the + # client wanted excluded. This complexity is also under scrutiny, see + # https://github.com/matrix-org/matrix-spec-proposals/pull/3575#discussion_r1185109050 + + # > One unique exception is when you request all state events via ["*", "*"]. When used, + # > all state events are returned by default, and additional entries FILTER OUT the returned set + # > of state events. These additional entries cannot use '*' themselves. + # > For example, ["*", "*"], ["m.room.member", "@alice:example.com"] will _exclude_ every m.room.member + # > event _except_ for @alice:example.com, and include every other state event. + # > In addition, ["*", "*"], ["m.space.child", "*"] is an error, the m.space.child filter is not + # > required as it would have been returned anyway. + # > + # > -- MSC3575 (https://github.com/matrix-org/matrix-spec-proposals/pull/3575) elif ( room_sync_config.required_state_map.get(StateValues.WILDCARD) is not None diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 81dcc5b24cf..155a644ed65 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1240,6 +1240,90 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.event_sources = hs.get_event_sources() self.storage_controllers = hs.get_storage_controllers() + def _assertRequiredStateIncludes( + self, + actual_required_state: JsonDict, + expected_state_events: Iterable[EventBase], + exact: bool = False, + ) -> None: + """ + Wrapper around `_assertIncludes` to give slightly better looking diff error + messages that include some context "$event_id (type, state_key)". + + Args: + actual_required_state: The "required_state" of a room from a Sliding Sync + request response. + expected_state_events: The expected state events to be included in the + `actual_required_state`. 
+ exact: Whether the actual state should be exactly equal to the expected + state (no extras). + """ + + self._assertIncludes( + { + f'{event["event_id"]} ("{event["type"]}", "{event["state_key"]}")' + for event in actual_required_state + }, + { + f'{event.event_id} ("{event.type}", "{event.state_key}")' + for event in expected_state_events + }, + exact=exact, + # Message to help understand the diff in context + message=actual_required_state, + ) + + def _assertIncludes( + self, + actual_items: AbstractSet[str], + expected_items: AbstractSet[str], + exact: bool = False, + message: Optional[str] = None, + ) -> None: + """ + Assert that all of the `expected_items` are included in the `actual_items`. + + This assert could also be called `assertContains`, `assertItemsInSet` + + Args: + actual_items: The container + expected_items: The items to check for in the container + exact: Whether the actual state should be exactly equal to the expected + state (no extras). + message: Optional message to include in the failure message. + """ + # Check that each set has the same items + if exact and actual_items == expected_items: + return + # Check for a superset + elif not exact and actual_items >= expected_items: + return + + expected_lines: List[str] = [] + for expected_item in expected_items: + is_expected_in_actual = expected_item in actual_items + expected_lines.append( + "{} {}".format(" " if is_expected_in_actual else "?", expected_item) + ) + + actual_lines: List[str] = [] + for actual_item in actual_items: + is_actual_in_expected = actual_item in expected_items + actual_lines.append( + "{} {}".format("+" if is_actual_in_expected else " ", actual_item) + ) + + expected_string = f"Expected items to be in actual ('?' = missing expected items):\n {{\n{'\n'.join(expected_lines)}\n }}" + actual_string = ( + f"Actual ('+' = found expected items):\n {{\n{'\n'.join(actual_lines)}\n }}" + ) + first_message = ( + "Items must match exactly" if exact else "Some expected items are missing." + ) + diff_message = f"{first_message}\n{expected_string}\n{actual_string}" + + self.fail(f"{diff_message}\n{message}") + def _add_new_dm_to_global_account_data( self, source_user_id: str, target_user_id: str, target_room_id: str ) -> None: @@ -2729,6 +2813,7 @@ def test_rooms_required_state_initial_sync(self) -> None: state_map[(EventTypes.Create, "")], state_map[(EventTypes.RoomHistoryVisibility, "")], }, + exact=True, ) def test_rooms_required_state_incremental_sync(self) -> None: @@ -2782,6 +2867,7 @@ def test_rooms_required_state_incremental_sync(self) -> None: state_map[(EventTypes.Create, "")], state_map[(EventTypes.RoomHistoryVisibility, "")], }, + exact=True, ) def test_rooms_required_state_wildcard(self) -> None: @@ -2838,12 +2924,13 @@ def test_rooms_required_state_wildcard(self) -> None: channel.json_body["rooms"][room_id1]["required_state"], # We should see all the state events in the room state_map.values(), + exact=True, ) def test_rooms_required_state_wildcard_event_type(self) -> None: """ - Test `rooms.required_state` returns all state events when using wildcard in the - event type `["*", "foobarbaz"]`. + Test `rooms.required_state` returns relevant state events when using wildcard in + the event_type `["*", "foobarbaz"]`. 
""" user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -2863,7 +2950,7 @@ def test_rooms_required_state_wildcard_event_type(self) -> None: self.helper.send_state( room_id1, event_type="org.matrix.foo_state", - state_key="namespaced", + state_key=user2_id, body={"foo": "bar"}, tok=user2_tok, ) @@ -2877,7 +2964,7 @@ def test_rooms_required_state_wildcard_event_type(self) -> None: "foo-list": { "ranges": [[0, 1]], "required_state": [ - ["*", user1_id], + ["*", user2_id], ], "timeline_limit": 0, } @@ -2891,79 +2978,80 @@ def test_rooms_required_state_wildcard_event_type(self) -> None: self.storage_controllers.state.get_current_state(room_id1) ) - # We expect at-least any state event with the `user1_id` as the `state_key` + # We expect at-least any state event with the `user2_id` as the `state_key` self._assertRequiredStateIncludes( channel.json_body["rooms"][room_id1]["required_state"], { - state_map[(EventTypes.Member, user1_id)], + state_map[(EventTypes.Member, user2_id)], + state_map[("org.matrix.foo_state", user2_id)], }, + # Ideally, this would be exact but we're currently returning all state + # events when the `event_type` is a wildcard. + exact=False, ) - def _assertRequiredStateIncludes( - self, - actual_required_state: JsonDict, - expected_state_events: Iterable[EventBase], - ) -> None: + def test_rooms_required_state_wildcard_state_key(self) -> None: """ - Wrapper around `_assertIncludes` to give slightly better looking diff error - messages that include some context "$event_id (type, state_key)". + Test `rooms.required_state` returns relevant state events when using wildcard in + the state_key `["foobarbaz","*"]`. """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") - self._assertIncludes( - { - f'{event["event_id"]} ("{event["type"]}", "{event["state_key"]}")' - for event in actual_required_state - }, + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + + self.helper.send_state( + room_id1, + event_type="org.matrix.foo_state", + state_key="", + body={"foo": "bar"}, + tok=user2_tok, + ) + self.helper.send_state( + room_id1, + event_type="org.matrix.foo_state", + state_key=user2_id, + body={"foo": "bar"}, + tok=user2_tok, + ) + + # Make the Sliding Sync request with wildcards for the `event_type` and `state_key` + channel = self.make_request( + "POST", + self.sync_endpoint, { - f'{event.event_id} ("{event.type}", "{event.state_key}")' - for event in expected_state_events + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [ + [EventTypes.Member, "*"], + ], + "timeline_limit": 0, + } + } }, - # Message to help understand the diff in context - actual_required_state, + access_token=user1_tok, ) + self.assertEqual(channel.code, 200, channel.json_body) - def _assertIncludes( - self, - actual_items: AbstractSet[str], - expected_items: AbstractSet[str], - msg: Optional[str] = None, - ) -> None: - """ - Assert that all of the `expected_items` are included in the `actual_items`. 
- - This assert could also be called `assertContains`, `assertItemsInSet` - """ - # Check for a superset - if actual_items >= expected_items: - return - - expected_lines: List[str] = [] - for expected_item in expected_items: - is_expected_in_actual = expected_item in actual_items - expected_lines.append( - "{} {}".format(" " if is_expected_in_actual else "?", expected_item) - ) - - actual_lines: List[str] = [] - for actual_item in actual_items: - is_actual_in_expected = actual_item in expected_items - actual_lines.append( - "{} {}".format("+" if is_actual_in_expected else " ", actual_item) - ) - - expected_string = f"Expected items to be in actual ('?' = missing expected items):\n {{\n{'\n'.join(expected_lines)}\n }}" - actual_string = ( - f"Actual ('+' = found expected items):\n {{\n{'\n'.join(actual_lines)}\n }}" - ) - diff_message = ( - f"Some expected items are missing.\n{expected_string}\n{actual_string}" + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id1) ) - self.fail(f"{diff_message}\n{msg}") + self._assertRequiredStateIncludes( + channel.json_body["rooms"][room_id1]["required_state"], + { + state_map[(EventTypes.Member, user1_id)], + state_map[(EventTypes.Member, user2_id)], + }, + exact=True, + ) # TODO: Add more `required_state` tests - # TODO: Add test for `"required_state": [ ["*","foobarbaz"] ],` - # TODO: Add test for `"required_state": [ ["foobarbaz","*"] ],` # TODO: Add test for `"required_state": [ ["m.room.member","$LAZY"] ],` + # TODO: Add tests for configs from different lists combining # TODO: Add tests for partially-stated rooms being excluded From 75446e738118bf03742ccbe7ce5bc77623b0d381 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 3 Jul 2024 01:24:50 -0500 Subject: [PATCH 095/109] Add more tests --- synapse/handlers/sliding_sync.py | 14 +- tests/rest/client/test_sync.py | 320 +++++++++++++++++++++++++++- tests/test_utils/event_injection.py | 12 +- 3 files changed, 324 insertions(+), 22 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index fdaedb6adf4..49bae63bde5 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -500,15 +500,17 @@ async def current_sync_for_user( EventTypes.Member ) ) - # Exclude partially-stated rooms unless the `required_state` - # only has `["m.room.member", "$LAZY"]` for membership. - if ( - partial_state_room_map.get(room_id) - and membership_state_keys is not None + lazy_loading = ( + membership_state_keys is not None and len(membership_state_keys) == 1 and (EventTypes.Member, StateValues.LAZY) in membership_state_keys - ): + ) + # Exclude partially-stated rooms unless the `required_state` + # only has `["m.room.member", "$LAZY"]` for membership + # (lazy-loading room members). 
+ if partial_state_room_map.get(room_id) and not lazy_loading: + current_range_index += 1 # Since we're skipping this room, we need to allow # for the next room to take its place in the list range_end_index += 1 diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 155a644ed65..44896151c2e 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -32,6 +32,7 @@ EventContentFields, EventTypes, HistoryVisibility, + Membership, ReceiptTypes, RelationTypes, ) @@ -39,6 +40,7 @@ from synapse.rest.client import devices, knock, login, read_marker, receipts, room, sync from synapse.server import HomeServer from synapse.types import JsonDict, RoomStreamToken, StreamKeyType, StreamToken, UserID +from synapse.handlers.sliding_sync import StateValues from synapse.util import Clock from tests import unittest @@ -46,6 +48,7 @@ KnockingStrippedStateEventHelperMixin, ) from tests.server import TimedOutException +from tests.test_utils.event_injection import mark_event_as_partial_state logger = logging.getLogger(__name__) @@ -2177,6 +2180,11 @@ def test_rooms_invite_shared_history_initial_sync(self) -> None: channel.json_body["rooms"][room_id1].get("prev_batch"), channel.json_body["rooms"][room_id1], ) + # `required_state` is omitted for `invite` rooms with `stripped_state` + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("required_state"), + channel.json_body["rooms"][room_id1], + ) # We should have some `stripped_state` so the potential joiner can identify the # room (we don't care about the order). self.assertCountEqual( @@ -2286,6 +2294,11 @@ def test_rooms_invite_shared_history_incremental_sync(self) -> None: channel.json_body["rooms"][room_id1].get("prev_batch"), channel.json_body["rooms"][room_id1], ) + # `required_state` is omitted for `invite` rooms with `stripped_state` + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("required_state"), + channel.json_body["rooms"][room_id1], + ) # We should have some `stripped_state` so the potential joiner can identify the # room (we don't care about the order). self.assertCountEqual( @@ -2407,6 +2420,11 @@ def test_rooms_invite_world_readable_history_initial_sync(self) -> None: channel.json_body["rooms"][room_id1].get("prev_batch"), channel.json_body["rooms"][room_id1], ) + # `required_state` is omitted for `invite` rooms with `stripped_state` + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("required_state"), + channel.json_body["rooms"][room_id1], + ) # We should have some `stripped_state` so the potential joiner can identify the # room (we don't care about the order). self.assertCountEqual( @@ -2534,6 +2552,11 @@ def test_rooms_invite_world_readable_history_incremental_sync(self) -> None: channel.json_body["rooms"][room_id1].get("prev_batch"), channel.json_body["rooms"][room_id1], ) + # `required_state` is omitted for `invite` rooms with `stripped_state` + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("required_state"), + channel.json_body["rooms"][room_id1], + ) # We should have some `stripped_state` so the potential joiner can identify the # room (we don't care about the order). 
self.assertCountEqual( @@ -2906,7 +2929,7 @@ def test_rooms_required_state_wildcard(self) -> None: "foo-list": { "ranges": [[0, 1]], "required_state": [ - ["*", "*"], + [StateValues.WILDCARD, StateValues.WILDCARD], ], "timeline_limit": 0, } @@ -2955,7 +2978,7 @@ def test_rooms_required_state_wildcard_event_type(self) -> None: tok=user2_tok, ) - # Make the Sliding Sync request with wildcards for the `event_type` and `state_key` + # Make the Sliding Sync request with wildcards for the `event_type` channel = self.make_request( "POST", self.sync_endpoint, @@ -2964,7 +2987,7 @@ def test_rooms_required_state_wildcard_event_type(self) -> None: "foo-list": { "ranges": [[0, 1]], "required_state": [ - ["*", user2_id], + [StateValues.WILDCARD, user2_id], ], "timeline_limit": 0, } @@ -3003,6 +3026,111 @@ def test_rooms_required_state_wildcard_state_key(self) -> None: room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) self.helper.join(room_id1, user1_id, tok=user1_tok) + # Make the Sliding Sync request with wildcards for the `state_key` + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [ + [EventTypes.Member, StateValues.WILDCARD], + ], + "timeline_limit": 0, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id1) + ) + + self._assertRequiredStateIncludes( + channel.json_body["rooms"][room_id1]["required_state"], + { + state_map[(EventTypes.Member, user1_id)], + state_map[(EventTypes.Member, user2_id)], + }, + exact=True, + ) + + def test_rooms_required_state_lazy_loading_room_members(self) -> None: + """ + Test `rooms.required_state` returns people relevant to the timeline when + lazy-loading room members, `["m.room.member","$LAZY"]`. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + user3_id = self.register_user("user3", "pass") + user3_tok = self.login(user3_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + self.helper.join(room_id1, user3_id, tok=user3_tok) + + self.helper.send(room_id1, "1", tok=user2_tok) + self.helper.send(room_id1, "2", tok=user3_tok) + self.helper.send(room_id1, "3", tok=user2_tok) + + # Make the Sliding Sync request with lazy loading for the room members + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [ + [EventTypes.Create, ""], + [EventTypes.Member, StateValues.LAZY], + ], + "timeline_limit": 3, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id1) + ) + + # Only user2 and user3 sent events in the 3 events we see in the `timeline` + self._assertRequiredStateIncludes( + channel.json_body["rooms"][room_id1]["required_state"], + { + state_map[(EventTypes.Create, "")], + state_map[(EventTypes.Member, user2_id)], + state_map[(EventTypes.Member, user3_id)], + }, + exact=True, + ) + + @parameterized.expand([(Membership.LEAVE,), (Membership.BAN,)]) + def test_rooms_required_state_leave_ban(self, stop_membership: str) -> None: + """ + Test `rooms.required_state` should not return state past a leave/ban event. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + user3_id = self.register_user("user3", "pass") + user3_tok = self.login(user3_id, "pass") + + from_token = self.event_sources.get_current_token() + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + self.helper.join(room_id1, user3_id, tok=user3_tok) + self.helper.send_state( room_id1, event_type="org.matrix.foo_state", @@ -3010,15 +3138,84 @@ def test_rooms_required_state_wildcard_state_key(self) -> None: body={"foo": "bar"}, tok=user2_tok, ) + + if stop_membership == Membership.LEAVE: + # User 1 leaves + self.helper.leave(room_id1, user1_id, tok=user1_tok) + elif stop_membership == Membership.BAN: + # User 1 is banned + self.helper.ban(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id1) + ) + + # Change the state after user 1 leaves self.helper.send_state( room_id1, event_type="org.matrix.foo_state", - state_key=user2_id, + state_key="", + body={"foo": "qux"}, + tok=user2_tok, + ) + self.helper.leave(room_id1, user3_id, tok=user3_tok) + + # Make the Sliding Sync request with lazy loading for the room members + channel = self.make_request( + "POST", + self.sync_endpoint + + f"?pos={self.get_success(from_token.to_string(self.store))}", + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [ + [EventTypes.Create, ""], + [EventTypes.Member, "*"], + ["org.matrix.foo_state", ""], + ], + "timeline_limit": 3, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # Only user2 and user3 sent events in the 3 events we see in the 
`timeline` + self._assertRequiredStateIncludes( + channel.json_body["rooms"][room_id1]["required_state"], + { + state_map[(EventTypes.Create, "")], + state_map[(EventTypes.Member, user1_id)], + state_map[(EventTypes.Member, user2_id)], + state_map[(EventTypes.Member, user3_id)], + state_map[("org.matrix.foo_state", "")], + }, + exact=True, + ) + + def test_rooms_required_state_combine_superset(self) -> None: + """ + Test `rooms.required_state` is combined across lists and room subscriptions. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + + self.helper.send_state( + room_id1, + event_type="org.matrix.foo_state", + state_key="", body={"foo": "bar"}, tok=user2_tok, ) - # Make the Sliding Sync request with wildcards for the `event_type` and `state_key` + # Make the Sliding Sync request with wildcards for the `state_key` channel = self.make_request( "POST", self.sync_endpoint, @@ -3027,11 +3224,29 @@ def test_rooms_required_state_wildcard_state_key(self) -> None: "foo-list": { "ranges": [[0, 1]], "required_state": [ - [EventTypes.Member, "*"], + [EventTypes.Create, ""], + [EventTypes.Member, user1_id], ], "timeline_limit": 0, - } + }, + "bar-list": { + "ranges": [[0, 1]], + "required_state": [ + [EventTypes.Member, StateValues.WILDCARD], + ["org.matrix.foo_state", ""], + ], + "timeline_limit": 0, + }, } + # TODO: Room subscription should also combine with the `required_state` + # "room_subscriptions": { + # room_id1: { + # "required_state": [ + # ["org.matrix.bar_state", ""] + # ], + # "timeline_limit": 0, + # } + # } }, access_token=user1_tok, ) @@ -3044,14 +3259,97 @@ def test_rooms_required_state_wildcard_state_key(self) -> None: self._assertRequiredStateIncludes( channel.json_body["rooms"][room_id1]["required_state"], { + state_map[(EventTypes.Create, "")], state_map[(EventTypes.Member, user1_id)], state_map[(EventTypes.Member, user2_id)], + state_map[("org.matrix.foo_state", "")], }, exact=True, ) - # TODO: Add more `required_state` tests - # TODO: Add test for `"required_state": [ ["m.room.member","$LAZY"] ],` - # TODO: Add tests for configs from different lists combining + def test_rooms_required_state_partial_state(self) -> None: + """ + Test partially-stated room are excluded unless `rooms.required_state` is + lazy-loading room members. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + room_id2 = self.helper.create_room_as(user2_id, tok=user2_tok) + _join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response2 = self.helper.join(room_id2, user1_id, tok=user1_tok) + + # Mark room2 as partial state + self.get_success( + mark_event_as_partial_state(self.hs, join_response2["event_id"], room_id2) + ) - # TODO: Add tests for partially-stated rooms being excluded + # Make the Sliding Sync request (NOT lazy-loading room members) + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [ + [EventTypes.Create, ""], + ], + "timeline_limit": 0, + }, + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # Make sure the list includes room1 but room2 is excluded because it's still + # partially-stated + self.assertListEqual( + list(channel.json_body["lists"]["foo-list"]["ops"]), + [ + { + "op": "SYNC", + "range": [0, 1], + "room_ids": [room_id1], + } + ], + channel.json_body["lists"]["foo-list"], + ) + + # Make the Sliding Sync request (with lazy-loading room members) + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [ + [EventTypes.Create, ""], + # Lazy-load room members + [EventTypes.Member, StateValues.LAZY], + ], + "timeline_limit": 0, + }, + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # The list should include both rooms now because we're lazy-loading room members + self.assertListEqual( + list(channel.json_body["lists"]["foo-list"]["ops"]), + [ + { + "op": "SYNC", + "range": [0, 1], + "room_ids": [room_id2, room_id1], + } + ], + channel.json_body["lists"]["foo-list"], + ) diff --git a/tests/test_utils/event_injection.py b/tests/test_utils/event_injection.py index fd03c23b890..cda53149f4d 100644 --- a/tests/test_utils/event_injection.py +++ b/tests/test_utils/event_injection.py @@ -125,13 +125,15 @@ async def mark_event_as_partial_state( in this table). 
""" store = hs.get_datastores().main - await store.db_pool.simple_upsert( - table="partial_state_rooms", - keyvalues={"room_id": room_id}, - values={}, - insertion_values={"room_id": room_id}, + # Use the store helper to insert into the database so the caches are busted + await store.store_partial_state_room( + room_id=room_id, + servers=[hs.hostname], + device_lists_stream_id=0, + joined_via=hs.hostname, ) + # FIXME: Bust the cache await store.db_pool.simple_insert( table="partial_state_events", values={ From aa59eadd0dbace30a606807c7afa3454d8f4abfb Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 3 Jul 2024 01:36:08 -0500 Subject: [PATCH 096/109] Fix lints --- tests/rest/client/test_sync.py | 12 ++++++++---- tests/test_utils/event_injection.py | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 44896151c2e..7caef89509b 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -20,7 +20,7 @@ # import json import logging -from typing import AbstractSet, Dict, List, Optional, Iterable +from typing import AbstractSet, Any, Dict, Iterable, List, Optional from parameterized import parameterized, parameterized_class @@ -37,10 +37,10 @@ RelationTypes, ) from synapse.events import EventBase +from synapse.handlers.sliding_sync import StateValues from synapse.rest.client import devices, knock, login, read_marker, receipts, room, sync from synapse.server import HomeServer from synapse.types import JsonDict, RoomStreamToken, StreamKeyType, StreamToken, UserID -from synapse.handlers.sliding_sync import StateValues from synapse.util import Clock from tests import unittest @@ -1245,7 +1245,7 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: def _assertRequiredStateIncludes( self, - actual_required_state: JsonDict, + actual_required_state: Any, expected_state_events: Iterable[EventBase], exact: bool = False, ) -> None: @@ -1262,6 +1262,10 @@ def _assertRequiredStateIncludes( state (no extras). 
""" + assert isinstance(actual_required_state, list) + for event in actual_required_state: + assert isinstance(event, dict) + self._assertIncludes( { f'{event["event_id"]} ("{event["type"]}", "{event["state_key"]}")' @@ -1273,7 +1277,7 @@ def _assertRequiredStateIncludes( }, exact=exact, # Message to help understand the diff in context - message=actual_required_state, + message=str(actual_required_state), ) def _assertIncludes( diff --git a/tests/test_utils/event_injection.py b/tests/test_utils/event_injection.py index cda53149f4d..35b3245708e 100644 --- a/tests/test_utils/event_injection.py +++ b/tests/test_utils/event_injection.py @@ -128,7 +128,7 @@ async def mark_event_as_partial_state( # Use the store helper to insert into the database so the caches are busted await store.store_partial_state_room( room_id=room_id, - servers=[hs.hostname], + servers={hs.hostname}, device_lists_stream_id=0, joined_via=hs.hostname, ) From cc30a8547c9842322af3d90886092d1decbda4b0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 3 Jul 2024 01:38:05 -0500 Subject: [PATCH 097/109] Add changelog --- changelog.d/17342.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/17342.feature diff --git a/changelog.d/17342.feature b/changelog.d/17342.feature new file mode 100644 index 00000000000..b2671ea14a3 --- /dev/null +++ b/changelog.d/17342.feature @@ -0,0 +1 @@ +Return "required state" in experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. From 5c8dd69753cdfa3d8d29dd49cc389aa7629a5c3d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 3 Jul 2024 01:43:57 -0500 Subject: [PATCH 098/109] Either or --- synapse/handlers/sliding_sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 49bae63bde5..48bef25a710 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1313,7 +1313,7 @@ async def get_room_sync_data( # `None` is a wildcard in the `StateFilter` required_state_types.append((state_type, None)) # We need to fetch all relevant people when we're lazy-loading membership - if ( + elif ( state_type == EventTypes.Member and state_key == StateValues.LAZY ): From ea78da59edc52adef1cb9290e686bf9a66aac872 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 3 Jul 2024 01:47:37 -0500 Subject: [PATCH 099/109] Add explicit test for no required_state --- tests/rest/client/test_sync.py | 36 ++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 7caef89509b..53ad2e59358 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -2795,6 +2795,42 @@ def test_rooms_ban_incremental_sync2(self) -> None: channel.json_body["rooms"][room_id1], ) + def test_rooms_no_required_state(self) -> None: + """ + Empty `rooms.required_state` should not return any state events in the room + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + # Empty `required_state` + "required_state": [], + 
"timeline_limit": 0, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # No `required_state` in response + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("required_state"), + channel.json_body["rooms"][room_id1], + ) + def test_rooms_required_state_initial_sync(self) -> None: """ Test `rooms.required_state` returns requested state events in the room during an From 5023943622cb6b62c332d415a6336f7f4d932188 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 3 Jul 2024 01:50:40 -0500 Subject: [PATCH 100/109] Bette as a fixme --- synapse/handlers/sliding_sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 48bef25a710..5dac9572a3e 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1328,7 +1328,7 @@ async def get_room_sync_data( (EventTypes.Member, user_id) ) - # TODO: We probably also care about invite, ban, kick, targets, etc + # FIXME: We probably also care about invite, ban, kick, targets, etc # but the spec only mentions "senders". else: required_state_types.append((state_type, state_key)) From fbad6b07ff575256d633f263295a9af8f9e32124 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 3 Jul 2024 01:55:01 -0500 Subject: [PATCH 101/109] Fix `\n` backslash usage forbidden in f-string ``` builtins.SyntaxError: f-string expression part cannot include a backslash (test_sync.py, line 1323) ``` --- tests/rest/client/test_sync.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 53ad2e59358..cb2888409e6 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1320,10 +1320,9 @@ def _assertIncludes( "{} {}".format("+" if is_actual_in_expected else " ", actual_item) ) - expected_string = f"Expected items to be in actual ('?' = missing expected items):\n {{\n{'\n'.join(expected_lines)}\n }}" - actual_string = ( - f"Actual ('+' = found expected items):\n {{\n{'\n'.join(actual_lines)}\n }}" - ) + newline = "\n" + expected_string = f"Expected items to be in actual ('?' = missing expected items):\n {{\n{newline.join(expected_lines)}\n }}" + actual_string = f"Actual ('+' = found expected items):\n {{\n{newline.join(actual_lines)}\n }}" first_message = ( "Items must match exactly" if exact else "Some expected items are missing." ) From b3ca35e4fa78616f4f596c244d6e5443fab7160f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 3 Jul 2024 11:35:46 -0500 Subject: [PATCH 102/109] Hoist repetitive logic outside loop See https://github.com/element-hq/synapse/pull/17342#discussion_r1664230716 --- synapse/handlers/sliding_sync.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 5dac9572a3e..dd02d8a9a2e 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -476,6 +476,14 @@ async def current_sync_for_user( # Since creating the `RoomSyncConfig` takes some work, let's just do it # once and make a copy whenever we need it. 
room_sync_config = RoomSyncConfig.from_room_config(list_config) + membership_state_keys = room_sync_config.required_state_map.get( + EventTypes.Member + ) + lazy_loading = ( + membership_state_keys is not None + and len(membership_state_keys) == 1 + and (EventTypes.Member, StateValues.LAZY) in membership_state_keys + ) ops: List[SlidingSyncResult.SlidingWindowList.Operation] = [] if list_config.ranges: @@ -495,17 +503,6 @@ async def current_sync_for_user( ): room_id, _ = sorted_room_info[current_range_index] - membership_state_keys = ( - room_sync_config.required_state_map.get( - EventTypes.Member - ) - ) - lazy_loading = ( - membership_state_keys is not None - and len(membership_state_keys) == 1 - and (EventTypes.Member, StateValues.LAZY) - in membership_state_keys - ) # Exclude partially-stated rooms unless the `required_state` # only has `["m.room.member", "$LAZY"]` for membership # (lazy-loading room members). From dc8bef0074691867e48c87617b81728c35b3f213 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 3 Jul 2024 11:40:38 -0500 Subject: [PATCH 103/109] Remove extra safety check See https://github.com/element-hq/synapse/pull/17342#discussion_r1663956666 --- synapse/handlers/sliding_sync.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index dd02d8a9a2e..0931f080073 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -116,17 +116,11 @@ def from_room_config( state_type, state_key, ) in room_params.required_state: - # If we already have a wildcard for everything, we don't need to add - # anything else - wildcard_set: Set[Tuple[str, str]] = required_state_map.get( - StateValues.WILDCARD, set() - ) - if (StateValues.WILDCARD, StateValues.WILDCARD) in wildcard_set: - break - # If we already have a wildcard for this specific `state_key`, we don't need # to add it since the wildcard already covers it. - if (StateValues.WILDCARD, state_key) in wildcard_set: + if (StateValues.WILDCARD, state_key) in required_state_map.get( + StateValues.WILDCARD, set() + ): continue # If we already have a wildcard `state_key` for this `state_type`, we don't need From f2174c140095b387a24637332771752ed7a3773e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 3 Jul 2024 12:06:00 -0500 Subject: [PATCH 104/109] Move comment to the right place --- synapse/handlers/sliding_sync.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 0931f080073..484be5ae5de 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1263,22 +1263,14 @@ async def get_room_sync_data( state_filter: Optional[StateFilter] = StateFilter.none() # If we have a double wildcard ("*", "*") in the `required_state`, we need # to fetch all state for the room - if ( - StateValues.WILDCARD, - StateValues.WILDCARD, - ) in room_sync_config.required_state_map.get(StateValues.WILDCARD, set()): - state_filter = StateFilter.all() - # TODO: `StateFilter` currently doesn't support wildcard event types. We're - # currently working around this by returning all state to the client but it - # would be nice to fetch less from the database and return just what the - # client wanted. 
        #
-        # Note: MSC3575 describes different behavior to how we're handling things here but
-        # since it's not wrong to return more state than requested (`required_state` is
-        # just the minimum requested), it doesn't matter if we include things that the
-        # client wanted excluded. This complexity is also under scrutiny, see
+        # Note: MSC3575 describes different behavior to how we're handling things
+        # here but since it's not wrong to return more state than requested
+        # (`required_state` is just the minimum requested), it doesn't matter if we
+        # include more than the client wanted. This complexity is also under scrutiny,
+        # see
         # https://github.com/matrix-org/matrix-spec-proposals/pull/3575#discussion_r1185109050
-
+        #
         # > One unique exception is when you request all state events via ["*", "*"]. When used,
         # > all state events are returned by default, and additional entries FILTER OUT the returned set
         # > of state events. These additional entries cannot use '*' themselves.
         # > e.g. ["*", "*"], ["m.room.member", "@alice:example.com"] returns all state events
         # > except for m.room.member events which are lazily loaded.
         # > ["*", "*"], ["m.room.name", ""] returns all state events because the m.room.name entry is
         # > required as it would have been returned anyway.
         # >
         # > -- MSC3575 (https://github.com/matrix-org/matrix-spec-proposals/pull/3575)
+        if (
+            StateValues.WILDCARD,
+            StateValues.WILDCARD,
+        ) in room_sync_config.required_state_map.get(StateValues.WILDCARD, set()):
+            state_filter = StateFilter.all()
+        # TODO: `StateFilter` currently doesn't support wildcard event types. We're
+        # currently working around this by returning all state to the client but it
+        # would be nice to fetch less from the database and return just what the
+        # client wanted.
         elif (
             room_sync_config.required_state_map.get(StateValues.WILDCARD)
             is not None

From e22a2913c7e82fd0ef456e6e90e187a613e9e010 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Wed, 3 Jul 2024 12:24:46 -0500
Subject: [PATCH 105/109] Refactor to map from event type to list of state
 keys

See https://github.com/element-hq/synapse/pull/17342#discussion_r1663958151
---
 synapse/handlers/sliding_sync.py    |  84 +++++------
 tests/handlers/test_sliding_sync.py | 198 +++++++++++-----------------
 2 files changed, 110 insertions(+), 172 deletions(-)

diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py
index 484be5ae5de..002d9bf1b6c 100644
--- a/synapse/handlers/sliding_sync.py
+++ b/synapse/handlers/sliding_sync.py
@@ -91,14 +91,14 @@ class RoomSyncConfig:
     Attributes:
         timeline_limit: The maximum number of events to return in the timeline.
-        required_state_map: Map from state type to a set of state (type, state_key)
-            tuples requested for the room. The values are close to `StateKey` but actually
-            use a syntax where you can provide `*` wildcard and `$LAZY` for lazy room
-            members as the `state_key` part of the tuple (type, state_key).
+
+        required_state_map: Map from state event type to state_keys requested for the
+            room. The values are close to `StateKey` but actually use a syntax where you
+            can provide `*` wildcard and `$LAZY` for lazy-loading room members.
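            For example (illustrative), a `required_state` in the request of
                [["m.room.name", ""], ["m.room.member", "$LAZY"], ["m.room.member", "@foo:example.com"]]
            would be parsed into:
                {"m.room.name": {""}, "m.room.member": {"$LAZY", "@foo:example.com"}}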
""" timeline_limit: int - required_state_map: Dict[str, Set[Tuple[str, str]]] + required_state_map: Dict[str, Set[str]] @classmethod def from_room_config( @@ -111,31 +111,25 @@ def from_room_config( Args: room_params: `SlidingSyncConfig.SlidingSyncList` or `SlidingSyncConfig.RoomSubscription` """ - required_state_map: Dict[str, Set[Tuple[str, str]]] = {} + required_state_map: Dict[str, Set[str]] = {} for ( state_type, state_key, ) in room_params.required_state: # If we already have a wildcard for this specific `state_key`, we don't need # to add it since the wildcard already covers it. - if (StateValues.WILDCARD, state_key) in required_state_map.get( - StateValues.WILDCARD, set() - ): + if state_key in required_state_map.get(StateValues.WILDCARD, set()): continue # If we already have a wildcard `state_key` for this `state_type`, we don't need # to add anything else - if (state_type, StateValues.WILDCARD) in required_state_map.get( - state_type, set() - ): + if StateValues.WILDCARD in required_state_map.get(state_type, set()): continue # If we're getting wildcards for the `state_type` and `state_key`, that's # all that matters so get rid of any other entries if state_type == StateValues.WILDCARD and state_key == StateValues.WILDCARD: - required_state_map = { - StateValues.WILDCARD: {(StateValues.WILDCARD, StateValues.WILDCARD)} - } + required_state_map = {StateValues.WILDCARD: {StateValues.WILDCARD}} # We can break, since we don't need to add anything else break # If we're getting a wildcard for the `state_type`, get rid of any other @@ -151,14 +145,9 @@ def from_room_config( ) in list(required_state_map.items()): # Make a copy so we don't run into an error: `Set changed size during # iteration`, when we filter out and remove items - for ( - _existing_state_type, - existing_state_key, - ) in existing_state_key_set.copy(): + for existing_state_key in existing_state_key_set.copy(): if existing_state_key == state_key: - existing_state_key_set.remove( - (existing_state_type, state_key) - ) + existing_state_key_set.remove(state_key) # If we've the left the `set()` empty, remove it from the map if existing_state_key_set == set(): @@ -167,13 +156,13 @@ def from_room_config( # If we're getting a wildcard `state_key`, get rid of any other state_keys # for this `state_type` since the wildcard will cover it already. 
if state_key == StateValues.WILDCARD: - required_state_map[state_type] = {(state_type, state_key)} + required_state_map[state_type] = {state_key} # Otherwise, just add it to the set else: if required_state_map.get(state_type) is None: - required_state_map[state_type] = {(state_type, state_key)} + required_state_map[state_type] = {state_key} else: - required_state_map[state_type].add((state_type, state_key)) + required_state_map[state_type].add(state_key) return cls( timeline_limit=room_params.timeline_limit, @@ -181,7 +170,7 @@ def from_room_config( ) def deep_copy(self) -> "RoomSyncConfig": - required_state_map: Dict[str, Set[Tuple[str, str]]] = { + required_state_map: Dict[str, Set[str]] = { state_type: state_key_set.copy() for state_type, state_key_set in self.required_state_map.items() } @@ -209,35 +198,30 @@ def combine_room_sync_config( ) in other_room_sync_config.required_state_map.items(): # If we already have a wildcard for everything, we don't need to add # anything else - if ( - StateValues.WILDCARD, - StateValues.WILDCARD, - ) in self.required_state_map.get(StateValues.WILDCARD, set()): + if StateValues.WILDCARD in self.required_state_map.get( + StateValues.WILDCARD, set() + ): break # If we already have a wildcard `state_key` for this `state_type`, we don't need # to add anything else - if (state_type, StateValues.WILDCARD) in self.required_state_map.get( - state_type, set() - ): + if StateValues.WILDCARD in self.required_state_map.get(state_type, set()): continue # If we're getting wildcards for the `state_type` and `state_key`, that's # all that matters so get rid of any other entries if ( state_type == StateValues.WILDCARD - and (StateValues.WILDCARD, StateValues.WILDCARD) in state_key_set + and StateValues.WILDCARD in state_key_set ): - self.required_state_map = { - state_type: {(StateValues.WILDCARD, StateValues.WILDCARD)} - } + self.required_state_map = {state_type: {StateValues.WILDCARD}} # We can break, since we don't need to add anything else break - for _state_type, state_key in state_key_set: + for state_key in state_key_set: # If we already have a wildcard for this specific `state_key`, we don't need # to add it since the wildcard already covers it. - if (StateValues.WILDCARD, state_key) in self.required_state_map.get( + if state_key in self.required_state_map.get( StateValues.WILDCARD, set() ): continue @@ -249,20 +233,14 @@ def combine_room_sync_config( # # Make a copy so we don't run into an error: `dictionary changed size # during iteration`, when we remove items - for ( - existing_state_type, - existing_state_key_set, - ) in list(self.required_state_map.items()): + for existing_state_type, existing_state_key_set in list( + self.required_state_map.items() + ): # Make a copy so we don't run into an error: `Set changed size during # iteration`, when we filter out and remove items - for ( - _existing_state_type, - existing_state_key, - ) in existing_state_key_set.copy(): + for existing_state_key in existing_state_key_set.copy(): if existing_state_key == state_key: - existing_state_key_set.remove( - (existing_state_type, state_key) - ) + existing_state_key_set.remove(state_key) # If we've the left the `set()` empty, remove it from the map if existing_state_key_set == set(): @@ -271,14 +249,14 @@ def combine_room_sync_config( # If we're getting a wildcard `state_key`, get rid of any other state_keys # for this `state_type` since the wildcard will cover it already. 
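                    # (Illustrative merge: combining timeline_limit=10 with
                    # {"m.room.member": {"@foo", "@bar"}} and timeline_limit=9
                    # with {"m.room.member": {"*"}} should yield
                    # timeline_limit=10 and {"m.room.member": {"*"}}: the
                    # wildcard subsumes the concrete members and the larger
                    # timeline limit wins, per the test cases below.)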
if state_key == StateValues.WILDCARD: - self.required_state_map[state_type] = {(state_type, state_key)} + self.required_state_map[state_type] = {state_key} break # Otherwise, just add it to the set else: if self.required_state_map.get(state_type) is None: - self.required_state_map[state_type] = {(state_type, state_key)} + self.required_state_map[state_type] = {state_key} else: - self.required_state_map[state_type].add((state_type, state_key)) + self.required_state_map[state_type].add(state_key) class StateValues: diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index c1a1d72a195..5f83b637c59 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -95,13 +95,13 @@ def _assert_room_config_equal( RoomSyncConfig( timeline_limit=10, required_state_map={ - EventTypes.Name: {(EventTypes.Name, "")}, + EventTypes.Name: {""}, EventTypes.Member: { - (EventTypes.Member, "@foo"), - (EventTypes.Member, "@bar"), - (EventTypes.Member, "@baz"), + "@foo", + "@bar", + "@baz", }, - EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")}, + EventTypes.CanonicalAlias: {""}, }, ), ), @@ -126,13 +126,13 @@ def _assert_room_config_equal( RoomSyncConfig( timeline_limit=10, required_state_map={ - EventTypes.Name: {(EventTypes.Name, "")}, + EventTypes.Name: {""}, EventTypes.Member: { - (EventTypes.Member, "@foo"), - (EventTypes.Member, "@bar"), - (EventTypes.Member, "@baz"), + "@foo", + "@bar", + "@baz", }, - EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")}, + EventTypes.CanonicalAlias: {""}, }, ), ), @@ -172,9 +172,7 @@ def _assert_room_config_equal( RoomSyncConfig( timeline_limit=10, required_state_map={ - StateValues.WILDCARD: { - (StateValues.WILDCARD, StateValues.WILDCARD) - }, + StateValues.WILDCARD: {StateValues.WILDCARD}, }, ), ), @@ -198,10 +196,8 @@ def _assert_room_config_equal( RoomSyncConfig( timeline_limit=10, required_state_map={ - StateValues.WILDCARD: {(StateValues.WILDCARD, "")}, - EventTypes.Member: { - (EventTypes.Member, "@foo"), - }, + StateValues.WILDCARD: {""}, + EventTypes.Member: {"@foo"}, }, ), ), @@ -229,12 +225,10 @@ def _assert_room_config_equal( timeline_limit=10, required_state_map={ StateValues.WILDCARD: { - (StateValues.WILDCARD, ""), - (StateValues.WILDCARD, "@foo"), - }, - EventTypes.Member: { - (EventTypes.Member, "@bar"), + "", + "@foo", }, + EventTypes.Member: {"@bar"}, }, ), ), @@ -261,11 +255,11 @@ def _assert_room_config_equal( RoomSyncConfig( timeline_limit=10, required_state_map={ - EventTypes.Name: {(EventTypes.Name, "")}, + EventTypes.Name: {""}, EventTypes.Member: { - (EventTypes.Member, StateValues.WILDCARD), + StateValues.WILDCARD, }, - EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")}, + EventTypes.CanonicalAlias: {""}, }, ), ), @@ -291,10 +285,8 @@ def _assert_room_config_equal( RoomSyncConfig( timeline_limit=10, required_state_map={ - StateValues.WILDCARD: {(StateValues.WILDCARD, "")}, - EventTypes.Member: { - (EventTypes.Member, StateValues.WILDCARD), - }, + StateValues.WILDCARD: {""}, + EventTypes.Member: {StateValues.WILDCARD}, }, ), ), @@ -322,9 +314,7 @@ def _assert_room_config_equal( RoomSyncConfig( timeline_limit=10, required_state_map={ - StateValues.WILDCARD: { - (StateValues.WILDCARD, StateValues.WILDCARD) - }, + StateValues.WILDCARD: {StateValues.WILDCARD}, }, ), ), @@ -350,14 +340,14 @@ def _assert_room_config_equal( RoomSyncConfig( timeline_limit=10, required_state_map={ - EventTypes.Name: {(EventTypes.Name, "")}, + EventTypes.Name: {""}, 
EventTypes.Member: { - (EventTypes.Member, "@foo"), - (EventTypes.Member, "@bar"), - (EventTypes.Member, StateValues.LAZY), - (EventTypes.Member, "@baz"), + "@foo", + "@bar", + StateValues.LAZY, + "@baz", }, - EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")}, + EventTypes.CanonicalAlias: {""}, }, ), ), @@ -388,10 +378,10 @@ def test_from_room_config( RoomSyncConfig( timeline_limit=9, required_state_map={ - EventTypes.Name: {(EventTypes.Name, "")}, + EventTypes.Name: {""}, EventTypes.Member: { - (EventTypes.Member, "@foo"), - (EventTypes.Member, "@bar"), + "@foo", + "@bar", }, }, ), @@ -400,24 +390,24 @@ def test_from_room_config( timeline_limit=10, required_state_map={ EventTypes.Member: { - (EventTypes.Member, StateValues.LAZY), - (EventTypes.Member, "@baz"), + StateValues.LAZY, + "@baz", }, - EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")}, + EventTypes.CanonicalAlias: {""}, }, ), # Expected RoomSyncConfig( timeline_limit=10, required_state_map={ - EventTypes.Name: {(EventTypes.Name, "")}, + EventTypes.Name: {""}, EventTypes.Member: { - (EventTypes.Member, "@foo"), - (EventTypes.Member, "@bar"), - (EventTypes.Member, StateValues.LAZY), - (EventTypes.Member, "@baz"), + "@foo", + "@bar", + StateValues.LAZY, + "@baz", }, - EventTypes.CanonicalAlias: {(EventTypes.CanonicalAlias, "")}, + EventTypes.CanonicalAlias: {""}, }, ), ), @@ -427,29 +417,23 @@ def test_from_room_config( RoomSyncConfig( timeline_limit=10, required_state_map={ - StateValues.WILDCARD: { - (StateValues.WILDCARD, StateValues.WILDCARD), - }, + StateValues.WILDCARD: {StateValues.WILDCARD}, }, ), # B RoomSyncConfig( timeline_limit=9, required_state_map={ - EventTypes.Dummy: {(EventTypes.Dummy, StateValues.WILDCARD)}, - StateValues.WILDCARD: {(StateValues.WILDCARD, "@bar")}, - EventTypes.Member: { - (EventTypes.Member, "@foo"), - }, + EventTypes.Dummy: {StateValues.WILDCARD}, + StateValues.WILDCARD: {"@bar"}, + EventTypes.Member: {"@foo"}, }, ), # Expected RoomSyncConfig( timeline_limit=10, required_state_map={ - StateValues.WILDCARD: { - (StateValues.WILDCARD, StateValues.WILDCARD), - }, + StateValues.WILDCARD: {StateValues.WILDCARD}, }, ), ), @@ -459,28 +443,24 @@ def test_from_room_config( RoomSyncConfig( timeline_limit=10, required_state_map={ - EventTypes.Dummy: {(EventTypes.Dummy, "dummy")}, + EventTypes.Dummy: {"dummy"}, StateValues.WILDCARD: { - (StateValues.WILDCARD, ""), - (StateValues.WILDCARD, "@foo"), - }, - EventTypes.Member: { - (EventTypes.Member, "@bar"), + "", + "@foo", }, + EventTypes.Member: {"@bar"}, }, ), # B RoomSyncConfig( timeline_limit=9, required_state_map={ - EventTypes.Dummy: {(EventTypes.Dummy, "dummy2")}, + EventTypes.Dummy: {"dummy2"}, StateValues.WILDCARD: { - (StateValues.WILDCARD, ""), - (StateValues.WILDCARD, "@bar"), - }, - EventTypes.Member: { - (EventTypes.Member, "@foo"), + "", + "@bar", }, + EventTypes.Member: {"@foo"}, }, ), # Expected @@ -488,13 +468,13 @@ def test_from_room_config( timeline_limit=10, required_state_map={ EventTypes.Dummy: { - (EventTypes.Dummy, "dummy"), - (EventTypes.Dummy, "dummy2"), + "dummy", + "dummy2", }, StateValues.WILDCARD: { - (StateValues.WILDCARD, ""), - (StateValues.WILDCARD, "@foo"), - (StateValues.WILDCARD, "@bar"), + "", + "@foo", + "@bar", }, }, ), @@ -505,39 +485,27 @@ def test_from_room_config( RoomSyncConfig( timeline_limit=10, required_state_map={ - EventTypes.Dummy: {(EventTypes.Dummy, "dummy")}, - EventTypes.Member: { - (EventTypes.Member, StateValues.WILDCARD), - }, - "org.matrix.flowers": { - ("org.matrix.flowers", 
StateValues.WILDCARD) - }, + EventTypes.Dummy: {"dummy"}, + EventTypes.Member: {StateValues.WILDCARD}, + "org.matrix.flowers": {StateValues.WILDCARD}, }, ), # B RoomSyncConfig( timeline_limit=9, required_state_map={ - EventTypes.Dummy: {(EventTypes.Dummy, StateValues.WILDCARD)}, - EventTypes.Member: { - (EventTypes.Member, StateValues.WILDCARD), - }, - "org.matrix.flowers": {("org.matrix.flowers", "tulips")}, + EventTypes.Dummy: {StateValues.WILDCARD}, + EventTypes.Member: {StateValues.WILDCARD}, + "org.matrix.flowers": {"tulips"}, }, ), # Expected RoomSyncConfig( timeline_limit=10, required_state_map={ - EventTypes.Dummy: { - (EventTypes.Dummy, StateValues.WILDCARD), - }, - EventTypes.Member: { - (EventTypes.Member, StateValues.WILDCARD), - }, - "org.matrix.flowers": { - ("org.matrix.flowers", StateValues.WILDCARD) - }, + EventTypes.Dummy: {StateValues.WILDCARD}, + EventTypes.Member: {StateValues.WILDCARD}, + "org.matrix.flowers": {StateValues.WILDCARD}, }, ), ), @@ -547,27 +515,21 @@ def test_from_room_config( RoomSyncConfig( timeline_limit=10, required_state_map={ - EventTypes.Dummy: {(EventTypes.Dummy, "dummy")}, + EventTypes.Dummy: {"dummy"}, StateValues.WILDCARD: { - (StateValues.WILDCARD, ""), - (StateValues.WILDCARD, "@foo"), - }, - EventTypes.Member: { - (EventTypes.Member, "@bar"), + "", + "@foo", }, + EventTypes.Member: {"@bar"}, }, ), # B RoomSyncConfig( timeline_limit=9, required_state_map={ - EventTypes.Dummy: {(EventTypes.Dummy, "dummy2")}, - StateValues.WILDCARD: { - (StateValues.WILDCARD, ""), - }, - EventTypes.Member: { - (EventTypes.Member, StateValues.WILDCARD), - }, + EventTypes.Dummy: {"dummy2"}, + StateValues.WILDCARD: {""}, + EventTypes.Member: {StateValues.WILDCARD}, }, ), # Expected @@ -575,16 +537,14 @@ def test_from_room_config( timeline_limit=10, required_state_map={ EventTypes.Dummy: { - (EventTypes.Dummy, "dummy"), - (EventTypes.Dummy, "dummy2"), + "dummy", + "dummy2", }, StateValues.WILDCARD: { - (StateValues.WILDCARD, ""), - (StateValues.WILDCARD, "@foo"), - }, - EventTypes.Member: { - (EventTypes.Member, StateValues.WILDCARD), + "", + "@foo", }, + EventTypes.Member: {StateValues.WILDCARD}, }, ), ), From fe4f2eabd04ff01b0eba8b5016d8a42b3be307c6 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 3 Jul 2024 12:28:37 -0500 Subject: [PATCH 106/109] Refactor usage --- synapse/handlers/sliding_sync.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 002d9bf1b6c..13a15c92c90 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -454,7 +454,7 @@ async def current_sync_for_user( lazy_loading = ( membership_state_keys is not None and len(membership_state_keys) == 1 - and (EventTypes.Member, StateValues.LAZY) in membership_state_keys + and StateValues.LAZY in membership_state_keys ) ops: List[SlidingSyncResult.SlidingWindowList.Operation] = [] @@ -1258,10 +1258,9 @@ async def get_room_sync_data( # > required as it would have been returned anyway. # > # > -- MSC3575 (https://github.com/matrix-org/matrix-spec-proposals/pull/3575) - if ( - StateValues.WILDCARD, - StateValues.WILDCARD, - ) in room_sync_config.required_state_map.get(StateValues.WILDCARD, set()): + if StateValues.WILDCARD in room_sync_config.required_state_map.get( + StateValues.WILDCARD, set() + ): state_filter = StateFilter.all() # TODO: `StateFilter` currently doesn't support wildcard event types. 
We're # currently working around this by returning all state to the client but it @@ -1278,7 +1277,7 @@ async def get_room_sync_data( state_type, state_key_set, ) in room_sync_config.required_state_map.items(): - for _state_type, state_key in state_key_set: + for state_key in state_key_set: if state_key == StateValues.WILDCARD: # `None` is a wildcard in the `StateFilter` required_state_types.append((state_type, None)) From 52444d9adbdf8579d7214947c2454c0b9cff7b7c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 3 Jul 2024 12:32:39 -0500 Subject: [PATCH 107/109] Better loop pattern See https://github.com/element-hq/synapse/pull/17342#discussion_r1664230128 --- synapse/handlers/sliding_sync.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 13a15c92c90..6d4160ed9a8 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -466,23 +466,16 @@ async def current_sync_for_user( # at the range start index and keep adding rooms until we fill # up the range or run out of rooms. # - # Both sides of range are inclusive - current_range_index = range[0] - range_end_index = range[1] - while ( - current_range_index <= range_end_index - and current_range_index <= len(sorted_room_info) - 1 - ): - room_id, _ = sorted_room_info[current_range_index] + # Both sides of range are inclusive so we `+ 1` + max_num_rooms = range[1] - range[0] + 1 + for room_id, _ in sorted_room_info[range[0] :]: + if len(room_ids_in_list) >= max_num_rooms: + break # Exclude partially-stated rooms unless the `required_state` # only has `["m.room.member", "$LAZY"]` for membership # (lazy-loading room members). if partial_state_room_map.get(room_id) and not lazy_loading: - current_range_index += 1 - # Since we're skipping this room, we need to allow - # for the next room to take its place in the list - range_end_index += 1 continue # Take the superset of the `RoomSyncConfig` for each room. 
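                        # (Worked example, illustrative: for a requested range
                        # of [0, 9], both ends inclusive, max_num_rooms is
                        # 9 - 0 + 1 = 10. A partially-stated room that gets
                        # `continue`d does not consume one of those 10 slots,
                        # so the window still fills with later eligible rooms.)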
@@ -502,7 +495,6 @@ async def current_sync_for_user( ) room_ids_in_list.append(room_id) - current_range_index += 1 ops.append( SlidingSyncResult.SlidingWindowList.Operation( From 7c133a8fc522c66a81d9d10aad2790e1d089df02 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 3 Jul 2024 12:34:38 -0500 Subject: [PATCH 108/109] Fix lint --- synapse/handlers/sliding_sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 6d4160ed9a8..23e6b7d3734 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -460,7 +460,7 @@ async def current_sync_for_user( ops: List[SlidingSyncResult.SlidingWindowList.Operation] = [] if list_config.ranges: for range in list_config.ranges: - room_ids_in_list = [] + room_ids_in_list: List[str] = [] # We're going to loop through the sorted list of rooms starting # at the range start index and keep adding rooms until we fill From 3cf9a779fac0d0d86a8a23de32368c63cceed353 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 4 Jul 2024 11:56:23 -0500 Subject: [PATCH 109/109] Add line break between if-cases Co-authored-by: Erik Johnston --- synapse/handlers/sliding_sync.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 23e6b7d3734..a1ddac903ea 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -132,6 +132,7 @@ def from_room_config( required_state_map = {StateValues.WILDCARD: {StateValues.WILDCARD}} # We can break, since we don't need to add anything else break + # If we're getting a wildcard for the `state_type`, get rid of any other # entries with the same `state_key`, since the wildcard will cover it already. elif state_type == StateValues.WILDCARD:
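Taken together, the `required_state` plumbing in this series reduces to building a `StateFilter` from the de-duplicated map. A condensed sketch of that translation (illustrative only: it assumes a `StateFilter.from_types`-style constructor exists, inlines the `StateValues` constants as string literals, and elides the full lazy-loading logic, which also considers timeline "senders"):

```python
from typing import Dict, List, Optional, Set, Tuple

def build_state_filter_types(
    required_state_map: Dict[str, Set[str]], user_id: str
) -> List[Tuple[str, Optional[str]]]:
    """Translate a RoomSyncConfig.required_state_map into StateFilter types."""
    required_state_types: List[Tuple[str, Optional[str]]] = []
    for state_type, state_key_set in required_state_map.items():
        for state_key in state_key_set:
            if state_key == "*":  # StateValues.WILDCARD
                # `None` acts as the state_key wildcard in a `StateFilter`
                required_state_types.append((state_type, None))
            elif state_type == "m.room.member" and state_key == "$LAZY":
                # Lazy-loading membership: at minimum the syncing user's own
                # membership event (the real handler pulls in more members)
                required_state_types.append(("m.room.member", user_id))
            else:
                required_state_types.append((state_type, state_key))
    return required_state_types

# e.g. build_state_filter_types(
#     {"m.room.name": {""}, "m.room.member": {"$LAZY"}}, "@alice:test"
# ) returns [("m.room.name", ""), ("m.room.member", "@alice:test")]
```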