-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Fix backfilled events being rejected for no state_groups
#10439
Changes from 6 commits
2db287e
28a5e6a
64c8e5e
e9f3e9d
487247f
b1ce3af
005382f
87b7fb9
a94217e
b2e26b9
b0886f8
eb32018
1e97096
3ca36c9
16b113b
a89a95f
287b32c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2566,13 +2566,23 @@ async def _check_event_auth( | |
room_version = await self.store.get_room_version_id(event.room_id) | ||
room_version_obj = KNOWN_ROOM_VERSIONS[room_version] | ||
|
||
logger.error( | ||
"_check_event_auth before event_id=%s auth_events=%s", | ||
event.event_id, | ||
auth_events, | ||
) | ||
if not auth_events: | ||
prev_state_ids = await context.get_prev_state_ids() | ||
auth_events_ids = self._event_auth_handler.compute_auth_events( | ||
event, prev_state_ids, for_verification=True | ||
) | ||
auth_events_x = await self.store.get_events(auth_events_ids) | ||
auth_events = {(e.type, e.state_key): e for e in auth_events_x.values()} | ||
logger.error( | ||
"_check_event_auth after event_id=%s auth_events=%s", | ||
event.event_id, | ||
auth_events, | ||
) | ||
|
||
# This is a hack to fix some old rooms where the initial join event | ||
# didn't reference the create event in its auth events. | ||
|
@@ -2704,16 +2714,40 @@ async def _update_auth_events_and_context_for_auth( | |
for e in remote_auth_chain | ||
if e.event_id in auth_ids or e.type == EventTypes.Create | ||
} | ||
logger.error( | ||
"_check_event_auth cvcxvxcbs event_id=%s e.event_id=%s auth_ids=%s auth=%s", | ||
event.event_id, | ||
e.event_id, | ||
auth_ids, | ||
auth, | ||
) | ||
e.internal_metadata.outlier = True | ||
|
||
logger.debug( | ||
"_check_event_auth %s missing_auth: %s", | ||
event.event_id, | ||
e.event_id, | ||
) | ||
context = await self.state_handler.compute_event_context(e) | ||
logger.error( | ||
"_update_auth_events_and_context_for_auth before event_id=%s state_group=%s", | ||
event.event_id, | ||
context.state_group, | ||
) | ||
missing_auth_event_context = ( | ||
await self.state_handler.compute_event_context(e) | ||
) | ||
logger.error( | ||
"_update_auth_events_and_context_for_auth after1 event_id=%s state_group=%s", | ||
event.event_id, | ||
context.state_group, | ||
) | ||
await self._auth_and_persist_event( | ||
origin, e, context, auth_events=auth | ||
origin, e, missing_auth_event_context, auth_events=auth | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Main fix is here ^ First stumbled upon in #10245 (comment) Reproducible on a federated homeserver when there is a membership auth event as a floating outlier. Then when we try to backfill one of that persons messages, it has missing membership auth to fetch which caused us to mistakenly replace the Call stack:
We touch some of the same code on the the receive PDU route but if it is able to reach this line, it is able to regenerate the
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The main fix disappeared from the diff here because it was merged alongside #10245. Full diff before merges is view-able via https://github.com/matrix-org/synapse/pull/10439/files/eb32018cb3d9e048fa4b3523a6fb29b1e2b36faf This PR is still relevant to add the regression test for it though. |
||
) | ||
logger.error( | ||
"_update_auth_events_and_context_for_auth after2 event_id=%s state_group=%s", | ||
event.event_id, | ||
context.state_group, | ||
) | ||
|
||
if e.event_id in event_auth_events: | ||
|
@@ -2725,6 +2759,11 @@ async def _update_auth_events_and_context_for_auth( | |
logger.exception("Failed to get auth chain") | ||
|
||
if event.internal_metadata.is_outlier(): | ||
logger.error( | ||
"_update_auth_events_and_context_for_auth is outlier event_id=%s state_group=%s", | ||
event.event_id, | ||
context.state_group, | ||
) | ||
# XXX: given that, for an outlier, we'll be working with the | ||
# event's *claimed* auth events rather than those we calculated: | ||
# (a) is there any point in this test, since different_auth below will | ||
|
@@ -2737,7 +2776,22 @@ async def _update_auth_events_and_context_for_auth( | |
e.event_id for e in auth_events.values() | ||
) | ||
|
||
if different_auth: | ||
logger.error( | ||
"_update_auth_events_and_context_for_auth different_auth! event_id=%s state_group=%s\ndifferent_auth=%s\nevent_auth_events=%s\nauth_events=%s", | ||
event.event_id, | ||
context.state_group, | ||
different_auth, | ||
event_auth_events, | ||
auth_events, | ||
) | ||
|
||
if not different_auth: | ||
logger.error( | ||
"_update_auth_events_and_context_for_auth no different_auth event_id=%s state_group=%s", | ||
event.event_id, | ||
context.state_group, | ||
) | ||
return context | ||
|
||
logger.info( | ||
|
@@ -2793,6 +2847,11 @@ async def _update_auth_events_and_context_for_auth( | |
context = await self._update_context_for_auth_events( | ||
event, context, auth_events | ||
) | ||
logger.error( | ||
"_update_auth_events_and_context_for_auth after3 event_id=%s state_group=%s", | ||
event.event_id, | ||
context.state_group, | ||
) | ||
|
||
return context | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,15 +13,22 @@ | |
# limitations under the License. | ||
import logging | ||
from unittest import TestCase | ||
from unittest.mock import Mock | ||
from twisted.internet import defer | ||
from typing import ( | ||
List, | ||
) | ||
|
||
from synapse.api.constants import EventTypes | ||
from synapse.api.errors import AuthError, Codes, LimitExceededError, SynapseError | ||
from synapse.api.room_versions import RoomVersions | ||
from synapse.events import EventBase | ||
from synapse.federation.federation_base import event_from_pdu_json | ||
from synapse.federation.units import Transaction | ||
from synapse.logging.context import LoggingContext, run_in_background | ||
from synapse.rest import admin | ||
from synapse.rest.client.v1 import login, room | ||
from synapse.util.stringutils import random_string | ||
|
||
from tests import unittest | ||
|
||
|
@@ -36,9 +43,15 @@ class FederationTestCase(unittest.HomeserverTestCase): | |
] | ||
|
||
def make_homeserver(self, reactor, clock): | ||
hs = self.setup_test_homeserver(federation_http_client=None) | ||
# we mock out the federation client too | ||
self.mock_federation_client = Mock() | ||
hs = self.setup_test_homeserver( | ||
federation_http_client=self.mock_federation_client | ||
) | ||
self.handler = hs.get_federation_handler() | ||
self.store = hs.get_datastore() | ||
self.state_store = hs.get_storage().state | ||
self._event_auth_handler = hs.get_event_auth_handler() | ||
return hs | ||
|
||
def test_exchange_revoked_invite(self): | ||
|
@@ -190,6 +203,145 @@ def test_rejected_state_event_state(self): | |
|
||
self.assertEqual(sg, sg2) | ||
|
||
def test_backfill_floating_outlier_membership_auth(self): | ||
""" | ||
As the remote federated homeserver, check that we can properly process | ||
an event with auth_events that include a floating membership event from | ||
the OTHER_SERVER. | ||
|
||
Regression test, see #10439. | ||
""" | ||
|
||
def filter_auth_events_on_event(event: EventBase, auth_events: List[EventBase]): | ||
# Create a StateMap[str] | ||
auth_event_state_map = { | ||
(e.type, e.state_key): e.event_id for e in auth_events | ||
} | ||
# Actually strip down and use the necessary auth events | ||
auth_event_ids = self._event_auth_handler.compute_auth_events( | ||
event=event, | ||
current_state_ids=auth_event_state_map, | ||
for_verification=False, | ||
) | ||
|
||
# Replace the auth_events with the stripped down ones | ||
event.auth_events = auth_event_ids | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You shouldn't be able to edit the auth events directly (as it invalidates all the hashes), so I am loathed to rely on this. Instead, can we use an event builder to create the event? Something like: builder = hs.get_event_builder_factory().for_room_version(room_version, event_dict)
event = await builder.build(
prev_event_ids=[fake_prev_event_id],
auth_events=compute_auth_events(builder, state),
depth=depth,
) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've switched to the event builder but I still have to edit the This seems reasonable as I think the event hashes are the same because we pop off the |
||
|
||
OTHER_SERVER = "otherserver" | ||
OTHER_USER = "@otheruser:" + OTHER_SERVER | ||
|
||
# create the room | ||
user_id = self.register_user("kermit", "test") | ||
tok = self.login("kermit", "test") | ||
room_id = self.helper.create_room_as( | ||
room_creator=user_id, | ||
is_public=True, | ||
tok=tok, | ||
extra_content={ | ||
"preset": "public_chat", | ||
}, | ||
) | ||
room_version = self.get_success(self.store.get_room_version(room_id)) | ||
|
||
prev_event_ids = self.get_success(self.store.get_prev_events_for_room(room_id)) | ||
( | ||
most_recent_prev_event_id, | ||
most_recent_prev_event_depth, | ||
) = self.get_success(self.store.get_max_depth_of(prev_event_ids)) | ||
# mapping from (type, state_key) -> state_event_id | ||
prev_state_map = self.get_success( | ||
self.state_store.get_state_ids_for_event(most_recent_prev_event_id) | ||
) | ||
# List of state event ID's | ||
prev_state_ids = list(prev_state_map.values()) | ||
auth_event_ids = prev_state_ids | ||
auth_events = list( | ||
self.get_success(self.store.get_events(auth_event_ids)).values() | ||
) | ||
MadLittleMods marked this conversation as resolved.
Show resolved
Hide resolved
|
||
logger.error("auth_events gresegrsegrb %s", auth_events) | ||
|
||
# build a floating outlier member state event | ||
fake_prev_event_id = "$" + random_string(43) | ||
member_event = event_from_pdu_json( | ||
{ | ||
"type": EventTypes.Member, | ||
"content": { | ||
"membership": "join", | ||
}, | ||
"state_key": OTHER_USER, | ||
"room_id": room_id, | ||
"sender": OTHER_USER, | ||
"depth": most_recent_prev_event_depth, | ||
"prev_events": [fake_prev_event_id], | ||
"auth_events": auth_event_ids.copy(), | ||
"origin_server_ts": self.clock.time_msec(), | ||
"signatures": { | ||
OTHER_SERVER: {"ed25519:key_version": "SomeSignatureHere"} | ||
}, | ||
}, | ||
room_version, | ||
outlier=True, | ||
) | ||
logger.error("member_event event_id=%s", member_event.event_id) | ||
logger.error( | ||
"member_event before event_id=%s auth_events=%s", | ||
member_event.event_id, | ||
member_event.auth_events, | ||
) | ||
MadLittleMods marked this conversation as resolved.
Show resolved
Hide resolved
|
||
filter_auth_events_on_event(member_event, auth_events) | ||
logger.error("member_event after auth_events=%s", member_event.auth_events) | ||
|
||
# build and send an event authed based on the member event | ||
raw_auth_events = auth_events + [member_event] | ||
message_event = event_from_pdu_json( | ||
{ | ||
"type": EventTypes.Message, | ||
"content": {}, | ||
"room_id": room_id, | ||
"sender": OTHER_USER, | ||
"depth": most_recent_prev_event_depth, | ||
"prev_events": prev_event_ids.copy(), | ||
"auth_events": [ev.event_id for ev in raw_auth_events], | ||
"origin_server_ts": self.clock.time_msec(), | ||
"signatures": { | ||
OTHER_SERVER: {"ed25519:key_version": "SomeSignatureHere"} | ||
}, | ||
}, | ||
room_version, | ||
) | ||
logger.error("message_event event_id=%s", message_event.event_id) | ||
logger.error("message_event before auth_events=%s", message_event.auth_events) | ||
filter_auth_events_on_event(message_event, raw_auth_events.copy()) | ||
logger.error("message_event after auth_events=%s", message_event.auth_events) | ||
|
||
# Stub the /event_auth response from the OTHER_SERVER | ||
async def get_event_auth(destination, room_id, event_id): | ||
return [ | ||
auth_event | ||
for auth_event in raw_auth_events | ||
if auth_event.type is EventTypes.Create | ||
or auth_event.type is EventTypes.PowerLevels | ||
or auth_event.type is EventTypes.JoinRules | ||
or ( | ||
auth_event.type is EventTypes.Member | ||
and auth_event.state_key == message_event.sender | ||
) | ||
] | ||
MadLittleMods marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
self.handler.federation_client.get_event_auth = get_event_auth | ||
|
||
with LoggingContext("receive_pdu"): | ||
d = run_in_background( | ||
self.handler.on_receive_pdu, OTHER_SERVER, message_event | ||
) | ||
self.get_success(d) | ||
|
||
# Try and get the events | ||
stored_event = self.get_success( | ||
self.store.get_event(message_event.event_id, allow_none=True) | ||
) | ||
self.assertTrue(stored_event is not None) | ||
|
||
@unittest.override_config( | ||
{"rc_invites": {"per_user": {"per_second": 0.5, "burst_count": 3}}} | ||
) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there any linting we can add to avoid re-assigning or mask existing variables?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@erikjohnston Still curious if there is some tooling we can use to avoid this problem?
Since Python is not block scoped, I think we do this all over on purpose but would be nice to be more conscious about it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
From some cursory googling around Python linting (black, flake8, mypy, type hinting, etc), I'm not finding anything around constants, declarations vs assignments, and re-assignments 😥
Even re-assignments of function parameters like you would see with ESlint
no-param-reassign
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like there is a handful of related violations in the
wemake-python-styleguide
best practices section. But none of these will specifically stop function parameter re-assignment for this exact bug scenario (tried it in a code playground).BlockAndLocalOverlapViolation
: Forbid overlapping local and block variables.OuterScopeShadowingViolation
: Forbid shadowing variables from outer scopes.ControlVarUsedAfterBlockViolation
: Forbid control variables after the block body.Dev notes
Interesting comparison of linting type tools: https://github.com/wemake-services/wemake-python-styleguide#what-we-are-about
Related but not related links (just linking for my own reference):
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I created wemake-services/wemake-python-styleguide#2128 to propose a new linting rule for this scenario 🐣
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We could use Semgrep to find all of the scenarios in the codebase where we do this.