Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Fix backfilled events being rejected for no state_groups #10439

Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions synapse/event_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,11 +277,22 @@ def _is_membership_change_allowed(

key = (EventTypes.JoinRules, "")
join_rule_event = auth_events.get(key)
logger.error(
"_is_membership_change_allowed: event_id=%s join_rule_event=%s event=%s event.auth_events=%s auth_events=%s",
event.event_id,
join_rule_event,
event,
event.auth_events,
auth_events,
)
if join_rule_event:
join_rule = join_rule_event.content.get("join_rule", JoinRules.INVITE)
else:
join_rule = JoinRules.INVITE

# TODO: remove. The auth_rules don't have the join_rules for the given even we're trying auth
# join_rule = JoinRules.PUBLIC

user_level = get_user_power_level(event.user_id, auth_events)
target_level = get_user_power_level(target_user_id, auth_events)

Expand Down Expand Up @@ -341,6 +352,7 @@ def _is_membership_change_allowed(
if user_level < invite_level:
raise AuthError(403, "You don't have permission to invite users")
elif Membership.JOIN == membership:
logger.error("join_rule fefefeafw %s", join_rule)
# Joins are valid iff caller == target and:
# * They are not banned.
# * They are accepting a previously sent invitation.
Expand Down
63 changes: 61 additions & 2 deletions synapse/handlers/federation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2566,13 +2566,23 @@ async def _check_event_auth(
room_version = await self.store.get_room_version_id(event.room_id)
room_version_obj = KNOWN_ROOM_VERSIONS[room_version]

logger.error(
"_check_event_auth before event_id=%s auth_events=%s",
event.event_id,
auth_events,
)
if not auth_events:
prev_state_ids = await context.get_prev_state_ids()
auth_events_ids = self._event_auth_handler.compute_auth_events(
event, prev_state_ids, for_verification=True
)
auth_events_x = await self.store.get_events(auth_events_ids)
auth_events = {(e.type, e.state_key): e for e in auth_events_x.values()}
logger.error(
"_check_event_auth after event_id=%s auth_events=%s",
event.event_id,
auth_events,
)

# This is a hack to fix some old rooms where the initial join event
# didn't reference the create event in its auth events.
Expand Down Expand Up @@ -2704,16 +2714,40 @@ async def _update_auth_events_and_context_for_auth(
for e in remote_auth_chain
if e.event_id in auth_ids or e.type == EventTypes.Create
}
logger.error(
"_check_event_auth cvcxvxcbs event_id=%s e.event_id=%s auth_ids=%s auth=%s",
event.event_id,
e.event_id,
auth_ids,
auth,
)
e.internal_metadata.outlier = True

logger.debug(
"_check_event_auth %s missing_auth: %s",
event.event_id,
e.event_id,
)
context = await self.state_handler.compute_event_context(e)
logger.error(
"_update_auth_events_and_context_for_auth before event_id=%s state_group=%s",
event.event_id,
context.state_group,
)
missing_auth_event_context = (
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any linting we can add to avoid re-assigning or mask existing variables?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@erikjohnston Still curious if there is some tooling we can use to avoid this problem?

Since Python is not block scoped, I think we do this all over on purpose but would be nice to be more conscious about it.

Copy link
Contributor Author

@MadLittleMods MadLittleMods Jul 30, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From some cursory googling around Python linting (black, flake8, mypy, type hinting, etc), I'm not finding anything around constants, declarations vs assignments, and re-assignments 😥

Even re-assignments of function parameters like you would see with ESlint no-param-reassign

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like there is a handful of related violations in the wemake-python-styleguide best practices section. But none of these will specifically stop function parameter re-assignment for this exact bug scenario (tried it in a code playground).

Dev notes

Interesting comparison of linting type tools: https://github.com/wemake-services/wemake-python-styleguide#what-we-are-about


Related but not related links (just linking for my own reference):

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I created wemake-services/wemake-python-styleguide#2128 to propose a new linting rule for this scenario 🐣

Copy link
Contributor Author

@MadLittleMods MadLittleMods Oct 15, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could use Semgrep to find all of the scenarios in the codebase where we do this.

await self.state_handler.compute_event_context(e)
)
logger.error(
"_update_auth_events_and_context_for_auth after1 event_id=%s state_group=%s",
event.event_id,
context.state_group,
)
await self._auth_and_persist_event(
origin, e, context, auth_events=auth
origin, e, missing_auth_event_context, auth_events=auth
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Main fix is here ^

First stumbled upon in #10245 (comment)

Reproducible on a federated homeserver when there is a membership auth event as a floating outlier. Then when we try to backfill one of that persons messages, it has missing membership auth to fetch which caused us to mistakenly replace the context for the message with that of the floating membership outlier event. Since outliers have no state or state_group, the error bubbles up when we continue down the persisting route: sqlite3.IntegrityError: NOT NULL constraint failed: event_to_state_groups.state_group

Call stack:

backfill
_auth_and_persist_event
_check_event_auth
# Modifies the context (where the bug is)
_update_auth_events_and_context_for_auth

We touch some of the same code on the the receive PDU route but if it is able to reach this line, it is able to regenerate the context and recover. The cases where we return before and it can reproduce is when it's another outlier, no different_auth, or auth_event is from another room.

on_receive_pdu
_process_received_pdu
_auth_and_persist_event
_check_event_auth
# Modifies the context (where the bug is)
_update_auth_events_and_context_for_auth

Copy link
Contributor Author

@MadLittleMods MadLittleMods Jul 28, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The main fix disappeared from the diff here because it was merged alongside #10245. Full diff before merges is view-able via https://github.com/matrix-org/synapse/pull/10439/files/eb32018cb3d9e048fa4b3523a6fb29b1e2b36faf

This PR is still relevant to add the regression test for it though.

)
logger.error(
"_update_auth_events_and_context_for_auth after2 event_id=%s state_group=%s",
event.event_id,
context.state_group,
)

if e.event_id in event_auth_events:
Expand All @@ -2725,6 +2759,11 @@ async def _update_auth_events_and_context_for_auth(
logger.exception("Failed to get auth chain")

if event.internal_metadata.is_outlier():
logger.error(
"_update_auth_events_and_context_for_auth is outlier event_id=%s state_group=%s",
event.event_id,
context.state_group,
)
# XXX: given that, for an outlier, we'll be working with the
# event's *claimed* auth events rather than those we calculated:
# (a) is there any point in this test, since different_auth below will
Expand All @@ -2737,7 +2776,22 @@ async def _update_auth_events_and_context_for_auth(
e.event_id for e in auth_events.values()
)

if different_auth:
logger.error(
"_update_auth_events_and_context_for_auth different_auth! event_id=%s state_group=%s\ndifferent_auth=%s\nevent_auth_events=%s\nauth_events=%s",
event.event_id,
context.state_group,
different_auth,
event_auth_events,
auth_events,
)

if not different_auth:
logger.error(
"_update_auth_events_and_context_for_auth no different_auth event_id=%s state_group=%s",
event.event_id,
context.state_group,
)
return context

logger.info(
Expand Down Expand Up @@ -2793,6 +2847,11 @@ async def _update_auth_events_and_context_for_auth(
context = await self._update_context_for_auth_events(
event, context, auth_events
)
logger.error(
"_update_auth_events_and_context_for_auth after3 event_id=%s state_group=%s",
event.event_id,
context.state_group,
)

return context

Expand Down
154 changes: 153 additions & 1 deletion tests/handlers/test_federation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,22 @@
# limitations under the License.
import logging
from unittest import TestCase
from unittest.mock import Mock
from twisted.internet import defer
from typing import (
List,
)

from synapse.api.constants import EventTypes
from synapse.api.errors import AuthError, Codes, LimitExceededError, SynapseError
from synapse.api.room_versions import RoomVersions
from synapse.events import EventBase
from synapse.federation.federation_base import event_from_pdu_json
from synapse.federation.units import Transaction
from synapse.logging.context import LoggingContext, run_in_background
from synapse.rest import admin
from synapse.rest.client.v1 import login, room
from synapse.util.stringutils import random_string

from tests import unittest

Expand All @@ -36,9 +43,15 @@ class FederationTestCase(unittest.HomeserverTestCase):
]

def make_homeserver(self, reactor, clock):
hs = self.setup_test_homeserver(federation_http_client=None)
# we mock out the federation client too
self.mock_federation_client = Mock()
hs = self.setup_test_homeserver(
federation_http_client=self.mock_federation_client
)
self.handler = hs.get_federation_handler()
self.store = hs.get_datastore()
self.state_store = hs.get_storage().state
self._event_auth_handler = hs.get_event_auth_handler()
return hs

def test_exchange_revoked_invite(self):
Expand Down Expand Up @@ -190,6 +203,145 @@ def test_rejected_state_event_state(self):

self.assertEqual(sg, sg2)

def test_backfill_floating_outlier_membership_auth(self):
"""
As the remote federated homeserver, check that we can properly process
an event with auth_events that include a floating membership event from
the OTHER_SERVER.

Regression test, see #10439.
"""

def filter_auth_events_on_event(event: EventBase, auth_events: List[EventBase]):
# Create a StateMap[str]
auth_event_state_map = {
(e.type, e.state_key): e.event_id for e in auth_events
}
# Actually strip down and use the necessary auth events
auth_event_ids = self._event_auth_handler.compute_auth_events(
event=event,
current_state_ids=auth_event_state_map,
for_verification=False,
)

# Replace the auth_events with the stripped down ones
event.auth_events = auth_event_ids
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You shouldn't be able to edit the auth events directly (as it invalidates all the hashes), so I am loathed to rely on this.

Instead, can we use an event builder to create the event? Something like:

builder = hs.get_event_builder_factory().for_room_version(room_version, event_dict)
event = await builder.build(
    prev_event_ids=[fake_prev_event_id],
    auth_events=compute_auth_events(builder, state),
    depth=depth,
)

Copy link
Contributor Author

@MadLittleMods MadLittleMods Jul 28, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've switched to the event builder but I still have to edit the signatures to make them appear from the OTHER_SERVER. Otherwise they are rejected as not being signed by that server.

This seems reasonable as I think the event hashes are the same because we pop off the signatures when we compute them.


OTHER_SERVER = "otherserver"
OTHER_USER = "@otheruser:" + OTHER_SERVER

# create the room
user_id = self.register_user("kermit", "test")
tok = self.login("kermit", "test")
room_id = self.helper.create_room_as(
room_creator=user_id,
is_public=True,
tok=tok,
extra_content={
"preset": "public_chat",
},
)
room_version = self.get_success(self.store.get_room_version(room_id))

prev_event_ids = self.get_success(self.store.get_prev_events_for_room(room_id))
(
most_recent_prev_event_id,
most_recent_prev_event_depth,
) = self.get_success(self.store.get_max_depth_of(prev_event_ids))
# mapping from (type, state_key) -> state_event_id
prev_state_map = self.get_success(
self.state_store.get_state_ids_for_event(most_recent_prev_event_id)
)
# List of state event ID's
prev_state_ids = list(prev_state_map.values())
auth_event_ids = prev_state_ids
auth_events = list(
self.get_success(self.store.get_events(auth_event_ids)).values()
)
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved
logger.error("auth_events gresegrsegrb %s", auth_events)

# build a floating outlier member state event
fake_prev_event_id = "$" + random_string(43)
member_event = event_from_pdu_json(
{
"type": EventTypes.Member,
"content": {
"membership": "join",
},
"state_key": OTHER_USER,
"room_id": room_id,
"sender": OTHER_USER,
"depth": most_recent_prev_event_depth,
"prev_events": [fake_prev_event_id],
"auth_events": auth_event_ids.copy(),
"origin_server_ts": self.clock.time_msec(),
"signatures": {
OTHER_SERVER: {"ed25519:key_version": "SomeSignatureHere"}
},
},
room_version,
outlier=True,
)
logger.error("member_event event_id=%s", member_event.event_id)
logger.error(
"member_event before event_id=%s auth_events=%s",
member_event.event_id,
member_event.auth_events,
)
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved
filter_auth_events_on_event(member_event, auth_events)
logger.error("member_event after auth_events=%s", member_event.auth_events)

# build and send an event authed based on the member event
raw_auth_events = auth_events + [member_event]
message_event = event_from_pdu_json(
{
"type": EventTypes.Message,
"content": {},
"room_id": room_id,
"sender": OTHER_USER,
"depth": most_recent_prev_event_depth,
"prev_events": prev_event_ids.copy(),
"auth_events": [ev.event_id for ev in raw_auth_events],
"origin_server_ts": self.clock.time_msec(),
"signatures": {
OTHER_SERVER: {"ed25519:key_version": "SomeSignatureHere"}
},
},
room_version,
)
logger.error("message_event event_id=%s", message_event.event_id)
logger.error("message_event before auth_events=%s", message_event.auth_events)
filter_auth_events_on_event(message_event, raw_auth_events.copy())
logger.error("message_event after auth_events=%s", message_event.auth_events)

# Stub the /event_auth response from the OTHER_SERVER
async def get_event_auth(destination, room_id, event_id):
return [
auth_event
for auth_event in raw_auth_events
if auth_event.type is EventTypes.Create
or auth_event.type is EventTypes.PowerLevels
or auth_event.type is EventTypes.JoinRules
or (
auth_event.type is EventTypes.Member
and auth_event.state_key == message_event.sender
)
]
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved

self.handler.federation_client.get_event_auth = get_event_auth

with LoggingContext("receive_pdu"):
d = run_in_background(
self.handler.on_receive_pdu, OTHER_SERVER, message_event
)
self.get_success(d)

# Try and get the events
stored_event = self.get_success(
self.store.get_event(message_event.event_id, allow_none=True)
)
self.assertTrue(stored_event is not None)

@unittest.override_config(
{"rc_invites": {"per_user": {"per_second": 0.5, "burst_count": 3}}}
)
Expand Down