Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/16498.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
proxycfg: fix a bug where terminating gateways were not cleaning up deleted service resolvers for their referenced services
```
22 changes: 22 additions & 0 deletions agent/proxycfg/state_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2093,6 +2093,28 @@ func TestState_WatchesAndUpdates(t *testing.T) {
require.Equal(t, dbResolver.Entry, snap.TerminatingGateway.ServiceResolvers[db])
},
},
{
requiredWatches: map[string]verifyWatchRequest{
"service-resolver:" + db.String(): genVerifyResolverWatch("db", "dc1", structs.ServiceResolver),
},
events: []UpdateEvent{
{
CorrelationID: "service-resolver:" + db.String(),
Result: &structs.ConfigEntryResponse{
Entry: nil,
},
Err: nil,
},
},
verifySnapshot: func(t testing.TB, snap *ConfigSnapshot) {
require.True(t, snap.Valid(), "gateway with service list is valid")
// Finally ensure we cleaned up the resolver
require.Equal(t, []structs.ServiceName{db}, snap.TerminatingGateway.ValidServices())

require.False(t, snap.TerminatingGateway.ServiceResolversSet[db])
require.Nil(t, snap.TerminatingGateway.ServiceResolvers[db])
},
},
{
events: []UpdateEvent{
{
Expand Down
7 changes: 6 additions & 1 deletion agent/proxycfg/terminating_gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -354,8 +354,13 @@ func (s *handlerTerminatingGateway) handleUpdate(ctx context.Context, u UpdateEv
// There should only ever be one entry for a service resolver within a namespace
if resolver, ok := resp.Entry.(*structs.ServiceResolverConfigEntry); ok {
snap.TerminatingGateway.ServiceResolvers[sn] = resolver
snap.TerminatingGateway.ServiceResolversSet[sn] = true
} else {
// we likely have a deleted service resolver, and our cast is a nil
// cast, so clear this out
delete(snap.TerminatingGateway.ServiceResolvers, sn)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to handle this similar to here? This allows us to differentiate between nil and a bad config entry.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, that should be fine, in the case of a bad cast though, we're likely going to want to clean this up, no? That or return an error about casting?

Though, not sure what use returning an error would do if when we're requesting a service resolver we get something else.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that seems reasonable to me. This isn't a huge deal either way.

snap.TerminatingGateway.ServiceResolversSet[sn] = false
}
snap.TerminatingGateway.ServiceResolversSet[sn] = true

case strings.HasPrefix(u.CorrelationID, serviceIntentionsIDPrefix):
resp, ok := u.Result.(structs.Intentions)
Expand Down
18 changes: 16 additions & 2 deletions test/integration/connect/envoy/helpers.bash
Original file line number Diff line number Diff line change
Expand Up @@ -383,15 +383,27 @@ function assert_upstream_has_endpoints_in_status_once {

GOT_COUNT=$(get_upstream_endpoint_in_status_count $HOSTPORT $CLUSTER_NAME $HEALTH_STATUS)

echo "GOT: $GOT_COUNT"
[ "$GOT_COUNT" -eq $EXPECT_COUNT ]
}

function assert_upstream_missing_once {
local HOSTPORT=$1
local CLUSTER_NAME=$2

run get_upstream_endpoint $HOSTPORT $CLUSTER_NAME
[ "$status" -eq 0 ]
echo "$output"
[ "" == "$output" ]
}

function assert_upstream_missing {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

keeping the changes to the helper functions here since they make this, which I had added previously a little bit more robust

local HOSTPORT=$1
local CLUSTER_NAME=$2
run retry_default get_upstream_endpoint $HOSTPORT $CLUSTER_NAME
run retry_long assert_upstream_missing_once $HOSTPORT $CLUSTER_NAME
echo "OUTPUT: $output $status"
[ "" == "$output" ]

[ "$status" -eq 0 ]
}

function assert_upstream_has_endpoints_in_status {
Expand All @@ -400,6 +412,8 @@ function assert_upstream_has_endpoints_in_status {
local HEALTH_STATUS=$3
local EXPECT_COUNT=$4
run retry_long assert_upstream_has_endpoints_in_status_once $HOSTPORT $CLUSTER_NAME $HEALTH_STATUS $EXPECT_COUNT
echo "$output"

[ "$status" -eq 0 ]
}

Expand Down