Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lib/auth/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ func initCluster(ctx context.Context, cfg InitConfig, asrv *Server) error {
// Key deletion is best-effort, log a warning if it fails and carry on.
// We don't want to prevent a CA rotation, which may be necessary in
// some cases where this would fail.
log.WithError(err).Warning("Failed attempt to delete unused HSM keys")
log.Warnf("An attempt to clean up unused HSM or KMS CA keys has failed unexpectedly: %v", err)
}

if lib.IsInsecureDevMode() {
Expand Down
76 changes: 55 additions & 21 deletions lib/auth/keystore/gcp_kms.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,65 +228,99 @@ func (g *gcpKMSKeyStore) canSignWithKey(ctx context.Context, raw []byte, keyType
return true, nil
}

// DeleteUnusedKeys deletes all keys from KMS if they are:
// 1. Labeled by this server (matching HostUUID) when they were created
// 2. Not included in the argument activeKeys
// DeleteUnusedKeys deletes all keys from the configured KMS keyring if they:
// 1. Are not included in the argument activeKeys
// 2. Are labeled with hostLabel (teleport_auth_host)
// 3. The hostLabel value matches the local host UUID
//
// The activeKeys argument is meant to contain to complete set of raw key IDs as
// stored in the current CA specs in the backend.
//
// The reason this does not delete any keys created by a different auth server
// is to avoid a case where:
// 1. A different auth server (auth2) creates a new key in GCP KMS
// 2. This function (running on auth1) deletes that new key
// 3. auth2 saves the id of this deleted key to the backend CA
//
// or a simpler case where: the other auth server is running in a completely
// different Teleport cluster and the keys it's actively using will never appear
// in the activeKeys argument.
func (g *gcpKMSKeyStore) DeleteUnusedKeys(ctx context.Context, activeKeys [][]byte) error {
// Make a map of currently active key versions, this is used for lookups to
// check which keys in KMS are unused, and holds a count of how many times
// they are found in KMS. If any active keys are not found in KMS, we are in
// a bad state, so key deletion will be aborted.
// check which keys in KMS are unused.
activeKmsKeyVersions := make(map[string]int)
for _, activeKey := range activeKeys {
keyID, err := parseGCPKMSKeyID(activeKey)
keyIsRelevant, err := g.canSignWithKey(ctx, activeKey, keyType(activeKey))
if err != nil {
// could be a different type of key
// Don't expect this error to ever hit, safer to return if it does.
return trace.Wrap(err)
}
if !keyIsRelevant {
// Ignore active keys that are not GCP KMS keys or are in a
// different keyring than the one this Auth is configured to use.
continue
}
keyID, err := parseGCPKMSKeyID(activeKey)
if err != nil {
// Realistically we should not hit this since canSignWithKey already
// calls parseGCPKMSKeyID.
return trace.Wrap(err)
}
activeKmsKeyVersions[keyID.keyVersionName] = 0
}

var unusedKeyIDs []gcpKMSKeyID
var keysToDelete []gcpKMSKeyID

listKeyRequest := &kmspb.ListCryptoKeysRequest{
Parent: g.keyRing,
Filter: fmt.Sprintf("labels.%s=%s", hostLabel, g.hostUUID),
// Only bother listing keys created by Teleport which should have the
// hostLabel set. A filter of "labels.label:*" tests if the label is
// defined.
// https://cloud.google.com/sdk/gcloud/reference/topic/filters
// > Use key:* to test if key is defined
Filter: fmt.Sprintf("labels.%s:*", hostLabel),
}
iter := g.kmsClient.ListCryptoKeys(ctx, listKeyRequest)
key, err := iter.Next()
for err == nil {
keyVersionName := key.Name + keyVersionSuffix
if _, active := activeKmsKeyVersions[keyVersionName]; active {
activeKmsKeyVersions[keyVersionName]++
} else {
unusedKeyIDs = append(unusedKeyIDs, gcpKMSKeyID{
// Record that this current active key was actually found.
activeKmsKeyVersions[keyVersionName] += 1
} else if key.Labels[hostLabel] == g.hostUUID {
// This key is not active (it is not currently stored in any
// Teleport CA) and it was created by this Auth server, so it should
// be safe to delete.
keysToDelete = append(keysToDelete, gcpKMSKeyID{
keyVersionName: keyVersionName,
})
}
key, err = iter.Next()
}
if err != nil && !errors.Is(err, iterator.Done) {
return trace.Wrap(err)
return trace.Wrap(err, "unexpected error while iterating GCP KMS keys")
}

// If any member of activeKeys which is part of the same GCP KMS keyring
// queried here was not found in the ListCryptoKeys response, something has
// gone wrong and there's a chance we have a bug or GCP has made a breaking
// API change. In this case we should abort to avoid the chance of deleting
// any currently active keys.
for keyVersion, found := range activeKmsKeyVersions {
if found == 0 {
// Failed to find a currently active key owned by this host.
// The cluster is in a bad state, refuse to delete any keys.
return trace.NotFound(
"cannot find currently active CA key in %q GCP KMS, aborting attempt to delete unused keys",
"cannot find currently active CA key %q in GCP KMS, aborting attempt to delete unused keys",
keyVersion)
}
}

for _, unusedKey := range unusedKeyIDs {
g.log.WithFields(logrus.Fields{"key_version": unusedKey.keyVersionName}).Info("deleting unused GCP KMS key created by this server")
for _, unusedKey := range keysToDelete {
g.log.WithFields(logrus.Fields{"key_version": unusedKey.keyVersionName}).Info("Deleting unused GCP KMS key.")
err := g.deleteKey(ctx, unusedKey.marshal())
// Ignore errors where we can't destroy because the state is already
// DESTROYED or DESTROY_SCHEDULED
if err != nil && !strings.Contains(err.Error(), "has value DESTROY") {
g.log.WithFields(logrus.Fields{"key_version": unusedKey.keyVersionName}).WithError(err).Warn("error deleting unused GCP KMS key")
return trace.Wrap(err)
return trace.Wrap(err, "error deleting unused GCP KMS key %q", unusedKey.keyVersionName)
}
}
return nil
Expand Down
172 changes: 161 additions & 11 deletions lib/auth/keystore/gcp_kms_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
kms "cloud.google.com/go/kms/apiv1"
"cloud.google.com/go/kms/apiv1/kmspb"
"github.com/gravitational/trace"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/crypto/ssh"
"google.golang.org/api/option"
Expand Down Expand Up @@ -226,15 +227,15 @@ func (f *fakeGCPKMSServer) AsymmetricSign(ctx context.Context, req *kmspb.Asymme
return resp, nil
}

func (f *fakeGCPKMSServer) ListCryptoKeys(context.Context, *kmspb.ListCryptoKeysRequest) (*kmspb.ListCryptoKeysResponse, error) {
func (f *fakeGCPKMSServer) ListCryptoKeys(ctx context.Context, req *kmspb.ListCryptoKeysRequest) (*kmspb.ListCryptoKeysResponse, error) {
f.mu.RLock()
defer f.mu.RUnlock()
var cryptoKeys []*kmspb.CryptoKey
for keyVersionName := range f.keyVersions {
cryptoKey := &kmspb.CryptoKey{
Name: strings.TrimSuffix(keyVersionName, "/cryptoKeyVersions/1"),
for keyVersionName, keyState := range f.keyVersions {
if !strings.HasPrefix(keyVersionName, req.Parent) {
continue
}
cryptoKeys = append(cryptoKeys, cryptoKey)
cryptoKeys = append(cryptoKeys, keyState.cryptoKey)
}
resp := &kmspb.ListCryptoKeysResponse{
CryptoKeys: cryptoKeys,
Expand All @@ -252,12 +253,7 @@ func (f *fakeGCPKMSServer) DestroyCryptoKeyVersion(ctx context.Context, req *kms
}

keyState.cryptoKeyVersion.State = kmspb.CryptoKeyVersion_DESTROY_SCHEDULED

resp := &kmspb.CryptoKeyVersion{
Name: req.Name,
State: keyState.cryptoKeyVersion.State,
}
return resp, nil
return keyState.cryptoKeyVersion, nil
}

// deleteKey is a test helper to delete a key by the raw ID which would be
Expand Down Expand Up @@ -593,3 +589,157 @@ func TestGCPKMSKeystore(t *testing.T) {
})
}
}

func TestGCPKMSDeleteUnusedKeys(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

const (
localHostID = "local-host-id"
otherHostID = "other-host-id"
localKeyring = "local-keyring"
otherKeyring = "other-keyring"
)

for _, tc := range []struct {
desc string
existingKeys []keySpec
activeKeys []keySpec
expectDestroyed []keySpec
}{
{
// Only inactive keys should be destroyed.
desc: "active and inactive",
existingKeys: []keySpec{
{keyring: localKeyring, id: "id_active", host: localHostID},
{keyring: localKeyring, id: "id_inactive", host: localHostID},
},
activeKeys: []keySpec{
{keyring: localKeyring, id: "id_active", host: localHostID},
},
expectDestroyed: []keySpec{
{keyring: localKeyring, id: "id_inactive", host: localHostID},
},
},
{
// Inactive key from other host should not be destroyed, it may
// be recently created and just not added to Teleport CA yet, or the
// other Auth might be in a completely different Teleport cluster
// using the same keyring (I wouldn't advise this but someone might
// do it).
desc: "inactive key from other host",
existingKeys: []keySpec{
{keyring: localKeyring, id: "id_inactive_local", host: localHostID},
{keyring: localKeyring, id: "id_inactive_remote", host: otherHostID},
},
expectDestroyed: []keySpec{
{keyring: localKeyring, id: "id_inactive_local", host: localHostID},
},
},
{
// The presence of active keys created by a remote host in the local
// keyring should not break the DeleteUnusedKeys operation.
desc: "active key from other host",
existingKeys: []keySpec{
{keyring: localKeyring, id: "id_active_local", host: localHostID},
{keyring: localKeyring, id: "id_inactive_local", host: localHostID},
{keyring: localKeyring, id: "id_active_remote", host: otherHostID},
{keyring: localKeyring, id: "id_inactive_remote", host: otherHostID},
},
activeKeys: []keySpec{
{keyring: localKeyring, id: "id_active_local", host: localHostID},
{keyring: localKeyring, id: "id_active_remote", host: otherHostID},
},
expectDestroyed: []keySpec{
{keyring: localKeyring, id: "id_inactive_local", host: localHostID},
},
},
{
// Keys in other keyring should never be destroyed.
desc: "keys in other keyring",
existingKeys: []keySpec{
{keyring: localKeyring, id: "id_inactive_local", host: localHostID},
{keyring: otherKeyring, id: "id_inactive_other1", host: localHostID},
{keyring: otherKeyring, id: "id_inactive_other2", host: otherHostID},
},
expectDestroyed: []keySpec{
{keyring: localKeyring, id: "id_inactive_local", host: localHostID},
},
},
} {
t.Run(tc.desc, func(t *testing.T) {
fakeKMSServer, dialer := newTestGCPKMSService(t)
kmsClient := newTestGCPKMSClient(t, dialer)
manager, err := NewManager(ctx, Config{
GCPKMS: GCPKMSConfig{
ProtectionLevel: "HSM",
HostUUID: localHostID,
KeyRing: localKeyring,
kmsClientOverride: kmsClient,
},
})
require.NoError(t, err, "error while creating test keystore manager")

// Pre-req: create existing keys in fake KMS backend
for _, ks := range tc.existingKeys {
_, err := fakeKMSServer.CreateCryptoKey(ctx, createKeyRequest(ks))
require.NoError(t, err)
}

// Test: DeleteUnusedKeys with activeKeys from the testcase
activeKeyIDs := make([][]byte, len(tc.activeKeys))
for i, ks := range tc.activeKeys {
activeKeyIDs[i] = ks.keyID()
}
err = manager.DeleteUnusedKeys(ctx, activeKeyIDs)
assert.NoError(t, err)

expectDestroyedSet := make(map[string]bool, len(tc.expectDestroyed))
for _, ks := range tc.expectDestroyed {
expectDestroyedSet[ks.keyVersionName()] = true
}
require.Len(t, fakeKMSServer.keyVersions, len(tc.existingKeys))
for keyVersionName, state := range fakeKMSServer.keyVersions {
if expectDestroyedSet[keyVersionName] {
// Fake KMS server only sets state to DESTROY_SCHEDULED,
// that's good enough for the test.
require.Equal(t, kmspb.CryptoKeyVersion_DESTROY_SCHEDULED.String(), state.cryptoKeyVersion.State.String())
} else {
require.Equal(t, kmspb.CryptoKeyVersion_ENABLED.String(), state.cryptoKeyVersion.State.String())
}
}
})
}
}

type keySpec struct {
keyring, id, host string
}

func (ks *keySpec) keyVersionName() string {
return ks.keyring + "/cryptoKeys/" + ks.id + keyVersionSuffix
}

func (ks *keySpec) keyID() []byte {
return gcpKMSKeyID{
keyVersionName: ks.keyVersionName(),
}.marshal()
}

func createKeyRequest(ks keySpec) *kmspb.CreateCryptoKeyRequest {
return &kmspb.CreateCryptoKeyRequest{
Parent: ks.keyring,
CryptoKeyId: ks.id,
CryptoKey: &kmspb.CryptoKey{
Purpose: kmspb.CryptoKey_ASYMMETRIC_SIGN,
Labels: map[string]string{
hostLabel: ks.host,
},
VersionTemplate: &kmspb.CryptoKeyVersionTemplate{
ProtectionLevel: kmspb.ProtectionLevel_SOFTWARE,
Algorithm: kmspb.CryptoKeyVersion_RSA_SIGN_PKCS1_2048_SHA256,
},
},
}
}
Loading