Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/api/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ var (
CloudErrorCodeUnsupportedMediaType = "UnsupportedMediaType"
CloudErrorCodeInvalidLinkedVNet = "InvalidLinkedVNet"
CloudErrorCodeInvalidLinkedRouteTable = "InvalidLinkedRouteTable"
CloudErrorCodeInvalidLinkedDiskEncryptionSet = "InvalidLinkedDiskEncryptionSet"
CloudErrorCodeNotFound = "NotFound"
CloudErrorCodeForbidden = "Forbidden"
CloudErrorCodeInvalidSubscriptionState = "InvalidSubscriptionState"
Expand Down
16 changes: 15 additions & 1 deletion pkg/api/v20210131preview/openshiftcluster_validatestatic.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,6 @@ func (sv *openShiftClusterStaticValidator) validateNetworkProfile(path string, n
}

func (sv *openShiftClusterStaticValidator) validateMasterProfile(path string, mp *MasterProfile) error {

if !validate.VMSizeIsValid(api.VMSize(mp.VMSize), sv.requireD2sV3Workers, true) {
return api.NewCloudError(http.StatusBadRequest, api.CloudErrorCodeInvalidParameter, path+".vmSize", "The provided master VM size '%s' is invalid.", mp.VMSize)
}
Expand All @@ -228,6 +227,18 @@ func (sv *openShiftClusterStaticValidator) validateMasterProfile(path string, mp
if sr.SubscriptionID != sv.r.SubscriptionID {
return api.NewCloudError(http.StatusBadRequest, api.CloudErrorCodeInvalidParameter, path+".subnetId", "The provided master VM subnet '%s' is invalid: must be in same subscription as cluster.", mp.SubnetID)
}
if mp.DiskEncryptionSetID != "" {
if !validate.RxDiskEncryptionSetID.MatchString(mp.DiskEncryptionSetID) {
return api.NewCloudError(http.StatusBadRequest, api.CloudErrorCodeInvalidParameter, path+".diskEncryptionSetId", "The provided master disk encryption set '%s' is invalid.", mp.DiskEncryptionSetID)
}
desr, err := azure.ParseResourceID(mp.DiskEncryptionSetID)
if err != nil {
return err
}
if desr.SubscriptionID != sv.r.SubscriptionID {
return api.NewCloudError(http.StatusBadRequest, api.CloudErrorCodeInvalidParameter, path+".diskEncryptionSetId", "The provided master disk encryption set '%s' is invalid: must be in same subscription as cluster.", mp.DiskEncryptionSetID)
}
}

return nil
}
Expand Down Expand Up @@ -262,6 +273,9 @@ func (sv *openShiftClusterStaticValidator) validateWorkerProfile(path string, wp
if wp.Count < 2 || wp.Count > 50 {
return api.NewCloudError(http.StatusBadRequest, api.CloudErrorCodeInvalidParameter, path+".count", "The provided worker count '%d' is invalid.", wp.Count)
}
if !strings.EqualFold(mp.DiskEncryptionSetID, wp.DiskEncryptionSetID) {
return api.NewCloudError(http.StatusBadRequest, api.CloudErrorCodeInvalidParameter, path+".subnetId", "The provided worker disk encryption set '%s' is invalid: must be the same as master disk encryption set '%s'.", wp.DiskEncryptionSetID, mp.DiskEncryptionSetID)
}

return nil
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@ func TestOpenShiftClusterStaticValidateNetworkProfile(t *testing.T) {
}

func TestOpenShiftClusterStaticValidateMasterProfile(t *testing.T) {
tests := []*validateTest{
commonTests := []*validateTest{
{
name: "valid",
},
Expand All @@ -488,10 +488,37 @@ func TestOpenShiftClusterStaticValidateMasterProfile(t *testing.T) {
},
wantErr: "400: InvalidParameter: properties.masterProfile.subnetId: The provided master VM subnet '/subscriptions/7a3036d1-60a1-4605-8a41-44955e050804/resourcegroups/test-vnet/providers/Microsoft.Network/virtualNetworks/test-vnet/subnets/master' is invalid: must be in same subscription as cluster.",
},
{
name: "disk encryption set is invalid",
modify: func(oc *OpenShiftCluster) {
oc.Properties.MasterProfile.DiskEncryptionSetID = "invalid"
oc.Properties.WorkerProfiles[0].DiskEncryptionSetID = "invalid"
},
wantErr: "400: InvalidParameter: properties.masterProfile.diskEncryptionSetId: The provided master disk encryption set 'invalid' is invalid.",
},
{
name: "disk encryption set not matching cluster subscriptionId",
modify: func(oc *OpenShiftCluster) {
oc.Properties.MasterProfile.DiskEncryptionSetID = "/subscriptions/7a3036d1-60a1-4605-8a41-44955e050804/resourceGroups/fakeRG/providers/Microsoft.Compute/diskEncryptionSets/fakeDES1"
},
wantErr: "400: InvalidParameter: properties.masterProfile.diskEncryptionSetId: The provided master disk encryption set '/subscriptions/7a3036d1-60a1-4605-8a41-44955e050804/resourceGroups/fakeRG/providers/Microsoft.Compute/diskEncryptionSets/fakeDES1' is invalid: must be in same subscription as cluster.",
},
}

runTests(t, testModeCreate, tests)
runTests(t, testModeUpdate, tests)
createTests := []*validateTest{
{
name: "disk encryption set is valid",
modify: func(oc *OpenShiftCluster) {
desID := fmt.Sprintf("/subscriptions/%s/resourceGroups/test-cluster/providers/Microsoft.Compute/diskEncryptionSets/test-disk-encryption-set", subscriptionID)
oc.Properties.MasterProfile.DiskEncryptionSetID = desID
oc.Properties.WorkerProfiles[0].DiskEncryptionSetID = desID
},
},
}

runTests(t, testModeCreate, createTests)
runTests(t, testModeCreate, commonTests)
runTests(t, testModeUpdate, commonTests)
}

func TestOpenShiftClusterStaticValidateWorkerProfile(t *testing.T) {
Expand Down Expand Up @@ -570,6 +597,14 @@ func TestOpenShiftClusterStaticValidateWorkerProfile(t *testing.T) {
},
wantErr: "400: InvalidParameter: properties.workerProfiles['worker'].count: The provided worker count '51' is invalid.",
},
{
name: "disk encryption set not matching master disk encryption set",
modify: func(oc *OpenShiftCluster) {
oc.Properties.MasterProfile.DiskEncryptionSetID = fmt.Sprintf("/subscriptions/%s/resourceGroups/test-cluster/providers/Microsoft.Compute/diskEncryptionSets/test-disk-encryption-set", subscriptionID)
oc.Properties.WorkerProfiles[0].DiskEncryptionSetID = "/subscriptions/7a3036d1-60a1-4605-8a41-44955e050804/resourceGroups/fakeRG/providers/Microsoft.Compute/diskEncryptionSets/fakeDES1"
},
wantErr: "400: InvalidParameter: properties.workerProfiles['worker'].subnetId: The provided worker disk encryption set '/subscriptions/7a3036d1-60a1-4605-8a41-44955e050804/resourceGroups/fakeRG/providers/Microsoft.Compute/diskEncryptionSets/fakeDES1' is invalid: must be the same as master disk encryption set '/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/test-cluster/providers/Microsoft.Compute/diskEncryptionSets/test-disk-encryption-set'.",
},
}

// We do not perform this validation on update
Expand Down
108 changes: 108 additions & 0 deletions pkg/api/validate/dynamic/diskencryptionset.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
package dynamic

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.

import (
"context"
"fmt"
"net/http"
"strings"

"github.com/Azure/go-autorest/autorest"
"github.com/Azure/go-autorest/autorest/azure"
"k8s.io/apimachinery/pkg/util/wait"

"github.com/Azure/ARO-RP/pkg/api"
)

func (dv *dynamic) ValidateDiskEncryptionSets(ctx context.Context, oc *api.OpenShiftCluster) error {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need to validate location too. If I recall right keyvault + DES has to be in the same region too where cluster is.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right:

All resources related to your customer-managed keys (Azure Key Vaults, disk encryption sets, VMs, disks, and snapshots) must be in the same subscription and region.

However, I think it should be enough to validate only the location of the DES here: we do not work with the KV directly and do not even need permissions for it. Sounds like validating the location of the KV is a responsibility of the compute RP: you first create a KV, then you create a DES via the compute RP (which should have its own validation), and only after this do you create an ARO cluster providing the DES resource ID.

Looking into it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mjudeikis I added location validation.

I think it should be enough to validation only location of DES here: we do not work with KV directly and do not even need permissions for it. Sounds like validation of location of KV is a responsibility of compute RP...

I looked into it and it seems like compute RP doesn't validate location of the KV when creating DES: I was able to create a DES in westeurope with a KV from eastus. Not sure whether it is just outdated documentation or lack of validation on the compute RP.

I would prefer not to validate the KV location, because it will likely require extra permissions for the RP and cluster SP. But we can look into it. What do you think?

Copy link
Contributor

@mjudeikis mjudeikis Jul 13, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But were you able to use a DES in westeurope with a cluster in eastus? I'm interested in whether this works or does not work. I expect this to fail.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But were you able to use DES in westeurope with cluster in eastusu?

@mjudeikis with the new validation in place it will not be possible because our validation will fail: des location != cluster location. But I'm going to test this case this afternoon:

  • KV from westeurope
  • DES in eastus with KV from westeurope
  • Cluster in eastus

It should pass our validation, but I expect it to fail on the underlying RP somewhere. But who knows...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mjudeikis, ok so it looks like compute RP doesn't validate when you create a DES, but does validate when you create a VM. I got a DeploymentFailed with KeyVaultAndDiskInDifferentRegions for each master and bootstrap VM.

Not great. I wish they were validating it during DES creation...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now this is something I seen :) we need to catch this

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I looked into catching it and there are two options:

  1. Catch in the dynamic validation.
  2. Catch DeploymentFailed and check if one of the deployment errors has something to do with KeyVaultAndDiskInDifferentRegions.

And both end up being a mess.

If we want to catch on validation we need:

  1. Customer to give us extra permissions which is not great as we only need it during cluster creation for validation of something which is actually not ARO responsibility.
  2. We will have to do the following (additional) checks:
    • Does the disk encryption set have a source vault set (not sure in which circumstances it can be nil, but the field is optional/nilable)
    • Do we have permissions to read KV object?
    • Does the key vault actually exist (you can create a KV & disk encryption set, then delete key vault and try to create a cluster)
    • Is KV in the same region as cluster?
    • Probably some other edge cases.

If we want to catch this on DeploymentFailed — then it is also tricky. We are already catching DeploymentFailed in pkg/cluster/arm.go and propagating it to the customer. If we want to have a nice message, then we need to handle at least these failures:

  1. Disk encryption set exists and in correct region, but KV is in different region. We can catch it by KeyVaultAndDiskInDifferentRegions.
  2. Disk encryption set exists, but key vault doesn't exist. We can't catch it easily: errors are just BadRequest and text message.

Even if we do catch these cases, what do we do next? Do we introduce a special case of api.CloudErrorCodeDeploymentFailed where we say "your key vault is in a different region/doesn't exist"? What about other errors from DeploymentFailed? It is possible that DES/KV errors are not the only cause of the deployment failing. Do we just ignore other errors?

After looking at and trying different options, my opinion on this is that it is not worth handling, as we are already handling DeploymentFailed in pkg/cluster/arm.go and propagating all the errors from ARM to the customer:

if serviceErr != nil {
b, _ := json.Marshal(serviceErr)
return &api.CloudError{
StatusCode: http.StatusBadRequest,
CloudErrorBody: &api.CloudErrorBody{
Code: api.CloudErrorCodeDeploymentFailed,
Message: "Deployment failed.",
Details: []api.CloudErrorBody{
{
Message: string(b),
},
},
},
}
}

dv.log.Print("ValidateDiskEncryptionSet")

// It is very likely that master and worker profiles use the same
// disk encryption set, so to optimise we only validate unique ones.
// We maintain the slice of ids separately to the map to have stable
// validation order because iteration order for maps is not stable.
uniqueIds := map[string]struct{}{}
ids := []string{}
paths := []string{}
if oc.Properties.MasterProfile.DiskEncryptionSetID != "" {
uniqueIds[strings.ToLower(oc.Properties.MasterProfile.DiskEncryptionSetID)] = struct{}{}
ids = append(ids, oc.Properties.MasterProfile.DiskEncryptionSetID)
paths = append(paths, "properties.masterProfile.diskEncryptionSetId")
}
for i, wp := range oc.Properties.WorkerProfiles {
if wp.DiskEncryptionSetID != "" {
lowercasedId := strings.ToLower(wp.DiskEncryptionSetID)
if _, ok := uniqueIds[lowercasedId]; ok {
continue
}

uniqueIds[lowercasedId] = struct{}{}
ids = append(ids, wp.DiskEncryptionSetID)
paths = append(paths, fmt.Sprintf("properties.workerProfiles[%d].diskEncryptionSetId", i))
}
}

for i, id := range ids {
r, err := azure.ParseResourceID(id)
if err != nil {
return err
}

err = dv.validateDiskEncryptionSetPermissions(ctx, &r, paths[i])
if err != nil {
return err
}

err = dv.validateDiskEncryptionSetLocation(ctx, &r, oc.Location, paths[i])
if err != nil {
return err
}

}

return nil
}

// validateDiskEncryptionSetPermissions verifies that the identity backing
// this validator (first-party resource provider or cluster service
// principal, depending on the authorizer) can read the given disk
// encryption set. A permissions failure and a missing resource are mapped
// to distinct CloudErrors; any other error is returned unchanged.
func (dv *dynamic) validateDiskEncryptionSetPermissions(ctx context.Context, desr *azure.Resource, path string) error {
	dv.log.Print("validateDiskEncryptionSetPermissions")

	// The error code reported on a permissions failure depends on whose
	// credentials are being checked.
	code := api.CloudErrorCodeInvalidResourceProviderPermissions
	if dv.authorizerType == AuthorizerClusterServicePrincipal {
		code = api.CloudErrorCodeInvalidServicePrincipalPermissions
	}

	err := dv.validateActions(ctx, desr, []string{
		"Microsoft.Compute/diskEncryptionSets/read",
	})
	if err == wait.ErrWaitTimeout {
		// Polling for the action never succeeded: treat as missing permission.
		return api.NewCloudError(http.StatusBadRequest, code, path, "The %s service principal does not have Reader permission on disk encryption set '%s'.", dv.authorizerType, desr.String())
	}
	if detailedErr, ok := err.(autorest.DetailedError); ok && detailedErr.StatusCode == http.StatusNotFound {
		// The resource ID parsed but the set does not exist (or is invisible to us).
		return api.NewCloudError(http.StatusBadRequest, api.CloudErrorCodeInvalidLinkedDiskEncryptionSet, path, "The disk encryption set '%s' could not be found.", desr.String())
	}

	return err
}

// validateDiskEncryptionSetLocation fetches the disk encryption set and
// verifies that it lives in the same region as the cluster, since Azure
// requires a DES and the disks it encrypts to be co-located. path is the
// document path reported as the target of any CloudError.
func (dv *dynamic) validateDiskEncryptionSetLocation(ctx context.Context, desr *azure.Resource, location, path string) error {
	dv.log.Print("validateDiskEncryptionSetLocation")

	des, err := dv.diskEncryptionSets.Get(ctx, desr.ResourceGroup, desr.ResourceName)
	if err != nil {
		// The set may have been deleted between the permissions check and now.
		if detailedErr, ok := err.(autorest.DetailedError); ok &&
			detailedErr.StatusCode == http.StatusNotFound {
			return api.NewCloudError(http.StatusBadRequest, api.CloudErrorCodeInvalidLinkedDiskEncryptionSet, path, "The disk encryption set '%s' could not be found.", desr.String())
		}
		return err
	}

	// Locations are compared case-insensitively ("eastus" == "EastUS").
	// NOTE(review): assumes des.Location is non-nil on a successful Get —
	// TODO confirm against the compute API contract.
	if !strings.EqualFold(*des.Location, location) {
		// Bug fix: report the offending property path as the error target
		// (was previously an empty string, unlike every sibling validation).
		return api.NewCloudError(http.StatusBadRequest, api.CloudErrorCodeInvalidLinkedDiskEncryptionSet, path, "The disk encryption set location '%s' must match the cluster location '%s'.", *des.Location, location)
	}

	return nil
}
Loading