diff --git a/cli/azd/pkg/azapi/deployment_state_test.go b/cli/azd/pkg/azapi/deployment_state_test.go new file mode 100644 index 00000000000..ae100b98924 --- /dev/null +++ b/cli/azd/pkg/azapi/deployment_state_test.go @@ -0,0 +1,49 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package azapi + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestIsActiveDeploymentState(t *testing.T) { + active := []DeploymentProvisioningState{ + DeploymentProvisioningStateAccepted, + DeploymentProvisioningStateCanceling, + DeploymentProvisioningStateCreating, + DeploymentProvisioningStateDeleting, + DeploymentProvisioningStateDeletingResources, + DeploymentProvisioningStateDeploying, + DeploymentProvisioningStateRunning, + DeploymentProvisioningStateUpdating, + DeploymentProvisioningStateUpdatingDenyAssignments, + DeploymentProvisioningStateValidating, + DeploymentProvisioningStateWaiting, + } + + for _, state := range active { + t.Run(string(state), func(t *testing.T) { + require.True(t, IsActiveDeploymentState(state), + "expected %s to be active", state) + }) + } + + inactive := []DeploymentProvisioningState{ + DeploymentProvisioningStateSucceeded, + DeploymentProvisioningStateFailed, + DeploymentProvisioningStateCanceled, + DeploymentProvisioningStateDeleted, + DeploymentProvisioningStateNotSpecified, + DeploymentProvisioningStateReady, + } + + for _, state := range inactive { + t.Run(string(state), func(t *testing.T) { + require.False(t, IsActiveDeploymentState(state), + "expected %s to be inactive", state) + }) + } +} diff --git a/cli/azd/pkg/azapi/deployments.go b/cli/azd/pkg/azapi/deployments.go index 1e079370a4c..886d1e7c47c 100644 --- a/cli/azd/pkg/azapi/deployments.go +++ b/cli/azd/pkg/azapi/deployments.go @@ -107,6 +107,28 @@ const ( DeploymentProvisioningStateUpdating DeploymentProvisioningState = "Updating" ) +// IsActiveDeploymentState reports whether the given provisioning state +// 
indicates a deployment that is still in progress, including transitional +// states like canceling or deleting that can still block new deployments. States not listed return false. +func IsActiveDeploymentState(state DeploymentProvisioningState) bool { + switch state { + case DeploymentProvisioningStateAccepted, + DeploymentProvisioningStateCanceling, + DeploymentProvisioningStateCreating, + DeploymentProvisioningStateDeleting, + DeploymentProvisioningStateDeletingResources, + DeploymentProvisioningStateDeploying, + DeploymentProvisioningStateRunning, + DeploymentProvisioningStateUpdating, + DeploymentProvisioningStateUpdatingDenyAssignments, + DeploymentProvisioningStateValidating, + DeploymentProvisioningStateWaiting: + return true + default: + return false + } +} + type DeploymentService interface { GenerateDeploymentName(baseName string) string CalculateTemplateHash( diff --git a/cli/azd/pkg/infra/provisioning/bicep/active_deployment_check_test.go b/cli/azd/pkg/infra/provisioning/bicep/active_deployment_check_test.go new file mode 100644 index 00000000000..e4f71693458 --- /dev/null +++ b/cli/azd/pkg/infra/provisioning/bicep/active_deployment_check_test.go @@ -0,0 +1,229 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package bicep + +import ( + "context" + "fmt" + "sync/atomic" + "testing" + "time" + + "github.com/azure/azure-dev/cli/azd/pkg/azapi" + "github.com/azure/azure-dev/cli/azd/pkg/infra" + "github.com/azure/azure-dev/cli/azd/test/mocks/mockinput" + "github.com/stretchr/testify/require" +) + +// activeDeploymentScope is a test helper that implements infra.Scope and lets +// the caller control what ListDeployments returns on each call. +type activeDeploymentScope struct { + // calls tracks how many times ListDeployments has been invoked. + calls atomic.Int32 + // activePerCall maps a 0-based call index to the list of deployments + // returned for that call. If the index is missing, nil is returned. 
+ activePerCall map[int][]*azapi.ResourceDeployment + // errOnCall, if non-nil, maps a call index to an error to return; it is checked before activePerCall. + errOnCall map[int]error +} + +func (s *activeDeploymentScope) SubscriptionId() string { return "test-sub" } + +func (s *activeDeploymentScope) Deployment(_ string) infra.Deployment { return nil } + +func (s *activeDeploymentScope) ListDeployments( + _ context.Context, +) ([]*azapi.ResourceDeployment, error) { + idx := int(s.calls.Add(1)) - 1 + if s.errOnCall != nil { + if e, ok := s.errOnCall[idx]; ok { + return nil, e + } + } + if s.activePerCall != nil { + return s.activePerCall[idx], nil + } + return nil, nil +} + +// newTestProvider returns a BicepProvider with fast poll settings (10ms interval, 2s timeout) for tests. +func newTestProvider() *BicepProvider { + return &BicepProvider{ + console: mockinput.NewMockConsole(), + activeDeployPollInterval: 10 * time.Millisecond, + activeDeployTimeout: 2 * time.Second, + } +} + +func TestWaitForActiveDeployments_NoActive(t *testing.T) { + scope := &activeDeploymentScope{} + p := newTestProvider() + + err := p.waitForActiveDeployments(t.Context(), scope, "test-deploy") + require.NoError(t, err) + require.Equal(t, int32(1), scope.calls.Load(), + "should call ListDeployments once") +} + +func TestWaitForActiveDeployments_InitialListError_NotFound(t *testing.T) { + scope := &activeDeploymentScope{ + errOnCall: map[int]error{ + 0: fmt.Errorf("listing: %w", infra.ErrDeploymentsNotFound), + }, + } + p := newTestProvider() + + // ErrDeploymentsNotFound (resource group doesn't exist yet) is safe to ignore. + err := p.waitForActiveDeployments(t.Context(), scope, "test-deploy") + require.NoError(t, err) +} + +func TestWaitForActiveDeployments_InitialListError_Other(t *testing.T) { + scope := &activeDeploymentScope{ + errOnCall: map[int]error{ + 0: fmt.Errorf("auth failure: access denied"), + }, + } + p := newTestProvider() + + // Non-NotFound errors are logged and skipped — the check is best-effort. 
+ err := p.waitForActiveDeployments(t.Context(), scope, "test-deploy") + require.NoError(t, err) +} + +func TestWaitForActiveDeployments_ActiveThenClear(t *testing.T) { + running := []*azapi.ResourceDeployment{ + { + Name: "deploy-1", + ProvisioningState: azapi.DeploymentProvisioningStateRunning, + }, + } + scope := &activeDeploymentScope{ + activePerCall: map[int][]*azapi.ResourceDeployment{ + 0: running, // first call: active + // second call (index 1): missing key → returns nil (no active) + }, + } + p := newTestProvider() + + err := p.waitForActiveDeployments(t.Context(), scope, "deploy-1") + require.NoError(t, err) + require.Equal(t, int32(2), scope.calls.Load(), + "should poll once, then see clear") +} + +func TestWaitForActiveDeployments_CancelledContext(t *testing.T) { + ctx, cancel := context.WithCancel(t.Context()) + + running := []*azapi.ResourceDeployment{ + { + Name: "deploy-forever", + ProvisioningState: azapi.DeploymentProvisioningStateRunning, + }, + } + scope := &activeDeploymentScope{ + // Always return active deployments. + // Seed multiple indices so a tick before ctx.Done doesn't hit a missing key. + activePerCall: map[int][]*azapi.ResourceDeployment{ + 0: running, + 1: running, + 2: running, + 3: running, + 4: running, + }, + } + p := newTestProvider() + + // Cancel immediately so the wait loop exits on the first select. 
+ cancel() + + err := p.waitForActiveDeployments(ctx, scope, "deploy-forever") + require.ErrorIs(t, err, context.Canceled) +} + +func TestWaitForActiveDeployments_PollError(t *testing.T) { + running := []*azapi.ResourceDeployment{ + { + Name: "deploy-1", + ProvisioningState: azapi.DeploymentProvisioningStateRunning, + }, + } + scope := &activeDeploymentScope{ + activePerCall: map[int][]*azapi.ResourceDeployment{ + 0: running, + }, + errOnCall: map[int]error{ + 1: fmt.Errorf("transient ARM failure"), + }, + } + p := newTestProvider() + + err := p.waitForActiveDeployments(t.Context(), scope, "deploy-1") + // Transient poll errors are logged and treated as cleared. + require.NoError(t, err) +} + +func TestWaitForActiveDeployments_PollNotFound(t *testing.T) { + // If the resource group is deleted externally while polling, + // ListDeployments returns ErrDeploymentsNotFound. The wait should + // treat this as "no active deployments" and return nil. + running := []*azapi.ResourceDeployment{ + { + Name: "deploy-1", + ProvisioningState: azapi.DeploymentProvisioningStateRunning, + }, + } + scope := &activeDeploymentScope{ + activePerCall: map[int][]*azapi.ResourceDeployment{ + 0: running, + }, + errOnCall: map[int]error{ + 1: infra.ErrDeploymentsNotFound, + }, + } + p := newTestProvider() + + err := p.waitForActiveDeployments(t.Context(), scope, "deploy-1") + require.NoError(t, err) +} + +func TestWaitForActiveDeployments_Timeout(t *testing.T) { + running := []*azapi.ResourceDeployment{ + { + Name: "stuck-deploy", + ProvisioningState: azapi.DeploymentProvisioningStateRunning, + }, + } + // Return active on each of the first 200 calls, well beyond what the 50ms timeout allows. 
+ perCall := make(map[int][]*azapi.ResourceDeployment) + for i := range 200 { + perCall[i] = running + } + + scope := &activeDeploymentScope{activePerCall: perCall} + p := &BicepProvider{ + console: mockinput.NewMockConsole(), + activeDeployPollInterval: 5 * time.Millisecond, + activeDeployTimeout: 50 * time.Millisecond, + } + + err := p.waitForActiveDeployments(t.Context(), scope, "stuck-deploy") + require.Error(t, err) + require.Contains(t, err.Error(), "timed out") + require.Contains(t, err.Error(), "stuck-deploy") +} + +func TestWaitForActiveDeployments_DifferentNameNotBlocked(t *testing.T) { + running := []*azapi.ResourceDeployment{{ + Name: "other-deploy", + ProvisioningState: azapi.DeploymentProvisioningStateRunning, + }} + scope := &activeDeploymentScope{ + activePerCall: map[int][]*azapi.ResourceDeployment{0: running}, + } + p := newTestProvider() + err := p.waitForActiveDeployments(t.Context(), scope, "my-deploy") + require.NoError(t, err) + require.Equal(t, int32(1), scope.calls.Load()) +} diff --git a/cli/azd/pkg/infra/provisioning/bicep/bicep_provider.go b/cli/azd/pkg/infra/provisioning/bicep/bicep_provider.go index d49d347b050..21d29b481e4 100644 --- a/cli/azd/pkg/infra/provisioning/bicep/bicep_provider.go +++ b/cli/azd/pkg/infra/provisioning/bicep/bicep_provider.go @@ -95,6 +95,12 @@ type BicepProvider struct { // Internal state // compileBicepResult is cached to avoid recompiling the same bicep file multiple times in the same azd run. compileBicepMemoryCache *compileBicepResult + + // activeDeployPollInterval and activeDeployTimeout override the defaults + // for the active-deployment wait loop. Zero means use the default. These + // are only set in tests. 
+ activeDeployPollInterval time.Duration + activeDeployTimeout time.Duration } // Name gets the name of the infra provider @@ -611,6 +617,104 @@ func logDS(msg string, v ...any) { log.Printf("%s : %s", "deployment-state: ", fmt.Sprintf(msg, v...)) } +const ( + // defaultActiveDeploymentPollInterval is how often we re-check for active deployments. + defaultActiveDeploymentPollInterval = 30 * time.Second + // defaultActiveDeploymentTimeout caps the total wait time for active deployments. + defaultActiveDeploymentTimeout = 30 * time.Minute +) + +// waitForActiveDeployments checks the target scope for in-progress deployments named +// deploymentName. If any are found it shows a warning and polls until they finish, the +// context is canceled, or the timeout is reached. Listing errors are logged and not returned. +func (p *BicepProvider) waitForActiveDeployments( + ctx context.Context, + scope infra.Scope, + deploymentName string, +) error { + active, err := infra.ListActiveDeploymentsByName(ctx, scope, deploymentName) + if err != nil { + // If the resource group doesn't exist yet, there are no active + // deployments — proceed normally. + if errors.Is(err, infra.ErrDeploymentsNotFound) { + return nil + } + // For other errors (auth, throttling, transient, unrecorded test + // responses), log and proceed. The active deployment check is a + // best-effort optimization — failing to list shouldn't block the deploy. 
+ log.Printf( + "active-deployment-check: unable to list deployments, skipping: %v", err) + return nil + } + + if len(active) == 0 { + return nil + } + + names := make([]string, len(active)) + for i, d := range active { + names[i] = d.Name + } + p.console.MessageUxItem(ctx, &ux.WarningMessage{ + Description: fmt.Sprintf( + "Waiting for %d active deployment(s) to complete: %s", + len(active), strings.Join(names, ", ")), + }) + + p.console.ShowSpinner(ctx, + "Waiting for active deployment(s) to complete", input.Step) + spinnerResult := input.StepFailed + defer func() { p.console.StopSpinner(ctx, "", spinnerResult) }() + + pollInterval := p.activeDeployPollInterval + if pollInterval == 0 { + pollInterval = defaultActiveDeploymentPollInterval + } + timeout := p.activeDeployTimeout + if timeout == 0 { + timeout = defaultActiveDeploymentTimeout + } + + deadlineTimer := time.NewTimer(timeout) + defer deadlineTimer.Stop() + ticker := time.NewTicker(pollInterval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-deadlineTimer.C: + // Refresh names from latest poll for an accurate timeout message + currentNames := make([]string, len(active)) + for i, d := range active { + currentNames[i] = d.Name + } + return fmt.Errorf( + "timed out after %s waiting for active "+ + "deployment(s) to complete: %s", + timeout, strings.Join(currentNames, ", ")) + case <-ticker.C: + active, err = infra.ListActiveDeploymentsByName(ctx, scope, deploymentName) + if err != nil { + if errors.Is(err, infra.ErrDeploymentsNotFound) { + spinnerResult = input.StepDone + return nil + } + // Transient poll error — treat as cleared and proceed + log.Printf( + "active-deployment-check: poll error, assuming cleared: %v", err) + spinnerResult = input.StepDone + return nil + } + if len(active) == 0 { + spinnerResult = input.StepDone + return nil + } + } + } +} + // Provisioning the infrastructure within the specified template func (p *BicepProvider) Deploy(ctx 
context.Context) (*provisioning.DeployResult, error) { if p.ignoreDeploymentState { @@ -722,6 +826,17 @@ func (p *BicepProvider) Deploy(ctx context.Context) (*provisioning.DeployResult, p.console.StopSpinner(ctx, "", input.StepDone) } + // Wait for any in-progress deployment with the same name at the target scope before deploying. + // Use scopeForTemplate to get the raw scope — deployment.Scope may have a nil + // inner scope in test mocks. + if activeScope, err := p.scopeForTemplate(planned.Template); err == nil { + if err := p.waitForActiveDeployments(ctx, activeScope, deployment.Name()); err != nil { + return nil, err + } + } else { + log.Printf("active-deployment-check: skipping, unable to determine scope: %v", err) + } + progressCtx, cancelProgress := context.WithCancel(ctx) var wg sync.WaitGroup wg.Add(1) diff --git a/cli/azd/pkg/infra/scope.go b/cli/azd/pkg/infra/scope.go index 303766d2d95..82ebed88cdb 100644 --- a/cli/azd/pkg/infra/scope.go +++ b/cli/azd/pkg/infra/scope.go @@ -24,6 +24,29 @@ type Scope interface { Deployment(deploymentName string) Deployment } +// ListActiveDeploymentsByName lists deployments at the given scope and returns +// only those named deploymentName whose provisioning state is active. Errors from +// scope.ListDeployments are returned unchanged. Deployments with other names are +// ignored so parallel deployments with different names do not block each other. +func ListActiveDeploymentsByName( + ctx context.Context, + scope Scope, + deploymentName string, +) ([]*azapi.ResourceDeployment, error) { + all, err := scope.ListDeployments(ctx) + if err != nil { + return nil, err + } + + var active []*azapi.ResourceDeployment + for _, d := range all { + if d.Name == deploymentName && azapi.IsActiveDeploymentState(d.ProvisioningState) { + active = append(active, d) + } + } + return active, nil +} + type Deployment interface { Scope // Name is the name of this deployment. 
diff --git a/cli/azd/resources/error_suggestions.yaml b/cli/azd/resources/error_suggestions.yaml index 5dc30847f2f..e8e3266c5b1 100644 --- a/cli/azd/resources/error_suggestions.yaml +++ b/cli/azd/resources/error_suggestions.yaml @@ -49,6 +49,18 @@ rules: # 4th most common error category (~128,054 errors in 90-day analysis) # ============================================================================ + - errorType: "DeploymentErrorLine" + properties: + Code: "DeploymentActive" + message: "Another deployment is already in progress in the target scope." + suggestion: > + Wait for the current deployment to complete, then retry. + You can check deployment status in the Azure portal under the + Deployments view for the target scope. + links: + - url: "https://learn.microsoft.com/azure/azure-resource-manager/troubleshooting/error-deployment-active" + title: "Troubleshoot DeploymentActive errors" + - errorType: "DeploymentErrorLine" properties: Code: "FlagMustBeSetForRestore"