Skip to content

Commit

Permalink
fix(cd): allow release tags to be deployed
Browse files Browse the repository at this point in the history
  • Loading branch information
smrz2001 committed Nov 7, 2023
1 parent 1ae8fce commit 5629b06
Show file tree
Hide file tree
Showing 7 changed files with 131 additions and 103 deletions.
30 changes: 15 additions & 15 deletions cd/manager/common/aws/ddb/dynamoDb.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ type DynamoDb struct {
}

const defaultJobStateTtl = 2 * 7 * 24 * time.Hour // Two weeks
const buildHashTag = "sha_tag"
const buildTag = "sha_tag"

// buildState represents build/deploy commit hash information. This information is maintained in a legacy DynamoDB table
// used by our utility AWS Lambdas.
// buildState represents build/deploy tag information. This information is maintained in a legacy DynamoDB table used by
// our utility AWS Lambdas.
type buildState struct {
Key manager.DeployComponent `dynamodbav:"key"`
DeployTag string `dynamodbav:"deployTag"`
Expand Down Expand Up @@ -321,7 +321,7 @@ func (db DynamoDb) WriteJob(jobState job.JobState) error {
}
}

func (db DynamoDb) UpdateBuildHash(component manager.DeployComponent, sha string) error {
func (db DynamoDb) UpdateBuildTag(component manager.DeployComponent, buildTag string) error {
ctx, cancel := context.WithTimeout(context.Background(), manager.DefaultHttpWaitTime)
defer cancel()

Expand All @@ -336,13 +336,13 @@ func (db DynamoDb) UpdateBuildHash(component manager.DeployComponent, sha string
"#shaTag": "sha_tag",
},
ExpressionAttributeValues: map[string]types.AttributeValue{
":sha": &types.AttributeValueMemberS{Value: sha},
":sha": &types.AttributeValueMemberS{Value: buildTag},
},
})
return err
}

func (db DynamoDb) UpdateDeployHash(component manager.DeployComponent, sha string) error {
func (db DynamoDb) UpdateDeployTag(component manager.DeployComponent, deployTag string) error {
ctx, cancel := context.WithTimeout(context.Background(), manager.DefaultHttpWaitTime)
defer cancel()

Expand All @@ -356,33 +356,33 @@ func (db DynamoDb) UpdateDeployHash(component manager.DeployComponent, sha strin
"#deployTag": "deployTag",
},
ExpressionAttributeValues: map[string]types.AttributeValue{
":sha": &types.AttributeValueMemberS{Value: sha},
":sha": &types.AttributeValueMemberS{Value: deployTag},
},
})
return err
}

func (db DynamoDb) GetBuildHashes() (map[manager.DeployComponent]string, error) {
func (db DynamoDb) GetBuildTags() (map[manager.DeployComponent]string, error) {
if buildStates, err := db.getBuildStates(); err != nil {
return nil, err
} else {
commitHashes := make(map[manager.DeployComponent]string, len(buildStates))
buildTags := make(map[manager.DeployComponent]string, len(buildStates))
for _, state := range buildStates {
commitHashes[state.Key] = state.BuildInfo[buildHashTag].(string)
buildTags[state.Key] = state.BuildInfo[buildTag].(string)
}
return commitHashes, nil
return buildTags, nil
}
}

func (db DynamoDb) GetDeployHashes() (map[manager.DeployComponent]string, error) {
func (db DynamoDb) GetDeployTags() (map[manager.DeployComponent]string, error) {
if buildStates, err := db.getBuildStates(); err != nil {
return nil, err
} else {
commitHashes := make(map[manager.DeployComponent]string, len(buildStates))
deployTags := make(map[manager.DeployComponent]string, len(buildStates))
for _, state := range buildStates {
commitHashes[state.Key] = state.DeployTag
deployTags[state.Key] = state.DeployTag
}
return commitHashes, nil
return deployTags, nil
}
}

Expand Down
24 changes: 12 additions & 12 deletions cd/manager/common/aws/ecs/ecs.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,13 +147,13 @@ func (e Ecs) GetLayout(clusters []string) (*manager.Layout, error) {
return layout, nil
}

func (e Ecs) UpdateLayout(layout *manager.Layout, commitHash string) error {
func (e Ecs) UpdateLayout(layout *manager.Layout, deployTag string) error {
for clusterName, cluster := range layout.Clusters {
clusterRepo := e.getEcrRepo(*layout.Repo) // The main layout repo should never be null
if cluster.Repo != nil {
clusterRepo = e.getEcrRepo(*cluster.Repo)
}
if err := e.updateEnvCluster(cluster, clusterName, clusterRepo, commitHash); err != nil {
if err := e.updateEnvCluster(cluster, clusterName, clusterRepo, deployTag); err != nil {
return err
}
}
Expand Down Expand Up @@ -431,16 +431,16 @@ func (e Ecs) listEcsTasks(cluster, family string) ([]string, error) {
return listTasksOutput.TaskArns, nil
}

func (e Ecs) updateEnvCluster(cluster *manager.Cluster, clusterName, clusterRepo, commitHash string) error {
if err := e.updateEnvTaskSet(cluster.ServiceTasks, deployType_Service, clusterName, clusterRepo, commitHash); err != nil {
func (e Ecs) updateEnvCluster(cluster *manager.Cluster, clusterName, clusterRepo, deployTag string) error {
if err := e.updateEnvTaskSet(cluster.ServiceTasks, deployType_Service, clusterName, clusterRepo, deployTag); err != nil {
return err
} else if err = e.updateEnvTaskSet(cluster.Tasks, deployType_Task, clusterName, clusterRepo, commitHash); err != nil {
} else if err = e.updateEnvTaskSet(cluster.Tasks, deployType_Task, clusterName, clusterRepo, deployTag); err != nil {
return err
}
return nil
}

func (e Ecs) updateEnvTaskSet(taskSet *manager.TaskSet, deployType string, cluster, clusterRepo, commitHash string) error {
func (e Ecs) updateEnvTaskSet(taskSet *manager.TaskSet, deployType string, cluster, clusterRepo, deployTag string) error {
if taskSet != nil {
for taskSetName, task := range taskSet.Tasks {
taskSetRepo := clusterRepo
Expand All @@ -449,11 +449,11 @@ func (e Ecs) updateEnvTaskSet(taskSet *manager.TaskSet, deployType string, clust
}
switch deployType {
case deployType_Service:
if err := e.updateEnvServiceTask(task, cluster, taskSetName, taskSetRepo, commitHash); err != nil {
if err := e.updateEnvServiceTask(task, cluster, taskSetName, taskSetRepo, deployTag); err != nil {
return err
}
case deployType_Task:
if err := e.updateEnvTask(task, cluster, taskSetName, taskSetRepo, commitHash); err != nil {
if err := e.updateEnvTask(task, cluster, taskSetName, taskSetRepo, deployTag); err != nil {
return err
}
default:
Expand All @@ -464,25 +464,25 @@ func (e Ecs) updateEnvTaskSet(taskSet *manager.TaskSet, deployType string, clust
return nil
}

func (e Ecs) updateEnvServiceTask(task *manager.Task, cluster, service, taskSetRepo, commitHash string) error {
func (e Ecs) updateEnvServiceTask(task *manager.Task, cluster, service, taskSetRepo, deployTag string) error {
taskRepo := taskSetRepo
if task.Repo != nil {
taskRepo = e.getEcrRepo(*task.Repo)
}
if id, err := e.updateEcsService(cluster, service, taskRepo+":"+commitHash, task.Name, task.Temp); err != nil {
if id, err := e.updateEcsService(cluster, service, taskRepo+":"+deployTag, task.Name, task.Temp); err != nil {
return err
} else {
task.Id = id
return nil
}
}

func (e Ecs) updateEnvTask(task *manager.Task, cluster, taskName, taskSetRepo, commitHash string) error {
func (e Ecs) updateEnvTask(task *manager.Task, cluster, taskName, taskSetRepo, deployTag string) error {
taskRepo := taskSetRepo
if task.Repo != nil {
taskRepo = e.getEcrRepo(*task.Repo)
}
if id, err := e.updateEcsTask(cluster, taskName, taskRepo+":"+commitHash, task.Name, task.Temp); err != nil {
if id, err := e.updateEcsTask(cluster, taskName, taskRepo+":"+deployTag, task.Name, task.Temp); err != nil {
return err
} else {
task.Id = id
Expand Down
7 changes: 7 additions & 0 deletions cd/manager/common/job/models.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,19 @@ const (
DeployJobParam_Component string = "component"
DeployJobParam_Sha string = "sha"
DeployJobParam_ShaTag string = "shaTag"
DeployJobParam_DeployTag string = "deployTag"
DeployJobParam_Layout string = "layout"
DeployJobParam_Manual string = "manual"
DeployJobParam_Force string = "force"
DeployJobParam_Rollback string = "rollback"
)

const (
DeployJobTarget_Latest = "latest"
DeployJobTarget_Release = "release"
DeployJobTarget_Rollback = "rollback"
)

const (
AnchorJobParam_Delayed string = "delayed"
AnchorJobParam_Stalled string = "stalled"
Expand Down
17 changes: 13 additions & 4 deletions cd/manager/jobmanager/jobManager.go
Original file line number Diff line number Diff line change
Expand Up @@ -500,18 +500,27 @@ func (m *JobManager) postProcessJob(jobState job.JobState) {
log.Printf("postProcessJob: failed to queue test workflow after deploy: %v, %s", err, manager.PrintJob(jobState))
}
}
// For failed deployments, rollback to the previously deployed commit hash.
// For failed deployments, rollback to the previously deployed tag.
case job.JobStage_Failed:
{
// Only rollback if this wasn't already a rollback attempt that failed
if rollback, _ := jobState.Params[job.DeployJobParam_Rollback].(bool); !rollback {
if _, err := m.NewJob(job.JobState{
if component, found := jobState.Params[job.DeployJobParam_Component].(string); !found {
log.Printf("postProcessJob: missing component (ceramic, ipfs, cas, casv5, rust-ceramic): %s", manager.PrintJob(jobState))
} else
// Get the latest deployed tag from the database. We're getting this from the build tags because
// the last successfully deployed tag would have been the same as the current build tag.
if buildTags, err := m.db.GetBuildTags(); err != nil {
log.Printf("postProcessJob: failed to retrieve build tags: %v, %s", err, manager.PrintJob(jobState))
} else if buildTag, found := buildTags[manager.DeployComponent(component)]; !found {
log.Printf("postProcessJob: missing component build tag: %s, %s", component, manager.PrintJob(jobState))
} else if _, err := m.NewJob(job.JobState{
Type: job.JobType_Deploy,
Params: map[string]interface{}{
job.DeployJobParam_Component: jobState.Params[job.DeployJobParam_Component],
job.DeployJobParam_Rollback: true,
// Make the job lookup the last successfully deployed commit hash from the database
job.DeployJobParam_Sha: ".",
job.DeployJobParam_Sha: job.DeployJobTarget_Rollback,
job.DeployJobParam_ShaTag: buildTag,
// No point in waiting for other jobs to complete before redeploying a working image
job.DeployJobParam_Force: true,
job.JobParam_Source: manager.ServiceName,
Expand Down
89 changes: 47 additions & 42 deletions cd/manager/jobs/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ type deployJob struct {
baseJob
component manager.DeployComponent
sha string
shaTag string
deployTag string
manual bool
rollback bool
force bool
Expand Down Expand Up @@ -53,19 +55,21 @@ const (
)

const defaultFailureTime = 30 * time.Minute
const buildHashLatest = "latest"
const anchorWorkerRepo = "ceramic-prod-cas-runner"

func DeployJob(jobState job.JobState, db manager.Database, notifs manager.Notifs, d manager.Deployment, repo manager.Repository) (manager.JobSm, error) {
if component, found := jobState.Params[job.DeployJobParam_Component].(string); !found {
return nil, fmt.Errorf("deployJob: missing component (ceramic, ipfs, cas)")
return nil, fmt.Errorf("deployJob: missing component (ceramic, ipfs, cas, casv5, rust-ceramic)")
} else if sha, found := jobState.Params[job.DeployJobParam_Sha].(string); !found {
return nil, fmt.Errorf("deployJob: missing sha")
return nil, fmt.Errorf("deployJob: missing target")
} else if shaTag, found := jobState.Params[job.DeployJobParam_ShaTag].(string); !found {
return nil, fmt.Errorf("deployJob: missing tag")
} else {
deployTag, _ := jobState.Params[job.DeployJobParam_DeployTag].(string)
manual, _ := jobState.Params[job.DeployJobParam_Manual].(bool)
rollback, _ := jobState.Params[job.DeployJobParam_Rollback].(bool)
force, _ := jobState.Params[job.DeployJobParam_Force].(bool)
return &deployJob{baseJob{jobState, db, notifs}, manager.DeployComponent(component), sha, manual, rollback, force, os.Getenv(manager.EnvVar_Env), d, repo}, nil
return &deployJob{baseJob{jobState, db, notifs}, manager.DeployComponent(component), sha, shaTag, deployTag, manual, rollback, force, os.Getenv(manager.EnvVar_Env), d, repo}, nil
}
}

Expand All @@ -74,14 +78,17 @@ func (d deployJob) Advance() (job.JobState, error) {
switch d.state.Stage {
case job.JobStage_Queued:
{
if deployHashes, err := d.db.GetDeployHashes(); err != nil {
if deployTags, err := d.db.GetDeployTags(); err != nil {
return d.advance(job.JobStage_Failed, now, err)
} else if err = d.prepareJob(deployHashes); err != nil {
} else if err = d.prepareJob(deployTags); err != nil {
return d.advance(job.JobStage_Failed, now, err)
} else if !d.manual && !d.force && (d.sha == deployHashes[d.component]) {
// Skip automated jobs if the commit hash being deployed is the same as the commit hash already
// deployed. We don't do this for manual jobs because deploying an already deployed hash might be
// intentional, or for force deploys/rollbacks because we WANT to push through such deployments.
} else if deployTag, found := d.state.Params[job.DeployJobParam_DeployTag].(string); found &&
!d.manual && !d.force &&
(deployTag == strings.Split(deployTags[d.component], ",")[0]) {
// Skip automated jobs if the tag being deployed is the same as the tag already deployed. We don't do
// this for manual jobs because deploying an already deployed tag might be intentional, or for force
// deploys/rollbacks because we WANT to push through such deployments.
//
// Rollbacks are also force deploys, so we don't need to check for the former explicitly since we're
// already checking for force deploys.
return d.advance(job.JobStage_Skipped, now, nil)
Expand All @@ -96,14 +103,14 @@ func (d deployJob) Advance() (job.JobState, error) {
}
case job.JobStage_Dequeued:
{
if err := d.updateEnv(d.sha); err != nil {
if err := d.updateEnv(); err != nil {
return d.advance(job.JobStage_Failed, now, err)
} else {
d.state.Params[job.JobParam_Start] = float64(time.Now().UnixNano())
// For started deployments update the build commit hash in the DB.
if err = d.db.UpdateBuildHash(d.component, d.sha); err != nil {
// For started deployments update the build tag in the DB
if err = d.db.UpdateBuildTag(d.component, d.deployTag); err != nil {
// This isn't an error big enough to fail the job, just report and move on.
log.Printf("deployJob: failed to update build hash: %v, %s", err, manager.PrintJob(d.state))
log.Printf("deployJob: failed to update build tag: %v, %s", err, manager.PrintJob(d.state))
}
return d.advance(job.JobStage_Started, now, nil)
}
Expand All @@ -113,10 +120,10 @@ func (d deployJob) Advance() (job.JobState, error) {
if deployed, err := d.checkEnv(); err != nil {
return d.advance(job.JobStage_Failed, now, err)
} else if deployed {
// For completed deployments update the deployed commit hash in the DB.
if err = d.db.UpdateDeployHash(d.component, d.sha); err != nil {
// For completed deployments update the deployed tag in the DB, and append the deployment target.
if err = d.db.UpdateDeployTag(d.component, d.deployTag+","+d.sha); err != nil {
// This isn't an error big enough to fail the job, just report and move on.
log.Printf("deployJob: failed to update deploy hash: %v, %s", err, manager.PrintJob(d.state))
log.Printf("deployJob: failed to update deploy tag: %v, %s", err, manager.PrintJob(d.state))
}
return d.advance(job.JobStage_Completed, now, nil)
} else if job.IsTimedOut(d.state, defaultFailureTime) {
Expand All @@ -137,52 +144,52 @@ func (d deployJob) Advance() (job.JobState, error) {
}
}

func (d deployJob) prepareJob(deployHashes map[manager.DeployComponent]string) error {
func (d deployJob) prepareJob(deployTags map[manager.DeployComponent]string) error {
deployTag := ""
manual := false
if d.rollback {
// Use the latest successfully deployed commit hash when rolling back
d.sha = deployHashes[d.component]
// Use the latest successfully deployed tag when rolling back
deployTag = deployTags[d.component]
} else
// - If the specified commit hash is "latest", fetch the latest branch commit hash from GitHub.
// - If the specified deployment target is "latest", fetch the latest branch commit hash from GitHub.
// - Else if the specified deployment target is "release", use the specified release tag.
// - Else if it's a valid hash, use it.
// - Else use the latest build hash from the database.
// - Else use the last successfully deployed target from the database.
//
// The last two cases will only happen when redeploying manually, so we can note that in the notification.
if d.sha == buildHashLatest {
shaTag, _ := d.state.Params[job.DeployJobParam_ShaTag].(string)
// The last 3 cases will only happen when (re)deploying manually, so we can note that in the notification.
if d.sha == job.DeployJobTarget_Latest {
if repo, err := manager.ComponentRepo(d.component); err != nil {
return err
} else if latestSha, err := d.repo.GetLatestCommitHash(
repo.Org,
repo.Name,
d.envBranch(d.component, manager.EnvType(os.Getenv(manager.EnvVar_Env))),
shaTag,
d.shaTag,
); err != nil {
return err
} else {
d.sha = latestSha
deployTag = latestSha
}
} else if (d.sha == job.DeployJobTarget_Release) || (d.sha == job.DeployJobTarget_Rollback) {
deployTag = d.shaTag
manual = true
} else if manager.IsValidSha(d.sha) {
deployTag = d.sha
manual = true
} else {
if !manager.IsValidSha(d.sha) {
// Get the latest build commit hash from the database when making a fresh deployment
if buildHashes, err := d.db.GetBuildHashes(); err != nil {
return err
} else {
d.sha = buildHashes[d.component]
}
}
d.manual = true
return fmt.Errorf("prepareJob: invalid deployment type")
}
d.state.Params[job.DeployJobParam_Sha] = d.sha
if d.manual {
d.state.Params[job.DeployJobParam_DeployTag] = deployTag
if manual {
d.state.Params[job.DeployJobParam_Manual] = true
}
return nil
}

func (d deployJob) updateEnv(commitHash string) error {
func (d deployJob) updateEnv() error {
// Layout should already be present
layout, _ := d.state.Params[job.DeployJobParam_Layout].(manager.Layout)
return d.d.UpdateLayout(&layout, commitHash)
return d.d.UpdateLayout(&layout, d.deployTag)
}

func (d deployJob) checkEnv() (bool, error) {
Expand Down Expand Up @@ -213,12 +220,10 @@ func (d deployJob) generateEnvLayout(component manager.DeployComponent) (*manage
casV5Cluster := "app-cas-" + d.env
clusters := []string{privateCluster, publicCluster, casCluster, casV5Cluster}
if ecrRepo, err := d.componentEcrRepo(component); err != nil {
log.Printf("generateEnvLayout: get ecr repo error: %s, %v", component, err)
return nil, err
} else
// Populate the service layout by retrieving the clusters/services from ECS
if currentLayout, err := d.d.GetLayout(clusters); err != nil {
log.Printf("generateEnvLayout: get layout error: %s, %v", component, err)
return nil, err
} else {
newLayout := &manager.Layout{Clusters: map[string]*manager.Cluster{}, Repo: &ecrRepo}
Expand Down
Loading

0 comments on commit 5629b06

Please sign in to comment.