Skip to content
This repository has been archived by the owner on Jan 8, 2024. It is now read-only.

Feature: status reporting for ALB releases #1567

Merged
merged 3 commits into from
Jun 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/1567.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:feature
plugin/aws-alb: Report on status of releases
```
45 changes: 30 additions & 15 deletions builtin/aws/alb/plugin.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions builtin/aws/alb/plugin.proto
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,6 @@ message TargetGroup {
// Release describes a release performed through an ALB. The Go releaser
// populates it at the end of Release and reads it back in Status.
message Release {
// url is the address of the release; the releaser sets it to "http://"
// followed by the hostname.
string url = 1;
// load_balancer_arn is the ARN of the load balancer used for the release.
string load_balancer_arn = 2;
// target_group_arn is the ARN of the released target group. Status passes it
// to DescribeTargetHealth to look up the health of the registered targets.
string target_group_arn = 4;
// region is the region of the released target. Status uses it to create the
// AWS session, and cannot determine a status when it is empty.
string region = 3;
}
163 changes: 162 additions & 1 deletion builtin/aws/alb/releaser.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package alb
import (
"context"
"fmt"
"time"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/awserr"
Expand All @@ -12,6 +13,7 @@ import (
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/waypoint-plugin-sdk/component"
"github.com/hashicorp/waypoint-plugin-sdk/docs"
sdk "github.com/hashicorp/waypoint-plugin-sdk/proto/gen"
"github.com/hashicorp/waypoint-plugin-sdk/terminal"
"github.com/hashicorp/waypoint/builtin/aws/utils"
)
Expand All @@ -20,6 +22,11 @@ type Releaser struct {
config ReleaserConfig
}

// Polling parameters used by Status while it waits for a target group to
// initialize.
const (
// targetGroupInitializationTimeoutSeconds is how long, in seconds, Status
// keeps polling before it gives up and reports a timeout.
targetGroupInitializationTimeoutSeconds int = 60
// targetGroupInitializationPollingIntervalSeconds is how long, in seconds,
// Status sleeps between successive target health checks.
targetGroupInitializationPollingIntervalSeconds int = 5
)

// Config implements Configurable
func (r *Releaser) Config() (interface{}, error) {
return &r.config, nil
Expand All @@ -30,7 +37,12 @@ func (r *Releaser) ReleaseFunc() interface{} {
return r.Release
}

// Release manages target group attachement to a configured ALB
// StatusFunc implements component.Status. It returns the Releaser's Status
// method, which builds the status report for a release.
func (r *Releaser) StatusFunc() interface{} {
return r.Status
}

// Release manages target group attachment to a configured ALB
func (r *Releaser) Release(
ctx context.Context,
log hclog.Logger,
Expand Down Expand Up @@ -304,9 +316,157 @@ func (r *Releaser) Release(
return &Release{
Url: "http://" + hostname,
LoadBalancerArn: *lb.LoadBalancerArn,
TargetGroupArn: target.Arn,
Region: target.Region,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I needed the region to build the right client. Other plugins have this on their config. Not sure which pattern we prefer.

}, nil
}

func (r *Releaser) Status(
ctx context.Context,
log hclog.Logger,
release *Release,
ui terminal.UI,
) (*sdk.StatusReport, error) {

var report sdk.StatusReport
report.External = true

if release.Region == "" {
log.Debug("Region is not available for this release. Unable to determine status.")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have a feeling that we might hit this case in the upgrade path, where someone upgrades a server then tries to promote a deployment that didn't encode the region?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would be good to test, sounds like a correct assumption.

return &report, nil
}

sess, err := utils.GetSession(&utils.SessionConfig{
Region: release.Region,
Logger: log,
})
if err != nil {
return nil, err
}

elbsrv := elbv2.New(sess)

sg := ui.StepGroup()
defer sg.Wait()

step := sg.Add("Gathering health report for AWS/ALB platform...")
defer step.Abort()

step.Update("Waiting for at least one target to pass initialization...")

var targetHealthDescriptions []*elbv2.TargetHealthDescription

startTime := time.Now().Unix()
for {
if targetHealthDescriptions != nil {
sleepDuration := time.Second * time.Duration(targetGroupInitializationPollingIntervalSeconds)
log.Debug("Sleeping %0.f seconds to give the following target group time to initialize:\n%s", sleepDuration.Seconds(), release.TargetGroupArn)
time.Sleep(sleepDuration)
}

if startTime+int64(targetGroupInitializationTimeoutSeconds) <= time.Now().Unix() {
report.HealthMessage = fmt.Sprintf("timed out after %d seconds waiting for the following target group to initialize:\n%s", time.Now().Unix()-startTime, release.TargetGroupArn)
report.Health = sdk.StatusReport_UNKNOWN
step.Status(terminal.StatusWarn)
step.Update(report.HealthMessage)
return &report, nil
}

tgHealthResp, err := elbsrv.DescribeTargetHealthWithContext(ctx, &elbv2.DescribeTargetHealthInput{
TargetGroupArn: &release.TargetGroupArn,
})
if err != nil {
return nil, fmt.Errorf("failed to describe target group %s health: %s", release.TargetGroupArn, err)
}

targetHealthDescriptions = tgHealthResp.TargetHealthDescriptions

// We may not have any targets if the target group was created very recently.
if len(targetHealthDescriptions) == 0 {
step.Update("Waiting for registered targets with health...")
continue
}

initializingCount := 0
for _, tgHealth := range targetHealthDescriptions {

// NOTE(izaaklauer) potentially unsafe dereference
if *tgHealth.TargetHealth.State == elbv2.TargetHealthStateEnumInitial {
initializingCount++
}
}
if initializingCount == len(targetHealthDescriptions) {
step.Update("Waiting for at least one target to finish initializing...")
continue
}

step.Update("Target group has been initialized.")
break
}

report.Resources = []*sdk.StatusReport_Resource{}

healthyCount := 0
for _, tgHealth := range targetHealthDescriptions {

targetId := *tgHealth.Target.Id

var health sdk.StatusReport_Health

switch *tgHealth.TargetHealth.State {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Potentially a dangerous deference, but we seem to do it in other places and it's ok.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe leave a note above the switch?

case elbv2.TargetHealthStateEnumHealthy:
healthyCount++
health = sdk.StatusReport_READY
case elbv2.TargetHealthStateEnumUnavailable:
// Lambda functions present this way. Defaulting to UNKNOWN seems reasonable here too.
healthyCount++
health = sdk.StatusReport_READY
case elbv2.TargetHealthStateEnumUnhealthy:
health = sdk.StatusReport_DOWN
default:
// There are more TargetHealthStateEnums, but they do not cleanly map to our states.
health = sdk.StatusReport_UNKNOWN
}

var healthMessage string
if tgHealth.TargetHealth.Description != nil {
healthMessage = *tgHealth.TargetHealth.Description
}

report.Resources = append(report.Resources, &sdk.StatusReport_Resource{
Health: health,
HealthMessage: healthMessage,
Name: targetId,
})
}

step.Update("Finished building report for AWS/ALB platform")
step.Done()

// NOTE(briancain): Replace ui.Status with StepGroups once this bug
// has been fixed: https://github.com/hashicorp/waypoint/issues/1536
st := ui.Status()
defer st.Close()

// If AWS registers targets slowly or incrementally, we may report an artificially low number of total targets.
totalTargets := len(targetHealthDescriptions)

if healthyCount == totalTargets {
report.Health = sdk.StatusReport_READY
report.HealthMessage = fmt.Sprintf("All %d targets are healthy.", totalTargets)
} else if healthyCount > 0 {
report.Health = sdk.StatusReport_PARTIAL
report.HealthMessage = fmt.Sprintf("Only %d/%d targets are healthy.", healthyCount, totalTargets)
st.Step(terminal.StatusWarn, report.HealthMessage)
} else {
report.Health = sdk.StatusReport_DOWN
report.HealthMessage = fmt.Sprintf("All targets are unhealthy, however your application might be available or still starting up.")
st.Step(terminal.StatusWarn, report.HealthMessage)
}

return &report, nil
}

// ReleaserConfig is the configuration structure for the Releaser.
type ReleaserConfig struct {
Name string `hcl:"name,optional"`
Expand Down Expand Up @@ -420,4 +580,5 @@ var (
_ component.ReleaseManager = (*Releaser)(nil)
_ component.Configurable = (*Releaser)(nil)
_ component.Documented = (*Releaser)(nil)
_ component.Status = (*Releaser)(nil)
)