Skip to content

Commit

Permalink
Add health check endpoint (#665)
Browse files Browse the repository at this point in the history
  • Loading branch information
dhurley authored May 8, 2024
1 parent 84a3b6b commit 24d8ad1
Show file tree
Hide file tree
Showing 44 changed files with 756 additions and 165 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ jobs:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4
with:
fetch-depth: 0
- uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0
with:
go-version-file: 'go.mod'
Expand Down Expand Up @@ -144,6 +146,8 @@ jobs:
version: "bookworm-slim"
steps:
- uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4
with:
fetch-depth: 0
- uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0
with:
go-version-file: 'go.mod'
Expand Down
61 changes: 61 additions & 0 deletions docs/swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,23 @@
"info": {},
"host": "localhost:8081",
"paths": {
"/health": {
"get": {
"tags": [
"nginx-agent"
],
"summary": "Check the health of the NGINX Agent",
"operationId": "health-check",
"responses": {
"200": {
"description": "HealthResponse",
"schema": {
"$ref": "#/definitions/HealthResponse"
}
}
}
}
},
"/metrics/": {
"get": {
"description": "# Returns prometheus metrics",
Expand Down Expand Up @@ -127,6 +144,12 @@
"schema": {
"$ref": "#/definitions/AgentAPIConfigApplyStatusResponse"
}
},
"500": {
"description": "AgentAPICommonResponse",
"schema": {
"$ref": "#/definitions/AgentAPICommonResponse"
}
}
}
}
Expand Down Expand Up @@ -195,6 +218,44 @@
},
"x-go-package": "github.com/nginx/agent/v2/src/plugins"
},
"HealthResponse": {
"type": "object",
"properties": {
"checks": {
"description": "Array of health checks",
"type": "array",
"items": {
"$ref": "#/definitions/HealthStatusCheck"
},
"x-go-name": "Checks"
},
"status": {
"description": "Overall health status",
"type": "string",
"x-go-name": "Status",
"example": "OK"
}
},
"x-go-package": "github.com/nginx/agent/v2/src/plugins"
},
"HealthStatusCheck": {
"type": "object",
"properties": {
"name": {
"description": "Health check name",
"type": "string",
"x-go-name": "Name",
"example": "commandConnection"
},
"status": {
"description": "Health check status",
"type": "string",
"x-go-name": "Status",
"example": "OK"
}
},
"x-go-package": "github.com/nginx/agent/v2/src/plugins"
},
"NginxDetails": {
"type": "object",
"properties": {
Expand Down
5 changes: 1 addition & 4 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,7 @@ func main() {
controller, commander, reporter := core.CreateGrpcClients(ctx, loadedConfig)

if controller != nil {
if err := controller.Connect(); err != nil {
log.Warnf("Unable to connect to control plane: %v", err)
return
}
go controller.Connect()
}

binary := core.NewNginxBinary(env, loadedConfig)
Expand Down
2 changes: 1 addition & 1 deletion sdk/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ type (
WithClient(Client) Controller
Context() context.Context
WithContext(context.Context) Controller
Connect() error
Connect()
Close() error
}
)
10 changes: 10 additions & 0 deletions sdk/client/commander.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ package client
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"sync"
Expand Down Expand Up @@ -74,11 +75,15 @@ func (c *commander) Connect(ctx context.Context) error {
log.Debugf("Commander connecting to %s", c.server)

c.ctx = ctx

c.retryLock.Lock()
err := backoff.WaitUntil(
c.ctx,
c.backoffSettings,
c.createClient,
)
c.retryLock.Unlock()

if err != nil {
return err
}
Expand Down Expand Up @@ -163,6 +168,11 @@ func (c *commander) Send(ctx context.Context, message Message) error {
return err
}

if c.channel == nil {
c.setIsRetrying(true)
return c.handleGrpcError("Commander Channel Send", errors.New("command channel client not created yet"))
}

if err := c.channel.Send(cmd); err != nil {
c.setIsRetrying(true)
return c.handleGrpcError("Commander Channel Send", err)
Expand Down
13 changes: 4 additions & 9 deletions sdk/client/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ package client
import (
"context"
"fmt"

log "github.com/sirupsen/logrus"
)

func NewClientController() Controller {
Expand All @@ -33,19 +35,12 @@ func (c *ctrl) WithContext(ctx context.Context) Controller {
return c
}

func (c *ctrl) Connect() error {
var retErr error
func (c *ctrl) Connect() {
for _, client := range c.clients {
if err := client.Connect(c.ctx); err != nil {
if retErr == nil {
retErr = fmt.Errorf("%s failed to connect: %w", client.Server(), err)
} else {
retErr = fmt.Errorf("%v\n%s failed to connect: %w", retErr, client.Server(), err)
}
log.Warnf("%s failed to connect: %v", client.Server(), err)
}
}

return retErr
}

func (c *ctrl) Close() error {
Expand Down
6 changes: 2 additions & 4 deletions sdk/client/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,7 @@ func TestControllerConnect(t *testing.T) {
controller.WithClient(metricsReportClient)
controller.WithContext(ctx)

err := controller.Connect()
assert.Nil(t, err)
controller.Connect()

commanderClient.AssertNumberOfCalls(t, "Connect", 1)
metricsReportClient.AssertNumberOfCalls(t, "Connect", 1)
Expand Down Expand Up @@ -75,8 +74,7 @@ func TestControllerConnect_error(t *testing.T) {
controller.WithClient(metricsReportClient)
controller.WithContext(ctx)

err := controller.Connect()
assert.NotNil(t, err)
controller.Connect()

commanderClient.AssertNumberOfCalls(t, "Connect", 1)
metricsReportClient.AssertNumberOfCalls(t, "Connect", 1)
Expand Down
55 changes: 37 additions & 18 deletions sdk/client/metric_reporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ package client

import (
"context"
"errors"
"fmt"
"io"
"sync"
Expand All @@ -28,6 +29,7 @@ func NewMetricReporterClient() MetricReporter {
return &metricReporter{
connector: newConnector(),
backoffSettings: DefaultBackoffSettings,
isRetrying: false,
}
}

Expand All @@ -39,6 +41,8 @@ type metricReporter struct {
ctx context.Context
mu sync.Mutex
backoffSettings backoff.BackoffSettings
isRetrying bool
retryLock sync.Mutex
}

func (r *metricReporter) WithInterceptor(interceptor interceptors.Interceptor) Client {
Expand All @@ -57,11 +61,14 @@ func (r *metricReporter) Connect(ctx context.Context) error {
log.Debugf("Metric Reporter connecting to %s", r.server)

r.ctx = ctx

r.retryLock.Lock()
err := backoff.WaitUntil(
r.ctx,
r.backoffSettings,
r.createClient,
)
r.retryLock.Unlock()
if err != nil {
return err
}
Expand Down Expand Up @@ -151,18 +158,19 @@ func (r *metricReporter) Send(ctx context.Context, message Message) error {
return fmt.Errorf("MetricReporter expected a metrics report message, but received %T", message.Data())
}

isRetrying := false

err = backoff.WaitUntil(r.ctx, r.backoffSettings, func() error {
if isRetrying {
log.Infof("Metric Reporter Channel Send: retrying to connect to %s", r.grpc.Target())
err := r.createClient()
if err != nil {
return err
}
err := r.checkClientConnection()
if err != nil {
return err
}

if r.channel == nil {
r.isRetrying = true
return r.handleGrpcError("Metric Reporter Channel Send", errors.New("metric service stream client not created yet"))
}

if err := r.channel.Send(report); err != nil {
isRetrying = true
r.isRetrying = true
return r.handleGrpcError("Metric Reporter Channel Send", err)
}

Expand All @@ -176,18 +184,14 @@ func (r *metricReporter) Send(ctx context.Context, message Message) error {
return fmt.Errorf("MetricReporter expected an events report message, but received %T", message.Data())
}

isRetrying := false

err = backoff.WaitUntil(r.ctx, r.backoffSettings, func() error {
if isRetrying {
log.Infof("Metric Reporter Channel Send: retrying to connect to %s", r.grpc.Target())
err = r.createClient()
if err != nil {
return err
}
err := r.checkClientConnection()
if err != nil {
return err
}

if err := r.eventsChannel.Send(report); err != nil {
isRetrying = true
r.isRetrying = true
return r.handleGrpcError("Metric Reporter Events Channel Send", err)
}

Expand All @@ -202,6 +206,21 @@ func (r *metricReporter) Send(ctx context.Context, message Message) error {
return err
}

// checkClientConnection re-creates the reporter's client when the isRetrying
// flag is set, and does nothing otherwise.
//
// It holds r.retryLock for the whole call (released on return via defer), the
// same lock Connect holds around its own createClient backoff loop, so the two
// paths do not run createClient at the same time.
//
// Returns the error from r.createClient if the re-creation fails, nil otherwise
// (including when no retry was pending).
//
// NOTE(review): the visible assignments `r.isRetrying = true` in Send are not
// made under retryLock, and nothing visible here resets the flag after a
// successful createClient — confirm both are handled elsewhere.
func (r *metricReporter) checkClientConnection() error {
r.retryLock.Lock()
defer r.retryLock.Unlock()

if r.isRetrying {
// NOTE(review): r.grpc is dereferenced for the log message before createClient
// runs — presumably it is always non-nil by this point; verify for the case
// where the initial Connect has not completed yet.
log.Infof("Metric Reporter Channel Send: retrying to connect to %s", r.grpc.Target())
err := r.createClient()
if err != nil {
return err
}
}

return nil
}

func (r *metricReporter) closeConnection() error {
var err error
if r.channel != nil {
Expand Down
3 changes: 2 additions & 1 deletion src/core/topics.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ package core
const (
UNKNOWN = "unknown"
RegistrationPrefix = "registration."
RegistrationCompletedTopic = RegistrationPrefix + "completed"
CommNginxConfig = "nginx.config"
NginxConfigUpload = "nginx.config.upload"
NginxReload = "nginx.reload"
Expand Down Expand Up @@ -49,4 +48,6 @@ const (
EnableExtension = "enable.extension"
EnableFeature = "enable.feature"
AgentAPIConfigApplyResponse = "agent.api.config.apply.response"
CommandSent = "command.sent"
MetricReportSent = "metrics.report.sent"
)
Loading

0 comments on commit 24d8ad1

Please sign in to comment.