Skip to content

Commit

Permalink
New health status check and report for the status api, it detects if …
Browse files Browse the repository at this point in the history
…the CommandApi is not returning a 200. For example if License Key is incorrect. (#1908)
  • Loading branch information
alvarocabanas authored Aug 20, 2024
1 parent 0f3c666 commit dab967b
Show file tree
Hide file tree
Showing 7 changed files with 409 additions and 56 deletions.
2 changes: 1 addition & 1 deletion cmd/newrelic-infra/newrelic-infra.go
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ func initializeAgentAndRun(c *config.Config, logFwCfg config.LogForward) error {
// This should never happen, as the correct format is checked during NormalizeConfig.
aslog.WithError(err).Error("invalid startup_connection_timeout value, cannot run status server")
} else {
rep := status.NewReporter(agt.Context.Ctx, rlog, c.StatusEndpoints, timeoutD, transport, agt.Context.AgentIdnOrEmpty, agt.Context.EntityKey, c.License, userAgent)
rep := status.NewReporter(agt.Context.Ctx, rlog, c.StatusEndpoints, c.HealthEndpoint, timeoutD, transport, agt.Context.AgentIdnOrEmpty, agt.Context.EntityKey, c.License, userAgent)

apiSrv, err := httpapi.NewServer(rep, integrationEmitter)
if c.HTTPServerEnabled {
Expand Down
57 changes: 55 additions & 2 deletions internal/agent/status/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ const (
// Report agent status report. It contains:
// - checks:
// - backend endpoints reachability statuses
// - backend communication healthiness
//
// - configuration
// Fields will be empty when ReportErrors() reports no errors.
Expand All @@ -31,6 +32,7 @@ type Report struct {

// ChecksReport groups the results of the checks included in a status report.
type ChecksReport struct {
	// Endpoints holds the backend endpoints reachability statuses. The key is
	// left out of the JSON output when the slice is empty.
	Endpoints []EndpointReport `json:"endpoints,omitempty"`
	// Health is the backend communication healthiness result. It is always
	// emitted: encoding/json never considers a struct value empty, so an
	// "omitempty" option on this field would have no effect and is
	// intentionally not set.
	Health HealthReport `json:"health"`
}

// ConfigReport configuration used for status report.
Expand All @@ -45,6 +47,12 @@ type EndpointReport struct {
Error string `json:"error,omitempty"`
}

// HealthReport describes the backend communication healthiness status.
type HealthReport struct {
	// Healthy is the outcome of the health check; it is always serialized.
	Healthy bool `json:"healthy"`
	// Error is the failure message, left out of the JSON output when empty.
	Error string `json:"error,omitempty"`
}

// ReportEntity agent entity report.
type ReportEntity struct {
GUID string `json:"guid"`
Expand All @@ -59,12 +67,15 @@ type Reporter interface {
ReportErrors() (Report, error)
// ReportEntity agent entity report.
ReportEntity() (ReportEntity, error)
// ReportHealth agent health report.
ReportHealth() HealthReport
}

type nrReporter struct {
ctx context.Context
log log.Entry
endpoints []string // NR backend URLs
healthEndpoint string // NR command backend URL to check communication healthiness
license string
userAgent string
idProvide id.Provide
Expand Down Expand Up @@ -119,8 +130,19 @@ func (r *nrReporter) report(onlyErrors bool) (report Report, err error) {
}(ep)
}

hReportC := make(chan HealthReport, 1)

wg.Add(1)

go func() {
hReportC <- r.getHealth(agentID)

wg.Done()
}()

wg.Wait()
close(eReportsC)
close(hReportC)

var errored bool
var eReports []EndpointReport
Expand All @@ -132,16 +154,17 @@ func (r *nrReporter) report(onlyErrors bool) (report Report, err error) {
errored = true
}
}
hreport := <-hReportC

if !onlyErrors || errored {
if report.Checks == nil {
report.Checks = &ChecksReport{}
}
report.Checks.Endpoints = eReports
report.Checks.Health = hreport
report.Config = &ConfigReport{
ReachabilityTimeout: r.timeout.String(),
}

}

return
Expand All @@ -154,23 +177,53 @@ func (r *nrReporter) ReportEntity() (re ReportEntity, err error) {
}, nil
}

// ReportHealth runs the backend health check using the agent ID currently
// returned by the reporter's ID provider and returns the resulting report.
func (r *nrReporter) ReportHealth() HealthReport {
	return r.getHealth(r.idProvide().ID.String())
}

// getHealth makes an HTTP request to the command API (r.healthEndpoint) to
// validate that the ingest key is valid and connectivity is ok.
//
// The returned report carries the healthiness flag produced by
// backendhttp.CheckEndpointHealthiness; when that call also returns an error,
// its message is copied into the report's Error field.
func (r *nrReporter) getHealth(agentID string) HealthReport {
	healthy, err := backendhttp.CheckEndpointHealthiness(
		r.ctx,
		r.healthEndpoint,
		r.license,
		r.userAgent,
		agentID,
		r.timeout,
		r.transport,
	)
	if err != nil {
		// Keep the flag exactly as the checker returned it; only attach the message.
		return HealthReport{Healthy: healthy, Error: err.Error()}
	}

	return HealthReport{Healthy: healthy}
}

// NewReporter creates a new status reporter.
func NewReporter(
ctx context.Context,
l log.Entry,
backendEndpoints []string,
healthEndpoint string,
timeout time.Duration,
transport http.RoundTripper,
agentIDProvide id.Provide,
agentEntityKeyProvider func() string,
license,
userAgent string,
) Reporter {

return &nrReporter{
ctx: ctx,
log: l,
endpoints: backendEndpoints,
healthEndpoint: healthEndpoint,
license: license,
userAgent: userAgent,
idProvide: agentIDProvide,
Expand Down
Loading

0 comments on commit dab967b

Please sign in to comment.