diff --git a/ecscollector/collector.go b/ecscollector/collector.go index c696d44..01fda9c 100644 --- a/ecscollector/collector.go +++ b/ecscollector/collector.go @@ -17,6 +17,7 @@ package ecscollector import ( "context" + "fmt" "log/slog" "github.com/docker/docker/api/types/container" @@ -193,7 +194,15 @@ func (c *collector) Collect(ch chan<- prometheus.Metric) { ctx := context.Background() metadata, err := c.client.RetrieveTaskMetadata(ctx) if err != nil { - c.logger.Debug("Failed to retrieve metadata", "error", err) + c.logger.Debug("Failed to retrieve task metadata", "error", err) + // Signal that this Collect has failed. This ultimately results in an + // HTTP 500 being served for the /metrics response. + // + // While it would be most technically correct to do `NewInvalidMetric` + // for all of the Descs here, it just results in N identical error + // messages being printed in the /metrics response, so it seems a bit + // absurd to do that. + ch <- prometheus.NewInvalidMetric(taskMetadataDesc, fmt.Errorf("failed to retrieve task metadata: %w", err)) return } c.logger.Debug("Got ECS task metadata response", "metadata", metadata) @@ -261,7 +270,15 @@ func (c *collector) Collect(ch chan<- prometheus.Metric) { stats, err := c.client.RetrieveTaskStats(ctx) if err != nil { - c.logger.Debug("Failed to retrieve container stats", "error", err) + c.logger.Debug("Failed to retrieve task stats", "error", err) + // Signal that this Collect has failed. This ultimately results in an + // HTTP 500 being served for the /metrics response. + // + // While it would be most technically correct to do `NewInvalidMetric` + // for all of the Descs here, it just results in N identical error + // messages being printed in the /metrics response, so it seems a bit + // absurd to do that. + ch <- prometheus.NewInvalidMetric(memUsageDesc, fmt.Errorf("failed to retrieve task stats: %w", err)) return } c.logger.Debug("Got ECS task stats response", "stats", stats) diff --git a/ecscollector/collector_test.go b/ecscollector/collector_test.go index 57e54f5..04ec429 100644 --- a/ecscollector/collector_test.go +++ b/ecscollector/collector_test.go @@ -138,3 +138,24 @@ func TestEc2Metrics(t *testing.T) { collector := NewCollector(metadataClient, slog.Default()) assertSnapshot(t, collector, "testdata/snapshots/ec2_metrics.txt") } + +func TestApiErrors(t *testing.T) { + mux := http.NewServeMux() + mux.HandleFunc("GET /task", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(500) + w.Write([]byte("Internal Server Error")) + }) + mux.HandleFunc("GET /task/stats", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(500) + w.Write([]byte("Internal Server Error")) + }) + metadataServer := httptest.NewServer(mux) + defer metadataServer.Close() + metadataClient := ecsmetadata.NewClient(metadataServer.URL) + collector := NewCollector(metadataClient, slog.Default()) + + _, err := renderMetrics(collector) + if err == nil || err.Error() != "non-200 metrics response: 500" { + t.Fatalf("expected 500 error but got err: %v", err) + } +}