diff --git a/pkg/audit/manager.go b/pkg/audit/manager.go index 4ad8bc7a86a..207ba621ac7 100644 --- a/pkg/audit/manager.go +++ b/pkg/audit/manager.go @@ -161,10 +161,14 @@ func (am *Manager) audit(ctx context.Context) error { // record audit latency defer func() { logFinish(am.log) - latency := time.Since(startTime) + endTime := time.Now() + latency := endTime.Sub(startTime) if err := am.reporter.reportLatency(latency); err != nil { am.log.Error(err, "failed to report latency") } + if err := am.reporter.reportRunEnd(endTime); err != nil { + am.log.Error(err, "failed to report run end time") + } }() if err := am.reporter.reportRunStart(startTime); err != nil { diff --git a/pkg/audit/stats_reporter.go b/pkg/audit/stats_reporter.go index 7baa7335c90..8cfe9cd1645 100644 --- a/pkg/audit/stats_reporter.go +++ b/pkg/audit/stats_reporter.go @@ -12,15 +12,17 @@ import ( ) const ( - violationsMetricName = "violations" - auditDurationMetricName = "audit_duration_seconds" - lastRunTimeMetricName = "audit_last_run_time" + violationsMetricName = "violations" + auditDurationMetricName = "audit_duration_seconds" + lastRunStartTimeMetricName = "audit_last_run_time" + lastRunEndTimeMetricName = "audit_last_run_end_time" ) var ( - violationsM = stats.Int64(violationsMetricName, "Total number of audited violations", stats.UnitDimensionless) - auditDurationM = stats.Float64(auditDurationMetricName, "Latency of audit operation in seconds", stats.UnitSeconds) - lastRunTimeM = stats.Float64(lastRunTimeMetricName, "Timestamp of last audit run time", stats.UnitSeconds) + violationsM = stats.Int64(violationsMetricName, "Total number of audited violations", stats.UnitDimensionless) + auditDurationM = stats.Float64(auditDurationMetricName, "Latency of audit operation in seconds", stats.UnitSeconds) + lastRunStartTimeM = stats.Float64(lastRunStartTimeMetricName, "Timestamp of last audit run starting time", stats.UnitSeconds) + lastRunEndTimeM = stats.Float64(lastRunEndTimeMetricName, "Timestamp 
of last audit run ending time", stats.UnitSeconds) enforcementActionKey = tag.MustNewKey("enforcement_action") ) @@ -45,9 +47,13 @@ func register() error { Aggregation: view.Distribution(1*60, 3*60, 5*60, 10*60, 15*60, 20*60, 40*60, 80*60, 160*60, 320*60), }, { - Name: lastRunTimeMetricName, - Measure: lastRunTimeM, - Description: "Timestamp of last audit run time", + Name: lastRunStartTimeMetricName, + Measure: lastRunStartTimeM, + Aggregation: view.LastValue(), + }, + { + Name: lastRunEndTimeMetricName, + Measure: lastRunEndTimeM, Aggregation: view.LastValue(), }, } @@ -80,8 +86,18 @@ func (r *reporter) reportRunStart(t time.Time) error { return err } - val := float64(t.UnixNano()) / 1e9 - return metrics.Record(ctx, lastRunTimeM.M(val)) + val := float64(t.Unix()) + return metrics.Record(ctx, lastRunStartTimeM.M(val)) +} + +func (r *reporter) reportRunEnd(t time.Time) error { + ctx, err := tag.New(context.Background()) + if err != nil { + return err + } + + val := float64(t.Unix()) + return metrics.Record(ctx, lastRunEndTimeM.M(val)) } // newStatsReporter creates a reporter for audit metrics. 
diff --git a/pkg/audit/stats_reporter_test.go b/pkg/audit/stats_reporter_test.go index 4e371d6115e..3a75c71d27f 100644 --- a/pkg/audit/stats_reporter_test.go +++ b/pkg/audit/stats_reporter_test.go @@ -101,8 +101,10 @@ func checkData(t *testing.T, name string, wantRowLength int) *view.Row { } func TestLastRestartCheck(t *testing.T) { - wantTime := time.Now() - wantTs := float64(wantTime.UnixNano()) / 1e9 + wantStartTime := time.Now() + wantEndTime := wantStartTime.Add(1 * time.Minute) + wantStartTs := float64(wantStartTime.Unix()) + wantEndTs := float64(wantEndTime.Unix()) const wantRowLength = 1 r, err := newStatsReporter() @@ -110,21 +112,39 @@ func TestLastRestartCheck(t *testing.T) { t.Fatalf("got newStatsReporter() error %v", err) } - err = r.reportRunStart(wantTime) + err = r.reportRunStart(wantStartTime) if err != nil { t.Fatalf("reportRunStart error %v", err) } - row := checkData(t, lastRunTimeMetricName, wantRowLength) + row := checkData(t, lastRunStartTimeMetricName, wantRowLength) got, ok := row.Data.(*view.LastValueData) if !ok { - t.Error("lastRunTimeMetricName should have aggregation LastValue()") + t.Errorf("%s should have aggregation LastValue()", lastRunStartTimeMetricName) } if len(row.Tags) != 0 { - t.Errorf("got %q tags %v, want empty", lastRunTimeMetricName, row.Tags) + t.Errorf("got %q tags %v, want empty", lastRunStartTimeMetricName, row.Tags) } - if got.Value != wantTs { - t.Errorf("got %q = %v, want %v", lastRunTimeMetricName, got.Value, wantTs) + if got.Value != wantStartTs { + t.Errorf("got %q = %v, want %v", lastRunStartTimeMetricName, got.Value, wantStartTs) + } + + err = r.reportRunEnd(wantEndTime) + if err != nil { + t.Fatalf("reportRunEnd error %v", err) + } + row = checkData(t, lastRunEndTimeMetricName, wantRowLength) + got, ok = row.Data.(*view.LastValueData) + if !ok { + t.Errorf("%s should have aggregation LastValue()", lastRunEndTimeMetricName) + } + + if len(row.Tags) != 0 { + t.Errorf("got %q tags %v, want empty", 
lastRunEndTimeMetricName, row.Tags) + } + + if got.Value != wantEndTs { + t.Errorf("got %q = %v, want %v", lastRunEndTimeMetricName, got.Value, wantEndTs) } } diff --git a/website/docs/audit.md b/website/docs/audit.md index 9802324ea42..059b3d8e78b 100644 --- a/website/docs/audit.md +++ b/website/docs/audit.md @@ -13,7 +13,8 @@ There are three ways to gather audit results, depending on the level of detail n Prometheus metrics provide an aggregated look at the number of audit violations: -* `gatekeeper_audit_last_run_time` provides the timestamp of the most recently completed audit run +* `gatekeeper_audit_last_run_time` provides the start time of the most recent audit run, as a Unix timestamp in seconds +* `gatekeeper_audit_last_run_end_time` provides the end time of the last completed audit run, as a Unix timestamp in seconds * `gatekeeper_violations` provides the total number of audited violations for the last audit run, broken down by violation severity ### Constraint Status diff --git a/website/docs/metrics.md b/website/docs/metrics.md index b81643a0d66..7ac08a3646d 100644 --- a/website/docs/metrics.md +++ b/website/docs/metrics.md @@ -113,7 +113,13 @@ Below are the list of metrics provided by Gatekeeper: - Name: `gatekeeper_audit_last_run_time` - Description: `Timestamp of last audit run time` + Description: `Timestamp of last audit run starting time` + + Aggregation: `LastValue` + +- Name: `gatekeeper_audit_last_run_end_time` + + Description: `Timestamp of last audit run ending time` Aggregation: `LastValue` diff --git a/website/versioned_docs/version-v3.6.x/audit.md b/website/versioned_docs/version-v3.6.x/audit.md index 1d6b73a792e..b0d405fef30 100644 --- a/website/versioned_docs/version-v3.6.x/audit.md +++ b/website/versioned_docs/version-v3.6.x/audit.md @@ -13,7 +13,7 @@ There are three ways to gather audit results, depending on the level of detail n Prometheus metrics provide an aggregated look at the number of audit violations: -* `gatekeeper_audit_last_run_time` provides the timestamp of the most 
recently completed audit run +* `gatekeeper_audit_last_run_time` provides the start time of the most recent audit run, as a Unix timestamp in seconds * `gatekeeper_violations` provides the total number of audited violations for the last audit run, broken down by violation severity ### Constraint Status diff --git a/website/versioned_docs/version-v3.7.x/audit.md b/website/versioned_docs/version-v3.7.x/audit.md index f8b520c38a0..206042ca1cb 100644 --- a/website/versioned_docs/version-v3.7.x/audit.md +++ b/website/versioned_docs/version-v3.7.x/audit.md @@ -13,7 +13,7 @@ There are three ways to gather audit results, depending on the level of detail n Prometheus metrics provide an aggregated look at the number of audit violations: -* `gatekeeper_audit_last_run_time` provides the timestamp of the most recently completed audit run +* `gatekeeper_audit_last_run_time` provides the start time of the most recent audit run, as a Unix timestamp in seconds * `gatekeeper_violations` provides the total number of audited violations for the last audit run, broken down by violation severity ### Constraint Status diff --git a/website/versioned_docs/version-v3.8.x/audit.md b/website/versioned_docs/version-v3.8.x/audit.md index f8b520c38a0..206042ca1cb 100644 --- a/website/versioned_docs/version-v3.8.x/audit.md +++ b/website/versioned_docs/version-v3.8.x/audit.md @@ -13,7 +13,7 @@ There are three ways to gather audit results, depending on the level of detail n Prometheus metrics provide an aggregated look at the number of audit violations: -* `gatekeeper_audit_last_run_time` provides the timestamp of the most recently completed audit run +* `gatekeeper_audit_last_run_time` provides the start time of the most recent audit run, as a Unix timestamp in seconds * `gatekeeper_violations` provides the total number of audited violations for the last audit run, broken down by violation severity ### Constraint Status diff --git a/website/versioned_docs/version-v3.9.x/audit.md b/website/versioned_docs/version-v3.9.x/audit.md index 9802324ea42..186f8adf20c 
100644 --- a/website/versioned_docs/version-v3.9.x/audit.md +++ b/website/versioned_docs/version-v3.9.x/audit.md @@ -13,7 +13,7 @@ There are three ways to gather audit results, depending on the level of detail n Prometheus metrics provide an aggregated look at the number of audit violations: -* `gatekeeper_audit_last_run_time` provides the timestamp of the most recently completed audit run +* `gatekeeper_audit_last_run_time` provides the start time of the most recent audit run, as a Unix timestamp in seconds * `gatekeeper_violations` provides the total number of audited violations for the last audit run, broken down by violation severity ### Constraint Status