Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion pkg/audit/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,10 +161,14 @@ func (am *Manager) audit(ctx context.Context) error {
// record audit latency
defer func() {
logFinish(am.log)
latency := time.Since(startTime)
endTime := time.Now()
latency := endTime.Sub(startTime)
if err := am.reporter.reportLatency(latency); err != nil {
am.log.Error(err, "failed to report latency")
}
if err := am.reporter.reportRunEnd(endTime); err != nil {
am.log.Error(err, "failed to report run end time")
}
}()

if err := am.reporter.reportRunStart(startTime); err != nil {
Expand Down
38 changes: 27 additions & 11 deletions pkg/audit/stats_reporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,17 @@ import (
)

const (
violationsMetricName = "violations"
auditDurationMetricName = "audit_duration_seconds"
lastRunTimeMetricName = "audit_last_run_time"
violationsMetricName = "violations"
auditDurationMetricName = "audit_duration_seconds"
lastRunStartTimeMetricName = "audit_last_run_time"
lastRunEndTimeMetricName = "audit_last_run_end_time"
)

var (
violationsM = stats.Int64(violationsMetricName, "Total number of audited violations", stats.UnitDimensionless)
auditDurationM = stats.Float64(auditDurationMetricName, "Latency of audit operation in seconds", stats.UnitSeconds)
lastRunTimeM = stats.Float64(lastRunTimeMetricName, "Timestamp of last audit run time", stats.UnitSeconds)
violationsM = stats.Int64(violationsMetricName, "Total number of audited violations", stats.UnitDimensionless)
auditDurationM = stats.Float64(auditDurationMetricName, "Latency of audit operation in seconds", stats.UnitSeconds)
lastRunStartTimeM = stats.Float64(lastRunStartTimeMetricName, "Timestamp of last audit run starting time", stats.UnitSeconds)
lastRunEndTimeM = stats.Float64(lastRunEndTimeMetricName, "Timestamp of last audit run ending time", stats.UnitSeconds)

enforcementActionKey = tag.MustNewKey("enforcement_action")
)
Expand All @@ -45,9 +47,13 @@ func register() error {
Aggregation: view.Distribution(1*60, 3*60, 5*60, 10*60, 15*60, 20*60, 40*60, 80*60, 160*60, 320*60),
},
{
Name: lastRunTimeMetricName,
Measure: lastRunTimeM,
Description: "Timestamp of last audit run time",
Name: lastRunStartTimeMetricName,
Measure: lastRunStartTimeM,
Aggregation: view.LastValue(),
},
{
Name: lastRunEndTimeMetricName,
Measure: lastRunEndTimeM,
Aggregation: view.LastValue(),
},
}
Expand Down Expand Up @@ -80,8 +86,18 @@ func (r *reporter) reportRunStart(t time.Time) error {
return err
}

val := float64(t.UnixNano()) / 1e9
return metrics.Record(ctx, lastRunTimeM.M(val))
val := float64(t.Unix())
return metrics.Record(ctx, lastRunStartTimeM.M(val))
}

// reportRunEnd records the end time of an audit run on the lastRunEndTimeM
// measure. The value recorded is t expressed as whole seconds since the Unix
// epoch (t.Unix()), converted to float64; sub-second precision is dropped.
//
// It returns the error from tag.New if the tagged context cannot be built,
// otherwise whatever metrics.Record returns.
func (r *reporter) reportRunEnd(t time.Time) error {
	endSeconds := float64(t.Unix())

	taggedCtx, tagErr := tag.New(context.Background())
	if tagErr != nil {
		return tagErr
	}
	return metrics.Record(taggedCtx, lastRunEndTimeM.M(endSeconds))
}

// newStatsReporter creates a reporter for audit metrics.
Expand Down
36 changes: 28 additions & 8 deletions pkg/audit/stats_reporter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,30 +101,50 @@ func checkData(t *testing.T, name string, wantRowLength int) *view.Row {
}

func TestLastRestartCheck(t *testing.T) {
wantTime := time.Now()
wantTs := float64(wantTime.UnixNano()) / 1e9
wantStartTime := time.Now()
wantEndTime := wantStartTime.Add(1 * time.Minute)
wantStartTs := float64(wantStartTime.Unix())
wantEndTs := float64(wantEndTime.Unix())
const wantRowLength = 1

r, err := newStatsReporter()
if err != nil {
t.Fatalf("got newStatsReporter() error %v", err)
}

err = r.reportRunStart(wantTime)
err = r.reportRunStart(wantStartTime)
if err != nil {
t.Fatalf("reportRunStart error %v", err)
}
row := checkData(t, lastRunTimeMetricName, wantRowLength)
row := checkData(t, lastRunStartTimeMetricName, wantRowLength)
got, ok := row.Data.(*view.LastValueData)
if !ok {
t.Error("lastRunTimeMetricName should have aggregation LastValue()")
t.Errorf("%s should have aggregation LastValue()", lastRunStartTimeMetricName)
}

if len(row.Tags) != 0 {
t.Errorf("got %q tags %v, want empty", lastRunTimeMetricName, row.Tags)
t.Errorf("got %q tags %v, want empty", lastRunStartTimeMetricName, row.Tags)
}

if got.Value != wantTs {
t.Errorf("got %q = %v, want %v", lastRunTimeMetricName, got.Value, wantTs)
if got.Value != wantStartTs {
t.Errorf("got %q = %v, want %v", lastRunStartTimeMetricName, got.Value, wantStartTs)
}

err = r.reportRunEnd(wantEndTime)
if err != nil {
t.Fatalf("reportRunEnd error %v", err)
}
row = checkData(t, lastRunEndTimeMetricName, wantRowLength)
got, ok = row.Data.(*view.LastValueData)
if !ok {
t.Errorf("%s should have aggregation LastValue()", lastRunEndTimeMetricName)
}

if len(row.Tags) != 0 {
t.Errorf("got %q tags %v, want empty", lastRunEndTimeMetricName, row.Tags)
}

if got.Value != wantEndTs {
t.Errorf("got %q = %v, want %v", lastRunEndTimeMetricName, got.Value, wantEndTs)
}
}
3 changes: 2 additions & 1 deletion website/docs/audit.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ There are three ways to gather audit results, depending on the level of detail n

Prometheus metrics provide an aggregated look at the number of audit violations:

* `gatekeeper_audit_last_run_time` provides the timestamp of the most recently completed audit run
* `gatekeeper_audit_last_run_time` provides the start timestamp of the most recent audit run
* `gatekeeper_audit_last_run_end_time` provides the end timestamp of the most recently completed audit run
* `gatekeeper_violations` provides the total number of audited violations for the last audit run, broken down by violation severity

### Constraint Status
Expand Down
8 changes: 7 additions & 1 deletion website/docs/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,13 @@ Below is the list of metrics provided by Gatekeeper:

- Name: `gatekeeper_audit_last_run_time`

Description: `Timestamp of last audit run time`
Description: `Timestamp of last audit run starting time`

Aggregation: `LastValue`

- Name: `gatekeeper_audit_last_run_end_time`

Description: `Timestamp of last audit run ending time`

Aggregation: `LastValue`

Expand Down
2 changes: 1 addition & 1 deletion website/versioned_docs/version-v3.6.x/audit.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ There are three ways to gather audit results, depending on the level of detail n

Prometheus metrics provide an aggregated look at the number of audit violations:

* `gatekeeper_audit_last_run_time` provides the timestamp of the most recently completed audit run
* `gatekeeper_audit_last_run_time` provides the start timestamp of the most recent audit run
* `gatekeeper_violations` provides the total number of audited violations for the last audit run, broken down by violation severity

### Constraint Status
Expand Down
2 changes: 1 addition & 1 deletion website/versioned_docs/version-v3.7.x/audit.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ There are three ways to gather audit results, depending on the level of detail n

Prometheus metrics provide an aggregated look at the number of audit violations:

* `gatekeeper_audit_last_run_time` provides the timestamp of the most recently completed audit run
* `gatekeeper_audit_last_run_time` provides the start timestamp of the most recent audit run
* `gatekeeper_violations` provides the total number of audited violations for the last audit run, broken down by violation severity

### Constraint Status
Expand Down
2 changes: 1 addition & 1 deletion website/versioned_docs/version-v3.8.x/audit.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ There are three ways to gather audit results, depending on the level of detail n

Prometheus metrics provide an aggregated look at the number of audit violations:

* `gatekeeper_audit_last_run_time` provides the timestamp of the most recently completed audit run
* `gatekeeper_audit_last_run_time` provides the start timestamp of the most recent audit run
* `gatekeeper_violations` provides the total number of audited violations for the last audit run, broken down by violation severity

### Constraint Status
Expand Down
2 changes: 1 addition & 1 deletion website/versioned_docs/version-v3.9.x/audit.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ There are three ways to gather audit results, depending on the level of detail n

Prometheus metrics provide an aggregated look at the number of audit violations:

* `gatekeeper_audit_last_run_time` provides the timestamp of the most recently completed audit run
* `gatekeeper_audit_last_run_time` provides the start timestamp of the most recent audit run
* `gatekeeper_violations` provides the total number of audited violations for the last audit run, broken down by violation severity

### Constraint Status
Expand Down