Skip to content

Commit

Permalink
Basics of Cert-Count Non-Locking Telemetry (#16676)
Browse files Browse the repository at this point in the history
Basics of Cert-Count Telemetry, changelog,  "best attempt" slice to capture (and test for) duplicates, Move sorting of possibleDoubleCountedRevokedSerials to after compare of entries. Add values to counter when still initializing.
Set lists to nil after use, Fix atomic2 import, Delay reporting metrics until after deduplication has completed, 
The test works now, Move string slice to helper function; Add backendUUID to gauge name.
  • Loading branch information
kitography authored Sep 20, 2022
1 parent 88a6945 commit 06097d8
Show file tree
Hide file tree
Showing 9 changed files with 380 additions and 16 deletions.
2 changes: 1 addition & 1 deletion builtin/logical/aws/iam_policies_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ func Test_combinePolicyDocuments(t *testing.T) {
`{"Version": "2012-10-17", "Statement": [{"Effect": "Allow", "NotAction": "ec2:DescribeAvailabilityZones", "Resource": "*"}]}`,
},
expectedOutput: `{"Version": "2012-10-17","Statement":[{"Effect": "Allow","NotAction": "ec2:DescribeAvailabilityZones", "Resource": "*"}]}`,
expectedErr: false,
expectedErr: false,
},
{
description: "one blank policy",
Expand Down
188 changes: 188 additions & 0 deletions builtin/logical/pki/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@ package pki
import (
"context"
"fmt"
"sort"
"strings"
"sync"
"sync/atomic"
"time"

atomic2 "go.uber.org/atomic"

"github.com/hashicorp/vault/sdk/helper/consts"

"github.com/armon/go-metrics"
Expand Down Expand Up @@ -203,6 +206,13 @@ func Backend(conf *logical.BackendConfig) *backend {
// Delay the first tidy until after we've started up.
b.lastTidy = time.Now()

// Metrics initialization for count of certificates in storage
b.certsCounted = atomic2.NewBool(false)
b.certCount = new(uint32)
b.revokedCertCount = new(uint32)
b.possibleDoubleCountedSerials = make([]string, 0, 250)
b.possibleDoubleCountedRevokedSerials = make([]string, 0, 250)

return &b
}

Expand All @@ -219,6 +229,12 @@ type backend struct {
tidyStatus *tidyStatus
lastTidy time.Time

certCount *uint32
revokedCertCount *uint32
certsCounted *atomic2.Bool
possibleDoubleCountedSerials []string
possibleDoubleCountedRevokedSerials []string

pkiStorageVersion atomic.Value
crlBuilder *crlBuilder

Expand Down Expand Up @@ -330,6 +346,21 @@ func (b *backend) initialize(ctx context.Context, _ *logical.InitializationReque
return err
}

err := b.initializePKIIssuersStorage(ctx)
if err != nil {
return err
}

// Initialize also needs to populate our certificate and revoked certificate count
err = b.initializeStoredCertificateCounts(ctx)
if err != nil {
return err
}

return nil
}

func (b *backend) initializePKIIssuersStorage(ctx context.Context) error {
// Grab the lock prior to the updating of the storage lock preventing us flipping
// the storage flag midway through the request stream of other requests.
b.issuersLock.Lock()
Expand Down Expand Up @@ -532,3 +563,160 @@ func (b *backend) periodicFunc(ctx context.Context, request *logical.Request) er
// All good!
return nil
}

func (b *backend) initializeStoredCertificateCounts(ctx context.Context) error {
b.tidyStatusLock.RLock()
defer b.tidyStatusLock.RUnlock()
// For performance reasons, we can't lock on issuance/storage of certs until a list operation completes,
// but we want to limit possible miscounts / double-counts to over-counting, so we take the tidy lock which
// prevents (most) deletions - in particular we take a read lock (sufficient to block the write lock in
// tidyStatusStart while allowing tidy to still acquire a read lock to report via its endpoint)

entries, err := b.storage.List(ctx, "certs/")
if err != nil {
return err
}
atomic.AddUint32(b.certCount, uint32(len(entries)))

revokedEntries, err := b.storage.List(ctx, "revoked/")
if err != nil {
return err
}
atomic.AddUint32(b.revokedCertCount, uint32(len(revokedEntries)))

b.certsCounted.Store(true)
// Now that the metrics are set, we can switch from appending newly-stored certificates to the possible double-count
// list, and instead have them update the counter directly. We need to do this so that we are looking at a static
// slice of possibly double counted serials. Note that certsCounted is computed before the storage operation, so
// there may be some delay here.

// Sort the listed-entries first, to accommodate that delay.
sort.Slice(entries, func(i, j int) bool {
return entries[i] < entries[j]
})

sort.Slice(revokedEntries, func(i, j int) bool {
return revokedEntries[i] < revokedEntries[j]
})

// We assume here that these lists are now complete.
sort.Slice(b.possibleDoubleCountedSerials, func(i, j int) bool {
return b.possibleDoubleCountedSerials[i] < b.possibleDoubleCountedSerials[j]
})

listEntriesIndex := 0
possibleDoubleCountIndex := 0
for {
if listEntriesIndex >= len(entries) {
break
}
if possibleDoubleCountIndex >= len(b.possibleDoubleCountedSerials) {
break
}
if entries[listEntriesIndex] == b.possibleDoubleCountedSerials[possibleDoubleCountIndex] {
// This represents a double-counted entry
b.decrementTotalCertificatesCountNoReport()
listEntriesIndex = listEntriesIndex + 1
possibleDoubleCountIndex = possibleDoubleCountIndex + 1
continue
}
if entries[listEntriesIndex] < b.possibleDoubleCountedSerials[possibleDoubleCountIndex] {
listEntriesIndex = listEntriesIndex + 1
continue
}
if entries[listEntriesIndex] > b.possibleDoubleCountedSerials[possibleDoubleCountIndex] {
possibleDoubleCountIndex = possibleDoubleCountIndex + 1
continue
}
}

sort.Slice(b.possibleDoubleCountedRevokedSerials, func(i, j int) bool {
return b.possibleDoubleCountedRevokedSerials[i] < b.possibleDoubleCountedRevokedSerials[j]
})

listRevokedEntriesIndex := 0
possibleRevokedDoubleCountIndex := 0
for {
if listRevokedEntriesIndex >= len(revokedEntries) {
break
}
if possibleRevokedDoubleCountIndex >= len(b.possibleDoubleCountedRevokedSerials) {
break
}
if revokedEntries[listRevokedEntriesIndex] == b.possibleDoubleCountedRevokedSerials[possibleRevokedDoubleCountIndex] {
// This represents a double-counted revoked entry
b.decrementTotalRevokedCertificatesCountNoReport()
listRevokedEntriesIndex = listRevokedEntriesIndex + 1
possibleRevokedDoubleCountIndex = possibleRevokedDoubleCountIndex + 1
continue
}
if revokedEntries[listRevokedEntriesIndex] < b.possibleDoubleCountedRevokedSerials[possibleRevokedDoubleCountIndex] {
listRevokedEntriesIndex = listRevokedEntriesIndex + 1
continue
}
if revokedEntries[listRevokedEntriesIndex] > b.possibleDoubleCountedRevokedSerials[possibleRevokedDoubleCountIndex] {
possibleRevokedDoubleCountIndex = possibleRevokedDoubleCountIndex + 1
continue
}
}

b.possibleDoubleCountedRevokedSerials = nil
b.possibleDoubleCountedSerials = nil

metrics.SetGauge([]string{"secrets", "pki", b.backendUUID, "total_certificates_stored"}, float32(*b.certCount))
metrics.SetGauge([]string{"secrets", "pki", b.backendUUID, "total_revoked_certificates_stored"}, float32(*b.revokedCertCount))

return nil
}

// The "certsCounted" boolean here should be loaded from the backend certsCounted before the corresponding storage call:
// eg. certsCounted := b.certsCounted.Load()
func (b *backend) incrementTotalCertificatesCount(certsCounted bool, newSerial string) {
atomic.AddUint32(b.certCount, 1)
switch {
case !certsCounted:
// This is unsafe, but a good best-attempt
if strings.HasPrefix(newSerial, "certs/") {
newSerial = newSerial[6:]
}
b.possibleDoubleCountedSerials = append(b.possibleDoubleCountedSerials, newSerial)
default:
metrics.SetGauge([]string{"secrets", "pki", b.backendUUID, "total_certificates_stored"}, float32(*b.certCount))
}
}

func (b *backend) decrementTotalCertificatesCountReport() {
b.decrementTotalCertificatesCountNoReport()
metrics.SetGauge([]string{"secrets", "pki", b.backendUUID, "total_certificates_stored"}, float32(*b.certCount))
}

// Called directly only by the initialize function to deduplicate the count, when we don't have a full count yet
func (b *backend) decrementTotalCertificatesCountNoReport() {
atomic.AddUint32(b.certCount, ^uint32(0))
}

// The "certsCounted" boolean here should be loaded from the backend certsCounted before the corresponding storage call:
// eg. certsCounted := b.certsCounted.Load()
func (b *backend) incrementTotalRevokedCertificatesCount(certsCounted bool, newSerial string) {
atomic.AddUint32(b.revokedCertCount, 1)
switch {
case !certsCounted:
// This is unsafe, but a good best-attempt
if strings.HasPrefix(newSerial, "revoked/") { // allow passing in the path (revoked/serial) OR the serial
newSerial = newSerial[8:]
}
b.possibleDoubleCountedRevokedSerials = append(b.possibleDoubleCountedRevokedSerials, newSerial)
default:
metrics.SetGauge([]string{"secrets", "pki", b.backendUUID, "total_revoked_certificates_stored"}, float32(*b.revokedCertCount))
}
}

func (b *backend) decrementTotalRevokedCertificatesCountReport() {
b.decrementTotalRevokedCertificatesCountNoReport()
metrics.SetGauge([]string{"secrets", "pki", b.backendUUID, "total_revoked_certificates_stored"}, float32(*b.revokedCertCount))
}

// Called directly only by the initialize function to deduplicate the count, when we don't have a full count yet
func (b *backend) decrementTotalRevokedCertificatesCountNoReport() {
atomic.AddUint32(b.revokedCertCount, ^uint32(0))
}
Loading

0 comments on commit 06097d8

Please sign in to comment.