Skip to content

Commit

Permalink
Added IP Usage metrics at Rest server. (#1932)
Browse files Browse the repository at this point in the history
* Added IP Usage metrics at Rest server.
  • Loading branch information
rsagasthya authored May 2, 2023
1 parent 66acf01 commit 41f451a
Show file tree
Hide file tree
Showing 4 changed files with 166 additions and 45 deletions.
10 changes: 10 additions & 0 deletions cns/restserver/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,11 @@ func (service *HTTPRestService) reserveIPAddress(w http.ResponseWriter, r *http.
Message: returnMessage,
}

if resp.ReturnCode == 0 {
// If Response is success i.e. code 0, then publish metrics.
publishIPStateMetrics(service.buildIPState())
}

reserveResp := &cns.ReserveIPAddressResponse{Response: resp, IPAddress: address}
err = service.Listener.Encode(w, &reserveResp)
logger.Response(service.Name, reserveResp, resp.ReturnCode, err)
Expand Down Expand Up @@ -475,6 +480,11 @@ func (service *HTTPRestService) releaseIPAddress(w http.ResponseWriter, r *http.
Message: returnMessage,
}

if resp.ReturnCode == 0 {
// If Response is success i.e. code 0, then publish metrics.
publishIPStateMetrics(service.buildIPState())
}

err = service.Listener.Encode(w, &resp)
logger.Response(service.Name, resp, resp.ReturnCode, err)
}
Expand Down
2 changes: 2 additions & 0 deletions cns/restserver/internalapi.go
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,8 @@ func (service *HTTPRestService) CreateOrUpdateNetworkContainerInternal(req *cns.
// If the NC was created successfully, log NC snapshot.
if returnCode == 0 {
logNCSnapshot(*req)

publishIPStateMetrics(service.buildIPState())
} else {
logger.Errorf(returnMessage)
}
Expand Down
50 changes: 50 additions & 0 deletions cns/restserver/ipusage.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package restserver

import (
"github.com/Azure/azure-container-networking/cns/logger"
"github.com/Azure/azure-container-networking/cns/types"
)

// ipState is a snapshot of IP counts, broken down by IP config state.
// buildIPState fills it in and publishIPStateMetrics reads it.
type ipState struct {
// allocatedIPs are all the IPs given to CNS by DNC.
allocatedIPs int64
// assignedIPs are the IPs CNS gives to Pods.
assignedIPs int64
// availableIPs are the IPs in state "Available".
availableIPs int64
// programmingIPs are the IPs in state "PendingProgramming".
programmingIPs int64
// releasingIPs are the IPs in state "PendingRelease".
releasingIPs int64
}

// buildIPState tallies the entries of service.PodIPConfigState by IP state.
//
// The service lock is held for the whole scan. Every entry counts toward
// allocatedIPs; entries in the Assigned, Available, PendingProgramming or
// PendingRelease state additionally count toward the matching field. The
// resulting counts are logged and returned as a freshly allocated ipState.
func (service *HTTPRestService) buildIPState() *ipState {
	service.Lock()
	defer service.Unlock()

	// The zero value is the correct starting point for every counter.
	var state ipState

	//nolint:gocritic // This has to iterate over the IP Config state to get the counts.
	for _, ipConfig := range service.PodIPConfigState {
		state.allocatedIPs++

		// An entry reports a single state, so read it once and dispatch on it.
		switch ipConfig.GetState() {
		case types.Assigned:
			state.assignedIPs++
		case types.Available:
			state.availableIPs++
		case types.PendingProgramming:
			state.programmingIPs++
		case types.PendingRelease:
			state.releasingIPs++
		default:
			// Any other state is only reflected in allocatedIPs.
		}
	}

	// Log all five counters so the log line agrees with the published gauges.
	logger.Printf(
		"[IP Usage] allocated IPs: %d, assigned IPs: %d, available IPs: %d, pending programming IPs: %d, pending release IPs: %d",
		state.allocatedIPs, state.assignedIPs, state.availableIPs, state.programmingIPs, state.releasingIPs,
	)
	return &state
}
149 changes: 104 additions & 45 deletions cns/restserver/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,51 +10,98 @@ import (
"sigs.k8s.io/controller-runtime/pkg/metrics"
)

var httpRequestLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "http_request_latency_seconds",
Help: "Request latency in seconds by endpoint, verb, and response code.",
//nolint:gomnd // default bucket consts
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
},
[]string{"url", "verb", "cns_return_code"},
// Label names and values used by the metrics in this file.
const (
// subnetLabel, subnetCIDRLabel and podnetARMIDLabel are not referenced in the
// visible part of this file; presumably reserved as label dimensions for the
// IP usage gauges — TODO confirm against callers.
subnetLabel = "subnet"
subnetCIDRLabel = "subnet_cidr"
podnetARMIDLabel = "podnet_arm_id"
// cnsReturnCode is the key used for the CNS return code.
// NOTE(review): its consumers are not visible here — confirm how it is used.
cnsReturnCode = "cns_return_code"
// customerMetricLabel / customerMetricLabelValue form the constant label pair
// attached to every cx_* gauge declared in this file.
customerMetricLabel = "customer_metric"
customerMetricLabelValue = "customer metric"
)

// ipAssignmentLatency is an unlabelled histogram of pod IP assignment latency,
// in seconds, with 15 exponential buckets starting at 1 ms and doubling each step.
var ipAssignmentLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Name: "ip_assignment_latency_seconds",
Help: "Pod IP assignment latency in seconds",
//nolint:gomnd // default bucket consts
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
},
)

// ipConfigStatusStateTransitionTime is a histogram, in seconds, of the time an
// IP configuration status spent before a state transition, labelled by the
// previous and next state.
var ipConfigStatusStateTransitionTime = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "ipconfigstatus_state_transition_seconds",
Help: "Time spent by the IP Configuration Status in each state transition",
//nolint:gomnd // default bucket consts
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
},
[]string{"previous_state", "next_state"},
)

// syncHostNCVersionCount counts Sync Host NC operations, partitioned by the
// "ok" label (success or failure, per the Help text).
var syncHostNCVersionCount = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "sync_host_nc_version_total",
Help: "Count of Sync Host NC by success or failure",
},
[]string{"ok"},
)

// syncHostNCVersionLatency is a histogram, in seconds, of Sync Host NC latency,
// partitioned by the "ok" label.
var syncHostNCVersionLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "sync_host_nc_version_latency_seconds",
Help: "Sync Host NC Latency",
//nolint:gomnd // default bucket consts
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
},
[]string{"ok"},
)
// Prometheus collectors exported by the REST server. The histograms share one
// bucket layout: 15 exponential buckets starting at 1 ms and doubling each step.
var (
// httpRequestLatency records request latency in seconds, labelled by
// url, verb and cns_return_code.
httpRequestLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "http_request_latency_seconds",
Help: "Request latency in seconds by endpoint, verb, and response code.",
//nolint:gomnd // default bucket consts
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
},
[]string{"url", "verb", "cns_return_code"},
)
// ipAssignmentLatency records pod IP assignment latency in seconds (no labels).
ipAssignmentLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Name: "ip_assignment_latency_seconds",
Help: "Pod IP assignment latency in seconds",
//nolint:gomnd // default bucket consts
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
},
)
// ipConfigStatusStateTransitionTime records, in seconds, the time an IP
// configuration status spent before a state transition, labelled by the
// previous and next state.
ipConfigStatusStateTransitionTime = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "ipconfigstatus_state_transition_seconds",
Help: "Time spent by the IP Configuration Status in each state transition",
//nolint:gomnd // default bucket consts
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
},
[]string{"previous_state", "next_state"},
)
// syncHostNCVersionCount counts Sync Host NC operations by the "ok" label.
syncHostNCVersionCount = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "sync_host_nc_version_total",
Help: "Count of Sync Host NC by success or failure",
},
[]string{"ok"},
)
// syncHostNCVersionLatency records Sync Host NC latency in seconds by the "ok" label.
syncHostNCVersionLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "sync_host_nc_version_latency_seconds",
Help: "Sync Host NC Latency",
//nolint:gomnd // default bucket consts
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
},
[]string{"ok"},
)
// The cx_* gauges below carry the constant customer-metric label and declare
// no variable labels yet (empty label-name list).

// allocatedIPCount is the gauge for the number of IPs CNS has allocated.
allocatedIPCount = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "cx_allocated_ips_v2",
Help: "Count of IPs CNS has Allocated",
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
},
[]string{},
)
// assignedIPCount is the gauge for the number of IPs CNS has assigned to pods.
assignedIPCount = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "cx_assigned_ips_v2",
Help: "Count of IPs CNS has Assigned to Pods",
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
},
[]string{},
)
// availableIPCount is the gauge for the number of available IPs.
availableIPCount = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "cx_available_ips_v2",
Help: "Count of IPs Available",
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
},
[]string{},
)
// pendingProgrammingIPCount is the gauge for IPs in the pending-programming state.
pendingProgrammingIPCount = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "cx_pending_programming_ips_v2",
Help: "Count of IPs in Pending Programming State",
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
},
[]string{},
)
// pendingReleaseIPCount is the gauge for IPs in the pending-release state.
pendingReleaseIPCount = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "cx_pending_release_ips_v2",
Help: "Count of IPs in Pending Release State",
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
},
[]string{},
)
)

func init() {
Expand All @@ -64,11 +111,14 @@ func init() {
ipConfigStatusStateTransitionTime,
syncHostNCVersionCount,
syncHostNCVersionLatency,
allocatedIPCount,
assignedIPCount,
availableIPCount,
pendingProgrammingIPCount,
pendingReleaseIPCount,
)
}

const cnsReturnCode = "Cns-Return-Code"

// Every http response is 200 so we really want cns response code.
// Hard to do with middleware unless we deserialize the responses, but making it an explicit header works around it.
// if that doesn't work we could have a separate countervec just for response codes.
Expand All @@ -91,3 +141,12 @@ func stateTransitionMiddleware(i *cns.IPConfigurationStatus, s types.IPState) {
}
ipConfigStatusStateTransitionTime.WithLabelValues(string(i.GetState()), string(s)).Observe(time.Since(i.LastStateTransition).Seconds())
}

// publishIPStateMetrics sets each IP usage gauge to the matching counter in
// state. The gauges currently take no variable label values.
func publishIPStateMetrics(state *ipState) {
	// TODO. ragasthya Add dimensions to the IP Usage metrics.
	var dims []string

	allocated := float64(state.allocatedIPs)
	assigned := float64(state.assignedIPs)
	available := float64(state.availableIPs)
	programming := float64(state.programmingIPs)
	releasing := float64(state.releasingIPs)

	allocatedIPCount.WithLabelValues(dims...).Set(allocated)
	assignedIPCount.WithLabelValues(dims...).Set(assigned)
	availableIPCount.WithLabelValues(dims...).Set(available)
	pendingProgrammingIPCount.WithLabelValues(dims...).Set(programming)
	pendingReleaseIPCount.WithLabelValues(dims...).Set(releasing)
}

0 comments on commit 41f451a

Please sign in to comment.