Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion contrib/kind.sh
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ usage() {
echo "-cn | --cluster-name Configure the kind cluster's name"
echo "-ric | --run-in-container Configure the script to be run from a docker container, allowing it to still communicate with the kind controlplane"
echo "-ehp | --egress-ip-healthcheck-port TCP port used for gRPC session by egress IP node check. DEFAULT: 9107 (Use "0" for legacy dial to port 9)."
echo "-sm | --scale-metrics Enable scale metrics"
echo "--delete Delete current cluster"
echo ""
}
Expand Down Expand Up @@ -281,6 +282,8 @@ parse_args() {
fi
OVN_EGRESSIP_HEALTHCHECK_PORT=$1
;;
-sm | --scale-metrics ) OVN_METRICS_SCALE_ENABLE=true
;;
--delete ) delete
exit
;;
Expand Down Expand Up @@ -337,6 +340,7 @@ print_params() {
echo "OVN_ENABLE_EX_GW_NETWORK_BRIDGE = $OVN_ENABLE_EX_GW_NETWORK_BRIDGE"
echo "OVN_EX_GW_NETWORK_INTERFACE = $OVN_EX_GW_NETWORK_INTERFACE"
echo "OVN_EGRESSIP_HEALTHCHECK_PORT = $OVN_EGRESSIP_HEALTHCHECK_PORT"
echo "OVN_METRICS_SCALE_ENABLE = $OVN_METRICS_SCALE_ENABLE"
echo ""
}

Expand Down Expand Up @@ -454,6 +458,7 @@ set_default_params() {
OVN_HOST_NETWORK_NAMESPACE=${OVN_HOST_NETWORK_NAMESPACE:-ovn-host-network}
OVN_EGRESSIP_HEALTHCHECK_PORT=${OVN_EGRESSIP_HEALTHCHECK_PORT:-9107}
OCI_BIN=${KIND_EXPERIMENTAL_PROVIDER:-docker}
OVN_METRICS_SCALE_ENABLE=${OVN_METRICS_SCALE_ENABLE:-false}
}

detect_apiserver_url() {
Expand Down Expand Up @@ -655,7 +660,8 @@ create_ovn_kube_manifests() {
--egress-qos-enable=true \
--v4-join-subnet="${JOIN_SUBNET_IPV4}" \
--v6-join-subnet="${JOIN_SUBNET_IPV6}" \
--ex-gw-network-interface="${OVN_EX_GW_NETWORK_INTERFACE}"
--ex-gw-network-interface="${OVN_EX_GW_NETWORK_INTERFACE}" \
--ovnkube-metrics-scale-enable="${OVN_METRICS_SCALE_ENABLE}"
popd
}

Expand Down
7 changes: 7 additions & 0 deletions dist/images/daemonset.sh
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ OVN_HOST_NETWORK_NAMESPACE=""
OVN_EX_GW_NETWORK_INTERFACE=""
OVNKUBE_NODE_MGMT_PORT_NETDEV=""
OVNKUBE_CONFIG_DURATION_ENABLE=
OVNKUBE_METRICS_SCALE_ENABLE=
# IN_UPGRADE is true only if called by upgrade-ovn.sh during the upgrade test,
# it will render only the parts in ovn-setup.yaml related to RBAC permissions.
IN_UPGRADE=
Expand Down Expand Up @@ -256,6 +257,9 @@ while [ "$1" != "" ]; do
--ovnkube-config-duration-enable)
OVNKUBE_CONFIG_DURATION_ENABLE=$VALUE
;;
--ovnkube-metrics-scale-enable)
OVNKUBE_METRICS_SCALE_ENABLE=$VALUE
;;
--in-upgrade)
IN_UPGRADE=true
;;
Expand Down Expand Up @@ -393,6 +397,8 @@ ovnkube_node_mgmt_port_netdev=${OVNKUBE_NODE_MGMT_PORT_NETDEV}
echo "ovnkube_node_mgmt_port_netdev: ${ovnkube_node_mgmt_port_netdev}"
ovnkube_config_duration_enable=${OVNKUBE_CONFIG_DURATION_ENABLE}
echo "ovnkube_config_duration_enable: ${ovnkube_config_duration_enable}"
ovnkube_metrics_scale_enable=${OVNKUBE_METRICS_SCALE_ENABLE}
echo "ovnkube_metrics_scale_enable: ${ovnkube_metrics_scale_enable}"

ovn_image=${image} \
ovn_image_pull_policy=${image_pull_policy} \
Expand Down Expand Up @@ -473,6 +479,7 @@ ovn_image=${image} \
ovnkube_logfile_maxbackups=${ovnkube_logfile_maxbackups} \
ovnkube_logfile_maxage=${ovnkube_logfile_maxage} \
ovnkube_config_duration_enable=${ovnkube_config_duration_enable} \
ovnkube_metrics_scale_enable=${ovnkube_metrics_scale_enable} \
ovn_acl_logging_rate_limit=${ovn_acl_logging_rate_limit} \
ovn_hybrid_overlay_net_cidr=${ovn_hybrid_overlay_net_cidr} \
ovn_hybrid_overlay_enable=${ovn_hybrid_overlay_enable} \
Expand Down
8 changes: 8 additions & 0 deletions dist/images/ovnkube.sh
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ ovnkube_node_mode=${OVNKUBE_NODE_MODE:-"full"}
# OVNKUBE_NODE_MGMT_PORT_NETDEV - is the net device to be used for management port
ovnkube_node_mgmt_port_netdev=${OVNKUBE_NODE_MGMT_PORT_NETDEV:-}
ovnkube_config_duration_enable=${OVNKUBE_CONFIG_DURATION_ENABLE:-false}
ovnkube_metrics_scale_enable=${OVNKUBE_METRICS_SCALE_ENABLE:-false}
# OVN_ENCAP_IP - encap IP to be used for OVN traffic on the node
ovn_encap_ip=${OVN_ENCAP_IP:-}

Expand Down Expand Up @@ -982,6 +983,12 @@ ovn-master() {
fi
echo "ovnkube_config_duration_enable_flag: ${ovnkube_config_duration_enable_flag}"

ovnkube_metrics_scale_enable_flag=
if [[ ${ovnkube_metrics_scale_enable} == "true" ]]; then
ovnkube_metrics_scale_enable_flag="--metrics-enable-scale"
fi
echo "ovnkube_metrics_scale_enable_flag: ${ovnkube_metrics_scale_enable_flag}"

echo "=============== ovn-master ========== MASTER ONLY"
/usr/bin/ovnkube \
--init-master ${K8S_NODE} \
Expand All @@ -1008,6 +1015,7 @@ ovn-master() {
${egressfirewall_enabled_flag} \
${egressqos_enabled_flag} \
${ovnkube_config_duration_enable_flag} \
${ovnkube_metrics_scale_enable_flag} \
--metrics-bind-address ${ovnkube_master_metrics_bind_address} \
--host-network-namespace ${ovn_host_network_namespace} &

Expand Down
2 changes: 2 additions & 0 deletions dist/templates/ovnkube-master.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ spec:
value: "{{ ovnkube_logfile_maxage }}"
- name: OVNKUBE_CONFIG_DURATION_ENABLE
value: "{{ ovnkube_config_duration_enable }}"
- name: OVNKUBE_METRICS_SCALE_ENABLE
value: "{{ ovnkube_metrics_scale_enable }}"
- name: OVN_NET_CIDR
valueFrom:
configMapKeyRef:
Expand Down
10 changes: 5 additions & 5 deletions go-controller/pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -326,8 +326,8 @@ type MetricsConfig struct {
NodeServerCert string `gcfg:"node-server-cert"`
// EnableConfigDuration holds the boolean flag to enable OVN-Kubernetes master to monitor OVN-Kubernetes master
// configuration duration and optionally, its application to all nodes
EnableConfigDuration bool `gcfg:"enable-config-duration"`
EnableEIPScaleMetrics bool `gcfg:"enable-eip-scale-metrics"`
EnableConfigDuration bool `gcfg:"enable-config-duration"`
EnableScaleMetrics bool `gcfg:"enable-scale-metrics"`
}

// OVNKubernetesFeatureConfig holds OVN-Kubernetes feature enhancement config file parameters and command-line overrides
Expand Down Expand Up @@ -1015,9 +1015,9 @@ var MetricsFlags = []cli.Flag{
Destination: &cliConfig.Metrics.EnableConfigDuration,
},
&cli.BoolFlag{
Name: "metrics-enable-eip-scale",
Usage: "Enables metrics related to Egress IP scaling",
Destination: &cliConfig.Metrics.EnableEIPScaleMetrics,
Name: "metrics-enable-scale",
Usage: "Enables metrics related to scaling",
Destination: &cliConfig.Metrics.EnableScaleMetrics,
},
}

Expand Down
6 changes: 3 additions & 3 deletions go-controller/pkg/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ enable-pprof=true
node-server-privkey=/path/to/node-metrics-private.key
node-server-cert=/path/to/node-metrics.crt
enable-config-duration=true
enable-eip-scale-metrics=true
enable-scale-metrics=true

[logging]
loglevel=5
Expand Down Expand Up @@ -581,7 +581,7 @@ var _ = Describe("Config Operations", func() {
gomega.Expect(Metrics.NodeServerPrivKey).To(gomega.Equal("/path/to/node-metrics-private.key"))
gomega.Expect(Metrics.NodeServerCert).To(gomega.Equal("/path/to/node-metrics.crt"))
gomega.Expect(Metrics.EnableConfigDuration).To(gomega.Equal(true))
gomega.Expect(Metrics.EnableEIPScaleMetrics).To(gomega.Equal(true))
gomega.Expect(Metrics.EnableScaleMetrics).To(gomega.Equal(true))

gomega.Expect(OvnNorth.Scheme).To(gomega.Equal(OvnDBSchemeSSL))
gomega.Expect(OvnNorth.PrivKey).To(gomega.Equal("/path/to/nb-client-private.key"))
Expand Down Expand Up @@ -667,7 +667,7 @@ var _ = Describe("Config Operations", func() {
gomega.Expect(Metrics.NodeServerPrivKey).To(gomega.Equal("/tls/nodeprivkey"))
gomega.Expect(Metrics.NodeServerCert).To(gomega.Equal("/tls/nodecert"))
gomega.Expect(Metrics.EnableConfigDuration).To(gomega.Equal(true))
gomega.Expect(Metrics.EnableEIPScaleMetrics).To(gomega.Equal(true))
gomega.Expect(Metrics.EnableScaleMetrics).To(gomega.Equal(true))

gomega.Expect(OvnNorth.Scheme).To(gomega.Equal(OvnDBSchemeSSL))
gomega.Expect(OvnNorth.PrivKey).To(gomega.Equal("/client/privkey"))
Expand Down
105 changes: 104 additions & 1 deletion go-controller/pkg/metrics/master.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,17 @@ var MetricMasterReadyDuration = prometheus.NewGauge(prometheus.GaugeOpts{
Help: "The duration for the master to get to ready state",
})

// MetricMasterSyncDuration is a gauge vector reporting, in seconds, how long
// it took to sync and set up all handlers for a given resource. The resource
// is identified by the "resource_name" label.
var MetricMasterSyncDuration = prometheus.NewGaugeVec(
	prometheus.GaugeOpts{
		Namespace: MetricOvnkubeNamespace,
		Subsystem: MetricOvnkubeSubsystemMaster,
		Name:      "sync_duration_seconds",
		Help:      "The duration to sync and setup all handlers for a given resource",
	},
	[]string{"resource_name"},
)

// MetricMasterLeader identifies whether this instance of ovnkube-master is a leader or not
var MetricMasterLeader = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: MetricOvnkubeNamespace,
Expand Down Expand Up @@ -219,6 +230,66 @@ var metricEgressIPRebalanceCount = prometheus.NewCounter(prometheus.CounterOpts{
Help: "The total number of times assigned egress IP(s) needed to be moved to a different node"},
)

// metricNetpolEventLatency is a histogram of the time, in seconds, spent
// handling a full network policy event. The "event" label carries the event
// name. Buckets: 15 exponential buckets starting at 0.004s with factor 2.
var metricNetpolEventLatency = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Namespace: MetricOvnkubeNamespace,
		Subsystem: MetricOvnkubeSubsystemMaster,
		Name:      "network_policy_event_latency_seconds",
		Help:      "The latency of full network policy event handling (create, delete)",
		Buckets:   prometheus.ExponentialBuckets(.004, 2, 15),
	},
	[]string{"event"},
)

// metricNetpolLocalPodEventLatency is a histogram of the time, in seconds,
// spent handling a network policy local pod event. The "event" label carries
// the event name. Buckets: 15 exponential buckets starting at 0.002s with
// factor 2.
var metricNetpolLocalPodEventLatency = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Namespace: MetricOvnkubeNamespace,
		Subsystem: MetricOvnkubeSubsystemMaster,
		Name:      "network_policy_local_pod_event_latency_seconds",
		Help:      "The latency of local pod events handling (add, delete)",
		Buckets:   prometheus.ExponentialBuckets(.002, 2, 15),
	},
	[]string{"event"},
)

// metricNetpolPeerPodEventLatency is a histogram of the time, in seconds,
// spent handling a network policy peer pod event. The "event" label carries
// the event name. Buckets: 15 exponential buckets starting at 0.002s with
// factor 2.
var metricNetpolPeerPodEventLatency = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Namespace: MetricOvnkubeNamespace,
		Subsystem: MetricOvnkubeSubsystemMaster,
		Name:      "network_policy_peer_pod_event_latency_seconds",
		Help:      "The latency of peer pod events handling (add, delete)",
		Buckets:   prometheus.ExponentialBuckets(.002, 2, 15),
	},
	[]string{"event"},
)

// metricNetpolPeerNamespaceEventLatency is a histogram of the time, in
// seconds, spent handling a network policy peer namespace event. The "event"
// label carries the event name. Buckets: 15 exponential buckets starting at
// 0.002s with factor 2.
var metricNetpolPeerNamespaceEventLatency = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Namespace: MetricOvnkubeNamespace,
		Subsystem: MetricOvnkubeSubsystemMaster,
		Name:      "network_policy_peer_namespace_event_latency_seconds",
		Help:      "The latency of peer namespace events handling (add, delete)",
		Buckets:   prometheus.ExponentialBuckets(.002, 2, 15),
	},
	[]string{"event"},
)

// metricNetpolPeerNamespaceAndPodEventLatency is a histogram of the time, in
// seconds, spent handling a network policy peer namespace-and-pod event. The
// "event" label carries the event name. Buckets: 15 exponential buckets
// starting at 0.002s with factor 2.
//
// The help text is deliberately distinct from the one used by
// metricNetpolPeerNamespaceEventLatency so the two metrics can be told apart
// in the exposition output.
var metricNetpolPeerNamespaceAndPodEventLatency = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Namespace: MetricOvnkubeNamespace,
		Subsystem: MetricOvnkubeSubsystemMaster,
		Name:      "network_policy_peer_namespace_and_pod_event_latency_seconds",
		Help:      "The latency of peer namespace and pod events handling (add, delete)",
		Buckets:   prometheus.ExponentialBuckets(.002, 2, 15),
	},
	[]string{"event"},
)

// metricPodEventLatency is a histogram of the time, in seconds, spent
// handling a pod event. The "event" label carries the event name. Buckets:
// 15 exponential buckets starting at 0.002s with factor 2.
var metricPodEventLatency = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Namespace: MetricOvnkubeNamespace,
		Subsystem: MetricOvnkubeSubsystemMaster,
		Name:      "pod_event_latency_seconds",
		Help:      "The latency of pod events handling (add, update, delete)",
		Buckets:   prometheus.ExponentialBuckets(.002, 2, 15),
	},
	[]string{"event"},
)

var metricEgressFirewallRuleCount = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: MetricOvnkubeNamespace,
Subsystem: MetricOvnkubeSubsystemMaster,
Expand Down Expand Up @@ -317,6 +388,7 @@ const (
func RegisterMasterBase() {
prometheus.MustRegister(MetricMasterLeader)
prometheus.MustRegister(MetricMasterReadyDuration)
prometheus.MustRegister(MetricMasterSyncDuration)
prometheus.MustRegister(prometheus.NewGaugeFunc(
prometheus.GaugeOpts{
Namespace: MetricOvnkubeNamespace,
Expand Down Expand Up @@ -374,9 +446,16 @@ func RegisterMasterFunctional() {
prometheus.MustRegister(metricV4AllocatedHostSubnetCount)
prometheus.MustRegister(metricV6AllocatedHostSubnetCount)
prometheus.MustRegister(metricEgressIPCount)
if config.Metrics.EnableEIPScaleMetrics {
if config.Metrics.EnableScaleMetrics {
klog.Infof("Scale metrics are enabled")
prometheus.MustRegister(metricEgressIPAssignLatency)
prometheus.MustRegister(metricEgressIPUnassignLatency)
prometheus.MustRegister(metricNetpolEventLatency)
prometheus.MustRegister(metricNetpolLocalPodEventLatency)
prometheus.MustRegister(metricNetpolPeerPodEventLatency)
prometheus.MustRegister(metricNetpolPeerNamespaceEventLatency)
prometheus.MustRegister(metricNetpolPeerNamespaceAndPodEventLatency)
prometheus.MustRegister(metricPodEventLatency)
}
prometheus.MustRegister(metricEgressIPNodeUnreacheableCount)
prometheus.MustRegister(metricEgressIPRebalanceCount)
Expand Down Expand Up @@ -489,6 +568,30 @@ func RecordEgressIPRebalance(count int) {
metricEgressIPRebalanceCount.Add(float64(count))
}

// RecordNetpolEvent adds one observation to the network policy event latency
// histogram. eventName is used as the "event" label value and duration is
// recorded in seconds.
func RecordNetpolEvent(eventName string, duration time.Duration) {
	elapsedSeconds := duration.Seconds()
	metricNetpolEventLatency.WithLabelValues(eventName).Observe(elapsedSeconds)
}

// RecordNetpolLocalPodEvent adds one observation to the network policy local
// pod event latency histogram. eventName is used as the "event" label value
// and duration is recorded in seconds.
func RecordNetpolLocalPodEvent(eventName string, duration time.Duration) {
	elapsedSeconds := duration.Seconds()
	metricNetpolLocalPodEventLatency.WithLabelValues(eventName).Observe(elapsedSeconds)
}

// RecordNetpolPeerPodEvent adds one observation to the network policy peer
// pod event latency histogram. eventName is used as the "event" label value
// and duration is recorded in seconds.
func RecordNetpolPeerPodEvent(eventName string, duration time.Duration) {
	elapsedSeconds := duration.Seconds()
	metricNetpolPeerPodEventLatency.WithLabelValues(eventName).Observe(elapsedSeconds)
}

// RecordNetpolPeerNamespaceEvent adds one observation to the network policy
// peer namespace event latency histogram. eventName is used as the "event"
// label value and duration is recorded in seconds.
func RecordNetpolPeerNamespaceEvent(eventName string, duration time.Duration) {
	elapsedSeconds := duration.Seconds()
	metricNetpolPeerNamespaceEventLatency.WithLabelValues(eventName).Observe(elapsedSeconds)
}

// RecordNetpolPeerNamespaceAndPodEvent adds one observation to the network
// policy peer namespace-and-pod event latency histogram. eventName is used as
// the "event" label value and duration is recorded in seconds.
func RecordNetpolPeerNamespaceAndPodEvent(eventName string, duration time.Duration) {
	elapsedSeconds := duration.Seconds()
	metricNetpolPeerNamespaceAndPodEventLatency.WithLabelValues(eventName).Observe(elapsedSeconds)
}

// RecordPodEvent adds one observation to the pod event latency histogram.
// eventName is used as the "event" label value and duration is recorded in
// seconds.
func RecordPodEvent(eventName string, duration time.Duration) {
	elapsedSeconds := duration.Seconds()
	metricPodEventLatency.WithLabelValues(eventName).Observe(elapsedSeconds)
}

// UpdateEgressFirewallRuleCount records the number of Egress firewall rules.
func UpdateEgressFirewallRuleCount(count float64) {
metricEgressFirewallRuleCount.Add(count)
Expand Down
4 changes: 2 additions & 2 deletions go-controller/pkg/ovn/egressip.go
Original file line number Diff line number Diff line change
Expand Up @@ -2171,7 +2171,7 @@ func (oc *Controller) addStandByEgressIPAssignment(podKey string, podStatus *pod
// (routing pod traffic to the egress node) and NAT objects on the egress node
// (SNAT-ing to the egress IP).
func (e *egressIPController) addPodEgressIPAssignment(egressIPName string, status egressipv1.EgressIPStatusItem, pod *kapi.Pod, podIPs []*net.IPNet) (err error) {
if config.Metrics.EnableEIPScaleMetrics {
if config.Metrics.EnableScaleMetrics {
start := time.Now()
defer func() {
if err != nil {
Expand Down Expand Up @@ -2199,7 +2199,7 @@ func (e *egressIPController) addPodEgressIPAssignment(egressIPName string, statu
// deletePodEgressIPAssignment deletes the OVN programmed egress IP
// configuration mentioned for addPodEgressIPAssignment.
func (e *egressIPController) deletePodEgressIPAssignment(egressIPName string, status egressipv1.EgressIPStatusItem, podIPs []*net.IPNet) (err error) {
if config.Metrics.EnableEIPScaleMetrics {
if config.Metrics.EnableScaleMetrics {
start := time.Now()
defer func() {
if err != nil {
Expand Down
Loading