From f172a5a0e20efe48e1ab084b4447a735c03be91e Mon Sep 17 00:00:00 2001 From: Kobi Samoray Date: Tue, 31 Dec 2019 17:55:17 +0200 Subject: [PATCH] Antrea Prometheus integration (#236) Integrate with the Prometheus monitoring solution. Integration of the Prometheus client into the Antrea controller and agent allows the exposure of various metrics to the Prometheus server. In addition to Antrea's own set of metrics, the Prometheus client will also expose metrics which are defined by various components which are part of the Antrea ecosystem, e.g. Golang, Prometheus itself, etc. --- build/yamls/antrea-eks.yml | 13 ++- build/yamls/antrea-gke.yml | 13 ++- build/yamls/antrea-ipsec.yml | 13 ++- build/yamls/antrea.yml | 13 ++- build/yamls/base/agent.yml | 1 - build/yamls/base/conf/antrea-agent.conf | 3 + build/yamls/base/conf/antrea-controller.conf | 3 + cmd/antrea-agent/agent.go | 11 +- cmd/antrea-agent/config.go | 3 + cmd/antrea-controller/config.go | 3 + cmd/antrea-controller/controller.go | 15 ++- go.mod | 1 + pkg/agent/agent.go | 23 +---- pkg/agent/agent_test.go | 33 ------ pkg/agent/apiserver/apiserver.go | 12 ++- pkg/agent/metrics/prometheus.go | 103 +++++++++++++++++++ pkg/controller/metrics/prometheus.go | 40 +++++++ pkg/util/env/env.go | 54 ++++++++++ pkg/util/env/env_test.go | 76 ++++++++++++++ 19 files changed, 356 insertions(+), 77 deletions(-) create mode 100644 pkg/agent/metrics/prometheus.go create mode 100644 pkg/controller/metrics/prometheus.go create mode 100644 pkg/util/env/env.go create mode 100644 pkg/util/env/env_test.go diff --git a/build/yamls/antrea-eks.yml b/build/yamls/antrea-eks.yml index e402288763a..6038cf296ab 100644 --- a/build/yamls/antrea-eks.yml +++ b/build/yamls/antrea-eks.yml @@ -332,6 +332,9 @@ data: # Note that if it's set to another value, the `containerPort` of the `api` port of the # `antrea-agent` container must be set to the same value. #apiPort: 10350 + + # Enable metrics exposure via Prometheus. 
Initializes Prometheus metrics listener + #enablePrometheusMetrics: false antrea-cni.conflist: | { "cniVersion":"0.3.0", @@ -354,12 +357,15 @@ data: # Note that if it's set to another value, the `containerPort` of the `api` port of the # `antrea-controller` container must be set to the same value. #apiPort: 10349 + + # Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener. + #enablePrometheusMetrics: false kind: ConfigMap metadata: annotations: {} labels: app: antrea - name: antrea-config-t4m46b8f6h + name: antrea-config-td846cf4bm namespace: kube-system --- apiVersion: v1 @@ -459,7 +465,7 @@ spec: key: node-role.kubernetes.io/master volumes: - configMap: - name: antrea-config-t4m46b8f6h + name: antrea-config-td846cf4bm name: antrea-config - hostPath: path: /var/log/antrea @@ -555,7 +561,6 @@ spec: name: antrea-agent ports: - containerPort: 10350 - hostPort: 10350 name: api protocol: TCP readinessProbe: @@ -658,7 +663,7 @@ spec: operator: Exists volumes: - configMap: - name: antrea-config-t4m46b8f6h + name: antrea-config-td846cf4bm name: antrea-config - hostPath: path: /etc/cni/net.d diff --git a/build/yamls/antrea-gke.yml b/build/yamls/antrea-gke.yml index ce07e6cab03..3e274d0e9da 100644 --- a/build/yamls/antrea-gke.yml +++ b/build/yamls/antrea-gke.yml @@ -332,6 +332,9 @@ data: # Note that if it's set to another value, the `containerPort` of the `api` port of the # `antrea-agent` container must be set to the same value. #apiPort: 10350 + + # Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener + #enablePrometheusMetrics: false antrea-cni.conflist: | { "cniVersion":"0.3.0", @@ -354,12 +357,15 @@ data: # Note that if it's set to another value, the `containerPort` of the `api` port of the # `antrea-controller` container must be set to the same value. #apiPort: 10349 + + # Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener. 
+ #enablePrometheusMetrics: false kind: ConfigMap metadata: annotations: {} labels: app: antrea - name: antrea-config-5754dg84hf + name: antrea-config-h45gtb8dbg namespace: kube-system --- apiVersion: v1 @@ -459,7 +465,7 @@ spec: key: node-role.kubernetes.io/master volumes: - configMap: - name: antrea-config-5754dg84hf + name: antrea-config-h45gtb8dbg name: antrea-config - hostPath: path: /var/log/antrea @@ -555,7 +561,6 @@ spec: name: antrea-agent ports: - containerPort: 10350 - hostPort: 10350 name: api protocol: TCP readinessProbe: @@ -658,7 +663,7 @@ spec: operator: Exists volumes: - configMap: - name: antrea-config-5754dg84hf + name: antrea-config-h45gtb8dbg name: antrea-config - hostPath: path: /etc/cni/net.d diff --git a/build/yamls/antrea-ipsec.yml b/build/yamls/antrea-ipsec.yml index d1f40e234a5..c959e237b4b 100644 --- a/build/yamls/antrea-ipsec.yml +++ b/build/yamls/antrea-ipsec.yml @@ -332,6 +332,9 @@ data: # Note that if it's set to another value, the `containerPort` of the `api` port of the # `antrea-agent` container must be set to the same value. #apiPort: 10350 + + # Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener + #enablePrometheusMetrics: false antrea-cni.conflist: | { "cniVersion":"0.3.0", @@ -354,12 +357,15 @@ data: # Note that if it's set to another value, the `containerPort` of the `api` port of the # `antrea-controller` container must be set to the same value. #apiPort: 10349 + + # Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener. 
+ #enablePrometheusMetrics: false kind: ConfigMap metadata: annotations: {} labels: app: antrea - name: antrea-config-c7579447k2 + name: antrea-config-d7m57h87ck namespace: kube-system --- apiVersion: v1 @@ -468,7 +474,7 @@ spec: key: node-role.kubernetes.io/master volumes: - configMap: - name: antrea-config-c7579447k2 + name: antrea-config-d7m57h87ck name: antrea-config - hostPath: path: /var/log/antrea @@ -596,7 +602,6 @@ spec: name: antrea-agent ports: - containerPort: 10350 - hostPort: 10350 name: api protocol: TCP readinessProbe: @@ -699,7 +704,7 @@ spec: operator: Exists volumes: - configMap: - name: antrea-config-c7579447k2 + name: antrea-config-d7m57h87ck name: antrea-config - hostPath: path: /etc/cni/net.d diff --git a/build/yamls/antrea.yml b/build/yamls/antrea.yml index 5b025c5b0e4..d9926c57ed2 100644 --- a/build/yamls/antrea.yml +++ b/build/yamls/antrea.yml @@ -332,6 +332,9 @@ data: # Note that if it's set to another value, the `containerPort` of the `api` port of the # `antrea-agent` container must be set to the same value. #apiPort: 10350 + + # Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener + #enablePrometheusMetrics: false antrea-cni.conflist: | { "cniVersion":"0.3.0", @@ -354,12 +357,15 @@ data: # Note that if it's set to another value, the `containerPort` of the `api` port of the # `antrea-controller` container must be set to the same value. #apiPort: 10349 + + # Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener. 
+ #enablePrometheusMetrics: false kind: ConfigMap metadata: annotations: {} labels: app: antrea - name: antrea-config-428d4tg64g + name: antrea-config-f95kf94mk9 namespace: kube-system --- apiVersion: v1 @@ -459,7 +465,7 @@ spec: key: node-role.kubernetes.io/master volumes: - configMap: - name: antrea-config-428d4tg64g + name: antrea-config-f95kf94mk9 name: antrea-config - hostPath: path: /var/log/antrea @@ -555,7 +561,6 @@ spec: name: antrea-agent ports: - containerPort: 10350 - hostPort: 10350 name: api protocol: TCP readinessProbe: @@ -658,7 +663,7 @@ spec: operator: Exists volumes: - configMap: - name: antrea-config-428d4tg64g + name: antrea-config-f95kf94mk9 name: antrea-config - hostPath: path: /etc/cni/net.d diff --git a/build/yamls/base/agent.yml b/build/yamls/base/agent.yml index e801f5dca26..4efe36fe2c4 100644 --- a/build/yamls/base/agent.yml +++ b/build/yamls/base/agent.yml @@ -77,7 +77,6 @@ spec: fieldPath: spec.nodeName ports: - containerPort: 10350 - hostPort: 10350 name: api protocol: TCP livenessProbe: diff --git a/build/yamls/base/conf/antrea-agent.conf b/build/yamls/base/conf/antrea-agent.conf index 108596bbf02..8d4e53eae09 100644 --- a/build/yamls/base/conf/antrea-agent.conf +++ b/build/yamls/base/conf/antrea-agent.conf @@ -46,3 +46,6 @@ # Note that if it's set to another value, the `containerPort` of the `api` port of the # `antrea-agent` container must be set to the same value. #apiPort: 10350 + +# Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener +#enablePrometheusMetrics: false diff --git a/build/yamls/base/conf/antrea-controller.conf b/build/yamls/base/conf/antrea-controller.conf index 3823f0723c3..d6dda5bbccd 100644 --- a/build/yamls/base/conf/antrea-controller.conf +++ b/build/yamls/base/conf/antrea-controller.conf @@ -2,3 +2,6 @@ # Note that if it's set to another value, the `containerPort` of the `api` port of the # `antrea-controller` container must be set to the same value. 
#apiPort: 10349 + +# Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener. +#enablePrometheusMetrics: false diff --git a/cmd/antrea-agent/agent.go b/cmd/antrea-agent/agent.go index 685bd388afe..cfef57e51aa 100644 --- a/cmd/antrea-agent/agent.go +++ b/cmd/antrea-agent/agent.go @@ -30,6 +30,7 @@ import ( "github.com/vmware-tanzu/antrea/pkg/agent/controller/networkpolicy" "github.com/vmware-tanzu/antrea/pkg/agent/controller/noderoute" "github.com/vmware-tanzu/antrea/pkg/agent/interfacestore" + "github.com/vmware-tanzu/antrea/pkg/agent/metrics" "github.com/vmware-tanzu/antrea/pkg/agent/openflow" "github.com/vmware-tanzu/antrea/pkg/agent/querier" "github.com/vmware-tanzu/antrea/pkg/agent/route" @@ -161,11 +162,19 @@ func run(o *Options) error { ovsBridgeClient, networkPolicyController) + if o.config.EnablePrometheusMetrics { + metrics.InitializePrometheusMetrics(o.config.OVSBridge, ifaceStore, ofClient) + } + agentMonitor := monitor.NewAgentMonitor(crdClient, agentQuerier) go agentMonitor.Run(stopCh) - apiServer, err := apiserver.New(agentQuerier, networkPolicyController, o.config.APIPort) + apiServer, err := apiserver.New( + agentQuerier, + networkPolicyController, + o.config.APIPort, + o.config.EnablePrometheusMetrics) if err != nil { return fmt.Errorf("error when creating agent API server: %v", err) } diff --git a/cmd/antrea-agent/config.go b/cmd/antrea-agent/config.go index 4dfaace5109..25de498b6cd 100644 --- a/cmd/antrea-agent/config.go +++ b/cmd/antrea-agent/config.go @@ -75,4 +75,7 @@ type AgentConfig struct { // APIPort is the port for the antrea-agent APIServer to serve on. // Defaults to 10350. APIPort int `yaml:"apiPort,omitempty"` + // Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener + // Defaults to false. 
+ EnablePrometheusMetrics bool `yaml:"enablePrometheusMetrics,omitempty"` } diff --git a/cmd/antrea-controller/config.go b/cmd/antrea-controller/config.go index cca42592507..1c52730be92 100644 --- a/cmd/antrea-controller/config.go +++ b/cmd/antrea-controller/config.go @@ -25,4 +25,7 @@ type ControllerConfig struct { // APIPort is the port for the antrea-controller APIServer to serve on. // Defaults to 10349. APIPort int `yaml:"apiPort,omitempty"` + // Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener + // Defaults to false. + EnablePrometheusMetrics bool `yaml:"enablePrometheusMetrics,omitempty"` } diff --git a/cmd/antrea-controller/controller.go b/cmd/antrea-controller/controller.go index f01aa7c2d25..b9e9f20d65c 100644 --- a/cmd/antrea-controller/controller.go +++ b/cmd/antrea-controller/controller.go @@ -28,6 +28,7 @@ import ( "github.com/vmware-tanzu/antrea/pkg/apiserver" "github.com/vmware-tanzu/antrea/pkg/apiserver/openapi" "github.com/vmware-tanzu/antrea/pkg/apiserver/storage" + "github.com/vmware-tanzu/antrea/pkg/controller/metrics" "github.com/vmware-tanzu/antrea/pkg/controller/networkpolicy" "github.com/vmware-tanzu/antrea/pkg/controller/networkpolicy/store" "github.com/vmware-tanzu/antrea/pkg/controller/querier" @@ -78,7 +79,8 @@ func run(o *Options) error { addressGroupStore, appliedToGroupStore, networkPolicyStore, - controllerQuerier) + controllerQuerier, + o.config.EnablePrometheusMetrics) if err != nil { return fmt.Errorf("error creating API server config: %v", err) } @@ -100,6 +102,10 @@ func run(o *Options) error { go apiServer.GenericAPIServer.PrepareRun().Run(stopCh) + if o.config.EnablePrometheusMetrics { + metrics.InitializePrometheusMetrics() + } + <-stopCh klog.Info("Stopping Antrea controller") return nil @@ -110,13 +116,17 @@ func createAPIServerConfig(kubeconfig string, addressGroupStore storage.Interface, appliedToGroupStore storage.Interface, networkPolicyStore storage.Interface, - controllerQuerier 
querier.ControllerQuerier) (*apiserver.Config, error) { + controllerQuerier querier.ControllerQuerier, + enableMetrics bool) (*apiserver.Config, error) { // TODO: // 1. Support user-provided certificate. secureServing := genericoptions.NewSecureServingOptions().WithLoopback() authentication := genericoptions.NewDelegatingAuthenticationOptions() authorization := genericoptions.NewDelegatingAuthorizationOptions() + if enableMetrics { + authorization.WithAlwaysAllowPaths("/metrics") + } // Set the PairName but leave certificate directory blank to generate in-memory by default secureServing.ServerCert.CertDirectory = "" secureServing.ServerCert.PairName = "antrea-apiserver" @@ -146,6 +156,7 @@ func createAPIServerConfig(kubeconfig string, openapi.GetOpenAPIDefinitions, genericopenapi.NewDefinitionNamer(apiserver.Scheme)) serverConfig.OpenAPIConfig.Info.Title = "Antrea" + serverConfig.EnableMetrics = enableMetrics return apiserver.NewConfig( serverConfig, diff --git a/go.mod b/go.mod index 331be93ba33..0c62dc187b3 100644 --- a/go.mod +++ b/go.mod @@ -25,6 +25,7 @@ require ( github.com/imdario/mergo v0.3.7 // indirect github.com/j-keck/arping v1.0.0 github.com/kevinburke/ssh_config v0.0.0-20190725054713-01f96b0aa0cd + github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829 github.com/satori/go.uuid v1.2.0 github.com/sirupsen/logrus v1.4.1 github.com/spf13/cobra v0.0.5 diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index 54454608358..9d56fc0813f 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -37,12 +37,11 @@ import ( "github.com/vmware-tanzu/antrea/pkg/agent/types" "github.com/vmware-tanzu/antrea/pkg/agent/util" "github.com/vmware-tanzu/antrea/pkg/ovs/ovsconfig" + "github.com/vmware-tanzu/antrea/pkg/util/env" ) const ( maxRetryForHostLink = 5 - // nodeNameEnvKey is environment variable. - nodeNameEnvKey = "NODE_NAME" // ipsecPSKEnvKey is environment variable. 
ipsecPSKEnvKey = "ANTREA_IPSEC_PSK" roundNumKey = "roundNum" // round number key in externalIDs. @@ -436,7 +435,7 @@ func (i *Initializer) setupDefaultTunnelInterface(tunnelPortName string) error { // initNodeLocalConfig retrieves node's subnet CIDR from node.spec.PodCIDR, which is used for IPAM and setup // host gateway interface. func (i *Initializer) initNodeLocalConfig() error { - nodeName, err := getNodeName() + nodeName, err := env.GetNodeName() if err != nil { return err } @@ -476,24 +475,6 @@ func (i *Initializer) initNodeLocalConfig() error { return nil } -// getNodeName returns the node's name used in Kubernetes, based on the priority: -// - Environment variable NODE_NAME, which should be set by Downward API -// - OS's hostname -func getNodeName() (string, error) { - nodeName := os.Getenv(nodeNameEnvKey) - if nodeName != "" { - return nodeName, nil - } - klog.Infof("Environment variable %s not found, using hostname instead", nodeNameEnvKey) - var err error - nodeName, err = os.Hostname() - if err != nil { - klog.Errorf("Failed to get local hostname: %v", err) - return "", err - } - return nodeName, nil -} - // readIPSecPSK reads the IPSec PSK value from environment variable // ANTREA_IPSEC_PSK, when enableIPSecTunnel is set to true. 
func (i *Initializer) readIPSecPSK() error { diff --git a/pkg/agent/agent_test.go b/pkg/agent/agent_test.go index 33930ad4641..f7fbaff4448 100644 --- a/pkg/agent/agent_test.go +++ b/pkg/agent/agent_test.go @@ -17,7 +17,6 @@ package agent import ( "fmt" "net" - "os" "testing" mock "github.com/golang/mock/gomock" @@ -30,38 +29,6 @@ import ( ovsconfigtest "github.com/vmware-tanzu/antrea/pkg/ovs/ovsconfig/testing" ) -func TestGetNodeName(t *testing.T) { - hostName, err := os.Hostname() - if err != nil { - t.Fatalf("Failed to retrieve hostname, %v", err) - } - testTable := map[string]string{ - "node1": "node1", - "node_12": "node_12", - "": hostName, - "node-1234": "node-1234", - } - - for k, v := range testTable { - compareNodeName(k, v, t) - } -} - -func compareNodeName(k, v string, t *testing.T) { - if k != "" { - _ = os.Setenv(nodeNameEnvKey, k) - defer os.Unsetenv(nodeNameEnvKey) - } - nodeName, err := getNodeName() - if err != nil { - t.Errorf("Failure with expected name %s, %v", k, err) - return - } - if nodeName != v { - t.Errorf("Failed to retrieve nodename, want: %s, get: %s", v, nodeName) - } -} - func newAgentInitializer(ovsBridgeClient ovsconfig.OVSBridgeClient, ifaceStore interfacestore.InterfaceStore) *Initializer { return &Initializer{ovsBridgeClient: ovsBridgeClient, ifaceStore: ifaceStore, hostGateway: "gw0"} } diff --git a/pkg/agent/apiserver/apiserver.go b/pkg/agent/apiserver/apiserver.go index 425d932c00e..9a525940d13 100644 --- a/pkg/agent/apiserver/apiserver.go +++ b/pkg/agent/apiserver/apiserver.go @@ -62,8 +62,9 @@ func installHandlers(aq agentquerier.AgentQuerier, npq querier.AgentNetworkPolic } // New creates an APIServer for running in antrea agent. 
-func New(aq agentquerier.AgentQuerier, npq querier.AgentNetworkPolicyInfoQuerier, bindPort int) (*agentAPIServer, error) { - cfg, err := newConfig(bindPort) +func New(aq agentquerier.AgentQuerier, npq querier.AgentNetworkPolicyInfoQuerier, bindPort int, + enableMetrics bool) (*agentAPIServer, error) { + cfg, err := newConfig(bindPort, enableMetrics) if err != nil { return nil, err } @@ -75,11 +76,15 @@ func New(aq agentquerier.AgentQuerier, npq querier.AgentNetworkPolicyInfoQuerier return &agentAPIServer{GenericAPIServer: s}, nil } -func newConfig(bindPort int) (*genericapiserver.CompletedConfig, error) { +func newConfig(bindPort int, enableMetrics bool) (*genericapiserver.CompletedConfig, error) { secureServing := genericoptions.NewSecureServingOptions().WithLoopback() authentication := genericoptions.NewDelegatingAuthenticationOptions() authorization := genericoptions.NewDelegatingAuthorizationOptions() + if enableMetrics { + authorization.WithAlwaysAllowPaths("/metrics") + } + // Set the PairName but leave certificate directory blank to generate in-memory by default. secureServing.ServerCert.CertDirectory = "" secureServing.ServerCert.PairName = Name @@ -106,6 +111,7 @@ func newConfig(bindPort int) (*genericapiserver.CompletedConfig, error) { GitTreeState: antreaversion.GitTreeState, GitCommit: antreaversion.GetGitSHA(), } + serverConfig.EnableMetrics = enableMetrics completedServerCfg := serverConfig.Complete(nil) return &completedServerCfg, nil diff --git a/pkg/agent/metrics/prometheus.go b/pkg/agent/metrics/prometheus.go new file mode 100644 index 00000000000..d61726b64c2 --- /dev/null +++ b/pkg/agent/metrics/prometheus.go @@ -0,0 +1,103 @@ +// Copyright 2020 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package metrics + +import ( + "k8s.io/klog" + "strconv" + + "github.com/prometheus/client_golang/prometheus" + + "github.com/vmware-tanzu/antrea/pkg/agent/interfacestore" + "github.com/vmware-tanzu/antrea/pkg/agent/openflow" + "github.com/vmware-tanzu/antrea/pkg/util/env" +) + +type OVSStatManager struct { + ofClient openflow.Client + OVSBridge string + OVSTableDesc *prometheus.Desc +} + +func (c *OVSStatManager) GetOVSStatistics() (ovsFlowsByTable map[string]float64) { + ovsFlowsByTable = make(map[string]float64) + flowTableStatus := c.ofClient.GetFlowTableStatus() + for _, tableStatus := range flowTableStatus { + ovsFlowsByTable[strconv.Itoa(int(tableStatus.ID))] = float64(tableStatus.FlowCount) + } + return +} + +func (c *OVSStatManager) Describe(ch chan<- *prometheus.Desc) { + ch <- c.OVSTableDesc +} + +func (c *OVSStatManager) Collect(ch chan<- prometheus.Metric) { + ovsFlowsByTable := c.GetOVSStatistics() + for tableId, tableFlowCount := range ovsFlowsByTable { + ch <- prometheus.MustNewConstMetric( + c.OVSTableDesc, + prometheus.GaugeValue, + tableFlowCount, + tableId, + ) + } +} + +func NewOVSStatManager(ovsBridge string, ofClient openflow.Client) *OVSStatManager { + return &OVSStatManager{ + ofClient: ofClient, + OVSBridge: ovsBridge, + OVSTableDesc: prometheus.NewDesc( + "antrea_agent_ovs_flow_table", + "OVS flow table flow count.", + []string{"table_id"}, + prometheus.Labels{"bridge": ovsBridge}, + ), + } +} + +func InitializePrometheusMetrics( + ovsBridge string, + ifaceStore interfacestore.InterfaceStore, + ofClient openflow.Client) 
{ + + klog.Info("Binding antrea_local_pod_count") + if err := prometheus.Register(prometheus.NewGaugeFunc( + prometheus.GaugeOpts{ + Name: "antrea_agent_local_pod_count", + Help: "Number of pods on local node.", + }, + func() float64 { return float64(ifaceStore.GetContainerInterfaceNum()) }, + )); err != nil { + klog.Error("Failed to register local_pod_count with Prometheus") + } + + nodeName, err := env.GetNodeName() + if err != nil { + klog.Errorf("Failed to retrieve agent K8S node name: %v", err) + } + + gaugeHost := prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "antrea_agent_runtime_info", + Help: "Antrea agent runtime info , defined as a labels. The value of the gauge is always set to 1.", + ConstLabels: prometheus.Labels{"k8s_nodename": nodeName, "k8s_podname": env.GetPodName()}, + }) + gaugeHost.Set(1) + prometheus.MustRegister(gaugeHost) + + ovsStats := NewOVSStatManager(ovsBridge, ofClient) + prometheus.MustRegister(ovsStats) +} diff --git a/pkg/controller/metrics/prometheus.go b/pkg/controller/metrics/prometheus.go new file mode 100644 index 00000000000..05a8af0ccc3 --- /dev/null +++ b/pkg/controller/metrics/prometheus.go @@ -0,0 +1,40 @@ +// Copyright 2020 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/vmware-tanzu/antrea/pkg/util/env" + "k8s.io/klog" +) + +// InitializePrometheusMetrics initializes Prometheus metrics collection. 
+func InitializePrometheusMetrics() { + + nodeName, err := env.GetNodeName() + if err != nil { + klog.Errorf("Failed to retrieve controller K8S node name: %v", err) + } + + klog.Info("Initializing prometheus") + gaugeHost := prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "antrea_controller_runtime_info", + Help: "Antrea controller runtime info, defined as a labels. The value of the gauge is always set to 1.", + ConstLabels: prometheus.Labels{"k8s_nodename": nodeName, "k8s_podname": env.GetPodName()}, + }) + gaugeHost.Set(1) + prometheus.MustRegister(gaugeHost) +} + diff --git a/pkg/util/env/env.go b/pkg/util/env/env.go new file mode 100644 index 00000000000..77b6915d4c7 --- /dev/null +++ b/pkg/util/env/env.go @@ -0,0 +1,54 @@ +// Copyright 2020 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package env + +import ( + "os" + + "k8s.io/klog" +) + +// Names of the environment variables read by this package. 
+const ( + nodeNameEnvKey = "NODE_NAME" + podNameEnvKey = "POD_NAME" +) + +// GetNodeName returns the node's name used in Kubernetes, based on the priority: +// - Environment variable NODE_NAME, which should be set by Downward API +// - OS's hostname +func GetNodeName() (string, error) { + nodeName := os.Getenv(nodeNameEnvKey) + if nodeName != "" { + return nodeName, nil + } + klog.Infof("Environment variable %s not found, using hostname instead", nodeNameEnvKey) + var err error + nodeName, err = os.Hostname() + if err != nil { + klog.Errorf("Failed to get local hostname: %v", err) + return "", err + } + return nodeName, nil +} + +// GetPodName returns the pod name where the code executes +func GetPodName() (string) { + podName := os.Getenv(podNameEnvKey) + if podName == "" { + klog.Warningf("Environment variable %s not found", podNameEnvKey) + } + return podName +} diff --git a/pkg/util/env/env_test.go b/pkg/util/env/env_test.go new file mode 100644 index 00000000000..e6ddefcc877 --- /dev/null +++ b/pkg/util/env/env_test.go @@ -0,0 +1,76 @@ +// Copyright 2020 Antrea Authors + +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package env + +import ( + "os" + "testing" +) + +func TestGetNodeName(t *testing.T) { + hostName, err := os.Hostname() + if err != nil { + t.Fatalf("Failed to retrieve hostname: %v", err) + } + testTable := map[string]string{ + "node1": "node1", + "node_12": "node_12", + "": hostName, + "node-1234": "node-1234", + } + + for k, v := range testTable { + compareNodeName(k, v, t) + } +} + +func compareNodeName(k, v string, t *testing.T) { + if k != "" { + _ = os.Setenv(nodeNameEnvKey, k) + defer os.Unsetenv(nodeNameEnvKey) + } + nodeName, err := GetNodeName() + if err != nil { + t.Errorf("Failure with expected name %s: %v", k, err) + return + } + if nodeName != v { + t.Errorf("Failed to retrieve nodename, want: %s, get: %s", v, nodeName) + } +} + +func TestGetPodName(t *testing.T) { + testTable := map[string]string{ + "pod1": "pod1", + "pod-1212-x": "pod-1212-x", + "antrea-controller-577f4ffb4b-njprt": "antrea-controller-577f4ffb4b-njprt", + } + + for k, v := range testTable { + comparePodName(k, v, t) + } +} + +func comparePodName(k, v string, t *testing.T) { + if k != "" { + _ = os.Setenv(podNameEnvKey, k) + defer os.Unsetenv(podNameEnvKey) + } + podName := GetPodName() + if podName != v { + t.Errorf("Failed to retrieve pod name, want: %s, get: %s", v, podName) + } +}