diff --git a/build/yamls/antrea-eks.yml b/build/yamls/antrea-eks.yml index a43ccb9d4f1..a3e45c65257 100644 --- a/build/yamls/antrea-eks.yml +++ b/build/yamls/antrea-eks.yml @@ -286,6 +286,15 @@ data: # networkPolicyOnly: Antrea enforces NetworkPolicy only, and utilizes CNI chaining and delegates Pod IPAM and connectivity to primary CNI. # trafficEncapMode: networkPolicyOnly + + # Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener + #enablePrometheusMetrics: false + + # Enable golang metrics exposure via Prometheus. + #enablePrometheusGoMetrics: false + + # Enable process metrics exposure via Prometheus. + #enablePrometheusProcessMetrics: false antrea-cni.conf: | { "cniVersion":"0.3.0", @@ -295,18 +304,30 @@ data: "type": "host-local" } } - antrea-controller.conf: "" + antrea-controller.conf: | + # Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener. + #enablePrometheusMetrics: false + + # Enable golang metrics exposure via Prometheus. + #enablePrometheusGoMetrics: false + + # Enable process metrics exposure via Prometheus. 
+ #enablePrometheusProcessMetrics: false kind: ConfigMap metadata: annotations: {} labels: app: antrea - name: antrea-config-57hfkfg8kd + name: antrea-config-b4d69452ff namespace: kube-system --- apiVersion: v1 kind: Service metadata: + annotations: + prometheus.io/port: "443" + prometheus.io/scheme: https + prometheus.io/scrape: "true" labels: app: antrea name: antrea @@ -394,7 +415,7 @@ spec: key: node-role.kubernetes.io/master volumes: - configMap: - name: antrea-config-57hfkfg8kd + name: antrea-config-b4d69452ff name: antrea-config --- apiVersion: apiregistration.k8s.io/v1 @@ -444,6 +465,10 @@ spec: component: antrea-agent template: metadata: + annotations: + prometheus.io/port: "10443" + prometheus.io/scheme: https + prometheus.io/scrape: "true" labels: app: antrea component: antrea-agent @@ -578,7 +603,7 @@ spec: operator: Exists volumes: - configMap: - name: antrea-config-57hfkfg8kd + name: antrea-config-b4d69452ff name: antrea-config - hostPath: path: /etc/cni/net.d diff --git a/build/yamls/antrea-ipsec.yml b/build/yamls/antrea-ipsec.yml index 7c102b89ae1..56ca842f755 100644 --- a/build/yamls/antrea-ipsec.yml +++ b/build/yamls/antrea-ipsec.yml @@ -286,6 +286,15 @@ data: # networkPolicyOnly: Antrea enforces NetworkPolicy only, and utilizes CNI chaining and delegates Pod IPAM and connectivity to primary CNI. # #trafficEncapMode: encap + + # Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener + #enablePrometheusMetrics: false + + # Enable golang metrics exposure via Prometheus. + #enablePrometheusGoMetrics: false + + # Enable process metrics exposure via Prometheus. + #enablePrometheusProcessMetrics: false antrea-cni.conf: | { "cniVersion":"0.3.0", @@ -295,13 +304,21 @@ data: "type": "host-local" } } - antrea-controller.conf: "" + antrea-controller.conf: | + # Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener. + #enablePrometheusMetrics: false + + # Enable golang metrics exposure via Prometheus. 
+ #enablePrometheusGoMetrics: false + + # Enable process metrics exposure via Prometheus. + #enablePrometheusProcessMetrics: false kind: ConfigMap metadata: annotations: {} labels: app: antrea - name: antrea-config-f59cfh8thg + name: antrea-config-fggd7g4h2k namespace: kube-system --- apiVersion: v1 @@ -316,6 +333,10 @@ type: Opaque apiVersion: v1 kind: Service metadata: + annotations: + prometheus.io/port: "443" + prometheus.io/scheme: https + prometheus.io/scrape: "true" labels: app: antrea name: antrea @@ -403,7 +424,7 @@ spec: key: node-role.kubernetes.io/master volumes: - configMap: - name: antrea-config-f59cfh8thg + name: antrea-config-fggd7g4h2k name: antrea-config --- apiVersion: apiregistration.k8s.io/v1 @@ -453,6 +474,10 @@ spec: component: antrea-agent template: metadata: + annotations: + prometheus.io/port: "10443" + prometheus.io/scheme: https + prometheus.io/scrape: "true" labels: app: antrea component: antrea-agent @@ -619,7 +644,7 @@ spec: operator: Exists volumes: - configMap: - name: antrea-config-f59cfh8thg + name: antrea-config-fggd7g4h2k name: antrea-config - hostPath: path: /etc/cni/net.d diff --git a/build/yamls/antrea.yml b/build/yamls/antrea.yml index 35f77f3a345..9d3344b6016 100644 --- a/build/yamls/antrea.yml +++ b/build/yamls/antrea.yml @@ -286,6 +286,15 @@ data: # networkPolicyOnly: Antrea enforces NetworkPolicy only, and utilizes CNI chaining and delegates Pod IPAM and connectivity to primary CNI. # #trafficEncapMode: encap + + # Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener + #enablePrometheusMetrics: false + + # Enable golang metrics exposure via Prometheus. + #enablePrometheusGoMetrics: false + + # Enable process metrics exposure via Prometheus. + #enablePrometheusProcessMetrics: false antrea-cni.conf: | { "cniVersion":"0.3.0", @@ -295,18 +304,30 @@ data: "type": "host-local" } } - antrea-controller.conf: "" + antrea-controller.conf: | + # Enable metrics exposure via Prometheus. 
Initializes Prometheus metrics listener. + #enablePrometheusMetrics: false + + # Enable golang metrics exposure via Prometheus. + #enablePrometheusGoMetrics: false + + # Enable process metrics exposure via Prometheus. + #enablePrometheusProcessMetrics: false kind: ConfigMap metadata: annotations: {} labels: app: antrea - name: antrea-config-b2b5bdkh8t + name: antrea-config-656thg244c namespace: kube-system --- apiVersion: v1 kind: Service metadata: + annotations: + prometheus.io/port: "443" + prometheus.io/scheme: https + prometheus.io/scrape: "true" labels: app: antrea name: antrea @@ -394,7 +415,7 @@ spec: key: node-role.kubernetes.io/master volumes: - configMap: - name: antrea-config-b2b5bdkh8t + name: antrea-config-656thg244c name: antrea-config --- apiVersion: apiregistration.k8s.io/v1 @@ -444,6 +465,10 @@ spec: component: antrea-agent template: metadata: + annotations: + prometheus.io/port: "10443" + prometheus.io/scheme: https + prometheus.io/scrape: "true" labels: app: antrea component: antrea-agent @@ -578,7 +603,7 @@ spec: operator: Exists volumes: - configMap: - name: antrea-config-b2b5bdkh8t + name: antrea-config-656thg244c name: antrea-config - hostPath: path: /etc/cni/net.d diff --git a/build/yamls/base/agent.yml b/build/yamls/base/agent.yml index 9acc4a78b69..d5e35a386a2 100644 --- a/build/yamls/base/agent.yml +++ b/build/yamls/base/agent.yml @@ -13,6 +13,10 @@ spec: type: RollingUpdate template: metadata: + annotations: + prometheus.io/port: "10443" + prometheus.io/scrape: "true" + prometheus.io/scheme: "https" labels: component: antrea-agent spec: diff --git a/build/yamls/base/conf/antrea-agent.conf b/build/yamls/base/conf/antrea-agent.conf index 5bab23b3748..063b2669f6d 100644 --- a/build/yamls/base/conf/antrea-agent.conf +++ b/build/yamls/base/conf/antrea-agent.conf @@ -41,3 +41,12 @@ # networkPolicyOnly: Antrea enforces NetworkPolicy only, and utilizes CNI chaining and delegates Pod IPAM and connectivity to primary CNI. 
# #trafficEncapMode: encap + +# Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener +#enablePrometheusMetrics: false + +# Enable golang metrics exposure via Prometheus. +#enablePrometheusGoMetrics: false + +# Enable process metrics exposure via Prometheus. +#enablePrometheusProcessMetrics: false diff --git a/build/yamls/base/conf/antrea-controller.conf b/build/yamls/base/conf/antrea-controller.conf index e69de29bb2d..3b09b41be92 100644 --- a/build/yamls/base/conf/antrea-controller.conf +++ b/build/yamls/base/conf/antrea-controller.conf @@ -0,0 +1,8 @@ +# Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener. +#enablePrometheusMetrics: false + +# Enable golang metrics exposure via Prometheus. +#enablePrometheusGoMetrics: false + +# Enable process metrics exposure via Prometheus. +#enablePrometheusProcessMetrics: false diff --git a/build/yamls/base/controller.yml b/build/yamls/base/controller.yml index a6d6906bd04..0bd605e482a 100644 --- a/build/yamls/base/controller.yml +++ b/build/yamls/base/controller.yml @@ -2,6 +2,10 @@ apiVersion: v1 kind: Service metadata: + annotations: + prometheus.io/port: "443" + prometheus.io/scrape: "true" + prometheus.io/scheme: "https" name: antrea spec: ports: diff --git a/cmd/antrea-agent/agent.go b/cmd/antrea-agent/agent.go index 030cbe0da40..652247466a1 100644 --- a/cmd/antrea-agent/agent.go +++ b/cmd/antrea-agent/agent.go @@ -30,6 +30,7 @@ import ( "github.com/vmware-tanzu/antrea/pkg/agent/controller/networkpolicy" "github.com/vmware-tanzu/antrea/pkg/agent/controller/noderoute" "github.com/vmware-tanzu/antrea/pkg/agent/interfacestore" + "github.com/vmware-tanzu/antrea/pkg/agent/metrics" "github.com/vmware-tanzu/antrea/pkg/agent/openflow" "github.com/vmware-tanzu/antrea/pkg/agent/route" "github.com/vmware-tanzu/antrea/pkg/apis/networking/v1beta1" @@ -151,6 +152,13 @@ func run(o *Options) error { go networkPolicyController.Run(stopCh) + if o.config.EnablePrometheusMetrics { + go 
metrics.InitializePrometheusMetrics( + o.config.EnablePrometheusGoMetrics, + o.config.EnablePrometheusProcessMetrics, + o.config.OVSBridge, ifaceStore, ofClient) + } + agentMonitor := monitor.NewAgentMonitor( crdClient, o.config.OVSBridge, diff --git a/cmd/antrea-agent/config.go b/cmd/antrea-agent/config.go index e14b3157ddf..eb7890c5067 100644 --- a/cmd/antrea-agent/config.go +++ b/cmd/antrea-agent/config.go @@ -72,4 +72,13 @@ type AgentConfig struct { // Hybrid: noEncap if worker Nodes on same subnet, otherwise encap. // NetworkPolicyOnly: Antrea enforces NetworkPolicy only, and utilizes CNI chaining and delegates Pod IPAM and connectivity to primary CNI. TrafficEncapMode string `yaml:"trafficEncapMode,omitempty"` + // Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener + // Defaults to false. + EnablePrometheusMetrics bool `yaml:"enablePrometheusMetrics,omitempty"` + // Enable golang metrics exposure via Prometheus + // Defaults to false. + EnablePrometheusGoMetrics bool `yaml:"enablePrometheusGoMetrics,omitempty"` + // Enable process metrics exposure via Prometheus + // Defaults to false. + EnablePrometheusProcessMetrics bool `yaml:"enablePrometheusProcessMetrics,omitempty"` } diff --git a/cmd/antrea-controller/config.go b/cmd/antrea-controller/config.go index aa4b57b5658..9dbe16344c9 100644 --- a/cmd/antrea-controller/config.go +++ b/cmd/antrea-controller/config.go @@ -22,4 +22,13 @@ type ControllerConfig struct { // clientConnection specifies the kubeconfig file and client connection settings for the agent // to communicate with the apiserver. ClientConnection componentbaseconfig.ClientConnectionConfiguration `yaml:"clientConnection"` + // Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener + // Defaults to false. + EnablePrometheusMetrics bool `yaml:"enablePrometheusMetrics,omitempty"` + // Enable golang metrics exposure via Prometheus + // Defaults to false. 
+	EnablePrometheusGoMetrics bool `yaml:"enablePrometheusGoMetrics,omitempty"`
+	// Enable process metrics exposure via Prometheus
+	// Defaults to false.
+	EnablePrometheusProcessMetrics bool `yaml:"enablePrometheusProcessMetrics,omitempty"`
 }
diff --git a/cmd/antrea-controller/controller.go b/cmd/antrea-controller/controller.go
index ae9c90bfd06..464671883ac 100644
--- a/cmd/antrea-controller/controller.go
+++ b/cmd/antrea-controller/controller.go
@@ -17,8 +17,12 @@ package main
 import (
 	"fmt"
 	"net"
+	"net/http"
+	"os"
 	"time"
 
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
 	genericopenapi "k8s.io/apiserver/pkg/endpoints/openapi"
 	genericapiserver "k8s.io/apiserver/pkg/server"
 	genericoptions "k8s.io/apiserver/pkg/server/options"
@@ -74,7 +78,8 @@ func run(o *Options) error {
 		addressGroupStore,
 		appliedToGroupStore,
 		networkPolicyStore,
-		controllerMonitor)
+		controllerMonitor,
+		o.config.EnablePrometheusMetrics)
 	if err != nil {
 		return fmt.Errorf("error creating API server config: %v", err)
 	}
@@ -96,16 +101,66 @@ func run(o *Options) error {
 
 	go apiServer.GenericAPIServer.PrepareRun().Run(stopCh)
 
+	if o.config.EnablePrometheusMetrics {
+		go initializePrometheusMetrics(
+			o.config.EnablePrometheusGoMetrics,
+			o.config.EnablePrometheusProcessMetrics)
+	}
+
 	<-stopCh
 	klog.Info("Stopping Antrea controller")
 	return nil
 }
 
+// initializePrometheusMetrics sets up Prometheus metrics for the Antrea
+// controller: it registers the antrea_controller_host gauge with the default
+// Prometheus registry and installs the promhttp handler for "/metrics" on
+// http.DefaultServeMux.
+//
+// enablePrometheusGoMetrics and enablePrometheusProcessMetrics select whether
+// the Go runtime and process collectors stay registered; when a flag is false
+// the corresponding collector is unregistered from the default registry.
+//
+// prometheus.MustRegister panics if the gauge cannot be registered.
+func initializePrometheusMetrics(
+	enablePrometheusGoMetrics bool,
+	enablePrometheusProcessMetrics bool) {
+	hostname, err := os.Hostname()
+	if err != nil {
+		// Not fatal: the gauge below is still exported, with an empty "host" label.
+		klog.Errorf("Failed to retrieve controller hostname, %v", err)
+	}
+
+	klog.Info("Initializing prometheus")
+	gaugeHost := prometheus.NewGauge(prometheus.GaugeOpts{
+		Name: "antrea_controller_host",
+		Help: "Antrea controller hostname (as a label), typically used in grouping/aggregating stats; " +
+			"the label defaults to the hostname of the host but can be overridden by configuration. " +
+			"The value of the gauge is always set to 1.",
+		ConstLabels: prometheus.Labels{"host": hostname},
+	})
+	gaugeHost.Set(1)
+	prometheus.MustRegister(gaugeHost)
+	http.Handle("/metrics", promhttp.Handler())
+
+	// Unregister matches collectors by their descriptors, so a freshly
+	// constructed collector removes the equivalent registered one.
+	if !enablePrometheusGoMetrics {
+		klog.Info("Golang metrics are disabled")
+		prometheus.Unregister(prometheus.NewGoCollector())
+	}
+	if !enablePrometheusProcessMetrics {
+		klog.Info("Process metrics are disabled")
+		prometheus.Unregister(prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{}))
+	}
+}
+
 func createAPIServerConfig(kubeconfig string,
 	addressGroupStore storage.Interface,
 	appliedToGroupStore storage.Interface,
 	networkPolicyStore storage.Interface,
-	controllerQuerier monitor.ControllerQuerier) (*apiserver.Config, error) {
+	controllerQuerier monitor.ControllerQuerier,
+	enablePrometheusMetrics bool) (*apiserver.Config, error) {
 	// TODO:
 	// 1. Support user-provided certificate.
 	// 2. Support configurable https port.
@@ -113,6 +168,9 @@ func createAPIServerConfig(kubeconfig string,
 
 	authentication := genericoptions.NewDelegatingAuthenticationOptions()
 	authorization := genericoptions.NewDelegatingAuthorizationOptions()
+	if enablePrometheusMetrics {
+		authorization.WithAlwaysAllowPaths("/metrics")
+	}
 	// Set the PairName but leave certificate directory blank to generate in-memory by default
 	secureServing.ServerCert.CertDirectory = ""
 	secureServing.ServerCert.PairName = "antrea-apiserver"
diff --git a/go.mod b/go.mod
index 6e8187d64c5..06bb7bb1d11 100644
--- a/go.mod
+++ b/go.mod
@@ -25,6 +25,7 @@ require (
 	github.com/imdario/mergo v0.3.7 // indirect
 	github.com/j-keck/arping v1.0.0
 	github.com/kevinburke/ssh_config v0.0.0-20190725054713-01f96b0aa0cd
+	github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829
 	github.com/satori/go.uuid v1.2.0
 	github.com/spf13/cobra v0.0.5
 	github.com/spf13/pflag v1.0.3
diff --git a/pkg/agent/metrics/prometheus.go b/pkg/agent/metrics/prometheus.go
new file mode 100644
index
00000000000..bb0a248d0d4
--- /dev/null
+++ b/pkg/agent/metrics/prometheus.go
@@ -0,0 +1,145 @@
+// Copyright 2020 Antrea Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package metrics exposes Antrea agent statistics as Prometheus metrics.
+package metrics
+
+import (
+	"net/http"
+	"os"
+	"strconv"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
+	"k8s.io/klog"
+
+	"github.com/vmware-tanzu/antrea/pkg/agent/interfacestore"
+	"github.com/vmware-tanzu/antrea/pkg/agent/openflow"
+)
+
+// OVSStatManager is a prometheus.Collector that reports, for the configured
+// OVS bridge, the number of flows in each flow table known to the OpenFlow
+// client.
+type OVSStatManager struct {
+	ofClient     openflow.Client
+	OVSBridge    string
+	OVSTableDesc *prometheus.Desc
+}
+
+// OVSGetStatistics returns the flow count of every table reported by the
+// OpenFlow client, keyed by the table ID formatted as a decimal string.
+func (c *OVSStatManager) OVSGetStatistics() map[string]float64 {
+	flowTableStatus := c.ofClient.GetFlowTableStatus()
+	ovsFlowsByTable := make(map[string]float64, len(flowTableStatus))
+	for _, tableStatus := range flowTableStatus {
+		ovsFlowsByTable[strconv.Itoa(int(tableStatus.ID))] = float64(tableStatus.FlowCount)
+	}
+	return ovsFlowsByTable
+}
+
+// Describe implements prometheus.Collector by sending the flow table
+// descriptor.
+func (c *OVSStatManager) Describe(ch chan<- *prometheus.Desc) {
+	ch <- c.OVSTableDesc
+}
+
+// Collect implements prometheus.Collector. It emits one gauge sample per flow
+// table, with the table ID as the value of the "table_id" label.
+func (c *OVSStatManager) Collect(ch chan<- prometheus.Metric) {
+	for tableID, tableFlowCount := range c.OVSGetStatistics() {
+		ch <- prometheus.MustNewConstMetric(
+			c.OVSTableDesc,
+			prometheus.GaugeValue,
+			tableFlowCount,
+			tableID,
+		)
+	}
+}
+
+// NewOVSStatManager returns an OVSStatManager that reads flow table status
+// from ofClient and attaches ovsBridge as the constant "bridge" label of the
+// antrea_agent_ovs_flow_table metric.
+func NewOVSStatManager(ovsBridge string, ofClient openflow.Client) *OVSStatManager {
+	return &OVSStatManager{
+		ofClient:  ofClient,
+		OVSBridge: ovsBridge,
+		OVSTableDesc: prometheus.NewDesc(
+			"antrea_agent_ovs_flow_table",
+			"OVS flow table flow count.",
+			[]string{"table_id"},
+			prometheus.Labels{"bridge": ovsBridge},
+		),
+	}
+}
+
+// InitializePrometheusMetrics registers the Antrea agent metrics with the
+// default Prometheus registry and installs the promhttp handler for
+// "/metrics" on http.DefaultServeMux.
+//
+// The registered metrics are antrea_agent_local_pod_count (read from
+// ifaceStore on every scrape), antrea_agent_host (always 1, labelled with the
+// OS hostname) and the per-table OVS flow counts for ovsBridge obtained
+// through ofClient. When enablePrometheusGoMetrics or
+// enablePrometheusProcessMetrics is false, the corresponding collector is
+// unregistered from the default registry.
+//
+// prometheus.MustRegister panics if the host gauge or the OVS collector
+// cannot be registered.
+func InitializePrometheusMetrics(
+	enablePrometheusGoMetrics bool,
+	enablePrometheusProcessMetrics bool,
+	ovsBridge string,
+	ifaceStore interfacestore.InterfaceStore,
+	ofClient openflow.Client) {
+	hostname, err := os.Hostname()
+	if err != nil {
+		// Not fatal: antrea_agent_host is still exported, with an empty "host" label.
+		klog.Errorf("Failed to retrieve agent node name, %v", err)
+	}
+	klog.Info("Binding antrea_agent_local_pod_count")
+	if err := prometheus.Register(prometheus.NewGaugeFunc(
+		prometheus.GaugeOpts{
+			Name: "antrea_agent_local_pod_count",
+			Help: "Number of pods on local node.",
+		},
+		func() float64 { return float64(ifaceStore.GetContainerInterfaceNum()) },
+	)); err != nil {
+		klog.Errorf("Failed to register antrea_agent_local_pod_count with Prometheus: %v", err)
+	}
+
+	gaugeHost := prometheus.NewGauge(prometheus.GaugeOpts{
+		Name: "antrea_agent_host",
+		Help: "Antrea agent hostname (as a label), typically used in grouping/aggregating stats; " +
+			"the label defaults to the hostname of the host but can be overridden by configuration. " +
+			"The value of the gauge is always set to 1.",
+		ConstLabels: prometheus.Labels{"host": hostname},
+	})
+	gaugeHost.Set(1)
+	prometheus.MustRegister(gaugeHost)
+	http.Handle("/metrics", promhttp.Handler())
+
+	ovsStats := NewOVSStatManager(ovsBridge, ofClient)
+	prometheus.MustRegister(ovsStats)
+
+	// Unregister matches collectors by their descriptors, so a freshly
+	// constructed collector removes the equivalent registered one.
+	if !enablePrometheusGoMetrics {
+		klog.Info("Golang metrics are disabled")
+		prometheus.Unregister(prometheus.NewGoCollector())
+	}
+	if !enablePrometheusProcessMetrics {
+		klog.Info("Process metrics are disabled")
+		prometheus.Unregister(prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{}))
+	}
+}