diff --git a/cmd/machine-healthcheck/main.go b/cmd/machine-healthcheck/main.go index 4412924ec6..60a478a60a 100644 --- a/cmd/machine-healthcheck/main.go +++ b/cmd/machine-healthcheck/main.go @@ -5,6 +5,7 @@ import ( "runtime" "github.com/openshift/machine-api-operator/pkg/controller/machinehealthcheck" + "github.com/openshift/machine-api-operator/pkg/metrics" "github.com/golang/glog" mapiv1 "github.com/openshift/machine-api-operator/pkg/apis/machine/v1beta1" @@ -24,6 +25,7 @@ func printVersion() { func main() { watchNamespace := flag.String("namespace", "", "Namespace that the controller watches to reconcile machine-api objects. If unspecified, the controller watches for machine-api objects across all namespaces.") + metricsAddress := flag.String("metrics-bind-address", metrics.DefaultHealthCheckMetricsAddress, "Address for hosting metrics") flag.Parse() printVersion() @@ -34,8 +36,7 @@ func main() { } opts := manager.Options{ - // Disable metrics serving - MetricsBindAddress: "0", + MetricsBindAddress: *metricsAddress, } if *watchNamespace != "" { opts.Namespace = *watchNamespace diff --git a/cmd/machineset/main.go b/cmd/machineset/main.go index 394fd27d87..91d3d57546 100644 --- a/cmd/machineset/main.go +++ b/cmd/machineset/main.go @@ -24,6 +24,7 @@ import ( "github.com/openshift/machine-api-operator/pkg/apis/machine/v1beta1" "github.com/openshift/machine-api-operator/pkg/controller" "github.com/openshift/machine-api-operator/pkg/controller/machineset" + "github.com/openshift/machine-api-operator/pkg/metrics" _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" "k8s.io/klog" "sigs.k8s.io/controller-runtime/pkg/client/config" @@ -42,6 +43,7 @@ func main() { klog.InitFlags(nil) watchNamespace := flag.String("namespace", "", "Namespace that the controller watches to reconcile cluster-api objects. 
If unspecified, the controller watches for cluster-api objects across all namespaces.") + metricsAddress := flag.String("metrics-bind-address", metrics.DefaultMachineSetMetricsAddress, "Address for hosting metrics") webhookEnabled := flag.Bool("webhook-enabled", true, "Webhook server, enabled by default. When enabled, the manager will run a webhook server.") @@ -67,8 +69,7 @@ func main() { // Create a new Cmd to provide shared dependencies and start components syncPeriod := 10 * time.Minute opts := manager.Options{ - // Disable metrics serving - MetricsBindAddress: "0", + MetricsBindAddress: *metricsAddress, SyncPeriod: &syncPeriod, Namespace: *watchNamespace, } diff --git a/cmd/vsphere/main.go b/cmd/vsphere/main.go index 86fe933e6f..9b2b79b04d 100644 --- a/cmd/vsphere/main.go +++ b/cmd/vsphere/main.go @@ -4,13 +4,13 @@ import ( "flag" "fmt" "os" - "time" configv1 "github.com/openshift/api/config/v1" "github.com/openshift/machine-api-operator/pkg/apis/machine/v1beta1" vsphereapis "github.com/openshift/machine-api-operator/pkg/apis/vsphereprovider" capimachine "github.com/openshift/machine-api-operator/pkg/controller/machine" machine "github.com/openshift/machine-api-operator/pkg/controller/vsphere" + "github.com/openshift/machine-api-operator/pkg/metrics" "github.com/openshift/machine-api-operator/pkg/version" "k8s.io/klog" "sigs.k8s.io/controller-runtime/pkg/client/config" @@ -24,6 +24,7 @@ func main() { klog.InitFlags(nil) watchNamespace := flag.String("namespace", "", "Namespace that the controller watches to reconcile machine-api objects. 
If unspecified, the controller watches for machine-api objects across all namespaces.") + metricsAddress := flag.String("metrics-bind-address", metrics.DefaultMachineMetricsAddress, "Address for hosting metrics") flag.Set("logtostderr", "true") flag.Parse() @@ -33,12 +34,9 @@ func main() { } cfg := config.GetConfigOrDie() - syncPeriod := 10 * time.Minute opts := manager.Options{ - // Disable metrics serving - MetricsBindAddress: "0", - SyncPeriod: &syncPeriod, + MetricsBindAddress: *metricsAddress, } if *watchNamespace != "" { opts.Namespace = *watchNamespace diff --git a/install/0000_30_machine-api-operator_01_images.configmap.yaml b/install/0000_30_machine-api-operator_01_images.configmap.yaml index 993d085379..76038d53e6 100644 --- a/install/0000_30_machine-api-operator_01_images.configmap.yaml +++ b/install/0000_30_machine-api-operator_01_images.configmap.yaml @@ -7,6 +7,7 @@ data: images.json: > { "machineAPIOperator": "registry.svc.ci.openshift.org/openshift:machine-api-operator", + "kubeRBACProxy": "registry.svc.ci.openshift.org/openshift:kube-rbac-proxy", "clusterAPIControllerAWS": "registry.svc.ci.openshift.org/openshift:aws-machine-controllers", "clusterAPIControllerOpenStack": "registry.svc.ci.openshift.org/openshift:openstack-machine-controllers", "clusterAPIControllerLibvirt": "registry.svc.ci.openshift.org/openshift:libvirt-machine-controllers", diff --git a/install/0000_30_machine-api-operator_09_rbac.yaml b/install/0000_30_machine-api-operator_09_rbac.yaml index 97f9797886..f1d5b7ea08 100644 --- a/install/0000_30_machine-api-operator_09_rbac.yaml +++ b/install/0000_30_machine-api-operator_09_rbac.yaml @@ -146,6 +146,20 @@ rules: verbs: - create + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create + # TODO(vikasc): Remove extensions/daemonsets permissions once all controllers have bumped kubernetes-drain - 
apiGroups: - extensions diff --git a/install/0000_30_machine-api-operator_10_service.yaml b/install/0000_30_machine-api-operator_10_service.yaml index 41a7b63c75..22edb1bcd7 100644 --- a/install/0000_30_machine-api-operator_10_service.yaml +++ b/install/0000_30_machine-api-operator_10_service.yaml @@ -18,3 +18,29 @@ spec: selector: k8s-app: machine-api-operator sessionAffinity: None +--- +apiVersion: v1 +kind: Service +metadata: + name: machine-api-controllers + namespace: openshift-machine-api + annotations: + service.alpha.openshift.io/serving-cert-secret-name: machine-api-controllers-tls + exclude.release.openshift.io/internal-openshift-hosted: "true" + labels: + k8s-app: controller +spec: + type: ClusterIP + ports: + - name: machine-mtrc + targetPort: machine-mtrc + port: 8441 + - name: machineset-mtrc + targetPort: machineset-mtrc + port: 8442 + - name: mhc-mtrc + targetPort: mhc-mtrc + port: 8444 + selector: + k8s-app: controller + sessionAffinity: None diff --git a/pkg/controller/vsphere/reconciler.go b/pkg/controller/vsphere/reconciler.go index 2f4fd7561f..524cd237ad 100644 --- a/pkg/controller/vsphere/reconciler.go +++ b/pkg/controller/vsphere/reconciler.go @@ -12,6 +12,7 @@ import ( vspherev1 "github.com/openshift/machine-api-operator/pkg/apis/vsphereprovider/v1beta1" machinecontroller "github.com/openshift/machine-api-operator/pkg/controller/machine" "github.com/openshift/machine-api-operator/pkg/controller/vsphere/session" + "github.com/openshift/machine-api-operator/pkg/metrics" "github.com/vmware/govmomi/find" "github.com/vmware/govmomi/object" "github.com/vmware/govmomi/property" @@ -67,6 +68,13 @@ func (r *Reconciler) create() error { return err } } + if moTask.Info.State == types.TaskInfoStateError { + metrics.RegisterFailedInstanceCreate(&metrics.MachineLabels{ + Name: r.machine.Name, + Namespace: r.machine.Namespace, + Reason: fmt.Sprintf("Create machine task finished with error: %+v", moTask.Info.Error), + }) + } if taskIsFinished, err := 
taskIsFinished(moTask); err != nil || !taskIsFinished { if !taskIsFinished { return fmt.Errorf("task %v has not finished", moTask.Reference().Value) @@ -113,6 +121,13 @@ func (r *Reconciler) update() error { return err } } + if motask.Info.State == types.TaskInfoStateError { + metrics.RegisterFailedInstanceCreate(&metrics.MachineLabels{ + Name: r.machine.Name, + Namespace: r.machine.Namespace, + Reason: fmt.Sprintf("Create machine task finished with error: %+v", motask.Info.Error), + }) + } if taskIsFinished, err := taskIsFinished(motask); err != nil || !taskIsFinished { if !taskIsFinished { return fmt.Errorf("task %v has not finished", motask.Reference().Value) @@ -167,6 +182,13 @@ func (r *Reconciler) delete() error { return err } } + if moTask.Info.State == types.TaskInfoStateError { + metrics.RegisterFailedInstanceCreate(&metrics.MachineLabels{ + Name: r.machine.Name, + Namespace: r.machine.Namespace, + Reason: fmt.Sprintf("Create machine task finished with error: %+v", moTask.Info.Error), + }) + } if taskIsFinished, err := taskIsFinished(moTask); err != nil || !taskIsFinished { if !taskIsFinished { return fmt.Errorf("task %v has not finished", moTask.Reference().Value) @@ -178,6 +200,11 @@ func (r *Reconciler) delete() error { vmRef, err := findVM(r.machineScope) if err != nil { if !isNotFound(err) { + metrics.RegisterFailedInstanceDelete(&metrics.MachineLabels{ + Name: r.machine.Name, + Namespace: r.machine.Namespace, + Reason: err.Error(), + }) return err } klog.Infof("%v: vm does not exist", r.machine.GetName()) @@ -196,6 +223,11 @@ func (r *Reconciler) delete() error { task, err := vm.Obj.Destroy(r.Context) if err != nil { + metrics.RegisterFailedInstanceDelete(&metrics.MachineLabels{ + Name: r.machine.Name, + Namespace: r.machine.Namespace, + Reason: err.Error(), + }) return fmt.Errorf("%v: failed to destroy vm: %v", r.machine.GetName(), err) } @@ -258,6 +290,11 @@ func (r *Reconciler) reconcileRegionAndZoneLabels(vm *virtualMachine) error { }) if err != 
nil { + metrics.RegisterFailedInstanceUpdate(&metrics.MachineLabels{ + Name: r.machine.Name, + Namespace: r.machine.Namespace, + Reason: err.Error(), + }) return err } @@ -550,7 +587,13 @@ func clone(s *machineScope) (string, error) { task, err := vmTemplate.Clone(s, folder, s.machine.GetName(), spec) if err != nil { - return "", fmt.Errorf("error triggering clone op for machine %v: %w", s, err) + err = fmt.Errorf("error triggering clone op for machine %v: %w", s, err) + metrics.RegisterFailedInstanceCreate(&metrics.MachineLabels{ + Name: s.machine.Name, + Namespace: s.machine.Namespace, + Reason: err.Error(), + }) + return "", err } klog.V(3).Infof("%v: running task: %+v", s.machine.GetName(), s.providerStatus.TaskRef) @@ -813,6 +856,11 @@ func (vm *virtualMachine) reconcileTags(ctx context.Context, session *session.Se klog.Infof("%v: Attaching %s tag to vm", machine.GetName(), clusterID) // the tag should already be created by installer if err := m.AttachTag(ctx, clusterID, vm.Ref); err != nil { + metrics.RegisterFailedInstanceUpdate(&metrics.MachineLabels{ + Name: machine.Name, + Namespace: machine.Namespace, + Reason: err.Error(), + }) return err } } diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index d68684f9d3..ce70b98665 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -1,12 +1,19 @@ package metrics import ( - "github.com/golang/glog" mapiv1beta1 "github.com/openshift/machine-api-operator/pkg/apis/machine/v1beta1" machineinformers "github.com/openshift/machine-api-operator/pkg/generated/informers/externalversions/machine/v1beta1" machinelisters "github.com/openshift/machine-api-operator/pkg/generated/listers/machine/v1beta1" "github.com/prometheus/client_golang/prometheus" "k8s.io/apimachinery/pkg/labels" + "k8s.io/klog" + "sigs.k8s.io/controller-runtime/pkg/metrics" +) + +const ( + DefaultHealthCheckMetricsAddress = ":8083" + DefaultMachineSetMetricsAddress = ":8082" + DefaultMachineMetricsAddress = ":8081" ) var ( @@ 
-33,10 +40,36 @@ var ( Name: "mapi_mao_collector_up", Help: "Machine API Operator metrics are being collected and reported successfully", }, []string{"kind"}) + + failedInstanceCreateCount = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "mapi_instance_create_failed", + Help: "Number of times provider instance create has failed.", + }, []string{"name", "namespace", "reason"}, + ) + + failedInstanceUpdateCount = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "mapi_instance_update_failed", + Help: "Number of times provider instance update has failed.", + }, []string{"name", "namespace", "reason"}, + ) + + failedInstanceDeleteCount = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "mapi_instance_delete_failed", + Help: "Number of times provider instance delete has failed.", + }, []string{"name", "namespace", "reason"}, + ) ) func init() { prometheus.MustRegister(MachineCollectorUp) + metrics.Registry.MustRegister( + failedInstanceCreateCount, + failedInstanceUpdateCount, + failedInstanceDeleteCount, + ) } // MachineCollector is implementing prometheus.Collector interface. 
@@ -46,6 +79,13 @@ type MachineCollector struct { namespace string } +// MachineLabels is the group of labels that are applied to the machine metrics +type MachineLabels struct { + Name string + Namespace string + Reason string +} + func NewMachineCollector(machineInformer machineinformers.MachineInformer, machinesetInformer machineinformers.MachineSetInformer, namespace string) *MachineCollector { return &MachineCollector{ machineLister: machineInformer.Lister(), @@ -95,7 +135,7 @@ func (mc MachineCollector) collectMachineMetrics(ch chan<- prometheus.Metric) { } ch <- prometheus.MustNewConstMetric(MachineCountDesc, prometheus.GaugeValue, float64(len(machineList))) - glog.V(4).Infof("collectmachineMetrics exit") + klog.V(4).Infof("collectmachineMetrics exit") } func stringPointerDeref(stringPointer *string) string { @@ -151,3 +191,30 @@ func (mc MachineCollector) listMachines() ([]*mapiv1beta1.Machine, error) { func (mc MachineCollector) listMachineSets() ([]*mapiv1beta1.MachineSet, error) { return mc.machineSetLister.MachineSets(mc.namespace).List(labels.Everything()) } + +// RegisterFailedInstanceCreate increments failedInstanceCreateCount for the name, namespace and reason carried by labels. +func RegisterFailedInstanceCreate(labels *MachineLabels) { + failedInstanceCreateCount.With(prometheus.Labels{ + "name": labels.Name, + "namespace": labels.Namespace, + "reason": labels.Reason, + }).Inc() +} + +// RegisterFailedInstanceUpdate increments failedInstanceUpdateCount (not the create counter) for the name, namespace and reason carried by labels. +func RegisterFailedInstanceUpdate(labels *MachineLabels) { + failedInstanceUpdateCount.With(prometheus.Labels{ + "name": labels.Name, + "namespace": labels.Namespace, + "reason": labels.Reason, + }).Inc() +} + +// RegisterFailedInstanceDelete increments failedInstanceDeleteCount for the name, namespace and reason carried by labels. +func RegisterFailedInstanceDelete(labels *MachineLabels) { + failedInstanceDeleteCount.With(prometheus.Labels{ + "name": labels.Name, + "namespace": labels.Namespace, + "reason": labels.Reason, + }).Inc() +} diff --git a/pkg/operator/baremetal_test.go b/pkg/operator/baremetal_test.go index 641b986f3d..96ff925ce8 100644 --- a/pkg/operator/baremetal_test.go +++ b/pkg/operator/baremetal_test.go @@ -55,6 +55,7 @@ func newOperatorWithBaremetalConfig() *OperatorConfig { 
"docker.io/openshift/origin-machine-api-operator:v4.0.0", "docker.io/openshift/origin-machine-api-operator:v4.0.0", "docker.io/openshift/origin-machine-api-operator:v4.0.0", + "docker.io/openshift/origin-kube-rbac-proxy:v4.0.0", "docker.io/openshift/origin-aws-machine-controllers:v4.0.0", }, BaremetalControllers{ diff --git a/pkg/operator/config.go b/pkg/operator/config.go index 68ef7506b6..1485747799 100644 --- a/pkg/operator/config.go +++ b/pkg/operator/config.go @@ -30,6 +30,7 @@ type Controllers struct { MachineSet string NodeLink string MachineHealthCheck string + KubeRBACProxy string TerminationHandler string } @@ -53,6 +54,7 @@ type Images struct { ClusterAPIControllerGCP string `json:"clusterAPIControllerGCP"` ClusterAPIControllerOvirt string `json:"clusterAPIControllerOvirt"` ClusterAPIControllerVSphere string `json:"clusterAPIControllerVSphere"` + KubeRBACProxy string `json:"kubeRBACProxy"` // Images required for the metal3 pod BaremetalOperator string `json:"baremetalOperator"` BaremetalIronic string `json:"baremetalIronic"` @@ -140,3 +142,10 @@ func getMachineAPIOperatorFromImages(images Images) (string, error) { } return images.MachineAPIOperator, nil } + +func getKubeRBACProxyFromImages(images Images) (string, error) { + if images.KubeRBACProxy == "" { + return "", fmt.Errorf("failed getting kubeRBACProxy image. 
It is empty") + } + return images.KubeRBACProxy, nil +} diff --git a/pkg/operator/config_test.go b/pkg/operator/config_test.go index d6c41cb1f7..62d7ef63d6 100644 --- a/pkg/operator/config_test.go +++ b/pkg/operator/config_test.go @@ -12,6 +12,7 @@ var ( expectedLibvirtImage = "docker.io/openshift/origin-libvirt-machine-controllers:v4.0.0" expectedOpenstackImage = "docker.io/openshift/origin-openstack-machine-controllers:v4.0.0" expectedMachineAPIOperatorImage = "docker.io/openshift/origin-machine-api-operator:v4.0.0" + expectedKubeRBACProxyImage = "docker.io/openshift/origin-kube-rbac-proxy:v4.0.0" expectedBareMetalImage = "quay.io/openshift/origin-baremetal-machine-controllers:v4.0.0" expectedAzureImage = "quay.io/openshift/origin-azure-machine-controllers:v4.0.0" expectedGCPImage = "quay.io/openshift/origin-gcp-machine-controllers:v4.0.0" @@ -189,7 +190,6 @@ func TestGetProviderControllerFromImages(t *testing.T) { }, } - imagesJSONFile := "fixtures/images.json" img, err := getImagesFromJSONFile(imagesJSONFile) if err != nil { t.Errorf("failed getImagesFromJSONFile, %v", err) @@ -252,7 +252,6 @@ func TestGetTerminationHandlerFromImages(t *testing.T) { }, } - imagesJSONFile := "fixtures/images.json" img, err := getImagesFromJSONFile(imagesJSONFile) if err != nil { t.Errorf("failed getImagesFromJSONFile, %v", err) @@ -270,7 +269,6 @@ func TestGetTerminationHandlerFromImages(t *testing.T) { } func TestGetMachineAPIOperatorFromImages(t *testing.T) { - imagesJSONFile := "fixtures/images.json" img, err := getImagesFromJSONFile(imagesJSONFile) if err != nil { t.Errorf("failed getImagesFromJSONFile, %v", err) @@ -285,8 +283,22 @@ func TestGetMachineAPIOperatorFromImages(t *testing.T) { } } +func TestGetKubeRBACProxyFromImages(t *testing.T) { + img, err := getImagesFromJSONFile(imagesJSONFile) + if err != nil { + t.Errorf("failed getImagesFromJSONFile, %v", err) + } + + res, err := getKubeRBACProxyFromImages(*img) + if err != nil { + t.Errorf("failed 
getKubeRBACProxyFromImages : %v", err) + } + if res != expectedKubeRBACProxyImage { + t.Errorf("failed getKubeRBACProxyFromImages. Expected: %s, got: %s", expectedKubeRBACProxyImage, res) + } +} + func TestGetBaremetalControllers(t *testing.T) { - imagesJSONFile := "fixtures/images.json" img, err := getImagesFromJSONFile(imagesJSONFile) if err != nil { t.Errorf("failed getImagesFromJSONFile, %v", err) diff --git a/pkg/operator/fixtures/images.json b/pkg/operator/fixtures/images.json index bcbe149e23..533c88d27c 100644 --- a/pkg/operator/fixtures/images.json +++ b/pkg/operator/fixtures/images.json @@ -13,5 +13,6 @@ "baremetalIronicInspector": "quay.io/openshift/origin-ironic-inspector:v4.2.0", "baremetalIpaDownloader": "quay.io/openshift/origin-ironic-ipa-downloader:v4.2.0", "baremetalMachineOsDownloader": "quay.io/openshift/origin-ironic-machine-os-downloader:v4.3.0", - "baremetalStaticIpManager": "quay.io/openshift/origin-ironic-static-ip-manager:v4.2.0" + "baremetalStaticIpManager": "quay.io/openshift/origin-ironic-static-ip-manager:v4.2.0", + "kubeRBACProxy": "docker.io/openshift/origin-kube-rbac-proxy:v4.0.0" } diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index c4bf58b794..35189a6499 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -294,6 +294,11 @@ func (optr *Operator) maoConfigFromInfrastructure() (*OperatorConfig, error) { return nil, err } + kubeRBACProxy, err := getKubeRBACProxyFromImages(*images) + if err != nil { + return nil, err + } + return &OperatorConfig{ TargetNamespace: optr.namespace, Controllers: Controllers{ @@ -301,6 +306,7 @@ func (optr *Operator) maoConfigFromInfrastructure() (*OperatorConfig, error) { MachineSet: machineAPIOperatorImage, NodeLink: machineAPIOperatorImage, MachineHealthCheck: machineAPIOperatorImage, + KubeRBACProxy: kubeRBACProxy, TerminationHandler: terminationHandlerImage, }, BaremetalControllers: baremetalControllers, diff --git a/pkg/operator/operator_test.go 
b/pkg/operator/operator_test.go index eded10101d..09201d6908 100644 --- a/pkg/operator/operator_test.go +++ b/pkg/operator/operator_test.go @@ -282,6 +282,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) { NodeLink: images.MachineAPIOperator, MachineHealthCheck: images.MachineAPIOperator, TerminationHandler: images.ClusterAPIControllerAWS, + KubeRBACProxy: images.KubeRBACProxy, }, }, }, @@ -297,6 +298,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) { NodeLink: images.MachineAPIOperator, MachineHealthCheck: images.MachineAPIOperator, TerminationHandler: clusterAPIControllerNoOp, + KubeRBACProxy: images.KubeRBACProxy, }, }, }, @@ -312,6 +314,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) { NodeLink: images.MachineAPIOperator, MachineHealthCheck: images.MachineAPIOperator, TerminationHandler: clusterAPIControllerNoOp, + KubeRBACProxy: images.KubeRBACProxy, }, }, }, @@ -327,6 +330,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) { NodeLink: images.MachineAPIOperator, MachineHealthCheck: images.MachineAPIOperator, TerminationHandler: clusterAPIControllerNoOp, + KubeRBACProxy: images.KubeRBACProxy, }, }, }, @@ -342,6 +346,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) { NodeLink: images.MachineAPIOperator, MachineHealthCheck: images.MachineAPIOperator, TerminationHandler: clusterAPIControllerNoOp, + KubeRBACProxy: images.KubeRBACProxy, }, BaremetalControllers: BaremetalControllers{ BaremetalOperator: images.BaremetalOperator, @@ -365,6 +370,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) { NodeLink: images.MachineAPIOperator, MachineHealthCheck: images.MachineAPIOperator, TerminationHandler: clusterAPIControllerNoOp, + KubeRBACProxy: images.KubeRBACProxy, }, }, }, @@ -380,6 +386,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) { NodeLink: images.MachineAPIOperator, MachineHealthCheck: images.MachineAPIOperator, TerminationHandler: clusterAPIControllerNoOp, + KubeRBACProxy: images.KubeRBACProxy, }, }, }, @@ -395,6 
+402,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) { NodeLink: images.MachineAPIOperator, MachineHealthCheck: images.MachineAPIOperator, TerminationHandler: clusterAPIControllerNoOp, + KubeRBACProxy: images.KubeRBACProxy, }, }, }, @@ -410,6 +418,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) { NodeLink: images.MachineAPIOperator, MachineHealthCheck: images.MachineAPIOperator, TerminationHandler: clusterAPIControllerNoOp, + KubeRBACProxy: images.KubeRBACProxy, }, }, }, @@ -425,6 +434,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) { NodeLink: images.MachineAPIOperator, MachineHealthCheck: images.MachineAPIOperator, TerminationHandler: clusterAPIControllerNoOp, + KubeRBACProxy: images.KubeRBACProxy, }, }, }, @@ -440,6 +450,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) { NodeLink: images.MachineAPIOperator, MachineHealthCheck: images.MachineAPIOperator, TerminationHandler: clusterAPIControllerNoOp, + KubeRBACProxy: images.KubeRBACProxy, }, }, }, diff --git a/pkg/operator/sync.go b/pkg/operator/sync.go index 0f54e0f630..0e4298b15b 100644 --- a/pkg/operator/sync.go +++ b/pkg/operator/sync.go @@ -9,6 +9,7 @@ import ( "github.com/openshift/library-go/pkg/operator/resource/resourceapply" "github.com/openshift/library-go/pkg/operator/resource/resourcemerge" machinecontroller "github.com/openshift/machine-api-operator/pkg/controller/machine" + "github.com/openshift/machine-api-operator/pkg/metrics" "github.com/openshift/machine-api-operator/pkg/util/conditions" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -20,12 +21,17 @@ import ( ) const ( - deploymentRolloutPollInterval = time.Second - deploymentRolloutTimeout = 5 * time.Minute - deploymentMinimumAvailabilityTime = 3 * time.Minute - daemonsetRolloutPollInterval = time.Second - daemonsetRolloutTimeout = 5 * time.Minute - machineAPITerminationHandler = "machine-api-termination-handler" + deploymentRolloutPollInterval = time.Second + deploymentRolloutTimeout = 5 * time.Minute 
+ deploymentMinimumAvailabilityTime = 3 * time.Minute + daemonsetRolloutPollInterval = time.Second + daemonsetRolloutTimeout = 5 * time.Minute + machineAPITerminationHandler = "machine-api-termination-handler" + machineExposeMetricsPort = 8441 + machineSetExposeMetricsPort = 8442 + machineHealthCheckExposeMetricsPort = 8444 + kubeRBACConfigName = "config" + certStoreName = "machine-api-controllers-tls" ) func (optr *Operator) syncAll(config *OperatorConfig) error { @@ -263,6 +269,7 @@ func newDeployment(config *OperatorConfig, features map[string]bool) *appsv1.Dep func newPodTemplateSpec(config *OperatorConfig, features map[string]bool) *corev1.PodTemplateSpec { containers := newContainers(config, features) + proxyContainers := newKubeProxyContainers(config.Controllers.KubeRBACProxy) tolerations := []corev1.Toleration{ { Key: "node-role.kubernetes.io/master", @@ -286,6 +293,49 @@ func newPodTemplateSpec(config *OperatorConfig, features map[string]bool) *corev }, } + var readOnly int32 = 420 + volumes := []corev1.Volume{ + { + Name: kubeRBACConfigName, + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "kube-rbac-proxy", + }, + DefaultMode: pointer.Int32Ptr(readOnly), + }, + }, + }, + { + Name: certStoreName, + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: "machine-api-controllers-tls", + DefaultMode: pointer.Int32Ptr(readOnly), + }, + }, + }, + { + Name: "cert", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: "machine-api-operator-webhook-cert", + DefaultMode: pointer.Int32Ptr(readOnly), + Items: []corev1.KeyToPath{ + { + Key: "tls.crt", + Path: "tls.crt", + }, + { + Key: "tls.key", + Path: "tls.key", + }, + }, + }, + }, + }, + } + return &corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{ @@ -294,32 +344,12 @@ func newPodTemplateSpec(config *OperatorConfig, 
features map[string]bool) *corev }, }, Spec: corev1.PodSpec{ - Containers: containers, + Containers: append(containers, proxyContainers...), PriorityClassName: "system-node-critical", NodeSelector: map[string]string{"node-role.kubernetes.io/master": ""}, ServiceAccountName: "machine-api-controllers", Tolerations: tolerations, - Volumes: []corev1.Volume{ - { - Name: "cert", - VolumeSource: corev1.VolumeSource{ - Secret: &corev1.SecretVolumeSource{ - SecretName: "machine-api-operator-webhook-cert", - DefaultMode: pointer.Int32Ptr(420), - Items: []corev1.KeyToPath{ - { - Key: "tls.crt", - Path: "tls.crt", - }, - { - Key: "tls.key", - Path: "tls.key", - }, - }, - }, - }, - }, - }, + Volumes: volumes, }, } } @@ -393,6 +423,55 @@ func newContainers(config *OperatorConfig, features map[string]bool) []corev1.Co return containers } +func newKubeProxyContainers(image string) []corev1.Container { + return []corev1.Container{ + newKubeProxyContainer(image, "machineset-mtrc", metrics.DefaultMachineSetMetricsAddress, machineSetExposeMetricsPort), + newKubeProxyContainer(image, "machine-mtrc", metrics.DefaultMachineMetricsAddress, machineExposeMetricsPort), + newKubeProxyContainer(image, "mhc-mtrc", metrics.DefaultHealthCheckMetricsAddress, machineHealthCheckExposeMetricsPort), + } +} + +func newKubeProxyContainer(image, portName, upstreamPort string, exposePort int32) corev1.Container { + configMountPath := "/etc/kube-rbac-proxy" + tlsCertMountPath := "/etc/tls/private" + resources := corev1.ResourceRequirements{ + Requests: map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceMemory: resource.MustParse("20Mi"), + corev1.ResourceCPU: resource.MustParse("10m"), + }, + } + args := []string{ + fmt.Sprintf("--secure-listen-address=0.0.0.0:%d", exposePort), + fmt.Sprintf("--upstream=http://localhost%s", upstreamPort), + fmt.Sprintf("--config-file=%s/config-file.yaml", configMountPath), + fmt.Sprintf("--tls-cert-file=%s/tls.crt", tlsCertMountPath), + 
fmt.Sprintf("--tls-private-key-file=%s/tls.key", tlsCertMountPath), + "--logtostderr=true", + "--v=10", + } + ports := []corev1.ContainerPort{{ + Name: portName, + ContainerPort: exposePort, + }} + + return corev1.Container{ + Name: fmt.Sprintf("kube-rbac-proxy-%s", portName), + Image: image, + Args: args, + Resources: resources, + Ports: ports, + VolumeMounts: []corev1.VolumeMount{ + { + Name: kubeRBACConfigName, + MountPath: configMountPath, + }, + { + Name: certStoreName, + MountPath: tlsCertMountPath, + }}, + } +} + func newTerminationDaemonSet(config *OperatorConfig) *appsv1.DaemonSet { template := newTerminationPodTemplateSpec(config)