diff --git a/manifests/arbiter.machineconfigpool.yaml b/manifests/arbiter.machineconfigpool.yaml new file mode 100644 index 0000000000..f9065f1b86 --- /dev/null +++ b/manifests/arbiter.machineconfigpool.yaml @@ -0,0 +1,15 @@ +apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfigPool +metadata: + name: arbiter + labels: + "operator.machineconfiguration.openshift.io/required-for-upgrade": "" + "machineconfiguration.openshift.io/mco-built-in": "" + "pools.operator.machineconfiguration.openshift.io/arbiter": "" +spec: + machineConfigSelector: + matchLabels: + "machineconfiguration.openshift.io/role": "arbiter" + nodeSelector: + matchLabels: + node-role.kubernetes.io/arbiter: "" diff --git a/manifests/machineconfigcontroller/custom-machine-config-pool-selector-validatingadmissionpolicy.yaml b/manifests/machineconfigcontroller/custom-machine-config-pool-selector-validatingadmissionpolicy.yaml index 48bd2c8ca8..f9653a14ed 100644 --- a/manifests/machineconfigcontroller/custom-machine-config-pool-selector-validatingadmissionpolicy.yaml +++ b/manifests/machineconfigcontroller/custom-machine-config-pool-selector-validatingadmissionpolicy.yaml @@ -22,6 +22,8 @@ spec: (object.spec.machineConfigSelector.matchLabels["machineconfiguration.openshift.io/role"] == "master") || (object.spec.machineConfigSelector.matchLabels["machineconfiguration.openshift.io/role"] == "worker") + || + (object.spec.machineConfigSelector.matchLabels["machineconfiguration.openshift.io/role"] == "arbiter") ) ) || diff --git a/pkg/controller/common/constants.go b/pkg/controller/common/constants.go index 9c6078bca9..74cf41225d 100644 --- a/pkg/controller/common/constants.go +++ b/pkg/controller/common/constants.go @@ -52,6 +52,9 @@ const ( // APIServerInstanceName is a singleton name for APIServer configuration APIServerBootstrapFileLocation = "/etc/mcs/bootstrap/api-server/api-server.yaml" + // MachineConfigPoolArbiter is the MachineConfigPool name given to the arbiter + MachineConfigPoolArbiter = "arbiter" + // MachineConfigPoolMaster is the MachineConfigPool name given to the master MachineConfigPoolMaster = "master" diff --git a/pkg/controller/kubelet-config/kubelet_config_nodes.go b/pkg/controller/kubelet-config/kubelet_config_nodes.go index bc7607d509..6da6145207 100644 --- a/pkg/controller/kubelet-config/kubelet_config_nodes.go +++ b/pkg/controller/kubelet-config/kubelet_config_nodes.go @@ -133,7 +133,8 @@ func (ctrl *Controller) syncNodeConfigHandler(key string) error { } } // The following code updates the MC with the relevant CGroups version - if role == ctrlcommon.MachineConfigPoolWorker || role == ctrlcommon.MachineConfigPoolMaster { + switch role { + case ctrlcommon.MachineConfigPoolWorker, ctrlcommon.MachineConfigPoolMaster, ctrlcommon.MachineConfigPoolArbiter: err = updateMachineConfigwithCgroup(nodeConfig, mc) if err != nil { return err diff --git a/pkg/controller/template/render.go b/pkg/controller/template/render.go index a652b2fb4c..f3726ea351 100644 --- a/pkg/controller/template/render.go +++ b/pkg/controller/template/render.go @@ -47,6 +47,9 @@ const ( platformBase = "_base" platformOnPrem = "on-prem" sno = "sno" + masterRole = "master" + workerRole = "worker" + arbiterRole = "arbiter" ) // generateTemplateMachineConfigs returns MachineConfig objects from the templateDir and a config object @@ -80,6 +83,11 @@ func generateTemplateMachineConfigs(config *RenderConfig, templateDir string) ([ continue } + // Avoid creating resources for non arbiter deployments + if role == arbiterRole && 
!hasControlPlaneTopology(config, configv1.HighlyAvailableArbiterMode) { + continue + } + roleConfigs, err := GenerateMachineConfigsForRole(config, role, templateDir) if err != nil { return nil, fmt.Errorf("failed to create MachineConfig for role %s: %w", role, err) @@ -102,10 +110,10 @@ func generateTemplateMachineConfigs(config *RenderConfig, templateDir string) ([ func GenerateMachineConfigsForRole(config *RenderConfig, role, templateDir string) ([]*mcfgv1.MachineConfig, error) { rolePath := role //nolint:goconst - if role != "worker" && role != "master" { + if role != workerRole && role != masterRole && role != arbiterRole { // custom pools are only allowed to be worker's children // and can reuse the worker templates - rolePath = "worker" + rolePath = workerRole } path := filepath.Join(templateDir, rolePath) @@ -219,7 +227,7 @@ func getPaths(config *RenderConfig, platformString string) []string { platformBasedPaths = append(platformBasedPaths, platformString) // sno is specific case and it should override even specific platform files - if config.Infra.Status.ControlPlaneTopology == configv1.SingleReplicaTopologyMode { + if hasControlPlaneTopology(config, configv1.SingleReplicaTopologyMode) { platformBasedPaths = append(platformBasedPaths, sno) } @@ -799,3 +807,12 @@ func cloudPlatformLoadBalancerIPState(cfg RenderConfig) LoadBalancerIPState { } return lbIPState } + +// hasControlPlaneTopology returns true if the topology matches the infra.controlPlaneTopology +// checks to make sure RenderConfig and Infra are not nil. +func hasControlPlaneTopology(r *RenderConfig, topo configv1.TopologyMode) bool { + if r == nil || r.Infra == nil { + return false + } + return r.Infra.Status.ControlPlaneTopology == topo +} diff --git a/pkg/controller/template/render_test.go b/pkg/controller/template/render_test.go index 92248695f8..db2e9d0af3 100644 --- a/pkg/controller/template/render_test.go +++ b/pkg/controller/template/render_test.go @@ -203,6 +203,7 @@ var ( configs = map[string]string{ "aws": "./test_data/controller_config_aws.yaml", "baremetal": "./test_data/controller_config_baremetal.yaml", + "baremetal-arbiter": "./test_data/controller_config_baremetal_arbiter.yaml", "gcp": "./test_data/controller_config_gcp.yaml", "openstack": "./test_data/controller_config_openstack.yaml", "libvirt": "./test_data/controller_config_libvirt.yaml", @@ -281,7 +282,7 @@ func TestGenerateMachineConfigs(t *testing.T) { if err != nil { t.Errorf("Failed to parse Ignition config for %s, %s, error: %v", config, cfg.Name, err) } - if role == "master" { + if role == masterRole { if !foundPullSecretMaster { foundPullSecretMaster = findIgnFile(ign.Storage.Files, "/var/lib/kubelet/config.json", t) } @@ -292,7 +293,7 @@ func TestGenerateMachineConfigs(t *testing.T) { foundMTUMigrationMaster = findIgnFile(ign.Storage.Files, "/usr/local/bin/mtu-migration.sh", t) foundMTUMigrationMaster = foundMTUMigrationMaster || findIgnFile(ign.Storage.Files, "/etc/systemd/system/mtu-migration.service", t) } - } else if role == "worker" { + } else if role == workerRole { if !foundPullSecretWorker { foundPullSecretWorker = findIgnFile(ign.Storage.Files, "/var/lib/kubelet/config.json", t) } @@ -303,6 +304,18 @@ func TestGenerateMachineConfigs(t *testing.T) { foundMTUMigrationWorker = findIgnFile(ign.Storage.Files, "/usr/local/bin/mtu-migration.sh", t) foundMTUMigrationWorker = foundMTUMigrationWorker || findIgnFile(ign.Storage.Files, "/etc/systemd/system/mtu-migration.service", t) } + } else if role == arbiterRole { + // arbiter role currently 
follows master output + if !foundPullSecretMaster { + foundPullSecretMaster = findIgnFile(ign.Storage.Files, "/var/lib/kubelet/config.json", t) + } + if !foundKubeletUnitMaster { + foundKubeletUnitMaster = findIgnUnit(ign.Systemd.Units, "kubelet.service", t) + } + if !foundMTUMigrationMaster { + foundMTUMigrationMaster = findIgnFile(ign.Storage.Files, "/usr/local/bin/mtu-migration.sh", t) + foundMTUMigrationMaster = foundMTUMigrationMaster || findIgnFile(ign.Storage.Files, "/etc/systemd/system/mtu-migration.service", t) + } } else { t.Fatalf("Unknown role %s", role) } diff --git a/pkg/controller/template/test_data/controller_config_baremetal_arbiter.yaml b/pkg/controller/template/test_data/controller_config_baremetal_arbiter.yaml new file mode 100644 index 0000000000..2ceaf662d3 --- /dev/null +++ b/pkg/controller/template/test_data/controller_config_baremetal_arbiter.yaml @@ -0,0 +1,37 @@ +apiVersion: "machineconfigurations.openshift.io/v1" +kind: "ControllerConfig" +spec: + clusterDNSIP: "10.3.0.10" + cloudProviderConfig: "" + etcdInitialCount: 3 + etcdCAData: ZHVtbXkgZXRjZC1jYQo= + rootCAData: ZHVtbXkgcm9vdC1jYQo= + pullSecret: + data: ZHVtbXkgZXRjZC1jYQo= + images: + etcd: image/etcd:1 + setupEtcdEnv: image/setupEtcdEnv:1 + infraImage: image/infraImage:1 + kubeClientAgentImage: image/kubeClientAgentImage:1 + infra: + apiVersion: config.openshift.io/v1 + kind: Infrastructure + spec: + cloudConfig: + key: config + name: cloud-provider-config + status: + apiServerInternalURI: https://api-int.my-test-cluster.installer.team.coreos.systems:6443 + apiServerURL: https://api.my-test-cluster.installer.team.coreos.systems:6443 + etcdDiscoveryDomain: my-test-cluster.installer.team.coreos.systems + infrastructureName: my-test-cluster + controlPlaneTopology: HighlyAvailableArbiter + platformStatus: + type: "BareMetal" + baremetal: + apiServerInternalIP: 10.0.0.1 + ingressIP: 10.0.0.2 + nodeDNSIP: 10.0.0.3 + dns: + spec: + baseDomain: my-test-cluster.installer.team.coreos.systems diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 6589fef9d4..f25de22ec2 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -2728,6 +2728,8 @@ func (dn *Daemon) getControlPlaneTopology() configv1.TopologyMode { return configv1.SingleReplicaTopologyMode case configv1.HighlyAvailableTopologyMode: return configv1.HighlyAvailableTopologyMode + case configv1.HighlyAvailableArbiterMode: + return configv1.HighlyAvailableArbiterMode default: // for any unhandled case, default to HighlyAvailableTopologyMode return configv1.HighlyAvailableTopologyMode diff --git a/pkg/operator/bootstrap.go b/pkg/operator/bootstrap.go index 2f178f183d..57a6582761 100644 --- a/pkg/operator/bootstrap.go +++ b/pkg/operator/bootstrap.go @@ -155,7 +155,7 @@ func RenderBootstrap( templatectrl.KubeRbacProxyKey: imgs.KubeRbacProxy, } - config := getRenderConfig("", string(filesData[kubeAPIServerServingCA]), spec, &imgs.RenderConfigImages, infra.Status.APIServerInternalURL, nil, []*mcfgv1alpha1.MachineOSConfig{}, nil) + config := getRenderConfig("", string(filesData[kubeAPIServerServingCA]), spec, &imgs.RenderConfigImages, infra, nil, []*mcfgv1alpha1.MachineOSConfig{}, nil) manifests := []manifest{ { @@ -182,6 +182,13 @@ func RenderBootstrap( }, } + if infra.Status.ControlPlaneTopology == configv1.HighlyAvailableArbiterMode { + manifests = append(manifests, manifest{ + name: "manifests/arbiter.machineconfigpool.yaml", + filename: "bootstrap/manifests/arbiter.machineconfigpool.yaml", + }) + } + manifests = 
appendManifestsByPlatform(manifests, *infra) for _, m := range manifests { diff --git a/pkg/operator/sync.go b/pkg/operator/sync.go index 4aaf72e99a..e1f026d721 100644 --- a/pkg/operator/sync.go +++ b/pkg/operator/sync.go @@ -641,9 +641,9 @@ func (optr *Operator) syncRenderConfig(_ *renderConfig, _ *configv1.ClusterOpera } // create renderConfig - optr.renderConfig = getRenderConfig(optr.namespace, string(kubeAPIServerServingCABytes), spec, &imgs.RenderConfigImages, infra.Status.APIServerInternalURL, pointerConfigData, moscs, apiServer) + optr.renderConfig = getRenderConfig(optr.namespace, string(kubeAPIServerServingCABytes), spec, &imgs.RenderConfigImages, infra, pointerConfigData, moscs, apiServer) } else { - optr.renderConfig = getRenderConfig(optr.namespace, string(kubeAPIServerServingCABytes), spec, &imgs.RenderConfigImages, infra.Status.APIServerInternalURL, pointerConfigData, nil, apiServer) + optr.renderConfig = getRenderConfig(optr.namespace, string(kubeAPIServerServingCABytes), spec, &imgs.RenderConfigImages, infra, pointerConfigData, nil, apiServer) } return nil @@ -682,6 +682,11 @@ func (optr *Operator) syncMachineConfigPools(config *renderConfig, _ *configv1.C "manifests/master.machineconfigpool.yaml", "manifests/worker.machineconfigpool.yaml", } + + if config.Infra.Status.ControlPlaneTopology == configv1.HighlyAvailableArbiterMode { + mcps = append(mcps, "manifests/arbiter.machineconfigpool.yaml") + } + for _, mcp := range mcps { mcpBytes, err := renderAsset(config, mcp) if err != nil { @@ -778,6 +783,8 @@ func (optr *Operator) syncMachineConfigNodes(_ *renderConfig, _ *configv1.Cluste pool = "worker" } else if _, ok = node.Labels["node-role.kubernetes.io/master"]; ok { pool = "master" + } else if _, ok = node.Labels["node-role.kubernetes.io/arbiter"]; ok { + pool = "arbiter" } newMCS := &v1alpha1.MachineConfigNode{ Spec: v1alpha1.MachineConfigNodeSpec{ @@ -2035,7 +2042,7 @@ func setGVK(obj runtime.Object, scheme *runtime.Scheme) error { return nil } -func getRenderConfig(tnamespace, kubeAPIServerServingCA string, ccSpec *mcfgv1.ControllerConfigSpec, imgs *ctrlcommon.RenderConfigImages, apiServerURL string, pointerConfigData []byte, moscs []*mcfgv1alpha1.MachineOSConfig, apiServer *configv1.APIServer) *renderConfig { +func getRenderConfig(tnamespace, kubeAPIServerServingCA string, ccSpec *mcfgv1.ControllerConfigSpec, imgs *ctrlcommon.RenderConfigImages, infra *configv1.Infrastructure, pointerConfigData []byte, moscs []*mcfgv1alpha1.MachineOSConfig, apiServer *configv1.APIServer) *renderConfig { tlsMinVersion, tlsCipherSuites := ctrlcommon.GetSecurityProfileCiphersFromAPIServer(apiServer) return &renderConfig{ TargetNamespace: tnamespace, @@ -2044,8 +2051,9 @@ func getRenderConfig(tnamespace, kubeAPIServerServingCA string, ccSpec *mcfgv1.C ControllerConfig: *ccSpec, Images: imgs, KubeAPIServerServingCA: kubeAPIServerServingCA, - APIServerURL: apiServerURL, + APIServerURL: infra.Status.APIServerInternalURL, PointerConfig: string(pointerConfigData), + Infra: *infra, MachineOSConfigs: moscs, TLSMinVersion: tlsMinVersion, TLSCipherSuites: tlsCipherSuites, diff --git a/pkg/server/bootstrap_server.go b/pkg/server/bootstrap_server.go index 5ac6b438cf..da987fc6a3 100644 --- a/pkg/server/bootstrap_server.go +++ b/pkg/server/bootstrap_server.go @@ -62,7 +62,7 @@ func NewBootstrapServer(dir, kubeconfig string, ircerts []string) (Server, error const yamlExt = ".yaml" func (bsc *bootstrapServer) GetConfig(cr poolRequest) (*runtime.RawExtension, error) { - if cr.machineConfigPool != 
"master" { + if cr.machineConfigPool != "master" && cr.machineConfigPool != "arbiter" { return nil, fmt.Errorf("refusing to serve bootstrap configuration to pool %q", cr.machineConfigPool) } // 1. Read the Machine Config Pool object. diff --git a/templates/arbiter/00-arbiter/_base/files/kubelet-cgroups.yaml b/templates/arbiter/00-arbiter/_base/files/kubelet-cgroups.yaml new file mode 100644 index 0000000000..5ec11ccc31 --- /dev/null +++ b/templates/arbiter/00-arbiter/_base/files/kubelet-cgroups.yaml @@ -0,0 +1,9 @@ +mode: 0644 +path: "/etc/systemd/system.conf.d/kubelet-cgroups.conf" +contents: + inline: | + # Turning on Accounting helps track down performance issues. + [Manager] + DefaultCPUAccounting=yes + DefaultMemoryAccounting=yes + DefaultBlockIOAccounting=yes diff --git a/templates/arbiter/00-arbiter/_base/files/usr-local-bin-openshift-kubeconfig-gen.yaml b/templates/arbiter/00-arbiter/_base/files/usr-local-bin-openshift-kubeconfig-gen.yaml new file mode 100644 index 0000000000..a483e3d838 --- /dev/null +++ b/templates/arbiter/00-arbiter/_base/files/usr-local-bin-openshift-kubeconfig-gen.yaml @@ -0,0 +1,26 @@ +mode: 0755 +path: "/usr/local/bin/recover-kubeconfig.sh" +contents: + inline: | + #!/bin/bash + + set -eou pipefail + + # context + intapi=$(oc get infrastructures.config.openshift.io cluster -o "jsonpath={.status.apiServerInternalURI}") + context="$(oc config current-context)" + # cluster + cluster="$(oc config view -o "jsonpath={.contexts[?(@.name==\"$context\")].context.cluster}")" + server="$(oc config view -o "jsonpath={.clusters[?(@.name==\"$cluster\")].cluster.server}")" + # token + ca_crt_data="$(oc get secret -n openshift-machine-config-operator node-bootstrapper-token -o "jsonpath={.data.ca\.crt}" | base64 --decode)" + namespace="$(oc get secret -n openshift-machine-config-operator node-bootstrapper-token -o "jsonpath={.data.namespace}" | base64 --decode)" + token="$(oc get secret -n openshift-machine-config-operator node-bootstrapper-token -o "jsonpath={.data.token}" | base64 --decode)" + + export KUBECONFIG="$(mktemp)" + kubectl config set-credentials "kubelet" --token="$token" >/dev/null + ca_crt="$(mktemp)"; echo "$ca_crt_data" > $ca_crt + kubectl config set-cluster $cluster --server="$intapi" --certificate-authority="$ca_crt" --embed-certs >/dev/null + kubectl config set-context kubelet --cluster="$cluster" --user="kubelet" >/dev/null + kubectl config use-context kubelet >/dev/null + cat "$KUBECONFIG" diff --git a/templates/arbiter/00-arbiter/_base/units/rpm-ostreed.service.yaml b/templates/arbiter/00-arbiter/_base/units/rpm-ostreed.service.yaml new file mode 100644 index 0000000000..6a9e214729 --- /dev/null +++ b/templates/arbiter/00-arbiter/_base/units/rpm-ostreed.service.yaml @@ -0,0 +1,9 @@ +name: rpm-ostreed.service +dropins: +- name: mco-controlplane-nice.conf + contents: | + # See https://github.com/openshift/machine-config-operator/issues/1897 + [Service] + Nice=10 + IOSchedulingClass=best-effort + IOSchedulingPriority=6 diff --git a/templates/arbiter/00-arbiter/azure/files/etc-kubernetes-manifests-apiserver-watcher.yaml b/templates/arbiter/00-arbiter/azure/files/etc-kubernetes-manifests-apiserver-watcher.yaml new file mode 100644 index 0000000000..f82286eb38 --- /dev/null +++ b/templates/arbiter/00-arbiter/azure/files/etc-kubernetes-manifests-apiserver-watcher.yaml @@ -0,0 +1,45 @@ +mode: 0644 +path: "/etc/kubernetes/manifests/apiserver-watcher.yaml" +contents: + inline: | + apiVersion: v1 + kind: Pod + metadata: + name: apiserver-watcher + namespace: 
openshift-kube-apiserver + spec: + containers: + - name: apiserver-watcher + image: "{{.Images.apiServerWatcherKey}}" + command: + - flock + - --verbose + - --exclusive + - --timeout=300 + - /rootfs/run/cloud-routes/apiserver-watcher.lock + - apiserver-watcher + args: + - "run" + - "--health-check-url={{.Infra.Status.APIServerInternalURL}}/readyz" + resources: + requests: + cpu: 20m + memory: 50Mi + terminationMessagePolicy: FallbackToLogsOnError + securityContext: + privileged: true + volumeMounts: + - mountPath: /rootfs + name: rootfs + mountPropagation: HostToContainer + hostNetwork: true + hostPID: true + priorityClassName: system-node-critical + tolerations: + - operator: "Exists" + restartPolicy: Always + volumes: + - name: rootfs + hostPath: + path: / + diff --git a/templates/arbiter/00-arbiter/azure/files/opt-libexec-openshift-azure-routes-sh.yaml b/templates/arbiter/00-arbiter/azure/files/opt-libexec-openshift-azure-routes-sh.yaml new file mode 100644 index 0000000000..d1d68b9570 --- /dev/null +++ b/templates/arbiter/00-arbiter/azure/files/opt-libexec-openshift-azure-routes-sh.yaml @@ -0,0 +1,225 @@ +mode: 0755 +path: "/opt/libexec/openshift-azure-routes.sh" +contents: + inline: | + #!/bin/bash + + # Prevent hairpin traffic when the apiserver is up + + # As per the Azure documentation (https://docs.microsoft.com/en-us/azure/load-balancer/concepts#limitations), + # if a backend is load-balanced to itself, then the traffic will be dropped. + # + # This is because the L3LB does DNAT, so while the outgoing packet has a destination + # IP of the VIP, the incoming load-balanced packet has a destination IP of the + # host. That means that it "sees" a syn with the source and destination + # IPs of itself, and duly replies wit a syn-ack back to itself. However, the client + # socket expects a syn-ack with a source IP of the VIP, so it drops the packet. + # + # The solution is to redirect traffic destined to the lb vip back to ourselves. + # + # We check /run/cloud-routes/ for files $VIP.up and $VIP.down. If the .up file + # exists, then we redirect traffic destined for that vip to ourselves via nftables. + # A systemd unit watches the directory for changes. + # + # TODO: Address the potential issue where apiserver-watcher could create multiple files + # and openshift-azure-routes doesn't detect all of them because file change events are not queued + # when the service is already running. + # https://github.com/openshift/machine-config-operator/pull/3643#issuecomment-1497234369 + + set -euo pipefail + + # the list of load balancer IPs that are assigned to this node + declare -A v4vips + declare -A v6vips + + TABLE_NAME="azure-vips" + VIPS_CHAIN="redirect-vips" + RUN_DIR="/run/cloud-routes" + + initialize() { + nft -f - <&2 + else + printf "%s" "${BODY}" + fi + } + + TABLE_NAME="gcp-vips" + EXTERNAL_VIPS_CHAIN="external-vips" + LOCAL_VIPS_CHAIN="local-vips" + RUN_DIR="/run/cloud-routes" + + # Set up base table and rules + initialize() { + nft -f - < /dev/null; then + inotifywait -t 30 -r "${RUN_DIR}" &> /dev/null || true + else + # no inotify, need to manually poll + for i in {0..5}; do + for vip in "${!vips[@]}"; do + if [[ "${vips[${vip}]}" != down ]] && [[ -e "${RUN_DIR}/${vip}.down" ]]; then + echo "new downfile detected" + break 2 + elif [[ "${vips[${vip}]}" = down ]] && ! 
[[ -e "${RUN_DIR}/${vip}.down" ]]; then + echo "downfile disappeared" + break 2 + fi + done + sleep 1 # keep this small enough to not make gcp-routes slower than LBs on recovery + done + fi + } + + case "$1" in + start) + initialize + while :; do + list_lb_ips + sync_rules + remove_stale_routes # needed for OVN-Kubernetes plugin's routingViaHost=false mode + add_routes # needed for OVN-Kubernetes plugin's routingViaHost=false mode + echo "done applying vip rules" + sleep_or_watch + done + ;; + cleanup) + clear_rules + clear_routes # needed for OVN-Kubernetes plugin's routingViaHost=false mode + ;; + *) + echo $"Usage: $0 {start|cleanup}" + exit 1 + esac diff --git a/templates/arbiter/00-arbiter/gcp/units/gcp-routes.service.yaml b/templates/arbiter/00-arbiter/gcp/units/gcp-routes.service.yaml new file mode 100644 index 0000000000..bd069de7d0 --- /dev/null +++ b/templates/arbiter/00-arbiter/gcp/units/gcp-routes.service.yaml @@ -0,0 +1,6 @@ +name: gcp-routes.service +dropins: +- name: mco-disabled.conf + contents: | + [Unit] + ConditionPathExists=/enoent diff --git a/templates/arbiter/00-arbiter/gcp/units/openshift-gcp-routes.service.yaml b/templates/arbiter/00-arbiter/gcp/units/openshift-gcp-routes.service.yaml new file mode 100644 index 0000000000..f26ba16bf9 --- /dev/null +++ b/templates/arbiter/00-arbiter/gcp/units/openshift-gcp-routes.service.yaml @@ -0,0 +1,21 @@ +name: openshift-gcp-routes.service +enabled: true +contents: | + [Unit] + Description=Update GCP routes for forwarded IPs. + ConditionKernelCommandLine=|ignition.platform.id=gce + ConditionKernelCommandLine=|ignition.platform.id=gcp + Before=network-online.target + + [Service] + Type=simple + ExecStart=/bin/bash /opt/libexec/openshift-gcp-routes.sh start + ExecStopPost=/bin/bash /opt/libexec/openshift-gcp-routes.sh cleanup + User=root + RestartSec=30 + Restart=always + + [Install] + WantedBy=multi-user.target + # Ensure that network-online.target will not complete until the node has working external LBs. + RequiredBy=network-online.target diff --git a/templates/arbiter/00-arbiter/on-prem/OWNERS b/templates/arbiter/00-arbiter/on-prem/OWNERS new file mode 100644 index 0000000000..ebcd078791 --- /dev/null +++ b/templates/arbiter/00-arbiter/on-prem/OWNERS @@ -0,0 +1,8 @@ +# See the OWNERS docs: https://git.k8s.io/community/contributors/guide/owners.md +# This file just uses aliases defined in OWNERS_ALIASES. 
+ +reviewers: + - baremetal-reviewers + - openstack-reviewers + - ovirt-reviewers + - vsphere-reviewers diff --git a/templates/arbiter/00-arbiter/on-prem/files/haproxy-haproxy.yaml b/templates/arbiter/00-arbiter/on-prem/files/haproxy-haproxy.yaml new file mode 100644 index 0000000000..b402ecb9b5 --- /dev/null +++ b/templates/arbiter/00-arbiter/on-prem/files/haproxy-haproxy.yaml @@ -0,0 +1,43 @@ +mode: 0644 +path: "/etc/kubernetes/static-pod-resources/haproxy/haproxy.cfg.tmpl" +contents: + inline: | + global + stats socket /var/lib/haproxy/run/haproxy.sock mode 600 level admin expose-fd listeners + defaults + maxconn 20000 + mode tcp + log /var/run/haproxy/haproxy-log.sock local0 notice alert + log-format "%ci:%cp -> %fi:%fp [%t] %ft %b/%s %Tw/%Tc/%Tt %B %ts %ac/%fc/%bc/%sc/%rc %sq/%bq" + option dontlognull + retries 3 + timeout http-request 30s + timeout queue 1m + timeout connect 10s + timeout client 86400s + timeout server 86400s + timeout tunnel 86400s + {{`{{- if gt (len .LBConfig.Backends) 0 }}`}} + frontend main + bind :::{{`{{ .LBConfig.LbPort }}`}} v4v6 + default_backend masters + listen health_check_http_url + bind :::9444 v4v6 + mode http + monitor-uri /haproxy_ready + option dontlognull + {{`{{- end }}`}} + listen stats + bind localhost:{{`{{ .LBConfig.StatPort }}`}} + mode http + stats enable + stats hide-version + stats uri /haproxy_stats + stats refresh 30s + stats auth Username:Password + backend masters + option httpchk GET /readyz HTTP/1.0 + balance roundrobin + {{`{{- range .LBConfig.Backends }} + server {{ .Host }} {{ .Address }}:{{ .Port }} weight 1 verify none check check-ssl inter 1s fall 2 rise 3 + {{- end }}`}} diff --git a/templates/arbiter/00-arbiter/on-prem/files/haproxy.yaml b/templates/arbiter/00-arbiter/on-prem/files/haproxy.yaml new file mode 100644 index 0000000000..5a56ff13b1 --- /dev/null +++ b/templates/arbiter/00-arbiter/on-prem/files/haproxy.yaml @@ -0,0 +1,166 @@ +mode: 0644 +path: {{ if isOpenShiftManagedDefaultLB . -}} "/etc/kubernetes/manifests/haproxy.yaml" {{ else }} "/etc/kubernetes/disabled-manifests/haproxy.yaml" {{ end }} +contents: + inline: | + kind: Pod + apiVersion: v1 + metadata: + name: haproxy + namespace: openshift-{{ onPremPlatformShortName . }}-infra + creationTimestamp: + deletionGracePeriodSeconds: 65 + labels: + app: {{ onPremPlatformShortName . 
}}-infra-api-lb + annotations: + target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}' + openshift.io/required-scc: privileged + spec: + volumes: + - name: resource-dir + hostPath: + path: "/etc/kubernetes/static-pod-resources/haproxy" + - name: kubeconfigvarlib + hostPath: + path: "/var/lib/kubelet" + - name: run-dir + empty-dir: {} + - name: conf-dir + hostPath: + path: "/etc/haproxy" + - name: chroot-host + hostPath: + path: "/" + initContainers: + - name: verify-api-int-resolvable + image: {{ .Images.baremetalRuntimeCfgImage }} + command: + - "/bin/bash" + - "-c" + - | + /usr/bin/curl -o /dev/null -kLfs https://api-int.{{ .DNS.Spec.BaseDomain }}:6443/healthz + resources: {} + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - name: chroot-host + mountPath: "/host" + mountPropagation: HostToContainer + - name: kubeconfigvarlib + mountPath: "/var/lib/kubelet" + mountPropagation: HostToContainer + imagePullPolicy: IfNotPresent + containers: + - name: haproxy + image: {{.Images.haproxyImage}} + env: + - name: OLD_HAPROXY_PS_FORCE_DEL_TIMEOUT + value: "120" + command: + - "/bin/bash" + - "-c" + - | + #/bin/bash + verify_old_haproxy_ps_being_deleted() + { + local prev_pids + + prev_pids="$1" + sleep $OLD_HAPROXY_PS_FORCE_DEL_TIMEOUT + cur_pids=$(pidof haproxy) + + for val in $prev_pids; do + if [[ $cur_pids =~ (^|[[:space:]])"$val"($|[[:space:]]) ]] ; then + kill $val + fi + done + } + + reload_haproxy() + { + old_pids=$(pidof haproxy) + if [ -n "$old_pids" ]; then + /usr/sbin/haproxy -W -db -f /etc/haproxy/haproxy.cfg -p /var/lib/haproxy/run/haproxy.pid -x /var/lib/haproxy/run/haproxy.sock -sf $old_pids & + #There seems to be some cases where HAProxy doesn't drain properly. + #To handle that case, SIGTERM signal being sent to old HAProxy processes which haven't terminated. + verify_old_haproxy_ps_being_deleted "$old_pids" & + else + /usr/sbin/haproxy -W -db -f /etc/haproxy/haproxy.cfg -p /var/lib/haproxy/run/haproxy.pid & + fi + } + + msg_handler() + { + while read -r line; do + echo "The client send: $line" >&2 + # currently only 'reload' msg is supported + if [ "$line" = reload ]; then + reload_haproxy + fi + done + } + set -ex + declare -r haproxy_sock="/var/run/haproxy/haproxy-master.sock" + declare -r haproxy_log_sock="/var/run/haproxy/haproxy-log.sock" + export -f msg_handler + export -f reload_haproxy + export -f verify_old_haproxy_ps_being_deleted + rm -f "$haproxy_sock" "$haproxy_log_sock" + socat UNIX-RECV:${haproxy_log_sock} STDOUT & + if [ -s "/etc/haproxy/haproxy.cfg" ]; then + /usr/sbin/haproxy -W -db -f /etc/haproxy/haproxy.cfg -p /var/lib/haproxy/run/haproxy.pid & + fi + socat UNIX-LISTEN:${haproxy_sock},fork system:'bash -c msg_handler' + resources: + requests: + cpu: 100m + memory: 200Mi + volumeMounts: + - name: conf-dir + mountPath: "/etc/haproxy" + mountPropagation: HostToContainer + - name: run-dir + mountPath: "/var/run/haproxy" + livenessProbe: + initialDelaySeconds: 50 + httpGet: + path: /haproxy_ready + port: 9444 + terminationMessagePolicy: FallbackToLogsOnError + imagePullPolicy: IfNotPresent + - name: haproxy-monitor + securityContext: + capabilities: + add: ["NET_ADMIN", "SYS_CHROOT"] + image: {{ .Images.baremetalRuntimeCfgImage }} + command: + - monitor + - "/var/lib/kubelet/kubeconfig" + - "/config/haproxy.cfg.tmpl" + - "/etc/haproxy/haproxy.cfg" + - "--api-vips" + - "{{- range $index, $ip := onPremPlatformAPIServerInternalIPs . 
}}{{ if gt $index 0 }},{{end}}{{$ip}}{{end}}" + resources: + requests: + cpu: 100m + memory: 200Mi + volumeMounts: + - name: conf-dir + mountPath: "/etc/haproxy" + mountPropagation: HostToContainer + - name: run-dir + mountPath: "/var/run/haproxy" + - name: resource-dir + mountPath: "/config" + mountPropagation: HostToContainer + - name: chroot-host + mountPath: "/host" + mountPropagation: HostToContainer + - name: kubeconfigvarlib + mountPath: "/var/lib/kubelet" + mountPropagation: HostToContainer + terminationMessagePolicy: FallbackToLogsOnError + imagePullPolicy: IfNotPresent + hostNetwork: true + tolerations: + - operator: Exists + priorityClassName: system-node-critical + status: {} diff --git a/templates/arbiter/00-arbiter/on-prem/files/keepalived-keepalived.yaml b/templates/arbiter/00-arbiter/on-prem/files/keepalived-keepalived.yaml new file mode 100644 index 0000000000..159321e593 --- /dev/null +++ b/templates/arbiter/00-arbiter/on-prem/files/keepalived-keepalived.yaml @@ -0,0 +1,156 @@ +mode: 0644 +path: "/etc/kubernetes/static-pod-resources/keepalived/keepalived.conf.tmpl" +contents: + inline: | + global_defs { + enable_script_security + script_user root + max_auto_priority -1 + vrrp_garp_master_refresh 60 + } + + # These are separate checks to provide the following behavior: + # If the loadbalancer is healthy then all is well regardless + # of what the local API status is. Both checks will return success and + # we'll have the maximum priority. This means as long as there is a node + # with a functional loadbalancer it will get the VIP. + # If all of the loadbalancers go down but the local API is still running, + # the _both check will still succeed and allow any node with a functional + # API to take the VIP. This isn't preferred because it means all API + # traffic will go through one node, but at least it keeps the API available. + vrrp_script chk_ocp_lb { + script "/usr/bin/timeout 1.9 /etc/keepalived/chk_ocp_script.sh" + interval 2 + weight 20 + rise 3 + fall 3 + } + + vrrp_script chk_ocp_both { + script "/usr/bin/timeout 1.9 /etc/keepalived/chk_ocp_script_both.sh" + interval 2 + # Use a smaller weight for this check so it won't trigger the move from + # bootstrap to master by itself. + weight 5 + rise 3 + fall 3 + } + + vrrp_script chk_mcs { + script "/usr/bin/timeout 1.9 /etc/keepalived/chk_mcs_script.sh" + interval 2 + weight 3 + rise 3 + fall 3 + } + + # TODO: Improve this check. The port is assumed to be alive. + # Need to assess what is the ramification if the port is not there. 
+ vrrp_script chk_ingress_ready { + script "/usr/bin/timeout 0.9 /usr/bin/curl -o /dev/null -Lfs http://localhost:1936/healthz/ready" + interval 1 + weight 10 + rise 3 + fall 2 + } + + vrrp_script chk_ingress { + script "/usr/bin/timeout 0.9 /usr/bin/curl -o /dev/null -Lfs http://localhost:1936/healthz" + interval 1 + rise 3 + fall 2 + } + + vrrp_script chk_default_ingress { + script "/usr/bin/timeout 4.9 /etc/keepalived/chk_default_ingress.sh" + interval 5 + weight 50 + rise 3 + fall 2 + } + + {{`{{ range $i, $config := .Configs }}`}} + {{`{{$nonVirtualIP := .NonVirtualIP}}`}} + + {{`{{$participateInAPIVRPP := not .EnableUnicast}}`}} + {{`{{- if .EnableUnicast}} + {{- range .LBConfig.Backends}} + {{- if eq $nonVirtualIP .Address}} + {{$participateInAPIVRPP = true}} + {{- end}} + {{- end}} + {{- end}}`}} + + {{`{{if $participateInAPIVRPP}}`}} + vrrp_instance {{`{{ .Cluster.Name }}`}}_API_{{`{{$i}}`}} { + state BACKUP + interface {{`{{ .VRRPInterface }}`}} + virtual_router_id {{`{{ .Cluster.APIVirtualRouterID }}`}} + priority 40 + advert_int 1 + {{`{{if .EnableUnicast}}`}} + unicast_src_ip {{`{{.NonVirtualIP}}`}} + unicast_peer { + {{`{{- range .LBConfig.Backends -}} + {{- if ne $nonVirtualIP .Address}} + {{.Address}} + {{- end}} + {{- end}}`}} + } + {{`{{end}}`}} + authentication { + auth_type PASS + auth_pass {{`{{ .Cluster.Name }}`}}_api_vip + } + virtual_ipaddress { + {{`{{ .Cluster.APIVIP }}`}}/{{`{{ .Cluster.VIPNetmask }}`}} label vip + } + track_script { + chk_ocp_lb + chk_ocp_both + chk_mcs + } + } + {{`{{end}}`}} + + {{`{{$participateInIngressVRPP := not .EnableUnicast}}`}} + {{`{{- if .EnableUnicast}} + {{- range .IngressConfig.Peers}} + {{- if eq $nonVirtualIP .}} + {{$participateInIngressVRPP = true}} + {{- end}} + {{- end}} + {{- end}}`}} + + {{`{{if $participateInIngressVRPP}}`}} + vrrp_instance {{`{{ .Cluster.Name }}`}}_INGRESS_{{`{{$i}}`}} { + state BACKUP + interface {{`{{ .VRRPInterface }}`}} + virtual_router_id {{`{{ .Cluster.IngressVirtualRouterID }}`}} + priority 20 + advert_int 1 + {{`{{if .EnableUnicast}}`}} + unicast_src_ip {{`{{.NonVirtualIP}}`}} + unicast_peer { + {{`{{- range .IngressConfig.Peers}} + {{- if ne $nonVirtualIP .}} + {{.}} + {{- end}} + {{- end}}`}} + } + {{`{{end}}`}} + authentication { + auth_type PASS + auth_pass {{`{{ .Cluster.Name }}`}}_ingress_vip + } + virtual_ipaddress { + {{`{{ .Cluster.IngressVIP }}`}}/{{`{{ .Cluster.VIPNetmask }}`}} label vip + } + track_script { + chk_ingress + chk_ingress_ready + chk_default_ingress + } + } + {{`{{ end }}`}} + {{`{{ end }}`}} diff --git a/templates/arbiter/00-arbiter/on-prem/files/keepalived-mcs-script.yaml b/templates/arbiter/00-arbiter/on-prem/files/keepalived-mcs-script.yaml new file mode 100644 index 0000000000..a977cbcc2d --- /dev/null +++ b/templates/arbiter/00-arbiter/on-prem/files/keepalived-mcs-script.yaml @@ -0,0 +1,6 @@ +mode: 0755 +path: "/etc/kubernetes/static-pod-resources/keepalived/scripts/chk_mcs_script.sh.tmpl" +contents: + inline: | + #!/bin/bash + chroot /host /bin/crictl ps --state running | grep -qE '\smachine-config-server\s' diff --git a/templates/arbiter/00-arbiter/on-prem/files/keepalived-script-both.yaml b/templates/arbiter/00-arbiter/on-prem/files/keepalived-script-both.yaml new file mode 100644 index 0000000000..fc1d76783d --- /dev/null +++ b/templates/arbiter/00-arbiter/on-prem/files/keepalived-script-both.yaml @@ -0,0 +1,6 @@ +mode: 0755 +path: "/etc/kubernetes/static-pod-resources/keepalived/scripts/chk_ocp_script_both.sh.tmpl" +contents: + inline: | + #!/bin/bash + 
/usr/bin/curl -o /dev/null -kLfs http://localhost:9444/haproxy_ready && [ -e /var/run/keepalived/iptables-rule-exists ] || /usr/bin/curl -kLfs https://localhost:{{`{{ .LBConfig.ApiPort }}`}}/readyz diff --git a/templates/arbiter/00-arbiter/on-prem/files/keepalived-script.yaml b/templates/arbiter/00-arbiter/on-prem/files/keepalived-script.yaml new file mode 100644 index 0000000000..1f118c119e --- /dev/null +++ b/templates/arbiter/00-arbiter/on-prem/files/keepalived-script.yaml @@ -0,0 +1,6 @@ +mode: 0755 +path: "/etc/kubernetes/static-pod-resources/keepalived/scripts/chk_ocp_script.sh.tmpl" +contents: + inline: | + #!/bin/bash + /usr/bin/curl -o /dev/null -kLfs http://localhost:9444/haproxy_ready && [ -e /var/run/keepalived/iptables-rule-exists ] diff --git a/templates/arbiter/00-arbiter/openstack/OWNERS b/templates/arbiter/00-arbiter/openstack/OWNERS new file mode 100644 index 0000000000..bdf81dfe6e --- /dev/null +++ b/templates/arbiter/00-arbiter/openstack/OWNERS @@ -0,0 +1,7 @@ +# See the OWNERS docs: https://git.k8s.io/community/contributors/guide/owners.md +# This file just uses aliases defined in OWNERS_ALIASES. + +approvers: + - openstack-approvers +reviewers: + - openstack-reviewers diff --git a/templates/arbiter/00-arbiter/sno/OWNERS b/templates/arbiter/00-arbiter/sno/OWNERS new file mode 100644 index 0000000000..e93ddd2231 --- /dev/null +++ b/templates/arbiter/00-arbiter/sno/OWNERS @@ -0,0 +1,5 @@ +# See the OWNERS docs: https://git.k8s.io/community/contributors/guide/owners.md +# This file just uses aliases defined in OWNERS_ALIASES. + +reviewers: + - assisted-reviewers diff --git a/templates/arbiter/00-arbiter/sno/files/sno-forcedns-fix.yaml b/templates/arbiter/00-arbiter/sno/files/sno-forcedns-fix.yaml new file mode 100644 index 0000000000..78234066c2 --- /dev/null +++ b/templates/arbiter/00-arbiter/sno/files/sno-forcedns-fix.yaml @@ -0,0 +1,12 @@ +mode: 0755 +path: "/etc/NetworkManager/dispatcher.d/forcedns-rhel9-fix" +contents: + inline: | + #!/bin/bash + # this script was added as upgrade fix for assisted sno installation were forcedns dispatcher script misses shebang + filename="/etc/NetworkManager/dispatcher.d/forcedns" + if [ ! 
-f "$filename" ]; then + exit 0 + fi + # in case there is no shebang run forcedns dispatcher script with /bin/bash + head -n1 "$filename" | grep -qx '#!/bin/bash' || /bin/bash "$filename" $1 $2 diff --git a/templates/arbiter/01-arbiter-container-runtime/OWNERS b/templates/arbiter/01-arbiter-container-runtime/OWNERS new file mode 100644 index 0000000000..99b0f03794 --- /dev/null +++ b/templates/arbiter/01-arbiter-container-runtime/OWNERS @@ -0,0 +1,16 @@ +# See the OWNERS docs: https://git.k8s.io/community/contributors/guide/owners.md +approvers: + - giuseppe + - haircommander + - kolyshkin + - mrunalp + - nalind + - saschagrunert + - umohnani8 +reviewers: + - QiWang19 + - hasan4791 + - kwilczynski + - mtrmac + - sohankunkerkar + - wgahnagl diff --git a/templates/arbiter/01-arbiter-container-runtime/_base/files/container-registries.yaml b/templates/arbiter/01-arbiter-container-runtime/_base/files/container-registries.yaml new file mode 100644 index 0000000000..6d6c9f1c13 --- /dev/null +++ b/templates/arbiter/01-arbiter-container-runtime/_base/files/container-registries.yaml @@ -0,0 +1,5 @@ +mode: 0644 +path: "/etc/containers/registries.conf" +contents: + inline: | + unqualified-search-registries = ['registry.access.redhat.com', 'docker.io'] diff --git a/templates/arbiter/01-arbiter-container-runtime/_base/files/crio.yaml b/templates/arbiter/01-arbiter-container-runtime/_base/files/crio.yaml new file mode 100644 index 0000000000..9a9d04e48f --- /dev/null +++ b/templates/arbiter/01-arbiter-container-runtime/_base/files/crio.yaml @@ -0,0 +1,105 @@ +mode: 0644 +path: "/etc/crio/crio.conf.d/00-default" +contents: + inline: | + [crio] + internal_wipe = true + internal_repair = true + + [crio.api] + stream_address = "127.0.0.1" + stream_port = "0" + + [crio.runtime] + selinux = true + conmon = "" + conmon_cgroup = "pod" + default_env = [ + "NSS_SDB_USE_CACHE=no", + ] + default_runtime = "runc" + log_level = "info" + cgroup_manager = "systemd" + default_sysctls = [ + "net.ipv4.ping_group_range=0 2147483647", + ] + hooks_dir = [ + "/etc/containers/oci/hooks.d", + "/run/containers/oci/hooks.d", + "/usr/share/containers/oci/hooks.d", + ] + manage_ns_lifecycle = true + absent_mount_sources_to_reject = [ + "/etc/hostname", + ] + drop_infra_ctr = true + + [crio.runtime.runtimes.runc] + allowed_annotations = [ + "io.containers.trace-syscall", + "io.kubernetes.cri-o.Devices", + "io.kubernetes.cri-o.LinkLogs", + ] + + [crio.runtime.runtimes.crun] + runtime_root = "/run/crun" + allowed_annotations = [ + "io.containers.trace-syscall", + "io.kubernetes.cri-o.Devices", + "io.kubernetes.cri-o.LinkLogs", + ] + # Based on https://github.com/containers/crun/blob/27d7dd3a0/README.md?plain=1#L48 + container_min_memory = "512KiB" + + [crio.runtime.workloads.openshift-builder] + activation_annotation = "io.openshift.builder" + allowed_annotations = [ + "io.kubernetes.cri-o.userns-mode", + "io.kubernetes.cri-o.Devices" + ] + [crio.runtime.workloads.openshift-builder.resources] + + [crio.image] + global_auth_file = "/var/lib/kubelet/config.json" + pause_image = "{{.Images.infraImageKey}}" + pause_image_auth_file = "/var/lib/kubelet/config.json" + pause_command = "/usr/bin/pod" + + [crio.network] + network_dir = "/etc/kubernetes/cni/net.d/" + plugin_dirs = [ + "/var/lib/cni/bin", + "/usr/libexec/cni", + ] + + [crio.metrics] + enable_metrics = true + metrics_host = "127.0.0.1" + metrics_port = 9537 + metrics_collectors = [ + "operations", # DEPRECATED: in favour of "operations_total" + 
"operations_latency_microseconds_total", # DEPRECATED: in favour of "operations_latency_seconds_total" + "operations_latency_microseconds", # DEPRECATED: in favour of "operations_latency_seconds" + "operations_errors", # DEPRECATED: in favour of "operations_errors_total" + "image_pulls_layer_size", + "containers_oom_total", # DEPRECATED: in favour of "containers_oom_count_total" + "containers_oom", + # Drop metrics with excessive label cardinality. + # "image_pulls_by_digest", # DEPRECATED: in favour of "image_pulls_bytes_total" + # "image_pulls_by_name", # DEPRECATED: in favour of "image_pulls_bytes_total" + # "image_pulls_by_name_skipped", # DEPRECATED: in favour of "image_pulls_skipped_bytes_total" + # "image_pulls_failures", # DEPRECATED: in favour of "image_pulls_failure_total" + # "image_pulls_successes", # DEPRECATED: in favour of "image_pulls_success_total" + # "image_layer_reuse", # DEPRECATED: in favour of "image_layer_reuse_total" + "operations_total", + "operations_latency_seconds_total", + "operations_latency_seconds", + "operations_errors_total", + "image_pulls_bytes_total", + "image_pulls_skipped_bytes_total", + "image_pulls_success_total", + "image_pulls_failure_total", + "image_layer_reuse_total", + "containers_oom_count_total", + "processes_defunct" + ] diff --git a/templates/arbiter/01-arbiter-container-runtime/_base/files/policy.yaml b/templates/arbiter/01-arbiter-container-runtime/_base/files/policy.yaml new file mode 100644 index 0000000000..daae76e164 --- /dev/null +++ b/templates/arbiter/01-arbiter-container-runtime/_base/files/policy.yaml @@ -0,0 +1,18 @@ +mode: 0644 +path: "/etc/containers/policy.json" +contents: + inline: | + { + "default": [ + { + "type": "insecureAcceptAnything" + } + ], + "transports": + { + "docker-daemon": + { + "": [{"type":"insecureAcceptAnything"}] + } + } + } \ No newline at end of file diff --git a/templates/arbiter/01-arbiter-container-runtime/openstack/OWNERS b/templates/arbiter/01-arbiter-container-runtime/openstack/OWNERS new file mode 100644 index 0000000000..bdf81dfe6e --- /dev/null +++ b/templates/arbiter/01-arbiter-container-runtime/openstack/OWNERS @@ -0,0 +1,7 @@ +# See the OWNERS docs: https://git.k8s.io/community/contributors/guide/owners.md +# This file just uses aliases defined in OWNERS_ALIASES. + +approvers: + - openstack-approvers +reviewers: + - openstack-reviewers diff --git a/templates/arbiter/01-arbiter-kubelet/OWNERS b/templates/arbiter/01-arbiter-kubelet/OWNERS new file mode 100644 index 0000000000..8851eb9312 --- /dev/null +++ b/templates/arbiter/01-arbiter-kubelet/OWNERS @@ -0,0 +1,8 @@ +# See the OWNERS docs: https://git.k8s.io/community/contributors/guide/owners.md +# This file just uses aliases defined in OWNERS_ALIASES. 
+ +approvers: + - rphillips + - sjenning + - mrunalp + diff --git a/templates/arbiter/01-arbiter-kubelet/_base/files/cloudconfig.yaml b/templates/arbiter/01-arbiter-kubelet/_base/files/cloudconfig.yaml new file mode 100644 index 0000000000..0d10309942 --- /dev/null +++ b/templates/arbiter/01-arbiter-kubelet/_base/files/cloudconfig.yaml @@ -0,0 +1,5 @@ +mode: 0644 +path: "/etc/kubernetes/cloud.conf" +contents: + inline: |- +{{indent 4 .CloudProviderConfig}} diff --git a/templates/arbiter/01-arbiter-kubelet/_base/files/criometricsproxy-config.yaml b/templates/arbiter/01-arbiter-kubelet/_base/files/criometricsproxy-config.yaml new file mode 100644 index 0000000000..2ebf703a69 --- /dev/null +++ b/templates/arbiter/01-arbiter-kubelet/_base/files/criometricsproxy-config.yaml @@ -0,0 +1,11 @@ +mode: 0644 +path: "/etc/kubernetes/crio-metrics-proxy.cfg" +contents: + inline: |- + authorization: + static: + - resourceRequest: false + path: /metrics + verb: get + user: + name: system:serviceaccount:openshift-monitoring:prometheus-k8s diff --git a/templates/arbiter/01-arbiter-kubelet/_base/files/criometricsproxy.yaml b/templates/arbiter/01-arbiter-kubelet/_base/files/criometricsproxy.yaml new file mode 100644 index 0000000000..79c01086ca --- /dev/null +++ b/templates/arbiter/01-arbiter-kubelet/_base/files/criometricsproxy.yaml @@ -0,0 +1,79 @@ +mode: 0644 +path: "/etc/kubernetes/manifests/criometricsproxy.yaml" +contents: + inline: |- + apiVersion: v1 + kind: Pod + metadata: + name: kube-rbac-proxy-crio + namespace: openshift-machine-config-operator + annotations: + target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}' + spec: + volumes: + - name: etc-kube + hostPath: + path: "/etc/kubernetes" + - name: var-lib-kubelet + hostPath: + path: "/var/lib/kubelet" + hostNetwork: true + priorityClassName: system-cluster-critical + initContainers: + - name: setup + terminationMessagePolicy: FallbackToLogsOnError + image: {{.Images.kubeRbacProxyImage}} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: var-lib-kubelet + mountPath: "/var" + mountPropagation: HostToContainer + command: ['/bin/bash', '-ec'] + args: + - | + echo -n "Waiting for kubelet key and certificate to be available" + while [ -n "$(test -e /var/lib/kubelet/pki/kubelet-server-current.pem)" ] ; do + echo -n "." + sleep 1 + (( tries += 1 )) + if [[ "${tries}" -gt 10 ]]; then + echo "Timed out waiting for kubelet key and cert." 
+ exit 1 + fi + done + securityContext: + privileged: true + resources: + requests: + memory: 50Mi + cpu: 5m + containers: + - name: kube-rbac-proxy-crio + image: {{.Images.kubeRbacProxyImage}} + securityContext: + privileged: true + ports: + - containerPort: 9637 + args: + - --secure-listen-address=:9637 + - --config-file=/etc/kubernetes/crio-metrics-proxy.cfg + - --client-ca-file=/etc/kubernetes/kubelet-ca.crt + - --logtostderr=true + - --kubeconfig=/var/lib/kubelet/kubeconfig + - --tls-cipher-suites={{join .TLSCipherSuites ","}} + - --tls-min-version={{.TLSMinVersion}} + - --upstream=http://127.0.0.1:9537 + - --tls-cert-file=/var/lib/kubelet/pki/kubelet-server-current.pem + - --tls-private-key-file=/var/lib/kubelet/pki/kubelet-server-current.pem + resources: + requests: + cpu: 20m + memory: 50Mi + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - name: etc-kube + mountPath: "/etc/kubernetes" + mountPropagation: HostToContainer + - name: var-lib-kubelet + mountPath: "/var/lib/kubelet" + mountPropagation: HostToContainer diff --git a/templates/arbiter/01-arbiter-kubelet/_base/files/kubelet.yaml b/templates/arbiter/01-arbiter-kubelet/_base/files/kubelet.yaml new file mode 100644 index 0000000000..b1d7f695ec --- /dev/null +++ b/templates/arbiter/01-arbiter-kubelet/_base/files/kubelet.yaml @@ -0,0 +1,35 @@ +mode: 0644 +path: "/etc/kubernetes/kubelet.conf" +contents: + inline: | + kind: KubeletConfiguration + apiVersion: kubelet.config.k8s.io/v1beta1 + authentication: + x509: + clientCAFile: /etc/kubernetes/kubelet-ca.crt + anonymous: + enabled: false + cgroupDriver: systemd + cgroupRoot: / + clusterDNS: + - {{.ClusterDNSIP}} + clusterDomain: cluster.local + containerLogMaxSize: 50Mi + enableSystemLogQuery: true + maxPods: 250 + kubeAPIQPS: 50 + kubeAPIBurst: 100 + podPidsLimit: 4096 + protectKernelDefaults: true + rotateCertificates: true + serializeImagePulls: false + staticPodPath: /etc/kubernetes/manifests + systemCgroups: /system.slice + nodeStatusUpdateFrequency: 10s + nodeStatusReportFrequency: 5m + serverTLSBootstrap: true + tlsMinVersion: {{.TLSMinVersion}} + tlsCipherSuites: + {{- range .TLSCipherSuites }} + - {{ . }} + {{- end }} diff --git a/templates/arbiter/01-arbiter-kubelet/_base/files/kubenswrapper.yaml b/templates/arbiter/01-arbiter-kubelet/_base/files/kubenswrapper.yaml new file mode 100644 index 0000000000..01321665b7 --- /dev/null +++ b/templates/arbiter/01-arbiter-kubelet/_base/files/kubenswrapper.yaml @@ -0,0 +1,12 @@ +# Note: This compatibility wrapper is needed to bridge the gap from OCP 4.11->4.12 when the new 'kubensenter' script is being introduced. +# It can be removed (and the kubelet.service should call kubensenter directly) when 4.11 is no longer a release we must upgrade from. 
+mode: 0755 +path: "/usr/local/bin/kubenswrapper" +contents: + inline: | + #!/bin/sh + if [ -x /usr/bin/kubensenter ]; then + exec /usr/bin/kubensenter "$@" + else + exec "$@" + fi diff --git a/templates/arbiter/01-arbiter-kubelet/_base/units/kubelet-cleanup.service.yaml b/templates/arbiter/01-arbiter-kubelet/_base/units/kubelet-cleanup.service.yaml new file mode 100644 index 0000000000..f7cf670a4b --- /dev/null +++ b/templates/arbiter/01-arbiter-kubelet/_base/units/kubelet-cleanup.service.yaml @@ -0,0 +1,14 @@ +name: kubelet-cleanup.service +enabled: true +contents: | + [Unit] + Description=Kubernetes Kubelet After Reboot Cleanup + Before=kubelet.service + + [Service] + Type=oneshot + ExecStart=/bin/rm -f /var/lib/kubelet/cpu_manager_state + ExecStart=/bin/rm -f /var/lib/kubelet/memory_manager_state + + [Install] + WantedBy=multi-user.target diff --git a/templates/arbiter/01-arbiter-kubelet/_base/units/kubelet.service.yaml b/templates/arbiter/01-arbiter-kubelet/_base/units/kubelet.service.yaml new file mode 100644 index 0000000000..b9d8921a30 --- /dev/null +++ b/templates/arbiter/01-arbiter-kubelet/_base/units/kubelet.service.yaml @@ -0,0 +1,52 @@ +name: kubelet.service +enabled: true +contents: | + [Unit] + Description=Kubernetes Kubelet + Requires=crio.service kubelet-dependencies.target + After=kubelet-dependencies.target + After=ostree-finalize-staged.service + + [Service] + Type=notify + ExecStartPre=/bin/mkdir --parents /etc/kubernetes/manifests + ExecStartPre=-/usr/sbin/restorecon /usr/local/bin/kubenswrapper /usr/bin/kubensenter +{{- if eq .IPFamilies "IPv6"}} + Environment="KUBELET_NODE_IP=::" +{{- else}} + Environment="KUBELET_NODE_IP=0.0.0.0" +{{- end}} + EnvironmentFile=/etc/os-release + EnvironmentFile=-/etc/kubernetes/kubelet-workaround + EnvironmentFile=-/etc/kubernetes/kubelet-env + EnvironmentFile=/etc/node-sizing.env + + ExecStart=/usr/local/bin/kubenswrapper \ + /usr/bin/kubelet \ + --config=/etc/kubernetes/kubelet.conf \ + --bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \ + --kubeconfig=/var/lib/kubelet/kubeconfig \ + --container-runtime-endpoint=/var/run/crio/crio.sock \ + --runtime-cgroups=/system.slice/crio.service \ + --node-labels=node-role.kubernetes.io/arbiter,node.openshift.io/os_id=${ID} \ +{{- if or (eq .IPFamilies "DualStack") (eq .IPFamilies "DualStackIPv6Primary") }} + --node-ip=${KUBELET_NODE_IPS} \ +{{- else}} + --node-ip=${KUBELET_NODE_IP} \ +{{- end}} + --minimum-container-ttl-duration=6m0s \ + --cloud-provider={{cloudProvider .}} \ + --volume-plugin-dir=/etc/kubernetes/kubelet-plugins/volume/exec \ + {{credentialProviderConfigFlag . 
}} \ + --hostname-override=${KUBELET_NODE_NAME} \ + --provider-id=${KUBELET_PROVIDERID} \ + --register-with-taints=node-role.kubernetes.io/arbiter=:NoSchedule \ + --pod-infra-container-image={{.Images.infraImageKey}} \ + --system-reserved=cpu=${SYSTEM_RESERVED_CPU},memory=${SYSTEM_RESERVED_MEMORY},ephemeral-storage=${SYSTEM_RESERVED_ES} \ + --v=${KUBELET_LOG_LEVEL} + + Restart=always + RestartSec=10 + + [Install] + WantedBy=multi-user.target diff --git a/templates/arbiter/01-arbiter-kubelet/on-prem/units/kubelet.service.yaml b/templates/arbiter/01-arbiter-kubelet/on-prem/units/kubelet.service.yaml new file mode 100644 index 0000000000..9a5f406ef9 --- /dev/null +++ b/templates/arbiter/01-arbiter-kubelet/on-prem/units/kubelet.service.yaml @@ -0,0 +1,51 @@ +name: kubelet.service +enabled: true +contents: | + [Unit] + Description=Kubernetes Kubelet + Requires=crio.service kubelet-dependencies.target + After=kubelet-dependencies.target + After=ostree-finalize-staged.service + + [Service] + Type=notify + ExecStartPre=/bin/mkdir --parents /etc/kubernetes/manifests + ExecStartPre=-/usr/sbin/restorecon /usr/local/bin/kubenswrapper /usr/bin/kubensenter +{{- if eq .IPFamilies "IPv6"}} + Environment="KUBELET_NODE_IP=::" +{{- else}} + Environment="KUBELET_NODE_IP=0.0.0.0" +{{- end}} + EnvironmentFile=/etc/os-release + EnvironmentFile=-/etc/kubernetes/kubelet-workaround + EnvironmentFile=-/etc/kubernetes/kubelet-env + EnvironmentFile=/etc/node-sizing.env + + ExecStart=/usr/local/bin/kubenswrapper \ + /usr/bin/kubelet \ + --config=/etc/kubernetes/kubelet.conf \ + --bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \ + --kubeconfig=/var/lib/kubelet/kubeconfig \ + --container-runtime-endpoint=/var/run/crio/crio.sock \ + --runtime-cgroups=/system.slice/crio.service \ + --node-labels=node-role.kubernetes.io/arbiter,node.openshift.io/os_id=${ID} \ +{{- if or (eq .IPFamilies "DualStack") (eq .IPFamilies "DualStackIPv6Primary") }} + --node-ip=${KUBELET_NODE_IPS} \ +{{- else}} + --node-ip=${KUBELET_NODE_IP} \ +{{- end}} + --address=${KUBELET_NODE_IP} \ + --minimum-container-ttl-duration=6m0s \ + --cloud-provider={{cloudProvider .}} \ + --volume-plugin-dir=/etc/kubernetes/kubelet-plugins/volume/exec \ + --hostname-override=${KUBELET_NODE_NAME} \ + --register-with-taints=node-role.kubernetes.io/master=:NoSchedule,node-role.kubernetes.io/arbiter=:NoSchedule \ + --pod-infra-container-image={{.Images.infraImageKey}} \ + --system-reserved=cpu=${SYSTEM_RESERVED_CPU},memory=${SYSTEM_RESERVED_MEMORY},ephemeral-storage=${SYSTEM_RESERVED_ES} \ + --v=${KUBELET_LOG_LEVEL} + + Restart=always + RestartSec=10 + + [Install] + WantedBy=multi-user.target diff --git a/templates/arbiter/01-arbiter-kubelet/openstack/OWNERS b/templates/arbiter/01-arbiter-kubelet/openstack/OWNERS new file mode 100644 index 0000000000..bdf81dfe6e --- /dev/null +++ b/templates/arbiter/01-arbiter-kubelet/openstack/OWNERS @@ -0,0 +1,7 @@ +# See the OWNERS docs: https://git.k8s.io/community/contributors/guide/owners.md +# This file just uses aliases defined in OWNERS_ALIASES. + +approvers: + - openstack-approvers +reviewers: + - openstack-reviewers