From 3b2a81d4096c7968f2d78b505355bb634de2e06d Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Thu, 28 Feb 2019 16:26:42 -0800 Subject: [PATCH 1/8] Increase ReasonableStartTime from 5 to 9 minutes --- pkg/util/kubernetes.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/util/kubernetes.go b/pkg/util/kubernetes.go index 0b8df0d0f787..aabab3176060 100644 --- a/pkg/util/kubernetes.go +++ b/pkg/util/kubernetes.go @@ -23,7 +23,7 @@ import ( "github.com/golang/glog" "github.com/pkg/errors" appsv1 "k8s.io/api/apps/v1" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" apierrs "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" @@ -39,8 +39,10 @@ import ( ) var ( + // ReasonableMutateTime is how long to wait for basic object mutations, such as deletions, to show up ReasonableMutateTime = time.Minute * 1 - ReasonableStartTime = time.Minute * 5 + // ReasonableStartTime is how long to wait for pods to start, considering dependency chains & slow networks. + ReasonableStartTime = time.Minute * 9 ) type PodStore struct { From a4be5ee43880b22027e9a55d0a1999b181bf5c16 Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Thu, 28 Feb 2019 16:27:35 -0800 Subject: [PATCH 2/8] Have StartCluster/RestartCluster block until system pods are healthy --- pkg/minikube/bootstrapper/kubeadm/kubeadm.go | 67 +++++++++++++++++++- pkg/minikube/bootstrapper/kubeadm/util.go | 4 +- pkg/minikube/console/style.go | 1 + 3 files changed, 67 insertions(+), 5 deletions(-) diff --git a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go index aa68d68c0ffa..796a7a47a8dd 100644 --- a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go +++ b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go @@ -35,6 +35,7 @@ import ( download "github.com/jimmidyson/go-download" "github.com/pkg/errors" "golang.org/x/sync/errgroup" + "k8s.io/apimachinery/pkg/labels" "k8s.io/minikube/pkg/minikube/assets" "k8s.io/minikube/pkg/minikube/bootstrapper" "k8s.io/minikube/pkg/minikube/config" @@ -64,6 +65,25 @@ var SkipPreflights = []string{ "CRI", } +type pod struct { + // Human friendly name + name string + key string + value string +} + +// PodsByLayer are queries we run when health checking, sorted roughly by dependency layer +var PodsByLayer = []pod{ + {"apiserver", "component", "kube-apiserver"}, + {"proxy", "k8s-app", "kube-proxy"}, + {"etcd", "component", "etcd"}, + {"dns", "k8s-app", "kube-dns"}, + {"controller", "component", "kube-controller-manager"}, + {"scheduler", "component", "kube-scheduler"}, + {"storage-provisioner", "integration-test", "storage-provisioner"}, + {"addon-manager", "component", "kube-addon-manager"}, +} + // SkipAdditionalPreflights are additional preflights we skip depending on the runtime in use. var SkipAdditionalPreflights = map[string][]string{} @@ -175,12 +195,20 @@ func (k *KubeadmBootstrapper) StartCluster(k8s config.KubernetesConfig) error { } } - // NOTE: We have not yet asserted that we can access the apiserver. Now would be a great time to do so. + if err := waitForPods(false); err != nil { + return errors.Wrap(err, "wait") + } + console.OutStyle("permissions", "Configuring cluster permissions ...") if err := util.RetryAfter(100, elevateKubeSystemPrivileges, time.Millisecond*500); err != nil { return errors.Wrap(err, "timed out waiting to elevate kube-system RBAC privileges") } + // Make sure elevating privileges didn't screw anything up + if err := waitForPods(true); err != nil { + return errors.Wrap(err, "wait") + } + return nil } @@ -204,6 +232,31 @@ func addAddons(files *[]assets.CopyableFile) error { return nil } +// waitForPods waits until the important Kubernetes pods are in running state +func waitForPods(quiet bool) error { + if !quiet { + console.OutStyle("waiting-pods", "Waiting for pods:") + } + client, err := util.GetClient() + if err != nil { + return errors.Wrap(err, "k8s client") + } + + for _, p := range PodsByLayer { + if !quiet { + console.Out(" %s", p.name) + } + selector := labels.SelectorFromSet(labels.Set(map[string]string{p.key: p.value})) + if err := util.WaitForPodsWithLabelRunning(client, "kube-system", selector); err != nil { + return errors.Wrap(err, fmt.Sprintf("waiting for %s=%s", p.key, p.value)) + } + } + if !quiet { + console.OutLn("") + } + return nil +} + // RestartCluster restarts the Kubernetes cluster configured by kubeadm func (k *KubeadmBootstrapper) RestartCluster(k8s config.KubernetesConfig) error { version, err := ParseKubernetesVersion(k8s.KubernetesVersion) @@ -232,12 +285,20 @@ func (k *KubeadmBootstrapper) RestartCluster(k8s config.KubernetesConfig) error } } - // NOTE: Perhaps now would be a good time to check apiserver health? - console.OutStyle("waiting", "Waiting for kube-proxy to come back up ...") + if err := waitForPods(false); err != nil { + return errors.Wrap(err, "wait") + } + + console.OutStyle("waiting", "Updating kube-proxy configmap ...") if err := restartKubeProxy(k8s); err != nil { return errors.Wrap(err, "restarting kube-proxy") } + // Make sure the kube-proxy restart didn't screw anything up. + if err := waitForPods(true); err != nil { + return errors.Wrap(err, "wait") + } + return nil } diff --git a/pkg/minikube/bootstrapper/kubeadm/util.go b/pkg/minikube/bootstrapper/kubeadm/util.go index 548f73fa2293..fb6ec05d8657 100644 --- a/pkg/minikube/bootstrapper/kubeadm/util.go +++ b/pkg/minikube/bootstrapper/kubeadm/util.go @@ -28,8 +28,8 @@ import ( clientv1 "k8s.io/api/core/v1" rbacv1beta1 "k8s.io/api/rbac/v1beta1" apierrs "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/strategicpatch" @@ -163,7 +163,7 @@ func restartKubeProxy(k8s config.KubernetesConfig) error { selector := labels.SelectorFromSet(labels.Set(map[string]string{"k8s-app": "kube-proxy"})) if err := util.WaitForPodsWithLabelRunning(client, "kube-system", selector); err != nil { - return errors.Wrap(err, "waiting for kube-proxy to be up for configmap update") + return errors.Wrap(err, "kube-proxy not running") } cfgMap, err := client.CoreV1().ConfigMaps("kube-system").Get("kube-proxy", metav1.GetOptions{}) diff --git a/pkg/minikube/console/style.go b/pkg/minikube/console/style.go index d51c863f212a..4bf162203a3c 100644 --- a/pkg/minikube/console/style.go +++ b/pkg/minikube/console/style.go @@ -55,6 +55,7 @@ var styles = map[string]style{ "stopped": {Prefix: "🛑 "}, "warning": {Prefix: "⚠️ ", LowPrefix: "! "}, "waiting": {Prefix: "⌛ ", LowPrefix: ": "}, + "waiting-pods": {Prefix: "⌛ ", LowPrefix: ": ", OmitNewline: true}, "usage": {Prefix: "💡 "}, "launch": {Prefix: "🚀 "}, "sad": {Prefix: "😿 ", LowPrefix: "* "}, From c40602b327dc4ab549f10b9d305ba8cad15686e1 Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Thu, 28 Feb 2019 16:40:46 -0800 Subject: [PATCH 3/8] ReasonableStartTime = 8, for folks with CrashLooping DNS on none --- pkg/util/kubernetes.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/util/kubernetes.go b/pkg/util/kubernetes.go index aabab3176060..d4f559b2fc78 100644 --- a/pkg/util/kubernetes.go +++ b/pkg/util/kubernetes.go @@ -42,7 +42,7 @@ var ( // ReasonableMutateTime is how long to wait for basic object mutations, such as deletions, to show up ReasonableMutateTime = time.Minute * 1 // ReasonableStartTime is how long to wait for pods to start, considering dependency chains & slow networks. - ReasonableStartTime = time.Minute * 9 + ReasonableStartTime = time.Minute * 8 ) type PodStore struct { From 2f1f52edbab45f09b3c8b34957e7a7b7ff4cb1f8 Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Thu, 28 Feb 2019 16:40:55 -0800 Subject: [PATCH 4/8] Add reconfiguring style --- pkg/minikube/bootstrapper/kubeadm/kubeadm.go | 2 +- pkg/minikube/console/style.go | 49 ++++++++++---------- 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go index 796a7a47a8dd..1a771ef65f1d 100644 --- a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go +++ b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go @@ -289,7 +289,7 @@ func (k *KubeadmBootstrapper) RestartCluster(k8s config.KubernetesConfig) error return errors.Wrap(err, "wait") } - console.OutStyle("waiting", "Updating kube-proxy configmap ...") + console.OutStyle("reconfiguring", "Reconfiguring kube-proxy ...") if err := restartKubeProxy(k8s); err != nil { return errors.Wrap(err, "restarting kube-proxy") } diff --git a/pkg/minikube/console/style.go b/pkg/minikube/console/style.go index 4bf162203a3c..366ad2769963 100644 --- a/pkg/minikube/console/style.go +++ b/pkg/minikube/console/style.go @@ -41,30 +41,31 @@ type style struct { // styles is a map of style name to style struct // For consistency, ensure that emojis added render with the same width across platforms. var styles = map[string]style{ - "happy": {Prefix: "😄 ", LowPrefix: "o "}, - "success": {Prefix: "✅ "}, - "failure": {Prefix: "❌ ", LowPrefix: "X "}, - "conflict": {Prefix: "💥 ", LowPrefix: "x "}, - "fatal": {Prefix: "💣 ", LowPrefix: "! "}, - "notice": {Prefix: "📌 ", LowPrefix: "* "}, - "ready": {Prefix: "🏄 ", LowPrefix: "= "}, - "running": {Prefix: "🏃 ", LowPrefix: ": "}, - "provisioning": {Prefix: "🌱 ", LowPrefix: "> "}, - "restarting": {Prefix: "🔄 ", LowPrefix: ": "}, - "stopping": {Prefix: "✋ ", LowPrefix: ": "}, - "stopped": {Prefix: "🛑 "}, - "warning": {Prefix: "⚠️ ", LowPrefix: "! "}, - "waiting": {Prefix: "⌛ ", LowPrefix: ": "}, - "waiting-pods": {Prefix: "⌛ ", LowPrefix: ": ", OmitNewline: true}, - "usage": {Prefix: "💡 "}, - "launch": {Prefix: "🚀 "}, - "sad": {Prefix: "😿 ", LowPrefix: "* "}, - "thumbs-up": {Prefix: "👍 "}, - "option": {Prefix: " ▪ "}, // Indented bullet - "command": {Prefix: " ▪ "}, // Indented bullet - "log-entry": {Prefix: " "}, // Indent - "crushed": {Prefix: "💔 "}, - "url": {Prefix: "👉 "}, + "happy": {Prefix: "😄 ", LowPrefix: "o "}, + "success": {Prefix: "✅ "}, + "failure": {Prefix: "❌ ", LowPrefix: "X "}, + "conflict": {Prefix: "💥 ", LowPrefix: "x "}, + "fatal": {Prefix: "💣 ", LowPrefix: "! "}, + "notice": {Prefix: "📌 ", LowPrefix: "* "}, + "ready": {Prefix: "🏄 ", LowPrefix: "= "}, + "running": {Prefix: "🏃 ", LowPrefix: ": "}, + "provisioning": {Prefix: "🌱 ", LowPrefix: "> "}, + "restarting": {Prefix: "🔄 ", LowPrefix: ": "}, + "reconfiguring": {Prefix: "📯 ", LowPrefix: ": "}, + "stopping": {Prefix: "✋ ", LowPrefix: ": "}, + "stopped": {Prefix: "🛑 "}, + "warning": {Prefix: "⚠️ ", LowPrefix: "! "}, + "waiting": {Prefix: "⌛ ", LowPrefix: ": "}, + "waiting-pods": {Prefix: "⌛ ", LowPrefix: ": ", OmitNewline: true}, + "usage": {Prefix: "💡 "}, + "launch": {Prefix: "🚀 "}, + "sad": {Prefix: "😿 ", LowPrefix: "* "}, + "thumbs-up": {Prefix: "👍 "}, + "option": {Prefix: " ▪ "}, // Indented bullet + "command": {Prefix: " ▪ "}, // Indented bullet + "log-entry": {Prefix: " "}, // Indent + "crushed": {Prefix: "💔 "}, + "url": {Prefix: "👉 "}, // Specialized purpose styles "iso-download": {Prefix: "💿 ", LowPrefix: "@ "}, From a2865f183a18df2c85afc18442895ca0cfc894ed Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Fri, 1 Mar 2019 10:50:39 -0800 Subject: [PATCH 5/8] Back to the original 10 minute timeout :( --- pkg/util/kubernetes.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/util/kubernetes.go b/pkg/util/kubernetes.go index d4f559b2fc78..5c07aa68bca9 100644 --- a/pkg/util/kubernetes.go +++ b/pkg/util/kubernetes.go @@ -42,7 +42,7 @@ var ( // ReasonableMutateTime is how long to wait for basic object mutations, such as deletions, to show up ReasonableMutateTime = time.Minute * 1 // ReasonableStartTime is how long to wait for pods to start, considering dependency chains & slow networks. - ReasonableStartTime = time.Minute * 8 + ReasonableStartTime = time.Minute * 10 ) type PodStore struct { From eae5b34100f1a0529b32af16d0e1468cba73c842 Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Fri, 1 Mar 2019 10:53:55 -0800 Subject: [PATCH 6/8] Only restart kube-proxy if necessary, make certain steps retialble --- pkg/minikube/bootstrapper/kubeadm/util.go | 28 ++++++++++++++++++----- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/pkg/minikube/bootstrapper/kubeadm/util.go b/pkg/minikube/bootstrapper/kubeadm/util.go index fb6ec05d8657..7527f77a0223 100644 --- a/pkg/minikube/bootstrapper/kubeadm/util.go +++ b/pkg/minikube/bootstrapper/kubeadm/util.go @@ -155,7 +155,8 @@ users: ` ) -func restartKubeProxy(k8s config.KubernetesConfig) error { +// updateKubeProxyConfigMap updates the IP & port kube-proxy listens on, and restarts it. +func updateKubeProxyConfigMap(k8s config.KubernetesConfig) error { client, err := util.GetClient() if err != nil { return errors.Wrap(err, "getting k8s client") @@ -168,9 +169,9 @@ func restartKubeProxy(k8s config.KubernetesConfig) error { cfgMap, err := client.CoreV1().ConfigMaps("kube-system").Get("kube-proxy", metav1.GetOptions{}) if err != nil { - return errors.Wrap(err, "getting kube-proxy configmap") + return &util.RetriableError{Err: errors.Wrap(err, "getting kube-proxy configmap")} } - + glog.Infof("kube-proxy config: %v", cfgMap.Data[kubeconfigConf]) t := template.Must(template.New("kubeProxyTmpl").Parse(kubeProxyConfigmapTmpl)) opts := struct { AdvertiseAddress string @@ -188,10 +189,19 @@ func restartKubeProxy(k8s config.KubernetesConfig) error { if cfgMap.Data == nil { cfgMap.Data = map[string]string{} } - cfgMap.Data[kubeconfigConf] = strings.TrimSuffix(kubeconfig.String(), "\n") + updated := strings.TrimSuffix(kubeconfig.String(), "\n") + glog.Infof("updated kube-proxy config: %s", updated) + if cfgMap.Data[kubeconfigConf] == updated { + glog.Infof("kube-proxy config appears to require no change, not restarting kube-proxy") + return nil + } + cfgMap.Data[kubeconfigConf] = updated + + // Make this step retriable, as it can fail with: + // "Operation cannot be fulfilled on configmaps "kube-proxy": the object has been modified; please apply your changes to the latest version and try again" if _, err := client.CoreV1().ConfigMaps("kube-system").Update(cfgMap); err != nil { - return errors.Wrap(err, "updating configmap") + return &util.RetriableError{Err: errors.Wrap(err, "updating configmap")} } pods, err := client.CoreV1().Pods("kube-system").List(metav1.ListOptions{ @@ -201,10 +211,16 @@ func restartKubeProxy(k8s config.KubernetesConfig) error { return errors.Wrap(err, "listing kube-proxy pods") } for _, pod := range pods.Items { + // Retriable, as known to fail with: pods "" not found if err := client.CoreV1().Pods(pod.Namespace).Delete(pod.Name, &metav1.DeleteOptions{}); err != nil { - return errors.Wrapf(err, "deleting pod %+v", pod) + return &util.RetriableError{Err: errors.Wrapf(err, "deleting pod %+v", pod)} } } + // Wait for the scheduler to restart kube-proxy + if err := util.WaitForPodsWithLabelRunning(client, "kube-system", selector); err != nil { + return errors.Wrap(err, "kube-proxy not running") + } + return nil } From a11e1c3c27261dc3012f0c4b2a559b521af429e2 Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Fri, 1 Mar 2019 10:54:18 -0800 Subject: [PATCH 7/8] retry updateKubeProxyConfigMap --- pkg/minikube/bootstrapper/kubeadm/kubeadm.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go index 1a771ef65f1d..f0cf0727396b 100644 --- a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go +++ b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go @@ -77,11 +77,10 @@ var PodsByLayer = []pod{ {"apiserver", "component", "kube-apiserver"}, {"proxy", "k8s-app", "kube-proxy"}, {"etcd", "component", "etcd"}, - {"dns", "k8s-app", "kube-dns"}, - {"controller", "component", "kube-controller-manager"}, {"scheduler", "component", "kube-scheduler"}, - {"storage-provisioner", "integration-test", "storage-provisioner"}, + {"controller", "component", "kube-controller-manager"}, {"addon-manager", "component", "kube-addon-manager"}, + {"dns", "k8s-app", "kube-dns"}, } // SkipAdditionalPreflights are additional preflights we skip depending on the runtime in use. @@ -289,8 +288,8 @@ func (k *KubeadmBootstrapper) RestartCluster(k8s config.KubernetesConfig) error return errors.Wrap(err, "wait") } - console.OutStyle("reconfiguring", "Reconfiguring kube-proxy ...") - if err := restartKubeProxy(k8s); err != nil { + console.OutStyle("reconfiguring", "Updating kube-proxy configuration ...") + if err = util.RetryAfter(5, func() error { return updateKubeProxyConfigMap(k8s) }, 5*time.Second); err != nil { return errors.Wrap(err, "restarting kube-proxy") } From b9675da28be9a642df4e0480e894d6ef6b368702 Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Fri, 1 Mar 2019 11:03:06 -0800 Subject: [PATCH 8/8] Add comment about optimization --- pkg/minikube/bootstrapper/kubeadm/util.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/minikube/bootstrapper/kubeadm/util.go b/pkg/minikube/bootstrapper/kubeadm/util.go index 7527f77a0223..e26388e86b6d 100644 --- a/pkg/minikube/bootstrapper/kubeadm/util.go +++ b/pkg/minikube/bootstrapper/kubeadm/util.go @@ -192,6 +192,8 @@ func updateKubeProxyConfigMap(k8s config.KubernetesConfig) error { updated := strings.TrimSuffix(kubeconfig.String(), "\n") glog.Infof("updated kube-proxy config: %s", updated) + + // An optimization, but also one that's unlikely, as kubeadm writes the address as 'localhost' if cfgMap.Data[kubeconfigConf] == updated { glog.Infof("kube-proxy config appears to require no change, not restarting kube-proxy") return nil