From dc3a50e026b00cee569abb9dd1b074d7248b1bb7 Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Tue, 12 Nov 2019 13:38:48 -0800 Subject: [PATCH 01/12] Make --wait=false wait for nothing again, change default to --wait=true --- cmd/minikube/cmd/start.go | 20 +- pkg/minikube/bootstrapper/bootstrapper.go | 2 +- pkg/minikube/bootstrapper/kubeadm/kubeadm.go | 193 +++++++------------ pkg/minikube/out/style.go | 1 - 4 files changed, 73 insertions(+), 143 deletions(-) diff --git a/cmd/minikube/cmd/start.go b/cmd/minikube/cmd/start.go index 6b14c8c4e545..abef3ca25fa4 100644 --- a/cmd/minikube/cmd/start.go +++ b/cmd/minikube/cmd/start.go @@ -170,7 +170,7 @@ func initMinikubeFlags() { startCmd.Flags().String(criSocket, "", "The cri socket path to be used.") startCmd.Flags().String(networkPlugin, "", "The name of the network plugin.") startCmd.Flags().Bool(enableDefaultCNI, false, "Enable the default CNI plugin (/etc/cni/net.d/k8s.conf). Used in conjunction with \"--network-plugin=cni\".") - startCmd.Flags().Bool(waitUntilHealthy, false, "Wait until Kubernetes core services are healthy before exiting.") + startCmd.Flags().Bool(waitUntilHealthy, true, "Wait until apiserver API calls return content before exiting.") startCmd.Flags().Duration(waitTimeout, 6*time.Minute, "max time to wait per Kubernetes core services to be healthy.") startCmd.Flags().Bool(nativeSSH, true, "Use native Golang SSH client (default true). Set to 'false' to use the command line 'ssh' command when accessing the docker machine. Useful for the machine drivers when they will not start with 'Waiting for SSH'.") startCmd.Flags().Bool(autoUpdate, true, "If set, automatically updates drivers to the latest version. Defaults to true.") @@ -365,7 +365,11 @@ func runStart(cmd *cobra.Command, args []string) { if driverName == driver.None { prepareNone() } - waitCluster(bs, config) + if viper.GetBool(waitUntilHealthy) { + if err := bs.WaitForCluster(config.KubernetesConfig, viper.GetDuration(waitTimeout)); err != nil { + exit.WithError("Wait failed", err) + } + } if err := showKubectlInfo(kubeconfig, k8sVersion, config.Name); err != nil { glog.Errorf("kubectl info: %v", err) } @@ -389,18 +393,6 @@ func enableAddons() { } } -func waitCluster(bs bootstrapper.Bootstrapper, config cfg.MachineConfig) { - var podsToWaitFor []string - - if !viper.GetBool(waitUntilHealthy) { - // only wait for apiserver if wait=false - podsToWaitFor = []string{"apiserver"} - } - if err := bs.WaitForPods(config.KubernetesConfig, viper.GetDuration(waitTimeout), podsToWaitFor); err != nil { - exit.WithError("Wait failed", err) - } -} - func displayVersion(version string) { prefix := "" if viper.GetString(cfg.MachineProfile) != constants.DefaultMachineName { diff --git a/pkg/minikube/bootstrapper/bootstrapper.go b/pkg/minikube/bootstrapper/bootstrapper.go index f0b244f3e74b..f54c0b4f41b5 100644 --- a/pkg/minikube/bootstrapper/bootstrapper.go +++ b/pkg/minikube/bootstrapper/bootstrapper.go @@ -41,7 +41,7 @@ type Bootstrapper interface { UpdateCluster(config.KubernetesConfig) error RestartCluster(config.KubernetesConfig) error DeleteCluster(config.KubernetesConfig) error - WaitForPods(config.KubernetesConfig, time.Duration, []string) error + WaitForCluster(config.KubernetesConfig, time.Duration) error // LogCommands returns a map of log type to a command which will display that log. LogCommands(LogOptions) map[string]string SetupCerts(cfg config.KubernetesConfig) error diff --git a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go index 7e9c27e41b0d..29825517318f 100644 --- a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go +++ b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go @@ -39,7 +39,6 @@ import ( "github.com/spf13/viper" "golang.org/x/sync/errgroup" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" kconst "k8s.io/kubernetes/cmd/kubeadm/app/constants" @@ -66,7 +65,6 @@ const ( defaultCNIConfigPath = "/etc/cni/net.d/k8s.conf" kubeletServiceFile = "/lib/systemd/system/kubelet.service" kubeletSystemdConfFile = "/etc/systemd/system/kubelet.service.d/10-kubeadm.conf" - AllPods = "ALL_PODS" ) const ( @@ -96,22 +94,6 @@ var KubeadmExtraArgsWhitelist = map[int][]string{ }, } -type pod struct { - // Human friendly name - name string - key string - value string -} - -// PodsByLayer are queries we run when health checking, sorted roughly by dependency layer -var PodsByLayer = []pod{ - {"proxy", "k8s-app", "kube-proxy"}, - {"etcd", "component", "etcd"}, - {"scheduler", "component", "kube-scheduler"}, - {"controller", "component", "kube-controller-manager"}, - {"dns", "k8s-app", "kube-dns"}, -} - // yamlConfigPath is the path to the kubeadm configuration var yamlConfigPath = path.Join(vmpath.GuestEphemeralDir, "kubeadm.yaml") @@ -354,7 +336,6 @@ func addAddons(files *[]assets.CopyableFile, data interface{}) error { // client returns a Kubernetes client to use to speak to a kubeadm launched apiserver func (k *Bootstrapper) client(k8s config.KubernetesConfig) (*kubernetes.Clientset, error) { - // Catch case if WaitForPods was called with a stale ~/.kube/config config, err := kapi.ClientConfig(k.contextName) if err != nil { return nil, errors.Wrap(err, "client config") @@ -369,67 +350,69 @@ func (k *Bootstrapper) client(k8s config.KubernetesConfig) (*kubernetes.Clientse return kubernetes.NewForConfig(config) } -// WaitForPods blocks until pods specified in podsToWaitFor appear to be healthy. -func (k *Bootstrapper) WaitForPods(k8s config.KubernetesConfig, timeout time.Duration, podsToWaitFor []string) error { - // Do not wait for "k8s-app" pods in the case of CNI, as they are managed - // by a CNI plugin which is usually started after minikube has been brought - // up. Otherwise, minikube won't start, as "k8s-app" pods are not ready. - componentsOnly := k8s.NetworkPlugin == "cni" - out.T(out.WaitingPods, "Waiting for:") - - // Wait until the apiserver can answer queries properly. We don't care if the apiserver - // pod shows up as registered, but need the webserver for all subsequent queries. - - if shouldWaitForPod("apiserver", podsToWaitFor) { - out.String(" apiserver") - if err := k.waitForAPIServer(k8s); err != nil { - return errors.Wrap(err, "waiting for apiserver") - } - } +// WaitForCluster blocks until the cluster appears to be healthy +func (k *Bootstrapper) WaitForCluster(k8s config.KubernetesConfig, timeout time.Duration) error { + start := time.Now() + out.T(out.Waiting, "Waiting for cluster to come online ...") - client, err := k.client(k8s) + glog.Infof("waiting for apiserver process to appear ...") + err := wait.PollImmediate(time.Second*1, time.Minute*5, func() (bool, error) { + if time.Since(start) > timeout { + return false, fmt.Errorf("cluster wait timed out during process check") + } + rr, ierr := k.c.RunCmd(exec.Command("sudo", "pgrep", "kube-apiserver")) + if ierr != nil { + glog.Warningf("pgrep apiserver: %v cmd: %s", ierr, rr.Command()) + return false, nil + } + return true, nil + }) if err != nil { - return errors.Wrap(err, "client") + return fmt.Errorf("apiserver process never appeared") } + glog.Infof("duration metric: took %s to wait for apiserver process to appear ...", time.Since(start)) - for _, p := range PodsByLayer { - if componentsOnly && p.key != "component" { // skip component check if network plugin is cni - continue + glog.Infof("waiting for apiserver healthz status ...") + hStart := time.Now() + healthz := func() (bool, error) { + if time.Since(start) > timeout { + return false, fmt.Errorf("cluster wait timed out during healthz check") } - if !shouldWaitForPod(p.name, podsToWaitFor) { - continue + + status, err := k.GetAPIServerStatus(net.ParseIP(k8s.NodeIP), k8s.NodePort) + if err != nil { + glog.Warningf("status: %v", err) + return false, nil } - out.String(" %s", p.name) - selector := labels.SelectorFromSet(labels.Set(map[string]string{p.key: p.value})) - if err := kapi.WaitForPodsWithLabelRunning(client, "kube-system", selector, timeout); err != nil { - return errors.Wrap(err, fmt.Sprintf("waiting for %s=%s", p.key, p.value)) + if status != "Running" { + return false, nil } + return true, nil } - out.Ln("") - return nil -} -// shouldWaitForPod returns true if: -// 1. podsToWaitFor is nil -// 2. name is in podsToWaitFor -// 3. ALL_PODS is in podsToWaitFor -// else, return false -func shouldWaitForPod(name string, podsToWaitFor []string) bool { - if podsToWaitFor == nil { - return true - } - if len(podsToWaitFor) == 0 { - return false - } - for _, p := range podsToWaitFor { - if p == AllPods { - return true + if err = wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, healthz); err != nil { + return fmt.Errorf("apiserver healthz never reported healthy") + } + glog.Infof("duration metric: took %s to wait for apiserver healthz status ...", time.Since(hStart)) + + glog.Infof("waiting for apiserver to appear in pod list ...") + pStart := time.Now() + client, err := k.client(k8s) + podList := func() (bool, error) { + if time.Since(start) > timeout { + return false, fmt.Errorf("cluster wait timed out during pod check") } - if p == name { - return true + _, err := client.CoreV1().Pods("kube-system").Get("kube-apiserver-minikube", metav1.GetOptions{}) + if err != nil { + return false, nil } + return true, nil } - return false + if err = wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, podList); err != nil { + return fmt.Errorf("apiserver never returned a pod list") + } + glog.Infof("duration metric: took %s to wait for pod list to return data ...", time.Since(pStart)) + return nil } // RestartCluster restarts the Kubernetes cluster configured by kubeadm @@ -472,48 +455,10 @@ func (k *Bootstrapper) RestartCluster(k8s config.KubernetesConfig) error { } } - if err := k.waitForAPIServer(k8s); err != nil { - return errors.Wrap(err, "waiting for apiserver") - } - - // restart the proxy and coredns - if rr, err := k.c.RunCmd(exec.Command("/bin/bash", "-c", fmt.Sprintf("%s phase addon all --config %s", baseCmd, yamlConfigPath))); err != nil { - return errors.Wrapf(err, fmt.Sprintf("addon phase cmd:%q", rr.Command())) - } - - if err := k.adjustResourceLimits(); err != nil { - glog.Warningf("unable to adjust resource limits: %v", err) - } - return nil -} - -// waitForAPIServer waits for the apiserver to start up -func (k *Bootstrapper) waitForAPIServer(k8s config.KubernetesConfig) error { - start := time.Now() - defer func() { - glog.Infof("duration metric: took %s to wait for apiserver status ...", time.Since(start)) - }() - - glog.Infof("Waiting for apiserver process ...") - // To give a better error message, first check for process existence via ssh - // Needs minutes in case the image isn't cached (such as with v1.10.x) - err := wait.PollImmediate(time.Millisecond*300, time.Minute*3, func() (bool, error) { - rr, ierr := k.c.RunCmd(exec.Command("sudo", "pgrep", "kube-apiserver")) - if ierr != nil { - glog.Warningf("pgrep apiserver: %v cmd: %s", ierr, rr.Command()) - return false, nil - } - return true, nil - }) - if err != nil { - return fmt.Errorf("apiserver process never appeared") - } - - glog.Infof("Waiting for apiserver to port healthy status ...") - var client *kubernetes.Clientset - f := func() (bool, error) { + // We must ensure that the apiserver is healthy before proceeding + glog.Infof("waiting for apiserver healthz ...") + healthz := func() (bool, error) { status, err := k.GetAPIServerStatus(net.ParseIP(k8s.NodeIP), k8s.NodePort) - glog.Infof("apiserver status: %s, err: %v", status, err) if err != nil { glog.Warningf("status: %v", err) return false, nil @@ -521,27 +466,21 @@ func (k *Bootstrapper) waitForAPIServer(k8s config.KubernetesConfig) error { if status != "Running" { return false, nil } - // Make sure apiserver pod is retrievable - if client == nil { - // We only want to get the clientset once, because this line takes ~1 second to complete - client, err = k.client(k8s) - if err != nil { - glog.Warningf("get kubernetes client: %v", err) - return false, nil - } - } + return true, nil + } + if err = wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, healthz); err != nil { + return fmt.Errorf("apiserver healthz never reported healthy") + } - _, err = client.CoreV1().Pods("kube-system").Get("kube-apiserver-minikube", metav1.GetOptions{}) - if err != nil { - return false, nil - } + // restart the proxy and coredns + if rr, err := k.c.RunCmd(exec.Command("/bin/bash", "-c", fmt.Sprintf("%s phase addon all --config %s", baseCmd, yamlConfigPath))); err != nil { + return errors.Wrapf(err, fmt.Sprintf("addon phase cmd:%q", rr.Command())) + } - return true, nil - // TODO: Check apiserver/kubelet logs for fatal errors so that users don't - // need to wait minutes to find out their flag didn't work. + if err := k.adjustResourceLimits(); err != nil { + glog.Warningf("unable to adjust resource limits: %v", err) } - err = wait.PollImmediate(kconst.APICallRetryInterval, 2*kconst.DefaultControlPlaneTimeout, f) - return err + return nil } // DeleteCluster removes the components that were started earlier diff --git a/pkg/minikube/out/style.go b/pkg/minikube/out/style.go index 0af52ed464d1..b14c9d5883e2 100644 --- a/pkg/minikube/out/style.go +++ b/pkg/minikube/out/style.go @@ -65,7 +65,6 @@ var styles = map[StyleEnum]style{ Stopped: {Prefix: "🛑 "}, WarningType: {Prefix: "⚠️ ", LowPrefix: lowWarning}, Waiting: {Prefix: "⌛ "}, - WaitingPods: {Prefix: "⌛ ", OmitNewline: true}, Usage: {Prefix: "💡 "}, Launch: {Prefix: "🚀 "}, Sad: {Prefix: "😿 "}, From e142319e1cacdbc1e90933ca45df523ffea94b66 Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Tue, 12 Nov 2019 14:13:38 -0800 Subject: [PATCH 02/12] Wait for any kube-system pod rather than apiserver (much faster) --- pkg/minikube/bootstrapper/kubeadm/kubeadm.go | 11 ++++++++--- test/integration/functional_test.go | 4 ++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go index 29825517318f..abd59732045f 100644 --- a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go +++ b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go @@ -38,7 +38,7 @@ import ( "github.com/pkg/errors" "github.com/spf13/viper" "golang.org/x/sync/errgroup" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + meta "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" kconst "k8s.io/kubernetes/cmd/kubeadm/app/constants" @@ -395,17 +395,22 @@ func (k *Bootstrapper) WaitForCluster(k8s config.KubernetesConfig, timeout time. } glog.Infof("duration metric: took %s to wait for apiserver healthz status ...", time.Since(hStart)) - glog.Infof("waiting for apiserver to appear in pod list ...") + glog.Infof("waiting for pod list to contain data ...") pStart := time.Now() client, err := k.client(k8s) podList := func() (bool, error) { if time.Since(start) > timeout { return false, fmt.Errorf("cluster wait timed out during pod check") } - _, err := client.CoreV1().Pods("kube-system").Get("kube-apiserver-minikube", metav1.GetOptions{}) + // Wait for any system pod, as waiting for apiserver may block until etcd + pods, err := client.CoreV1().Pods("kube-system").List(meta.ListOptions{}) + if len(pods.Items) == 0 { + return true, nil + } if err != nil { return false, nil } + glog.Infof("%d kube-system pods found", len(pods.Items)) return true, nil } if err = wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, podList); err != nil { diff --git a/test/integration/functional_test.go b/test/integration/functional_test.go index b61ed0a70753..21f913e30db7 100644 --- a/test/integration/functional_test.go +++ b/test/integration/functional_test.go @@ -153,9 +153,9 @@ func validateKubectlGetPods(ctx context.Context, t *testing.T, profile string) { if err != nil { t.Errorf("%s failed: %v", rr.Args, err) } - podName := "kube-apiserver-minikube" + podName := "kube-system" if !strings.Contains(rr.Stdout.String(), podName) { - t.Errorf("%s is not up in running, got: %s\n", podName, rr.Stdout.String()) + t.Errorf("%s pods are not listed, got: %s\n", podName, rr.Stdout.String()) } } From 2128326cf700f5b8837813fe41e586fdef415341 Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Tue, 12 Nov 2019 14:19:52 -0800 Subject: [PATCH 03/12] Remove unused tests --- pkg/minikube/bootstrapper/kubeadm/kubeadm.go | 4 ++ .../bootstrapper/kubeadm/kubeadm_test.go | 42 ------------------- pkg/minikube/out/out_test.go | 1 - test/integration/functional_test.go | 5 +-- 4 files changed, 6 insertions(+), 46 deletions(-) diff --git a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go index abd59732045f..10b9bfc98ba8 100644 --- a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go +++ b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go @@ -398,6 +398,10 @@ func (k *Bootstrapper) WaitForCluster(k8s config.KubernetesConfig, timeout time. glog.Infof("waiting for pod list to contain data ...") pStart := time.Now() client, err := k.client(k8s) + if err != nil { + return errors.Wrap(err, "client") + } + podList := func() (bool, error) { if time.Since(start) > timeout { return false, fmt.Errorf("cluster wait timed out during pod check") diff --git a/pkg/minikube/bootstrapper/kubeadm/kubeadm_test.go b/pkg/minikube/bootstrapper/kubeadm/kubeadm_test.go index 4a19c4be358c..158ea3e81a3a 100644 --- a/pkg/minikube/bootstrapper/kubeadm/kubeadm_test.go +++ b/pkg/minikube/bootstrapper/kubeadm/kubeadm_test.go @@ -363,45 +363,3 @@ func TestGenerateConfig(t *testing.T) { } } } - -func TestShouldWaitForPod(t *testing.T) { - tests := []struct { - description string - pod string - podsToWaitFor []string - expected bool - }{ - { - description: "pods to wait for is nil", - pod: "apiserver", - expected: true, - }, { - description: "pods to wait for is empty", - pod: "apiserver", - podsToWaitFor: []string{}, - }, { - description: "pod is in podsToWaitFor", - pod: "apiserver", - podsToWaitFor: []string{"etcd", "apiserver"}, - expected: true, - }, { - description: "pod is not in podsToWaitFor", - pod: "apiserver", - podsToWaitFor: []string{"etcd", "gvisor"}, - }, { - description: "wait for all pods", - pod: "apiserver", - podsToWaitFor: []string{"ALL_PODS"}, - expected: true, - }, - } - - for _, test := range tests { - t.Run(test.description, func(t *testing.T) { - actual := shouldWaitForPod(test.pod, test.podsToWaitFor) - if actual != test.expected { - t.Fatalf("unexpected diff: got %t, expected %t", actual, test.expected) - } - }) - } -} diff --git a/pkg/minikube/out/out_test.go b/pkg/minikube/out/out_test.go index 79646c5de489..c364b581197f 100644 --- a/pkg/minikube/out/out_test.go +++ b/pkg/minikube/out/out_test.go @@ -46,7 +46,6 @@ func TestOutT(t *testing.T) { {Option, "Option", nil, " ▪ Option\n", " - Option\n"}, {WarningType, "Warning", nil, "⚠️ Warning\n", "! Warning\n"}, {FatalType, "Fatal: {{.error}}", V{"error": "ugh"}, "💣 Fatal: ugh\n", "X Fatal: ugh\n"}, - {WaitingPods, "wait", nil, "⌛ wait", "* wait"}, {Issue, "http://i/{{.number}}", V{"number": 10000}, " ▪ http://i/10000\n", " - http://i/10000\n"}, {Usage, "raw: {{.one}} {{.two}}", V{"one": "'%'", "two": "%d"}, "💡 raw: '%' %d\n", "* raw: '%' %d\n"}, {Running, "Installing Kubernetes version {{.version}} ...", V{"version": "v1.13"}, "🏃 ... v1.13 تثبيت Kubernetes الإصدار\n", "* ... v1.13 تثبيت Kubernetes الإصدار\n"}, diff --git a/test/integration/functional_test.go b/test/integration/functional_test.go index 21f913e30db7..98c298c96f71 100644 --- a/test/integration/functional_test.go +++ b/test/integration/functional_test.go @@ -153,9 +153,8 @@ func validateKubectlGetPods(ctx context.Context, t *testing.T, profile string) { if err != nil { t.Errorf("%s failed: %v", rr.Args, err) } - podName := "kube-system" - if !strings.Contains(rr.Stdout.String(), podName) { - t.Errorf("%s pods are not listed, got: %s\n", podName, rr.Stdout.String()) + if !strings.Contains(rr.Stdout.String(), "kube-system") { + t.Errorf("%s = %q, want *kube-system*", rr.Command(), rr.Stdout.String()) } } From 55a7002ed7a2b906ff8a2d6343de9e2d76839a03 Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Tue, 12 Nov 2019 14:22:53 -0800 Subject: [PATCH 04/12] Clarify help text for --wait --- cmd/minikube/cmd/start.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/minikube/cmd/start.go b/cmd/minikube/cmd/start.go index abef3ca25fa4..bc83b0682fb0 100644 --- a/cmd/minikube/cmd/start.go +++ b/cmd/minikube/cmd/start.go @@ -170,7 +170,7 @@ func initMinikubeFlags() { startCmd.Flags().String(criSocket, "", "The cri socket path to be used.") startCmd.Flags().String(networkPlugin, "", "The name of the network plugin.") startCmd.Flags().Bool(enableDefaultCNI, false, "Enable the default CNI plugin (/etc/cni/net.d/k8s.conf). Used in conjunction with \"--network-plugin=cni\".") - startCmd.Flags().Bool(waitUntilHealthy, true, "Wait until apiserver API calls return content before exiting.") + startCmd.Flags().Bool(waitUntilHealthy, true, "Block until the apiserver can service API requests") startCmd.Flags().Duration(waitTimeout, 6*time.Minute, "max time to wait per Kubernetes core services to be healthy.") startCmd.Flags().Bool(nativeSSH, true, "Use native Golang SSH client (default true). Set to 'false' to use the command line 'ssh' command when accessing the docker machine. Useful for the machine drivers when they will not start with 'Waiting for SSH'.") startCmd.Flags().Bool(autoUpdate, true, "If set, automatically updates drivers to the latest version. Defaults to true.") From 4ae939244cb7f4004c3c1b606efbdfdc6d802dd3 Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Tue, 12 Nov 2019 14:23:39 -0800 Subject: [PATCH 05/12] Clarify help text for --wait, take #2 --- cmd/minikube/cmd/start.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/minikube/cmd/start.go b/cmd/minikube/cmd/start.go index bc83b0682fb0..7c38b9d87179 100644 --- a/cmd/minikube/cmd/start.go +++ b/cmd/minikube/cmd/start.go @@ -170,7 +170,7 @@ func initMinikubeFlags() { startCmd.Flags().String(criSocket, "", "The cri socket path to be used.") startCmd.Flags().String(networkPlugin, "", "The name of the network plugin.") startCmd.Flags().Bool(enableDefaultCNI, false, "Enable the default CNI plugin (/etc/cni/net.d/k8s.conf). Used in conjunction with \"--network-plugin=cni\".") - startCmd.Flags().Bool(waitUntilHealthy, true, "Block until the apiserver can service API requests") + startCmd.Flags().Bool(waitUntilHealthy, true, "Block until the apiserver is servicing API requests") startCmd.Flags().Duration(waitTimeout, 6*time.Minute, "max time to wait per Kubernetes core services to be healthy.") startCmd.Flags().Bool(nativeSSH, true, "Use native Golang SSH client (default true). Set to 'false' to use the command line 'ssh' command when accessing the docker machine. Useful for the machine drivers when they will not start with 'Waiting for SSH'.") startCmd.Flags().Bool(autoUpdate, true, "If set, automatically updates drivers to the latest version. Defaults to true.") From f5736fd9af7807737097a01367eddb94bceadd86 Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Tue, 12 Nov 2019 20:11:21 -0800 Subject: [PATCH 06/12] Set kubectl context in validateKubectlGetPods --- test/integration/functional_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/functional_test.go b/test/integration/functional_test.go index 98c298c96f71..5c4c84419fe7 100644 --- a/test/integration/functional_test.go +++ b/test/integration/functional_test.go @@ -149,7 +149,7 @@ func validateKubeContext(ctx context.Context, t *testing.T, profile string) { // validateKubectlGetPods asserts that `kubectl get pod -A` returns non-zero content func validateKubectlGetPods(ctx context.Context, t *testing.T, profile string) { - rr, err := Run(t, exec.CommandContext(ctx, "kubectl", "get", "pod", "-A")) + rr, err := Run(t, exec.CommandContext(ctx, "kubectl", "--context", profile, "get", "pod", "-A")) if err != nil { t.Errorf("%s failed: %v", rr.Args, err) } From 7e5ab0417ba515fb713cca2960123dafc3ca3778 Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Tue, 12 Nov 2019 21:14:37 -0800 Subject: [PATCH 07/12] Split WaitForCluster up into smaller functions, be more conservative --- pkg/minikube/bootstrapper/kubeadm/kubeadm.go | 55 +++++++++++++++----- 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go index 10b9bfc98ba8..47736013556b 100644 --- a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go +++ b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go @@ -148,12 +148,13 @@ func (k *Bootstrapper) GetAPIServerStatus(ip net.IP, apiserverPort int) (string, } client := &http.Client{Transport: tr} resp, err := client.Get(url) - glog.Infof("%s response: %v %+v", url, err, resp) // Connection refused, usually. if err != nil { + glog.Warningf("%s response: %v %+v", url, err, resp) return state.Stopped.String(), nil } if resp.StatusCode != http.StatusOK { + glog.Warningf("%s response: %v %+v", url, err, resp) return state.Error.String(), nil } return state.Running.String(), nil @@ -350,13 +351,9 @@ func (k *Bootstrapper) client(k8s config.KubernetesConfig) (*kubernetes.Clientse return kubernetes.NewForConfig(config) } -// WaitForCluster blocks until the cluster appears to be healthy -func (k *Bootstrapper) WaitForCluster(k8s config.KubernetesConfig, timeout time.Duration) error { - start := time.Now() - out.T(out.Waiting, "Waiting for cluster to come online ...") - +func (k *Bootstrapper) waitForApiServerProcess(start time.Time, timeout time.Duration) error { glog.Infof("waiting for apiserver process to appear ...") - err := wait.PollImmediate(time.Second*1, time.Minute*5, func() (bool, error) { + err := wait.PollImmediate(time.Second*1, timeout, func() (bool, error) { if time.Since(start) > timeout { return false, fmt.Errorf("cluster wait timed out during process check") } @@ -371,7 +368,10 @@ func (k *Bootstrapper) WaitForCluster(k8s config.KubernetesConfig, timeout time. return fmt.Errorf("apiserver process never appeared") } glog.Infof("duration metric: took %s to wait for apiserver process to appear ...", time.Since(start)) + return nil +} +func (k *Bootstrapper) waitForApiServerHealthz(start time.Time, k8s config.KubernetesConfig, timeout time.Duration) error { glog.Infof("waiting for apiserver healthz status ...") hStart := time.Now() healthz := func() (bool, error) { @@ -390,32 +390,46 @@ func (k *Bootstrapper) WaitForCluster(k8s config.KubernetesConfig, timeout time. return true, nil } - if err = wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, healthz); err != nil { + if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, healthz); err != nil { return fmt.Errorf("apiserver healthz never reported healthy") } - glog.Infof("duration metric: took %s to wait for apiserver healthz status ...", time.Since(hStart)) + glog.Infof("duration metric: took %s to wait for apiserver healthz status ...", time.Since(hStart)) + return nil +} - glog.Infof("waiting for pod list to contain data ...") +func (k *Bootstrapper) waitForSystemPods(start time.Time, k8s config.KubernetesConfig, timeout time.Duration) error { + glog.Infof("waiting for kube-system pods to appear ...") pStart := time.Now() client, err := k.client(k8s) if err != nil { return errors.Wrap(err, "client") } + podStart := time.Time{} podList := func() (bool, error) { if time.Since(start) > timeout { return false, fmt.Errorf("cluster wait timed out during pod check") } // Wait for any system pod, as waiting for apiserver may block until etcd pods, err := client.CoreV1().Pods("kube-system").List(meta.ListOptions{}) - if len(pods.Items) == 0 { - return true, nil + if len(pods.Items) < 2 { + podStart = time.Time{} + return false, nil } if err != nil { + podStart = time.Time{} return false, nil } - glog.Infof("%d kube-system pods found", len(pods.Items)) - return true, nil + if podStart.IsZero() { + podStart = time.Now() + } + + glog.Infof("%d kube-system pods found since %s", len(pods.Items), podStart) + if time.Since(podStart) > 2*kconst.APICallRetryInterval { + glog.Infof("stability requirement met, returning") + return true, nil + } + return false, nil } if err = wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, podList); err != nil { return fmt.Errorf("apiserver never returned a pod list") @@ -424,6 +438,19 @@ func (k *Bootstrapper) WaitForCluster(k8s config.KubernetesConfig, timeout time. return nil } +// WaitForCluster blocks until the cluster appears to be healthy +func (k *Bootstrapper) WaitForCluster(k8s config.KubernetesConfig, timeout time.Duration) error { + start := time.Now() + out.T(out.Waiting, "Waiting for cluster to come online ...") + if err := k.waitForApiServerProcess(start, timeout); err != nil { + return err + } + if err := k.waitForApiServerHealthz(start, k8s, timeout); err != nil { + return err + } + return k.waitForSystemPods(start, k8s, timeout) +} + // RestartCluster restarts the Kubernetes cluster configured by kubeadm func (k *Bootstrapper) RestartCluster(k8s config.KubernetesConfig) error { glog.Infof("RestartCluster start") From 2411895715708c24e4804b73346b855bc2dab011 Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Tue, 12 Nov 2019 21:29:53 -0800 Subject: [PATCH 08/12] Clarify restart code, remove duplicate health check --- cmd/minikube/cmd/start.go | 4 +++- pkg/minikube/bootstrapper/kubeadm/kubeadm.go | 19 +++++-------------- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/cmd/minikube/cmd/start.go b/cmd/minikube/cmd/start.go index 7c38b9d87179..050d9f68909a 100644 --- a/cmd/minikube/cmd/start.go +++ b/cmd/minikube/cmd/start.go @@ -365,7 +365,9 @@ func runStart(cmd *cobra.Command, args []string) { if driverName == driver.None { prepareNone() } - if viper.GetBool(waitUntilHealthy) { + + // Skip pre-existing, because we already waited for health + if viper.GetBool(waitUntilHealthy) && !preExists { if err := bs.WaitForCluster(config.KubernetesConfig, viper.GetDuration(waitTimeout)); err != nil { exit.WithError("Wait failed", err) } diff --git a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go index 47736013556b..0090367d4b03 100644 --- a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go +++ b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go @@ -492,23 +492,14 @@ func (k *Bootstrapper) RestartCluster(k8s config.KubernetesConfig) error { } // We must ensure that the apiserver is healthy before proceeding - glog.Infof("waiting for apiserver healthz ...") - healthz := func() (bool, error) { - status, err := k.GetAPIServerStatus(net.ParseIP(k8s.NodeIP), k8s.NodePort) - if err != nil { - glog.Warningf("status: %v", err) - return false, nil - } - if status != "Running" { - return false, nil - } - return true, nil + if err := k.waitForApiServerHealthz(time.Now(), k8s, kconst.DefaultControlPlaneTimeout); err != nil { + return errors.Wrap(err, "apiserver healthz") } - if err = wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, healthz); err != nil { - return fmt.Errorf("apiserver healthz never reported healthy") + if err := k.waitForSystemPods(time.Now(), k8s, kconst.DefaultControlPlaneTimeout); err != nil { + return errors.Wrap(err, "system pods") } - // restart the proxy and coredns + // Explicitly re-enable kubeadm addons (proxy, coredns) so that they will check for IP or configuration changes. if rr, err := k.c.RunCmd(exec.Command("/bin/bash", "-c", fmt.Sprintf("%s phase addon all --config %s", baseCmd, yamlConfigPath))); err != nil { return errors.Wrapf(err, fmt.Sprintf("addon phase cmd:%q", rr.Command())) } From 6ba05939e6b5edd8d027c6afc1afc4f16efa785a Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Tue, 12 Nov 2019 21:38:31 -0800 Subject: [PATCH 09/12] Add stderr checking to validateKubectlGetPods --- test/integration/functional_test.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/integration/functional_test.go b/test/integration/functional_test.go index 5c4c84419fe7..7b82aa7e7313 100644 --- a/test/integration/functional_test.go +++ b/test/integration/functional_test.go @@ -149,12 +149,15 @@ func validateKubeContext(ctx context.Context, t *testing.T, profile string) { // validateKubectlGetPods asserts that `kubectl get pod -A` returns non-zero content func validateKubectlGetPods(ctx context.Context, t *testing.T, profile string) { - rr, err := Run(t, exec.CommandContext(ctx, "kubectl", "--context", profile, "get", "pod", "-A")) + rr, err := Run(t, exec.CommandContext(ctx, "kubectl", "--context", profile, "get", "po", "-A")) if err != nil { t.Errorf("%s failed: %v", rr.Args, err) } + if rr.Stderr.String() != "" { + t.Errorf("%s: got unexpected stderr: %s", rr.Command(), rr.Stderr) + } if !strings.Contains(rr.Stdout.String(), "kube-system") { - t.Errorf("%s = %q, want *kube-system*", rr.Command(), rr.Stdout.String()) + t.Errorf("%s = %q, want *kube-system*", rr.Command(), rr.Stdout) } } From 506cd949b9de5438209c4bbf22936673f9712c53 Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Tue, 12 Nov 2019 21:48:03 -0800 Subject: [PATCH 10/12] Lint --- pkg/minikube/bootstrapper/kubeadm/kubeadm.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go index 0090367d4b03..08f1d857c248 100644 --- a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go +++ b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go @@ -351,7 +351,7 @@ func (k *Bootstrapper) client(k8s config.KubernetesConfig) (*kubernetes.Clientse return kubernetes.NewForConfig(config) } -func (k *Bootstrapper) waitForApiServerProcess(start time.Time, timeout time.Duration) error { +func (k *Bootstrapper) waitForAPIServerProcess(start time.Time, timeout time.Duration) error { glog.Infof("waiting for apiserver process to appear ...") err := wait.PollImmediate(time.Second*1, timeout, func() (bool, error) { if time.Since(start) > timeout { @@ -371,7 +371,7 @@ func (k *Bootstrapper) waitForApiServerProcess(start time.Time, timeout time.Dur return nil } -func (k *Bootstrapper) waitForApiServerHealthz(start time.Time, k8s config.KubernetesConfig, timeout time.Duration) error { +func (k *Bootstrapper) waitForAPIServerHealthz(start time.Time, k8s config.KubernetesConfig, timeout time.Duration) error { glog.Infof("waiting for apiserver healthz status ...") hStart := time.Now() healthz := func() (bool, error) { @@ -442,10 +442,10 @@ func (k *Bootstrapper) waitForSystemPods(start time.Time, k8s config.KubernetesC func (k *Bootstrapper) WaitForCluster(k8s config.KubernetesConfig, timeout time.Duration) error { start := time.Now() out.T(out.Waiting, "Waiting for cluster to come online ...") - if err := k.waitForApiServerProcess(start, timeout); err != nil { + if err := k.waitForAPIServerProcess(start, timeout); err != nil { return err } - if err := k.waitForApiServerHealthz(start, k8s, timeout); err != nil { + if err := k.waitForAPIServerHealthz(start, k8s, timeout); err != nil { return err } return k.waitForSystemPods(start, k8s, timeout) @@ -492,7 +492,7 @@ func (k *Bootstrapper) RestartCluster(k8s config.KubernetesConfig) error { } // We must ensure that the apiserver is healthy before proceeding - if err := k.waitForApiServerHealthz(time.Now(), k8s, kconst.DefaultControlPlaneTimeout); err != nil { + if err := k.waitForAPIServerHealthz(time.Now(), k8s, kconst.DefaultControlPlaneTimeout); err != nil { return errors.Wrap(err, "apiserver healthz") } if err := k.waitForSystemPods(time.Now(), k8s, kconst.DefaultControlPlaneTimeout); err != nil { From 00c33d0a2252cf7bc08a099ef64c8dd0e2a668f8 Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Tue, 12 Nov 2019 22:43:55 -0800 Subject: [PATCH 11/12] KubectlGetPods now requires --wait=true --- test/integration/functional_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/functional_test.go b/test/integration/functional_test.go index 7b82aa7e7313..3a1a234e2424 100644 --- a/test/integration/functional_test.go +++ b/test/integration/functional_test.go @@ -114,7 +114,7 @@ func validateStartWithProxy(ctx context.Context, t *testing.T, profile string) { } // Use more memory so that we may reliably fit MySQL and nginx - startArgs := append([]string{"start", "-p", profile, "--wait=false", "--memory", "2500MB"}, StartArgs()...) + startArgs := append([]string{"start", "-p", profile, "--wait=true", "--memory", "2500MB"}, StartArgs()...) c := exec.CommandContext(ctx, Target(), startArgs...) env := os.Environ() env = append(env, fmt.Sprintf("HTTP_PROXY=%s", srv.Addr)) From e338ac0cc99adc8e3c44f7dcbf9d1bf5a28fb442 Mon Sep 17 00:00:00 2001 From: Thomas Stromberg Date: Wed, 13 Nov 2019 06:42:07 -0800 Subject: [PATCH 12/12] Double pod test waits --- test/integration/addons_test.go | 2 +- test/integration/fn_mount_cmd.go | 2 +- test/integration/fn_pvc.go | 4 ++-- test/integration/gvisor_addon_test.go | 12 ++++++------ 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/test/integration/addons_test.go b/test/integration/addons_test.go index a575e96eb57b..b3ea1f10689f 100644 --- a/test/integration/addons_test.go +++ b/test/integration/addons_test.go @@ -107,7 +107,7 @@ func validateIngressAddon(ctx context.Context, t *testing.T, profile string) { t.Errorf("%s failed: %v", rr.Args, err) } - if _, err := PodWait(ctx, t, profile, "default", "run=nginx", 2*time.Minute); err != nil { + if _, err := PodWait(ctx, t, profile, "default", "run=nginx", 4*time.Minute); err != nil { t.Fatalf("wait: %v", err) } if err := kapi.WaitForService(client, "default", "nginx", true, time.Millisecond*500, time.Minute*10); err != nil { diff --git a/test/integration/fn_mount_cmd.go b/test/integration/fn_mount_cmd.go index 277b8951423d..7888971be886 100644 --- a/test/integration/fn_mount_cmd.go +++ b/test/integration/fn_mount_cmd.go @@ -139,7 +139,7 @@ func validateMountCmd(ctx context.Context, t *testing.T, profile string) { t.Fatalf("%s failed: %v", rr.Args, err) } - if _, err := PodWait(ctx, t, profile, "default", "integration-test=busybox-mount", 2*time.Minute); err != nil { + if _, err := PodWait(ctx, t, profile, "default", "integration-test=busybox-mount", 4*time.Minute); err != nil { t.Fatalf("wait: %v", err) } diff --git a/test/integration/fn_pvc.go b/test/integration/fn_pvc.go index 54e1e8e93eb4..3110d455009d 100644 --- a/test/integration/fn_pvc.go +++ b/test/integration/fn_pvc.go @@ -37,7 +37,7 @@ func validatePersistentVolumeClaim(ctx context.Context, t *testing.T, profile st ctx, cancel := context.WithTimeout(ctx, 10*time.Minute) defer cancel() - if _, err := PodWait(ctx, t, profile, "kube-system", "integration-test=storage-provisioner", 2*time.Minute); err != nil { + if _, err := PodWait(ctx, t, profile, "kube-system", "integration-test=storage-provisioner", 4*time.Minute); err != nil { t.Fatalf("wait: %v", err) } @@ -83,7 +83,7 @@ func validatePersistentVolumeClaim(ctx context.Context, t *testing.T, profile st return fmt.Errorf("testpvc phase = %q, want %q (msg=%+v)", pvc.Status.Phase, "Bound", pvc) } - if err := retry.Expo(checkStoragePhase, 2*time.Second, 2*time.Minute); err != nil { + if err := retry.Expo(checkStoragePhase, 2*time.Second, 4*time.Minute); err != nil { t.Fatalf("PV Creation failed with error: %v", err) } } diff --git a/test/integration/gvisor_addon_test.go b/test/integration/gvisor_addon_test.go index f8ffb4d2c0ce..172802ceea95 100644 --- a/test/integration/gvisor_addon_test.go +++ b/test/integration/gvisor_addon_test.go @@ -75,7 +75,7 @@ func TestGvisorAddon(t *testing.T) { } }() - if _, err := PodWait(ctx, t, profile, "kube-system", "kubernetes.io/minikube-addons=gvisor", 2*time.Minute); err != nil { + if _, err := PodWait(ctx, t, profile, "kube-system", "kubernetes.io/minikube-addons=gvisor", 4*time.Minute); err != nil { t.Fatalf("waiting for gvisor controller to be up: %v", err) } @@ -90,10 +90,10 @@ func TestGvisorAddon(t *testing.T) { t.Fatalf("%s failed: %v", rr.Args, err) } - if _, err := PodWait(ctx, t, profile, "default", "run=nginx,untrusted=true", 2*time.Minute); err != nil { + if _, err := PodWait(ctx, t, profile, "default", "run=nginx,untrusted=true", 4*time.Minute); err != nil { t.Errorf("nginx: %v", err) } - if _, err := PodWait(ctx, t, profile, "default", "run=nginx,runtime=gvisor", 2*time.Minute); err != nil { + if _, err := PodWait(ctx, t, profile, "default", "run=nginx,runtime=gvisor", 4*time.Minute); err != nil { t.Errorf("nginx: %v", err) } @@ -107,13 +107,13 @@ func TestGvisorAddon(t *testing.T) { if err != nil { t.Fatalf("%s failed: %v", rr.Args, err) } - if _, err := PodWait(ctx, t, profile, "kube-system", "kubernetes.io/minikube-addons=gvisor", 2*time.Minute); err != nil { + if _, err := PodWait(ctx, t, profile, "kube-system", "kubernetes.io/minikube-addons=gvisor", 4*time.Minute); err != nil { t.Errorf("waiting for gvisor controller to be up: %v", err) } - if _, err := PodWait(ctx, t, profile, "default", "run=nginx,untrusted=true", 2*time.Minute); err != nil { + if _, err := PodWait(ctx, t, profile, "default", "run=nginx,untrusted=true", 4*time.Minute); err != nil { t.Errorf("nginx: %v", err) } - if _, err := PodWait(ctx, t, profile, "default", "run=nginx,runtime=gvisor", 2*time.Minute); err != nil { + if _, err := PodWait(ctx, t, profile, "default", "run=nginx,runtime=gvisor", 4*time.Minute); err != nil { t.Errorf("nginx: %v", err) } }