Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Re-initialize failed Kubernetes clusters #7234

Merged
merged 1 commit into from
Mar 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -421,8 +421,6 @@ github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8/go.mod h1:Nht
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af h1:pmfjZENx5imkbgOkpRUYLnmbU7UEFbjtDA2hxJ1ichM=
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k=
github.com/joefitzgerald/rainbow-reporter v0.1.0/go.mod h1:481CNgqmVHQZzdIbN52CupLJyoVwB10FQ/IQlF1pdL8=
github.com/johanneswuerbach/nfsexports v0.0.0-20181204082207-1aa528dcb345 h1:XP1VL9iOZu4yz/rq8zj+yvB23XEY5erXRzp8JYmkWu0=
github.com/johanneswuerbach/nfsexports v0.0.0-20181204082207-1aa528dcb345/go.mod h1:+c1/kUpg2zlkoWqTOvzDs36Wpbm3Gd1nlmtXAEB0WGU=
github.com/johanneswuerbach/nfsexports v0.0.0-20200318065542-c48c3734757f h1:tL0xH80QVHQOde6Qqdohv6PewABH8l8N9pywZtuojJ0=
github.com/johanneswuerbach/nfsexports v0.0.0-20200318065542-c48c3734757f/go.mod h1:+c1/kUpg2zlkoWqTOvzDs36Wpbm3Gd1nlmtXAEB0WGU=
github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo=
Expand Down
104 changes: 75 additions & 29 deletions pkg/minikube/bootstrapper/kubeadm/kubeadm.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ import (
"k8s.io/minikube/pkg/minikube/constants"
"k8s.io/minikube/pkg/minikube/cruntime"
"k8s.io/minikube/pkg/minikube/driver"
"k8s.io/minikube/pkg/minikube/kubelet"
"k8s.io/minikube/pkg/minikube/machine"
"k8s.io/minikube/pkg/minikube/out"
"k8s.io/minikube/pkg/minikube/vmpath"
Expand Down Expand Up @@ -129,7 +130,7 @@ func (k *Bootstrapper) LogCommands(cfg config.ClusterConfig, o bootstrapper.LogO
dmesg.WriteString(fmt.Sprintf(" | tail -n %d", o.Lines))
}

describeNodes := fmt.Sprintf("sudo %s describe node -A --kubeconfig=%s",
describeNodes := fmt.Sprintf("sudo %s describe nodes --kubeconfig=%s",
path.Join(vmpath.GuestPersistentDir, "binaries", cfg.KubernetesConfig.KubernetesVersion, "kubectl"),
path.Join(vmpath.GuestPersistentDir, "kubeconfig"))

Expand Down Expand Up @@ -181,20 +182,7 @@ func (k *Bootstrapper) clearStaleConfigs(cfg config.ClusterConfig) error {
return nil
}

// StartCluster starts the cluster
func (k *Bootstrapper) StartCluster(cfg config.ClusterConfig) error {
err := bsutil.ExistingConfig(k.c)
if err == nil { // if there is an existing cluster don't reconfigure it
return k.restartCluster(cfg)
}
glog.Infof("existence check: %v", err)

start := time.Now()
glog.Infof("StartCluster: %+v", cfg)
defer func() {
glog.Infof("StartCluster complete in %s", time.Since(start))
}()

func (k *Bootstrapper) init(cfg config.ClusterConfig) error {
version, err := util.ParseKubernetesVersion(cfg.KubernetesConfig.KubernetesVersion)
if err != nil {
return errors.Wrap(err, "parsing kubernetes version")
Expand Down Expand Up @@ -237,10 +225,10 @@ func (k *Bootstrapper) StartCluster(cfg config.ClusterConfig) error {
}

conf := bsutil.KubeadmYamlPath
c := exec.Command("/bin/bash", "-c", fmt.Sprintf("sudo mv %s.new %s && %s init --config %s %s --ignore-preflight-errors=%s", conf, conf, bsutil.InvokeKubeadm(cfg.KubernetesConfig.KubernetesVersion), conf, extraFlags, strings.Join(ignore, ",")))
rr, err := k.c.RunCmd(c)
if err != nil {
return errors.Wrapf(err, "init failed. output: %q", rr.Output())
c := exec.Command("/bin/bash", "-c", fmt.Sprintf("%s init --config %s %s --ignore-preflight-errors=%s",
bsutil.InvokeKubeadm(cfg.KubernetesConfig.KubernetesVersion), conf, extraFlags, strings.Join(ignore, ",")))
if _, err := k.c.RunCmd(c); err != nil {
return errors.Wrap(err, "run")
}

if cfg.Driver == driver.Docker {
Expand All @@ -258,12 +246,49 @@ func (k *Bootstrapper) StartCluster(cfg config.ClusterConfig) error {
}

if err := k.elevateKubeSystemPrivileges(cfg); err != nil {
glog.Warningf("unable to create cluster role binding, some addons might not work : %v. ", err)
glog.Warningf("unable to create cluster role binding, some addons might not work: %v", err)
}

return nil
}

// StartCluster brings up the cluster described by cfg.
//
// When bsutil.ExistingConfig reports no error, a restart is attempted first.
// If that restart fails, the failure is reported to the user, DeleteCluster is
// called on a best-effort basis (its error is only logged), and control falls
// through to initialization. Initialization copies the staged kubeadm config
// (the ".new" file) over the live one and calls init; if init fails, the
// cluster is deleted (again best-effort) and init is retried exactly once,
// with the result of that second attempt returned to the caller.
func (k *Bootstrapper) StartCluster(cfg config.ClusterConfig) error {
	began := time.Now()
	glog.Infof("StartCluster: %+v", cfg)
	defer func() {
		glog.Infof("StartCluster complete in %s", time.Since(began))
	}()

	if bsutil.ExistingConfig(k.c) == nil {
		glog.Infof("found existing configuration files, will attempt cluster restart")
		restartErr := k.restartCluster(cfg)
		if restartErr == nil {
			return nil
		}
		out.T(out.Embarrassed, "Unable to restart cluster, will reset it: {{.error}}", out.V{"error": restartErr})
		if delErr := k.DeleteCluster(cfg.KubernetesConfig); delErr != nil {
			glog.Warningf("delete failed: %v", delErr)
		}
		// The restart did not succeed: continue below with a fresh init.
	}

	// Put the staged kubeadm config in place; cp keeps the ".new" copy around.
	kubeadmCfg := bsutil.KubeadmYamlPath
	copyCmd := exec.Command("sudo", "cp", kubeadmCfg+".new", kubeadmCfg)
	if _, cpErr := k.c.RunCmd(copyCmd); cpErr != nil {
		return errors.Wrap(cpErr, "cp")
	}

	initErr := k.init(cfg)
	if initErr == nil {
		return nil
	}

	// First attempt failed: tear down whatever was created and try once more.
	out.T(out.Conflict, "initialization failed, will try again: {{.error}}", out.V{"error": initErr})
	if delErr := k.DeleteCluster(cfg.KubernetesConfig); delErr != nil {
		glog.Warningf("delete failed: %v", delErr)
	}
	return k.init(cfg)
}

func (k *Bootstrapper) controlPlaneEndpoint(cfg config.ClusterConfig) (string, int, error) {
cp, err := config.PrimaryControlPlane(&cfg)
if err != nil {
Expand Down Expand Up @@ -410,8 +435,8 @@ func (k *Bootstrapper) restartCluster(cfg config.ClusterConfig) error {
return errors.Wrap(err, "clearing stale configs")
}

if _, err := k.c.RunCmd(exec.Command("sudo", "mv", conf+".new", conf)); err != nil {
return errors.Wrap(err, "mv")
if _, err := k.c.RunCmd(exec.Command("sudo", "cp", conf+".new", conf)); err != nil {
return errors.Wrap(err, "cp")
}

baseCmd := fmt.Sprintf("%s %s", bsutil.InvokeKubeadm(cfg.KubernetesConfig.KubernetesVersion), phase)
Expand All @@ -425,9 +450,9 @@ func (k *Bootstrapper) restartCluster(cfg config.ClusterConfig) error {
glog.Infof("resetting cluster from %s", conf)
// Run commands one at a time so that it is easier to root cause failures.
for _, c := range cmds {
rr, err := k.c.RunCmd(exec.Command("/bin/bash", "-c", c))
_, err := k.c.RunCmd(exec.Command("/bin/bash", "-c", c))
if err != nil {
return errors.Wrapf(err, "running cmd: %s", rr.Command())
return errors.Wrap(err, "run")
}
}

Expand Down Expand Up @@ -504,11 +529,32 @@ func (k *Bootstrapper) DeleteCluster(k8s config.KubernetesConfig) error {
cmd = fmt.Sprintf("%s reset", bsutil.InvokeKubeadm(k8s.KubernetesVersion))
}

if rr, err := k.c.RunCmd(exec.Command("/bin/bash", "-c", cmd)); err != nil {
return errors.Wrapf(err, "kubeadm reset: cmd: %q", rr.Command())
rr, derr := k.c.RunCmd(exec.Command("/bin/bash", "-c", cmd))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code seems similar to the Stop() implementations of the KIC and None drivers... I wonder if we could get rid of those (not sure), since we have it here.

If minikube stop calls DeleteCluster, could we get rid of those?

if derr != nil {
glog.Warningf("%s: %v", rr.Command(), err)
}

return nil
if err := kubelet.ForceStop(k.c); err != nil {
glog.Warningf("stop kubelet: %v", err)
}

cr, err := cruntime.New(cruntime.Config{Type: k8s.ContainerRuntime, Runner: k.c, Socket: k8s.CRISocket})
if err != nil {
return errors.Wrap(err, "runtime")
}

containers, err := cr.ListContainers(cruntime.ListOptions{Namespaces: []string{"kube-system"}})
if err != nil {
glog.Warningf("unable to list kube-system containers: %v", err)
}
if len(containers) > 0 {
glog.Warningf("found %d kube-system containers to stop", len(containers))
if err := cr.StopContainers(containers); err != nil {
glog.Warningf("error stopping containers: %v", err)
}
}

return derr
}

// SetupCerts sets up certificates within the cluster.
Expand Down Expand Up @@ -619,7 +665,7 @@ func reloadKubelet(runner command.Runner) error {
return nil
}

startCmd := exec.Command("/bin/bash", "-c", fmt.Sprintf("sudo mv %s.new %s && sudo mv %s.new %s && sudo systemctl daemon-reload && sudo systemctl restart kubelet", svc, svc, conf, conf))
startCmd := exec.Command("/bin/bash", "-c", fmt.Sprintf("sudo cp %s.new %s && sudo cp %s.new %s && sudo systemctl daemon-reload && sudo systemctl restart kubelet", svc, svc, conf, conf))
if _, err := runner.RunCmd(startCmd); err != nil {
return errors.Wrap(err, "starting kubelet")
}
Expand Down