Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make hyperkit driver more robust: detect crashing, misinstallation, other process names #3660

Merged
merged 11 commits into from
Feb 15, 2019
112 changes: 74 additions & 38 deletions pkg/drivers/hyperkit/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,19 +79,28 @@ func NewDriver(hostName, storePath string) *Driver {

// PreCreateCheck is called to enforce pre-creation steps
func (d *Driver) PreCreateCheck() error {
return d.verifyRootPermissions()
}

// verifyRootPermissions is called before any step which needs root access
func (d *Driver) verifyRootPermissions() error {
exe, err := os.Executable()
if err != nil {
return err
}

if syscall.Geteuid() != 0 {
euid := syscall.Geteuid()
log.Debugf("exe=%s uid=%d", exe, euid)
if euid != 0 {
return fmt.Errorf(permErr, filepath.Base(exe), exe, exe)
}

return nil
}

func (d *Driver) Create() error {
if err := d.verifyRootPermissions(); err != nil {
return err
}

// TODO: handle different disk types.
if err := pkgdrivers.MakeDiskImage(d.BaseDriver, d.Boot2DockerURL, d.DiskSize); err != nil {
return errors.Wrap(err, "making disk image")
Expand Down Expand Up @@ -125,37 +134,55 @@ func (d *Driver) GetURL() (string, error) {
return fmt.Sprintf("tcp://%s:2376", ip), nil
}

// GetState returns the state that the host is in (running, stopped, etc)
func (d *Driver) GetState() (state.State, error) {
pid := d.getPid()
// Return the state of the hyperkit pid
func pidState(pid int) (state.State, error) {
if pid == 0 {
return state.Stopped, nil
}
p, err := os.FindProcess(pid)
p, err := ps.FindProcess(pid)
if err != nil {
return state.Error, err
}

// Sending a signal of 0 can be used to check the existence of a process.
if err := p.Signal(syscall.Signal(0)); err != nil {
if p == nil {
log.Warnf("hyperkit pid missing from process table, err=%v", err)
return state.Stopped, nil
}
if p == nil {
// hyperkit or com.docker.hyper
if !strings.Contains(p.Executable(), "hyper") {
log.Warnf("pid %d is stale, and is being used by %s", pid, p.Executable())
return state.Stopped, nil
}
return state.Running, nil
}

// GetState returns the state that the host is in (running, stopped, etc)
func (d *Driver) GetState() (state.State, error) {
if err := d.verifyRootPermissions(); err != nil {
return state.Error, err
}

pid := d.getPid()
log.Debugf("hyperkit pid from json: %d", pid)
return pidState(pid)
}

// Kill stops a host forcefully
func (d *Driver) Kill() error {
if err := d.verifyRootPermissions(); err != nil {
return err
}
return d.sendSignal(syscall.SIGKILL)
}

// Remove a host
func (d *Driver) Remove() error {
if err := d.verifyRootPermissions(); err != nil {
return err
}

s, err := d.GetState()
if err != nil || s == state.Error {
log.Infof("Error checking machine status: %v, assuming it has been removed already", err)
log.Debugf("Error checking machine status: %v, assuming it has been removed already", err)
}
if s == state.Running {
if err := d.Stop(); err != nil {
Expand All @@ -171,6 +198,10 @@ func (d *Driver) Restart() error {

// Start a host
func (d *Driver) Start() error {
if err := d.verifyRootPermissions(); err != nil {
return err
}

stateDir := filepath.Join(d.StorePath, "machines", d.MachineName)
if err := d.recoverFromUncleanShutdown(); err != nil {
return err
Expand All @@ -197,29 +228,36 @@ func (d *Driver) Start() error {
h.VSockPorts = vsockPorts
}

log.Infof("Using UUID %s", h.UUID)
log.Debugf("Using UUID %s", h.UUID)
mac, err := GetMACAddressFromUUID(h.UUID)
if err != nil {
return errors.Wrap(err, "getting MAC address from UUID")
}

// Need to strip 0's
mac = trimMacAddress(mac)
log.Infof("Generated MAC %s", mac)
log.Debugf("Generated MAC %s", mac)
h.Disks = []hyperkit.DiskConfig{
{
Path: pkgdrivers.GetDiskPath(d.BaseDriver),
Size: d.DiskSize,
Driver: "virtio-blk",
},
}
log.Infof("Starting with cmdline: %s", d.Cmdline)
log.Debugf("Starting with cmdline: %s", d.Cmdline)
if err := h.Start(d.Cmdline); err != nil {
return errors.Wrapf(err, "starting with cmd line: %s", d.Cmdline)
}

getIP := func() error {
var err error
st, err := d.GetState()
if err != nil {
return errors.Wrap(err, "get state")
}
if st == state.Error || st == state.Stopped {
return fmt.Errorf("hyperkit crashed! command line:\n hyperkit %s", d.Cmdline)
}

d.IPAddress, err = GetIPAddressByMACAddress(mac)
if err != nil {
return &commonutil.RetriableError{Err: err}
Expand All @@ -230,6 +268,7 @@ func (d *Driver) Start() error {
if err := commonutil.RetryAfter(30, getIP, 2*time.Second); err != nil {
return fmt.Errorf("IP address never found in dhcp leases file %v", err)
}
log.Debugf("IP: %s", d.IPAddress)

if len(d.NFSShares) > 0 {
log.Info("Setting up NFS mounts")
Expand Down Expand Up @@ -257,47 +296,46 @@ func (d *Driver) recoverFromUncleanShutdown() error {
stateDir := filepath.Join(d.StorePath, "machines", d.MachineName)
pidFile := filepath.Join(stateDir, pidFileName)

_, err := os.Stat(pidFile)

if os.IsNotExist(err) {
log.Infof("clean start, hyperkit pid file doesn't exist: %s", pidFile)
return nil
}

if err != nil {
return errors.Wrap(err, "checking hyperkit pid file existence")
if _, err := os.Stat(pidFile); err != nil {
if os.IsNotExist(err) {
log.Debugf("clean start, hyperkit pid file doesn't exist: %s", pidFile)
return nil
}
return errors.Wrap(err, "stat")
}

log.Warnf("minikube might have been shutdown in an unclean way, the hyperkit pid file still exists: %s", pidFile)

content, err := ioutil.ReadFile(pidFile)
bs, err := ioutil.ReadFile(pidFile)
if err != nil {
return errors.Wrapf(err, "reading pidfile %s", pidFile)
}
pid, err := strconv.Atoi(string(content))
content := strings.TrimSpace(string(bs))
pid, err := strconv.Atoi(content)
if err != nil {
return errors.Wrapf(err, "parsing pidfile %s", pidFile)
}

p, err := ps.FindProcess(pid)
st, err := pidState(pid)
if err != nil {
return errors.Wrapf(err, "trying to find process for PID %d", pid)
return errors.Wrap(err, "pidState")
}

if p != nil && !strings.Contains(p.Executable(), "hyperkit") {
return fmt.Errorf("something is not right...please stop all minikube instances, seemingly a hyperkit server is already running with pid %d, executable: %s", pid, p.Executable())
log.Debugf("pid %d is in state %q", pid, st)
if st == state.Running {
return nil
}

log.Infof("No running hyperkit process found with PID %d, removing %s...", pid, pidFile)
log.Debugf("Removing stale pid file %s...", pidFile)
if err := os.Remove(pidFile); err != nil {
return errors.Wrap(err, fmt.Sprintf("removing pidFile %s", pidFile))
}

return nil
}

// Stop a host gracefully
func (d *Driver) Stop() error {
if err := d.verifyRootPermissions(); err != nil {
return err
}
d.cleanupNfsExports()
return d.sendSignal(syscall.SIGTERM)
}
Expand Down Expand Up @@ -334,9 +372,7 @@ func (d *Driver) extractVSockPorts() ([]int, error) {
for _, port := range d.VSockPorts {
p, err := strconv.Atoi(port)
if err != nil {
var err InvalidPortNumberError
err = InvalidPortNumberError(port)
return nil, err
return nil, InvalidPortNumberError(port)
}
vsockPorts = append(vsockPorts, p)
}
Expand Down
1 change: 0 additions & 1 deletion pkg/drivers/hyperkit/iso.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (
"io"
"io/ioutil"
"os"

"strings"

"github.com/hooklift/iso9660"
Expand Down
25 changes: 15 additions & 10 deletions pkg/drivers/hyperkit/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,14 @@ import (
"os/exec"
"regexp"
"strings"

"github.com/docker/machine/libmachine/log"
)

const (
DHCPLeasesFile = "/var/db/dhcpd_leases"
CONFIG_PLIST = "/Library/Preferences/SystemConfiguration/com.apple.vmnet"
NET_ADDR_KEY = "Shared_Net_Address"
LeasesPath = "/var/db/dhcpd_leases"
VMNetDomain = "/Library/Preferences/SystemConfiguration/com.apple.vmnet"
SharedNetAddrKey = "Shared_Net_Address"
)

type DHCPEntry struct {
Expand All @@ -42,10 +44,11 @@ type DHCPEntry struct {
}

func GetIPAddressByMACAddress(mac string) (string, error) {
return getIpAddressFromFile(mac, DHCPLeasesFile)
return getIPAddressFromFile(mac, LeasesPath)
}

func getIpAddressFromFile(mac, path string) (string, error) {
func getIPAddressFromFile(mac, path string) (string, error) {
log.Debugf("Searching for %s in %s ...", mac, path)
file, err := os.Open(path)
if err != nil {
return "", err
Expand All @@ -56,8 +59,11 @@ func getIpAddressFromFile(mac, path string) (string, error) {
if err != nil {
return "", err
}
log.Debugf("Found %d entries in %s!", len(dhcpEntries), path)
for _, dhcpEntry := range dhcpEntries {
log.Debugf("dhcp entry: %+v", dhcpEntry)
if dhcpEntry.HWAddress == mac {
log.Debugf("Found match: %s", mac)
return dhcpEntry.IPAddress, nil
}
}
Expand Down Expand Up @@ -114,12 +120,11 @@ func trimMacAddress(rawUUID string) string {
}

func GetNetAddr() (net.IP, error) {
_, err := os.Stat(CONFIG_PLIST + ".plist")
if err != nil {
return nil, fmt.Errorf("Does not exist %s", CONFIG_PLIST+".plist")
plistPath := VMNetDomain + ".plist"
tstromberg marked this conversation as resolved.
Show resolved Hide resolved
if _, err := os.Stat(plistPath); err != nil {
return nil, fmt.Errorf("stat: %v", err)
}

out, err := exec.Command("defaults", "read", CONFIG_PLIST, NET_ADDR_KEY).Output()
out, err := exec.Command("defaults", "read", VMNetDomain, SharedNetAddrKey).Output()
if err != nil {
return nil, err
}
Expand Down
6 changes: 3 additions & 3 deletions pkg/drivers/hyperkit/network_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,13 @@ func Test_getIpAddressFromFile(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := getIpAddressFromFile(tt.args.mac, tt.args.path)
got, err := getIPAddressFromFile(tt.args.mac, tt.args.path)
if (err != nil) != tt.wantErr {
t.Errorf("getIpAddressFromFile() error = %v, wantErr %v", err, tt.wantErr)
t.Errorf("getIPAddressFromFile() error = %v, wantErr %v", err, tt.wantErr)
return
}
if got != tt.want {
t.Errorf("getIpAddressFromFile() = %v, want %v", got, tt.want)
t.Errorf("getIPAddressFromFile() = %v, want %v", got, tt.want)
}
})
}
Expand Down