Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion cmd/machine-config-daemon/pivot.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strings"

// Enable sha256 in container image references
Expand Down Expand Up @@ -183,7 +184,7 @@ func updateTuningArgs(tuningFilePath, cmdLinePath string) (bool, error) {
return changed, nil
}

func run(_ *cobra.Command, args []string) error {
func run(_ *cobra.Command, args []string) (retErr error) {
flag.Set("logtostderr", "true")
flag.Parse()

Expand Down Expand Up @@ -241,6 +242,10 @@ func Execute(cmd *cobra.Command, args []string) {
err := run(cmd, args)
if err != nil {
fmt.Printf("error: %v\n", err)
os.MkdirAll(filepath.Dir(types.PivotFailurePath), 0755)
// write a pivot failure file that we'll read from MCD since we start this with systemd
// and we just follow logs
ioutil.WriteFile(types.PivotFailurePath, []byte(err.Error()), 0644)
os.Exit(1)
}
}
9 changes: 9 additions & 0 deletions pkg/controller/node/node_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -733,17 +733,26 @@ func (ctrl *Controller) syncMachineConfigPool(key string) error {

nodes, err := ctrl.getNodesForPool(pool)
if err != nil {
if syncErr := ctrl.syncStatusOnly(pool); syncErr != nil {
return goerrs.Wrapf(err, "error getting nodes for pool %q, sync error: %v", pool.Name, syncErr)
}
return err
}

maxunavail, err := maxUnavailable(pool, nodes)
if err != nil {
if syncErr := ctrl.syncStatusOnly(pool); syncErr != nil {
return goerrs.Wrapf(err, "error getting max unavailable count for pool %q, sync error: %v", pool.Name, syncErr)
}
return err
}

candidates := getCandidateMachines(pool, nodes, maxunavail)
for _, node := range candidates {
if err := ctrl.setDesiredMachineConfigAnnotation(node.Name, pool.Spec.Configuration.Name); err != nil {
if syncErr := ctrl.syncStatusOnly(pool); syncErr != nil {
return goerrs.Wrapf(err, "error setting desired machine config annotation for node %q, pool %q, sync error: %v", node.Name, pool.Name, syncErr)
}
return err
}
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -1272,12 +1272,12 @@ func (dn *Daemon) validateOnDiskState(currentConfig *mcfgv1.MachineConfig) error
return err
}
if !osMatch {
return errors.Errorf("expected target osImageURL %s", currentConfig.Spec.OSImageURL)
return errors.Errorf("expected target osImageURL %q, have %q", currentConfig.Spec.OSImageURL, dn.bootedOSImageURL)
}
// And the rest of the disk state
currentIgnConfig, report, err := ign.Parse(currentConfig.Spec.Config.Raw)
if err != nil {
return errors.Errorf("Failed to parse Ignition for validation: %s\nReport: %v", err, report)
return errors.Errorf("failed to parse Ignition for validation: %s\nReport: %v", err, report)
}
if err := checkFiles(currentIgnConfig.Storage.Files); err != nil {
return err
Expand Down
5 changes: 5 additions & 0 deletions pkg/daemon/pivot/types/imageinspection.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,9 @@ package types
const (
// PivotNamePrefix is literally the name prefix of the new pivot
PivotNamePrefix = "ostree-container-pivot-"

// PivotFailurePath is the path pivot writes its error to when it fails. We need this to bubble
// up any errors since we only log those errors in MCD. If we don't do this then the error messages
// don't get visibility up the stack.
PivotFailurePath = "/run/pivot/failure"
)
57 changes: 31 additions & 26 deletions pkg/daemon/pivot/utils/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,58 +2,63 @@ package utils

import (
"bytes"
"io"
"os"
"os/exec"
"strings"
"time"

"github.com/golang/glog"
"github.com/pkg/errors"
"k8s.io/apimachinery/pkg/util/wait"
)

// runImpl is the actual shell execution implementation used by other functions.
func runImpl(capture bool, command string, args ...string) ([]byte, error) {
func runImpl(command string, args ...string) ([]byte, error) {
glog.Infof("Running: %s %s\n", command, strings.Join(args, " "))
cmd := exec.Command(command, args...)
cmd.Stderr = os.Stderr
var stdout bytes.Buffer
if !capture {
cmd.Stdout = os.Stdout
} else {
cmd.Stdout = &stdout
}
// multiplex writes to std streams so we keep seeing logs in MCD/systemd
// but we'll still be able to give out something here
var b bytes.Buffer
stderr := io.MultiWriter(os.Stderr, &b)
stdout := io.MultiWriter(os.Stdout, &b)
cmd.Stderr = stderr
cmd.Stdout = stdout
err := cmd.Run()
if err != nil {
return nil, err
}
if capture {
return stdout.Bytes(), nil
return nil, errors.Wrapf(err, "running %s %s failed: %s", command, strings.Join(args, " "), b.String())
}
return []byte{}, nil
return b.Bytes(), nil
}

// runExtBackoff is an extension to runExt that supports configuring retries/duration/backoff.
func runExtBackoff(capture bool, backoff wait.Backoff, command string, args ...string) string {
var output string
err := wait.ExponentialBackoff(backoff, func() (bool, error) {
if out, e := runImpl(capture, command, args...); e != nil {
glog.Warningf("%s failed: %v; retrying...", command, e)
func runExtBackoff(backoff wait.Backoff, command string, args ...string) (string, error) {
var (
output string
lastErr error
)
if err := wait.ExponentialBackoff(backoff, func() (bool, error) {
out, err := runImpl(command, args...)
if err != nil {
lastErr = err
glog.Warningf("%s failed: %v; retrying...", command, err)
return false, nil
} else if capture {
output = strings.TrimSpace(string(out))
}
output = strings.TrimSpace(string(out))
return true, nil
})
if err != nil {
glog.Fatalf("%s: %s", command, err)
}); err != nil {
if err == wait.ErrWaitTimeout {
return "", errors.Wrapf(lastErr, "failed to run command %s (%d tries): %v", command, backoff.Steps, err)
}
return "", errors.Wrap(err, "failed to run command %s (%d tries): %v")
}
return output
return output, nil
}

// RunExt executes a command, optionally capturing the output and retrying multiple
// times before exiting with a fatal error.
func RunExt(capture bool, retries int, command string, args ...string) string {
return runExtBackoff(capture, wait.Backoff{
func RunExt(retries int, command string, args ...string) (string, error) {
return runExtBackoff(wait.Backoff{
Steps: retries + 1, // times to try
Duration: 5 * time.Second, // sleep between tries
Factor: 2, // factor by which to increase sleep
Expand Down
7 changes: 2 additions & 5 deletions pkg/daemon/pivot/utils/run_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,13 @@ import (
// TestRunExt verifies that the wait machinery works, even though we're only
// just testing a single step here since it's tricky to test retries.
func TestRunExt(t *testing.T) {
RunExt(false, 0, "echo", "echo", "from", "TestRunExt")

result := RunExt(true, 0, "echo", "hello", "world")
assert.Equal(t, "hello world", result)
RunExt(0, "echo", "echo", "from", "TestRunExt")

tmpdir, err := ioutil.TempDir("", "run_test")
assert.Nil(t, err)
defer os.RemoveAll(tmpdir)
tmpf := tmpdir + "/t"
runExtBackoff(false, wait.Backoff{Steps: 6,
runExtBackoff(wait.Backoff{Steps: 6,
Duration: 1 * time.Second,
Factor: 1.1},
"sh", "-c", "printf x >> "+tmpf+" && test $(wc -c < "+tmpf+") = 3")
Expand Down
10 changes: 8 additions & 2 deletions pkg/daemon/rpm-ostree.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,10 @@ func (r *RpmOstreeClient) PullAndRebase(container string, keep bool) (imgid stri
args := []string{"pull", "-q"}
args = append(args, authArgs...)
args = append(args, container)
pivotutils.RunExt(false, numRetriesNetCommands, "podman", args...)
_, err = pivotutils.RunExt(numRetriesNetCommands, "podman", args...)
if err != nil {
return
}
} else {
if previousPivot != "" {
var targetMatched bool
Expand All @@ -190,7 +193,10 @@ func (r *RpmOstreeClient) PullAndRebase(container string, keep bool) (imgid stri
args := []string{"pull", "-q"}
args = append(args, authArgs...)
args = append(args, container)
pivotutils.RunExt(false, numRetriesNetCommands, "podman", args...)
_, err = pivotutils.RunExt(numRetriesNetCommands, "podman", args...)
if err != nil {
return
}
}

inspectArgs := []string{"inspect", "--type=image"}
Expand Down
14 changes: 11 additions & 3 deletions pkg/daemon/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
mcfgv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"
ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
"github.com/openshift/machine-config-operator/pkg/daemon/constants"
pivottypes "github.com/openshift/machine-config-operator/pkg/daemon/pivot/types"
pivotutils "github.com/openshift/machine-config-operator/pkg/daemon/pivot/utils"
)

Expand Down Expand Up @@ -658,7 +659,10 @@ func (dn *Daemon) mountOSContainer(container string) (mnt, containerName string,
args := []string{"pull", "-q"}
args = append(args, authArgs...)
args = append(args, container)
pivotutils.RunExt(false, numRetriesNetCommands, "podman", args...)
_, err = pivotutils.RunExt(numRetriesNetCommands, "podman", args...)
if err != nil {
return
}

containerName = "mcd-" + string(uuid.NewUUID())
// `podman mount` wants a container, so let's create a dummy one, but not run it
Expand Down Expand Up @@ -1285,7 +1289,11 @@ func (dn *Daemon) updateOS(config *mcfgv1.MachineConfig) error {
if dn.kubeClient != nil {
if err := dn.NodeUpdaterClient.RunPivot(newURL); err != nil {
MCDPivotErr.WithLabelValues(newURL, err.Error()).SetToCurrentTime()
return fmt.Errorf("failed to run pivot: %v", err)
pivotErr, err2 := ioutil.ReadFile(pivottypes.PivotFailurePath)
if err2 != nil || len(pivotErr) == 0 {
glog.Warningf("pivot error file doesn't contain anything, it was never written or an error occurred: %v", err2)
}
return fmt.Errorf("failed to run pivot: %v: %v", err, string(pivotErr))
}
} else {
// If we're here we're invoked via `machine-config-daemon-firstboot.service`, so let's
Expand All @@ -1297,7 +1305,7 @@ func (dn *Daemon) updateOS(config *mcfgv1.MachineConfig) error {
}
if !changed {
// This really shouldn't happen
glog.Warningf("Didn't change when updating to %s ?", newURL)
glog.Warningf("Didn't change when updating to %q?", newURL)
}
}

Expand Down