Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ maipo/
.coverage
tools/bin
.idea
bin/
15 changes: 12 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ PYIGNORE ?= E128,E241,E402,E501,E722,W503,W504

MANTLE_BINARIES := ore kola plume

all: tools mantle gangplank
all: bin/coreos-assembler tools mantle gangplank

src:=$(shell find src -maxdepth 1 -type f -executable -print)
pysources=$(shell find src -type f -name '*.py') $(shell for x in $(src); do if head -1 $$x | grep -q python; then echo $$x; fi; done)
Expand All @@ -30,12 +30,16 @@ else ifeq ($(GOARCH),aarch64)
GOARCH="arm64"
endif

bin/coreos-assembler:
cd cmd && go build -mod vendor -o ../$@
.PHONY: bin/coreos-assembler

.%.shellchecked: %
./tests/check_one.sh $< $@

shellcheck: ${src_checked} ${tests_checked} ${cwd_checked}

check: shellcheck flake8 pycheck schema-check mantle-check gangplank-check
check: shellcheck flake8 pycheck schema-check mantle-check gangplank-check cosa-go-check
echo OK

pycheck:
Expand All @@ -53,6 +57,11 @@ unittest:
COSA_TEST_META_PATH=`pwd`/fixtures \
PYTHONPATH=`pwd`/src python3 -m pytest tests/

cosa-go-check:
(cd cmd && go test -mod=vendor)
go test -mod=vendor github.com/coreos/coreos-assembler/internal/pkg/bashexec
go test -mod=vendor github.com/coreos/coreos-assembler/internal/pkg/cosash

clean:
rm -f ${src_checked} ${tests_checked} ${cwd_checked}
rm -rf tools/bin
Expand Down Expand Up @@ -114,7 +123,7 @@ install:
install -d $(DESTDIR)$(PREFIX)/lib/coreos-assembler/cosalib
install -D -t $(DESTDIR)$(PREFIX)/lib/coreos-assembler/cosalib $$(find src/cosalib/ -maxdepth 1 -type f)
install -d $(DESTDIR)$(PREFIX)/bin
ln -sf ../lib/coreos-assembler/coreos-assembler $(DESTDIR)$(PREFIX)/bin/
install bin/coreos-assembler $(DESTDIR)$(PREFIX)/bin/
ln -sf ../lib/coreos-assembler/cp-reflink $(DESTDIR)$(PREFIX)/bin/
ln -sf coreos-assembler $(DESTDIR)$(PREFIX)/bin/cosa
install -d $(DESTDIR)$(PREFIX)/lib/coreos-assembler/tests/kola
Expand Down
55 changes: 55 additions & 0 deletions cmd/clean.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// See usage below
package main

import (
"fmt"

"github.com/coreos/coreos-assembler/internal/pkg/bashexec"
"github.com/coreos/coreos-assembler/internal/pkg/cosash"
)

func runClean(argv []string) error {
const cleanUsage = `Usage: coreos-assembler clean --help
coreos-assembler clean [--all]

Delete all build artifacts. Use --all to also clean the cache/ directory.
`

all := false
for _, arg := range argv {
switch arg {
case "h":
case "--help":
fmt.Print(cleanUsage)
return nil
case "-a":
case "--all":
all = true
default:
return fmt.Errorf("unrecognized option: %s", arg)
}
}

sh, err := cosash.NewCosaSh()
if err != nil {
return err
}
if _, err := sh.PrepareBuild(); err != nil {
return err
}

if all {
priv, err := sh.HasPrivileges()
if err != nil {
return err
}
cmd := "rm -rf cache/*"
if priv {
cmd = fmt.Sprintf("sudo %s", cmd)
}
bashexec.Run("cleanup cache", cmd)
} else {
fmt.Println("Note: retaining cache/")
}
return bashexec.Run("cleanup", "rm -rf builds/* tmp/*")
}
177 changes: 177 additions & 0 deletions cmd/coreos-assembler.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
// This is the primary entrypoint for /usr/bin/coreos-assembler.
package main

import (
"fmt"
"io/ioutil"
"os"
"os/exec"
"sort"
"strings"
"syscall"
)

// commands we'd expect to use in the local dev path
var buildCommands = []string{"init", "fetch", "build", "run", "prune", "clean", "list"}
var advancedBuildCommands = []string{"buildfetch", "buildupload", "oc-adm-release", "push-container", "upload-oscontainer"}
var buildextendCommands = []string{"aliyun", "aws", "azure", "digitalocean", "exoscale", "gcp", "ibmcloud", "kubevirt", "live", "metal", "metal4k", "nutanix", "openstack", "qemu", "secex", "virtualbox", "vmware", "vultr"}
var utilityCommands = []string{"aws-replicate", "compress", "generate-hashlist", "koji-upload", "kola", "remote-build-container", "remote-prune", "sign", "tag"}
var otherCommands = []string{"shell", "meta"}

func init() {
// Note buildCommands is intentionally listed in frequency order
sort.Strings(advancedBuildCommands)
sort.Strings(buildextendCommands)
sort.Strings(utilityCommands)
sort.Strings(otherCommands)
}

func wrapCommandErr(err error) error {
if err == nil {
return nil
}
if exiterr, ok := err.(*exec.ExitError); ok {
return fmt.Errorf("%w\n%s", err, exiterr.Stderr)
}
return err
}

func printCommands(title string, cmds []string) {
fmt.Printf("%s:\n", title)
for _, cmd := range cmds {
fmt.Printf(" %s\n", cmd)
}
}

func printUsage() {
fmt.Println("Usage: coreos-assembler CMD ...")
printCommands("Build commands", buildCommands)
printCommands("Advanced build commands", advancedBuildCommands)
printCommands("Platform builds", buildextendCommands)
printCommands("Utility commands", utilityCommands)
printCommands("Other commands", otherCommands)
}

func run(argv []string) error {
if err := initializeGlobalState(argv); err != nil {
return fmt.Errorf("failed to initialize global state: %w", err)
}

var cmd string
if len(argv) > 0 {
cmd = argv[0]
argv = argv[1:]
}

if cmd == "" {
printUsage()
os.Exit(1)
}

// Manual argument parsing here for now; once we get to "phase 1"
// of the Go conversion we can vendor cobra (and other libraries)
// at the toplevel.
switch cmd {
case "clean":
return runClean(argv)
}

target := fmt.Sprintf("/usr/lib/coreos-assembler/cmd-%s", cmd)
_, err := os.Stat(target)
if err != nil {
if os.IsNotExist(err) {
return fmt.Errorf("unknown command: %s", cmd)
}
return fmt.Errorf("failed to stat %s: %w", target, err)
}

c := exec.Command(target, argv...)
c.Stdin = os.Stdin
c.Stdout = os.Stdout
c.Stderr = os.Stderr
if err := c.Run(); err != nil {
return fmt.Errorf("failed to execute cmd-%s: %w", cmd, err)
}
return nil
}

func initializeGlobalState(argv []string) error {
// Set PYTHONUNBUFFERED=1 so that we get unbuffered output. We should
// be able to do this on the shebang lines but env doesn't support args
// right now. In Fedora we should be able to use the `env -S` option.
os.Setenv("PYTHONUNBUFFERED", "1")

// docker/podman don't run through PAM, but we want this set for the privileged
// (non-virtualized) path
user, ok := os.LookupEnv("USER")
if !ok {
b, err := exec.Command("id", "-nu").Output()
if err == nil {
user = strings.TrimSpace(string(b))
} else {
user = "cosa"
}
os.Setenv("USER", user)
}

// https://github.com/containers/libpod/issues/1448
// if /sys/fs/selinux is mounted, various tools will think they're on a SELinux enabled
// host system, and we don't want that. Work around this by overmounting it.
// So far we only see /sys/fs/selinux mounted in a privileged container, so we know we
// have privileges to create a new mount namespace and overmount it with an empty directory.
const selinuxfs = "/sys/fs/selinux"
if _, err := os.Stat(selinuxfs + "/status"); err == nil {
const unsharedKey = "coreos_assembler_unshared"
if _, ok := os.LookupEnv(unsharedKey); ok {
err := exec.Command("sudo", "mount", "--bind", "/usr/share/empty", "/sys/fs/selinux").Run()
if err != nil {
return fmt.Errorf("failed to unmount %s: %w", selinuxfs, wrapCommandErr(err))
}
} else {
fmt.Fprintf(os.Stderr, "warning: %s appears to be mounted but should not be; enabling workaround\n", selinuxfs)
selfpath, err := os.Readlink("/proc/self/exe")
if err != nil {
return err
}
baseArgv := []string{"sudo", "-E", "--", "env", fmt.Sprintf("%s=1", unsharedKey), "unshare", "-m", "--", "runuser", "-u", user, "--", selfpath}
err = syscall.Exec("/usr/bin/sudo", append(baseArgv, argv...), os.Environ())
return fmt.Errorf("failed to re-exec self to unmount %s: %w", selinuxfs, err)
}
}

// When trying to connect to libvirt we get "Failed to find user record
// for uid" errors if there is no entry for our UID in /etc/passwd.
// This was taken from 'Support Arbitrary User IDs' section of:
// https://docs.openshift.com/container-platform/3.10/creating_images/guidelines.html
c := exec.Command("whoami")
c.Stdout = ioutil.Discard
c.Stderr = ioutil.Discard
if err := c.Run(); err != nil {
fmt.Fprintln(os.Stderr, "notice: failed to look up uid in /etc/passwd; enabling workaround")
home := fmt.Sprintf("/var/tmp/%s", user)
err := os.MkdirAll(home, 0755)
if err != nil {
return err
}
f, err := os.OpenFile("/etc/passwd", os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
return fmt.Errorf("opening /etc/passwd: %w", err)
}
defer f.Close()
id := os.Getuid()
buf := fmt.Sprintf("%s:x:%d:0:%s user:%s:/sbin/nologin\n", user, id, user, home)
if _, err = f.WriteString(buf); err != nil {
return err
}
}

return nil
}

func main() {
err := run(os.Args[1:])
if err != nil {
fmt.Fprintf(os.Stderr, "error: %v\n", err)
os.Exit(1)
}
}
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
module github.com/coreos/coreos-assembler

go 1.15
101 changes: 101 additions & 0 deletions internal/pkg/bashexec/bashexec.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// Package bashexec provides helpers to execute bash code.
// What this primarily offers over directly writing e.g. `exec.Command("bash")`
// is:
//
// - By default, all fragments are executed in "bash strict mode": http://redsymbol.net/articles/unofficial-bash-strict-mode/
// - The code encourages adding a "name" for in-memory scripts, similar to e.g.
// Ansible tasks as well as many CI systems like Github actions
// - The code to execute is piped to stdin instead of passed via `-c` which
// avoids argument length limits and makes the output of e.g. `ps` readable.
// - Scripts are assumed synchronous, and stdin/stdout/stderr are passed directly
// instead of piped.
// - We use prctl(PR_SET_PDEATHSIG) (assuming Linux) to lifecycle bind the script to the caller
//
package bashexec

import (
"fmt"
"io"
"os"
"os/exec"
"strings"
"syscall"
)

// StrictMode enables http://redsymbol.net/articles/unofficial-bash-strict-mode/
const StrictMode = "set -euo pipefail"

// BashRunner is a wrapper for executing in-memory bash scripts
type BashRunner struct {
name string
cmd *exec.Cmd
}

// NewBashRunner creates a bash executor from in-memory shell script.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not immediately clear to me (I feel like I should know this but don't) why we need to convert the shell script (file) to be in memory first. What benefits does this have? Should we add a comment/explanation?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We now have 3 different ways to interact with shell.

  • pkg/internal/cosash is a "co-processing" proxy for cmdlib.sh
  • pkg/internal/bashexec (what you're commenting on) is about a convenience wrapper for executing shell script embedded in memory in Go (which may or may not use cmdlib.sh, but mostly for things that aren't)
  • Running fully external scripts as a subprocess, things that usually live in /usr/lib/coreos-assembler/foo.sh

The rationale for in-memory embedding is to aid incremental porting of things like cmd-build.sh. Basically we might start by changing the option processing to be Go, but use Go file embedding to actually move the data from living in /usr/lib/coreos-assembler to being in the binary.

This makes it clearer that the goal is to use Go, not to have distinct shell scripts.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We now have 3 different ways to interact with shell.

  • pkg/internal/cosash is a "co-processing" proxy for cmdlib.sh

I feel like I kind of understand this but it's a struggle. This is brilliant and helps us get closer to our goal but I think we're starting to violate a basic principle here:

“Everyone knows that debugging is twice as hard as writing a
program in the first place. So if you're as clever as you
can be when you write it, how will you ever debug it?”

IOW, how many people on our team fully grok what is going on here and how many people can debug it?

  • pkg/internal/bashexec (what you're commenting on) is about a convenience wrapper for executing shell script embedded in memory in Go (which may or may not use cmdlib.sh, but mostly for things that aren't)

I guess I still don't really understand the "embedded in memory" part. Why can't we use a file on disk rather than the fd/3 magic. I'm missing something fundamental about why that part is required. Maybe the new process doesn't have access to the old filesystem view of the world?

  • Running fully external scripts as a subprocess, things that usually live in /usr/lib/coreos-assembler/foo.sh

I feel like this is the easiest option to understand, I was even able to figure it out myself on Friday, but obviously we aren't able to take as "incremental" of an approach with this strategy.

The rationale for in-memory embedding is to aid incremental porting of things like cmd-build.sh. Basically we might start by changing the option processing to be Go, but use Go file embedding to actually move the data from living in /usr/lib/coreos-assembler to being in the binary.

This makes it clearer that the goal is to use Go, not to have distinct shell scripts.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like I kind of understand this but it's a struggle. This is brilliant and helps us get closer to our goal

Hah, thanks - I appreciate the complement, seriously! I'll be honest I was quite happy when I typed out that code and it all basically worked the first time I ran it. But, it's probably because I have a lot of experience with gluing together code in multiple languages and have learned the hard way what works and what doesn't. (And really more generally, things like what can work but is overkill and hard to debug)

e.g. I also have worked on gobject introspection and https://github.com/containers/containers-image-proxy-rs

but I think we're starting to violate a basic principle here:

I think this is a valid point. But:

  • I personally can't think of a simpler approach that will also allow us to avoid duplicating or reimplementing the shell script. The first path would lead to a lot of extra typing and new bugs, and the latter would easily scope-creep into a wholesale rewrite that never works (and would be a continual conflict fest until it merged)
  • But on the flip side, I think with some rewriting in the near term future we can avoid the need for the special "cosash" co-processing path and just fork off "stateless" shell script, so this code will go away

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK I added godoc package level comments to both bashexec and cosash which should help this.

Copy link
Member

@dustymabe dustymabe Jul 11, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but I think we're starting to violate a basic principle here:

I think this is a valid point. But:

  • I personally can't think of a simpler approach that will also allow us to avoid duplicating or reimplementing the shell script. The first path would lead to a lot of extra typing and new bugs, and the latter would easily scope-creep into a wholesale rewrite that never works (and would be a continual conflict fest until it merged)

Right. I think we probably do end up doing this, but an alternative is that we go with the 'duplicating' approach with a plan and a timeline for migrating everything else. The problem here is I don't think we are in a position to commit to that.

Sorry for the noise here.

  • But on the flip side, I think with some rewriting in the near term future we can avoid the need for the special "cosash" co-processing path and just fork off "stateless" shell script, so this code will go away

👍

func NewBashRunner(name, src string, args ...string) (*BashRunner, error) {
// This will be proxied to fd 3
f, err := os.CreateTemp("", name)
if err != nil {
return nil, err
}
if _, err := io.Copy(f, strings.NewReader(src)); err != nil {
return nil, err
}
if err := os.Remove(f.Name()); err != nil {
return nil, err
}

bashCmd := fmt.Sprintf("%s\n. /proc/self/fd/3\n", StrictMode)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor: strict mode can be passed in on the cmdline directly, e.g. bash -euo pipefail -c ....

fullargs := append([]string{"-c", bashCmd, name}, args...)
cmd := exec.Command("/bin/bash", fullargs...)
cmd.SysProcAttr = &syscall.SysProcAttr{
Pdeathsig: syscall.SIGTERM,
}
cmd.Stdin = os.Stdin
cmd.ExtraFiles = append(cmd.ExtraFiles, f)

return &BashRunner{
name: name,
cmd: cmd,
}, nil
}

// Exec synchronously spawns the child process, passing stdin/stdout/stderr directly.
func (r *BashRunner) Exec() error {
r.cmd.Stdin = os.Stdin
r.cmd.Stdout = os.Stdout
r.cmd.Stderr = os.Stderr
err := r.cmd.Run()
if err != nil {
return fmt.Errorf("failed to execute internal script %s: %w", r.name, err)
}
return nil
}

// Run spawns the script, gathering stdout/stderr into a buffer that is displayed only on error.
func (r *BashRunner) Run() error {
buf, err := r.cmd.CombinedOutput()
if err != nil {
return fmt.Errorf("failed to execute internal script %s: %w\n%s", r.name, err, buf)
}
return nil
}

// Run spawns a named script (without any arguments),
// gathering stdout/stderr into a buffer that is displayed only on error.
func Run(name, cmd string) error {
sh, err := NewBashRunner(name, cmd)
if err != nil {
return err
}
return sh.Run()
}

// RunA spawns an anonymous script, and is otherwise the same as `Run`.
func RunA(cmd string) error {
sh, err := NewBashRunner("", cmd)
if err != nil {
return err
}
return sh.Run()
}
Loading