From d83d0abfbf6f914303c1b33d954aa115accbd999 Mon Sep 17 00:00:00 2001 From: Matthew Heon Date: Tue, 14 May 2019 14:54:21 -0400 Subject: [PATCH 1/2] Ensure that start() in StartAndAttach() is locked StartAndAttach() runs start() in a goroutine, which can allow it to fire after the caller returns - and thus, after the defer to unlock the container lock has fired. The start() call _must_ occur while the container is locked, or else state inconsistencies may occur. Fixes #3114 Signed-off-by: Matthew Heon --- libpod/container_api.go | 12 ++++++++++-- libpod/container_attach_linux.go | 17 +++++++++++++---- libpod/container_attach_unsupported.go | 4 +++- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/libpod/container_api.go b/libpod/container_api.go index 5bb610aabf2..06a31da118c 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -7,6 +7,7 @@ import ( "io/ioutil" "os" "strconv" + "sync" "time" "github.com/containers/libpod/libpod/driver" @@ -119,13 +120,20 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *AttachStreams, attachChan := make(chan error) + // We need to ensure that we don't return until start() fired in attach. + // Use a WaitGroup to sync this. + wg := new(sync.WaitGroup) + wg.Add(1) + // Attach to the container before starting it go func() { - if err := c.attach(streams, keys, resize, true); err != nil { + if err := c.attach(streams, keys, resize, true, wg); err != nil { attachChan <- err } close(attachChan) }() + + wg.Wait() c.newContainerEvent(events.Attach) return attachChan, nil } @@ -398,7 +406,7 @@ func (c *Container) Attach(streams *AttachStreams, keys string, resize <-chan re return errors.Wrapf(ErrCtrStateInvalid, "can only attach to created or running containers") } defer c.newContainerEvent(events.Attach) - return c.attach(streams, keys, resize, false) + return c.attach(streams, keys, resize, false, nil) } // Mount mounts a container's filesystem on the host diff --git a/libpod/container_attach_linux.go b/libpod/container_attach_linux.go index 3ff6ddc762d..7e9b7697b39 100644 --- a/libpod/container_attach_linux.go +++ b/libpod/container_attach_linux.go @@ -8,6 +8,7 @@ import ( "net" "os" "path/filepath" + "sync" "github.com/containers/libpod/pkg/kubeutils" "github.com/containers/libpod/utils" @@ -31,7 +32,7 @@ const ( // Attach to the given container // Does not check if state is appropriate -func (c *Container) attach(streams *AttachStreams, keys string, resize <-chan remotecommand.TerminalSize, startContainer bool) error { +func (c *Container) attach(streams *AttachStreams, keys string, resize <-chan remotecommand.TerminalSize, startContainer bool, wg *sync.WaitGroup) error { if !streams.AttachOutput && !streams.AttachError && !streams.AttachInput { return errors.Wrapf(ErrInvalidArg, "must provide at least one stream to attach to") } @@ -48,12 +49,17 @@ func (c *Container) attach(streams *AttachStreams, keys string, resize <-chan re logrus.Debugf("Attaching to container %s", c.ID()) - return c.attachContainerSocket(resize, detachKeys, streams, startContainer) + return c.attachContainerSocket(resize, detachKeys, streams, startContainer, wg) } -// attachContainerSocket connects to the container's attach socket and deals with the IO +// attachContainerSocket connects to the container's attach socket and deals with the IO. +// wg is only required if startContainer is true // TODO add a channel to allow interrupting -func (c *Container) attachContainerSocket(resize <-chan remotecommand.TerminalSize, detachKeys []byte, streams *AttachStreams, startContainer bool) error { +func (c *Container) attachContainerSocket(resize <-chan remotecommand.TerminalSize, detachKeys []byte, streams *AttachStreams, startContainer bool, wg *sync.WaitGroup) error { + if startContainer && wg == nil { + return errors.Wrapf(ErrInternal, "wait group not passed when startContainer set") + } + kubeutils.HandleResizing(resize, func(size remotecommand.TerminalSize) { controlPath := filepath.Join(c.bundlePath(), "ctl") controlFile, err := os.OpenFile(controlPath, unix.O_WRONLY, 0) @@ -84,10 +90,13 @@ func (c *Container) attachContainerSocket(resize <-chan remotecommand.TerminalSi } defer conn.Close() + // If starting was requested, start the container and notify when that's + // done. if startContainer { if err := c.start(); err != nil { return err } + wg.Done() } receiveStdoutError := make(chan error) diff --git a/libpod/container_attach_unsupported.go b/libpod/container_attach_unsupported.go index 068652b2987..9e8badeafc9 100644 --- a/libpod/container_attach_unsupported.go +++ b/libpod/container_attach_unsupported.go @@ -3,9 +3,11 @@ package libpod import ( + "sync" + "k8s.io/client-go/tools/remotecommand" ) -func (c *Container) attach(streams *AttachStreams, keys string, resize <-chan remotecommand.TerminalSize, startContainer bool) error { +func (c *Container) attach(streams *AttachStreams, keys string, resize <-chan remotecommand.TerminalSize, startContainer bool, wg *sync.WaitGroup) error { return ErrNotImplemented } From caa894f4456b21bc01bfded2213802809a081b48 Mon Sep 17 00:00:00 2001 From: Chris Evich Date: Mon, 13 May 2019 15:34:12 -0400 Subject: [PATCH 2/2] DO NOT MERGE - FOCUSED TESTING ON notify_socket FLAKE Signed-off-by: Chris Evich --- .cirrus.yml | 340 ++--------------------------- Makefile | 2 +- contrib/cirrus/integration_test.sh | 35 +-- contrib/cirrus/rootless_test.sh | 1 - 4 files changed, 22 insertions(+), 356 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index 514889969c1..99c4ea271cd 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -104,337 +104,35 @@ env: CIRRUS_WORKING_DIR CIRRUS_HTTP_CACHE_HOST PACKER_BUILDS BUILT_IMAGE_SUFFIX XDG_DATA_DIRS XDG_RUNTIME_DIR XDG_SESSION_ID ROOTLESS_USER +gce_instance: + image_project: "libpod-218412" + zone: "us-central1-a" # Required by Cirrus for the time being + cpu: 2 + memory: "4Gb" + disk: 200 + # A matrix could be used here, for now just one VM + image_name: "${FEDORA_CACHE_IMAGE_NAME}" -# Every *_task runs in parallel in separate VMsd. The name prefix only for reference -# in WebUI, and will be followed by matrix details. This task gates all others with -# quick format, lint, and unit tests on the standard platform. -gating_task: - - env: - CIRRUS_WORKING_DIR: "/usr/src/libpod" - GOSRC: "/go/src/github.com/containers/libpod" - - # Runs within Cirrus's "community cluster" - container: - image: "quay.io/libpod/gate:latest" - cpu: 4 - memory: 12 - - timeout_in: 20m - - gate_script: - # N/B: entrypoint.sh resets $GOSRC (same as make clean) - - '/usr/local/bin/entrypoint.sh install.tools |& ${TIMESTAMP}' - - '/usr/local/bin/entrypoint.sh validate |& ${TIMESTAMP}' - - '/usr/local/bin/entrypoint.sh lint |& ${TIMESTAMP}' - - '${CIRRUS_WORKING_DIR}/${SCRIPT_BASE}/test/test_dot_cirrus_yaml.py |& ${TIMESTAMP}' - - # This task builds Podman with different buildtags to ensure the build does - # not break. It also verifies all sub-commands have man pages. - build_script: - - '/usr/local/bin/entrypoint.sh podman |& ${TIMESTAMP}' - - 'cd $GOSRC && ./hack/podman-commands.sh |& ${TIMESTAMP}' - # N/B: need 'clean' so some commited files are re-generated. - - '/usr/local/bin/entrypoint.sh clean podman-remote |& ${TIMESTAMP}' - - '/usr/local/bin/entrypoint.sh clean podman BUILDTAGS="exclude_graphdriver_devicemapper selinux seccomp" |& ${TIMESTAMP}' - - '/usr/local/bin/entrypoint.sh podman-remote-darwin |& ${TIMESTAMP}' - - '/usr/local/bin/entrypoint.sh podman-remote-windows |& ${TIMESTAMP}' - - # Verify expected bash environment (-o pipefail) - pipefail_enabledscript: 'if /bin/false | /bin/true; then echo "pipefail fault" && exit 72; fi' - - on_failure: - failed_master_script: '$CIRRUS_WORKING_DIR/$SCRIPT_BASE/notice_master_failure.sh' - - -# This task runs `make vendor` followed by ./hack/tree_status.sh to check -# whether the git tree is clean. The reasoning for that is to make sure -# that the vendor.conf, the code and the vendored packages in ./vendor are -# in sync at all times. -vendor_task: - - depends_on: - - "gating" - - env: - CIRRUS_WORKING_DIR: "/usr/src/libpod" - - # Runs within Cirrus's "community cluster" - container: - image: "quay.io/libpod/gate:latest" - cpu: 4 - memory: 12 - - timeout_in: 30m - - vendor_script: - - '/usr/local/bin/entrypoint.sh .install.vndr |& ${TIMESTAMP}' - - '/usr/local/bin/entrypoint.sh vendor |& ${TIMESTAMP}' - - 'cd /go/src/github.com/containers/libpod && ./hack/tree_status.sh |& ${TIMESTAMP}' - - on_failure: - failed_master_script: '$CIRRUS_WORKING_DIR/$SCRIPT_BASE/notice_master_failure.sh |& ${TIMESTAMP}' - - -# This task runs `make varlink_api_generate` followed by ./hack/tree_status.sh to check -# whether the git tree is clean. -varlink_api_task: - - depends_on: - - "gating" - - env: - CIRRUS_WORKING_DIR: "/usr/src/libpod" - # Used by tree_status.sh - SUGGESTION: 'remove API.md, then "make varlink_api_generate" and commit changes.' - - # Runs within Cirrus's "community cluster" - container: - image: "quay.io/libpod/gate:latest" - cpu: 4 - memory: 12 - - timeout_in: 10m - - vendor_script: - - '/usr/local/bin/entrypoint.sh varlink_api_generate' - - 'cd /go/src/github.com/containers/libpod && ./hack/tree_status.sh' - - on_failure: - failed_master_script: '$CIRRUS_WORKING_DIR/$SCRIPT_BASE/notice_master_failure.sh' - - -build_each_commit_task: - - depends_on: - - "gating" - - "vendor" - - "varlink_api" - - # $CIRRUS_BASE_BRANCH is only set when testing a PR - only_if: $CIRRUS_BRANCH != 'master' - - gce_instance: - image_project: "libpod-218412" - zone: "us-central1-a" # Required by Cirrus for the time being - cpu: 8 - memory: "8Gb" - disk: 200 - image_name: "${FEDORA_CACHE_IMAGE_NAME}" - - timeout_in: 30m - - setup_environment_script: '$SCRIPT_BASE/setup_environment.sh |& ${TIMESTAMP}' - build_each_commit_script: - - 'git fetch --depth $CIRRUS_CLONE_DEPTH origin $CIRRUS_BASE_BRANCH |& ${TIMESTAMP}' - - 'env GOPATH=/var/tmp/go/ make build-all-new-commits GIT_BASE_BRANCH=origin/$CIRRUS_BASE_BRANCH |& ${TIMESTAMP}' - - on_failure: - failed_master_script: '$CIRRUS_WORKING_DIR/$SCRIPT_BASE/notice_master_failure.sh' - - -# Update metadata on VM images referenced by this repository state -meta_task: - - depends_on: - - "gating" - - container: - image: "quay.io/libpod/imgts:latest" # see contrib/imgts - cpu: 1 - memory: 1 - - env: - # Space-separated list of images used by this repository state - IMGNAMES: "${ACTIVE_CACHE_IMAGE_NAMES}" - BUILDID: "${CIRRUS_BUILD_ID}" - REPOREF: "${CIRRUS_CHANGE_IN_REPO}" - GCPJSON: ENCRYPTED[950d9c64ad78f7b1f0c7e499b42dc058d2b23aa67e38b315e68f557f2aba0bf83068d4734f7b1e1bdd22deabe99629df] - GCPNAME: ENCRYPTED[b05d469a0dba8cb479cb00cc7c1f6747c91d17622fba260a986b976aa6c817d4077eacffd4613d6d5f23afc4084fab1d] - GCPPROJECT: ENCRYPTED[7c80e728e046b1c76147afd156a32c1c57d4a1ac1eab93b7e68e718c61ca8564fc61fef815952b8ae0a64e7034b8fe4f] - CIRRUS_CLONE_DEPTH: 1 # source not used - - script: '/usr/local/bin/entrypoint.sh |& ${TIMESTAMP}' - - -# This task does the unit and integration testing for every platform -testing_task: - - depends_on: - - "gating" - - "varlink_api" - - "vendor" - - "build_each_commit" - - gce_instance: - image_project: "libpod-218412" - zone: "us-central1-a" # Required by Cirrus for the time being - cpu: 2 - memory: "4Gb" - disk: 200 # see https://developers.google.com/compute/docs/disks#performance - # Generate multiple parallel tasks, covering all possible - # 'matrix' combinations. - matrix: - # Images are generated separately, from build_images_task (below) - image_name: "${FEDORA_CACHE_IMAGE_NAME}" - image_name: "${PRIOR_FEDORA_CACHE_IMAGE_NAME}" - image_name: "${UBUNTU_CACHE_IMAGE_NAME}" - - # TODO: Make these work (also optional_testing_task below) - # image_name: "${PRIOR_RHEL_CACHE_IMAGE_NAME}" - # image_name: "${RHEL_CACHE_IMAGE_NAME}" - # image_name: "${CENTOS_CACHE_IMAGE_NAME}" - - timeout_in: 120m - - # Every *_script runs in sequence, for each task. The name prefix is for - # WebUI reference. The values may be strings... - setup_environment_script: '$SCRIPT_BASE/setup_environment.sh |& ${TIMESTAMP}' - unit_test_script: '$SCRIPT_BASE/unit_test.sh |& ${TIMESTAMP}' - integration_test_script: '$SCRIPT_BASE/integration_test.sh |& ${TIMESTAMP}' - audit_log_script: 'cat /var/log/audit/audit.log || cat /var/log/kern.log' - journalctl_b_script: 'journalctl -b' - - on_failure: - failed_master_script: '$CIRRUS_WORKING_DIR/$SCRIPT_BASE/notice_master_failure.sh' - # Job has already failed, don't fail again and miss collecting data - failed_audit_log_script: 'cat /var/log/audit/audit.log || cat /var/log/kern.log || echo "Uh oh, cat audit.log failed"' - failed_journalctl_b_script: 'journalctl -b || echo "Uh oh, journalctl -b failed"' - - -# This task executes tests under unique environments/conditions special_testing_task: - - depends_on: - - "gating" - - "varlink_api" - - "vendor" - - "build_each_commit" - - gce_instance: - image_project: "libpod-218412" - zone: "us-central1-a" # Required by Cirrus for the time being - cpu: 2 - memory: "4Gb" - disk: 200 - # A matrix could be used here, for now just one VM - image_name: "${FEDORA_CACHE_IMAGE_NAME}" - env: + SPECIALMODE: 'rootless' # See docs matrix: - SPECIALMODE: 'rootless' # See docs - SPECIALMODE: 'in_podman' # See docs - + JUNK: 0 + JUNK: 1 + JUNK: 2 + JUNK: 3 + JUNK: 4 + JUNK: 5 + JUNK: 6 + JUNK: 7 + JUNK: 8 + JUNK: 9 timeout_in: 120m - setup_environment_script: '$SCRIPT_BASE/setup_environment.sh |& ${TIMESTAMP}' integration_test_script: '$SCRIPT_BASE/integration_test.sh |& ${TIMESTAMP}' audit_log_script: 'cat /var/log/audit/audit.log || cat /var/log/kern.log' journalctl_b_script: 'journalctl -b' - on_failure: failed_master_script: '$CIRRUS_WORKING_DIR/$SCRIPT_BASE/notice_master_failure.sh' - # Job has already failed, don't fail again and miss collecting data failed_audit_log_script: 'cat /var/log/audit/audit.log || cat /var/log/kern.log || echo "Uh oh, cat audit.log failed"' failed_journalctl_b_script: 'journalctl -b || echo "Uh oh, journalctl -b failed"' - - -# Because system tests are stored within the repository, it is sometimes -# necessary to execute them within a PR to validate changes. -optional_testing_task: - - # Only run system tests in PRs (not on merge) if magic string is present - # in the PR description. Post-merge system testing is assumed to happen - # later from OS distribution's build systems. - only_if: >- - $CIRRUS_BRANCH != 'master' && - $CIRRUS_CHANGE_MESSAGE =~ '.*\*\*\*\s*CIRRUS:\s*SYSTEM\s*TEST\s*\*\*\*.*' - - gce_instance: - image_project: "libpod-218412" - matrix: - image_name: "${FEDORA_CACHE_IMAGE_NAME}" - image_name: "${PRIOR_FEDORA_CACHE_IMAGE_NAME}" - image_name: "${UBUNTU_CACHE_IMAGE_NAME}" - # TODO: Make these work (also testing_task above) - # image_name: "${RHEL_CACHE_IMAGE_NAME}" - # image_name: "${PRIOR_RHEL_CACHE_IMAGE_NAME}" - # image_name: "${CENTOS_CACHE_IMAGE_NAME}" - - timeout_in: 60m - - setup_environment_script: '$SCRIPT_BASE/setup_environment.sh |& ${TIMESTAMP}' - system_test_script: '$SCRIPT_BASE/system_test.sh |& ${TIMESTAMP}' - - -# Build new cache-images for future PR testing, but only after a PR merge. -# The cache-images save install/setup time needed test every PR. The 'active' images -# are selected by the 'image_name' items tasks above. Currently this requires -# manually updating the names, but this could be automated (see comment below). -cache_images_task: - # Only produce new cache-images after a PR merge, and if a magic string - # is present in the most recent commit-message. - only_if: >- - $CIRRUS_BRANCH == 'master' && - $CIRRUS_CHANGE_MESSAGE =~ '.*\*\*\*\s*CIRRUS:\s*REBUILD\s*IMAGES\s*\*\*\*.*' - - # Require tests to pass first. - depends_on: - - "gating" - - "testing" - - # VMs created by packer are not cleaned up by cirrus - auto_cancellation: $CI != "true" - - gce_instance: - image_project: "libpod-218412" - zone: "us-central1-a" # Required by Cirrus for the time being - cpu: 4 - memory: "4Gb" - disk: 200 - image_name: "${IMAGE_BUILDER_CACHE_IMAGE_NAME}" - # Additional permissions for building GCE images, within a GCE VM - scopes: - - compute - - devstorage.full_control - environment_script: '$SCRIPT_BASE/setup_environment.sh |& ${TIMESTAMP}' - build_vm_images_script: '$SCRIPT_BASE/build_vm_images.sh |& ${TIMESTAMP}' - - # TODO,Continuous Delivery: Automatically open a libpod PR after using 'sed' to replace - # the image_names with the new (just build) images. That will - # cause a new round of testing to happen (via the PR) using - # the new images. When all is good, the PR may be manually - # merged so all PR testing uses the new images. The script - # names (below) describe their purpose in this workflow. - # deploy_images_script: - # - clone_podman_release_branch.sh - # - modify_cirrus_yaml_image_names.sh - # - commit_and_create_upstream_pr.sh - - on_failure: - failed_master_script: '$CIRRUS_WORKING_DIR/$SCRIPT_BASE/notice_master_failure.sh |& ${TIMESTAMP}' - - -# Post message to IRC if everything passed -success_task: - - only_if: $CIRRUS_BRANCH != 'master' - - depends_on: # ignores any dependent task conditions - - "gating" - - "varlink_api" - - "vendor" - - "build_each_commit_task" - - "testing" - - "rootless_testing_task" - - "optional_testing" - - env: - CIRRUS_WORKING_DIR: "/usr/src/libpod" - - container: - image: "quay.io/libpod/gate:latest" - cpu: 1 - memory: 1 - - success_script: '$SCRIPT_BASE/success.sh |& ${TIMESTAMP}' diff --git a/Makefile b/Makefile index 10ba1424885..7135fb10b91 100644 --- a/Makefile +++ b/Makefile @@ -207,7 +207,7 @@ localunit: test/goecho/goecho varlink_generate ./contrib/cirrus/lib.sh.t ginkgo: - ginkgo -v -tags "$(BUILDTAGS)" $(GINKGOTIMEOUT) -cover -flakeAttempts 3 -progress -trace -noColor -nodes 3 test/e2e/. + ginkgo -v -tags "$(BUILDTAGS)" $(GINKGOTIMEOUT) -cover -progress -trace -noColor -focus='podman run notify_socket' test/e2e/. ginkgo-remote: ginkgo -v -tags "$(BUILDTAGS) remoteclient" $(GINKGOTIMEOUT) -cover -flakeAttempts 3 -progress -trace -noColor test/e2e/. diff --git a/contrib/cirrus/integration_test.sh b/contrib/cirrus/integration_test.sh index 5b73f0c6c57..69b48fe9edb 100755 --- a/contrib/cirrus/integration_test.sh +++ b/contrib/cirrus/integration_test.sh @@ -9,20 +9,7 @@ cd "$GOSRC" if [[ "$SPECIALMODE" == "in_podman" ]] then - set -x - ${CONTAINER_RUNTIME} run --rm --privileged --net=host \ - -v $GOSRC:$GOSRC:Z \ - --workdir $GOSRC \ - -e "CGROUP_MANAGER=cgroupfs" \ - -e "STORAGE_OPTIONS=--storage-driver=vfs" \ - -e "CRIO_ROOT=$GOSRC" \ - -e "PODMAN_BINARY=/usr/bin/podman" \ - -e "CONMON_BINARY=/usr/libexec/podman/conmon" \ - -e "DIST=$OS_RELEASE_ID" \ - -e "CONTAINER_RUNTIME=$CONTAINER_RUNTIME" \ - ${OS_RELEASE_ID}podmanbuild bash $GOSRC/$SCRIPT_BASE/container_test.sh -b -i -t -n - - exit $? + die 4 "NOT SUPPORTED" elif [[ "$SPECIALMODE" == "rootless" ]] then req_env_var ROOTLESS_USER @@ -32,23 +19,5 @@ then $GOSRC/$SCRIPT_BASE/rootless_test.sh exit $? else - set -x - make - make install PREFIX=/usr ETCDIR=/etc - make test-binaries - clean_env - - case "${OS_RELEASE_ID}-${OS_RELEASE_VER}" in - ubuntu-18) ;; - fedora-29) ;& # Continue to the next item - fedora-28) ;& - centos-7) ;& - rhel-7) - make podman-remote - install bin/podman-remote /usr/bin - ;; - *) bad_os_id_ver ;; - esac - make localintegration - exit $? + die 5 "NOT SUPPORTED" fi diff --git a/contrib/cirrus/rootless_test.sh b/contrib/cirrus/rootless_test.sh index eab06bac005..5a2c81d86e3 100755 --- a/contrib/cirrus/rootless_test.sh +++ b/contrib/cirrus/rootless_test.sh @@ -22,4 +22,3 @@ make make varlink_generate make test-binaries make ginkgo -make ginkgo-remote