From 8006342f5a0f6d31a2b7fe3eb265d1c146ba537a Mon Sep 17 00:00:00 2001 From: Chance Bair Date: Fri, 2 Nov 2018 17:28:54 +0100 Subject: [PATCH] Fix docker cleanup race condition --- ci/build.py | 92 +++++++++++++++++++++++++++-------------------------- 1 file changed, 47 insertions(+), 45 deletions(-) diff --git a/ci/build.py b/ci/build.py index e2554d9b8cea..8f3fe2d1244e 100755 --- a/ci/build.py +++ b/ci/build.py @@ -281,7 +281,6 @@ def container_run(platform: str, # noinspection PyShadowingNames # runc is default (docker info | grep -i runtime) runtime = 'nvidia' - container = docker_client.containers.run( tag, runtime=runtime, @@ -299,52 +298,55 @@ def container_run(platform: str, {'bind': '/work/ccache', 'mode': 'rw'}, }, environment=environment) - logging.info("Started container: %s", trim_container_id(container.id)) - # Race condition: - # If the previous call is interrupted then it's possible that the container is not cleaned up - # We avoid by masking the signals temporarily - cleanup.add_container(container) - signal.pthread_sigmask(signal.SIG_UNBLOCK, {signal.SIGINT, signal.SIGTERM}) - # - ############################# - - stream = container.logs(stream=True, stdout=True, stderr=True) - sys.stdout.flush() - for chunk in stream: - sys.stdout.buffer.write(chunk) - sys.stdout.buffer.flush() - sys.stdout.flush() - stream.close() - try: - logging.info("Waiting for status of container %s for %d s.", - trim_container_id(container.id), - container_wait_s) - wait_result = container.wait(timeout=container_wait_s) - logging.info("Container exit status: %s", wait_result) - ret = wait_result.get('StatusCode', 200) - except Exception as e: - logging.exception(e) - ret = 150 - - # Stop try: - logging.info("Stopping container: %s", trim_container_id(container.id)) - container.stop() - except Exception as e: - logging.exception(e) - ret = 151 + logging.info("Started container: %s", trim_container_id(container.id)) + # Race condition: + # If the previous call is interrupted then it's possible that the container is not cleaned up + # We avoid by masking the signals temporarily + cleanup.add_container(container) + signal.pthread_sigmask(signal.SIG_UNBLOCK, {signal.SIGINT, signal.SIGTERM}) + # + ############################# + + stream = container.logs(stream=True, stdout=True, stderr=True) + sys.stdout.flush() + for chunk in stream: + sys.stdout.buffer.write(chunk) + sys.stdout.buffer.flush() + sys.stdout.flush() + stream.close() + try: + logging.info("Waiting for status of container %s for %d s.", + trim_container_id(container.id), + container_wait_s) + wait_result = container.wait(timeout=container_wait_s) + logging.info("Container exit status: %s", wait_result) + ret = wait_result.get('StatusCode', 200) + except Exception as e: + logging.exception(e) + ret = 150 - # Remove - try: - logging.info("Removing container: %s", trim_container_id(container.id)) - container.remove() - except Exception as e: - logging.exception(e) - ret = 152 - cleanup.remove_container(container) - containers = docker_client.containers.list() - if containers: - logging.info("Other running containers: %s", [trim_container_id(x.id) for x in containers]) + # Stop + try: + logging.info("Stopping container: %s", trim_container_id(container.id)) + container.stop() + except Exception as e: + logging.exception(e) + ret = 151 + + # Remove + try: + logging.info("Removing container: %s", trim_container_id(container.id)) + container.remove() + except Exception as e: + logging.exception(e) + ret = 152 + cleanup.remove_container(container) + containers = docker_client.containers.list() + if containers: + logging.info("Other running containers: %s", [trim_container_id(x.id) for x in containers]) + except docker.errors.NotFound as e: + logging.info("Container was stopped before cleanup started: %s", e) return ret