From f0d8dd5372d732c091e0cb113c0ef7cfd247842d Mon Sep 17 00:00:00 2001
From: PabloRapidScale
Date: Tue, 15 Oct 2024 15:47:52 -0700
Subject: [PATCH] Swarm mode modifications

---
 README.md         |  21 ++++
 db-auto-backup.py | 172 ++++++++++++++++++++++++++++++++++------------
 2 files changed, 149 insertions(+), 44 deletions(-)

diff --git a/README.md b/README.md
index 91e2911..bb123dc 100644
--- a/README.md
+++ b/README.md
@@ -51,6 +51,27 @@ services:
       - INCLUDE_LOGS=true
 ```
 
+#### Running in Docker Swarm
+
+To run the backup service in Docker Swarm, deploy it as a global service. One replica then runs on every node in the Swarm, and each replica backs up the matching containers on its own node. Note that Swarm ignores the container-level `restart:` option, so restarts are configured through `deploy.restart_policy` instead.
+
+```yml
+version: "3.9" # Use version 3.9 to support Swarm deploy settings
+
+services:
+  backup:
+    image: ghcr.io/realorangeone/db-auto-backup:latest
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+      - ./backups:/var/backups
+    environment:
+      - SUCCESS_HOOK_URL=https://hc-ping.com/1234
+      - INCLUDE_LOGS=true
+    deploy:
+      mode: global # Run one instance of the service on each node
+      restart_policy:
+        condition: on-failure # Restart on failure to ensure resilience
+```
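+
+Assuming the file above is saved as `docker-compose.yml`, the stack can then be deployed as follows (the stack name `db-auto-backup` is only an example):
+
+```sh
+docker stack deploy --compose-file docker-compose.yml db-auto-backup
+```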
+ """ names = set() - for tag in container.image.tags: - registry, image = docker.auth.resolve_repository_name(tag) - image, tag_name = image.split(":", 1) - names.add(image) + + if container.image.tags: + for tag in container.image.tags: + image_name = tag.split(":")[0].split("@")[0] + image_name = image_name.split("/")[-1] + names.add(image_name) + + if not names and container.attrs.get("Config", {}).get("Image"): + image_name = container.attrs["Config"]["Image"].split(":")[0].split("@")[0] + image_name = image_name.split("/")[-1] + names.add(image_name) + + if not names and container.name: + names.add(container.name) + return names +def is_swarm_mode() -> bool: + docker_client = docker.from_env() + info = docker_client.info() + return info.get("Swarm", {}).get("LocalNodeState") == "active" -@pycron.cron(SCHEDULE) -def backup(now: datetime) -> None: - print("Starting backup...") +def get_local_node_id() -> str: docker_client = docker.from_env() - containers = docker_client.containers.list() + info = docker_client.info() + return info["Swarm"]["NodeID"] - backed_up_containers = [] - print(f"Found {len(containers)} containers.") +def get_local_node_tasks() -> list: + docker_client = docker.from_env() + local_node_id = get_local_node_id() + services = docker_client.services.list() - for container in containers: - container_names = get_container_names(container) - backup_provider = get_backup_provider(container_names) - if backup_provider is None: - continue + local_tasks = [] + for service in services: + tasks = service.tasks() + for task in tasks: + if task["NodeID"] == local_node_id and task["Status"]["State"] == "running": + local_tasks.append(task) - backup_file = ( - BACKUP_DIR - / f"{container.name}.{backup_provider.file_extension}{get_compressed_file_extension(COMPRESSION)}" - ) - backup_temp_file_path = BACKUP_DIR / temp_backup_file_name() + return local_tasks - backup_command = backup_provider.backup_method(container) - _, output = container.exec_run(backup_command, stream=True, demux=True) - with open_file_compressed( - backup_temp_file_path, COMPRESSION - ) as backup_temp_file: - with tqdm.wrapattr( - backup_temp_file, - method="write", - desc=container.name, - disable=not SHOW_PROGRESS, - ) as f: - for stdout, _ in output: - if stdout is None: - continue - f.write(stdout) +def create_backup_file_name(container: Container, backup_provider: BackupProvider) -> Path: + """ + Create a backup file name with a timestamp prefix and the container name. + """ + timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') + container_name = container.name + return BACKUP_DIR / f"{timestamp}_{container_name}.{backup_provider.file_extension}{get_compressed_file_extension(COMPRESSION)}" - os.replace(backup_temp_file_path, backup_file) - if not SHOW_PROGRESS: - print(container.name) +@pycron.cron(SCHEDULE) +def backup(now: datetime) -> None: + print("Starting backup...") - backed_up_containers.append(container.name) + docker_client = docker.from_env() - duration = (datetime.now() - now).total_seconds() - print( - f"Backup of {len(backed_up_containers)} containers complete in {duration:.2f} seconds." 
+    local_tasks = []
+    for service in services:
+        tasks = service.tasks()
+        for task in tasks:
+            if task["NodeID"] == local_node_id and task["Status"]["State"] == "running":
+                local_tasks.append(task)
 
-        backup_file = (
-            BACKUP_DIR
-            / f"{container.name}.{backup_provider.file_extension}{get_compressed_file_extension(COMPRESSION)}"
-        )
-        backup_temp_file_path = BACKUP_DIR / temp_backup_file_name()
+    return local_tasks
+
 
-        backup_command = backup_provider.backup_method(container)
-        _, output = container.exec_run(backup_command, stream=True, demux=True)
-        with open_file_compressed(
-            backup_temp_file_path, COMPRESSION
-        ) as backup_temp_file:
-            with tqdm.wrapattr(
-                backup_temp_file,
-                method="write",
-                desc=container.name,
-                disable=not SHOW_PROGRESS,
-            ) as f:
-                for stdout, _ in output:
-                    if stdout is None:
-                        continue
-                    f.write(stdout)
+def create_backup_file_name(
+    container: Container, backup_provider: BackupProvider
+) -> Path:
+    """
+    Create a backup file name with a timestamp prefix and the container name.
+    """
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    return BACKUP_DIR / (
+        f"{timestamp}_{container.name}."
+        f"{backup_provider.file_extension}"
+        f"{get_compressed_file_extension(COMPRESSION)}"
+    )
+
 
-        os.replace(backup_temp_file_path, backup_file)
-        if not SHOW_PROGRESS:
-            print(container.name)
+@pycron.cron(SCHEDULE)
+def backup(now: datetime) -> None:
+    print("Starting backup...")
 
-        backed_up_containers.append(container.name)
+    docker_client = docker.from_env()
+    # Defined up front so the success hook at the end can always reference
+    # both lists, whichever branch ran.
+    backed_up_containers = []
+    backed_up_services = []
 
-    duration = (datetime.now() - now).total_seconds()
-    print(
-        f"Backup of {len(backed_up_containers)} containers complete in {duration:.2f} seconds."
-    )
+    if is_swarm_mode():
+        print("Running in Swarm mode, looking up tasks on this node...")
+        tasks = get_local_node_tasks()
+
+        for task in tasks:
+            task_container_id = task["Status"].get("ContainerStatus", {}).get("ContainerID")
+            if not task_container_id:
+                continue
+            try:
+                container = docker_client.containers.get(task_container_id)
+            except docker.errors.NotFound:
+                continue
+
+            container_names = get_container_names(container)
+            backup_provider = get_backup_provider(container_names)
+
+            if backup_provider is None:
+                continue
+
+            backup_file = create_backup_file_name(container, backup_provider)
+            backup_temp_file_path = BACKUP_DIR / temp_backup_file_name()
+
+            backup_command = backup_provider.backup_method(container)
+            _, output = container.exec_run(backup_command, stream=True, demux=True)
+
+            with open_file_compressed(
+                backup_temp_file_path, COMPRESSION
+            ) as backup_temp_file:
+                with tqdm.wrapattr(
+                    backup_temp_file,
+                    method="write",
+                    desc=container.name,
+                    disable=not SHOW_PROGRESS,
+                ) as f:
+                    for stdout, _ in output:
+                        if stdout is None:
+                            continue
+                        f.write(stdout)
+
+            os.replace(backup_temp_file_path, backup_file)
+            backed_up_services.append(container.name)
+
+        duration = (datetime.now() - now).total_seconds()
+        print(
+            f"Backup of {len(backed_up_services)} services complete in {duration:.2f} seconds."
+        )
+    else:
+        containers = docker_client.containers.list()
+        print(f"Found {len(containers)} containers.")
+
+        for container in containers:
+            container_names = get_container_names(container)
+            backup_provider = get_backup_provider(container_names)
+
+            if backup_provider is None:
+                continue
+
+            backup_file = create_backup_file_name(container, backup_provider)
+            backup_temp_file_path = BACKUP_DIR / temp_backup_file_name()
+
+            backup_command = backup_provider.backup_method(container)
+            _, output = container.exec_run(backup_command, stream=True, demux=True)
+
+            with open_file_compressed(
+                backup_temp_file_path, COMPRESSION
+            ) as backup_temp_file:
+                with tqdm.wrapattr(
+                    backup_temp_file,
+                    method="write",
+                    desc=container.name,
+                    disable=not SHOW_PROGRESS,
+                ) as f:
+                    for stdout, _ in output:
+                        if stdout is None:
+                            continue
+                        f.write(stdout)
+
+            os.replace(backup_temp_file_path, backup_file)
+            backed_up_containers.append(container.name)
+
+        duration = (datetime.now() - now).total_seconds()
+        print(
+            f"Backup of {len(backed_up_containers)} containers complete in {duration:.2f} seconds."
+        )
 
     if success_hook_url := get_success_hook_url():
         if INCLUDE_LOGS:
             response = requests.post(
-                success_hook_url, data="\n".join(backed_up_containers)
+                success_hook_url,
+                data="\n".join(backed_up_containers + backed_up_services),
             )
         else:
             response = requests.get(success_hook_url)