
Merge pull request #95 from rafsaf/improve-file-folder-performance
Improve folder and file backup performance using symlinks, add ZIP_SKIP_INTEGRITY_CHECK, and do some refactoring
rafsaf authored Aug 13, 2023
2 parents c2b4483 + 1614827 commit 6100e8b
Showing 18 changed files with 72 additions and 56 deletions.
1 change: 0 additions & 1 deletion CNAME

This file was deleted.

8 changes: 4 additions & 4 deletions backuper/backup_targets/file.py
@@ -25,8 +25,8 @@ def __init__(
def _backup(self) -> Path:
out_file = core.get_new_backup_path(self.env_name, self.file.name)

shell_args = f"cp -f {self.file} {out_file}"
log.debug("start cp -f in subprocess: %s", shell_args)
core.run_subprocess(shell_args)
log.debug("finished cp -f, output: %s", out_file)
shell_create_file_symlink = f"ln -s {self.file} {out_file}"
log.debug("start ln in subprocess: %s", shell_create_file_symlink)
core.run_subprocess(shell_create_file_symlink)
log.debug("finished ln, output: %s", out_file)
return out_file
8 changes: 4 additions & 4 deletions backuper/backup_targets/folder.py
@@ -25,8 +25,8 @@ def __init__(
def _backup(self) -> Path:
out_file = core.get_new_backup_path(self.env_name, self.folder.name)

shell_args = f"cp -r {self.folder} {out_file}"
log.debug("start cp -r in subprocess: %s", shell_args)
core.run_subprocess(shell_args)
log.debug("finished cp -r, output: %s", out_file)
shell_create_dir_symlink = f"ln -s {self.folder} {out_file}"
log.debug("start ln in subprocess: %s", shell_create_dir_symlink)
core.run_subprocess(shell_create_dir_symlink)
log.debug("finished ln, output: %s", out_file)
return out_file
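
For illustration only (not part of this commit): a minimal pure-Python sketch of the same idea used in both the file and folder targets above, replacing the `cp -f` / `cp -r` copy with a symlink. The helper name is hypothetical, and it assumes the downstream archiving step dereferences the link when it reads the data.

from pathlib import Path


def backup_via_symlink(source: Path, out_file: Path) -> Path:
    # Equivalent in spirit to `ln -s {source} {out_file}`: instead of copying
    # the whole file or folder, create a symlink pointing at the original.
    # This is O(1) regardless of the target's size; the real data is read
    # through the link by whatever archives it afterwards.
    out_file.symlink_to(source.resolve(), target_is_directory=source.is_dir())
    return out_file
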
6 changes: 3 additions & 3 deletions backuper/backup_targets/mariadb.py
@@ -107,11 +107,11 @@ def _backup(self) -> Path:
name = f"{escaped_dbname}_{self.db_version}"
out_file = core.get_new_backup_path(self.env_name, name, sql=True)
db = shlex.quote(self.db)
shell_args = (
shell_mariadb_dump_db = (
f"mariadb-dump --defaults-file={self.option_file} "
f"--result-file={out_file} --verbose {db}"
)
log.debug("start mariadbdump in subprocess: %s", shell_args)
core.run_subprocess(shell_args)
log.debug("start mariadbdump in subprocess: %s", shell_mariadb_dump_db)
core.run_subprocess(shell_mariadb_dump_db)
log.debug("finished mariadbdump, output: %s", out_file)
return out_file
6 changes: 3 additions & 3 deletions backuper/backup_targets/mysql.py
@@ -108,11 +108,11 @@ def _backup(self) -> Path:
out_file = core.get_new_backup_path(self.env_name, name, sql=True)

db = shlex.quote(self.db)
shell_args = (
shell_mysqldump_db = (
f"mysqldump --defaults-file={self.option_file} "
f"--result-file={out_file} --verbose {db}"
)
log.debug("start mysqldump in subprocess: %s", shell_args)
core.run_subprocess(shell_args)
log.debug("start mysqldump in subprocess: %s", shell_mysqldump_db)
core.run_subprocess(shell_mysqldump_db)
log.debug("finished mysqldump, output: %s", out_file)
return out_file
6 changes: 3 additions & 3 deletions backuper/backup_targets/postgresql.py
@@ -124,8 +124,8 @@ def _backup(self) -> Path:
escaped_dbname = core.safe_text_version(self.db)
name = f"{escaped_dbname}_{self.db_version}"
out_file = core.get_new_backup_path(self.env_name, name, sql=True)
shell_args = f"pg_dump -v -O -d {self.escaped_conn_uri} -f {out_file}"
log.debug("start pg_dump in subprocess: %s", shell_args)
core.run_subprocess(shell_args)
shell_pg_dump_db = f"pg_dump -v -O -d {self.escaped_conn_uri} -f {out_file}"
log.debug("start pg_dump in subprocess: %s", shell_pg_dump_db)
core.run_subprocess(shell_pg_dump_db)
log.debug("finished pg_dump, output: %s", out_file)
return out_file
1 change: 1 addition & 0 deletions backuper/config.py
@@ -38,6 +38,7 @@ class BackupTargetEnum(StrEnum):
LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")
BACKUP_PROVIDER = os.environ.get("BACKUP_PROVIDER", "")
ZIP_ARCHIVE_PASSWORD = os.environ.get("ZIP_ARCHIVE_PASSWORD", "")
ZIP_SKIP_INTEGRITY_CHECK = os.environ.get("ZIP_SKIP_INTEGRITY_CHECK", "false")
SUBPROCESS_TIMEOUT_SECS: int = int(os.environ.get("SUBPROCESS_TIMEOUT_SECS", 60 * 60))
SIGTERM_TIMEOUT_SECS: float = float(os.environ.get("SIGTERM_TIMEOUT_SECS", 30))
ZIP_ARCHIVE_LEVEL: int = int(os.environ.get("ZIP_ARCHIVE_LEVEL", 3))
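
A hedged sketch (mine, not from the repository) of how the new flag behaves: it is read as a plain string, and, as the core.py change below shows, the integrity check runs only when the value is exactly the literal `false`.

import os

# Mirrors the config line added above; any value other than the literal
# string "false" (e.g. "true", "1", "yes") disables the integrity check.
ZIP_SKIP_INTEGRITY_CHECK = os.environ.get("ZIP_SKIP_INTEGRITY_CHECK", "false")


def integrity_check_enabled() -> bool:
    # Hypothetical helper matching the guard in run_create_zip_archive below.
    return ZIP_SKIP_INTEGRITY_CHECK == "false"
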
46 changes: 34 additions & 12 deletions backuper/core.py
@@ -4,6 +4,7 @@
import re
import secrets
import shlex
import shutil
import subprocess
from datetime import datetime
from pathlib import Path
@@ -46,6 +47,14 @@ def run_subprocess(shell_args: str) -> str:
return p.stdout


def remove_path(path: Path) -> None:
if path.exists():
if path.is_file() or path.is_symlink():
path.unlink()
else:
shutil.rmtree(path=path)


def get_new_backup_path(env_name: str, name: str, sql: bool = False) -> Path:
base_dir_path = config.CONST_BACKUP_FOLDER_PATH / env_name
base_dir_path.mkdir(mode=0o700, exist_ok=True, parents=True)
@@ -64,21 +73,34 @@ def get_new_backup_path(env_name: str, name: str, sql: bool = False) -> Path:
def run_create_zip_archive(backup_file: Path) -> Path:
seven_zip_path = seven_zip_bin_path()
out_file = Path(f"{backup_file}.zip")
log.debug("run_create_zip_archive start creating in subprocess: %s", backup_file)
log.info("start creating zip archive in subprocess: %s", backup_file)
zip_escaped_password = shlex.quote(config.ZIP_ARCHIVE_PASSWORD)
shell_args_create = (
shell_create_7zip_archive = (
f"{seven_zip_path} a -p{zip_escaped_password} "
f"-mx={config.ZIP_ARCHIVE_LEVEL} {out_file} {backup_file}"
)
run_subprocess(shell_args_create)
log.debug("run_create_zip_archive finished, output: %s", out_file)

log.debug("run_create_zip_archive start integriy test in subprocess: %s", out_file)
shell_args_integriy = f"{seven_zip_path} t -p{zip_escaped_password} {out_file}"
integrity_check_result = run_subprocess(shell_args_integriy)
run_subprocess(shell_create_7zip_archive)
log.info("finished zip archive creating")

if config.ZIP_SKIP_INTEGRITY_CHECK != "false":
return out_file

log.info(
(
"start zip archive integriy test on %s in subprocess, "
"you can control it using ZIP_SKIP_INTEGRITY_CHECK"
),
out_file,
)
shell_7zip_archive_integrity_check = (
f"{seven_zip_path} t -p{zip_escaped_password} {out_file}"
)
integrity_check_result = run_subprocess(shell_7zip_archive_integrity_check)
if "Everything is Ok" not in integrity_check_result: # pragma: no cover
raise AssertionError("zip arichive integrity fatal error")
log.debug("run_create_zip_archive finish integriy test in subprocess: %s", out_file)
raise AssertionError(
f"zip archive integrity test failed on {out_file}: {integrity_check_result}"
)
log.info("finished zip archive integrity test")
return out_file


@@ -160,8 +182,8 @@ def create_provider_model() -> ProviderModel:


def seven_zip_bin_path() -> Path:
shell_args_cpu_architecture = "dpkg --print-architecture"
cpu_arch = run_subprocess(shell_args_cpu_architecture).strip()
shell_get_cpu_architecture = "dpkg --print-architecture"
cpu_arch = run_subprocess(shell_get_cpu_architecture).strip()
seven_zip = config.CONST_BIN_ZIP_PATH / f"{cpu_arch}/7zz"
if not seven_zip.exists(): # pragma: no cover
raise RuntimeError(
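
For reference, a small usage sketch (illustrative, not part of the diff) of the new `core.remove_path` helper added above. It unifies the unlink/rmtree branches the upload providers previously carried, which matters now that file and folder backups are symlinks.

import tempfile
from pathlib import Path

from backuper import core  # assumes the backuper package is importable

tmp = Path(tempfile.mkdtemp())

regular_file = tmp / "backup.sql"
regular_file.write_text("dump")
core.remove_path(regular_file)  # plain file -> Path.unlink()

link = tmp / "backup_link"
link.symlink_to(tmp)
core.remove_path(link)  # symlink -> Path.unlink(), target left untouched

folder = tmp / "backup_dir"
folder.mkdir()
(folder / "data.sql").write_text("dump")
core.remove_path(folder)  # directory -> shutil.rmtree()
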
2 changes: 1 addition & 1 deletion backuper/main.py
@@ -137,7 +137,7 @@ def run_backup(target: BaseBackupTarget, provider: BaseUploadProvider) -> None:
env_name=target.env_name,
send_on_success=True,
):
provider.safe_clean(backup_file=backup_file, max_backups=target.max_backups)
provider.clean(backup_file=backup_file, max_backups=target.max_backups)

log.info(
"backup and upload finished, next backup of target `%s` is: %s",
2 changes: 1 addition & 1 deletion backuper/upload_providers/base_provider.py
@@ -24,7 +24,7 @@ def post_save(self, backup_file: Path) -> str:
raise

@final
def safe_clean(self, backup_file: Path, max_backups: int) -> None:
def clean(self, backup_file: Path, max_backups: int) -> None:
try:
return self._clean(backup_file=backup_file, max_backups=max_backups)
except Exception as err:
11 changes: 2 additions & 9 deletions backuper/upload_providers/debug.py
@@ -1,5 +1,4 @@
import logging
import shutil
from pathlib import Path

from backuper import config, core
@@ -25,21 +24,15 @@ def _post_save(self, backup_file: Path) -> str:
return str(zip_file)

def _clean(self, backup_file: Path, max_backups: int) -> None:
if backup_file.is_file():
backup_file.unlink()
else:
shutil.rmtree(backup_file)
core.remove_path(backup_file)
files: list[str] = []
for backup_path in backup_file.parent.iterdir():
files.append(str(backup_path.absolute()))
files.sort(reverse=True)
while len(files) > max_backups:
backup_to_remove = Path(files.pop())
try:
if backup_to_remove.is_file():
backup_to_remove.unlink()
else:
shutil.rmtree(backup_to_remove)
core.remove_path(backup_to_remove)
log.info("removed path %s", backup_to_remove)
except Exception as e: # pragma: no cover
log.error(
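
A simplified, standalone sketch (hypothetical names, not from the repository) of the retention step in `_clean` above: names are sorted in reverse so the newest backups come first, assuming the file names embed a sortable timestamp, and everything beyond `max_backups` is removed with `core.remove_path`.

from pathlib import Path

from backuper import core


def keep_newest(backup_dir: Path, max_backups: int) -> None:
    # Reverse-sorted absolute paths: newest first when names start with a
    # sortable timestamp; pop() then yields the oldest remaining backup.
    files = sorted((str(p.absolute()) for p in backup_dir.iterdir()), reverse=True)
    while len(files) > max_backups:
        core.remove_path(Path(files.pop()))
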
6 changes: 1 addition & 5 deletions backuper/upload_providers/google_cloud_storage.py
@@ -1,7 +1,6 @@
import base64
import logging
import os
import shutil
from pathlib import Path

import google.cloud.storage as storage
@@ -63,10 +62,7 @@ def _post_save(self, backup_file: Path) -> str:

def _clean(self, backup_file: Path, max_backups: int) -> None:
for backup_path in backup_file.parent.iterdir():
if backup_path.is_dir():
shutil.rmtree(backup_path)
else:
backup_path.unlink()
core.remove_path(backup_path)
log.info("removed %s from local disk", backup_path)

backup_list_cloud: list[str] = []
3 changes: 2 additions & 1 deletion docs/configuration.md
@@ -12,7 +12,7 @@ Environment variables
| ZIP_ARCHIVE_PASSWORD | string[**required**] | Zip archive password applied to all backups generated by this backuper instance. If it is lost, you lose access to your backups. | - |
| BACKUP_PROVIDER | string[**required**] | See the `Providers` chapter; the chosen backup provider, for example [GCS](./providers/google_cloud_storage.md). | - |
| BACKUP_MAX_NUMBER | int | How many backups can live at once for a backup target. Defaults to `7`. Note this must make sense with the cron expression you use. For example, if you want 7-day retention and make backups at 5:00, BACKUP_MAX_NUMBER=7 is fine, but if you make 4 backups per day, you will need BACKUP_MAX_NUMBER=28. | 7 |
| ROOT_MODE | bool | If false, process in container will start backuper using user with minimal permissions required. If `true` or anything different than `false`, it will run as root (it may help for example with file/directory backup permission issues in mounted volumes). | false |
| ROOT_MODE | bool | If `false`, the process in the container will start backuper as a user with the minimal required permissions. If `true`, it will run as root (this may help, for example, with file/directory backup permission issues in mounted volumes). | false |
| POSTGRESQL\_... | backup target syntax | PostgreSQL database target, see [PostgreSQL](./backup_targets/postgresql.md). | - |
| MYSQL\_... | backup target syntax | MySQL database target, see [MySQL](./backup_targets/mysql.md). | - |
| MARIADB\_... | backup target syntax | MariaDB database target, see [MariaDB](./backup_targets/mariadb.md). | - |
@@ -26,6 +26,7 @@ Environment variables
| ZIP_ARCHIVE_LEVEL | int[1-9] | Compression level of 7-zip via `-mx` option: `-mx[N] : set compression level: -mx1 (fastest) ... -mx9 (ultra)`. Defaults to `3` which should be sufficient and fast enough. | 3 |
| LOG_FOLDER_PATH | string | Path to store log files, for local development `./logs`, in container `/var/log/backuper`. | /var/log/backuper |
| SIGTERM_TIMEOUT_SECS | int | Time in seconds that backuper will wait on exit for ongoing backup threads before force killing them and exiting. | 30 |
| ZIP_SKIP_INTEGRITY_CHECK | bool | By default, after the 7zip archive is created, an integrity check runs on it. To opt out of this behaviour for performance reasons, set it to `true`. | false |

<br>
<br>
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -3,7 +3,7 @@ authors = ["Rafał Safin <[email protected]>"]
description = "A tool for performing scheduled database backups and transferring encrypted data to secure clouds, for home labs, hobby projects, etc., in environments such as k8s, docker, vms."
license = "GNU GPLv3"
name = "backuper"
version = "3.1"
version = "3.2"

[tool.poetry.dependencies]
croniter = "^1.3.7"
6 changes: 5 additions & 1 deletion tests/test_core.py
@@ -61,7 +61,11 @@ def test_get_new_backup_path_sql(caplog: LogCaptureFixture) -> None:
assert caplog.messages == []


def test_run_create_zip_archive(tmp_path: Path) -> None:
@pytest.mark.parametrize("integrity", ["false", "true"])
def test_run_create_zip_archive(
tmp_path: Path, integrity: str, monkeypatch: pytest.MonkeyPatch
) -> None:
monkeypatch.setattr(config, "ZIP_SKIP_INTEGRITY_CHECK", integrity)
fake_backup_file = tmp_path / "fake_backup"
with open(fake_backup_file, "w") as f:
f.write("abcdefghijk\n12345")
6 changes: 3 additions & 3 deletions tests/test_main.py
@@ -121,7 +121,7 @@ def test_main(monkeypatch: pytest.MonkeyPatch) -> None:


@pytest.mark.parametrize(
"make_backup_side_effect,post_save_side_effect,safe_clean_side_effect",
"make_backup_side_effect,post_save_side_effect,clean_side_effect",
[
(ValueError(), None, None),
(None, ValueError(), None),
@@ -131,7 +131,7 @@ def test_main(monkeypatch: pytest.MonkeyPatch) -> None:
def test_run_backup_notifications_fail_message_is_fired_when_it_fails(
make_backup_side_effect: Any | None,
post_save_side_effect: Any | None,
safe_clean_side_effect: Any | None,
clean_side_effect: Any | None,
monkeypatch: pytest.MonkeyPatch,
) -> None:
fail_message_mock = Mock()
@@ -151,7 +151,7 @@ def test_run_backup_notifications_fail_message_is_fired_when_it_fails(
monkeypatch.setattr(target, "_backup", backup_mock)
provider = UploadProviderLocalDebug()
monkeypatch.setattr(provider, "_post_save", Mock(side_effect=post_save_side_effect))
monkeypatch.setattr(provider, "_clean", Mock(side_effect=safe_clean_side_effect))
monkeypatch.setattr(provider, "_clean", Mock(side_effect=clean_side_effect))

with pytest.raises(ValueError):
main.run_backup(target=target, provider=provider)
4 changes: 2 additions & 2 deletions tests/test_storage_provider_gcs.py
@@ -92,7 +92,7 @@ def __init__(self, blob_name: str) -> None:
]


@pytest.mark.parametrize("gcs_method_name", ["_clean", "safe_clean"])
@pytest.mark.parametrize("gcs_method_name", ["_clean", "clean"])
def test_gcs_clean_file_and_short_blob_list(
tmp_path: Path, monkeypatch: pytest.MonkeyPatch, gcs_method_name: str
) -> None:
@@ -128,7 +128,7 @@ def test_gcs_clean_file_and_short_blob_list(
single_blob_mock.delete.assert_called_once_with()


@pytest.mark.parametrize("gcs_method_name", ["_clean", "safe_clean"])
@pytest.mark.parametrize("gcs_method_name", ["_clean", "clean"])
def test_gcs_clean_directory_and_long_blob_list(
tmp_path: Path, monkeypatch: pytest.MonkeyPatch, gcs_method_name: str
) -> None:
4 changes: 2 additions & 2 deletions tests/test_storage_provider_local.py
@@ -5,7 +5,7 @@
from backuper.upload_providers import UploadProviderLocalDebug


@pytest.mark.parametrize("method_name", ["_clean", "safe_clean"])
@pytest.mark.parametrize("method_name", ["_clean", "clean"])
def test_gcs_clean_file(
tmp_path: Path, monkeypatch: pytest.MonkeyPatch, method_name: str
) -> None:
@@ -30,7 +30,7 @@ def test_gcs_clean_file(
assert fake_backup_file_zip_path4.exists()


@pytest.mark.parametrize("method_name", ["_clean", "safe_clean"])
@pytest.mark.parametrize("method_name", ["_clean", "clean"])
def test_gcs_clean_folder(
tmp_path: Path, monkeypatch: pytest.MonkeyPatch, method_name: str
) -> None:
