Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add minimum retention days param to providers and update tests #107

Merged
merged 3 commits into from
Aug 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion backuper/backup_targets/base_target.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,13 @@
class BaseBackupTarget(ABC):
NAME: config.BackupTargetEnum

def __init__(self, cron_rule: str, env_name: str, max_backups: int) -> None:
def __init__(
self, cron_rule: str, env_name: str, max_backups: int, min_retention_days: int
) -> None:
self.cron_rule: str = cron_rule
self.env_name: str = env_name
self.max_backups: int = max_backups
self.min_retention_days: int = min_retention_days
self.last_backup_time: datetime = datetime.utcnow()
self.next_backup_time: datetime = self._get_next_backup_time()
log.info(
Expand Down
6 changes: 5 additions & 1 deletion backuper/backup_targets/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,16 @@ def __init__(
cron_rule: str,
env_name: str,
max_backups: int,
min_retention_days: int,
**kwargs: str | int,
) -> None:
self.cron_rule: str = cron_rule
self.file: Path = abs_path
super().__init__(
cron_rule=cron_rule, env_name=env_name, max_backups=max_backups
cron_rule=cron_rule,
env_name=env_name,
max_backups=max_backups,
min_retention_days=min_retention_days,
)

def _backup(self) -> Path:
Expand Down
6 changes: 5 additions & 1 deletion backuper/backup_targets/folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,16 @@ def __init__(
cron_rule: str,
env_name: str,
max_backups: int,
min_retention_days: int,
**kwargs: str | int,
) -> None:
self.cron_rule: str = cron_rule
self.folder: Path = abs_path
super().__init__(
cron_rule=cron_rule, env_name=env_name, max_backups=max_backups
cron_rule=cron_rule,
env_name=env_name,
max_backups=max_backups,
min_retention_days=min_retention_days,
)

def _backup(self) -> Path:
Expand Down
6 changes: 5 additions & 1 deletion backuper/backup_targets/mariadb.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,14 @@ def __init__(
cron_rule: str,
env_name: str,
max_backups: int,
min_retention_days: int,
**kwargs: str | int,
) -> None:
super().__init__(
cron_rule=cron_rule, env_name=env_name, max_backups=max_backups
cron_rule=cron_rule,
env_name=env_name,
max_backups=max_backups,
min_retention_days=min_retention_days,
)
self.cron_rule: str = cron_rule
self.user: str = user
Expand Down
6 changes: 5 additions & 1 deletion backuper/backup_targets/mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,14 @@ def __init__(
cron_rule: str,
env_name: str,
max_backups: int,
min_retention_days: int,
**kwargs: str | int,
) -> None:
super().__init__(
cron_rule=cron_rule, env_name=env_name, max_backups=max_backups
cron_rule=cron_rule,
env_name=env_name,
max_backups=max_backups,
min_retention_days=min_retention_days,
)
self.cron_rule: str = cron_rule
self.user: str = user
Expand Down
6 changes: 5 additions & 1 deletion backuper/backup_targets/postgresql.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,14 @@ def __init__(
cron_rule: str,
env_name: str,
max_backups: int,
min_retention_days: int,
**kwargs: str | int,
) -> None:
super().__init__(
cron_rule=cron_rule, env_name=env_name, max_backups=max_backups
cron_rule=cron_rule,
env_name=env_name,
max_backups=max_backups,
min_retention_days=min_retention_days,
)
self.cron_rule: str = cron_rule
self.user: str = user
Expand Down
3 changes: 1 addition & 2 deletions backuper/config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import logging
import logging.config
import re
from enum import StrEnum
from pathlib import Path
from typing import Literal
Expand All @@ -11,7 +10,6 @@
_log_levels = Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]

CONST_BASE_DIR = Path(__file__).resolve().parent.parent.absolute()
CONST_ENV_NAME_REGEX = re.compile(r"^[A-Za-z_0-9]{1,}$")
CONST_BIN_ZIP_PATH: Path = CONST_BASE_DIR / "bin/7zip"
CONST_BACKUP_FOLDER_PATH: Path = CONST_BASE_DIR / "data"
CONST_CONFIG_FOLDER_PATH: Path = CONST_BASE_DIR / "conf"
Expand Down Expand Up @@ -51,6 +49,7 @@ class Settings(BaseSettings):
SIGTERM_TIMEOUT_SECS: float = Field(ge=0, le=3600 * 24, default=30)
ZIP_ARCHIVE_LEVEL: int = Field(ge=1, le=9, default=3)
BACKUP_MAX_NUMBER: int = Field(ge=1, le=998, default=7)
BACKUP_MIN_RETENTION_DAYS: int = Field(ge=0, le=36600, default=3)
DISCORD_SUCCESS_WEBHOOK_URL: HttpUrl | None = None
DISCORD_FAIL_WEBHOOK_URL: HttpUrl | None = None
DISCORD_NOTIFICATION_MAX_MSG_LEN: int = Field(ge=150, le=10000, default=1500)
Expand Down
28 changes: 26 additions & 2 deletions backuper/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import shlex
import shutil
import subprocess
from datetime import datetime
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, TypeVar

Expand All @@ -18,7 +18,9 @@

log = logging.getLogger(__name__)

SAFE_LETTER_PATTERN = r"[^A-Za-z0-9_]*"
SAFE_LETTER_PATTERN = re.compile(r"[^A-Za-z0-9_]*")
DATETIME_BACKUP_FILE_PATTERN = re.compile(r"_[0-9]{8}_[0-9]{4}_")

_BM = TypeVar("_BM", bound=BaseModel)


Expand Down Expand Up @@ -194,3 +196,25 @@ def seven_zip_bin_path() -> Path:
f"unsuported architecture {cpu_arch}, 7zip not found at {seven_zip}"
)
return seven_zip


def file_before_retention_period_ends(
    backup_name: str, min_retention_days: int
) -> bool:
    """Report whether a backup is still inside its minimum retention window.

    The creation time is taken from the first ``_YYYYMMDD_HHMM_`` fragment
    found in ``backup_name`` (as matched by ``DATETIME_BACKUP_FILE_PATTERN``)
    and compared, as a naive datetime, against ``datetime.utcnow()``.
    NOTE(review): this presumes the timestamp in the file name was written
    in UTC -- confirm against the code that names backup files.

    Args:
        backup_name: Backup file name containing the timestamp fragment.
        min_retention_days: Number of days a backup must be kept.

    Returns:
        True when the current time is earlier than creation time plus
        ``min_retention_days``, False otherwise.

    Raises:
        ValueError: If no timestamp fragment is found in ``backup_name``
            (``strptime`` also raises it for digits that are not a valid
            date/time).
    """
    found = DATETIME_BACKUP_FILE_PATTERN.search(backup_name)
    stamp = found.group(0) if found is not None else ""
    if not stamp:  # pragma: no cover
        raise ValueError(
            f"unexpected backup file name, could not parse datetime: {backup_name}"
        )

    created_at = datetime.strptime(stamp, "_%Y%m%d_%H%M_")
    retention_ends_at = created_at + timedelta(days=min_retention_days)
    return datetime.utcnow() < retention_ends_at
6 changes: 5 additions & 1 deletion backuper/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,11 @@ def run_backup(target: BaseBackupTarget, provider: BaseUploadProvider) -> None:
env_name=target.env_name,
send_on_success=True,
):
provider.clean(backup_file=backup_file, max_backups=target.max_backups)
provider.clean(
backup_file=backup_file,
max_backups=target.max_backups,
min_retention_days=target.min_retention_days,
)

log.info(
"backup and upload finished, next backup of target `%s` is: %s",
Expand Down
16 changes: 6 additions & 10 deletions backuper/models/backup_target_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from croniter import croniter
from pydantic import (
BaseModel,
Field,
SecretStr,
computed_field,
field_validator,
Expand All @@ -15,9 +16,12 @@


class TargetModel(BaseModel):
env_name: str
env_name: str = Field(pattern=r"^[A-Za-z_0-9]{1,}$")
cron_rule: str
max_backups: int = config.options.BACKUP_MAX_NUMBER
max_backups: int = Field(ge=1, le=998, default=config.options.BACKUP_MAX_NUMBER)
min_retention_days: int = Field(
ge=0, le=36600, default=config.options.BACKUP_MIN_RETENTION_DAYS
)

@field_validator("cron_rule")
def cron_rule_is_valid(cls, cron_rule: str) -> str:
Expand All @@ -27,14 +31,6 @@ def cron_rule_is_valid(cls, cron_rule: str) -> str:
)
return cron_rule

@field_validator("env_name")
def env_name_is_valid(cls, env_name: str) -> str:
if not config.CONST_ENV_NAME_REGEX.match(env_name):
raise ValueError(
f"Env variable does not match regex {config.CONST_ENV_NAME_REGEX}: `{env_name}`"
)
return env_name

@computed_field() # type: ignore
@cached_property
def target_type(self) -> config.BackupTargetEnum:
Expand Down
26 changes: 22 additions & 4 deletions backuper/upload_providers/aws_s3.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import logging
from pathlib import Path
from typing import Any, TypedDict

import boto3
from typing import Any, TypedDict
from boto3.s3.transfer import TransferConfig

from backuper import config, core
from backuper.upload_providers.base_provider import BaseUploadProvider
from boto3.s3.transfer import TransferConfig

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -63,7 +64,9 @@ def _post_save(self, backup_file: Path) -> str:
log.info("uploaded %s to %s", zip_backup_file, backup_dest_in_bucket)
return backup_dest_in_bucket

def _clean(self, backup_file: Path, max_backups: int) -> None:
def _clean(
self, backup_file: Path, max_backups: int, min_retention_days: int
) -> None:
for backup_path in backup_file.parent.iterdir():
core.remove_path(backup_path)
log.info("removed %s from local disk", backup_path)
Expand All @@ -79,6 +82,18 @@ def _clean(self, backup_file: Path, max_backups: int) -> None:

while len(backup_list_cloud) > max_backups:
backup_to_remove = backup_list_cloud.pop()
file_name = backup_to_remove.split("/")[-1]
if core.file_before_retention_period_ends(
backup_name=file_name, min_retention_days=min_retention_days
):
log.info(
"there are more backups than max_backups (%s/%s), "
"but oldest cannot be removed due to min retention days",
len(backup_list_cloud),
max_backups,
)
break

items_to_delete.append({"Key": backup_to_remove})
log.info("backup %s will be deleted from aws s3 bucket", backup_to_remove)

Expand All @@ -92,4 +107,7 @@ def _clean(self, backup_file: Path, max_backups: int) -> None:
raise RuntimeError(
"Fail to delete backups from aws s3: %s", delete_response["Errors"]
)
log.info("backups were successfully deleted from aws s3 bucket")
log.info(
"%s backups were successfully deleted from aws s3 bucket",
len(items_to_delete),
)
14 changes: 11 additions & 3 deletions backuper/upload_providers/base_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,15 @@ def post_save(self, backup_file: Path) -> str:
raise

@final
def clean(self, backup_file: Path, max_backups: int) -> None:
def clean(
self, backup_file: Path, max_backups: int, min_retention_days: int
) -> None:
try:
return self._clean(backup_file=backup_file, max_backups=max_backups)
return self._clean(
backup_file=backup_file,
max_backups=max_backups,
min_retention_days=min_retention_days,
)
except Exception as err:
log.error(err, exc_info=True)
raise
Expand All @@ -36,5 +42,7 @@ def _post_save(self, backup_file: Path) -> str: # pragma: no cover
pass

@abstractmethod
def _clean(self, backup_file: Path, max_backups: int) -> None: # pragma: no cover
def _clean(
self, backup_file: Path, max_backups: int, min_retention_days: int
) -> None: # pragma: no cover
pass
14 changes: 13 additions & 1 deletion backuper/upload_providers/debug.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,26 @@ def _post_save(self, backup_file: Path) -> str:
zip_file = core.run_create_zip_archive(backup_file=backup_file)
return str(zip_file)

def _clean(self, backup_file: Path, max_backups: int) -> None:
def _clean(
self, backup_file: Path, max_backups: int, min_retention_days: int
) -> None:
core.remove_path(backup_file)
files: list[str] = []
for backup_path in backup_file.parent.iterdir():
files.append(str(backup_path.absolute()))
files.sort(reverse=True)
while len(files) > max_backups:
backup_to_remove = Path(files.pop())
if core.file_before_retention_period_ends(
backup_name=backup_to_remove.name, min_retention_days=min_retention_days
):
log.info(
"there are more backups than max_backups (%s/%s), "
"but oldest cannot be removed due to min retention days",
len(files),
max_backups,
)
break
try:
core.remove_path(backup_to_remove)
log.info("removed path %s", backup_to_remove)
Expand Down
16 changes: 15 additions & 1 deletion backuper/upload_providers/google_cloud_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@ def _post_save(self, backup_file: Path) -> str:
log.info("uploaded %s to %s", zip_backup_file, backup_dest_in_bucket)
return backup_dest_in_bucket

def _clean(self, backup_file: Path, max_backups: int) -> None:
def _clean(
self, backup_file: Path, max_backups: int, min_retention_days: int
) -> None:
for backup_path in backup_file.parent.iterdir():
core.remove_path(backup_path)
log.info("removed %s from local disk", backup_path)
Expand All @@ -74,6 +76,18 @@ def _clean(self, backup_file: Path, max_backups: int) -> None:
backup_list_cloud.sort(reverse=True)
while len(backup_list_cloud) > max_backups:
backup_to_remove = backup_list_cloud.pop()
file_name = backup_to_remove.split("/")[-1]
if core.file_before_retention_period_ends(
backup_name=file_name, min_retention_days=min_retention_days
):
log.info(
"there are more backups than max_backups (%s/%s), "
"but oldest cannot be removed due to min retention days",
len(backup_list_cloud),
max_backups,
)
break

blob = self.bucket.blob(backup_to_remove)
blob.delete()
log.info("deleted backup %s from google cloud storage", backup_to_remove)
11 changes: 6 additions & 5 deletions docs/backup_targets/directory.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@ DIRECTORY_SOME_STRING="abs_path=... cron_rule=..."

## Params

| Name | Type | Description | Default |
| :---------- | :------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :---------------- |
| abs_path | string[**requried**] | Absolute path to folder for backup. | - |
| cron_rule | string[**requried**] | Cron expression for backups, see [https://crontab.guru/](https://crontab.guru/) for help. | - |
| max_backups | int | Max number of backups stored in upload provider, if this number is exceeded, oldest one is removed, by default enviornment variable BACKUP_MAX_NUMBER, see [Configuration](./../configuration.md). | BACKUP_MAX_NUMBER |
| Name | Type | Description | Default |
| :----------------- | :------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :------------------------ |
| abs_path | string[**requried**] | Absolute path to folder for backup. | - |
| cron_rule | string[**requried**] | Cron expression for backups, see [https://crontab.guru/](https://crontab.guru/) for help. | - |
| max_backups        | int                  | Soft limit how many backups can live at once for backup target. Defaults to `7`. This must make sense with cron expression you use. For example, if you want to have `7` day retention, and make backups at 5:00, `max_backups=7` is fine, but if you make `4` backups per day, you would need `max_backups=28`. Limit is soft and can be exceeded if no backup is older than value specified in min_retention_days. Min `1` and max `998`. Defaults to environment variable BACKUP_MAX_NUMBER, see [Configuration](./../configuration.md). | BACKUP_MAX_NUMBER         |
| min_retention_days | int | Hard minimum backups lifetime in days. Backuper won't ever delete files before, regardles of other options. Min `0` and max `36600`. Defaults to enviornment variable BACKUP_MIN_RETENTION_DAYS, see [Configuration](./../configuration.md). | BACKUP_MIN_RETENTION_DAYS |


## Examples
Expand Down
Loading