From 49b125e11b560c88a2353d31aa6524b4b818fc95 Mon Sep 17 00:00:00 2001
From: Christoph Herzog
Date: Wed, 15 Feb 2023 12:25:35 +0100
Subject: [PATCH] ci: Add a scheduled action that purges the custom CI cache

Adds a new Python script and a scheduled Github Actions workflow for
cleaning up the custom CI cache bucket.

Needed to prevent the cache bucket from growing infinitely large.
We currently use Cloudflare R2, which does not support any kind of
automatic object retention lifecycle.
---
 .github/s3-cache-cleanup.py                 | 44 +++++++++++++++++++++
 .github/workflows/cache-bucket-cleanup.yaml | 29 ++++++++++++++
 2 files changed, 73 insertions(+)
 create mode 100755 .github/s3-cache-cleanup.py
 create mode 100644 .github/workflows/cache-bucket-cleanup.yaml

diff --git a/.github/s3-cache-cleanup.py b/.github/s3-cache-cleanup.py
new file mode 100755
index 00000000000..c28d7a0d74f
--- /dev/null
+++ b/.github/s3-cache-cleanup.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+
+# Deletes all objects in an S3 bucket that are older than a given number of days.
+# Used for cleaning up the custom Github Actions cache.
+
+import boto3
+import datetime
+import os
+
+# Number of days to retain objects; connection settings come from the environment.
+days_to_retain = 7
+
+bucket_name = os.environ['AWS_BUCKET_NAME']
+access_key = os.environ['AWS_ACCESS_KEY_ID']
+secret_key = os.environ['AWS_SECRET_ACCESS_KEY']
+endpoint = os.environ['AWS_ENDPOINT']
+
+# Create a connection to the S3 service
+s3 = boto3.resource('s3',
+    endpoint_url = endpoint,
+    aws_access_key_id = access_key,
+    aws_secret_access_key = secret_key,
+    region_name = 'auto',
+)
+
+bucket = s3.Bucket(bucket_name)
+
+# Take "now" as an aware UTC time; stamping naive local time as UTC skews the cutoff.
+now_utc = datetime.datetime.now(datetime.timezone.utc)
+cutoff_date = now_utc - datetime.timedelta(days=days_to_retain)
+
+print(f'Deleting all objects in bucket {bucket_name} older than {cutoff_date}...')
+
+total_count = 0
+deleted_count = 0
+
+for obj in bucket.objects.all():
+    total_count += 1
+    if obj.last_modified < cutoff_date:
+        print(f'Deleting {obj.key}...')
+        obj.delete()
+        deleted_count += 1
+
+print(f'Complete! Deleted {deleted_count} objects out of a total {total_count}.')
diff --git a/.github/workflows/cache-bucket-cleanup.yaml b/.github/workflows/cache-bucket-cleanup.yaml
new file mode 100644
index 00000000000..03ae072afd7
--- /dev/null
+++ b/.github/workflows/cache-bucket-cleanup.yaml
@@ -0,0 +1,29 @@
+# Cleans up the custom Github Actions cache bucket.
+
+name: Actions Cache Bucket Cleanup
+
+on:
+  schedule:
+    # Run once a day.
+    - cron: "0 3 * * *"
+
+  # A workflow run is made up of one or more jobs that can run sequentially or in parallel
+jobs:
+  # This workflow contains a single job called "cron"
+  cron:
+    # The type of runner that the job will run on
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Install boto3 library
+        run: pip install boto3
+      - name: Clone repository
+        uses: actions/checkout@v3
+      - name: Run cleanup
+        env:
+          AWS_ENDPOINT: https://1541b1e8a3fc6ad155ce67ef38899700.r2.cloudflarestorage.com
+          AWS_ACCESS_KEY_ID: ${{ secrets.CLOUDFLARE_ARTIFACTS_CACHE_ACCESS_TOKEN }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.CLOUDFLARE_ARTIFACTS_CACHE_ACCESS_KEY }}
+          AWS_BUCKET_NAME: wasmer-rust-artifacts-cache
+        run: |
+          ./.github/s3-cache-cleanup.py