Skip to content

Prune orphaned plugin instance files

Jennings Zhang edited this page Sep 1, 2023 · 4 revisions

Files created by plugin instances are not removed from Swift when the plugin instance is deleted. This page provides scripts, to be run in the manage.py shell, which prune these orphaned plugin instance files.

Method 1: Remove By User

Warning: this can take a while and use a lot of memory, depending on the number of files in Swift.

# Outside of `manage.py shell` the Django app registry must be loaded *before*
# any model module is imported, otherwise the model import below raises
# AppRegistryNotReady. Where Django is already set up this call is redundant.
# https://stackoverflow.com/a/27194927
import django
django.setup()

from plugininstances.models import PluginInstanceFile
from core.storage import connect_storage
from django.conf import settings

# username of user who owns the feeds of the plugin instances you want to prune
USER = 'rudolph'

swift_manager = connect_storage(settings)

# Only the stored path of each file is needed, so read the `fname` column
# directly and stream it instead of building a model instance for every row.
known_files = frozenset(
    PluginInstanceFile.objects.values_list('fname', flat=True).iterator()
)
swift_files = swift_manager.ls(USER)
# An orphan is a file under one of this user's feeds which no
# PluginInstanceFile row refers to.
feed_prefix = f'{USER}/feed_'
orphans = [f for f in swift_files if f.startswith(feed_prefix) and f not in known_files]

# optionally, save to a file for review (one orphan path per line)
with open('/tmp/orphans.txt', 'w') as review_file:
    review_file.writelines(f'{path}\n' for path in orphans)

# optionally, count how much you're going to delete
connection = swift_manager.get_connection()
container = swift_manager.container_name
total_size = 0
for orphan_name in orphans:  # one head_object call per orphan: this takes a while
    headers = connection.head_object(container, orphan_name)
    total_size += int(headers['content-length'])

print(f'Found {total_size / 1e9:.3f}GB of orphaned data.')

# are you sure you want to do this?
total = len(orphans)
# Count from 1 so the progress line ends at "N / N" rather than "N-1 / N",
# and flush because the line has no trailing newline to trigger a flush.
for i, orphaned_file in enumerate(orphans, start=1):
    swift_manager.delete_obj(orphaned_file)
    print(f'\rDeleting {i} / {total}', end='', flush=True)

print(' done. ')

Method 2: Remove for All Users, Outdated Swift Code

The code below affects all users, has progress bars, and is more tightly coupled to Swift and a deprecated version of CUBE. Before running this code, install tqdm:

pip install tqdm

Code:

from typing import FrozenSet

from plugininstances.models import PluginInstanceFile
from core.swiftmanager import SwiftManager
from django.conf import settings
from tqdm import tqdm

swift_manager = SwiftManager(settings.SWIFT_CONTAINER_NAME, settings.SWIFT_CONNECTION_PARAMS)
conn = swift_manager.get_connection()

# takes a while and a lot of RAM
print('Listing all files in Swift container...')
# List the container that swift_manager was constructed with, rather than a
# hard-coded name, so the listing and the manager always refer to the same one.
swift_container = conn.get_container(settings.SWIFT_CONTAINER_NAME, full_listing=True)

# the second element of the result is the list of per-object records
swift_files = swift_container[1]
# Map object name -> size in bytes for every object whose path, after the
# first component, starts with 'feed_'.
with tqdm(swift_files, desc='Checking which files were from a feed...') as pbar:
    swift_feed_files = {
        file_info['name']: file_info['bytes']
        for file_info in pbar
        # partition() yields '' for a name without '/', so such objects are
        # skipped instead of raising IndexError as split(...)[1] would
        if file_info['name'].partition('/')[2].startswith('feed_')
    }

# Only the stored path of each file is needed, so read the `fname` column
# directly and stream it instead of building a model instance for every row.
stored_names = PluginInstanceFile.objects.values_list('fname', flat=True)
with tqdm(stored_names.iterator(), total=stored_names.count(), desc='Listing files in DB...') as pbar:
    db_feed_files: FrozenSet[str] = frozenset(pbar)

# Orphans are the feed files present in Swift which the DB does not know about.
with tqdm(desc='Getting orphans...'):
    orphans = swift_feed_files.keys() - db_feed_files

# Add up the sizes recorded in the Swift listing for the orphaned names only.
orphan_bytes = sum(size for name, size in swift_feed_files.items() if name in orphans)
print(f'Found {len(orphans)} orphans, {orphan_bytes / 1e9:.2f}GB data')

# Delete every orphan through the manager, one object per progress-bar tick.
with tqdm(orphans, desc='Deleting orphans from Swift...') as progress:
    for orphan_name in progress:
        swift_manager.delete_obj(orphan_name)
Clone this wiki locally