Skip to content

Commit

Permalink
Merge branch 'master' into related_entity_fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
fgregg committed Jul 23, 2023
2 parents 8a3ec87 + 21d36c2 commit c5b56fe
Show file tree
Hide file tree
Showing 72 changed files with 4,362 additions and 2,752 deletions.
5 changes: 2 additions & 3 deletions .github/workflows/package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ on:

jobs:
test:
runs-on: ubuntu-18.04
runs-on: ubuntu-20.04
services:
postgres:
image: postgis/postgis:10-2.5
Expand Down Expand Up @@ -40,7 +40,6 @@ jobs:
sudo apt update
sudo apt install -y gdal-bin
pip install .[dev] --pre Django==${{ matrix.django-series }}
pip install -e git+https://github.com/opencivicdata/python-opencivicdata.git#egg=opencivicdata
- name: Lint with flake8
run: |
flake8 pupa
Expand All @@ -56,7 +55,7 @@ jobs:
build:
needs: test
name: Build package and upload to PyPI
runs-on: ubuntu-18.04
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
- name: Build and publish
Expand Down
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# pupa changelog

## 0.11.0 - April 3 2023

Improvements:

* Add `pupa clean` command to delete database objects that haven't been seen in recent scrapes

## 0.10.2 - March 18 2021

Improvements:
Expand Down
2 changes: 1 addition & 1 deletion pupa/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.10.2' # pragma: no cover
__version__ = "0.11.0" # pragma: no cover
86 changes: 51 additions & 35 deletions pupa/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,36 +4,50 @@

class ScrapeReportInline(admin.TabularInline):
model = models.ScrapeReport
readonly_fields = ('scraper', 'args', 'start_time', 'end_time',
'get_object_list')
readonly_fields = ("scraper", "args", "start_time", "end_time", "get_object_list")

def has_add_permission(self, request):
return False

can_delete = False

def get_object_list(self, obj):
return '\n'.join('{} ({})'.format(o.object_type, o.count) for o in
obj.scraped_objects.all())
return "\n".join(
"{} ({})".format(o.object_type, o.count) for o in obj.scraped_objects.all()
)


class ImportObjectsInline(admin.TabularInline):
model = models.ImportObjects
readonly_fields = ('object_type', 'insert_count', 'update_count',
'noop_count', 'start_time', 'end_time')
readonly_fields = (
"object_type",
"insert_count",
"update_count",
"noop_count",
"start_time",
"end_time",
)

def has_add_permission(self, request):
return False

can_delete = False


@admin.register(models.RunPlan)
class RunPlanAdmin(admin.ModelAdmin):
actions = None

readonly_fields = ('jurisdiction', 'success', 'start_time', 'end_time',
'exception', 'traceback')
list_filter = ('jurisdiction__name', 'success')
list_display = ('jurisdiction', 'success', 'start_time')
readonly_fields = (
"jurisdiction",
"success",
"start_time",
"end_time",
"exception",
"traceback",
)
list_filter = ("jurisdiction__name", "success")
list_display = ("jurisdiction", "success", "start_time")
inlines = [
ScrapeReportInline,
ImportObjectsInline,
Expand All @@ -50,31 +64,33 @@ def has_add_permission(self, request):
class SessionDataQualityAdmin(admin.ModelAdmin):
actions = None

readonly_fields = ('legislative_session',
'bills_missing_actions',
'bills_missing_sponsors',
'bills_missing_versions',
'votes_missing_voters',
'votes_missing_bill',
'votes_missing_yes_count',
'votes_missing_no_count',
'votes_with_bad_counts',
'unmatched_sponsor_people',
'unmatched_sponsor_organizations',
'unmatched_voters',
)
list_display = ('jurisdiction_name',
'legislative_session',
'bills_missing_actions',
'bills_missing_sponsors',
'bills_missing_versions',
'votes_missing_voters',
'votes_missing_bill',
'votes_missing_yes_count',
'votes_missing_no_count',
'votes_with_bad_counts',
)
list_filter = ('legislative_session__jurisdiction__name',)
readonly_fields = (
"legislative_session",
"bills_missing_actions",
"bills_missing_sponsors",
"bills_missing_versions",
"votes_missing_voters",
"votes_missing_bill",
"votes_missing_yes_count",
"votes_missing_no_count",
"votes_with_bad_counts",
"unmatched_sponsor_people",
"unmatched_sponsor_organizations",
"unmatched_voters",
)
list_display = (
"jurisdiction_name",
"legislative_session",
"bills_missing_actions",
"bills_missing_sponsors",
"bills_missing_versions",
"votes_missing_voters",
"votes_missing_bill",
"votes_missing_yes_count",
"votes_missing_no_count",
"votes_with_bad_counts",
)
list_filter = ("legislative_session__jurisdiction__name",)

def jurisdiction_name(self, obj):
return obj.legislative_session.jurisdiction.name
43 changes: 25 additions & 18 deletions pupa/cli/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,34 @@
from django.conf import settings
from pupa.exceptions import CommandError

logger = logging.getLogger('pupa')
logger = logging.getLogger("pupa")

COMMAND_MODULES = (
'pupa.cli.commands.init',
'pupa.cli.commands.dbinit',
'pupa.cli.commands.update',
'pupa.cli.commands.party',
"pupa.cli.commands.init",
"pupa.cli.commands.dbinit",
"pupa.cli.commands.update",
"pupa.cli.commands.party",
"pupa.cli.commands.clean",
)


def main():
parser = argparse.ArgumentParser('pupa', description='pupa CLI')
parser.add_argument('--debug', action='store_true',
help='open debugger on error')
parser.add_argument('--loglevel', default='INFO', help=('set log level. options are: '
'DEBUG|INFO|WARNING|ERROR|CRITICAL '
'(default is INFO)'))
subparsers = parser.add_subparsers(dest='subcommand')
parser = argparse.ArgumentParser("pupa", description="pupa CLI")
parser.add_argument("--debug", action="store_true", help="open debugger on error")
parser.add_argument(
"--loglevel",
default="INFO",
help=(
"set log level. options are: "
"DEBUG|INFO|WARNING|ERROR|CRITICAL "
"(default is INFO)"
),
)
subparsers = parser.add_subparsers(dest="subcommand")

# configure Django before model imports
if os.environ.get("DJANGO_SETTINGS_MODULE") is None:
os.environ['DJANGO_SETTINGS_MODULE'] = 'pupa.settings'
os.environ["DJANGO_SETTINGS_MODULE"] = "pupa.settings"

subcommands = {}
for mod in COMMAND_MODULES:
Expand All @@ -42,23 +48,24 @@ def main():
args, other = parser.parse_known_args()

# set log level from command line
handler_level = getattr(logging, args.loglevel.upper(), 'INFO')
settings.LOGGING['handlers']['default']['level'] = handler_level
handler_level = getattr(logging, args.loglevel.upper(), "INFO")
settings.LOGGING["handlers"]["default"]["level"] = handler_level
logging.config.dictConfig(settings.LOGGING)

# turn debug on
if args.debug:
try:
debug_module = importlib.import_module('ipdb')
debug_module = importlib.import_module("ipdb")
except ImportError:
debug_module = importlib.import_module('pdb')
debug_module = importlib.import_module("pdb")

# turn on PDB-on-error mode
# stolen from http://stackoverflow.com/questions/1237379/
# if this causes problems in interactive mode check that page
def _tb_info(type, value, tb):
traceback.print_exception(type, value, tb)
debug_module.pm()

sys.excepthook = _tb_info

if not args.subcommand:
Expand All @@ -71,5 +78,5 @@ def _tb_info(type, value, tb):
sys.exit(1)


if __name__ == '__main__':
if __name__ == "__main__":
main()
3 changes: 1 addition & 2 deletions pupa/cli/commands/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
class BaseCommand(object):

def __init__(self, subparsers):
self.subparser = subparsers.add_parser(self.name, description=self.help)
self.add_args()
Expand All @@ -11,4 +10,4 @@ def add_argument(self, *args, **kwargs):
self.subparser.add_argument(*args, **kwargs)

def handle(self, args):
raise NotImplementedError('commands must implement handle(args)')
raise NotImplementedError("commands must implement handle(args)")
117 changes: 117 additions & 0 deletions pupa/cli/commands/clean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import sys
from datetime import datetime, timezone, timedelta

import django
from django.apps import apps
from .base import BaseCommand


def get_subclasses(app_list, abstract_class):
    """
    Collect every model in the given apps that derives from ``abstract_class``.

    Each app label in ``app_list`` is looked up in the Django app registry and
    its models are checked in order; ``abstract_class`` itself is never
    included in the returned list.
    """
    return [
        candidate
        for app_label in app_list
        for candidate in apps.get_app_config(app_label).get_models()
        if candidate is not abstract_class and issubclass(candidate, abstract_class)
    ]


class Command(BaseCommand):
    """
    ``pupa clean`` subcommand.

    Reports, or deletes after confirmation, database objects whose
    ``last_seen`` timestamp is older than a configurable number of days.
    """

    name = "clean"
    help = "Removes database objects that haven't been seen in recent scrapes"

    def add_args(self):
        """
        Register the ``--window``, ``--report`` and ``--noinput`` options.
        """
        self.add_argument(
            "--window",
            type=int,
            default=7,
            help=(
                "objects not seen in this many days will be deleted from the database"
            ),
        )
        self.add_argument(
            "--report",
            action="store_true",
            help=(
                "generate a report of what objects this command"
                " would delete without making any changes to the database"
            ),
        )
        self.add_argument(
            "--noinput",
            action="store_true",
            help="delete objects without getting user confirmation",
        )

    def get_stale_objects(self, window):
        """
        Find all database objects that haven't been seen in {window} days.

        This is a generator: objects are yielded one at a time, model by
        model, for every subclass of ``OCDBase`` in the ``core`` and
        ``legislative`` apps except those whose class name contains
        "Jurisdiction".
        """

        # Imported lazily rather than at module level -- presumably because
        # model imports need Django to be configured first (handle() calls
        # django.setup() before consuming this generator).
        from opencivicdata.core.models.base import OCDBase

        ocd_apps = ["core", "legislative"]
        # Check all subclasses of OCDBase
        models = get_subclasses(ocd_apps, OCDBase)

        # Compute the cutoff once so every model is compared against the
        # same instant.
        cutoff_date = datetime.now(tz=timezone.utc) - timedelta(days=window)

        for model in models:
            # Jurisdictions are protected from deletion
            if "Jurisdiction" in model.__name__:
                continue
            yield from model.objects.filter(last_seen__lte=cutoff_date).iterator()

    def remove_stale_objects(self, window):
        """
        Remove all database objects that haven't been seen in {window} days.
        """

        for obj in self.get_stale_objects(window):
            print(f"Deleting {obj}...")
            obj.delete()

    def report_stale_objects(self, window):
        """
        Print all database objects that haven't been seen in {window} days.
        """
        for obj in self.get_stale_objects(window):
            print(obj)

    def handle(self, args, other):
        """
        Run the command.

        With ``--report`` the stale objects are only printed. Otherwise they
        are deleted, either after an interactive "Y" confirmation or, with
        ``--noinput``, only when at most 10 objects would be removed.

        ``args`` is the parsed argument namespace (``window``, ``report``,
        ``noinput``); ``other`` is accepted but not used here.
        """
        django.setup()

        if args.report:
            print(
                "These objects have not been seen in a scrape within the last"
                f" {args.window} days:"
            )
            # The window must be passed through; calling this without it
            # raises TypeError.
            self.report_stale_objects(args.window)
        else:
            # Count by streaming so the full set of stale objects is never
            # held in memory at once.
            num_stale_objects = sum(1 for _ in self.get_stale_objects(args.window))

            if args.noinput:
                # Fail-safe to avoid deleting a large amount of objects
                # without explicit confirmation
                if num_stale_objects > 10:
                    print(
                        "This command would delete more than 10 objects."
                        " If you're sure, re-run without --noinput"
                        " to provide confirmation."
                    )
                    sys.exit(1)
            else:
                print(
                    f"This will permanently delete"
                    f" {num_stale_objects} objects from your database"
                    f" that have not been scraped within the last {args.window}"
                    " days. Are you sure? (Y/N)"
                )
                resp = input()
                if resp != "Y":
                    sys.exit()

            print(
                "Removing objects that haven't been seen in a scrape within"
                f" the last {args.window} days..."
            )
            self.remove_stale_objects(args.window)
Loading

0 comments on commit c5b56fe

Please sign in to comment.