From 7d43b7a73f1c60a3ff79376d175b34e3882fc27f Mon Sep 17 00:00:00 2001 From: msj Date: Mon, 20 Feb 2023 12:05:15 -0500 Subject: [PATCH 1/9] Add pupa clean CLI command --- pupa/cli/__main__.py | 1 + pupa/cli/commands/clean.py | 103 +++++++++++++++++++++++++++++++++ pupa/tests/clean/test_clean.py | 51 ++++++++++++++++ setup.py | 1 + 4 files changed, 156 insertions(+) create mode 100644 pupa/cli/commands/clean.py create mode 100644 pupa/tests/clean/test_clean.py diff --git a/pupa/cli/__main__.py b/pupa/cli/__main__.py index 09cd450..baf3868 100644 --- a/pupa/cli/__main__.py +++ b/pupa/cli/__main__.py @@ -14,6 +14,7 @@ "pupa.cli.commands.dbinit", "pupa.cli.commands.update", "pupa.cli.commands.party", + "pupa.cli.commands.clean", ) diff --git a/pupa/cli/commands/clean.py b/pupa/cli/commands/clean.py new file mode 100644 index 0000000..d29b9b9 --- /dev/null +++ b/pupa/cli/commands/clean.py @@ -0,0 +1,103 @@ +import itertools +from datetime import datetime, timezone, timedelta + +import django +from django.apps import apps +from .base import BaseCommand + + +def get_subclasses(app_list, abstract_class): + """ + Finds and returns all subclasses of an abstract class. + """ + result = [] + for app in app_list: + for model in apps.get_app_config(app).get_models(): + if issubclass(model, abstract_class) and model is not abstract_class: + result.append(model) + return result + + +def get_stale_objects(window): + """ + Find all database objects that haven't seen been in {window} days. + """ + + from opencivicdata.core.models.base import OCDBase + + ocd_apps = ["core", "legislative"] + # Check all subclasses of OCDBase + models = get_subclasses(ocd_apps, OCDBase) + + results = [] + for model in models: + # Jurisdictions are protected from deletion + if "Jurisdiction" not in model.__name__: + cutoff_date = datetime.now(tz=timezone.utc) - timedelta(days=window) + results.append(model.objects.filter(last_seen__lte=cutoff_date)) + + return itertools.chain(*results) + + +def remove_stale_objects(window): + """ + Remove all database objects that haven't seen been in {window} days. + """ + for obj in get_stale_objects(window): + print(f"Deleting {obj}...") + obj.delete() + + +class Command(BaseCommand): + name = "clean" + help = "Removes database objects that haven't been seen in recent scrapes" + + def add_args(self): + self.add_argument( + "--window", + type=int, + default=7, + help=( + "Objects not seen in this many days will be deleted from the database" + ), + ) + self.add_argument( + "--report", + action="store_true", + help=( + "Will only generate a report of what objects this command" + "would delete without making any changes to the database" + ), + ) + self.add_argument( + "--noinput", + action="store_true", + help="Will delete objects without getting user confirmation", + ) + + def handle(self, args, other): + django.setup() + + if args.report: + print( + "These objects have not been seen in a scrape within the last" + f" {args.window} days:" + ) + for obj in get_stale_objects(args.window): + print(obj) + else: + if not args.noinput: + print( + "This will permanently delete all objects from your database" + f"that have not been scraped within the last {args.window}" + " days. Are you sure? (Y/N)" + ) + resp = input() + if resp != "Y": + return + + print( + "Removing objects that haven't been seen in a scrape within" + f" the last {args.window} days..." + ) + remove_stale_objects(args.window) diff --git a/pupa/tests/clean/test_clean.py b/pupa/tests/clean/test_clean.py new file mode 100644 index 0000000..3f859a5 --- /dev/null +++ b/pupa/tests/clean/test_clean.py @@ -0,0 +1,51 @@ +import pytest +from datetime import datetime, timezone, timedelta +from freezegun import freeze_time + +from opencivicdata.core.models import Person, Organization, Jurisdiction, Division + +from pupa.cli.commands.clean import get_stale_objects, remove_stale_objects + + +def create_jurisdiction(): + Division.objects.create(id="ocd-division/country:us", name="USA") + return Jurisdiction.objects.create(id="jid", division_id="ocd-division/country:us") + + +@pytest.mark.django_db +def test_get_stale_objects(): + j = create_jurisdiction() + o = Organization.objects.create(name="WWE", jurisdiction_id="jid") + p = Person.objects.create(name="George Washington", family_name="Washington") + m = p.memberships.create(organization=o) + + expected_stale_objects = {p, o, m} + + a_week_from_now = datetime.now(tz=timezone.utc) + timedelta(days=7) + with freeze_time(a_week_from_now): + p = Person.objects.create(name="Thomas Jefferson", family_name="Jefferson") + j.save() + p.memberships.create(organization=o) + assert set(get_stale_objects(7)) == expected_stale_objects + + +@pytest.mark.django_db +def test_remove_stale_objects(): + j = create_jurisdiction() + o = Organization.objects.create(name="WWE", jurisdiction_id="jid") + p = Person.objects.create(name="George Washington", family_name="Washington") + m = p.memberships.create(organization=o) + + expected_stale_objects = {p, o, m} + + a_week_from_now = datetime.now(tz=timezone.utc) + timedelta(days=7) + with freeze_time(a_week_from_now): + p = Person.objects.create(name="Thomas Jefferson", family_name="Jefferson") + p.memberships.create(organization=o) + + j.save() + + remove_stale_objects(7) + for obj in expected_stale_objects: + was_deleted = not type(obj).objects.filter(id=obj.id).exists() + assert was_deleted diff --git a/setup.py b/setup.py index f1b4dd7..230df21 100644 --- a/setup.py +++ b/setup.py @@ -32,6 +32,7 @@ 'pytest>=3.6', 'pytest-cov', 'pytest-django', + 'freezegun', 'coveralls', 'coverage<=6.5.0', 'flake8', From 6b3784353706c58317455ba142437bc653d4e942 Mon Sep 17 00:00:00 2001 From: msj Date: Mon, 20 Feb 2023 12:17:43 -0500 Subject: [PATCH 2/9] Fix help tags --- pupa/cli/commands/clean.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pupa/cli/commands/clean.py b/pupa/cli/commands/clean.py index d29b9b9..6913a8e 100644 --- a/pupa/cli/commands/clean.py +++ b/pupa/cli/commands/clean.py @@ -58,21 +58,21 @@ def add_args(self): type=int, default=7, help=( - "Objects not seen in this many days will be deleted from the database" + "objects not seen in this many days will be deleted from the database" ), ) self.add_argument( "--report", action="store_true", help=( - "Will only generate a report of what objects this command" - "would delete without making any changes to the database" + "generate a report of what objects this command" + " would delete without making any changes to the database" ), ) self.add_argument( "--noinput", action="store_true", - help="Will delete objects without getting user confirmation", + help="delete objects without getting user confirmation", ) def handle(self, args, other): @@ -89,7 +89,7 @@ def handle(self, args, other): if not args.noinput: print( "This will permanently delete all objects from your database" - f"that have not been scraped within the last {args.window}" + f" that have not been scraped within the last {args.window}" " days. Are you sure? (Y/N)" ) resp = input() From 11ffc831da045f710a67c2735d9c0c686ac3bd3c Mon Sep 17 00:00:00 2001 From: msj Date: Tue, 14 Mar 2023 08:38:53 -0400 Subject: [PATCH 3/9] Use generator --- pupa/cli/commands/clean.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pupa/cli/commands/clean.py b/pupa/cli/commands/clean.py index 6913a8e..879b79a 100644 --- a/pupa/cli/commands/clean.py +++ b/pupa/cli/commands/clean.py @@ -1,4 +1,3 @@ -import itertools from datetime import datetime, timezone, timedelta import django @@ -29,20 +28,18 @@ def get_stale_objects(window): # Check all subclasses of OCDBase models = get_subclasses(ocd_apps, OCDBase) - results = [] for model in models: # Jurisdictions are protected from deletion if "Jurisdiction" not in model.__name__: cutoff_date = datetime.now(tz=timezone.utc) - timedelta(days=window) - results.append(model.objects.filter(last_seen__lte=cutoff_date)) - - return itertools.chain(*results) + yield from model.objects.filter(last_seen__lte=cutoff_date).iterator() def remove_stale_objects(window): """ Remove all database objects that haven't seen been in {window} days. """ + for obj in get_stale_objects(window): print(f"Deleting {obj}...") obj.delete() From 8ad4c19ccc4791e67f2325a9e9f8c61c0a577b55 Mon Sep 17 00:00:00 2001 From: msj Date: Tue, 14 Mar 2023 08:42:43 -0400 Subject: [PATCH 4/9] Refactor clean command --- pupa/cli/commands/clean.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/pupa/cli/commands/clean.py b/pupa/cli/commands/clean.py index 879b79a..8664d20 100644 --- a/pupa/cli/commands/clean.py +++ b/pupa/cli/commands/clean.py @@ -35,16 +35,6 @@ def get_stale_objects(window): yield from model.objects.filter(last_seen__lte=cutoff_date).iterator() -def remove_stale_objects(window): - """ - Remove all database objects that haven't seen been in {window} days. - """ - - for obj in get_stale_objects(window): - print(f"Deleting {obj}...") - obj.delete() - - class Command(BaseCommand): name = "clean" help = "Removes database objects that haven't been seen in recent scrapes" @@ -72,6 +62,22 @@ def add_args(self): help="delete objects without getting user confirmation", ) + def remove_stale_objects(window): + """ + Remove all database objects that haven't seen been in {window} days. + """ + + for obj in get_stale_objects(window): + print(f"Deleting {obj}...") + obj.delete() + + def report_stale_objects(window): + """ + Print all database objects that haven't seen been in {window} days. + """ + for obj in get_stale_objects(window): + print(obj) + def handle(self, args, other): django.setup() @@ -80,8 +86,7 @@ def handle(self, args, other): "These objects have not been seen in a scrape within the last" f" {args.window} days:" ) - for obj in get_stale_objects(args.window): - print(obj) + self.report_stale_objects() else: if not args.noinput: print( @@ -97,4 +102,4 @@ def handle(self, args, other): "Removing objects that haven't been seen in a scrape within" f" the last {args.window} days..." ) - remove_stale_objects(args.window) + self.remove_stale_objects(args.window) From 1ef9a90062c5ca469b08a44e4e82aeaab1c97e6b Mon Sep 17 00:00:00 2001 From: msj Date: Tue, 14 Mar 2023 14:36:17 -0400 Subject: [PATCH 5/9] Refactor clean tests --- pupa/tests/clean/test_clean.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/pupa/tests/clean/test_clean.py b/pupa/tests/clean/test_clean.py index 3f859a5..0e75c28 100644 --- a/pupa/tests/clean/test_clean.py +++ b/pupa/tests/clean/test_clean.py @@ -1,10 +1,12 @@ import pytest +import argparse + from datetime import datetime, timezone, timedelta from freezegun import freeze_time from opencivicdata.core.models import Person, Organization, Jurisdiction, Division -from pupa.cli.commands.clean import get_stale_objects, remove_stale_objects +from pupa.cli.commands.clean import Command def create_jurisdiction(): @@ -12,8 +14,24 @@ def create_jurisdiction(): return Jurisdiction.objects.create(id="jid", division_id="ocd-division/country:us") +@pytest.fixture +def subparsers(): + parser = argparse.ArgumentParser("pupa", description="pupa CLI") + parser.add_argument("--debug", action="store_true", help="open debugger on error") + parser.add_argument( + "--loglevel", + default="INFO", + help=( + "set log level. options are: " + "DEBUG|INFO|WARNING|ERROR|CRITICAL " + "(default is INFO)" + ), + ) + return parser.add_subparsers(dest="subcommand") + + @pytest.mark.django_db -def test_get_stale_objects(): +def test_get_stale_objects(subparsers): j = create_jurisdiction() o = Organization.objects.create(name="WWE", jurisdiction_id="jid") p = Person.objects.create(name="George Washington", family_name="Washington") @@ -26,11 +44,11 @@ def test_get_stale_objects(): p = Person.objects.create(name="Thomas Jefferson", family_name="Jefferson") j.save() p.memberships.create(organization=o) - assert set(get_stale_objects(7)) == expected_stale_objects + assert set(Command(subparsers).get_stale_objects(7)) == expected_stale_objects @pytest.mark.django_db -def test_remove_stale_objects(): +def test_remove_stale_objects(subparsers): j = create_jurisdiction() o = Organization.objects.create(name="WWE", jurisdiction_id="jid") p = Person.objects.create(name="George Washington", family_name="Washington") @@ -45,7 +63,7 @@ def test_remove_stale_objects(): j.save() - remove_stale_objects(7) + Command(subparsers).remove_stale_objects(7) for obj in expected_stale_objects: was_deleted = not type(obj).objects.filter(id=obj.id).exists() assert was_deleted From f7519596c115fda9773a6efb4e8db4da6b5d3d64 Mon Sep 17 00:00:00 2001 From: msj Date: Tue, 14 Mar 2023 16:33:13 -0400 Subject: [PATCH 6/9] Refactor clean command --- pupa/cli/commands/clean.py | 46 +++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/pupa/cli/commands/clean.py b/pupa/cli/commands/clean.py index 8664d20..3b58a80 100644 --- a/pupa/cli/commands/clean.py +++ b/pupa/cli/commands/clean.py @@ -1,3 +1,4 @@ +import sys from datetime import datetime, timezone, timedelta import django @@ -17,24 +18,6 @@ def get_subclasses(app_list, abstract_class): return result -def get_stale_objects(window): - """ - Find all database objects that haven't seen been in {window} days. - """ - - from opencivicdata.core.models.base import OCDBase - - ocd_apps = ["core", "legislative"] - # Check all subclasses of OCDBase - models = get_subclasses(ocd_apps, OCDBase) - - for model in models: - # Jurisdictions are protected from deletion - if "Jurisdiction" not in model.__name__: - cutoff_date = datetime.now(tz=timezone.utc) - timedelta(days=window) - yield from model.objects.filter(last_seen__lte=cutoff_date).iterator() - - class Command(BaseCommand): name = "clean" help = "Removes database objects that haven't been seen in recent scrapes" @@ -62,20 +45,37 @@ def add_args(self): help="delete objects without getting user confirmation", ) - def remove_stale_objects(window): + def get_stale_objects(self, window): + """ + Find all database objects that haven't seen been in {window} days. + """ + + from opencivicdata.core.models.base import OCDBase + + ocd_apps = ["core", "legislative"] + # Check all subclasses of OCDBase + models = get_subclasses(ocd_apps, OCDBase) + + for model in models: + # Jurisdictions are protected from deletion + if "Jurisdiction" not in model.__name__: + cutoff_date = datetime.now(tz=timezone.utc) - timedelta(days=window) + yield from model.objects.filter(last_seen__lte=cutoff_date).iterator() + + def remove_stale_objects(self, window): """ Remove all database objects that haven't seen been in {window} days. """ - for obj in get_stale_objects(window): + for obj in self.get_stale_objects(window): print(f"Deleting {obj}...") obj.delete() - def report_stale_objects(window): + def report_stale_objects(self, window): """ Print all database objects that haven't seen been in {window} days. """ - for obj in get_stale_objects(window): + for obj in self.get_stale_objects(window): print(obj) def handle(self, args, other): @@ -96,7 +96,7 @@ def handle(self, args, other): ) resp = input() if resp != "Y": - return + sys.exit() print( "Removing objects that haven't been seen in a scrape within" From c3efa0d58b32f5ccc2dc52ae8e208cafd7cd3efa Mon Sep 17 00:00:00 2001 From: msj Date: Tue, 14 Mar 2023 16:33:41 -0400 Subject: [PATCH 7/9] Add integration test --- pupa/tests/clean/test_clean.py | 52 +++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 11 deletions(-) diff --git a/pupa/tests/clean/test_clean.py b/pupa/tests/clean/test_clean.py index 0e75c28..c845cb2 100644 --- a/pupa/tests/clean/test_clean.py +++ b/pupa/tests/clean/test_clean.py @@ -1,6 +1,5 @@ import pytest import argparse - from datetime import datetime, timezone, timedelta from freezegun import freeze_time @@ -9,11 +8,6 @@ from pupa.cli.commands.clean import Command -def create_jurisdiction(): - Division.objects.create(id="ocd-division/country:us", name="USA") - return Jurisdiction.objects.create(id="jid", division_id="ocd-division/country:us") - - @pytest.fixture def subparsers(): parser = argparse.ArgumentParser("pupa", description="pupa CLI") @@ -30,9 +24,14 @@ def subparsers(): return parser.add_subparsers(dest="subcommand") +def create_jurisdiction(): + Division.objects.create(id="ocd-division/country:us", name="USA") + return Jurisdiction.objects.create(id="jid", division_id="ocd-division/country:us") + + @pytest.mark.django_db def test_get_stale_objects(subparsers): - j = create_jurisdiction() + _ = create_jurisdiction() o = Organization.objects.create(name="WWE", jurisdiction_id="jid") p = Person.objects.create(name="George Washington", family_name="Washington") m = p.memberships.create(organization=o) @@ -42,14 +41,13 @@ def test_get_stale_objects(subparsers): a_week_from_now = datetime.now(tz=timezone.utc) + timedelta(days=7) with freeze_time(a_week_from_now): p = Person.objects.create(name="Thomas Jefferson", family_name="Jefferson") - j.save() p.memberships.create(organization=o) assert set(Command(subparsers).get_stale_objects(7)) == expected_stale_objects @pytest.mark.django_db def test_remove_stale_objects(subparsers): - j = create_jurisdiction() + _ = create_jurisdiction() o = Organization.objects.create(name="WWE", jurisdiction_id="jid") p = Person.objects.create(name="George Washington", family_name="Washington") m = p.memberships.create(organization=o) @@ -61,9 +59,41 @@ def test_remove_stale_objects(subparsers): p = Person.objects.create(name="Thomas Jefferson", family_name="Jefferson") p.memberships.create(organization=o) - j.save() - Command(subparsers).remove_stale_objects(7) for obj in expected_stale_objects: was_deleted = not type(obj).objects.filter(id=obj.id).exists() assert was_deleted + + +@pytest.mark.django_db +def test_clean_command(subparsers): + _ = create_jurisdiction() + o = Organization.objects.create(name="WWE", jurisdiction_id="jid") + + stale_person = Person.objects.create( + name="George Washington", family_name="Washington" + ) + stale_membership = stale_person.memberships.create(organization=o) + + a_week_from_now = datetime.now(tz=timezone.utc) + timedelta(days=7) + with freeze_time(a_week_from_now): + not_stale_person = Person.objects.create( + name="Thomas Jefferson", family_name="Jefferson" + ) + not_stale_membership = not_stale_person.memberships.create(organization=o) + o.save() # Update org's last_seen field + + # Call clean command + Command(subparsers).handle( + argparse.Namespace(noinput=True, report=False, window=7), [] + ) + + expected_stale_objects = {stale_person, stale_membership} + for obj in expected_stale_objects: + was_deleted = not type(obj).objects.filter(id=obj.id).exists() + assert was_deleted + + expected_not_stale_objects = {o, not_stale_person, not_stale_membership} + for obj in expected_not_stale_objects: + was_not_deleted = type(obj).objects.filter(id=obj.id).exists() + assert was_not_deleted From 3d9717e788ac917b634b48b0ef3a1487d41f9cf6 Mon Sep 17 00:00:00 2001 From: msj Date: Tue, 14 Mar 2023 16:36:19 -0400 Subject: [PATCH 8/9] Add count to clean command prompt --- pupa/cli/commands/clean.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pupa/cli/commands/clean.py b/pupa/cli/commands/clean.py index 3b58a80..a321ef4 100644 --- a/pupa/cli/commands/clean.py +++ b/pupa/cli/commands/clean.py @@ -90,7 +90,9 @@ def handle(self, args, other): else: if not args.noinput: print( - "This will permanently delete all objects from your database" + f"This will permanently delete" + f" {len(self.get_stale_objects(args.window))}" + " objects from your database" f" that have not been scraped within the last {args.window}" " days. Are you sure? (Y/N)" ) From 9f51bbf3f9c5ca8fd3ba048f61c02221ee31c135 Mon Sep 17 00:00:00 2001 From: M J <36973363+antidipyramid@users.noreply.github.com> Date: Thu, 16 Mar 2023 20:32:59 +0000 Subject: [PATCH 9/9] Update pupa/cli/commands/clean.py Fix clean warning prompt Co-authored-by: hannah cushman garland --- pupa/cli/commands/clean.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pupa/cli/commands/clean.py b/pupa/cli/commands/clean.py index a321ef4..6a4cb1c 100644 --- a/pupa/cli/commands/clean.py +++ b/pupa/cli/commands/clean.py @@ -91,7 +91,7 @@ def handle(self, args, other): if not args.noinput: print( f"This will permanently delete" - f" {len(self.get_stale_objects(args.window))}" + f" {len(list(self.get_stale_objects(args.window)))}" " objects from your database" f" that have not been scraped within the last {args.window}" " days. Are you sure? (Y/N)"