test(backup): Remove @targets decorator and improve assert message (#61719)

Two improvements to the backup test scaffolding are included in this PR:

1. The `@targets` decorator is replaced with `@expect_models`, which is
simpler and does not rely on test functions returning values, a pattern that
is deprecated as of Python 3.11 (a condensed sketch of the new pattern follows
this list).
2. The error message shown to developers (and in CI) when the new
`verify_models_in_output` helper fails is much more detailed, explaining what
went wrong and where to look to fix the problem.
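For orientation, here is a condensed sketch of the new pattern, modeled on the
changes to `test_exhaustive.py` further down. The base-class import and fixture
helpers (`BackupTestCase`, `create_exhaustive_instance`,
`import_export_then_validate`) are assumed to come from the existing test
scaffolding; the model group and test names are illustrative.

```python
from __future__ import annotations

from typing import Type

from django.db.models import Model

from sentry.backup.dependencies import NormalizedModelName
from sentry.testutils.helpers.backups import BackupTestCase  # assumed location
from tests.sentry.backup import expect_models, verify_models_in_output

# Module-level group of models covered by this file; checked for completeness
# elsewhere (e.g. in test_coverage.py).
ROUNDTRIP_TESTED: set[NormalizedModelName] = set()


class ExampleBackupTest(BackupTestCase):
    # Old pattern: `@targets(mark(ROUNDTRIP_TESTED, "__all__"))` plus
    # `return actual` at the end of the test body, relying on the test
    # function's return value.
    #
    # New pattern: `@expect_models` records the models in the group and passes
    # them into the test as an extra argument, and the test asserts explicitly.
    @expect_models(ROUNDTRIP_TESTED, "__all__")
    def test_roundtrip(self, expected_models: list[Type[Model]]):
        self.create_exhaustive_instance(is_superadmin=True)
        actual = self.import_export_then_validate(self._testMethodName, reset_pks=True)
        verify_models_in_output(expected_models, actual)
```

The explicit call keeps the assertion visible in the test body and avoids
returning a value from the test function.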
azaslavsky authored Dec 20, 2023
1 parent 3b58ab9 commit bbd0230
Showing 5 changed files with 204 additions and 156 deletions.
202 changes: 110 additions & 92 deletions tests/sentry/backup/__init__.py
@@ -1,6 +1,7 @@
from __future__ import annotations

from typing import Callable, Literal, Type
from functools import wraps
from typing import Callable, Literal, Set, Type

from django.db import models

@@ -12,114 +13,131 @@
sorted_dependencies,
)
from sentry.backup.helpers import DatetimeSafeDjangoJSONEncoder, get_exportable_sentry_models
from sentry.utils.json import JSONData


def targets(expected_models: list[Type[models.Model]]):
"""A helper decorator that checks that every model that a test "targeted" was actually seen in
the output, ensuring that we're actually testing the thing we think we are. Additionally, this
decorator is easily legible to static analysis, which allows for static checks to ensure that
all `__relocation_scope__ != RelocationScope.Excluded` models are being tested.
def verify_models_in_output(
expected_models: list[Type[models.Model]], actual_json: JSONData
) -> None:
"""
A helper context manager that checks that every model that a test "targeted" was actually seen
in the output, ensuring that we're actually testing the thing we think we are. Additionally,
this context manager is easily legible to static analysis, which allows for static checks to
ensure that all `__relocation_scope__ != RelocationScope.Excluded` models are being tested.
To be considered a proper "testing" of a given target type, the resulting output must contain at
least one instance of that type with all of its fields present and set to non-default values."""

def decorator(func):
def wrapped(*args, **kwargs):
actual = func(*args, **kwargs)
if actual is None:
raise AssertionError(f"The test {func.__name__} did not return its actual JSON")

# Do a quick scan to ensure that at least one instance of each expected model is
# present.
actual_model_names = {entry["model"] for entry in actual}
expected_model_types = {
"sentry." + type.__name__.lower(): type for type in expected_models
}
expected_model_names = set(expected_model_types.keys())
notfound = sorted(expected_model_names - actual_model_names)
if len(notfound) > 0:
raise AssertionError(f"Some `@targets_models` entries were not found: {notfound}")

# Now do a more thorough check: for every `expected_models` entry, make sure that we
# have at least one instance of that model that sets all of its fields to some
# non-default value.
mistakes_by_model: dict[str, list[str]] = {}
encoder = DatetimeSafeDjangoJSONEncoder()
for model in actual:
name = model["model"]
if name not in expected_model_names:
continue

data = model["fields"]
type = expected_model_types[name]
fields = type._meta.get_fields()
mistakes = []
for f in fields:
field_name = f.name

# IDs are synonymous with primary keys, and should not be included in JSON field
# output.
if field_name == "id":
continue

# The model gets a `ManyToOneRel` or `ManyToManyRel` from all other models where
# it is referenced by foreign key. Those do not explicitly need to be set - we
# don't care that models that reference this model exist, just that this model
# exists in its most filled-out form.
if isinstance(f, models.ManyToOneRel) or isinstance(f, models.ManyToManyRel):
continue

# TODO(getsentry/team-ospo#156): For some reason we currently don't always
# serialize some `ManyToManyField`s with the `through` property set. I'll
# investigate, but will skip over these for now.
if isinstance(f, models.ManyToManyField):
continue

if not isinstance(f, models.Field):
continue
if field_name not in data:
mistakes.append(f"Must include field: `{field_name}`")
continue
if f.has_default():
default_value = f.get_default()
serialized = encoder.encode(default_value)
if serialized == data:
mistakes.append(f"Must use non-default data: `{field_name}`")
return

# If one model instance has N mistakes, and another has N - 1 mistakes, we want to
# keep the shortest list, to give the user the smallest number of fixes to make when
# reporting the mistake.
if name not in mistakes_by_model or (len(mistakes) < len(mistakes_by_model[name])):
mistakes_by_model[name] = mistakes
for name, mistakes in mistakes_by_model.items():
num = len(mistakes)
if num > 0:
raise AssertionError(f"Model {name} has {num} mistakes: {mistakes}")

return None

return wrapped

return decorator

least one instance of that type with all of its fields present and set to non-default values.
"""

def mark(group: set[NormalizedModelName], *marking: Type | Literal["__all__"]):
# Do a quick scan to ensure that at least one instance of each expected model is present.
actual_model_names = {entry["model"] for entry in actual_json}
expected_model_types = {"sentry." + type.__name__.lower(): type for type in expected_models}
expected_model_names = set(expected_model_types.keys())
notfound = sorted(expected_model_names - actual_model_names)
if len(notfound) > 0:
raise AssertionError(
f"""Some `expected_models` entries were not found: {notfound}
If you are seeing this in CI, it means that this test produced an `export.json` backup
file that was missing the above models. This check is in place to ensure that ALL models
of a certain category are covered by this particular test - by omitting a certain kind
of model from the backup output entirely, we end up in a situation where backing up the
model in question to JSON is untested.
To fix this, you'll need to modify the body of the test to add at least one of these
models to the database before exporting. The process for doing so is test-specific, but
if the test body contains a fixture factory like `self.create_exhaustive_...`, that
function will be a good place to start. If it does not, you can just write the model to
the database at the appropriate point in the test: `MyNewModel.objects.create(...)`.
"""
)

# Now do a more thorough check: for every `expected_models` entry, make sure that we have at
# least one instance of that model that sets all of its fields to some non-default value.
mistakes_by_model: dict[str, list[str]] = {}
encoder = DatetimeSafeDjangoJSONEncoder()
for model in actual_json:
name = model["model"]
if name not in expected_model_names:
continue

data = model["fields"]
type = expected_model_types[name]
fields = type._meta.get_fields()
mistakes = []
for f in fields:
field_name = f.name

# IDs are synonymous with primary keys, and should not be included in JSON field output.
if field_name == "id":
continue

# The model gets a `ManyToOneRel` or `ManyToManyRel` from all other models where it is
# referenced by foreign key. Those do not explicitly need to be set - we don't care that
# models that reference this model exist, just that this model exists in its most
# filled-out form.
if isinstance(f, models.ManyToOneRel) or isinstance(f, models.ManyToManyRel):
continue

# TODO(getsentry/team-ospo#156): For some reason we currently don't always serialize
# some `ManyToManyField`s with the `through` property set. I'll investigate, but will
# skip over these for now.
if isinstance(f, models.ManyToManyField):
continue

if not isinstance(f, models.Field):
continue
if field_name not in data:
mistakes.append(f"Must include field: `{field_name}`")
continue
if f.has_default():
default_value = f.get_default()
serialized = encoder.encode(default_value)
if serialized == data:
mistakes.append(f"Must use non-default data: `{field_name}`")
return

# If one model instance has N mistakes, and another has N - 1 mistakes, we want to keep the
# shortest list, to give the user the smallest number of fixes to make when reporting the
# mistake.
if name not in mistakes_by_model or (len(mistakes) < len(mistakes_by_model[name])):
mistakes_by_model[name] = mistakes

for name, mistakes in mistakes_by_model.items():
num = len(mistakes)
if num > 0:
raise AssertionError(f"Model {name} has {num} mistakes: {mistakes}")


def expect_models(group: set[NormalizedModelName], *marking: Type | Literal["__all__"]) -> Callable:
"""
A function that runs at module load time and marks all models that appear in a given test function.
A function that runs at module load time and marks all models that appear in a given test
function. The first argument stores the tracked models in a global group, which we then check in
`test_coverage.py` for completeness.
Use the sentinel string "__all__" to indicate that all models are expected.
"""

all: Literal["__all__"] = "__all__"
target_models: Set[Type[models.Model]] = set()
for model in marking:
if model == all:
all_models = get_exportable_sentry_models()
group.update({get_model_name(c) for c in all_models})
return list(all_models)
target_models = all_models.copy()
break

group.add(get_model_name(model))
return marking
target_models.add(model)

def decorator(func: Callable):
@wraps(func)
def wrapper(*args, **kwargs):
return func(*args, target_models, **kwargs)

return wrapper

return decorator


def get_matching_exportable_models(
26 changes: 15 additions & 11 deletions tests/sentry/backup/test_exhaustive.py
@@ -2,6 +2,9 @@

import tempfile
from pathlib import Path
from typing import Type

from django.db.models import Model

from sentry.backup.dependencies import NormalizedModelName
from sentry.backup.imports import (
@@ -17,7 +20,7 @@
export_to_file,
)
from sentry.testutils.silo import region_silo_test
from tests.sentry.backup import mark, targets
from tests.sentry.backup import expect_models, verify_models_in_output

EXHAUSTIVELY_TESTED: set[NormalizedModelName] = set()
UNIQUENESS_TESTED: set[NormalizedModelName] = set()
@@ -35,18 +38,20 @@ def export_to_tmp_file_and_clear_database(self, tmp_dir, reset_pks) -> Path:
clear_database(reset_pks=reset_pks)
return tmp_path

@targets(mark(EXHAUSTIVELY_TESTED, "__all__"))
def test_exhaustive_clean_pks(self):
@expect_models(EXHAUSTIVELY_TESTED, "__all__")
def test_exhaustive_clean_pks(self, expected_models: list[Type[Model]]):
self.create_exhaustive_instance(is_superadmin=True)
return self.import_export_then_validate(self._testMethodName, reset_pks=True)
actual = self.import_export_then_validate(self._testMethodName, reset_pks=True)
verify_models_in_output(expected_models, actual)

@targets(mark(EXHAUSTIVELY_TESTED, "__all__"))
def test_exhaustive_dirty_pks(self):
@expect_models(EXHAUSTIVELY_TESTED, "__all__")
def test_exhaustive_dirty_pks(self, expected_models: list[Type[Model]]):
self.create_exhaustive_instance(is_superadmin=True)
return self.import_export_then_validate(self._testMethodName, reset_pks=False)
actual = self.import_export_then_validate(self._testMethodName, reset_pks=False)
verify_models_in_output(expected_models, actual)

@targets(mark(UNIQUENESS_TESTED, "__all__"))
def test_uniqueness(self):
@expect_models(UNIQUENESS_TESTED, "__all__")
def test_uniqueness(self, expected_models: list[Type[Model]]):
self.create_exhaustive_instance(is_superadmin=True)
with tempfile.TemporaryDirectory() as tmp_dir:
# Export the data once.
@@ -67,5 +72,4 @@ def test_uniqueness(self):

tmp_actual = Path(tmp_dir).joinpath(f"{self._testMethodName}.actual.json")
actual = export_to_file(tmp_actual, ExportScope.Global)

return actual
verify_models_in_output(expected_models, actual)
(Diffs for the remaining 3 changed files are not shown here.)