From 8bd2f74c942cbfeba221338342f2fa3ebde88e2a Mon Sep 17 00:00:00 2001
From: Adrian Galvan <adrian@ethyca.com>
Date: Sun, 8 Dec 2024 19:48:01 -0800
Subject: [PATCH 01/22] Allow masking without primary keys

---
 .../dataset/postgres_example_test_dataset.yml | 11 ++++
 .../query_configs/bigquery_query_config.py    | 12 ++---
 .../query_configs/mongodb_query_config.py     |  6 +--
 .../connectors/query_configs/query_config.py  | 37 +++++++++----
 .../query_configs/snowflake_query_config.py   |  4 +-
 tests/fixtures/application_fixtures.py        | 53 +++++++++++++++++++
 .../service/connectors/test_query_config.py   | 42 +++++++++++++--
 7 files changed, 141 insertions(+), 24 deletions(-)

diff --git a/data/dataset/postgres_example_test_dataset.yml b/data/dataset/postgres_example_test_dataset.yml
index d62eb38d46..e8d58f626c 100644
--- a/data/dataset/postgres_example_test_dataset.yml
+++ b/data/dataset/postgres_example_test_dataset.yml
@@ -7,18 +7,29 @@ dataset:
         fields:
           - name: city
             data_categories: [user.contact.address.city]
+            fides_meta:
+              data_type: string
           - name: house
             data_categories: [user.contact.address.street]
+            fides_meta:
+              data_type: string
           - name: id
             data_categories: [system.operations]
             fides_meta:
+              data_type: string
               primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
+            fides_meta:
+              data_type: string
           - name: street
             data_categories: [user.contact.address.street]
+            fides_meta:
+              data_type: string
           - name: zip
             data_categories: [user.contact.address.postal_code]
+            fides_meta:
+              data_type: string
 
       - name: customer
         fields:
diff --git a/src/fides/api/service/connectors/query_configs/bigquery_query_config.py b/src/fides/api/service/connectors/query_configs/bigquery_query_config.py
index 681e2b9c60..74b28f3ada 100644
--- a/src/fides/api/service/connectors/query_configs/bigquery_query_config.py
+++ b/src/fides/api/service/connectors/query_configs/bigquery_query_config.py
@@ -140,7 +140,7 @@ def generate_update(
             return []
 
         table = Table(self._generate_table_name(), MetaData(bind=client), autoload=True)
-        pk_clauses: List[ColumnElement] = [
+        where_clauses: List[ColumnElement] = [
             getattr(table.c, k) == v for k, v in non_empty_primary_keys.items()
         ]
 
@@ -153,13 +153,13 @@ def generate_update(
             for partition_clause in partition_clauses:
                 partitioned_queries.append(
                     table.update()
-                    .where(*(pk_clauses + [text(partition_clause)]))
+                    .where(*(where_clauses + [text(partition_clause)]))
                     .values(**update_value_map)
                 )
 
             return partitioned_queries
 
-        return [table.update().where(*pk_clauses).values(**update_value_map)]
+        return [table.update().where(*where_clauses).values(**update_value_map)]
 
     def generate_delete(self, row: Row, client: Engine) -> List[Delete]:
         """Returns a List of SQLAlchemy DELETE statements for BigQuery. Does not actually execute the delete statement.
@@ -189,7 +189,7 @@ def generate_delete(self, row: Row, client: Engine) -> List[Delete]:
             return []
 
         table = Table(self._generate_table_name(), MetaData(bind=client), autoload=True)
-        pk_clauses: List[ColumnElement] = [
+        where_clauses: List[ColumnElement] = [
             getattr(table.c, k) == v for k, v in non_empty_primary_keys.items()
         ]
 
@@ -202,9 +202,9 @@ def generate_delete(self, row: Row, client: Engine) -> List[Delete]:
 
             for partition_clause in partition_clauses:
                 partitioned_queries.append(
-                    table.delete().where(*(pk_clauses + [text(partition_clause)]))
+                    table.delete().where(*(where_clauses + [text(partition_clause)]))
                 )
 
             return partitioned_queries
 
-        return [table.delete().where(*pk_clauses)]
+        return [table.delete().where(*where_clauses)]
diff --git a/src/fides/api/service/connectors/query_configs/mongodb_query_config.py b/src/fides/api/service/connectors/query_configs/mongodb_query_config.py
index bd650723f4..1a6aa303f0 100644
--- a/src/fides/api/service/connectors/query_configs/mongodb_query_config.py
+++ b/src/fides/api/service/connectors/query_configs/mongodb_query_config.py
@@ -69,21 +69,21 @@ def generate_update_stmt(
         """Generate a SQL update statement in the form of Mongo update statement components"""
         update_clauses = self.update_value_map(row, policy, request)
 
-        pk_clauses: Dict[str, Any] = filter_nonempty_values(
+        where_clauses: Dict[str, Any] = filter_nonempty_values(
             {
                 field_path.string_path: field.cast(row[field_path.string_path])
                 for field_path, field in self.primary_key_field_paths.items()
             }
         )
 
-        valid = len(pk_clauses) > 0 and len(update_clauses) > 0
+        valid = len(where_clauses) > 0 and len(update_clauses) > 0
         if not valid:
             logger.warning(
                 "There is not enough data to generate a valid update for {}",
                 self.node.address,
             )
             return None
-        return pk_clauses, {"$set": update_clauses}
+        return where_clauses, {"$set": update_clauses}
 
     def query_to_str(self, t: MongoStatement, input_data: Dict[str, List[Any]]) -> str:
         """string representation of a query for logging/dry-run"""
diff --git a/src/fides/api/service/connectors/query_configs/query_config.py b/src/fides/api/service/connectors/query_configs/query_config.py
index 6e868964af..ae62196bc1 100644
--- a/src/fides/api/service/connectors/query_configs/query_config.py
+++ b/src/fides/api/service/connectors/query_configs/query_config.py
@@ -15,6 +15,7 @@
     ROOT_COLLECTION_ADDRESS,
     CollectionAddress,
     Field,
+    FieldAddress,
     FieldPath,
     MaskingTruncation,
 )
@@ -100,6 +101,15 @@ def primary_key_field_paths(self) -> Dict[FieldPath, Field]:
             if field.primary_key
         }
 
+    @property
+    def identity_or_reference_field_paths(self) -> Dict[FieldPath, Field]:
+        """Mapping of FieldPaths to Fields that have identity or dataset references"""
+        # Build the set of incoming-edge f2 field paths once, not once per field
+        targets = {edge.f2.field_path for edge in self.node.incoming_edges}
+        return {
+            fpath: fld for fpath, fld in self.field_map().items() if fpath in targets
+        }
+
     def query_sources(self) -> Dict[str, List[CollectionAddress]]:
         """Display the input collection(s) for each query key for display purposes.
 
@@ -412,10 +422,10 @@ def generate_query_without_tuples(  # pylint: disable=R0914
     def get_update_stmt(
         self,
         update_clauses: List[str],
-        pk_clauses: List[str],
+        where_clauses: List[str],
     ) -> str:
         """Returns a SQL UPDATE statement to fit SQL syntax."""
-        return f"UPDATE {self.node.address.collection} SET {', '.join(update_clauses)} WHERE {' AND '.join(pk_clauses)}"
+        return f"UPDATE {self.node.address.collection} SET {', '.join(update_clauses)} WHERE {' AND '.join(where_clauses)}"
 
     @abstractmethod
     def get_update_clauses(
@@ -436,6 +446,7 @@ def generate_update_stmt(
     ) -> Optional[T]:
         """Returns an update statement in generic SQL-ish dialect."""
         update_value_map: Dict[str, Any] = self.update_value_map(row, policy, request)
+
         non_empty_primary_keys: Dict[str, Field] = filter_nonempty_values(
             {
                 fpath.string_path: fld.cast(row[fpath.string_path])
@@ -444,17 +455,25 @@ def generate_update_stmt(
             }
         )
 
+        non_empty_reference_fields: Dict[str, Field] = filter_nonempty_values(
+            {
+                fpath.string_path: fld.cast(row[fpath.string_path])
+                for fpath, fld in self.identity_or_reference_field_paths.items()
+                if fpath.string_path in row
+            }
+        )
+
         update_clauses = self.get_update_clauses(
-            update_value_map, non_empty_primary_keys
+            update_value_map, non_empty_reference_fields
         )
-        pk_clauses = self.format_key_map_for_update_stmt(
-            list(non_empty_primary_keys.keys())
+        where_clauses = self.format_key_map_for_update_stmt(
+            list(non_empty_reference_fields.keys())
         )
 
-        for k, v in non_empty_primary_keys.items():
-            update_value_map[k] = v
+        # for k, v in non_empty_reference_fields.items():
+        #     update_value_map[k] = v
 
-        valid = len(pk_clauses) > 0 and len(update_clauses) > 0
+        valid = len(where_clauses) > 0 and len(update_clauses) > 0
         if not valid:
             logger.warning(
                 "There is not enough data to generate a valid update statement for {}",
@@ -464,7 +483,7 @@ def generate_update_stmt(
 
         query_str = self.get_update_stmt(
             update_clauses,
-            pk_clauses,
+            where_clauses,
         )
         logger.info("query = {}, params = {}", Pii(query_str), Pii(update_value_map))
         return self.format_query_stmt(query_str, update_value_map)
diff --git a/src/fides/api/service/connectors/query_configs/snowflake_query_config.py b/src/fides/api/service/connectors/query_configs/snowflake_query_config.py
index 574e1ea1b1..443dd94051 100644
--- a/src/fides/api/service/connectors/query_configs/snowflake_query_config.py
+++ b/src/fides/api/service/connectors/query_configs/snowflake_query_config.py
@@ -67,7 +67,7 @@ def format_key_map_for_update_stmt(self, fields: List[str]) -> List[str]:
     def get_update_stmt(
         self,
         update_clauses: List[str],
-        pk_clauses: List[str],
+        where_clauses: List[str],
     ) -> str:
         """Returns a parameterized update statement in Snowflake dialect."""
-        return f'UPDATE {self._generate_table_name()} SET {", ".join(update_clauses)} WHERE {" AND ".join(pk_clauses)}'
+        return f'UPDATE {self._generate_table_name()} SET {", ".join(update_clauses)} WHERE {" AND ".join(where_clauses)}'
diff --git a/tests/fixtures/application_fixtures.py b/tests/fixtures/application_fixtures.py
index eb28b35657..355985ddd0 100644
--- a/tests/fixtures/application_fixtures.py
+++ b/tests/fixtures/application_fixtures.py
@@ -864,6 +864,59 @@ def erasure_policy(
             "rule_id": erasure_rule.id,
         },
     )
+
+    yield erasure_policy
+    try:
+        rule_target.delete(db)
+    except ObjectDeletedError:
+        pass
+    try:
+        erasure_rule.delete(db)
+    except ObjectDeletedError:
+        pass
+    try:
+        erasure_policy.delete(db)
+    except ObjectDeletedError:
+        pass
+
+
+@pytest.fixture(scope="function")
+def erasure_policy_address_city(
+    db: Session,
+    oauth_client: ClientDetail,
+) -> Generator:
+    erasure_policy = Policy.create(
+        db=db,
+        data={
+            "name": "example erasure policy",
+            "key": "example_erasure_policy",
+            "client_id": oauth_client.id,
+        },
+    )
+
+    erasure_rule = Rule.create(
+        db=db,
+        data={
+            "action_type": ActionType.erasure.value,
+            "client_id": oauth_client.id,
+            "name": "Erasure Rule",
+            "policy_id": erasure_policy.id,
+            "masking_strategy": {
+                "strategy": "null_rewrite",
+                "configuration": {},
+            },
+        },
+    )
+
+    rule_target = RuleTarget.create(
+        db=db,
+        data={
+            "client_id": oauth_client.id,
+            "data_category": DataCategory("user.contact.address.city").value,
+            "rule_id": erasure_rule.id,
+        },
+    )
+
     yield erasure_policy
     try:
         rule_target.delete(db)
diff --git a/tests/ops/service/connectors/test_query_config.py b/tests/ops/service/connectors/test_query_config.py
index 01d7b9dbd2..451788ddf7 100644
--- a/tests/ops/service/connectors/test_query_config.py
+++ b/tests/ops/service/connectors/test_query_config.py
@@ -286,10 +286,41 @@ def test_generate_update_stmt_one_field(
             "id": 1,
         }
         text_clause = config.generate_update_stmt(row, erasure_policy, privacy_request)
-        assert text_clause.text == """UPDATE customer SET name = :name WHERE id = :id"""
+        assert (
+            text_clause.text
+            == """UPDATE customer SET name = :name WHERE email = :email"""
+        )
         assert text_clause._bindparams["name"].key == "name"
         assert text_clause._bindparams["name"].value is None  # Null masking strategy
 
+    def test_generate_update_stmt_one_field_inbound_reference(
+        self, erasure_policy_address_city, example_datasets, connection_config
+    ):
+        dataset = Dataset(**example_datasets[0])
+        graph = convert_dataset_to_graph(dataset, connection_config.key)
+        dataset_graph = DatasetGraph(*[graph])
+        traversal = Traversal(dataset_graph, {"email": "customer-1@example.com"})
+
+        address_node = traversal.traversal_node_dict[
+            CollectionAddress("postgres_example_test_dataset", "address")
+        ].to_mock_execution_node()
+
+        config = SQLQueryConfig(address_node)
+        row = {
+            "id": 1,
+            "house": "123",
+            "street": "Main St",
+            "city": "San Francisco",
+            "state": "CA",
+            "zip": "94105",
+        }
+        text_clause = config.generate_update_stmt(
+            row, erasure_policy_address_city, privacy_request
+        )
+        assert text_clause.text == """UPDATE address SET city = :city WHERE id = :id"""
+        assert text_clause._bindparams["city"].key == "city"
+        assert text_clause._bindparams["city"].value is None  # Null masking strategy
+
     def test_generate_update_stmt_length_truncation(
         self,
         erasure_policy_string_rewrite_long,
@@ -316,7 +347,10 @@ def test_generate_update_stmt_length_truncation(
         text_clause = config.generate_update_stmt(
             row, erasure_policy_string_rewrite_long, privacy_request
         )
-        assert text_clause.text == """UPDATE customer SET name = :name WHERE id = :id"""
+        assert (
+            text_clause.text
+            == """UPDATE customer SET name = :name WHERE email = :email"""
+        )
         assert text_clause._bindparams["name"].key == "name"
         # length truncation on name field
         assert (
@@ -365,7 +399,7 @@ def test_generate_update_stmt_multiple_fields_same_rule(
         text_clause = config.generate_update_stmt(row, erasure_policy, privacy_request)
         assert (
             text_clause.text
-            == "UPDATE customer SET email = :email, name = :name WHERE id = :id"
+            == "UPDATE customer SET email = :email, name = :name WHERE email = :email"
         )
         assert text_clause._bindparams["name"].key == "name"
         # since length is set to 40 in dataset.yml, we expect only first 40 chars of masked val
@@ -409,7 +443,7 @@ def test_generate_update_stmts_from_multiple_rules(
 
         assert (
             text_clause.text
-            == "UPDATE customer SET email = :email, name = :name WHERE id = :id"
+            == "UPDATE customer SET email = :email, name = :name WHERE email = :email"
         )
         # Two different masking strategies used for name and email
         assert text_clause._bindparams["name"].value is None  # Null masking strategy

From cadcdb7ba7a59b2ed41da3f0a1eef4253fc4de46 Mon Sep 17 00:00:00 2001
From: Adrian Galvan <adrian@ethyca.com>
Date: Sun, 8 Dec 2024 22:06:43 -0800
Subject: [PATCH 02/22] Updating tests

---
 .../dataset/postgres_example_test_dataset.yml |  2 +-
 src/fides/api/models/connectionconfig.py      |  7 +++++--
 .../connectors/query_configs/query_config.py  |  5 ++---
 src/fides/api/task/graph_task.py              | 20 -------------------
 tests/ops/integration_tests/test_sql_task.py  | 17 +++++-----------
 5 files changed, 13 insertions(+), 38 deletions(-)

diff --git a/data/dataset/postgres_example_test_dataset.yml b/data/dataset/postgres_example_test_dataset.yml
index e8d58f626c..fddbfbb391 100644
--- a/data/dataset/postgres_example_test_dataset.yml
+++ b/data/dataset/postgres_example_test_dataset.yml
@@ -16,7 +16,7 @@ dataset:
           - name: id
             data_categories: [system.operations]
             fides_meta:
-              data_type: string
+              data_type: integer
               primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
diff --git a/src/fides/api/models/connectionconfig.py b/src/fides/api/models/connectionconfig.py
index 1758222b88..2dc518cd44 100644
--- a/src/fides/api/models/connectionconfig.py
+++ b/src/fides/api/models/connectionconfig.py
@@ -220,10 +220,13 @@ def authorized(self) -> bool:
             return False
 
         # hard-coding to avoid cyclic dependency
-        if authentication.strategy not in ["oauth2_authorization_code", "oauth2_client_credentials"]:
+        if authentication.strategy not in [
+            "oauth2_authorization_code",
+            "oauth2_client_credentials",
+        ]:
             return False
 
-        return bool(self.secrets and 'access_token' in self.secrets.keys())
+        return bool(self.secrets and "access_token" in self.secrets.keys())
 
     @property
     def name_or_key(self) -> str:
diff --git a/src/fides/api/service/connectors/query_configs/query_config.py b/src/fides/api/service/connectors/query_configs/query_config.py
index ae62196bc1..aa00189ac4 100644
--- a/src/fides/api/service/connectors/query_configs/query_config.py
+++ b/src/fides/api/service/connectors/query_configs/query_config.py
@@ -15,7 +15,6 @@
     ROOT_COLLECTION_ADDRESS,
     CollectionAddress,
     Field,
-    FieldAddress,
     FieldPath,
     MaskingTruncation,
 )
@@ -470,8 +469,8 @@ def generate_update_stmt(
             list(non_empty_reference_fields.keys())
         )
 
-        # for k, v in non_empty_reference_fields.items():
-        #     update_value_map[k] = v
+        for k, v in non_empty_reference_fields.items():
+            update_value_map[k] = v
 
         valid = len(where_clauses) > 0 and len(update_clauses) > 0
         if not valid:
diff --git a/src/fides/api/task/graph_task.py b/src/fides/api/task/graph_task.py
index 6b78b57297..85576264f0 100644
--- a/src/fides/api/task/graph_task.py
+++ b/src/fides/api/task/graph_task.py
@@ -603,26 +603,6 @@ def erasure_request(
         *erasure_prereqs: int,  # TODO Remove when we stop support for DSR 2.0. DSR 3.0 enforces with downstream_tasks.
     ) -> int:
         """Run erasure request"""
-        # if there is no primary key specified in the graph node configuration
-        # note this in the execution log and perform no erasures on this node
-        if not self.execution_node.collection.contains_field(lambda f: f.primary_key):
-            logger.warning(
-                "No erasures on {} as there is no primary_key defined.",
-                self.execution_node.address,
-            )
-            if self.request_task.id:
-                # For DSR 3.0, largely for testing. DSR 3.0 uses Request Task status
-                # instead of presence of cached erasure data to know if we should rerun a node
-                self.request_task.rows_masked = 0  # Saved as part of update_status
-            # TODO Remove when we stop support for DSR 2.0
-            self.resources.cache_erasure(self.key.value, 0)
-            self.update_status(
-                "No values were erased since no primary key was defined for this collection",
-                None,
-                ActionType.erasure,
-                ExecutionLogStatus.complete,
-            )
-            return 0
 
         if not self.can_write_data():
             logger.warning(
diff --git a/tests/ops/integration_tests/test_sql_task.py b/tests/ops/integration_tests/test_sql_task.py
index 298d77229a..b349040988 100644
--- a/tests/ops/integration_tests/test_sql_task.py
+++ b/tests/ops/integration_tests/test_sql_task.py
@@ -8,13 +8,7 @@
 from sqlalchemy import text
 from sqlalchemy.orm import Session
 
-from fides.api.graph.config import (
-    Collection,
-    CollectionAddress,
-    FieldAddress,
-    GraphDataset,
-    ScalarField,
-)
+from fides.api.graph.config import Collection, FieldAddress, GraphDataset, ScalarField
 from fides.api.graph.data_type import DataType, StringTypeConverter
 from fides.api.graph.graph import DatasetGraph, Edge, Node
 from fides.api.graph.traversal import TraversalNode
@@ -25,7 +19,6 @@
     ExecutionLog,
     ExecutionLogStatus,
     PrivacyRequest,
-    PrivacyRequestStatus,
     RequestTask,
 )
 from fides.api.service.connectors import get_connector
@@ -57,7 +50,7 @@
     "dsr_version",
     ["use_dsr_3_0", "use_dsr_2_0"],
 )
-async def test_sql_erasure_ignores_collections_without_pk(
+async def test_sql_erasure_does_not_ignore_collections_without_pk(
     db,
     postgres_inserts,
     integration_postgres_config,
@@ -116,7 +109,7 @@ async def test_sql_erasure_ignores_collections_without_pk(
         .all()
     )
     logs = [log.__dict__ for log in logs]
-    # since address has no primary_key=True field, it's erasure is skipped
+    # erasure is not skipped since primary_key is not required
     assert (
         len(
             records_matching_fields(
@@ -126,13 +119,13 @@ async def test_sql_erasure_ignores_collections_without_pk(
                 message="No values were erased since no primary key was defined for this collection",
             )
         )
-        == 1
+        == 0
     )
     assert v == {
         "postgres_example:customer": 1,
         "postgres_example:payment_card": 0,
         "postgres_example:orders": 0,
-        "postgres_example:address": 0,
+        "postgres_example:address": 2,
     }
 
 

From de3ce24df7f420713867f3fadc0e831c29e585f0 Mon Sep 17 00:00:00 2001
From: Adrian Galvan <adrian@ethyca.com>
Date: Mon, 9 Dec 2024 00:27:14 -0800
Subject: [PATCH 03/22] Separating overlapping keys in update value map

---
 .../connectors/query_configs/query_config.py  | 47 ++++++++++---------
 .../query_configs/snowflake_query_config.py   |  5 +-
 .../service/connectors/scylla_query_config.py | 13 +++--
 .../v1/endpoints/test_dataset_endpoints.py    | 16 +++----
 .../service/connectors/test_query_config.py   |  5 +-
 5 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/src/fides/api/service/connectors/query_configs/query_config.py b/src/fides/api/service/connectors/query_configs/query_config.py
index aa00189ac4..3eed947dff 100644
--- a/src/fides/api/service/connectors/query_configs/query_config.py
+++ b/src/fides/api/service/connectors/query_configs/query_config.py
@@ -437,7 +437,7 @@ def format_query_stmt(self, query_str: str, update_value_map: Dict[str, Any]) ->
         """Returns a formatted update statement in the appropriate dialect."""
 
     @abstractmethod
-    def format_key_map_for_update_stmt(self, fields: List[str]) -> List[str]:
+    def format_key_map_for_update_stmt(self, param_map: Dict[str, Any]) -> List[str]:
         """Adds the appropriate formatting for update statements in this datastore."""
 
     def generate_update_stmt(
@@ -445,15 +445,6 @@ def generate_update_stmt(
     ) -> Optional[T]:
         """Returns an update statement in generic SQL-ish dialect."""
         update_value_map: Dict[str, Any] = self.update_value_map(row, policy, request)
-
-        non_empty_primary_keys: Dict[str, Field] = filter_nonempty_values(
-            {
-                fpath.string_path: fld.cast(row[fpath.string_path])
-                for fpath, fld in self.primary_key_field_paths.items()
-                if fpath.string_path in row
-            }
-        )
-
         non_empty_reference_fields: Dict[str, Field] = filter_nonempty_values(
             {
                 fpath.string_path: fld.cast(row[fpath.string_path])
@@ -462,11 +453,27 @@ def generate_update_stmt(
             }
         )
 
+        # Identify overlapping fields and create parameter mappings
+        overlapping_keys = set(update_value_map.keys()) & set(
+            non_empty_reference_fields.keys()
+        )
+        param_map = {
+            **{k: v for k, v in update_value_map.items()},  # SET values
+            **{
+                f"where_{k}" if k in overlapping_keys else k: v
+                for k, v in non_empty_reference_fields.items()
+            },  # WHERE values
+        }
+
+        # Generate SQL clauses using parameter names
         update_clauses = self.get_update_clauses(
-            update_value_map, non_empty_reference_fields
+            {k: k for k in update_value_map}, non_empty_reference_fields
         )
         where_clauses = self.format_key_map_for_update_stmt(
-            list(non_empty_reference_fields.keys())
+            {
+                k: f"where_{k}" if k in overlapping_keys else k
+                for k in non_empty_reference_fields
+            }
         )
 
         for k, v in non_empty_reference_fields.items():
@@ -480,12 +487,9 @@ def generate_update_stmt(
             )
             return None
 
-        query_str = self.get_update_stmt(
-            update_clauses,
-            where_clauses,
-        )
-        logger.info("query = {}, params = {}", Pii(query_str), Pii(update_value_map))
-        return self.format_query_stmt(query_str, update_value_map)
+        query_str = self.get_update_stmt(update_clauses, where_clauses)
+        logger.info("query = {}, params = {}", Pii(query_str), Pii(param_map))
+        return self.format_query_stmt(query_str, param_map)
 
 
 class SQLQueryConfig(SQLLikeQueryConfig[Executable]):
@@ -556,16 +560,15 @@ def generate_query(
         )
         return None
 
-    def format_key_map_for_update_stmt(self, fields: List[str]) -> List[str]:
+    def format_key_map_for_update_stmt(self, param_map: Dict[str, Any]) -> List[str]:
         """Adds the appropriate formatting for update statements in this datastore."""
-        fields.sort()
-        return [f"{k} = :{k}" for k in fields]
+        return [f"{k} = :{v}" for k, v in sorted(param_map.items())]
 
     def get_update_clauses(
         self, update_value_map: Dict[str, Any], non_empty_primary_keys: Dict[str, Field]
     ) -> List[str]:
         """Returns a list of update clauses for the update statement."""
-        return self.format_key_map_for_update_stmt(list(update_value_map.keys()))
+        return self.format_key_map_for_update_stmt(update_value_map)
 
     def format_query_stmt(
         self, query_str: str, update_value_map: Dict[str, Any]
diff --git a/src/fides/api/service/connectors/query_configs/snowflake_query_config.py b/src/fides/api/service/connectors/query_configs/snowflake_query_config.py
index 443dd94051..279e601141 100644
--- a/src/fides/api/service/connectors/query_configs/snowflake_query_config.py
+++ b/src/fides/api/service/connectors/query_configs/snowflake_query_config.py
@@ -59,10 +59,9 @@ def get_formatted_query_string(
         """Returns a query string with double quotation mark formatting as required by Snowflake syntax."""
         return f'SELECT {field_list} FROM {self._generate_table_name()} WHERE ({" OR ".join(clauses)})'
 
-    def format_key_map_for_update_stmt(self, fields: List[str]) -> List[str]:
+    def format_key_map_for_update_stmt(self, param_map: Dict[str, Any]) -> List[str]:
         """Adds the appropriate formatting for update statements in this datastore."""
-        fields.sort()
-        return [f'"{k}" = :{k}' for k in fields]
+        return [f'"{k}" = :{v}' for k, v in sorted(param_map.items())]
 
     def get_update_stmt(
         self,
diff --git a/src/fides/api/service/connectors/scylla_query_config.py b/src/fides/api/service/connectors/scylla_query_config.py
index 2a72270a40..ce8f60335c 100644
--- a/src/fides/api/service/connectors/scylla_query_config.py
+++ b/src/fides/api/service/connectors/scylla_query_config.py
@@ -70,21 +70,20 @@ def generate_query(
     ) -> Optional[ScyllaDBStatement]:
         return self.generate_query_without_tuples(input_data, policy)
 
-    def format_key_map_for_update_stmt(self, fields: List[str]) -> List[str]:
+    def format_key_map_for_update_stmt(self, param_map: Dict[str, Any]) -> List[str]:
         """Adds the appropriate formatting for update statements in this datastore."""
-        fields.sort()
-        return [f"{k} = %({k})s" for k in fields]
+        return [f"{k} = %({v})s" for k, v in sorted(param_map.items())]
 
     def get_update_clauses(
         self, update_value_map: Dict[str, Any], non_empty_primary_keys: Dict[str, Field]
     ) -> List[str]:
         """Returns a list of update clauses for the update statement."""
         return self.format_key_map_for_update_stmt(
-            [
-                key
-                for key in update_value_map.keys()
+            {
+                key: value
+                for key, value in update_value_map.items()
                 if key not in non_empty_primary_keys
-            ]
+            }
         )
 
     def format_query_data_name(self, query_data_name: str) -> str:
diff --git a/tests/ops/api/v1/endpoints/test_dataset_endpoints.py b/tests/ops/api/v1/endpoints/test_dataset_endpoints.py
index f744d59e47..1ae9ac28f8 100644
--- a/tests/ops/api/v1/endpoints/test_dataset_endpoints.py
+++ b/tests/ops/api/v1/endpoints/test_dataset_endpoints.py
@@ -232,9 +232,7 @@ def test_put_validate_dataset_invalid_length(
         invalid_dataset = example_datasets[0]
 
         # string is properly read:
-        invalid_dataset["collections"][0]["fields"][0]["fidesops_meta"] = {
-            "length": 123
-        }
+        invalid_dataset["collections"][0]["fields"][0]["fides_meta"] = {"length": 123}
         response = api_client.put(
             validate_dataset_url, headers=auth_header, json=invalid_dataset
         )
@@ -247,7 +245,7 @@ def test_put_validate_dataset_invalid_length(
         )
 
         # fails with an invalid value
-        invalid_dataset["collections"][0]["fields"][0]["fidesops_meta"] = {"length": -1}
+        invalid_dataset["collections"][0]["fields"][0]["fides_meta"] = {"length": -1}
         response = api_client.put(
             validate_dataset_url, headers=auth_header, json=invalid_dataset
         )
@@ -269,7 +267,7 @@ def test_put_validate_dataset_invalid_data_type(
         invalid_dataset = example_datasets[0]
 
         # string is properly read:
-        invalid_dataset["collections"][0]["fields"][0]["fidesops_meta"] = {
+        invalid_dataset["collections"][0]["fields"][0]["fides_meta"] = {
             "data_type": "string"
         }
         response = api_client.put(
@@ -284,7 +282,7 @@ def test_put_validate_dataset_invalid_data_type(
         )
 
         # fails with an invalid value
-        invalid_dataset["collections"][0]["fields"][0]["fidesops_meta"] = {
+        invalid_dataset["collections"][0]["fields"][0]["fides_meta"] = {
             "data_type": "stringsssssss"
         }
 
@@ -298,7 +296,7 @@ def test_put_validate_dataset_invalid_data_type(
             == "Value error, The data type stringsssssss is not supported."
         )
 
-    def test_put_validate_dataset_invalid_fidesops_meta(
+    def test_put_validate_dataset_invalid_fides_meta(
         self,
         example_datasets: List,
         validate_dataset_url,
@@ -307,8 +305,8 @@ def test_put_validate_dataset_invalid_fidesops_meta(
     ) -> None:
         auth_header = generate_auth_header(scopes=[DATASET_READ])
         invalid_dataset = example_datasets[0]
-        # Add an invalid fidesops_meta annotation to ensure our type-checking is comprehensive
-        invalid_dataset["collections"][0]["fields"][0]["fidesops_meta"] = {
+        # Add an invalid fides_meta annotation to ensure our type-checking is comprehensive
+        invalid_dataset["collections"][0]["fields"][0]["fides_meta"] = {
             "references": [
                 {
                     "dataset": "postgres_example_test_dataset",
diff --git a/tests/ops/service/connectors/test_query_config.py b/tests/ops/service/connectors/test_query_config.py
index 451788ddf7..991c945081 100644
--- a/tests/ops/service/connectors/test_query_config.py
+++ b/tests/ops/service/connectors/test_query_config.py
@@ -399,7 +399,7 @@ def test_generate_update_stmt_multiple_fields_same_rule(
         text_clause = config.generate_update_stmt(row, erasure_policy, privacy_request)
         assert (
             text_clause.text
-            == "UPDATE customer SET email = :email, name = :name WHERE email = :email"
+            == "UPDATE customer SET email = :email, name = :name WHERE email = :where_email"
         )
         assert text_clause._bindparams["name"].key == "name"
         # since length is set to 40 in dataset.yml, we expect only first 40 chars of masked val
@@ -415,6 +415,7 @@ def test_generate_update_stmt_multiple_fields_same_rule(
                 ["customer-1@example.com"], request_id=privacy_request.id
             )[0]
         )
+        assert text_clause._bindparams["where_email"].value == "customer-1@example.com"
         clear_cache_secrets(privacy_request.id)
 
     def test_generate_update_stmts_from_multiple_rules(
@@ -443,7 +444,7 @@ def test_generate_update_stmts_from_multiple_rules(
 
         assert (
             text_clause.text
-            == "UPDATE customer SET email = :email, name = :name WHERE email = :email"
+            == "UPDATE customer SET name = :name, email = :email WHERE email = :where_email"
         )
         # Two different masking strategies used for name and email
         assert text_clause._bindparams["name"].value is None  # Null masking strategy

From 93974b88d970f68b140a47e6f819cd89a99d9be4 Mon Sep 17 00:00:00 2001
From: Adrian Galvan <adrian@ethyca.com>
Date: Mon, 9 Dec 2024 09:40:57 -0800
Subject: [PATCH 04/22] Fixing data type

---
 data/dataset/postgres_example_test_dataset.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/data/dataset/postgres_example_test_dataset.yml b/data/dataset/postgres_example_test_dataset.yml
index fddbfbb391..a6b7080ec2 100644
--- a/data/dataset/postgres_example_test_dataset.yml
+++ b/data/dataset/postgres_example_test_dataset.yml
@@ -12,7 +12,7 @@ dataset:
           - name: house
             data_categories: [user.contact.address.street]
             fides_meta:
-              data_type: string
+              data_type: integer
           - name: id
             data_categories: [system.operations]
             fides_meta:

From f274ae05e7d6e2ead07efc70be9233268e9957ed Mon Sep 17 00:00:00 2001
From: Adrian Galvan <adrian@ethyca.com>
Date: Mon, 9 Dec 2024 11:20:34 -0800
Subject: [PATCH 05/22] Sorting update map keys

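---
NOTE(review): besides sorting, this patch changes the Snowflake override of
format_key_map_for_update_stmt from iterating `param_map` directly to
iterating `sorted(param_map.items())`, so it now unpacks key/value pairs the
same way the base and Scylla implementations do. With `sorted(...)` the
generated assignments are ordered by column name rather than by the
insertion order of the map.
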
 src/fides/api/service/connectors/query_configs/query_config.py  | 2 +-
 .../service/connectors/query_configs/snowflake_query_config.py  | 2 +-
 src/fides/api/service/connectors/scylla_query_config.py         | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/fides/api/service/connectors/query_configs/query_config.py b/src/fides/api/service/connectors/query_configs/query_config.py
index 3eed947dff..4a29e0ace5 100644
--- a/src/fides/api/service/connectors/query_configs/query_config.py
+++ b/src/fides/api/service/connectors/query_configs/query_config.py
@@ -562,7 +562,7 @@ def generate_query(
 
     def format_key_map_for_update_stmt(self, param_map: Dict[str, Any]) -> List[str]:
         """Adds the appropriate formatting for update statements in this datastore."""
-        return [f"{k} = :{v}" for k, v in param_map.items()]
+        return [f"{k} = :{v}" for k, v in sorted(param_map.items())]
 
     def get_update_clauses(
         self, update_value_map: Dict[str, Any], non_empty_primary_keys: Dict[str, Field]
diff --git a/src/fides/api/service/connectors/query_configs/snowflake_query_config.py b/src/fides/api/service/connectors/query_configs/snowflake_query_config.py
index 279e601141..ec640191d8 100644
--- a/src/fides/api/service/connectors/query_configs/snowflake_query_config.py
+++ b/src/fides/api/service/connectors/query_configs/snowflake_query_config.py
@@ -61,7 +61,7 @@ def get_formatted_query_string(
 
     def format_key_map_for_update_stmt(self, param_map: Dict[str, Any]) -> List[str]:
         """Adds the appropriate formatting for update statements in this datastore."""
-        return [f'"{k}" = :{v}' for k, v in param_map]
+        return [f'"{k}" = :{v}' for k, v in sorted(param_map.items())]
 
     def get_update_stmt(
         self,
diff --git a/src/fides/api/service/connectors/scylla_query_config.py b/src/fides/api/service/connectors/scylla_query_config.py
index ce8f60335c..dc619a72c7 100644
--- a/src/fides/api/service/connectors/scylla_query_config.py
+++ b/src/fides/api/service/connectors/scylla_query_config.py
@@ -72,7 +72,7 @@ def generate_query(
 
     def format_key_map_for_update_stmt(self, param_map: Dict[str, Any]) -> List[str]:
         """Adds the appropriate formatting for update statements in this datastore."""
-        return [f"{k} = %({v})s" for k, v in param_map.items()]
+        return [f"{k} = %({v})s" for k, v in sorted(param_map.items())]
 
     def get_update_clauses(
         self, update_value_map: Dict[str, Any], non_empty_primary_keys: Dict[str, Field]

From 357b6ec0625b1facdbbd447bb83c5865a8337112 Mon Sep 17 00:00:00 2001
From: Adrian Galvan <adrian@ethyca.com>
Date: Mon, 9 Dec 2024 11:54:51 -0800
Subject: [PATCH 06/22] Removing primary keys from sample and test datasets

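---
NOTE(review): a few hunks below remove the `fides_meta:` line together with
`primary_key` even though other keys of that mapping are kept as context:
  - data/dataset/dynamodb_example_test_dataset.yml, `email` field
    (`identity`, `data_type` remain)
  - data/dataset/mongo_example_test_dataset.yml, the `_id` fields that carry
    `data_type: object_id` and the `id` field that carries `references`
After applying, those keys are left indented with no `fides_meta:` parent.
Please confirm that a later patch in this series restores `fides_meta:` for
these fields (removing only the `primary_key` line).
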
 .../bigquery_enterprise_test_dataset.yml      |  8 +++----
 .../dataset/bigquery_example_test_dataset.yml | 18 ---------------
 .../dataset/dynamodb_example_test_dataset.yml |  8 -------
 data/dataset/email_dataset.yml                |  6 -----
 ...le_field_masking_override_test_dataset.yml | 18 ---------------
 data/dataset/example_test_dataset.invalid     | 18 ---------------
 data/dataset/example_test_datasets.yml        | 16 --------------
 ...e_cloud_sql_mysql_example_test_dataset.yml |  8 -------
 ...loud_sql_postgres_example_test_dataset.yml |  8 -------
 data/dataset/manual_dataset.yml               |  4 ----
 data/dataset/mariadb_example_test_dataset.yml |  8 -------
 data/dataset/mongo_example_test_dataset.yml   | 22 -------------------
 data/dataset/mssql_example_test_dataset.yml   |  8 -------
 data/dataset/mysql_example_test_dataset.yml   |  9 --------
 ...s_example_custom_request_field_dataset.yml |  1 -
 ...alid_masking_strategy_override_dataset.yml |  4 ----
 .../dataset/postgres_example_test_dataset.yml | 17 --------------
 .../dataset/redshift_example_test_dataset.yml | 18 ---------------
 .../dataset/scylladb_example_test_dataset.yml |  5 -----
 .../snowflake_example_test_dataset.yml        | 18 ---------------
 .../dataset/timebase_example_test_dataset.yml | 18 ---------------
 data/saas/dataset/hubspot_dataset.yml         |  3 ---
 data/saas/dataset/mailchimp_dataset.yml       |  2 --
 data/saas/dataset/stripe_dataset.yml          | 16 --------------
 .../mongo_example_test_dataset.yml            | 22 -------------------
 ...s_example_custom_request_field_dataset.yml |  1 -
 .../postgres_example_test_dataset.yml         | 18 ---------------
 .../test_data/mailchimp_override_dataset.yml  |  2 --
 .../saas/test_data/saas_async_dataset.yml     |  2 --
 ..._custom_privacy_request_fields_dataset.yml |  1 -
 .../test_data/saas_erasure_order_dataset.yml  |  5 -----
 .../saas/test_data/saas_example_dataset.yml   | 10 ---------
 tests/ops/generator/test_data_generator.py    |  4 ----
 .../example_datasets/multiple_identities.yml  |  2 --
 ...le_identities_with_external_dependency.yml |  2 --
 .../example_datasets/no_identities.yml        |  2 --
 .../example_datasets/single_identity.yml      |  2 --
 ...ngle_identity_with_internal_dependency.yml |  2 --
 tests/ops/util/test_dataset_yaml.py           |  3 ---
 39 files changed, 4 insertions(+), 335 deletions(-)

diff --git a/data/dataset/bigquery_enterprise_test_dataset.yml b/data/dataset/bigquery_enterprise_test_dataset.yml
index 59d27e68a2..52b20e7d03 100644
--- a/data/dataset/bigquery_enterprise_test_dataset.yml
+++ b/data/dataset/bigquery_enterprise_test_dataset.yml
@@ -30,7 +30,7 @@ dataset:
             fides_meta:
               references: null
               identity: null
-              primary_key: true
+              primary_key: null
               data_type: integer
               length: null
               return_all_elements: null
@@ -102,7 +102,7 @@ dataset:
             fides_meta:
               references: null
               identity: null
-              primary_key: true
+              primary_key: null
               data_type: integer
               length: null
               return_all_elements: null
@@ -204,7 +204,7 @@ dataset:
             fides_meta:
               references: null
               identity: null
-              primary_key: true
+              primary_key: null
               data_type: integer
               length: null
               return_all_elements: null
@@ -347,7 +347,7 @@ dataset:
             fides_meta:
               references: null
               identity: stackoverflow_user_id
-              primary_key: true
+              primary_key: null
               data_type: integer
               length: null
               return_all_elements: null
diff --git a/data/dataset/bigquery_example_test_dataset.yml b/data/dataset/bigquery_example_test_dataset.yml
index 11fdac1aba..c4ea16cb44 100644
--- a/data/dataset/bigquery_example_test_dataset.yml
+++ b/data/dataset/bigquery_example_test_dataset.yml
@@ -13,8 +13,6 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -53,8 +51,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -80,8 +76,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -98,8 +92,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: time
             data_categories: [user.sensor]
 
@@ -114,8 +106,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
@@ -166,8 +156,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.financial]
           - name: preferred
@@ -177,8 +165,6 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [system.operations]
           - name: price
@@ -193,8 +179,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: month
             data_categories: [system.operations]
           - name: name
@@ -227,8 +211,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: opened
             data_categories: [system.operations]
 
diff --git a/data/dataset/dynamodb_example_test_dataset.yml b/data/dataset/dynamodb_example_test_dataset.yml
index d9ecbb8d1f..4aa3f8b2bf 100644
--- a/data/dataset/dynamodb_example_test_dataset.yml
+++ b/data/dataset/dynamodb_example_test_dataset.yml
@@ -19,8 +19,6 @@ dataset:
             data_categories: [system.operations]
           - name: email
             data_categories: [user.contact.email]
-            fides_meta:
-              primary_key: True
               identity: email
               data_type: string
           - name: name
@@ -33,8 +31,6 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -59,16 +55,12 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
       - name: login
         fields:
           - name: customer_id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: login_date
             data_categories: [system.operations]
           - name: name
diff --git a/data/dataset/email_dataset.yml b/data/dataset/email_dataset.yml
index c829e8a4ea..64b49f71a8 100644
--- a/data/dataset/email_dataset.yml
+++ b/data/dataset/email_dataset.yml
@@ -7,8 +7,6 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: true
           - name: customer_id
             data_categories: [user]
             fides_meta:
@@ -22,8 +20,6 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: true
           - name: first_name
             data_categories: [user.childrens]
           - name: last_name
@@ -54,8 +50,6 @@ dataset:
         fields:
           - name: id
             data_categories: [ system.operations ]
-            fides_meta:
-              primary_key: true
           - name: payer_email
             data_categories: [ user.contact.email ]
             fides_meta:
diff --git a/data/dataset/example_field_masking_override_test_dataset.yml b/data/dataset/example_field_masking_override_test_dataset.yml
index 24bdf84555..74e29ca84e 100644
--- a/data/dataset/example_field_masking_override_test_dataset.yml
+++ b/data/dataset/example_field_masking_override_test_dataset.yml
@@ -11,8 +11,6 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -38,8 +36,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -90,8 +86,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -108,8 +102,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: time
             data_categories: [user.sensor]
 
@@ -124,8 +116,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
@@ -176,8 +166,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.financial]
           - name: preferred
@@ -187,8 +175,6 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [system.operations]
           - name: price
@@ -203,8 +189,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: month
             data_categories: [system.operations]
           - name: name
@@ -237,8 +221,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: opened
             data_categories: [system.operations]
       - name: visit
diff --git a/data/dataset/example_test_dataset.invalid b/data/dataset/example_test_dataset.invalid
index 46e5235876..a3bfe261ff 100644
--- a/data/dataset/example_test_dataset.invalid
+++ b/data/dataset/example_test_dataset.invalid
@@ -11,8 +11,6 @@ dataset:
             data_categories: [user.contact.address.street]
           * name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           * name: state
             data_categories: [user.contact.address.state]
           * name: street
@@ -38,8 +36,6 @@ dataset:
               data_type: string
           * name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           * name: name
             data_categories: [user.name]
             fides_meta:
@@ -62,8 +58,6 @@ dataset:
               data_type: string
           * name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           * name: name
             data_categories: [user.name]
             fides_meta:
@@ -80,8 +74,6 @@ dataset:
                   direction: from
           * name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           * name: time
             data_categories: [user.sensor]
 
@@ -96,8 +88,6 @@ dataset:
                   direction: from
           * name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           * name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
@@ -148,8 +138,6 @@ dataset:
                   direction: from
           * name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           * name: name
             data_categories: [user.financial]
           * name: preferred
@@ -159,8 +147,6 @@ dataset:
         fields:
           * name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           * name: name
             data_categories: [system.operations]
           * name: price
@@ -175,8 +161,6 @@ dataset:
               data_type: string
           * name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           * name: month
             data_categories: [system.operations]
           * name: name
@@ -209,8 +193,6 @@ dataset:
                   direction: from
           * name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           * name: opened
             data_categories: [system.operations]
 
diff --git a/data/dataset/example_test_datasets.yml b/data/dataset/example_test_datasets.yml
index 898d61bc71..e64e9fb1e8 100644
--- a/data/dataset/example_test_datasets.yml
+++ b/data/dataset/example_test_datasets.yml
@@ -11,8 +11,6 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -38,8 +36,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -59,8 +55,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -89,8 +83,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
@@ -220,8 +212,6 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -247,8 +237,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -268,8 +256,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -298,8 +284,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
diff --git a/data/dataset/google_cloud_sql_mysql_example_test_dataset.yml b/data/dataset/google_cloud_sql_mysql_example_test_dataset.yml
index 7f090e0487..86b6ad2171 100644
--- a/data/dataset/google_cloud_sql_mysql_example_test_dataset.yml
+++ b/data/dataset/google_cloud_sql_mysql_example_test_dataset.yml
@@ -11,8 +11,6 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -38,8 +36,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -59,8 +55,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -89,8 +83,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
diff --git a/data/dataset/google_cloud_sql_postgres_example_test_dataset.yml b/data/dataset/google_cloud_sql_postgres_example_test_dataset.yml
index 47989b4201..833361a300 100644
--- a/data/dataset/google_cloud_sql_postgres_example_test_dataset.yml
+++ b/data/dataset/google_cloud_sql_postgres_example_test_dataset.yml
@@ -11,8 +11,6 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -38,8 +36,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -59,8 +55,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -89,8 +83,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
diff --git a/data/dataset/manual_dataset.yml b/data/dataset/manual_dataset.yml
index 66f5e4a0da..26d6acbe48 100644
--- a/data/dataset/manual_dataset.yml
+++ b/data/dataset/manual_dataset.yml
@@ -7,8 +7,6 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: true
           - name: authorized_user
             data_categories: [user]
             fides_meta:
@@ -31,8 +29,6 @@ dataset:
         fields:
           - name: box_id
             data_categories: [user]
-            fides_meta:
-              primary_key: true
           - name: email
             data_categories: [user.contact.email]
             fides_meta:
diff --git a/data/dataset/mariadb_example_test_dataset.yml b/data/dataset/mariadb_example_test_dataset.yml
index 5e3c90f08f..204ad8a56d 100644
--- a/data/dataset/mariadb_example_test_dataset.yml
+++ b/data/dataset/mariadb_example_test_dataset.yml
@@ -11,8 +11,6 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -38,8 +36,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -59,8 +55,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -89,8 +83,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
diff --git a/data/dataset/mongo_example_test_dataset.yml b/data/dataset/mongo_example_test_dataset.yml
index 0205f33049..4392c00bfc 100644
--- a/data/dataset/mongo_example_test_dataset.yml
+++ b/data/dataset/mongo_example_test_dataset.yml
@@ -7,8 +7,6 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: customer_id
             data_categories: [user.unique_id]
             fides_meta:
@@ -81,8 +79,6 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
               data_type: object_id
           - name: customer_identifiers
             fields:
@@ -112,8 +108,6 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
               data_type: object_id
           - name: customer_information
             fields:
@@ -145,8 +139,6 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
               data_type: object_id
           - name: passenger_information
             fields:
@@ -175,8 +167,6 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
               data_type: object_id
           - name: thread
             fides_meta:
@@ -200,8 +190,6 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
               data_type: object_id
           - name: email
             data_categories: [user.contact.email]
@@ -210,8 +198,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
               references:
                 - dataset: mongo_test
                   field: flights.pilots
@@ -224,8 +210,6 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
               data_type: object_id
           - name: planes
             data_categories: [system.operations]
@@ -243,8 +227,6 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
               data_type: object_id
           - name: billing_address_id
             data_categories: [system.operations]
@@ -261,8 +243,6 @@ dataset:
             data_categories: [user.unique_id]
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.financial]
           - name: preferred
@@ -270,8 +250,6 @@ dataset:
       - name: rewards
         fields:
           - name: _id
-            fides_meta:
-              primary_key: True
               data_type: object_id
           - name: owner
             fides_meta:
diff --git a/data/dataset/mssql_example_test_dataset.yml b/data/dataset/mssql_example_test_dataset.yml
index 661c600727..d58cf013d3 100644
--- a/data/dataset/mssql_example_test_dataset.yml
+++ b/data/dataset/mssql_example_test_dataset.yml
@@ -11,8 +11,6 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -38,8 +36,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -59,8 +55,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -89,8 +83,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
diff --git a/data/dataset/mysql_example_test_dataset.yml b/data/dataset/mysql_example_test_dataset.yml
index f311ebf2c7..7d2b16541b 100644
--- a/data/dataset/mysql_example_test_dataset.yml
+++ b/data/dataset/mysql_example_test_dataset.yml
@@ -11,8 +11,6 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -38,8 +36,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -59,8 +55,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -89,8 +83,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
@@ -218,4 +210,3 @@ dataset:
               data_type: string
           - name: updated_at
             data_categories: [system.operations]
-
diff --git a/data/dataset/postgres_example_custom_request_field_dataset.yml b/data/dataset/postgres_example_custom_request_field_dataset.yml
index 96b58645d4..0a878fad87 100644
--- a/data/dataset/postgres_example_custom_request_field_dataset.yml
+++ b/data/dataset/postgres_example_custom_request_field_dataset.yml
@@ -10,7 +10,6 @@ dataset:
             data_categories: [system.operations]
             fides_meta:
               data_type: string
-              primary_key: True
           - name: email_address
             data_categories: [system.operations]
             fides_meta:
diff --git a/data/dataset/postgres_example_invalid_masking_strategy_override_dataset.yml b/data/dataset/postgres_example_invalid_masking_strategy_override_dataset.yml
index 5195a3671a..e66c2cd140 100644
--- a/data/dataset/postgres_example_invalid_masking_strategy_override_dataset.yml
+++ b/data/dataset/postgres_example_invalid_masking_strategy_override_dataset.yml
@@ -14,8 +14,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -31,8 +29,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
diff --git a/data/dataset/postgres_example_test_dataset.yml b/data/dataset/postgres_example_test_dataset.yml
index a6b7080ec2..1f01fe1f03 100644
--- a/data/dataset/postgres_example_test_dataset.yml
+++ b/data/dataset/postgres_example_test_dataset.yml
@@ -17,7 +17,6 @@ dataset:
             data_categories: [system.operations]
             fides_meta:
               data_type: integer
-              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
             fides_meta:
@@ -49,8 +48,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -73,8 +70,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -97,8 +92,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: time
             data_categories: [user.sensor]
 
@@ -113,8 +106,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
@@ -165,8 +156,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.financial]
           - name: preferred
@@ -176,8 +165,6 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [system.operations]
           - name: price
@@ -192,8 +179,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: month
             data_categories: [system.operations]
           - name: name
@@ -226,8 +211,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: opened
             data_categories: [system.operations]
       - name: visit
diff --git a/data/dataset/redshift_example_test_dataset.yml b/data/dataset/redshift_example_test_dataset.yml
index 9794f86bb3..2b1858e99a 100644
--- a/data/dataset/redshift_example_test_dataset.yml
+++ b/data/dataset/redshift_example_test_dataset.yml
@@ -11,8 +11,6 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -38,8 +36,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -62,8 +58,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -80,8 +74,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: time
             data_categories: [user.sensor]
 
@@ -96,8 +88,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
@@ -148,8 +138,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.financial]
           - name: preferred
@@ -159,8 +147,6 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [system.operations]
           - name: price
@@ -175,8 +161,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: month
             data_categories: [system.operations]
           - name: name
@@ -209,8 +193,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: opened
             data_categories: [system.operations]
 
diff --git a/data/dataset/scylladb_example_test_dataset.yml b/data/dataset/scylladb_example_test_dataset.yml
index 8374540cc1..38c0ea7b51 100644
--- a/data/dataset/scylladb_example_test_dataset.yml
+++ b/data/dataset/scylladb_example_test_dataset.yml
@@ -47,7 +47,6 @@ dataset:
             data_categories: [user.unique_id]
             fides_meta:
               data_type: integer
-              primary_key: True
           - name: uuid
             data_categories: [user.government_id]
       - name: user_activity
@@ -60,12 +59,10 @@ dataset:
                   field: users.user_id
                   direction: from
               data_type: integer
-              primary_key: True
           - name: timestamp
             data_categories: [user.behavior]
             fides_meta:
               data_type: string
-              primary_key: True
           - name: user_agent
             data_categories: [user.device]
             fides_meta:
@@ -80,7 +77,6 @@ dataset:
             data_categories: [system.operations]
             fides_meta:
               data_type: integer
-              primary_key: True
           - name: user_id
             data_categories: [user.unique_id]
             fides_meta:
@@ -101,7 +97,6 @@ dataset:
             data_categories: [system.operations]
             fides_meta:
               data_type: integer
-              primary_key: True
           - name: payment_method_id
             data_categories: [system.operations]
             fides_meta:
diff --git a/data/dataset/snowflake_example_test_dataset.yml b/data/dataset/snowflake_example_test_dataset.yml
index da13723693..9b1b79f125 100644
--- a/data/dataset/snowflake_example_test_dataset.yml
+++ b/data/dataset/snowflake_example_test_dataset.yml
@@ -11,8 +11,6 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -38,8 +36,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -66,8 +62,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -84,8 +78,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: time
             data_categories: [user.sensor]
 
@@ -100,8 +92,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
@@ -152,8 +142,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.financial]
           - name: preferred
@@ -163,8 +151,6 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [system.operations]
           - name: price
@@ -179,8 +165,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: month
             data_categories: [system.operations]
           - name: name
@@ -213,8 +197,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: opened
             data_categories: [system.operations]
 
diff --git a/data/dataset/timebase_example_test_dataset.yml b/data/dataset/timebase_example_test_dataset.yml
index ffd57a7c67..fe8a7e7d1d 100644
--- a/data/dataset/timebase_example_test_dataset.yml
+++ b/data/dataset/timebase_example_test_dataset.yml
@@ -11,8 +11,6 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -38,8 +36,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -62,8 +58,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -80,8 +74,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: time
             data_categories: [user.sensor]
 
@@ -96,8 +88,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
@@ -148,8 +138,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.financial]
           - name: preferred
@@ -159,8 +147,6 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [system.operations]
           - name: price
@@ -175,8 +161,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: month
             data_categories: [system.operations]
           - name: name
@@ -209,8 +193,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: opened
             data_categories: [system.operations]
 
diff --git a/data/saas/dataset/hubspot_dataset.yml b/data/saas/dataset/hubspot_dataset.yml
index 94f0bf43f9..406c800840 100644
--- a/data/saas/dataset/hubspot_dataset.yml
+++ b/data/saas/dataset/hubspot_dataset.yml
@@ -8,7 +8,6 @@ dataset:
           - name: id
             data_categories: [user.unique_id]
             fidesops_meta:
-              primary_key: True
               data_type: string
           - name: properties
             fidesops_meta:
@@ -117,7 +116,6 @@ dataset:
               - name: id
                 data_categories: [system.operations]
                 fidesops_meta:
-                  primary_key: True
                   data_type: string
               - name: name
                 data_categories: [system.operations]
@@ -152,7 +150,6 @@ dataset:
           - name: id
             data_categories: [user.unique_id]
             fidesops_meta:
-              primary_key: True
               data_type: string
           - name: email
             data_categories: [user.contact.email]
diff --git a/data/saas/dataset/mailchimp_dataset.yml b/data/saas/dataset/mailchimp_dataset.yml
index 05e3e45a2e..46751b04a4 100644
--- a/data/saas/dataset/mailchimp_dataset.yml
+++ b/data/saas/dataset/mailchimp_dataset.yml
@@ -35,8 +35,6 @@ dataset:
         fields:
           - name: id
             data_categories: [user.unique_id]
-            fidesops_meta:
-              primary_key: True
           - name: list_id
             data_categories: [system.operations]
           - name: email_address
diff --git a/data/saas/dataset/stripe_dataset.yml b/data/saas/dataset/stripe_dataset.yml
index f8e7482ecf..5b26474a13 100644
--- a/data/saas/dataset/stripe_dataset.yml
+++ b/data/saas/dataset/stripe_dataset.yml
@@ -7,8 +7,6 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
-            fidesops_meta:
-              primary_key: True
               read_only: True
               data_type: string
           - name: object
@@ -617,8 +615,6 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
-            fidesops_meta:
-              primary_key: True
               data_type: string
           - name: object
             data_categories: [system.operations]
@@ -714,8 +710,6 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
-            fidesops_meta:
-              primary_key: True
               data_type: string
           - name: object
             data_categories: [system.operations]
@@ -754,8 +748,6 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
-            fidesops_meta:
-              primary_key: True
               data_type: string
           - name: object
             data_categories: [system.operations]
@@ -923,8 +915,6 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
-            fidesops_meta:
-              primary_key: True
               data_type: string
           - name: object
             data_categories: [system.operations]
@@ -958,8 +948,6 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
-            fidesops_meta:
-              primary_key: True
               data_type: string
           - name: object
             data_categories: [system.operations]
@@ -1235,8 +1223,6 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
-            fidesops_meta:
-              primary_key: True
               data_type: string
           - name: object
             data_categories: [system.operations]
@@ -1324,8 +1310,6 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
-            fidesops_meta:
-              primary_key: true
               data_type: string
           - name: object
             data_categories: [system.operations]
diff --git a/src/fides/data/sample_project/sample_resources/mongo_example_test_dataset.yml b/src/fides/data/sample_project/sample_resources/mongo_example_test_dataset.yml
index 3971b6481d..542887d5c7 100644
--- a/src/fides/data/sample_project/sample_resources/mongo_example_test_dataset.yml
+++ b/src/fides/data/sample_project/sample_resources/mongo_example_test_dataset.yml
@@ -7,8 +7,6 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: customer_id
             data_categories: [user.unique_id]
             fides_meta:
@@ -77,8 +75,6 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
               data_type: object_id
           - name: customer_identifiers
             fields:
@@ -108,8 +104,6 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
               data_type: object_id
           - name: customer_information
             fields:
@@ -141,8 +135,6 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
               data_type: object_id
           - name: passenger_information
             fields:
@@ -171,8 +163,6 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
               data_type: object_id
           - name: thread
             fides_meta:
@@ -196,8 +186,6 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
               data_type: object_id
           - name: email
             data_categories: [user.contact.email]
@@ -206,8 +194,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
               references:
                 - dataset: mongo_test
                   field: flights.pilots
@@ -220,8 +206,6 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
               data_type: object_id
           - name: planes
             data_categories: [system.operations]
@@ -239,8 +223,6 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
               data_type: object_id
           - name: billing_address_id
             data_categories: [system.operations]
@@ -257,8 +239,6 @@ dataset:
             data_categories: [user.unique_id]
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.financial]
           - name: preferred
@@ -266,8 +246,6 @@ dataset:
       - name: rewards
         fields:
           - name: _id
-            fides_meta:
-              primary_key: True
               data_type: object_id
           - name: owner
             fides_meta:
diff --git a/src/fides/data/sample_project/sample_resources/postgres_example_custom_request_field_dataset.yml b/src/fides/data/sample_project/sample_resources/postgres_example_custom_request_field_dataset.yml
index 96b58645d4..0a878fad87 100644
--- a/src/fides/data/sample_project/sample_resources/postgres_example_custom_request_field_dataset.yml
+++ b/src/fides/data/sample_project/sample_resources/postgres_example_custom_request_field_dataset.yml
@@ -10,7 +10,6 @@ dataset:
             data_categories: [system.operations]
             fides_meta:
               data_type: string
-              primary_key: True
           - name: email_address
             data_categories: [system.operations]
             fides_meta:
diff --git a/src/fides/data/sample_project/sample_resources/postgres_example_test_dataset.yml b/src/fides/data/sample_project/sample_resources/postgres_example_test_dataset.yml
index e519a75008..768c972d99 100644
--- a/src/fides/data/sample_project/sample_resources/postgres_example_test_dataset.yml
+++ b/src/fides/data/sample_project/sample_resources/postgres_example_test_dataset.yml
@@ -11,8 +11,6 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -38,8 +36,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -62,8 +58,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -80,8 +74,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: time
             data_categories: [user.sensor]
 
@@ -96,8 +88,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
@@ -148,8 +138,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.financial]
           - name: preferred
@@ -159,8 +147,6 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [system.operations]
           - name: price
@@ -175,8 +161,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: month
             data_categories: [system.operations]
           - name: name
@@ -209,8 +193,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: opened
             data_categories: [system.operations]
 
diff --git a/tests/fixtures/saas/test_data/mailchimp_override_dataset.yml b/tests/fixtures/saas/test_data/mailchimp_override_dataset.yml
index b0d39058e3..d9f6e3214b 100644
--- a/tests/fixtures/saas/test_data/mailchimp_override_dataset.yml
+++ b/tests/fixtures/saas/test_data/mailchimp_override_dataset.yml
@@ -41,8 +41,6 @@ dataset:
         fields:
           - name: id
             data_categories: [user.unique_id]
-            fidesops_meta:
-              primary_key: True
           - name: list_id
             data_categories: [system.operations]
           - name: email_address
diff --git a/tests/fixtures/saas/test_data/saas_async_dataset.yml b/tests/fixtures/saas/test_data/saas_async_dataset.yml
index 42f6557ddb..2398a307a1 100644
--- a/tests/fixtures/saas/test_data/saas_async_dataset.yml
+++ b/tests/fixtures/saas/test_data/saas_async_dataset.yml
@@ -7,8 +7,6 @@ dataset:
         fields:
           - name: id
             data_categories: [user.unique_id]
-            fidesops_meta:
-              primary_key: True
           - name: system_id
             data_categories: [system]
           - name: state
diff --git a/tests/fixtures/saas/test_data/saas_custom_privacy_request_fields_dataset.yml b/tests/fixtures/saas/test_data/saas_custom_privacy_request_fields_dataset.yml
index 0009dce2e2..c0e9f1f094 100644
--- a/tests/fixtures/saas/test_data/saas_custom_privacy_request_fields_dataset.yml
+++ b/tests/fixtures/saas/test_data/saas_custom_privacy_request_fields_dataset.yml
@@ -9,4 +9,3 @@ dataset:
             data_categories: [system.operations]
             fidesops_meta:
               data_type: integer
-              primary_key: True
diff --git a/tests/fixtures/saas/test_data/saas_erasure_order_dataset.yml b/tests/fixtures/saas/test_data/saas_erasure_order_dataset.yml
index 513088b6e2..2dcd28806d 100644
--- a/tests/fixtures/saas/test_data/saas_erasure_order_dataset.yml
+++ b/tests/fixtures/saas/test_data/saas_erasure_order_dataset.yml
@@ -9,35 +9,30 @@ dataset:
             data_categories: [system.operations]
             fidesops_meta:
               data_type: integer
-              primary_key: True
       - name: refunds
         fields:
           - name: id
             data_categories: [system.operations]
             fidesops_meta:
               data_type: integer
-              primary_key: True
       - name: labels
         fields:
           - name: id
             data_categories: [system.operations]
             fidesops_meta:
               data_type: integer
-              primary_key: True
       - name: orders_to_refunds
         fields:
           - name: id
             data_categories: [system.operations]
             fidesops_meta:
               data_type: integer
-              primary_key: True
       - name: refunds_to_orders
         fields:
           - name: id
             data_categories: [system.operations]
             fidesops_meta:
               data_type: integer
-              primary_key: True
       - name: products
         fields:
           - name: id
diff --git a/tests/fixtures/saas/test_data/saas_example_dataset.yml b/tests/fixtures/saas/test_data/saas_example_dataset.yml
index c0c430eb80..8eaa4ce3a5 100644
--- a/tests/fixtures/saas/test_data/saas_example_dataset.yml
+++ b/tests/fixtures/saas/test_data/saas_example_dataset.yml
@@ -41,8 +41,6 @@ dataset:
         fields:
           - name: id
             data_categories: [user.unique_id]
-            fidesops_meta:
-              primary_key: True
           - name: list_id
             data_categories: [system.operations]
           - name: email_address
@@ -187,8 +185,6 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
-            fidesops_meta:
-              primary_key: True
               read_only: True
           - name: name
             fields:
@@ -233,37 +229,31 @@ dataset:
           - name: id
             fidesops_meta:
               data_type: integer
-              primary_key: True
       - name: skipped_collection
         fields:
           - name: id
             fides_meta:
               data_type: integer
-              primary_key: True
       - name: request_with_output_template
         fields:
           - name: id
             fides_meta:
               data_type: integer
-              primary_key: True
       - name: request_with_invalid_output_template
         fields:
           - name: id
             fides_meta:
               data_type: integer
-              primary_key: True
       - name: standalone_output_template
         fields:
           - name: id
             fides_meta:
               data_type: integer
-              primary_key: True
       - name: complex_template_example
         fields:
           - name: id
             fides_meta:
               data_type: integer
-              primary_key: True
 
   - fides_key: saas_connector_external_example
     name: An Example External SaaS Dataset
diff --git a/tests/ops/generator/test_data_generator.py b/tests/ops/generator/test_data_generator.py
index af9ab1cc62..659185d080 100644
--- a/tests/ops/generator/test_data_generator.py
+++ b/tests/ops/generator/test_data_generator.py
@@ -19,8 +19,6 @@
       - name: user
         fields:
           - name: id
-            fides_meta:
-              primary_key: True
               data_type: integer
               references:
                 - dataset: db
@@ -33,8 +31,6 @@
       - name: address
         fields:
           - name: id
-            fides_meta:
-              primary_key: True
               data_type: integer
           - name: user_id
           - name: street
diff --git a/tests/ops/service/dataset/example_datasets/multiple_identities.yml b/tests/ops/service/dataset/example_datasets/multiple_identities.yml
index 053afb3ced..dd76dbfa6d 100644
--- a/tests/ops/service/dataset/example_datasets/multiple_identities.yml
+++ b/tests/ops/service/dataset/example_datasets/multiple_identities.yml
@@ -16,8 +16,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
diff --git a/tests/ops/service/dataset/example_datasets/multiple_identities_with_external_dependency.yml b/tests/ops/service/dataset/example_datasets/multiple_identities_with_external_dependency.yml
index fdfcd32bfc..db9e227a74 100644
--- a/tests/ops/service/dataset/example_datasets/multiple_identities_with_external_dependency.yml
+++ b/tests/ops/service/dataset/example_datasets/multiple_identities_with_external_dependency.yml
@@ -32,7 +32,5 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
diff --git a/tests/ops/service/dataset/example_datasets/no_identities.yml b/tests/ops/service/dataset/example_datasets/no_identities.yml
index fac879de99..82b56f9c65 100644
--- a/tests/ops/service/dataset/example_datasets/no_identities.yml
+++ b/tests/ops/service/dataset/example_datasets/no_identities.yml
@@ -13,8 +13,6 @@ dataset:
             data_categories: [user.contact.email]
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
diff --git a/tests/ops/service/dataset/example_datasets/single_identity.yml b/tests/ops/service/dataset/example_datasets/single_identity.yml
index 19cdc7df3e..ce1506886d 100644
--- a/tests/ops/service/dataset/example_datasets/single_identity.yml
+++ b/tests/ops/service/dataset/example_datasets/single_identity.yml
@@ -16,8 +16,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
diff --git a/tests/ops/service/dataset/example_datasets/single_identity_with_internal_dependency.yml b/tests/ops/service/dataset/example_datasets/single_identity_with_internal_dependency.yml
index 708aefbaf0..af73f8bcb8 100644
--- a/tests/ops/service/dataset/example_datasets/single_identity_with_internal_dependency.yml
+++ b/tests/ops/service/dataset/example_datasets/single_identity_with_internal_dependency.yml
@@ -16,8 +16,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
diff --git a/tests/ops/util/test_dataset_yaml.py b/tests/ops/util/test_dataset_yaml.py
index edaa26a7ca..a610ac7569 100644
--- a/tests/ops/util/test_dataset_yaml.py
+++ b/tests/ops/util/test_dataset_yaml.py
@@ -33,7 +33,6 @@
           - name: id
             data_categories: [system.operations]
             fidesops_meta:
-              primary_key: True
               data_type: integer
 """
 
@@ -47,7 +46,6 @@
           - name: _id
             data_categories: [system.operations]
             fidesops_meta:
-              primary_key: True
               data_type: object_id
           - name: photo_id
             data_categories: [user.unique_id]
@@ -223,7 +221,6 @@ def test_invalid_datatype():
           - name: id
             data_categories: [system.operations]
             fidesops_meta:
-              primary_key: True
               data_type: integer
       - name: users
         fields:

From fc1aaccca78d7e2ce54c945c19a261c1157df09f Mon Sep 17 00:00:00 2001
From: Adrian Galvan <adrian@ethyca.com>
Date: Mon, 9 Dec 2024 12:28:15 -0800
Subject: [PATCH 07/22] Simplifying generate_update_stmt and fixing tests

---
 .../dataset/dynamodb_example_test_dataset.yml |  1 +
 data/dataset/mongo_example_test_dataset.yml   | 16 +++++--
 .../query_configs/mongodb_query_config.py     |  2 +-
 .../connectors/query_configs/query_config.py  | 24 +++-------
 .../service/connectors/test_query_config.py   | 47 +++++++++++--------
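 5 files changed, 48 insertions(+), 42 deletions(-)

NOTE(review): the data/dataset/mongo_example_test_dataset.yml hunk at
-198,10 re-indents `references` so that it becomes a sibling of
`data_categories` on the `id` field, whereas every other hunk for that file
restores a `fides_meta:` parent. Other fixtures in this series list
`references` as a fides_meta attribute, so this edge is presumably no longer
picked up from fides_meta; please confirm that is intended.

NOTE(review): in query_config.py, param_map now merges the "masked_"-prefixed
SET keys with the unprefixed WHERE keys. A WHERE column that is literally
named "masked_<updated column>" would overwrite the SET bind value because
the WHERE entries are unpacked last.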

diff --git a/data/dataset/dynamodb_example_test_dataset.yml b/data/dataset/dynamodb_example_test_dataset.yml
index 4aa3f8b2bf..a4e5a1291a 100644
--- a/data/dataset/dynamodb_example_test_dataset.yml
+++ b/data/dataset/dynamodb_example_test_dataset.yml
@@ -19,6 +19,7 @@ dataset:
             data_categories: [system.operations]
           - name: email
             data_categories: [user.contact.email]
+            fides_meta:
               identity: email
               data_type: string
           - name: name
diff --git a/data/dataset/mongo_example_test_dataset.yml b/data/dataset/mongo_example_test_dataset.yml
index 4392c00bfc..ece5817b1a 100644
--- a/data/dataset/mongo_example_test_dataset.yml
+++ b/data/dataset/mongo_example_test_dataset.yml
@@ -79,6 +79,7 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
+            fides_meta:
               data_type: object_id
           - name: customer_identifiers
             fields:
@@ -108,6 +109,7 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
+            fides_meta:
               data_type: object_id
           - name: customer_information
             fields:
@@ -139,6 +141,7 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
+            fides_meta:
               data_type: object_id
           - name: passenger_information
             fields:
@@ -167,6 +170,7 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
+            fides_meta:
               data_type: object_id
           - name: thread
             fides_meta:
@@ -190,6 +194,7 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
+            fides_meta:
               data_type: object_id
           - name: email
             data_categories: [user.contact.email]
@@ -198,10 +203,10 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-              references:
-                - dataset: mongo_test
-                  field: flights.pilots
-                  direction: from
+            references:
+              - dataset: mongo_test
+                field: flights.pilots
+                direction: from
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -210,6 +215,7 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
+            fides_meta:
               data_type: object_id
           - name: planes
             data_categories: [system.operations]
@@ -227,6 +233,7 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
+            fides_meta:
               data_type: object_id
           - name: billing_address_id
             data_categories: [system.operations]
@@ -250,6 +257,7 @@ dataset:
       - name: rewards
         fields:
           - name: _id
+            fides_meta:
               data_type: object_id
           - name: owner
             fides_meta:
diff --git a/src/fides/api/service/connectors/query_configs/mongodb_query_config.py b/src/fides/api/service/connectors/query_configs/mongodb_query_config.py
index 1a6aa303f0..a132f2dfc8 100644
--- a/src/fides/api/service/connectors/query_configs/mongodb_query_config.py
+++ b/src/fides/api/service/connectors/query_configs/mongodb_query_config.py
@@ -72,7 +72,7 @@ def generate_update_stmt(
         where_clauses: Dict[str, Any] = filter_nonempty_values(
             {
                 field_path.string_path: field.cast(row[field_path.string_path])
-                for field_path, field in self.primary_key_field_paths.items()
+                for field_path, field in self.identity_or_reference_field_paths.items()
             }
         )
 
diff --git a/src/fides/api/service/connectors/query_configs/query_config.py b/src/fides/api/service/connectors/query_configs/query_config.py
index 4a29e0ace5..abc3da34fe 100644
--- a/src/fides/api/service/connectors/query_configs/query_config.py
+++ b/src/fides/api/service/connectors/query_configs/query_config.py
@@ -453,32 +453,20 @@ def generate_update_stmt(
             }
         )
 
-        # Identify overlapping fields and create parameter mappings
-        overlapping_keys = set(update_value_map.keys()) & set(
-            non_empty_reference_fields.keys()
-        )
+        # Create parameter mappings with masked_ prefix for SET values
         param_map = {
-            **{k: v for k, v in update_value_map.items()},  # SET values
-            **{
-                f"where_{k}" if k in overlapping_keys else k: v
-                for k, v in non_empty_reference_fields.items()
-            },  # WHERE values
+            **{f"masked_{k}": v for k, v in update_value_map.items()},
+            **non_empty_reference_fields,
         }
 
-        # Generate SQL clauses using parameter names
         update_clauses = self.get_update_clauses(
-            {k: k for k in update_value_map}, non_empty_reference_fields
+            {k: f"masked_{k}" for k in update_value_map},
+            non_empty_reference_fields,
         )
         where_clauses = self.format_key_map_for_update_stmt(
-            {
-                k: f"where_{k}" if k in overlapping_keys else k
-                for k in non_empty_reference_fields
-            }
+            {k: k for k in non_empty_reference_fields}
         )
 
-        for k, v in non_empty_reference_fields.items():
-            update_value_map[k] = v
-
         valid = len(where_clauses) > 0 and len(update_clauses) > 0
         if not valid:
             logger.warning(
diff --git a/tests/ops/service/connectors/test_query_config.py b/tests/ops/service/connectors/test_query_config.py
index 991c945081..75c9b26c1b 100644
--- a/tests/ops/service/connectors/test_query_config.py
+++ b/tests/ops/service/connectors/test_query_config.py
@@ -288,10 +288,12 @@ def test_generate_update_stmt_one_field(
         text_clause = config.generate_update_stmt(row, erasure_policy, privacy_request)
         assert (
             text_clause.text
-            == """UPDATE customer SET name = :name WHERE email = :email"""
+            == """UPDATE customer SET name = :masked_name WHERE email = :email"""
         )
-        assert text_clause._bindparams["name"].key == "name"
-        assert text_clause._bindparams["name"].value is None  # Null masking strategy
+        assert text_clause._bindparams["masked_name"].key == "masked_name"
+        assert (
+            text_clause._bindparams["masked_name"].value is None
+        )  # Null masking strategy
 
     def test_generate_update_stmt_one_field_inbound_reference(
         self, erasure_policy_address_city, example_datasets, connection_config
@@ -317,9 +319,14 @@ def test_generate_update_stmt_one_field_inbound_reference(
         text_clause = config.generate_update_stmt(
             row, erasure_policy_address_city, privacy_request
         )
-        assert text_clause.text == """UPDATE address SET city = :city WHERE id = :id"""
-        assert text_clause._bindparams["city"].key == "city"
-        assert text_clause._bindparams["city"].value is None  # Null masking strategy
+        assert (
+            text_clause.text
+            == """UPDATE address SET city = :masked_city WHERE id = :id"""
+        )
+        assert text_clause._bindparams["masked_city"].key == "masked_city"
+        assert (
+            text_clause._bindparams["masked_city"].value is None
+        )  # Null masking strategy
 
     def test_generate_update_stmt_length_truncation(
         self,
@@ -349,12 +356,12 @@ def test_generate_update_stmt_length_truncation(
         )
         assert (
             text_clause.text
-            == """UPDATE customer SET name = :name WHERE email = :email"""
+            == """UPDATE customer SET name = :masked_name WHERE email = :email"""
         )
-        assert text_clause._bindparams["name"].key == "name"
+        assert text_clause._bindparams["masked_name"].key == "masked_name"
         # length truncation on name field
         assert (
-            text_clause._bindparams["name"].value
+            text_clause._bindparams["masked_name"].value
             == "some rewrite value that is very long and"
         )
 
@@ -399,23 +406,23 @@ def test_generate_update_stmt_multiple_fields_same_rule(
         text_clause = config.generate_update_stmt(row, erasure_policy, privacy_request)
         assert (
             text_clause.text
-            == "UPDATE customer SET email = :email, name = :name WHERE email = :where_email"
+            == "UPDATE customer SET email = :masked_email, name = :masked_name WHERE email = :email"
         )
-        assert text_clause._bindparams["name"].key == "name"
+        assert text_clause._bindparams["masked_name"].key == "masked_name"
         # since length is set to 40 in dataset.yml, we expect only first 40 chars of masked val
         assert (
-            text_clause._bindparams["name"].value
+            text_clause._bindparams["masked_name"].value
             == HashMaskingStrategy(HashMaskingConfiguration(algorithm="SHA-512")).mask(
                 ["John Customer"], request_id=privacy_request.id
             )[0][0:40]
         )
         assert (
-            text_clause._bindparams["email"].value
+            text_clause._bindparams["masked_email"].value
             == HashMaskingStrategy(HashMaskingConfiguration(algorithm="SHA-512")).mask(
                 ["customer-1@example.com"], request_id=privacy_request.id
             )[0]
         )
-        assert text_clause._bindparams["where_email"].value == "customer-1@example.com"
+        assert text_clause._bindparams["email"].value == "customer-1@example.com"
         clear_cache_secrets(privacy_request.id)
 
     def test_generate_update_stmts_from_multiple_rules(
@@ -444,12 +451,14 @@ def test_generate_update_stmts_from_multiple_rules(
 
         assert (
             text_clause.text
-            == "UPDATE customer SET name = :name, email = :email WHERE email = :where_email"
+            == "UPDATE customer SET email = :masked_email, name = :masked_name WHERE email = :email"
         )
         # Two different masking strategies used for name and email
-        assert text_clause._bindparams["name"].value is None  # Null masking strategy
         assert (
-            text_clause._bindparams["email"].value == "*****"
+            text_clause._bindparams["masked_name"].value is None
+        )  # Null masking strategy
+        assert (
+            text_clause._bindparams["masked_email"].value == "*****"
         )  # String rewrite masking strategy
 
 
@@ -618,7 +627,7 @@ def test_generate_update_stmt_multiple_fields(
             row, erasure_policy, privacy_request
         )
 
-        expected_result_0 = {"_id": 1}
+        expected_result_0 = {"customer_id": 1}
         expected_result_1 = {
             "$set": {
                 "birthday": None,
@@ -700,7 +709,7 @@ def test_generate_update_stmt_multiple_rules(
         mongo_statement = config.generate_update_stmt(
             row, erasure_policy_two_rules, privacy_request
         )
-        assert mongo_statement[0] == {"_id": 1}
+        assert mongo_statement[0] == {"customer_id": 1}
         assert len(mongo_statement[1]["$set"]["gender"]) == 30
         assert (
             mongo_statement[1]["$set"]["birthday"]

From 0e11551ccb961203af62fb8eda00be3e1484d63c Mon Sep 17 00:00:00 2001
From: Adrian Galvan <adrian@ethyca.com>
Date: Mon, 9 Dec 2024 13:11:04 -0800
Subject: [PATCH 08/22] More cleanup

---
 .../fixtures/connectors/datasetconfig.json    | 18 ++---
 tests/fixtures/email_fixtures.py              | 27 ++++---
 .../saas/test_data/saas_example_dataset.yml   |  1 +
 tests/ops/models/test_datasetconfig.py        |  7 +-
 tests/ops/task/test_create_request_tasks.py   |  6 +-
 tests/ops/task/traversal_data.py              | 70 +++++++------------
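 6 files changed, 54 insertions(+), 75 deletions(-)

NOTE(review): in tests/fixtures/email_fixtures.py this patch drops the comma
after "data_type": "string" on the line directly above
"custom_request_field" in three places (fields site_id, custom_field and
site_id2). The two adjacent string literals are then followed by a second
":" inside the dict literal, which Python rejects as a syntax error, so the
fixture module cannot be imported until those commas are restored. The other
comma removals in this patch only drop a trailing comma and are harmless.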

diff --git a/clients/admin-ui/cypress/fixtures/connectors/datasetconfig.json b/clients/admin-ui/cypress/fixtures/connectors/datasetconfig.json
index 6cf4d7d77c..c41d13993b 100644
--- a/clients/admin-ui/cypress/fixtures/connectors/datasetconfig.json
+++ b/clients/admin-ui/cypress/fixtures/connectors/datasetconfig.json
@@ -38,7 +38,7 @@
                 "fides_meta": {
                   "references": null,
                   "identity": null,
-                  "primary_key": true,
+                  "primary_key": null,
                   "data_type": null,
                   "length": null,
                   "return_all_elements": null,
@@ -125,7 +125,7 @@
                 "fides_meta": {
                   "references": null,
                   "identity": null,
-                  "primary_key": true,
+                  "primary_key": null,
                   "data_type": null,
                   "length": null,
                   "return_all_elements": null,
@@ -199,7 +199,7 @@
                 "fides_meta": {
                   "references": null,
                   "identity": null,
-                  "primary_key": true,
+                  "primary_key": null,
                   "data_type": null,
                   "length": null,
                   "return_all_elements": null,
@@ -258,7 +258,7 @@
                 "fides_meta": {
                   "references": null,
                   "identity": null,
-                  "primary_key": true,
+                  "primary_key": null,
                   "data_type": null,
                   "length": null,
                   "return_all_elements": null,
@@ -366,7 +366,7 @@
                 "fides_meta": {
                   "references": null,
                   "identity": null,
-                  "primary_key": true,
+                  "primary_key": null,
                   "data_type": null,
                   "length": null,
                   "return_all_elements": null,
@@ -466,7 +466,7 @@
                 "fides_meta": {
                   "references": null,
                   "identity": null,
-                  "primary_key": true,
+                  "primary_key": null,
                   "data_type": null,
                   "length": null,
                   "return_all_elements": null,
@@ -503,7 +503,7 @@
                 "fides_meta": {
                   "references": null,
                   "identity": null,
-                  "primary_key": true,
+                  "primary_key": null,
                   "data_type": null,
                   "length": null,
                   "return_all_elements": null,
@@ -555,7 +555,7 @@
                 "fides_meta": {
                   "references": null,
                   "identity": null,
-                  "primary_key": true,
+                  "primary_key": null,
                   "data_type": null,
                   "length": null,
                   "return_all_elements": null,
@@ -664,7 +664,7 @@
                 "fides_meta": {
                   "references": null,
                   "identity": null,
-                  "primary_key": true,
+                  "primary_key": null,
                   "data_type": null,
                   "length": null,
                   "return_all_elements": null,
diff --git a/tests/fixtures/email_fixtures.py b/tests/fixtures/email_fixtures.py
index 0df1d61b84..4d9f6c5587 100644
--- a/tests/fixtures/email_fixtures.py
+++ b/tests/fixtures/email_fixtures.py
@@ -193,29 +193,28 @@ def dynamic_email_address_config_dataset(
                             "name": "id",
                             "data_categories": ["system.operations"],
                             "fides_meta": {
-                                "data_type": "string",
-                                "primary_key": True,
+                                "data_type": "string"
                             },
                         },
                         {
                             "name": "email_address",
                             "data_categories": ["system.operations"],
                             "fides_meta": {
-                                "data_type": "string",
+                                "data_type": "string"
                             },
                         },
                         {
                             "name": "vendor_name",
                             "data_categories": ["system.operations"],
                             "fides_meta": {
-                                "data_type": "string",
+                                "data_type": "string"
                             },
                         },
                         {
                             "name": "site_id",
                             "data_categories": ["system.operations"],
                             "fides_meta": {
-                                "data_type": "string",
+                                "data_type": "string"
                                 "custom_request_field": "tenant_id",
                             },
                         },
@@ -246,29 +245,28 @@ def dynamic_email_address_config_second_dataset(
                             "name": "id",
                             "data_categories": ["system.operations"],
                             "fides_meta": {
-                                "data_type": "string",
-                                "primary_key": True,
+                                "data_type": "string"
                             },
                         },
                         {
                             "name": "email_address",
                             "data_categories": ["system.operations"],
                             "fides_meta": {
-                                "data_type": "string",
+                                "data_type": "string"
                             },
                         },
                         {
                             "name": "vendor_name",
                             "data_categories": ["system.operations"],
                             "fides_meta": {
-                                "data_type": "string",
+                                "data_type": "string"
                             },
                         },
                         {
                             "name": "custom_field",
                             "data_categories": ["system.operations"],
                             "fides_meta": {
-                                "data_type": "string",
+                                "data_type": "string"
                                 "custom_request_field": "custom_field",
                             },
                         },
@@ -281,29 +279,28 @@ def dynamic_email_address_config_second_dataset(
                             "name": "id2",
                             "data_categories": ["system.operations"],
                             "fides_meta": {
-                                "data_type": "string",
-                                "primary_key": True,
+                                "data_type": "string"
                             },
                         },
                         {
                             "name": "email_address2",
                             "data_categories": ["system.operations"],
                             "fides_meta": {
-                                "data_type": "string",
+                                "data_type": "string"
                             },
                         },
                         {
                             "name": "vendor_name2",
                             "data_categories": ["system.operations"],
                             "fides_meta": {
-                                "data_type": "string",
+                                "data_type": "string"
                             },
                         },
                         {
                             "name": "site_id2",
                             "data_categories": ["system.operations"],
                             "fides_meta": {
-                                "data_type": "string",
+                                "data_type": "string"
                                 "custom_request_field": "tenant_id",
                             },
                         },
diff --git a/tests/fixtures/saas/test_data/saas_example_dataset.yml b/tests/fixtures/saas/test_data/saas_example_dataset.yml
index 8eaa4ce3a5..500c82df50 100644
--- a/tests/fixtures/saas/test_data/saas_example_dataset.yml
+++ b/tests/fixtures/saas/test_data/saas_example_dataset.yml
@@ -185,6 +185,7 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
+            fidesops_meta:
               read_only: True
           - name: name
             fields:
diff --git a/tests/ops/models/test_datasetconfig.py b/tests/ops/models/test_datasetconfig.py
index 969002baac..933c2fcb3f 100644
--- a/tests/ops/models/test_datasetconfig.py
+++ b/tests/ops/models/test_datasetconfig.py
@@ -194,18 +194,17 @@ def test_convert_dataset_to_graph(example_datasets):
         (FieldAddress("postgres_example_test_dataset", "customer", "id"), "from")
     ]
 
-    # check that primary key member has been set
     assert (
         field([graph], "postgres_example_test_dataset", "address", "id").primary_key
-        is True
+        is False
     )
     assert (
         field([graph], "postgres_example_test_dataset", "customer", "id").primary_key
-        is True
+        is False
     )
     assert (
         field([graph], "postgres_example_test_dataset", "employee", "id").primary_key
-        is True
+        is False
     )
     assert (
         field([graph], "postgres_example_test_dataset", "visit", "email").primary_key
diff --git a/tests/ops/task/test_create_request_tasks.py b/tests/ops/task/test_create_request_tasks.py
index 290c2dc1be..3792fea0e3 100644
--- a/tests/ops/task/test_create_request_tasks.py
+++ b/tests/ops/task/test_create_request_tasks.py
@@ -105,7 +105,7 @@
             "is_array": False,
             "read_only": None,
             "references": [],
-            "primary_key": True,
+            "primary_key": False,
             "data_categories": ["system.operations"],
             "data_type_converter": "None",
             "return_all_elements": None,
@@ -307,7 +307,7 @@ def test_persist_access_tasks_with_object_fields_in_collection(
                     "is_array": False,
                     "read_only": None,
                     "references": [],
-                    "primary_key": True,
+                    "primary_key": False,
                     "data_categories": ["system.operations"],
                     "data_type_converter": "object_id",
                     "return_all_elements": None,
@@ -927,7 +927,7 @@ def test_erase_after_saas_upstream_and_downstream_tasks(
                 "is_array": False,
                 "read_only": None,
                 "references": [],
-                "primary_key": True,
+                "primary_key": False,
                 "data_categories": ["system.operations"],
                 "data_type_converter": "integer",
                 "return_all_elements": None,
diff --git a/tests/ops/task/traversal_data.py b/tests/ops/task/traversal_data.py
index 20d3773e17..59032b337f 100644
--- a/tests/ops/task/traversal_data.py
+++ b/tests/ops/task/traversal_data.py
@@ -33,7 +33,7 @@ def postgres_dataset_dict(db_name: str) -> Dict[str, Any]:
                 "fields": [
                     {
                         "name": "id",
-                        "fides_meta": {"primary_key": True, "data_type": "integer"},
+                        "fides_meta": {"data_type": "integer"},
                     },
                     {"name": "name", "fides_meta": {"data_type": "string"}},
                     {
@@ -58,7 +58,7 @@ def postgres_dataset_dict(db_name: str) -> Dict[str, Any]:
                 "name": "address",
                 "after": [f"{db_name}.customer", f"{db_name}.orders"],
                 "fields": [
-                    {"name": "id", "fides_meta": {"primary_key": True}},
+                    {"name": "id"},
                     {"name": "street", "fides_meta": {"data_type": "string"}},
                     {"name": "city", "fides_meta": {"data_type": "string"}},
                     {"name": "state", "fides_meta": {"data_type": "string"}},
@@ -68,7 +68,7 @@ def postgres_dataset_dict(db_name: str) -> Dict[str, Any]:
             {
                 "name": "orders",
                 "fields": [
-                    {"name": "id", "fides_meta": {"primary_key": True}},
+                    {"name": "id"},
                     {
                         "name": "customer_id",
                         "fides_meta": {
@@ -113,7 +113,7 @@ def postgres_dataset_dict(db_name: str) -> Dict[str, Any]:
                 "fields": [
                     {
                         "name": "id",
-                        "fides_meta": {"primary_key": True, "data_type": "string"},
+                        "fides_meta": {"data_type": "string"},
                     },
                     {"name": "name", "fides_meta": {"data_type": "string"}},
                     {"name": "ccn"},
@@ -156,7 +156,7 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
             {
                 "name": "address",
                 "fields": [
-                    {"name": "_id", "fides_meta": {"primary_key": True}},
+                    {"name": "_id"},
                     {
                         "name": "id",
                         "fides_meta": {
@@ -178,7 +178,7 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
             {
                 "name": "orders",
                 "fields": [
-                    {"name": "_id", "fides_meta": {"primary_key": True}},
+                    {"name": "_id"},
                     {
                         "name": "customer_id",
                         "fides_meta": {
@@ -200,7 +200,6 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                     {
                         "name": "_id",
                         "fides_meta": {
-                            "primary_key": True,
                             "data_type": "object_id",
                         },
                     },
@@ -229,7 +228,6 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                     {
                         "name": "_id",
                         "fides_meta": {
-                            "primary_key": True,
                             "data_type": "object_id",
                         },
                     },
@@ -240,25 +238,25 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                             {
                                 "name": "comment",
                                 "fides_meta": {
-                                    "data_type": "string",
+                                    "data_type": "string"
                                 },
                             },
                             {
                                 "name": "message",
                                 "fides_meta": {
-                                    "data_type": "string",
+                                    "data_type": "string"
                                 },
                             },
                             {
                                 "name": "chat_name",
                                 "fides_meta": {
-                                    "data_type": "string",
+                                    "data_type": "string"
                                 },
                             },
                             {
                                 "name": "ccn",
                                 "fides_meta": {
-                                    "data_type": "string",
+                                    "data_type": "string"
                                 },
                             },
                         ],
@@ -270,9 +268,6 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                 "fields": [
                     {
                         "name": "_id",
-                        "fides_meta": {
-                            "primary_key": True,
-                        },
                     },
                     {
                         "name": "birthday",
@@ -320,19 +315,19 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                             {
                                 "name": "name",
                                 "fides_meta": {
-                                    "data_type": "string",
+                                    "data_type": "string"
                                 },
                             },
                             {
                                 "name": "relationship",
                                 "fides_meta": {
-                                    "data_type": "string",
+                                    "data_type": "string"
                                 },
                             },
                             {
                                 "name": "phone",
                                 "fides_meta": {
-                                    "data_type": "string",
+                                    "data_type": "string"
                                 },
                             },
                         ],
@@ -352,13 +347,13 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                             {
                                 "name": "employer",
                                 "fides_meta": {
-                                    "data_type": "string",
+                                    "data_type": "string"
                                 },
                             },
                             {
                                 "name": "position",
                                 "fides_meta": {
-                                    "data_type": "string",
+                                    "data_type": "string"
                                 },
                             },
                             {
@@ -375,7 +370,6 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                     {
                         "name": "_id",
                         "fides_meta": {
-                            "primary_key": True,
                             "data_type": "object_id",
                         },
                     },
@@ -386,20 +380,20 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                             {
                                 "name": "email",
                                 "fides_meta": {
-                                    "data_type": "string",
+                                    "data_type": "string"
                                     "identity": "email",
                                 },
                             },
                             {
                                 "name": "phone",
                                 "fides_meta": {
-                                    "data_type": "string",
+                                    "data_type": "string"
                                 },
                             },
                             {
                                 "name": "internal_customer_id",
                                 "fides_meta": {
-                                    "data_type": "string",
+                                    "data_type": "string"
                                 },
                             },
                         ],
@@ -424,7 +418,6 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                     {
                         "name": "_id",
                         "fides_meta": {
-                            "primary_key": True,
                             "data_type": "object_id",
                         },
                     },
@@ -438,7 +431,6 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                     {
                         "name": "id",
                         "fides_meta": {
-                            "primary_key": True,
                             "references": [
                                 {
                                     "dataset": mongo_db_name,
@@ -459,10 +451,7 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                 "fields": [
                     {
                         "name": "_id",
-                        "fides_meta": {
-                            "primary_key": True,
-                            "data_type": "object_id",
-                        },
+                        "fides_meta": {"data_type": "object_id"},
                     },
                     {
                         "name": "date",
@@ -490,7 +479,7 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                             {
                                 "name": "full_name",
                                 "fides_meta": {
-                                    "data_type": "string",
+                                    "data_type": "string"
                                 },
                             },
                         ],
@@ -510,10 +499,7 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                 "fields": [
                     {
                         "name": "_id",
-                        "fides_meta": {
-                            "primary_key": True,
-                            "data_type": "object_id",
-                        },
+                        "fides_meta": {"data_type": "object_id"},
                     },
                     {
                         "name": "customer_identifiers",
@@ -560,10 +546,7 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                 "fields": [
                     {
                         "name": "_id",
-                        "fides_meta": {
-                            "primary_key": True,
-                            "data_type": "object_id",
-                        },
+                        "fides_meta": {"data_type": "object_id"},
                     },
                     {
                         "name": "owner",
@@ -657,7 +640,7 @@ def scylladb_dataset_dict(db_name: str) -> Dict[str, Any]:
                     {
                         "name": "user_id",
                         "data_categories": ["user.unique_id"],
-                        "fides_meta": {"data_type": "integer", "primary_key": True},
+                        "fides_meta": {"data_type": "integer"},
                     },
                     {"name": "uuid", "data_categories": ["user.government_id"]},
                 ],
@@ -677,13 +660,12 @@ def scylladb_dataset_dict(db_name: str) -> Dict[str, Any]:
                                 }
                             ],
                             "data_type": "integer",
-                            "primary_key": True,
                         },
                     },
                     {
                         "name": "timestamp",
                         "data_categories": ["user.behavior"],
-                        "fides_meta": {"data_type": "string", "primary_key": True},
+                        "fides_meta": {"data_type": "string"},
                     },
                     {
                         "name": "user_agent",
@@ -703,7 +685,7 @@ def scylladb_dataset_dict(db_name: str) -> Dict[str, Any]:
                     {
                         "name": "payment_method_id",
                         "data_categories": ["system.operations"],
-                        "fides_meta": {"data_type": "integer", "primary_key": True},
+                        "fides_meta": {"data_type": "integer"},
                     },
                     {
                         "name": "user_id",
@@ -733,7 +715,7 @@ def scylladb_dataset_dict(db_name: str) -> Dict[str, Any]:
                     {
                         "name": "order_id",
                         "data_categories": ["system.operations"],
-                        "fides_meta": {"data_type": "integer", "primary_key": True},
+                        "fides_meta": {"data_type": "integer"},
                     },
                     {
                         "name": "payment_method_id",

From 6a10d877ab202805e7979492cde710493fd11f3a Mon Sep 17 00:00:00 2001
From: Adrian Galvan <adrian@ethyca.com>
Date: Mon, 9 Dec 2024 13:34:13 -0800
Subject: [PATCH 09/22] Misc fixes

---
 .../connectors/query_configs/query_config.py  |  8 ++-
 .../service/connectors/scylla_query_config.py |  8 +--
 tests/fixtures/email_fixtures.py              | 42 +++++-----------
 tests/ops/task/traversal_data.py              | 50 +++++--------------
 4 files changed, 36 insertions(+), 72 deletions(-)

diff --git a/src/fides/api/service/connectors/query_configs/query_config.py b/src/fides/api/service/connectors/query_configs/query_config.py
index abc3da34fe..b201c2c3a4 100644
--- a/src/fides/api/service/connectors/query_configs/query_config.py
+++ b/src/fides/api/service/connectors/query_configs/query_config.py
@@ -428,7 +428,9 @@ def get_update_stmt(
 
     @abstractmethod
     def get_update_clauses(
-        self, update_value_map: Dict[str, Any], non_empty_primary_keys: Dict[str, Field]
+        self,
+        update_value_map: Dict[str, Any],
+        non_empty_reference_fields: Dict[str, Field],
     ) -> List[str]:
         """Returns a list of update clauses for the update statement."""
 
@@ -553,7 +555,9 @@ def format_key_map_for_update_stmt(self, param_map: Dict[str, Any]) -> List[str]
         return [f"{k} = :{v}" for k, v in sorted(param_map.items())]
 
     def get_update_clauses(
-        self, update_value_map: Dict[str, Any], non_empty_primary_keys: Dict[str, Field]
+        self,
+        update_value_map: Dict[str, Any],
+        non_empty_reference_fields: Dict[str, Field],
     ) -> List[str]:
         """Returns a list of update clauses for the update statement."""
         return self.format_key_map_for_update_stmt(update_value_map)
diff --git a/src/fides/api/service/connectors/scylla_query_config.py b/src/fides/api/service/connectors/scylla_query_config.py
index dc619a72c7..5e93668459 100644
--- a/src/fides/api/service/connectors/scylla_query_config.py
+++ b/src/fides/api/service/connectors/scylla_query_config.py
@@ -75,14 +75,16 @@ def format_key_map_for_update_stmt(self, param_map: Dict[str, Any]) -> List[str]
         return [f"{k} = %({v})s" for k, v in sorted(param_map.items())]
 
     def get_update_clauses(
-        self, update_value_map: Dict[str, Any], non_empty_primary_keys: Dict[str, Field]
+        self,
+        update_value_map: Dict[str, Any],
+        non_empty_reference_fields: Dict[str, Field],
     ) -> List[str]:
         """Returns a list of update clauses for the update statement."""
         return self.format_key_map_for_update_stmt(
             {
                 key: value
-                for key, value in update_value_map.keys()
-                if key not in non_empty_primary_keys
+                for key, value in update_value_map.items()
+                if key not in non_empty_reference_fields
             }
         )
 
diff --git a/tests/fixtures/email_fixtures.py b/tests/fixtures/email_fixtures.py
index 4d9f6c5587..e25f39e3f4 100644
--- a/tests/fixtures/email_fixtures.py
+++ b/tests/fixtures/email_fixtures.py
@@ -192,29 +192,23 @@ def dynamic_email_address_config_dataset(
                         {
                             "name": "id",
                             "data_categories": ["system.operations"],
-                            "fides_meta": {
-                                "data_type": "string"
-                            },
+                            "fides_meta": {"data_type": "string"},
                         },
                         {
                             "name": "email_address",
                             "data_categories": ["system.operations"],
-                            "fides_meta": {
-                                "data_type": "string"
-                            },
+                            "fides_meta": {"data_type": "string"},
                         },
                         {
                             "name": "vendor_name",
                             "data_categories": ["system.operations"],
-                            "fides_meta": {
-                                "data_type": "string"
-                            },
+                            "fides_meta": {"data_type": "string"},
                         },
                         {
                             "name": "site_id",
                             "data_categories": ["system.operations"],
                             "fides_meta": {
-                                "data_type": "string"
+                                "data_type": "string",
                                 "custom_request_field": "tenant_id",
                             },
                         },
@@ -244,29 +238,23 @@ def dynamic_email_address_config_second_dataset(
                         {
                             "name": "id",
                             "data_categories": ["system.operations"],
-                            "fides_meta": {
-                                "data_type": "string"
-                            },
+                            "fides_meta": {"data_type": "string"},
                         },
                         {
                             "name": "email_address",
                             "data_categories": ["system.operations"],
-                            "fides_meta": {
-                                "data_type": "string"
-                            },
+                            "fides_meta": {"data_type": "string"},
                         },
                         {
                             "name": "vendor_name",
                             "data_categories": ["system.operations"],
-                            "fides_meta": {
-                                "data_type": "string"
-                            },
+                            "fides_meta": {"data_type": "string"},
                         },
                         {
                             "name": "custom_field",
                             "data_categories": ["system.operations"],
                             "fides_meta": {
-                                "data_type": "string"
+                                "data_type": "string",
                                 "custom_request_field": "custom_field",
                             },
                         },
@@ -278,29 +266,23 @@ def dynamic_email_address_config_second_dataset(
                         {
                             "name": "id2",
                             "data_categories": ["system.operations"],
-                            "fides_meta": {
-                                "data_type": "string"
-                            },
+                            "fides_meta": {"data_type": "string"},
                         },
                         {
                             "name": "email_address2",
                             "data_categories": ["system.operations"],
-                            "fides_meta": {
-                                "data_type": "string"
-                            },
+                            "fides_meta": {"data_type": "string"},
                         },
                         {
                             "name": "vendor_name2",
                             "data_categories": ["system.operations"],
-                            "fides_meta": {
-                                "data_type": "string"
-                            },
+                            "fides_meta": {"data_type": "string"},
                         },
                         {
                             "name": "site_id2",
                             "data_categories": ["system.operations"],
                             "fides_meta": {
-                                "data_type": "string"
+                                "data_type": "string",
                                 "custom_request_field": "tenant_id",
                             },
                         },
diff --git a/tests/ops/task/traversal_data.py b/tests/ops/task/traversal_data.py
index 59032b337f..07c6067d7b 100644
--- a/tests/ops/task/traversal_data.py
+++ b/tests/ops/task/traversal_data.py
@@ -237,27 +237,19 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                         "fields": [
                             {
                                 "name": "comment",
-                                "fides_meta": {
-                                    "data_type": "string"
-                                },
+                                "fides_meta": {"data_type": "string"},
                             },
                             {
                                 "name": "message",
-                                "fides_meta": {
-                                    "data_type": "string"
-                                },
+                                "fides_meta": {"data_type": "string"},
                             },
                             {
                                 "name": "chat_name",
-                                "fides_meta": {
-                                    "data_type": "string"
-                                },
+                                "fides_meta": {"data_type": "string"},
                             },
                             {
                                 "name": "ccn",
-                                "fides_meta": {
-                                    "data_type": "string"
-                                },
+                                "fides_meta": {"data_type": "string"},
                             },
                         ],
                     },
@@ -314,21 +306,15 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                         "fields": [
                             {
                                 "name": "name",
-                                "fides_meta": {
-                                    "data_type": "string"
-                                },
+                                "fides_meta": {"data_type": "string"},
                             },
                             {
                                 "name": "relationship",
-                                "fides_meta": {
-                                    "data_type": "string"
-                                },
+                                "fides_meta": {"data_type": "string"},
                             },
                             {
                                 "name": "phone",
-                                "fides_meta": {
-                                    "data_type": "string"
-                                },
+                                "fides_meta": {"data_type": "string"},
                             },
                         ],
                     },
@@ -346,15 +332,11 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                         "fields": [
                             {
                                 "name": "employer",
-                                "fides_meta": {
-                                    "data_type": "string"
-                                },
+                                "fides_meta": {"data_type": "string"},
                             },
                             {
                                 "name": "position",
-                                "fides_meta": {
-                                    "data_type": "string"
-                                },
+                                "fides_meta": {"data_type": "string"},
                             },
                             {
                                 "name": "direct_reports",
@@ -380,21 +362,17 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                             {
                                 "name": "email",
                                 "fides_meta": {
-                                    "data_type": "string"
+                                    "data_type": "string",
                                     "identity": "email",
                                 },
                             },
                             {
                                 "name": "phone",
-                                "fides_meta": {
-                                    "data_type": "string"
-                                },
+                                "fides_meta": {"data_type": "string"},
                             },
                             {
                                 "name": "internal_customer_id",
-                                "fides_meta": {
-                                    "data_type": "string"
-                                },
+                                "fides_meta": {"data_type": "string"},
                             },
                         ],
                     },
@@ -478,9 +456,7 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                             },
                             {
                                 "name": "full_name",
-                                "fides_meta": {
-                                    "data_type": "string"
-                                },
+                                "fides_meta": {"data_type": "string"},
                             },
                         ],
                     },

From 001b8ecfcc67eaa74f2e4321b44572140e6f2946 Mon Sep 17 00:00:00 2001
From: Adrian Galvan <adrian@ethyca.com>
Date: Mon, 9 Dec 2024 13:45:01 -0800
Subject: [PATCH 10/22] Renaming identity_or_reference_field_paths to
 incoming_field_paths

---
 .../connectors/query_configs/mongodb_query_config.py        | 2 +-
 .../api/service/connectors/query_configs/query_config.py    | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/fides/api/service/connectors/query_configs/mongodb_query_config.py b/src/fides/api/service/connectors/query_configs/mongodb_query_config.py
index a132f2dfc8..edb57599db 100644
--- a/src/fides/api/service/connectors/query_configs/mongodb_query_config.py
+++ b/src/fides/api/service/connectors/query_configs/mongodb_query_config.py
@@ -72,7 +72,7 @@ def generate_update_stmt(
         where_clauses: Dict[str, Any] = filter_nonempty_values(
             {
                 field_path.string_path: field.cast(row[field_path.string_path])
-                for field_path, field in self.identity_or_reference_field_paths.items()
+                for field_path, field in self.incoming_field_paths.items()
             }
         )
 
diff --git a/src/fides/api/service/connectors/query_configs/query_config.py b/src/fides/api/service/connectors/query_configs/query_config.py
index b201c2c3a4..2026fe0b0f 100644
--- a/src/fides/api/service/connectors/query_configs/query_config.py
+++ b/src/fides/api/service/connectors/query_configs/query_config.py
@@ -101,8 +101,8 @@ def primary_key_field_paths(self) -> Dict[FieldPath, Field]:
         }
 
     @property
-    def identity_or_reference_field_paths(self) -> Dict[FieldPath, Field]:
-        """Mapping of FieldPaths to Fields that have identity or dataset references"""
+    def incoming_field_paths(self) -> Dict[FieldPath, Field]:
+        """Mapping of FieldPaths to Fields that have incoming identity or dataset references"""
         return {
             field_path: field
             for field_path, field in self.field_map().items()
@@ -450,7 +450,7 @@ def generate_update_stmt(
         non_empty_reference_fields: Dict[str, Field] = filter_nonempty_values(
             {
                 fpath.string_path: fld.cast(row[fpath.string_path])
-                for fpath, fld in self.identity_or_reference_field_paths.items()
+                for fpath, fld in self.incoming_field_paths.items()
                 if fpath.string_path in row
             }
         )

From 8816be7d70fdc865a0f644b01d97424a8956536f Mon Sep 17 00:00:00 2001
From: Adrian Galvan <adrian@ethyca.com>
Date: Mon, 9 Dec 2024 15:45:23 -0800
Subject: [PATCH 11/22] Re-adding continue on error

---
 .github/workflows/backend_checks.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/backend_checks.yml b/.github/workflows/backend_checks.yml
index eb77b7e21c..e3c7a7c5e2 100644
--- a/.github/workflows/backend_checks.yml
+++ b/.github/workflows/backend_checks.yml
@@ -255,6 +255,7 @@ jobs:
 
     runs-on: ubuntu-latest
     timeout-minutes: 45
+    continue-on-error: true
     steps:
       - name: Download container
         uses: actions/download-artifact@v4

From cae84930ab676366d7b19c923caf8f762bc327da Mon Sep 17 00:00:00 2001
From: Adrian Galvan <adrian@ethyca.com>
Date: Mon, 9 Dec 2024 16:34:53 -0800
Subject: [PATCH 12/22] Adding individual timeouts to tests

---
 data/saas/dataset/stripe_dataset.yml | 8 ++++++++
 dev-requirements.txt                 | 3 ++-
 pyproject.toml                       | 3 ++-
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/data/saas/dataset/stripe_dataset.yml b/data/saas/dataset/stripe_dataset.yml
index 5b26474a13..5d1f101973 100644
--- a/data/saas/dataset/stripe_dataset.yml
+++ b/data/saas/dataset/stripe_dataset.yml
@@ -7,6 +7,7 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
+            fidesops_meta:
               read_only: True
               data_type: string
           - name: object
@@ -615,6 +616,7 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
+            fidesops_meta:
               data_type: string
           - name: object
             data_categories: [system.operations]
@@ -710,6 +712,7 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
+            fidesops_meta:
               data_type: string
           - name: object
             data_categories: [system.operations]
@@ -748,6 +751,7 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
+            fidesops_meta:
               data_type: string
           - name: object
             data_categories: [system.operations]
@@ -915,6 +919,7 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
+            fidesops_meta:
               data_type: string
           - name: object
             data_categories: [system.operations]
@@ -948,6 +953,7 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
+            fidesops_meta:
               data_type: string
           - name: object
             data_categories: [system.operations]
@@ -1223,6 +1229,7 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
+            fidesops_meta:
               data_type: string
           - name: object
             data_categories: [system.operations]
@@ -1310,6 +1317,7 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
+            fidesops_meta:
               data_type: string
           - name: object
             data_categories: [system.operations]
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 149cdcd658..467e7b05a3 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -7,12 +7,13 @@ mypy==1.10.0
 nox==2022.8.7
 pre-commit==2.20.0
 pylint==3.2.5
+pytest==7.2.2
 pytest-asyncio==0.19.0
 pytest-cov==4.0.0
 pytest-env==0.6.2
 pytest-mock==3.14.0
 pytest-rerunfailures==14.0
-pytest==7.2.2
+pytest-timeout==2.3.1
 requests-mock==1.10.0
 setuptools>=64.0.2
 sqlalchemy-stubs==0.4
diff --git a/pyproject.toml b/pyproject.toml
index 087d2b2033..29b929a5e2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -175,7 +175,8 @@ addopts = [
     "--no-cov-on-fail",
     "-ra",
     "-vv",
-    "--disable-pytest-warnings"
+    "--disable-pytest-warnings",
+    "--timeout=300"
 ]
 markers = [
     "unit: only runs tests that don't require non-python dependencies (i.e. a database)",

From bb7714a5f5199017d116182b130e6481cd776df7 Mon Sep 17 00:00:00 2001
From: Adrian Galvan <adrian@ethyca.com>
Date: Mon, 9 Dec 2024 18:09:56 -0800
Subject: [PATCH 13/22] Fixing datasets

---
 data/dataset/mongo_example_test_dataset.yml | 11 +++++++----
 tests/ops/generator/test_data_generator.py  |  1 +
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/data/dataset/mongo_example_test_dataset.yml b/data/dataset/mongo_example_test_dataset.yml
index ece5817b1a..587e74b317 100644
--- a/data/dataset/mongo_example_test_dataset.yml
+++ b/data/dataset/mongo_example_test_dataset.yml
@@ -7,6 +7,8 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: customer_id
             data_categories: [user.unique_id]
             fides_meta:
@@ -203,10 +205,11 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            references:
-              - dataset: mongo_test
-                field: flights.pilots
-                direction: from
+            fides_meta:
+              references:
+                - dataset: mongo_test
+                  field: flights.pilots
+                  direction: from
           - name: name
             data_categories: [user.name]
             fides_meta:
diff --git a/tests/ops/generator/test_data_generator.py b/tests/ops/generator/test_data_generator.py
index 659185d080..d990169f28 100644
--- a/tests/ops/generator/test_data_generator.py
+++ b/tests/ops/generator/test_data_generator.py
@@ -31,6 +31,7 @@
       - name: address
         fields:
           - name: id
+            fides_meta:
               data_type: integer
           - name: user_id
           - name: street

From 0d4340139c8e8594c60e5a429bd56c82f5579c2a Mon Sep 17 00:00:00 2001
From: Adrian Galvan <adrian@ethyca.com>
Date: Mon, 9 Dec 2024 18:48:22 -0800
Subject: [PATCH 14/22] Fixing some tests

---
 dev-requirements.txt                                   |  1 -
 pyproject.toml                                         |  1 -
 .../sample_resources/mongo_example_test_dataset.yml    | 10 ++++++++++
 tests/ops/generator/test_data_generator.py             |  1 +
 4 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/dev-requirements.txt b/dev-requirements.txt
index 467e7b05a3..c51b9369a5 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -13,7 +13,6 @@ pytest-cov==4.0.0
 pytest-env==0.6.2
 pytest-mock==3.14.0
 pytest-rerunfailures==14.0
-pytest-timeout==2.3.1
 requests-mock==1.10.0
 setuptools>=64.0.2
 sqlalchemy-stubs==0.4
diff --git a/pyproject.toml b/pyproject.toml
index 29b929a5e2..f1f4963dd0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -176,7 +176,6 @@ addopts = [
     "-ra",
     "-vv",
     "--disable-pytest-warnings",
-    "--timeout=300"
 ]
 markers = [
     "unit: only runs tests that don't require non-python dependencies (i.e. a database)",
diff --git a/src/fides/data/sample_project/sample_resources/mongo_example_test_dataset.yml b/src/fides/data/sample_project/sample_resources/mongo_example_test_dataset.yml
index 542887d5c7..468b43bbae 100644
--- a/src/fides/data/sample_project/sample_resources/mongo_example_test_dataset.yml
+++ b/src/fides/data/sample_project/sample_resources/mongo_example_test_dataset.yml
@@ -7,6 +7,8 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: customer_id
             data_categories: [user.unique_id]
             fides_meta:
@@ -104,6 +106,7 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
+            fides_meta:
               data_type: object_id
           - name: customer_information
             fields:
@@ -135,6 +138,7 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
+            fides_meta:
               data_type: object_id
           - name: passenger_information
             fields:
@@ -163,6 +167,7 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
+            fides_meta:
               data_type: object_id
           - name: thread
             fides_meta:
@@ -186,6 +191,7 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
+            fides_meta:
               data_type: object_id
           - name: email
             data_categories: [user.contact.email]
@@ -194,6 +200,7 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
               references:
                 - dataset: mongo_test
                   field: flights.pilots
@@ -206,6 +213,7 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
+            fides_meta:
               data_type: object_id
           - name: planes
             data_categories: [system.operations]
@@ -223,6 +231,7 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
+            fides_meta:
               data_type: object_id
           - name: billing_address_id
             data_categories: [system.operations]
@@ -246,6 +255,7 @@ dataset:
       - name: rewards
         fields:
           - name: _id
+            fides_meta:
               data_type: object_id
           - name: owner
             fides_meta:
diff --git a/tests/ops/generator/test_data_generator.py b/tests/ops/generator/test_data_generator.py
index d990169f28..04441237b7 100644
--- a/tests/ops/generator/test_data_generator.py
+++ b/tests/ops/generator/test_data_generator.py
@@ -19,6 +19,7 @@
       - name: user
         fields:
           - name: id
+            fides_meta:
               data_type: integer
               references:
                 - dataset: db

From b13632b48e05168ec642693cf83cb9787e8a819c Mon Sep 17 00:00:00 2001
From: Adrian Galvan <adrian@ethyca.com>
Date: Mon, 9 Dec 2024 18:52:00 -0800
Subject: [PATCH 15/22] Fixing MongoDB dataset

---
 .../sample_resources/mongo_example_test_dataset.yml              | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fides/data/sample_project/sample_resources/mongo_example_test_dataset.yml b/src/fides/data/sample_project/sample_resources/mongo_example_test_dataset.yml
index 468b43bbae..ccfbe853a9 100644
--- a/src/fides/data/sample_project/sample_resources/mongo_example_test_dataset.yml
+++ b/src/fides/data/sample_project/sample_resources/mongo_example_test_dataset.yml
@@ -77,6 +77,7 @@ dataset:
         fields:
           - name: _id
             data_categories: [system.operations]
+            fides_meta:
               data_type: object_id
           - name: customer_identifiers
             fields:

From b0ef57d5020188b3fff15a57bd0852edfb8418ef Mon Sep 17 00:00:00 2001
From: Adrian Galvan <adrian@ethyca.com>
Date: Mon, 9 Dec 2024 19:58:49 -0800
Subject: [PATCH 16/22] Re-adding primary key to mongo_test.customer_details

---
 tests/ops/task/traversal_data.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/ops/task/traversal_data.py b/tests/ops/task/traversal_data.py
index 07c6067d7b..d0ef50ae18 100644
--- a/tests/ops/task/traversal_data.py
+++ b/tests/ops/task/traversal_data.py
@@ -258,9 +258,7 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
             {
                 "name": "customer_details",
                 "fields": [
-                    {
-                        "name": "_id",
-                    },
+                    {"name": "_id", "fides_meta": {"primary_key": True}},
                     {
                         "name": "birthday",
                         "fides_meta": {"data_type": "string"},

From 77a5770a897c5d647308c2449bb46c64bbd5e855 Mon Sep 17 00:00:00 2001
From: Adrian Galvan <adrian@ethyca.com>
Date: Tue, 10 Dec 2024 10:52:32 -0800
Subject: [PATCH 17/22] Splitting out query configs and tests

---
 .../query_configs/mongodb_query_config.py     |   2 +-
 .../connectors/query_configs/query_config.py  |  17 +-
 .../connectors/test_dynamodb_query_config.py  | 129 ++++++
 .../connectors/test_mongo_query_config.py     | 283 ++++++++++++
 .../service/connectors/test_query_config.py   | 431 +-----------------
 .../connectors/test_scylladb_query_config.py  |  47 ++
 tests/ops/task/traversal_data.py              |  79 +++-
 7 files changed, 537 insertions(+), 451 deletions(-)
 create mode 100644 tests/ops/service/connectors/test_dynamodb_query_config.py
 create mode 100644 tests/ops/service/connectors/test_mongo_query_config.py
 create mode 100644 tests/ops/service/connectors/test_scylladb_query_config.py

diff --git a/src/fides/api/service/connectors/query_configs/mongodb_query_config.py b/src/fides/api/service/connectors/query_configs/mongodb_query_config.py
index edb57599db..1a6aa303f0 100644
--- a/src/fides/api/service/connectors/query_configs/mongodb_query_config.py
+++ b/src/fides/api/service/connectors/query_configs/mongodb_query_config.py
@@ -72,7 +72,7 @@ def generate_update_stmt(
         where_clauses: Dict[str, Any] = filter_nonempty_values(
             {
                 field_path.string_path: field.cast(row[field_path.string_path])
-                for field_path, field in self.incoming_field_paths.items()
+                for field_path, field in self.primary_key_field_paths.items()
             }
         )
 
diff --git a/src/fides/api/service/connectors/query_configs/query_config.py b/src/fides/api/service/connectors/query_configs/query_config.py
index 2026fe0b0f..4ef115d910 100644
--- a/src/fides/api/service/connectors/query_configs/query_config.py
+++ b/src/fides/api/service/connectors/query_configs/query_config.py
@@ -101,7 +101,7 @@ def primary_key_field_paths(self) -> Dict[FieldPath, Field]:
         }
 
     @property
-    def incoming_field_paths(self) -> Dict[FieldPath, Field]:
+    def reference_field_paths(self) -> Dict[FieldPath, Field]:
         """Mapping of FieldPaths to Fields that have incoming identity or dataset references"""
         return {
             field_path: field
@@ -447,10 +447,19 @@ def generate_update_stmt(
     ) -> Optional[T]:
         """Returns an update statement in generic SQL-ish dialect."""
         update_value_map: Dict[str, Any] = self.update_value_map(row, policy, request)
+
+        non_empty_primary_key_fields: Dict[str, Field] = filter_nonempty_values(
+            {
+                fpath.string_path: fld.cast(row[fpath.string_path])
+                for fpath, fld in self.primary_key_field_paths.items()
+                if fpath.string_path in row
+            }
+        )
+
         non_empty_reference_fields: Dict[str, Field] = filter_nonempty_values(
             {
                 fpath.string_path: fld.cast(row[fpath.string_path])
-                for fpath, fld in self.incoming_field_paths.items()
+                for fpath, fld in self.reference_field_paths.items()
                 if fpath.string_path in row
             }
         )
@@ -463,10 +472,10 @@ def generate_update_stmt(
 
         update_clauses = self.get_update_clauses(
             {k: f"masked_{k}" for k in update_value_map},
-            non_empty_reference_fields,
+            non_empty_primary_key_fields or non_empty_reference_fields,
         )
         where_clauses = self.format_key_map_for_update_stmt(
-            {k: k for k in non_empty_reference_fields}
+            {k: k for k in non_empty_primary_key_fields or non_empty_reference_fields}
         )
 
         valid = len(where_clauses) > 0 and len(update_clauses) > 0
diff --git a/tests/ops/service/connectors/test_dynamodb_query_config.py b/tests/ops/service/connectors/test_dynamodb_query_config.py
new file mode 100644
index 0000000000..4591ae9385
--- /dev/null
+++ b/tests/ops/service/connectors/test_dynamodb_query_config.py
@@ -0,0 +1,129 @@
+from datetime import datetime, timezone
+
+import pytest
+from boto3.dynamodb.types import TypeDeserializer
+from fideslang.models import Dataset
+
+from fides.api.graph.config import CollectionAddress
+from fides.api.graph.graph import DatasetGraph
+from fides.api.graph.traversal import Traversal
+from fides.api.models.datasetconfig import convert_dataset_to_graph
+from fides.api.models.privacy_request import PrivacyRequest
+from fides.api.service.connectors.query_configs.dynamodb_query_config import (
+    DynamoDBQueryConfig,
+)
+
+privacy_request = PrivacyRequest(id="234544")
+
+
+class TestDynamoDBQueryConfig:
+    @pytest.fixture(scope="function")
+    def identity(self):
+        identity = {"email": "customer-test_uuid@example.com"}
+        return identity
+
+    @pytest.fixture(scope="function")
+    def dataset_graph(self, integration_dynamodb_config, example_datasets):
+        dataset = Dataset(**example_datasets[11])
+        dataset_graph = convert_dataset_to_graph(
+            dataset, integration_dynamodb_config.key
+        )
+
+        return DatasetGraph(*[dataset_graph])
+
+    @pytest.fixture(scope="function")
+    def traversal(self, identity, dataset_graph):
+        dynamo_traversal = Traversal(dataset_graph, identity)
+        return dynamo_traversal
+
+    @pytest.fixture(scope="function")
+    def customer_node(self, traversal):
+        return traversal.traversal_node_dict[
+            CollectionAddress("dynamodb_example_test_dataset", "customer")
+        ].to_mock_execution_node()
+
+    @pytest.fixture(scope="function")
+    def customer_identifier_node(self, traversal):
+        return traversal.traversal_node_dict[
+            CollectionAddress("dynamodb_example_test_dataset", "customer_identifier")
+        ].to_mock_execution_node()
+
+    @pytest.fixture(scope="function")
+    def customer_row(self):
+        row = {
+            "customer_email": {"S": "customer-1@example.com"},
+            "name": {"S": "John Customer"},
+            "address_id": {"L": [{"S": "1"}, {"S": "2"}]},
+            "personal_info": {"M": {"gender": {"S": "male"}, "age": {"S": "99"}}},
+            "id": {"S": "1"},
+        }
+        return row
+
+    @pytest.fixture(scope="function")
+    def deserialized_customer_row(self, customer_row):
+        deserialized_customer_row = {}
+        deserializer = TypeDeserializer()
+        for key, value in customer_row.items():
+            deserialized_customer_row[key] = deserializer.deserialize(value)
+        return deserialized_customer_row
+
+    @pytest.fixture(scope="function")
+    def customer_identifier_row(self):
+        row = {
+            "customer_id": {"S": "customer-1@example.com"},
+            "email": {"S": "customer-1@example.com"},
+            "name": {"S": "Customer 1"},
+            "created": {"S": datetime.now(timezone.utc).isoformat()},
+        }
+        return row
+
+    @pytest.fixture(scope="function")
+    def deserialized_customer_identifier_row(self, customer_identifier_row):
+        deserialized_customer_identifier_row = {}
+        deserializer = TypeDeserializer()
+        for key, value in customer_identifier_row.items():
+            deserialized_customer_identifier_row[key] = deserializer.deserialize(value)
+        return deserialized_customer_identifier_row
+
+    def test_get_query_param_formatting_single_key(
+        self,
+        resources_dict,
+        customer_node,
+    ) -> None:
+        input_data = {
+            "fidesops_grouped_inputs": [],
+            "email": ["customer-test_uuid@example.com"],
+        }
+        attribute_definitions = [{"AttributeName": "email", "AttributeType": "S"}]
+        query_config = DynamoDBQueryConfig(customer_node, attribute_definitions)
+        item = query_config.generate_query(
+            input_data=input_data, policy=resources_dict["policy"]
+        )
+        assert item["ExpressionAttributeValues"] == {
+            ":value": {"S": "customer-test_uuid@example.com"}
+        }
+        assert item["KeyConditionExpression"] == "email = :value"
+
+    def test_put_query_param_formatting_single_key(
+        self,
+        erasure_policy,
+        customer_node,
+        deserialized_customer_row,
+    ) -> None:
+        input_data = {
+            "fidesops_grouped_inputs": [],
+            "email": ["customer-test_uuid@example.com"],
+        }
+        attribute_definitions = [{"AttributeName": "email", "AttributeType": "S"}]
+        query_config = DynamoDBQueryConfig(customer_node, attribute_definitions)
+        update_item = query_config.generate_update_stmt(
+            deserialized_customer_row, erasure_policy, privacy_request
+        )
+
+        assert update_item == {
+            "customer_email": {"S": "customer-1@example.com"},
+            "name": {"NULL": True},
+            "address_id": {"L": [{"S": "1"}, {"S": "2"}]},
+            "personal_info": {"M": {"gender": {"S": "male"}, "age": {"S": "99"}}},
+            "id": {"S": "1"},
+        }
diff --git a/tests/ops/service/connectors/test_mongo_query_config.py b/tests/ops/service/connectors/test_mongo_query_config.py
new file mode 100644
index 0000000000..3912618801
--- /dev/null
+++ b/tests/ops/service/connectors/test_mongo_query_config.py
@@ -0,0 +1,283 @@
+import pytest
+from fideslang.models import Dataset
+
+from fides.api.graph.config import (
+    CollectionAddress,
+    FieldAddress,
+    FieldPath,
+    ObjectField,
+    ScalarField,
+)
+from fides.api.graph.graph import DatasetGraph, Edge
+from fides.api.graph.traversal import Traversal
+from fides.api.models.datasetconfig import convert_dataset_to_graph
+from fides.api.models.privacy_request import PrivacyRequest
+from fides.api.schemas.masking.masking_configuration import HashMaskingConfiguration
+from fides.api.schemas.masking.masking_secrets import MaskingSecretCache, SecretType
+from fides.api.service.connectors.query_configs.mongodb_query_config import (
+    MongoQueryConfig,
+)
+from fides.api.service.masking.strategy.masking_strategy_hash import HashMaskingStrategy
+from fides.api.util.data_category import DataCategory
+
+from ...task.traversal_data import combined_mongo_postgresql_graph
+from ...test_helpers.cache_secrets_helper import cache_secret
+
+privacy_request = PrivacyRequest(id="234544")
+
+
+class TestMongoQueryConfig:
+    @pytest.fixture(scope="function")
+    def combined_traversal(self, connection_config, integration_mongodb_config):
+        mongo_dataset, postgres_dataset = combined_mongo_postgresql_graph(
+            connection_config, integration_mongodb_config
+        )
+        combined_dataset_graph = DatasetGraph(mongo_dataset, postgres_dataset)
+        combined_traversal = Traversal(
+            combined_dataset_graph,
+            {"email": "customer-1@examplecom"},
+        )
+        return combined_traversal
+
+    @pytest.fixture(scope="function")
+    def customer_details_node(self, combined_traversal):
+        return combined_traversal.traversal_node_dict[
+            CollectionAddress("mongo_test", "customer_details")
+        ].to_mock_execution_node()
+
+    @pytest.fixture(scope="function")
+    def customer_feedback_node(self, combined_traversal):
+        return combined_traversal.traversal_node_dict[
+            CollectionAddress("mongo_test", "customer_feedback")
+        ].to_mock_execution_node()
+
+    def test_field_map_nested(self, customer_details_node):
+        config = MongoQueryConfig(customer_details_node)
+
+        field_map = config.field_map()
+        assert isinstance(field_map[FieldPath("workplace_info")], ObjectField)
+        assert isinstance(
+            field_map[FieldPath("workplace_info", "employer")], ScalarField
+        )
+
+    def test_primary_key_field_paths(self, customer_details_node):
+        config = MongoQueryConfig(customer_details_node)
+        assert list(config.primary_key_field_paths.keys()) == [FieldPath("_id")]
+        assert isinstance(config.primary_key_field_paths[FieldPath("_id")], ScalarField)
+
+    def test_nested_query_field_paths(
+        self, customer_details_node, customer_feedback_node
+    ):
+        assert customer_details_node.query_field_paths == {
+            FieldPath("customer_id"),
+        }
+
+        assert customer_feedback_node.query_field_paths == {
+            FieldPath("customer_information", "email")
+        }
+
+    def test_nested_typed_filtered_values(self, customer_feedback_node):
+        """Identity data is located on a nested object"""
+        input_data = {
+            "customer_information.email": ["test@example.com"],
+            "ignore": ["abcde"],
+        }
+        assert customer_feedback_node.typed_filtered_values(input_data) == {
+            "customer_information.email": ["test@example.com"]
+        }
+
+    def test_generate_query(
+        self,
+        policy,
+        example_datasets,
+        integration_mongodb_config,
+        connection_config,
+    ):
+        dataset_postgres = Dataset(**example_datasets[0])
+        graph = convert_dataset_to_graph(dataset_postgres, connection_config.key)
+        dataset_mongo = Dataset(**example_datasets[1])
+        mongo_graph = convert_dataset_to_graph(
+            dataset_mongo, integration_mongodb_config.key
+        )
+        dataset_graph = DatasetGraph(*[graph, mongo_graph])
+        traversal = Traversal(dataset_graph, {"email": "customer-1@example.com"})
+        # Edge created from Root to nested customer_information.email field
+        assert (
+            Edge(
+                FieldAddress("__ROOT__", "__ROOT__", "email"),
+                FieldAddress(
+                    "mongo_test", "customer_feedback", "customer_information", "email"
+                ),
+            )
+            in traversal.edges
+        )
+
+        # Test query on nested field
+        customer_feedback = traversal.traversal_node_dict[
+            CollectionAddress("mongo_test", "customer_feedback")
+        ].to_mock_execution_node()
+        config = MongoQueryConfig(customer_feedback)
+        input_data = {"customer_information.email": ["customer-1@example.com"]}
+        # Tuple of query, projection - Searching for documents with nested
+        # customer_information.email = customer-1@example.com
+        assert config.generate_query(input_data, policy) == (
+            {"customer_information.email": "customer-1@example.com"},
+            {"_id": 1, "customer_information": 1, "date": 1, "message": 1, "rating": 1},
+        )
+
+        # Test query nested data
+        customer_details = traversal.traversal_node_dict[
+            CollectionAddress("mongo_test", "customer_details")
+        ].to_mock_execution_node()
+        config = MongoQueryConfig(customer_details)
+        input_data = {"customer_id": [1]}
+        # Tuple of (query, projection). The projection lists only top-level fields; nested data
+        # is filtered later.
+        assert config.generate_query(input_data, policy) == (
+            {"customer_id": 1},
+            {
+                "_id": 1,
+                "birthday": 1,
+                "comments": 1,
+                "customer_id": 1,
+                "customer_uuid": 1,
+                "emergency_contacts": 1,
+                "children": 1,
+                "gender": 1,
+                "travel_identifiers": 1,
+                "workplace_info": 1,
+            },
+        )
+
+    def test_generate_update_stmt_multiple_fields(
+        self,
+        erasure_policy,
+        example_datasets,
+        integration_mongodb_config,
+        connection_config,
+    ):
+        dataset_postgres = Dataset(**example_datasets[0])
+        graph = convert_dataset_to_graph(dataset_postgres, connection_config.key)
+        dataset_mongo = Dataset(**example_datasets[1])
+        mongo_graph = convert_dataset_to_graph(
+            dataset_mongo, integration_mongodb_config.key
+        )
+        dataset_graph = DatasetGraph(*[graph, mongo_graph])
+
+        traversal = Traversal(dataset_graph, {"email": "customer-1@example.com"})
+        customer_details = traversal.traversal_node_dict[
+            CollectionAddress("mongo_test", "customer_details")
+        ].to_mock_execution_node()
+        config = MongoQueryConfig(customer_details)
+        row = {
+            "birthday": "1988-01-10",
+            "gender": "male",
+            "customer_id": 1,
+            "_id": 1,
+            "workplace_info": {
+                "position": "Chief Strategist",
+                "direct_reports": ["Robbie Margo", "Sully Hunter"],
+            },
+            "emergency_contacts": [{"name": "June Customer", "phone": "444-444-4444"}],
+            "children": ["Christopher Customer", "Courtney Customer"],
+        }
+
+        # Broaden the rule's target to the top-level "user" data category
+        rule = erasure_policy.rules[0]
+        target = rule.targets[0]
+        target.data_category = DataCategory("user").value
+
+        mongo_statement = config.generate_update_stmt(
+            row, erasure_policy, privacy_request
+        )
+
+        expected_result_0 = {"customer_id": 1}
+        expected_result_1 = {
+            "$set": {
+                "birthday": None,
+                "children.0": None,
+                "children.1": None,
+                "customer_id": None,
+                "emergency_contacts.0.name": None,
+                "workplace_info.direct_reports.0": None,  # Both direct reports are masked.
+                "workplace_info.direct_reports.1": None,
+                "emergency_contacts.0.phone": None,
+                "gender": None,
+                "workplace_info.position": None,
+            }
+        }
+
+        print(mongo_statement[1])
+        print(expected_result_1)
+        assert mongo_statement[0] == expected_result_0
+        assert mongo_statement[1] == expected_result_1
+
+    def test_generate_update_stmt_multiple_rules(
+        self,
+        erasure_policy_two_rules,
+        example_datasets,
+        integration_mongodb_config,
+        connection_config,
+    ):
+        dataset_postgres = Dataset(**example_datasets[0])
+        graph = convert_dataset_to_graph(dataset_postgres, connection_config.key)
+        dataset_mongo = Dataset(**example_datasets[1])
+        mongo_graph = convert_dataset_to_graph(
+            dataset_mongo, integration_mongodb_config.key
+        )
+        dataset_graph = DatasetGraph(*[graph, mongo_graph])
+
+        traversal = Traversal(dataset_graph, {"email": "customer-1@example.com"})
+
+        customer_details = traversal.traversal_node_dict[
+            CollectionAddress("mongo_test", "customer_details")
+        ].to_mock_execution_node()
+
+        config = MongoQueryConfig(customer_details)
+        row = {
+            "birthday": "1988-01-10",
+            "gender": "male",
+            "customer_id": 1,
+            "_id": 1,
+            "workplace_info": {
+                "position": "Chief Strategist",
+                "direct_reports": ["Robbie Margo", "Sully Hunter"],
+            },
+            "emergency_contacts": [{"name": "June Customer", "phone": "444-444-4444"}],
+            "children": ["Christopher Customer", "Courtney Customer"],
+        }
+
+        rule = erasure_policy_two_rules.rules[0]
+        rule.masking_strategy = {
+            "strategy": "hash",
+            "configuration": {"algorithm": "SHA-512"},
+        }
+        target = rule.targets[0]
+        target.data_category = DataCategory("user.demographic.date_of_birth").value
+
+        rule_two = erasure_policy_two_rules.rules[1]
+        rule_two.masking_strategy = {
+            "strategy": "random_string_rewrite",
+            "configuration": {"length": 30},
+        }
+        target = rule_two.targets[0]
+        target.data_category = DataCategory("user.demographic.gender").value
+        # cache secrets for hash strategy
+        secret = MaskingSecretCache[str](
+            secret="adobo",
+            masking_strategy=HashMaskingStrategy.name,
+            secret_type=SecretType.salt,
+        )
+        cache_secret(secret, privacy_request.id)
+
+        mongo_statement = config.generate_update_stmt(
+            row, erasure_policy_two_rules, privacy_request
+        )
+        assert mongo_statement[0] == {"customer_id": 1}
+        assert len(mongo_statement[1]["$set"]["gender"]) == 30
+        assert (
+            mongo_statement[1]["$set"]["birthday"]
+            == HashMaskingStrategy(HashMaskingConfiguration(algorithm="SHA-512")).mask(
+                ["1988-01-10"], request_id=privacy_request.id
+            )[0]
+        )
diff --git a/tests/ops/service/connectors/test_query_config.py b/tests/ops/service/connectors/test_query_config.py
index 75c9b26c1b..2aa0871255 100644
--- a/tests/ops/service/connectors/test_query_config.py
+++ b/tests/ops/service/connectors/test_query_config.py
@@ -1,43 +1,28 @@
-from datetime import datetime, timezone
 from typing import Any, Dict, Set
 from unittest import mock
 
 import pytest
-from boto3.dynamodb.types import TypeDeserializer
 from fideslang.models import Dataset
 
 from fides.api.common_exceptions import MissingNamespaceSchemaException
-from fides.api.graph.config import (
-    CollectionAddress,
-    FieldAddress,
-    FieldPath,
-    ObjectField,
-    ScalarField,
-)
+from fides.api.graph.config import CollectionAddress, FieldPath
 from fides.api.graph.execution import ExecutionNode
-from fides.api.graph.graph import DatasetGraph, Edge
+from fides.api.graph.graph import DatasetGraph
 from fides.api.graph.traversal import Traversal, TraversalNode
 from fides.api.models.datasetconfig import convert_dataset_to_graph
 from fides.api.models.privacy_request import PrivacyRequest
 from fides.api.schemas.masking.masking_configuration import HashMaskingConfiguration
 from fides.api.schemas.masking.masking_secrets import MaskingSecretCache, SecretType
 from fides.api.schemas.namespace_meta.namespace_meta import NamespaceMeta
-from fides.api.service.connectors.query_configs.dynamodb_query_config import (
-    DynamoDBQueryConfig,
-)
-from fides.api.service.connectors.query_configs.mongodb_query_config import (
-    MongoQueryConfig,
-)
 from fides.api.service.connectors.query_configs.query_config import (
     QueryConfig,
     SQLQueryConfig,
 )
-from fides.api.service.connectors.scylla_query_config import ScyllaDBQueryConfig
 from fides.api.service.masking.strategy.masking_strategy_hash import HashMaskingStrategy
 from fides.api.util.data_category import DataCategory
 from tests.fixtures.application_fixtures import load_dataset
 
-from ...task.traversal_data import combined_mongo_postgresql_graph, integration_db_graph
+from ...task.traversal_data import integration_db_graph
 from ...test_helpers.cache_secrets_helper import cache_secret, clear_cache_secrets
 
 # customers -> address, order
@@ -461,416 +446,6 @@ def test_generate_update_stmts_from_multiple_rules(
             text_clause._bindparams["masked_email"].value == "*****"
         )  # String rewrite masking strategy
 
-
-class TestMongoQueryConfig:
-    @pytest.fixture(scope="function")
-    def combined_traversal(self, connection_config, integration_mongodb_config):
-        mongo_dataset, postgres_dataset = combined_mongo_postgresql_graph(
-            connection_config, integration_mongodb_config
-        )
-        combined_dataset_graph = DatasetGraph(mongo_dataset, postgres_dataset)
-        combined_traversal = Traversal(
-            combined_dataset_graph,
-            {"email": "customer-1@examplecom"},
-        )
-        return combined_traversal
-
-    @pytest.fixture(scope="function")
-    def customer_details_node(self, combined_traversal):
-        return combined_traversal.traversal_node_dict[
-            CollectionAddress("mongo_test", "customer_details")
-        ].to_mock_execution_node()
-
-    @pytest.fixture(scope="function")
-    def customer_feedback_node(self, combined_traversal):
-        return combined_traversal.traversal_node_dict[
-            CollectionAddress("mongo_test", "customer_feedback")
-        ].to_mock_execution_node()
-
-    def test_field_map_nested(self, customer_details_node):
-        config = MongoQueryConfig(customer_details_node)
-
-        field_map = config.field_map()
-        assert isinstance(field_map[FieldPath("workplace_info")], ObjectField)
-        assert isinstance(
-            field_map[FieldPath("workplace_info", "employer")], ScalarField
-        )
-
-    def test_primary_key_field_paths(self, customer_details_node):
-        config = MongoQueryConfig(customer_details_node)
-        assert list(config.primary_key_field_paths.keys()) == [FieldPath("_id")]
-        assert isinstance(config.primary_key_field_paths[FieldPath("_id")], ScalarField)
-
-    def test_nested_query_field_paths(
-        self, customer_details_node, customer_feedback_node
-    ):
-        assert customer_details_node.query_field_paths == {
-            FieldPath("customer_id"),
-        }
-
-        assert customer_feedback_node.query_field_paths == {
-            FieldPath("customer_information", "email")
-        }
-
-    def test_nested_typed_filtered_values(self, customer_feedback_node):
-        """Identity data is located on a nested object"""
-        input_data = {
-            "customer_information.email": ["test@example.com"],
-            "ignore": ["abcde"],
-        }
-        assert customer_feedback_node.typed_filtered_values(input_data) == {
-            "customer_information.email": ["test@example.com"]
-        }
-
-    def test_generate_query(
-        self,
-        policy,
-        example_datasets,
-        integration_mongodb_config,
-        connection_config,
-    ):
-        dataset_postgres = Dataset(**example_datasets[0])
-        graph = convert_dataset_to_graph(dataset_postgres, connection_config.key)
-        dataset_mongo = Dataset(**example_datasets[1])
-        mongo_graph = convert_dataset_to_graph(
-            dataset_mongo, integration_mongodb_config.key
-        )
-        dataset_graph = DatasetGraph(*[graph, mongo_graph])
-        traversal = Traversal(dataset_graph, {"email": "customer-1@example.com"})
-        # Edge created from Root to nested customer_information.email field
-        assert (
-            Edge(
-                FieldAddress("__ROOT__", "__ROOT__", "email"),
-                FieldAddress(
-                    "mongo_test", "customer_feedback", "customer_information", "email"
-                ),
-            )
-            in traversal.edges
-        )
-
-        # Test query on nested field
-        customer_feedback = traversal.traversal_node_dict[
-            CollectionAddress("mongo_test", "customer_feedback")
-        ].to_mock_execution_node()
-        config = MongoQueryConfig(customer_feedback)
-        input_data = {"customer_information.email": ["customer-1@example.com"]}
-        # Tuple of query, projection - Searching for documents with nested
-        # customer_information.email = customer-1@example.com
-        assert config.generate_query(input_data, policy) == (
-            {"customer_information.email": "customer-1@example.com"},
-            {"_id": 1, "customer_information": 1, "date": 1, "message": 1, "rating": 1},
-        )
-
-        # Test query nested data
-        customer_details = traversal.traversal_node_dict[
-            CollectionAddress("mongo_test", "customer_details")
-        ].to_mock_execution_node()
-        config = MongoQueryConfig(customer_details)
-        input_data = {"customer_id": [1]}
-        # Tuple of query, projection - Projection is specifying fields at the top-level. Nested data will
-        # be filtered later.
-        assert config.generate_query(input_data, policy) == (
-            {"customer_id": 1},
-            {
-                "_id": 1,
-                "birthday": 1,
-                "comments": 1,
-                "customer_id": 1,
-                "customer_uuid": 1,
-                "emergency_contacts": 1,
-                "children": 1,
-                "gender": 1,
-                "travel_identifiers": 1,
-                "workplace_info": 1,
-            },
-        )
-
-    def test_generate_update_stmt_multiple_fields(
-        self,
-        erasure_policy,
-        example_datasets,
-        integration_mongodb_config,
-        connection_config,
-    ):
-        dataset_postgres = Dataset(**example_datasets[0])
-        graph = convert_dataset_to_graph(dataset_postgres, connection_config.key)
-        dataset_mongo = Dataset(**example_datasets[1])
-        mongo_graph = convert_dataset_to_graph(
-            dataset_mongo, integration_mongodb_config.key
-        )
-        dataset_graph = DatasetGraph(*[graph, mongo_graph])
-
-        traversal = Traversal(dataset_graph, {"email": "customer-1@example.com"})
-        customer_details = traversal.traversal_node_dict[
-            CollectionAddress("mongo_test", "customer_details")
-        ].to_mock_execution_node()
-        config = MongoQueryConfig(customer_details)
-        row = {
-            "birthday": "1988-01-10",
-            "gender": "male",
-            "customer_id": 1,
-            "_id": 1,
-            "workplace_info": {
-                "position": "Chief Strategist",
-                "direct_reports": ["Robbie Margo", "Sully Hunter"],
-            },
-            "emergency_contacts": [{"name": "June Customer", "phone": "444-444-4444"}],
-            "children": ["Christopher Customer", "Courtney Customer"],
-        }
-
-        # Make target more broad
-        rule = erasure_policy.rules[0]
-        target = rule.targets[0]
-        target.data_category = DataCategory("user").value
-
-        mongo_statement = config.generate_update_stmt(
-            row, erasure_policy, privacy_request
-        )
-
-        expected_result_0 = {"customer_id": 1}
-        expected_result_1 = {
-            "$set": {
-                "birthday": None,
-                "children.0": None,
-                "children.1": None,
-                "customer_id": None,
-                "emergency_contacts.0.name": None,
-                "workplace_info.direct_reports.0": None,  # Both direct reports are masked.
-                "workplace_info.direct_reports.1": None,
-                "emergency_contacts.0.phone": None,
-                "gender": None,
-                "workplace_info.position": None,
-            }
-        }
-
-        print(mongo_statement[1])
-        print(expected_result_1)
-        assert mongo_statement[0] == expected_result_0
-        assert mongo_statement[1] == expected_result_1
-
-    def test_generate_update_stmt_multiple_rules(
-        self,
-        erasure_policy_two_rules,
-        example_datasets,
-        integration_mongodb_config,
-        connection_config,
-    ):
-        dataset_postgres = Dataset(**example_datasets[0])
-        graph = convert_dataset_to_graph(dataset_postgres, connection_config.key)
-        dataset_mongo = Dataset(**example_datasets[1])
-        mongo_graph = convert_dataset_to_graph(
-            dataset_mongo, integration_mongodb_config.key
-        )
-        dataset_graph = DatasetGraph(*[graph, mongo_graph])
-
-        traversal = Traversal(dataset_graph, {"email": "customer-1@example.com"})
-
-        customer_details = traversal.traversal_node_dict[
-            CollectionAddress("mongo_test", "customer_details")
-        ].to_mock_execution_node()
-
-        config = MongoQueryConfig(customer_details)
-        row = {
-            "birthday": "1988-01-10",
-            "gender": "male",
-            "customer_id": 1,
-            "_id": 1,
-            "workplace_info": {
-                "position": "Chief Strategist",
-                "direct_reports": ["Robbie Margo", "Sully Hunter"],
-            },
-            "emergency_contacts": [{"name": "June Customer", "phone": "444-444-4444"}],
-            "children": ["Christopher Customer", "Courtney Customer"],
-        }
-
-        rule = erasure_policy_two_rules.rules[0]
-        rule.masking_strategy = {
-            "strategy": "hash",
-            "configuration": {"algorithm": "SHA-512"},
-        }
-        target = rule.targets[0]
-        target.data_category = DataCategory("user.demographic.date_of_birth").value
-
-        rule_two = erasure_policy_two_rules.rules[1]
-        rule_two.masking_strategy = {
-            "strategy": "random_string_rewrite",
-            "configuration": {"length": 30},
-        }
-        target = rule_two.targets[0]
-        target.data_category = DataCategory("user.demographic.gender").value
-        # cache secrets for hash strategy
-        secret = MaskingSecretCache[str](
-            secret="adobo",
-            masking_strategy=HashMaskingStrategy.name,
-            secret_type=SecretType.salt,
-        )
-        cache_secret(secret, privacy_request.id)
-
-        mongo_statement = config.generate_update_stmt(
-            row, erasure_policy_two_rules, privacy_request
-        )
-        assert mongo_statement[0] == {"customer_id": 1}
-        assert len(mongo_statement[1]["$set"]["gender"]) == 30
-        assert (
-            mongo_statement[1]["$set"]["birthday"]
-            == HashMaskingStrategy(HashMaskingConfiguration(algorithm="SHA-512")).mask(
-                ["1988-01-10"], request_id=privacy_request.id
-            )[0]
-        )
-
-
-class TestDynamoDBQueryConfig:
-    @pytest.fixture(scope="function")
-    def identity(self):
-        identity = {"email": "customer-test_uuid@example.com"}
-        return identity
-
-    @pytest.fixture(scope="function")
-    def dataset_graph(self, integration_dynamodb_config, example_datasets):
-        dataset = Dataset(**example_datasets[11])
-        dataset_graph = convert_dataset_to_graph(
-            dataset, integration_dynamodb_config.key
-        )
-
-        return DatasetGraph(*[dataset_graph])
-
-    @pytest.fixture(scope="function")
-    def traversal(self, identity, dataset_graph):
-        dynamo_traversal = Traversal(dataset_graph, identity)
-        return dynamo_traversal
-
-    @pytest.fixture(scope="function")
-    def customer_node(self, traversal):
-        return traversal.traversal_node_dict[
-            CollectionAddress("dynamodb_example_test_dataset", "customer")
-        ].to_mock_execution_node()
-
-    @pytest.fixture(scope="function")
-    def customer_identifier_node(self, traversal):
-        return traversal.traversal_node_dict[
-            CollectionAddress("dynamodb_example_test_dataset", "customer_identifier")
-        ].to_mock_execution_node()
-
-    @pytest.fixture(scope="function")
-    def customer_row(self):
-        row = {
-            "customer_email": {"S": "customer-1@example.com"},
-            "name": {"S": "John Customer"},
-            "address_id": {"L": [{"S": "1"}, {"S": "2"}]},
-            "personal_info": {"M": {"gender": {"S": "male"}, "age": {"S": "99"}}},
-            "id": {"S": "1"},
-        }
-        return row
-
-    @pytest.fixture(scope="function")
-    def deserialized_customer_row(self, customer_row):
-        deserialized_customer_row = {}
-        deserializer = TypeDeserializer()
-        for key, value in customer_row.items():
-            deserialized_customer_row[key] = deserializer.deserialize(value)
-        return deserialized_customer_row
-
-    @pytest.fixture(scope="function")
-    def customer_identifier_row(self):
-        row = {
-            "customer_id": {"S": "customer-1@example.com"},
-            "email": {"S": "customer-1@example.com"},
-            "name": {"S": "Customer 1"},
-            "created": {"S": datetime.now(timezone.utc).isoformat()},
-        }
-        return row
-
-    @pytest.fixture(scope="function")
-    def deserialized_customer_identifier_row(self, customer_identifier_row):
-        deserialized_customer_identifier_row = {}
-        deserializer = TypeDeserializer()
-        for key, value in customer_identifier_row.items():
-            deserialized_customer_identifier_row[key] = deserializer.deserialize(value)
-        return deserialized_customer_identifier_row
-
-    def test_get_query_param_formatting_single_key(
-        self,
-        resources_dict,
-        customer_node,
-    ) -> None:
-        input_data = {
-            "fidesops_grouped_inputs": [],
-            "email": ["customer-test_uuid@example.com"],
-        }
-        attribute_definitions = [{"AttributeName": "email", "AttributeType": "S"}]
-        query_config = DynamoDBQueryConfig(customer_node, attribute_definitions)
-        item = query_config.generate_query(
-            input_data=input_data, policy=resources_dict["policy"]
-        )
-        assert item["ExpressionAttributeValues"] == {
-            ":value": {"S": "customer-test_uuid@example.com"}
-        }
-        assert item["KeyConditionExpression"] == "email = :value"
-
-    def test_put_query_param_formatting_single_key(
-        self,
-        erasure_policy,
-        customer_node,
-        deserialized_customer_row,
-    ) -> None:
-        input_data = {
-            "fidesops_grouped_inputs": [],
-            "email": ["customer-test_uuid@example.com"],
-        }
-        attribute_definitions = [{"AttributeName": "email", "AttributeType": "S"}]
-        query_config = DynamoDBQueryConfig(customer_node, attribute_definitions)
-        update_item = query_config.generate_update_stmt(
-            deserialized_customer_row, erasure_policy, privacy_request
-        )
-
-        assert update_item == {
-            "customer_email": {"S": "customer-1@example.com"},
-            "name": {"NULL": True},
-            "address_id": {"L": [{"S": "1"}, {"S": "2"}]},
-            "personal_info": {"M": {"gender": {"S": "male"}, "age": {"S": "99"}}},
-            "id": {"S": "1"},
-        }
-
-
-class TestScyllaDBQueryConfig:
-    @pytest.fixture(scope="function")
-    def complete_execution_node(
-        self, example_datasets, integration_scylladb_config_with_keyspace
-    ):
-        dataset = Dataset(**example_datasets[15])
-        graph = convert_dataset_to_graph(
-            dataset, integration_scylladb_config_with_keyspace.key
-        )
-        dataset_graph = DatasetGraph(*[graph])
-        identity = {"email": "customer-1@example.com"}
-        scylla_traversal = Traversal(dataset_graph, identity)
-        return scylla_traversal.traversal_node_dict[
-            CollectionAddress("scylladb_example_test_dataset", "users")
-        ].to_mock_execution_node()
-
-    def test_dry_run_query_no_data(self, scylladb_execution_node):
-        query_config = ScyllaDBQueryConfig(scylladb_execution_node)
-        dry_run_query = query_config.dry_run_query()
-        assert dry_run_query is None
-
-    def test_dry_run_query_with_data(self, complete_execution_node):
-        query_config = ScyllaDBQueryConfig(complete_execution_node)
-        dry_run_query = query_config.dry_run_query()
-        assert (
-            dry_run_query
-            == "SELECT age, alternative_contacts, ascii_data, big_int_data, do_not_contact, double_data, duration, email, float_data, last_contacted, logins, name, states_lived, timestamp, user_id, uuid FROM users WHERE email = ? ALLOW FILTERING;"
-        )
-
-    def test_query_to_str(self, complete_execution_node):
-        query_config = ScyllaDBQueryConfig(complete_execution_node)
-        statement = (
-            "SELECT name FROM users WHERE email = %(email)s",
-            {"email": "test@example.com"},
-        )
-        query_to_str = query_config.query_to_str(statement, {})
-        assert query_to_str == "SELECT name FROM users WHERE email = 'test@example.com'"
-
-
 class TestSQLLikeQueryConfig:
     def test_missing_namespace_meta_schema(self):
 
diff --git a/tests/ops/service/connectors/test_scylladb_query_config.py b/tests/ops/service/connectors/test_scylladb_query_config.py
new file mode 100644
index 0000000000..3cbc6f493f
--- /dev/null
+++ b/tests/ops/service/connectors/test_scylladb_query_config.py
@@ -0,0 +1,47 @@
+import pytest
+from fideslang.models import Dataset
+
+from fides.api.graph.config import CollectionAddress
+from fides.api.graph.graph import DatasetGraph
+from fides.api.graph.traversal import Traversal
+from fides.api.models.datasetconfig import convert_dataset_to_graph
+from fides.api.service.connectors.scylla_query_config import ScyllaDBQueryConfig
+
+
+class TestScyllaDBQueryConfig:
+    """Checks the query text produced by ScyllaDBQueryConfig."""
+
+    @pytest.fixture(scope="function")
+    def complete_execution_node(
+        self, example_datasets, integration_scylladb_config_with_keyspace
+    ):
+        """Mock execution node for the ``users`` collection, traversed by email."""
+        # Index 15 is presumably the ScyllaDB example dataset; confirm in the fixture.
+        scylla_graph = convert_dataset_to_graph(
+            Dataset(**example_datasets[15]),
+            integration_scylladb_config_with_keyspace.key,
+        )
+        traversal = Traversal(
+            DatasetGraph(scylla_graph), {"email": "customer-1@example.com"}
+        )
+        users = CollectionAddress("scylladb_example_test_dataset", "users")
+        return traversal.traversal_node_dict[users].to_mock_execution_node()
+
+    def test_dry_run_query_no_data(self, scylladb_execution_node):
+        """``dry_run_query`` is None for the ``scylladb_execution_node`` fixture."""
+        assert ScyllaDBQueryConfig(scylladb_execution_node).dry_run_query() is None
+
+    def test_dry_run_query_with_data(self, complete_execution_node):
+        """The dry-run statement is the expected SELECT filtered on email."""
+        # Compared verbatim, so column order and the trailing ";" both matter.
+        expected = "SELECT age, alternative_contacts, ascii_data, big_int_data, do_not_contact, double_data, duration, email, float_data, last_contacted, logins, name, states_lived, timestamp, user_id, uuid FROM users WHERE email = ? ALLOW FILTERING;"
+        actual = ScyllaDBQueryConfig(complete_execution_node).dry_run_query()
+        assert actual == expected
+
+    def test_query_to_str(self, complete_execution_node):
+        """The ``%(email)s`` placeholder is rendered as a quoted literal."""
+        config = ScyllaDBQueryConfig(complete_execution_node)
+        cql = "SELECT name FROM users WHERE email = %(email)s"
+        params = {"email": "test@example.com"}
+        rendered = config.query_to_str((cql, params), {})
+        assert rendered == "SELECT name FROM users WHERE email = 'test@example.com'"
diff --git a/tests/ops/task/traversal_data.py b/tests/ops/task/traversal_data.py
index d0ef50ae18..07ff478e3e 100644
--- a/tests/ops/task/traversal_data.py
+++ b/tests/ops/task/traversal_data.py
@@ -156,7 +156,7 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
             {
                 "name": "address",
                 "fields": [
-                    {"name": "_id"},
+                    {"name": "_id", "fides_meta": {"primary_key": True}},
                     {
                         "name": "id",
                         "fides_meta": {
@@ -178,7 +178,7 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
             {
                 "name": "orders",
                 "fields": [
-                    {"name": "_id"},
+                    {"name": "_id", "fides_meta": {"primary_key": True}},
                     {
                         "name": "customer_id",
                         "fides_meta": {
@@ -200,6 +200,7 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                     {
                         "name": "_id",
                         "fides_meta": {
+                            "primary_key": True,
                             "data_type": "object_id",
                         },
                     },
@@ -228,6 +229,7 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                     {
                         "name": "_id",
                         "fides_meta": {
+                            "primary_key": True,
                             "data_type": "object_id",
                         },
                     },
@@ -237,19 +239,27 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                         "fields": [
                             {
                                 "name": "comment",
-                                "fides_meta": {"data_type": "string"},
+                                "fides_meta": {
+                                    "data_type": "string",
+                                },
                             },
                             {
                                 "name": "message",
-                                "fides_meta": {"data_type": "string"},
+                                "fides_meta": {
+                                    "data_type": "string",
+                                },
                             },
                             {
                                 "name": "chat_name",
-                                "fides_meta": {"data_type": "string"},
+                                "fides_meta": {
+                                    "data_type": "string",
+                                },
                             },
                             {
                                 "name": "ccn",
-                                "fides_meta": {"data_type": "string"},
+                                "fides_meta": {
+                                    "data_type": "string",
+                                },
                             },
                         ],
                     },
@@ -258,7 +268,12 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
             {
                 "name": "customer_details",
                 "fields": [
-                    {"name": "_id", "fides_meta": {"primary_key": True}},
+                    {
+                        "name": "_id",
+                        "fides_meta": {
+                            "primary_key": True,
+                        },
+                    },
                     {
                         "name": "birthday",
                         "fides_meta": {"data_type": "string"},
@@ -304,15 +319,21 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                         "fields": [
                             {
                                 "name": "name",
-                                "fides_meta": {"data_type": "string"},
+                                "fides_meta": {
+                                    "data_type": "string",
+                                },
                             },
                             {
                                 "name": "relationship",
-                                "fides_meta": {"data_type": "string"},
+                                "fides_meta": {
+                                    "data_type": "string",
+                                },
                             },
                             {
                                 "name": "phone",
-                                "fides_meta": {"data_type": "string"},
+                                "fides_meta": {
+                                    "data_type": "string",
+                                },
                             },
                         ],
                     },
@@ -330,11 +351,15 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                         "fields": [
                             {
                                 "name": "employer",
-                                "fides_meta": {"data_type": "string"},
+                                "fides_meta": {
+                                    "data_type": "string",
+                                },
                             },
                             {
                                 "name": "position",
-                                "fides_meta": {"data_type": "string"},
+                                "fides_meta": {
+                                    "data_type": "string",
+                                },
                             },
                             {
                                 "name": "direct_reports",
@@ -350,6 +375,7 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                     {
                         "name": "_id",
                         "fides_meta": {
+                            "primary_key": True,
                             "data_type": "object_id",
                         },
                     },
@@ -366,11 +392,15 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                             },
                             {
                                 "name": "phone",
-                                "fides_meta": {"data_type": "string"},
+                                "fides_meta": {
+                                    "data_type": "string",
+                                },
                             },
                             {
                                 "name": "internal_customer_id",
-                                "fides_meta": {"data_type": "string"},
+                                "fides_meta": {
+                                    "data_type": "string",
+                                },
                             },
                         ],
                     },
@@ -394,6 +424,7 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                     {
                         "name": "_id",
                         "fides_meta": {
+                            "primary_key": True,
                             "data_type": "object_id",
                         },
                     },
@@ -407,6 +438,7 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                     {
                         "name": "id",
                         "fides_meta": {
+                            "primary_key": True,
                             "references": [
                                 {
                                     "dataset": mongo_db_name,
@@ -427,7 +459,10 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                 "fields": [
                     {
                         "name": "_id",
-                        "fides_meta": {"data_type": "object_id"},
+                        "fides_meta": {
+                            "primary_key": True,
+                            "data_type": "object_id",
+                        },
                     },
                     {
                         "name": "date",
@@ -454,7 +489,9 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                             },
                             {
                                 "name": "full_name",
-                                "fides_meta": {"data_type": "string"},
+                                "fides_meta": {
+                                    "data_type": "string",
+                                },
                             },
                         ],
                     },
@@ -473,7 +510,10 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                 "fields": [
                     {
                         "name": "_id",
-                        "fides_meta": {"data_type": "object_id"},
+                        "fides_meta": {
+                            "primary_key": True,
+                            "data_type": "object_id",
+                        },
                     },
                     {
                         "name": "customer_identifiers",
@@ -520,7 +560,10 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                 "fields": [
                     {
                         "name": "_id",
-                        "fides_meta": {"data_type": "object_id"},
+                        "fides_meta": {
+                            "primary_key": True,
+                            "data_type": "object_id",
+                        },
                     },
                     {
                         "name": "owner",

From 5d26b2f087b8ebcffd976d33f4ddd94e7507ea75 Mon Sep 17 00:00:00 2001
From: Adrian Galvan <adrian@ethyca.com>
Date: Tue, 10 Dec 2024 14:16:57 -0800
Subject: [PATCH 18/22] Splitting out tests

---
 .../api/service/connectors/base_connector.py  |   5 +
 .../connectors/query_configs/query_config.py  |   1 +
 .../api/service/connectors/saas_connector.py  |   3 +
 .../integration_tests/test_mariadb_task.py    | 101 +++
 .../ops/integration_tests/test_mssql_task.py  | 101 +++
 .../ops/integration_tests/test_mysql_task.py  | 101 +++
 .../integration_tests/test_scylladb_task.py   | 190 +++++
 tests/ops/integration_tests/test_sql_task.py  | 756 +-----------------
 .../integration_tests/test_timescale_task.py  | 294 +++++++
 .../connectors/test_mongo_query_config.py     |   4 +-
 10 files changed, 801 insertions(+), 755 deletions(-)
 create mode 100644 tests/ops/integration_tests/test_mariadb_task.py
 create mode 100644 tests/ops/integration_tests/test_mssql_task.py
 create mode 100644 tests/ops/integration_tests/test_mysql_task.py
 create mode 100644 tests/ops/integration_tests/test_scylladb_task.py
 create mode 100644 tests/ops/integration_tests/test_timescale_task.py

diff --git a/src/fides/api/service/connectors/base_connector.py b/src/fides/api/service/connectors/base_connector.py
index ca3439f523..4bf46e5eca 100644
--- a/src/fides/api/service/connectors/base_connector.py
+++ b/src/fides/api/service/connectors/base_connector.py
@@ -132,3 +132,8 @@ def execute_standalone_retrieval_query(
         raise NotImplementedError(
             "execute_standalone_retrieval_query must be implemented in a concrete subclass"
         )
+
+    @property
+    def requires_primary_keys(self) -> bool:
+        """Indicates if datasets linked to this connector require primary keys for erasures. Defaults to True."""
+        return True
diff --git a/src/fides/api/service/connectors/query_configs/query_config.py b/src/fides/api/service/connectors/query_configs/query_config.py
index 4ef115d910..c54eecff85 100644
--- a/src/fides/api/service/connectors/query_configs/query_config.py
+++ b/src/fides/api/service/connectors/query_configs/query_config.py
@@ -467,6 +467,7 @@ def generate_update_stmt(
         # Create parameter mappings with masked_ prefix for SET values
         param_map = {
             **{f"masked_{k}": v for k, v in update_value_map.items()},
+            **non_empty_primary_key_fields,
             **non_empty_reference_fields,
         }
 
diff --git a/src/fides/api/service/connectors/saas_connector.py b/src/fides/api/service/connectors/saas_connector.py
index b917b6cfda..40a4d8a7eb 100644
--- a/src/fides/api/service/connectors/saas_connector.py
+++ b/src/fides/api/service/connectors/saas_connector.py
@@ -72,6 +72,15 @@
 class SaaSConnector(BaseConnector[AuthenticatedClient], Contextualizable):
     """A connector type to integrate with third-party SaaS APIs"""
 
+    @property
+    def requires_primary_keys(self) -> bool:
+        """SaaS datasets do not require primary keys for erasures.
+
+        Declared as a property to match ``BaseConnector.requires_primary_keys``;
+        as a plain method, attribute access would yield a truthy bound method.
+        """
+        return False
+
     def get_log_context(self) -> Dict[LoggerContextKeys, Any]:
         return {
             LoggerContextKeys.system_key: (
diff --git a/tests/ops/integration_tests/test_mariadb_task.py b/tests/ops/integration_tests/test_mariadb_task.py
new file mode 100644
index 0000000000..3951a2830a
--- /dev/null
+++ b/tests/ops/integration_tests/test_mariadb_task.py
@@ -0,0 +1,101 @@
+import pytest
+
+from fides.api.models.privacy_request import ExecutionLog
+
+from ...conftest import access_runner_tester
+from ..graph.graph_test_util import assert_rows_match, records_matching_fields
+from ..task.traversal_data import integration_db_graph
+
+
+@pytest.mark.integration_mariadb
+@pytest.mark.integration
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "dsr_version",
+    ["use_dsr_3_0", "use_dsr_2_0"],
+)
+async def test_mariadb_access_request_task(
+    db,
+    policy,
+    connection_config_mariadb,
+    mariadb_integration_db,
+    dsr_version,
+    request,
+    privacy_request,
+) -> None:
+    request.getfixturevalue(dsr_version)  # REQUIRED to test both DSR 3.0 and 2.0
+
+    v = access_runner_tester(
+        privacy_request,
+        policy,
+        integration_db_graph("my_maria_db_1"),
+        [connection_config_mariadb],
+        {"email": "customer-1@example.com"},
+        db,
+    )
+
+    assert_rows_match(
+        v["my_maria_db_1:address"],
+        min_size=2,
+        keys=["id", "street", "city", "state", "zip"],
+    )
+    assert_rows_match(
+        v["my_maria_db_1:orders"],
+        min_size=3,
+        keys=["id", "customer_id", "shipping_address_id", "payment_card_id"],
+    )
+    assert_rows_match(
+        v["my_maria_db_1:payment_card"],
+        min_size=2,
+        keys=["id", "name", "ccn", "customer_id", "billing_address_id"],
+    )
+    assert_rows_match(
+        v["my_maria_db_1:customer"],
+        min_size=1,
+        keys=["id", "name", "email", "address_id"],
+    )
+
+    # links
+    assert v["my_maria_db_1:customer"][0]["email"] == "customer-1@example.com"
+
+    logs = (
+        ExecutionLog.query(db=db)
+        .filter(ExecutionLog.privacy_request_id == privacy_request.id)
+        .all()
+    )
+
+    logs = [log.__dict__ for log in logs]
+    assert (
+        len(
+            records_matching_fields(
+                logs, dataset_name="my_maria_db_1", collection_name="customer"
+            )
+        )
+        > 0
+    )
+    assert (
+        len(
+            records_matching_fields(
+                logs, dataset_name="my_maria_db_1", collection_name="address"
+            )
+        )
+        > 0
+    )
+    assert (
+        len(
+            records_matching_fields(
+                logs, dataset_name="my_maria_db_1", collection_name="orders"
+            )
+        )
+        > 0
+    )
+    assert (
+        len(
+            records_matching_fields(
+                logs,
+                dataset_name="my_maria_db_1",
+                collection_name="payment_card",
+            )
+        )
+        > 0
+    )
diff --git a/tests/ops/integration_tests/test_mssql_task.py b/tests/ops/integration_tests/test_mssql_task.py
new file mode 100644
index 0000000000..6bc23eeda0
--- /dev/null
+++ b/tests/ops/integration_tests/test_mssql_task.py
@@ -0,0 +1,101 @@
+import pytest
+
+from fides.api.models.privacy_request import ExecutionLog
+
+from ...conftest import access_runner_tester
+from ..graph.graph_test_util import assert_rows_match, records_matching_fields
+from ..task.traversal_data import integration_db_graph
+
+
+@pytest.mark.integration_mssql
+@pytest.mark.integration
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "dsr_version",
+    ["use_dsr_3_0", "use_dsr_2_0"],
+)
+async def test_mssql_access_request_task(
+    db,
+    policy,
+    connection_config_mssql,
+    mssql_integration_db,
+    privacy_request,
+    dsr_version,
+    request,
+) -> None:
+    request.getfixturevalue(dsr_version)  # REQUIRED to test both DSR 3.0 and 2.0
+
+    v = access_runner_tester(
+        privacy_request,
+        policy,
+        integration_db_graph("my_mssql_db_1"),
+        [connection_config_mssql],
+        {"email": "customer-1@example.com"},
+        db,
+    )
+
+    assert_rows_match(
+        v["my_mssql_db_1:address"],
+        min_size=2,
+        keys=["id", "street", "city", "state", "zip"],
+    )
+    assert_rows_match(
+        v["my_mssql_db_1:orders"],
+        min_size=3,
+        keys=["id", "customer_id", "shipping_address_id", "payment_card_id"],
+    )
+    assert_rows_match(
+        v["my_mssql_db_1:payment_card"],
+        min_size=2,
+        keys=["id", "name", "ccn", "customer_id", "billing_address_id"],
+    )
+    assert_rows_match(
+        v["my_mssql_db_1:customer"],
+        min_size=1,
+        keys=["id", "name", "email", "address_id"],
+    )
+
+    # links
+    assert v["my_mssql_db_1:customer"][0]["email"] == "customer-1@example.com"
+
+    logs = (
+        ExecutionLog.query(db=db)
+        .filter(ExecutionLog.privacy_request_id == privacy_request.id)
+        .all()
+    )
+
+    logs = [log.__dict__ for log in logs]
+    assert (
+        len(
+            records_matching_fields(
+                logs, dataset_name="my_mssql_db_1", collection_name="customer"
+            )
+        )
+        > 0
+    )
+    assert (
+        len(
+            records_matching_fields(
+                logs, dataset_name="my_mssql_db_1", collection_name="address"
+            )
+        )
+        > 0
+    )
+    assert (
+        len(
+            records_matching_fields(
+                logs, dataset_name="my_mssql_db_1", collection_name="orders"
+            )
+        )
+        > 0
+    )
+    assert (
+        len(
+            records_matching_fields(
+                logs,
+                dataset_name="my_mssql_db_1",
+                collection_name="payment_card",
+            )
+        )
+        > 0
+    )
diff --git a/tests/ops/integration_tests/test_mysql_task.py b/tests/ops/integration_tests/test_mysql_task.py
new file mode 100644
index 0000000000..40551dd4d9
--- /dev/null
+++ b/tests/ops/integration_tests/test_mysql_task.py
@@ -0,0 +1,101 @@
+import pytest
+
+from fides.api.models.privacy_request import ExecutionLog
+
+from ...conftest import access_runner_tester
+from ..graph.graph_test_util import assert_rows_match, records_matching_fields
+from ..task.traversal_data import integration_db_graph
+
+
+@pytest.mark.integration
+@pytest.mark.integration_mysql
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "dsr_version",
+    ["use_dsr_3_0", "use_dsr_2_0"],
+)
+async def test_mysql_access_request_task(
+    db,
+    policy,
+    connection_config_mysql,
+    mysql_integration_db,
+    privacy_request,
+    dsr_version,
+    request,
+) -> None:
+    request.getfixturevalue(dsr_version)  # REQUIRED to test both DSR 3.0 and 2.0
+
+    v = access_runner_tester(
+        privacy_request,
+        policy,
+        integration_db_graph("my_mysql_db_1"),
+        [connection_config_mysql],
+        {"email": "customer-1@example.com"},
+        db,
+    )
+
+    assert_rows_match(
+        v["my_mysql_db_1:address"],
+        min_size=2,
+        keys=["id", "street", "city", "state", "zip"],
+    )
+    assert_rows_match(
+        v["my_mysql_db_1:orders"],
+        min_size=3,
+        keys=["id", "customer_id", "shipping_address_id", "payment_card_id"],
+    )
+    assert_rows_match(
+        v["my_mysql_db_1:payment_card"],
+        min_size=2,
+        keys=["id", "name", "ccn", "customer_id", "billing_address_id"],
+    )
+    assert_rows_match(
+        v["my_mysql_db_1:customer"],
+        min_size=1,
+        keys=["id", "name", "email", "address_id"],
+    )
+
+    # links
+    assert v["my_mysql_db_1:customer"][0]["email"] == "customer-1@example.com"
+
+    logs = (
+        ExecutionLog.query(db=db)
+        .filter(ExecutionLog.privacy_request_id == privacy_request.id)
+        .all()
+    )
+
+    logs = [log.__dict__ for log in logs]
+    assert (
+        len(
+            records_matching_fields(
+                logs, dataset_name="my_mysql_db_1", collection_name="customer"
+            )
+        )
+        > 0
+    )
+    assert (
+        len(
+            records_matching_fields(
+                logs, dataset_name="my_mysql_db_1", collection_name="address"
+            )
+        )
+        > 0
+    )
+    assert (
+        len(
+            records_matching_fields(
+                logs, dataset_name="my_mysql_db_1", collection_name="orders"
+            )
+        )
+        > 0
+    )
+    assert (
+        len(
+            records_matching_fields(
+                logs,
+                dataset_name="my_mysql_db_1",
+                collection_name="payment_card",
+            )
+        )
+        > 0
+    )
diff --git a/tests/ops/integration_tests/test_scylladb_task.py b/tests/ops/integration_tests/test_scylladb_task.py
new file mode 100644
index 0000000000..8ced1317ad
--- /dev/null
+++ b/tests/ops/integration_tests/test_scylladb_task.py
@@ -0,0 +1,196 @@
+import pytest
+from sqlalchemy.orm import Session
+
+from fides.api.models.privacy_request import ExecutionLogStatus, PrivacyRequest
+from fides.api.service.connectors.scylla_connector import ScyllaConnectorMissingKeyspace
+from fides.api.task.graph_task import get_cached_data_for_erasures
+
+from ...conftest import access_runner_tester, erasure_runner_tester
+from ..graph.graph_test_util import assert_rows_match, erasure_policy
+from ..task.traversal_data import integration_scylladb_graph
+
+
+@pytest.mark.integration
+@pytest.mark.integration_scylladb
+@pytest.mark.asyncio
+class TestScyllaDSRs:
+    """Access and erasure DSR tests run against the ScyllaDB integration."""
+
+    @pytest.mark.parametrize(
+        "dsr_version",
+        ["use_dsr_2_0"],
+    )
+    async def test_scylladb_access_request_task_no_keyspace_dsr2(
+        self,
+        db: Session,
+        policy,
+        integration_scylladb_config,
+        scylladb_integration_no_keyspace,
+        privacy_request,
+        dsr_version,
+        request,
+    ) -> None:
+        """Under DSR 2.0 a missing keyspace raises ScyllaConnectorMissingKeyspace."""
+        request.getfixturevalue(dsr_version)
+
+        with pytest.raises(ScyllaConnectorMissingKeyspace) as err:
+            access_runner_tester(
+                privacy_request,
+                policy,
+                integration_scylladb_graph("scylla_example"),
+                [integration_scylladb_config],
+                {"email": "customer-1@example.com"},
+                db,
+            )
+
+        assert (
+            "No keyspace provided in the ScyllaDB configuration for connector scylla_example"
+            in str(err.value)
+        )
+
+    @pytest.mark.parametrize(
+        "dsr_version",
+        ["use_dsr_3_0"],
+    )
+    async def test_scylladb_access_request_task_no_keyspace_dsr3(
+        self,
+        db,
+        policy,
+        integration_scylladb_config,
+        scylladb_integration_no_keyspace,
+        privacy_request: PrivacyRequest,
+        dsr_version,
+        request,
+    ) -> None:
+        """Under DSR 3.0 a missing keyspace yields no results and errored tasks."""
+        request.getfixturevalue(dsr_version)
+        v = access_runner_tester(
+            privacy_request,
+            policy,
+            integration_scylladb_graph("scylla_example"),
+            [integration_scylladb_config],
+            {"email": "customer-1@example.com"},
+            db,
+        )
+
+        assert v == {}
+        assert (
+            privacy_request.access_tasks.count() == 6
+        )  # There's 4 tables plus the root and terminal "dummy" tasks
+
+        # Root task should be completed
+        assert privacy_request.access_tasks.first().collection_name == "__ROOT__"
+        assert (
+            privacy_request.access_tasks.first().status == ExecutionLogStatus.complete
+        )
+
+        # All other tasks should be error
+        for access_task in privacy_request.access_tasks.offset(1):
+            assert access_task.status == ExecutionLogStatus.error
+
+    @pytest.mark.parametrize(
+        "dsr_version",
+        ["use_dsr_2_0", "use_dsr_3_0"],
+    )
+    async def test_scylladb_access_request_task(
+        self,
+        db,
+        policy,
+        integration_scylladb_config_with_keyspace,
+        scylla_reset_db,
+        scylladb_integration_with_keyspace,
+        privacy_request,
+        dsr_version,
+        request,
+    ) -> None:
+        """An access request returns the expected rows for each collection."""
+        request.getfixturevalue(dsr_version)  # REQUIRED to test both DSR 3.0 and 2.0
+
+        results = access_runner_tester(
+            privacy_request,
+            policy,
+            integration_scylladb_graph("scylla_example_with_keyspace"),
+            [integration_scylladb_config_with_keyspace],
+            {"email": "customer-1@example.com"},
+            db,
+        )
+
+        assert_rows_match(
+            results["scylla_example_with_keyspace:users"],
+            min_size=1,
+            keys=[
+                "age",
+                "alternative_contacts",
+                "do_not_contact",
+                "email",
+                "name",
+                "last_contacted",
+                "logins",
+                "states_lived",
+            ],
+        )
+        assert_rows_match(
+            results["scylla_example_with_keyspace:user_activity"],
+            min_size=3,
+            keys=["timestamp", "user_agent", "activity_type"],
+        )
+        assert_rows_match(
+            results["scylla_example_with_keyspace:payment_methods"],
+            min_size=2,
+            keys=["card_number", "expiration_date"],
+        )
+        assert_rows_match(
+            results["scylla_example_with_keyspace:orders"],
+            min_size=2,
+            keys=["order_amount", "order_date", "order_description"],
+        )
+
+    @pytest.mark.parametrize(
+        "dsr_version",
+        ["use_dsr_2_0", "use_dsr_3_0"],
+    )
+    async def test_scylladb_erasure_task(
+        self,
+        db,
+        integration_scylladb_config_with_keyspace,
+        scylladb_integration_with_keyspace,
+        scylla_reset_db,
+        privacy_request,
+        dsr_version,
+        request,
+    ):
+        """An erasure request returns the expected per-collection counts."""
+        request.getfixturevalue(dsr_version)  # REQUIRED to test both DSR 3.0 and 2.0
+
+        seed_email = "customer-1@example.com"
+
+        policy = erasure_policy(
+            db, "user.name", "user.behavior", "user.device", "user.payment"
+        )
+        privacy_request.policy_id = policy.id
+        privacy_request.save(db)
+
+        graph = integration_scylladb_graph("scylla_example_with_keyspace")
+        access_runner_tester(
+            privacy_request,
+            policy,
+            graph,  # same graph instance the erasure run below uses
+            [integration_scylladb_config_with_keyspace],
+            {"email": seed_email},
+            db,
+        )
+        results = erasure_runner_tester(
+            privacy_request,
+            policy,
+            graph,
+            [integration_scylladb_config_with_keyspace],
+            {"email": seed_email},
+            get_cached_data_for_erasures(privacy_request.id),
+            db,
+        )
+        assert results == {
+            "scylla_example_with_keyspace:user_activity": 3,
+            "scylla_example_with_keyspace:users": 1,
+            "scylla_example_with_keyspace:payment_methods": 2,
+            "scylla_example_with_keyspace:orders": 2,
+        }
diff --git a/tests/ops/integration_tests/test_sql_task.py b/tests/ops/integration_tests/test_sql_task.py
index b349040988..cd4bb1551f 100644
--- a/tests/ops/integration_tests/test_sql_task.py
+++ b/tests/ops/integration_tests/test_sql_task.py
@@ -6,7 +6,6 @@
 import pytest
 from fideslang import Dataset
 from sqlalchemy import text
-from sqlalchemy.orm import Session
 
 from fides.api.graph.config import Collection, FieldAddress, GraphDataset, ScalarField
 from fides.api.graph.data_type import DataType, StringTypeConverter
@@ -15,14 +14,8 @@
 from fides.api.models.connectionconfig import ConnectionConfig
 from fides.api.models.datasetconfig import convert_dataset_to_graph
 from fides.api.models.policy import ActionType, Policy, Rule, RuleTarget
-from fides.api.models.privacy_request import (
-    ExecutionLog,
-    ExecutionLogStatus,
-    PrivacyRequest,
-    RequestTask,
-)
+from fides.api.models.privacy_request import ExecutionLog, RequestTask
 from fides.api.service.connectors import get_connector
-from fides.api.service.connectors.scylla_connector import ScyllaConnectorMissingKeyspace
 from fides.api.task.filter_results import filter_data_categories
 from fides.api.task.graph_task import get_cached_data_for_erasures
 from fides.config import CONFIG
@@ -35,12 +28,7 @@
     field,
     records_matching_fields,
 )
-from ..task.traversal_data import (
-    integration_db_graph,
-    integration_scylladb_graph,
-    postgres_db_graph_dataset,
-    str_converter,
-)
+from ..task.traversal_data import integration_db_graph, postgres_db_graph_dataset
 
 
 @pytest.mark.integration_postgres
@@ -497,468 +485,7 @@ async def test_postgres_privacy_requests_against_non_default_schema(
     assert johanna_record.name is None  # Masked by erasure request
 
 
-@pytest.mark.integration_mssql
-@pytest.mark.integration
-@pytest.mark.asyncio
-@pytest.mark.parametrize(
-    "dsr_version",
-    ["use_dsr_3_0", "use_dsr_2_0"],
-)
-async def test_mssql_access_request_task(
-    db,
-    policy,
-    connection_config_mssql,
-    mssql_integration_db,
-    privacy_request,
-    dsr_version,
-    request,
-) -> None:
-    request.getfixturevalue(dsr_version)  # REQUIRED to test both DSR 3.0 and 2.0
-
-    v = access_runner_tester(
-        privacy_request,
-        policy,
-        integration_db_graph("my_mssql_db_1"),
-        [connection_config_mssql],
-        {"email": "customer-1@example.com"},
-        db,
-    )
-
-    assert_rows_match(
-        v["my_mssql_db_1:address"],
-        min_size=2,
-        keys=["id", "street", "city", "state", "zip"],
-    )
-    assert_rows_match(
-        v["my_mssql_db_1:orders"],
-        min_size=3,
-        keys=["id", "customer_id", "shipping_address_id", "payment_card_id"],
-    )
-    assert_rows_match(
-        v["my_mssql_db_1:payment_card"],
-        min_size=2,
-        keys=["id", "name", "ccn", "customer_id", "billing_address_id"],
-    )
-    assert_rows_match(
-        v["my_mssql_db_1:customer"],
-        min_size=1,
-        keys=["id", "name", "email", "address_id"],
-    )
-
-    # links
-    assert v["my_mssql_db_1:customer"][0]["email"] == "customer-1@example.com"
-
-    logs = (
-        ExecutionLog.query(db=db)
-        .filter(ExecutionLog.privacy_request_id == privacy_request.id)
-        .all()
-    )
-
-    logs = [log.__dict__ for log in logs]
-    assert (
-        len(
-            records_matching_fields(
-                logs, dataset_name="my_mssql_db_1", collection_name="customer"
-            )
-        )
-        > 0
-    )
-    assert (
-        len(
-            records_matching_fields(
-                logs, dataset_name="my_mssql_db_1", collection_name="address"
-            )
-        )
-        > 0
-    )
-    assert (
-        len(
-            records_matching_fields(
-                logs, dataset_name="my_mssql_db_1", collection_name="orders"
-            )
-        )
-        > 0
-    )
-    assert (
-        len(
-            records_matching_fields(
-                logs,
-                dataset_name="my_mssql_db_1",
-                collection_name="payment_card",
-            )
-        )
-        > 0
-    )
-
-
-@pytest.mark.integration
-@pytest.mark.integration_mysql
-@pytest.mark.asyncio
-@pytest.mark.parametrize(
-    "dsr_version",
-    ["use_dsr_3_0", "use_dsr_2_0"],
-)
-async def test_mysql_access_request_task(
-    db,
-    policy,
-    connection_config_mysql,
-    mysql_integration_db,
-    privacy_request,
-    dsr_version,
-    request,
-) -> None:
-    request.getfixturevalue(dsr_version)  # REQUIRED to test both DSR 3.0 and 2.0
-
-    v = access_runner_tester(
-        privacy_request,
-        policy,
-        integration_db_graph("my_mysql_db_1"),
-        [connection_config_mysql],
-        {"email": "customer-1@example.com"},
-        db,
-    )
-
-    assert_rows_match(
-        v["my_mysql_db_1:address"],
-        min_size=2,
-        keys=["id", "street", "city", "state", "zip"],
-    )
-    assert_rows_match(
-        v["my_mysql_db_1:orders"],
-        min_size=3,
-        keys=["id", "customer_id", "shipping_address_id", "payment_card_id"],
-    )
-    assert_rows_match(
-        v["my_mysql_db_1:payment_card"],
-        min_size=2,
-        keys=["id", "name", "ccn", "customer_id", "billing_address_id"],
-    )
-    assert_rows_match(
-        v["my_mysql_db_1:customer"],
-        min_size=1,
-        keys=["id", "name", "email", "address_id"],
-    )
-
-    # links
-    assert v["my_mysql_db_1:customer"][0]["email"] == "customer-1@example.com"
-
-    logs = (
-        ExecutionLog.query(db=db)
-        .filter(ExecutionLog.privacy_request_id == privacy_request.id)
-        .all()
-    )
-
-    logs = [log.__dict__ for log in logs]
-    assert (
-        len(
-            records_matching_fields(
-                logs, dataset_name="my_mysql_db_1", collection_name="customer"
-            )
-        )
-        > 0
-    )
-    assert (
-        len(
-            records_matching_fields(
-                logs, dataset_name="my_mysql_db_1", collection_name="address"
-            )
-        )
-        > 0
-    )
-    assert (
-        len(
-            records_matching_fields(
-                logs, dataset_name="my_mysql_db_1", collection_name="orders"
-            )
-        )
-        > 0
-    )
-    assert (
-        len(
-            records_matching_fields(
-                logs,
-                dataset_name="my_mysql_db_1",
-                collection_name="payment_card",
-            )
-        )
-        > 0
-    )
-
-
-@pytest.mark.integration_mariadb
-@pytest.mark.integration
-@pytest.mark.asyncio
-@pytest.mark.parametrize(
-    "dsr_version",
-    ["use_dsr_3_0", "use_dsr_2_0"],
-)
-async def test_mariadb_access_request_task(
-    db,
-    policy,
-    connection_config_mariadb,
-    mariadb_integration_db,
-    dsr_version,
-    request,
-    privacy_request,
-) -> None:
-    request.getfixturevalue(dsr_version)  # REQUIRED to test both DSR 3.0 and 2.0
-
-    v = access_runner_tester(
-        privacy_request,
-        policy,
-        integration_db_graph("my_maria_db_1"),
-        [connection_config_mariadb],
-        {"email": "customer-1@example.com"},
-        db,
-    )
-
-    assert_rows_match(
-        v["my_maria_db_1:address"],
-        min_size=2,
-        keys=["id", "street", "city", "state", "zip"],
-    )
-    assert_rows_match(
-        v["my_maria_db_1:orders"],
-        min_size=3,
-        keys=["id", "customer_id", "shipping_address_id", "payment_card_id"],
-    )
-    assert_rows_match(
-        v["my_maria_db_1:payment_card"],
-        min_size=2,
-        keys=["id", "name", "ccn", "customer_id", "billing_address_id"],
-    )
-    assert_rows_match(
-        v["my_maria_db_1:customer"],
-        min_size=1,
-        keys=["id", "name", "email", "address_id"],
-    )
-
-    # links
-    assert v["my_maria_db_1:customer"][0]["email"] == "customer-1@example.com"
-
-    logs = (
-        ExecutionLog.query(db=db)
-        .filter(ExecutionLog.privacy_request_id == privacy_request.id)
-        .all()
-    )
-
-    logs = [log.__dict__ for log in logs]
-    assert (
-        len(
-            records_matching_fields(
-                logs, dataset_name="my_maria_db_1", collection_name="customer"
-            )
-        )
-        > 0
-    )
-    assert (
-        len(
-            records_matching_fields(
-                logs, dataset_name="my_maria_db_1", collection_name="address"
-            )
-        )
-        > 0
-    )
-    assert (
-        len(
-            records_matching_fields(
-                logs, dataset_name="my_maria_db_1", collection_name="orders"
-            )
-        )
-        > 0
-    )
-    assert (
-        len(
-            records_matching_fields(
-                logs,
-                dataset_name="my_maria_db_1",
-                collection_name="payment_card",
-            )
-        )
-        > 0
-    )
-
-
-@pytest.mark.integration
-@pytest.mark.integration_scylladb
-@pytest.mark.asyncio
-class TestScyllaDSRs:
-    @pytest.mark.parametrize(
-        "dsr_version",
-        ["use_dsr_2_0"],
-    )
-    async def test_scylladb_access_request_task_no_keyspace_dsr2(
-        self,
-        db: Session,
-        policy,
-        integration_scylladb_config,
-        scylladb_integration_no_keyspace,
-        privacy_request,
-        dsr_version,
-        request,
-    ) -> None:
-        request.getfixturevalue(dsr_version)
-
-        with pytest.raises(ScyllaConnectorMissingKeyspace) as err:
-            v = access_runner_tester(
-                privacy_request,
-                policy,
-                integration_scylladb_graph("scylla_example"),
-                [integration_scylladb_config],
-                {"email": "customer-1@example.com"},
-                db,
-            )
-
-        assert (
-            "No keyspace provided in the ScyllaDB configuration for connector scylla_example"
-            in str(err.value)
-        )
-
-    @pytest.mark.parametrize(
-        "dsr_version",
-        ["use_dsr_3_0"],
-    )
-    async def test_scylladb_access_request_task_no_keyspace_dsr3(
-        self,
-        db,
-        policy,
-        integration_scylladb_config,
-        scylladb_integration_no_keyspace,
-        privacy_request: PrivacyRequest,
-        dsr_version,
-        request,
-    ) -> None:
-        request.getfixturevalue(dsr_version)
-        v = access_runner_tester(
-            privacy_request,
-            policy,
-            integration_scylladb_graph("scylla_example"),
-            [integration_scylladb_config],
-            {"email": "customer-1@example.com"},
-            db,
-        )
-
-        assert v == {}
-        assert (
-            privacy_request.access_tasks.count() == 6
-        )  # There's 4 tables plus the root and terminal "dummy" tasks
-
-        # Root task should be completed
-        assert privacy_request.access_tasks.first().collection_name == "__ROOT__"
-        assert (
-            privacy_request.access_tasks.first().status == ExecutionLogStatus.complete
-        )
-
-        # All other tasks should be error
-        for access_task in privacy_request.access_tasks.offset(1):
-            assert access_task.status == ExecutionLogStatus.error
-
-    @pytest.mark.parametrize(
-        "dsr_version",
-        ["use_dsr_2_0", "use_dsr_3_0"],
-    )
-    async def test_scylladb_access_request_task(
-        self,
-        db,
-        policy,
-        integration_scylladb_config_with_keyspace,
-        scylla_reset_db,
-        scylladb_integration_with_keyspace,
-        privacy_request,
-        dsr_version,
-        request,
-    ) -> None:
-        request.getfixturevalue(dsr_version)  # REQUIRED to test both DSR 3.0 and 2.0
-
-        results = access_runner_tester(
-            privacy_request,
-            policy,
-            integration_scylladb_graph("scylla_example_with_keyspace"),
-            [integration_scylladb_config_with_keyspace],
-            {"email": "customer-1@example.com"},
-            db,
-        )
-
-        assert_rows_match(
-            results["scylla_example_with_keyspace:users"],
-            min_size=1,
-            keys=[
-                "age",
-                "alternative_contacts",
-                "do_not_contact",
-                "email",
-                "name",
-                "last_contacted",
-                "logins",
-                "states_lived",
-            ],
-        )
-        assert_rows_match(
-            results["scylla_example_with_keyspace:user_activity"],
-            min_size=3,
-            keys=["timestamp", "user_agent", "activity_type"],
-        )
-        assert_rows_match(
-            results["scylla_example_with_keyspace:payment_methods"],
-            min_size=2,
-            keys=["card_number", "expiration_date"],
-        )
-        assert_rows_match(
-            results["scylla_example_with_keyspace:orders"],
-            min_size=2,
-            keys=["order_amount", "order_date", "order_description"],
-        )
-
-    @pytest.mark.parametrize(
-        "dsr_version",
-        ["use_dsr_2_0", "use_dsr_3_0"],
-    )
-    async def test_scylladb_erasure_task(
-        self,
-        db,
-        integration_scylladb_config_with_keyspace,
-        scylladb_integration_with_keyspace,
-        scylla_reset_db,
-        privacy_request,
-        dsr_version,
-        request,
-    ):
-        request.getfixturevalue(dsr_version)  # REQUIRED to test both DSR 3.0 and 2.0
-
-        seed_email = "customer-1@example.com"
-
-        policy = erasure_policy(
-            db, "user.name", "user.behavior", "user.device", "user.payment"
-        )
-        privacy_request.policy_id = policy.id
-        privacy_request.save(db)
-
-        graph = integration_scylladb_graph("scylla_example_with_keyspace")
-        access_runner_tester(
-            privacy_request,
-            policy,
-            integration_scylladb_graph("scylla_example_with_keyspace"),
-            [integration_scylladb_config_with_keyspace],
-            {"email": seed_email},
-            db,
-        )
-        results = erasure_runner_tester(
-            privacy_request,
-            policy,
-            graph,
-            [integration_scylladb_config_with_keyspace],
-            {"email": seed_email},
-            get_cached_data_for_erasures(privacy_request.id),
-            db,
-        )
-        assert results == {
-            "scylla_example_with_keyspace:user_activity": 3,
-            "scylla_example_with_keyspace:users": 1,
-            "scylla_example_with_keyspace:payment_methods": 2,
-            "scylla_example_with_keyspace:orders": 2,
-        }
-
-
+@pytest.mark.integration_postgres
 @pytest.mark.integration
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
@@ -1595,280 +1122,3 @@ async def test_retry_erasure(
                 "error",
                 "error",
             }
-
-
-@pytest.mark.integration_timescale
-@pytest.mark.integration
-@pytest.mark.asyncio
-@pytest.mark.parametrize(
-    "dsr_version",
-    ["use_dsr_3_0", "use_dsr_2_0"],
-)
-async def test_timescale_access_request_task(
-    db,
-    policy,
-    timescale_connection_config,
-    timescale_integration_db,
-    privacy_request,
-    dsr_version,
-    request,
-) -> None:
-    database_name = "my_timescale_db_1"
-    request.getfixturevalue(dsr_version)  # REQUIRED to test both DSR 3.0 and 2.0
-
-    v = access_runner_tester(
-        privacy_request,
-        policy,
-        integration_db_graph(database_name),
-        [timescale_connection_config],
-        {"email": "customer-1@example.com"},
-        db,
-    )
-
-    assert_rows_match(
-        v[f"{database_name}:address"],
-        min_size=2,
-        keys=["id", "street", "city", "state", "zip"],
-    )
-    assert_rows_match(
-        v[f"{database_name}:orders"],
-        min_size=3,
-        keys=["id", "customer_id", "shipping_address_id", "payment_card_id"],
-    )
-    assert_rows_match(
-        v[f"{database_name}:payment_card"],
-        min_size=2,
-        keys=["id", "name", "ccn", "customer_id", "billing_address_id"],
-    )
-    assert_rows_match(
-        v[f"{database_name}:customer"],
-        min_size=1,
-        keys=["id", "name", "email", "address_id"],
-    )
-
-    # links
-    assert v[f"{database_name}:customer"][0]["email"] == "customer-1@example.com"
-
-    logs = (
-        ExecutionLog.query(db=db)
-        .filter(ExecutionLog.privacy_request_id == privacy_request.id)
-        .all()
-    )
-
-    logs = [log.__dict__ for log in logs]
-
-    assert (
-        len(
-            records_matching_fields(
-                logs, dataset_name=database_name, collection_name="customer"
-            )
-        )
-        > 0
-    )
-
-    assert (
-        len(
-            records_matching_fields(
-                logs, dataset_name=database_name, collection_name="address"
-            )
-        )
-        > 0
-    )
-
-    assert (
-        len(
-            records_matching_fields(
-                logs, dataset_name=database_name, collection_name="orders"
-            )
-        )
-        > 0
-    )
-
-    assert (
-        len(
-            records_matching_fields(
-                logs,
-                dataset_name=database_name,
-                collection_name="payment_card",
-            )
-        )
-        > 0
-    )
-
-
-@pytest.mark.integration_timescale
-@pytest.mark.integration
-@pytest.mark.asyncio
-@pytest.mark.parametrize(
-    "dsr_version",
-    ["use_dsr_3_0", "use_dsr_2_0"],
-)
-async def test_timescale_erasure_request_task(
-    db,
-    erasure_policy,
-    timescale_connection_config,
-    timescale_integration_db,
-    privacy_request_with_erasure_policy,
-    dsr_version,
-    request,
-) -> None:
-    request.getfixturevalue(dsr_version)  # REQUIRED to test both DSR 3.0 and 2.0
-
-    rule = erasure_policy.rules[0]
-    target = rule.targets[0]
-    target.data_category = "user"
-    target.save(db)
-
-    database_name = "my_timescale_db_1"
-
-    dataset = postgres_db_graph_dataset(database_name, timescale_connection_config.key)
-
-    # Set some data categories on fields that will be targeted by the policy above
-    field([dataset], database_name, "customer", "name").data_categories = ["user.name"]
-    field([dataset], database_name, "address", "street").data_categories = ["user"]
-    field([dataset], database_name, "payment_card", "ccn").data_categories = ["user"]
-
-    graph = DatasetGraph(dataset)
-
-    v = access_runner_tester(
-        privacy_request_with_erasure_policy,
-        erasure_policy,
-        graph,
-        [timescale_connection_config],
-        {"email": "customer-1@example.com"},
-        db,
-    )
-
-    v = erasure_runner_tester(
-        privacy_request_with_erasure_policy,
-        erasure_policy,
-        graph,
-        [timescale_connection_config],
-        {"email": "customer-1@example.com"},
-        get_cached_data_for_erasures(privacy_request_with_erasure_policy.id),
-        db,
-    )
-    assert v == {
-        f"{database_name}:customer": 1,
-        f"{database_name}:orders": 0,
-        f"{database_name}:payment_card": 2,
-        f"{database_name}:address": 2,
-    }, "No erasure on orders table - no data categories targeted"
-
-    # Verify masking in appropriate tables
-    address_cursor = timescale_integration_db.execute(
-        text("select * from address where id in (1, 2)")
-    )
-    for address in address_cursor:
-        assert address.street is None  # Masked due to matching data category
-        assert address.state is not None
-        assert address.city is not None
-        assert address.zip is not None
-
-    customer_cursor = timescale_integration_db.execute(
-        text("select * from customer where id = 1")
-    )
-    customer = [customer for customer in customer_cursor][0]
-    assert customer.name is None  # Masked due to matching data category
-    assert customer.email == "customer-1@example.com"
-    assert customer.address_id is not None
-
-    payment_card_cursor = timescale_integration_db.execute(
-        text("select * from payment_card where id in ('pay_aaa-aaa', 'pay_bbb-bbb')")
-    )
-    payment_cards = [card for card in payment_card_cursor]
-    assert all(
-        [card.ccn is None for card in payment_cards]
-    )  # Masked due to matching data category
-    assert not any([card.name is None for card in payment_cards]) is None
-
-
-@pytest.mark.integration_timescale
-@pytest.mark.integration
-@pytest.mark.asyncio
-@pytest.mark.parametrize(
-    "dsr_version",
-    ["use_dsr_3_0", "use_dsr_2_0"],
-)
-async def test_timescale_query_and_mask_hypertable(
-    db,
-    erasure_policy,
-    timescale_connection_config,
-    timescale_integration_db,
-    privacy_request_with_erasure_policy,
-    dsr_version,
-    request,
-) -> None:
-    request.getfixturevalue(dsr_version)  # REQUIRED to test both DSR 3.0 and 2.0
-
-    database_name = "my_timescale_db_1"
-
-    dataset = postgres_db_graph_dataset(database_name, timescale_connection_config.key)
-    # For this test, add a new collection to our standard dataset corresponding to the
-    # "onsite_personnel" timescale hypertable
-    onsite_personnel_collection = Collection(
-        name="onsite_personnel",
-        fields=[
-            ScalarField(
-                name="responsible", data_type_converter=str_converter, identity="email"
-            ),
-            ScalarField(
-                name="time", data_type_converter=str_converter, primary_key=True
-            ),
-        ],
-    )
-
-    dataset.collections.append(onsite_personnel_collection)
-    graph = DatasetGraph(dataset)
-    rule = erasure_policy.rules[0]
-    target = rule.targets[0]
-    target.data_category = "user"
-    target.save(db)
-    # Update data category on responsible field
-    field(
-        [dataset], database_name, "onsite_personnel", "responsible"
-    ).data_categories = ["user.contact.email"]
-
-    access_results = access_runner_tester(
-        privacy_request_with_erasure_policy,
-        erasure_policy,
-        graph,
-        [timescale_connection_config],
-        {"email": "employee-1@example.com"},
-        db,
-    )
-
-    # Demonstrate hypertable can be queried
-    assert access_results[f"{database_name}:onsite_personnel"] == [
-        {"responsible": "employee-1@example.com", "time": datetime(2022, 1, 1, 9, 0)},
-        {"responsible": "employee-1@example.com", "time": datetime(2022, 1, 2, 9, 0)},
-        {"responsible": "employee-1@example.com", "time": datetime(2022, 1, 3, 9, 0)},
-        {"responsible": "employee-1@example.com", "time": datetime(2022, 1, 5, 9, 0)},
-    ]
-
-    # Run an erasure on the hypertable targeting the responsible field
-    v = erasure_runner_tester(
-        privacy_request_with_erasure_policy,
-        erasure_policy,
-        graph,
-        [timescale_connection_config],
-        {"email": "employee-1@example.com"},
-        get_cached_data_for_erasures(privacy_request_with_erasure_policy.id),
-        db,
-    )
-
-    assert v == {
-        f"{database_name}:customer": 0,
-        f"{database_name}:orders": 0,
-        f"{database_name}:payment_card": 0,
-        f"{database_name}:address": 0,
-        f"{database_name}:onsite_personnel": 4,
-    }, "onsite_personnel.responsible was the only targeted data category"
-
-    personnel_records = timescale_integration_db.execute(
-        text("select * from onsite_personnel")
-    )
-    for record in personnel_records:
-        assert (
-            record.responsible != "employee-1@example.com"
-        )  # These emails have all been masked
diff --git a/tests/ops/integration_tests/test_timescale_task.py b/tests/ops/integration_tests/test_timescale_task.py
new file mode 100644
index 0000000000..97af65ce65
--- /dev/null
+++ b/tests/ops/integration_tests/test_timescale_task.py
@@ -0,0 +1,294 @@
+from datetime import datetime
+
+import pytest
+from sqlalchemy import text
+
+from fides.api.graph.config import Collection, ScalarField
+from fides.api.graph.graph import DatasetGraph
+from fides.api.models.privacy_request import ExecutionLog
+from fides.api.task.graph_task import get_cached_data_for_erasures
+
+from ...conftest import access_runner_tester, erasure_runner_tester
+from ..graph.graph_test_util import assert_rows_match, field, records_matching_fields
+from ..task.traversal_data import (
+    integration_db_graph,
+    postgres_db_graph_dataset,
+    str_converter,
+)
+
+
+@pytest.mark.integration_timescale
+@pytest.mark.integration
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "dsr_version",
+    ["use_dsr_3_0", "use_dsr_2_0"],
+)
+async def test_timescale_access_request_task(
+    db,
+    policy,
+    timescale_connection_config,
+    timescale_integration_db,
+    privacy_request,
+    dsr_version,
+    request,
+) -> None:
+    """Access request for customer-1 yields rows and execution logs per collection."""
+    database_name = "my_timescale_db_1"
+    request.getfixturevalue(dsr_version)  # REQUIRED to test both DSR 3.0 and 2.0
+    v = access_runner_tester(
+        privacy_request,
+        policy,
+        integration_db_graph(database_name),
+        [timescale_connection_config],
+        {"email": "customer-1@example.com"},
+        db,
+    )
+
+    assert_rows_match(
+        v[f"{database_name}:address"],
+        min_size=2,
+        keys=["id", "street", "city", "state", "zip"],
+    )
+    assert_rows_match(
+        v[f"{database_name}:orders"],
+        min_size=3,
+        keys=["id", "customer_id", "shipping_address_id", "payment_card_id"],
+    )
+    assert_rows_match(
+        v[f"{database_name}:payment_card"],
+        min_size=2,
+        keys=["id", "name", "ccn", "customer_id", "billing_address_id"],
+    )
+    assert_rows_match(
+        v[f"{database_name}:customer"],
+        min_size=1,
+        keys=["id", "name", "email", "address_id"],
+    )
+
+    # The first customer row carries the email used to seed the request
+    assert v[f"{database_name}:customer"][0]["email"] == "customer-1@example.com"
+
+    logs = (
+        ExecutionLog.query(db=db)
+        .filter(ExecutionLog.privacy_request_id == privacy_request.id)
+        .all()
+    )
+
+    logs = [log.__dict__ for log in logs]
+
+    assert (
+        len(
+            records_matching_fields(
+                logs, dataset_name=database_name, collection_name="customer"
+            )
+        )
+        > 0
+    )
+
+    assert (
+        len(
+            records_matching_fields(
+                logs, dataset_name=database_name, collection_name="address"
+            )
+        )
+        > 0
+    )
+
+    assert (
+        len(
+            records_matching_fields(
+                logs, dataset_name=database_name, collection_name="orders"
+            )
+        )
+        > 0
+    )
+
+    assert (
+        len(
+            records_matching_fields(
+                logs,
+                dataset_name=database_name,
+                collection_name="payment_card",
+            )
+        )
+        > 0
+    )
+
+
+@pytest.mark.integration_timescale
+@pytest.mark.integration
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "dsr_version",
+    ["use_dsr_3_0", "use_dsr_2_0"],
+)
+async def test_timescale_erasure_request_task(
+    db,
+    erasure_policy,
+    timescale_connection_config,
+    timescale_integration_db,
+    privacy_request_with_erasure_policy,
+    dsr_version,
+    request,
+) -> None:
+    """Erase customer-1's data and verify only targeted fields are nulled."""
+    request.getfixturevalue(dsr_version)  # REQUIRED to test both DSR 3.0 and 2.0
+
+    rule = erasure_policy.rules[0]
+    target = rule.targets[0]
+    target.data_category = "user"
+    target.save(db)
+
+    database_name = "my_timescale_db_1"
+    dataset = postgres_db_graph_dataset(database_name, timescale_connection_config.key)
+
+    # Set some data categories on fields that will be targeted by the policy above
+    field([dataset], database_name, "customer", "name").data_categories = ["user.name"]
+    field([dataset], database_name, "address", "street").data_categories = ["user"]
+    field([dataset], database_name, "payment_card", "ccn").data_categories = ["user"]
+
+    graph = DatasetGraph(dataset)
+
+    access_runner_tester(
+        privacy_request_with_erasure_policy,
+        erasure_policy,
+        graph,
+        [timescale_connection_config],
+        {"email": "customer-1@example.com"},
+        db,
+    )
+
+    v = erasure_runner_tester(
+        privacy_request_with_erasure_policy,
+        erasure_policy,
+        graph,
+        [timescale_connection_config],
+        {"email": "customer-1@example.com"},
+        get_cached_data_for_erasures(privacy_request_with_erasure_policy.id),
+        db,
+    )
+    assert v == {
+        f"{database_name}:customer": 1,
+        f"{database_name}:orders": 0,
+        f"{database_name}:payment_card": 2,
+        f"{database_name}:address": 2,
+    }, "No erasure on orders table - no data categories targeted"
+
+    # Verify masking in appropriate tables
+    address_cursor = timescale_integration_db.execute(
+        text("select * from address where id in (1, 2)")
+    )
+    for address in address_cursor:
+        assert address.street is None  # Masked due to matching data category
+        assert address.state is not None
+        assert address.city is not None
+        assert address.zip is not None
+
+    customer_cursor = timescale_integration_db.execute(
+        text("select * from customer where id = 1")
+    )
+    customer = list(customer_cursor)[0]
+    assert customer.name is None  # Masked due to matching data category
+    assert customer.email == "customer-1@example.com"
+    assert customer.address_id is not None
+
+    payment_card_cursor = timescale_integration_db.execute(
+        text("select * from payment_card where id in ('pay_aaa-aaa', 'pay_bbb-bbb')")
+    )
+    payment_cards = list(payment_card_cursor)
+    assert all(
+        card.ccn is None for card in payment_cards
+    )  # Masked due to matching data category
+    assert not any(card.name is None for card in payment_cards)  # Name was not targeted
+
+
+@pytest.mark.integration_timescale
+@pytest.mark.integration
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "dsr_version",
+    ["use_dsr_3_0", "use_dsr_2_0"],
+)
+async def test_timescale_query_and_mask_hypertable(
+    db,
+    erasure_policy,
+    timescale_connection_config,
+    timescale_integration_db,
+    privacy_request_with_erasure_policy,
+    dsr_version,
+    request,
+) -> None:
+    """Query the onsite_personnel hypertable, then mask its responsible field."""
+    request.getfixturevalue(dsr_version)  # REQUIRED to test both DSR 3.0 and 2.0
+    database_name = "my_timescale_db_1"
+
+    dataset = postgres_db_graph_dataset(database_name, timescale_connection_config.key)
+    # For this test, add a new collection to our standard dataset corresponding to the
+    # "onsite_personnel" timescale hypertable
+    onsite_personnel_collection = Collection(
+        name="onsite_personnel",
+        fields=[
+            ScalarField(
+                name="responsible", data_type_converter=str_converter, identity="email"
+            ),
+            ScalarField(
+                name="time", data_type_converter=str_converter, primary_key=True
+            ),
+        ],
+    )
+
+    dataset.collections.append(onsite_personnel_collection)
+    graph = DatasetGraph(dataset)
+    rule = erasure_policy.rules[0]
+    target = rule.targets[0]
+    target.data_category = "user"
+    target.save(db)
+    # Update data category on responsible field
+    field(
+        [dataset], database_name, "onsite_personnel", "responsible"
+    ).data_categories = ["user.contact.email"]
+
+    access_results = access_runner_tester(
+        privacy_request_with_erasure_policy,
+        erasure_policy,
+        graph,
+        [timescale_connection_config],
+        {"email": "employee-1@example.com"},
+        db,
+    )
+
+    # Demonstrate hypertable can be queried
+    assert access_results[f"{database_name}:onsite_personnel"] == [
+        {"responsible": "employee-1@example.com", "time": datetime(2022, 1, 1, 9, 0)},
+        {"responsible": "employee-1@example.com", "time": datetime(2022, 1, 2, 9, 0)},
+        {"responsible": "employee-1@example.com", "time": datetime(2022, 1, 3, 9, 0)},
+        {"responsible": "employee-1@example.com", "time": datetime(2022, 1, 5, 9, 0)},
+    ]
+
+    # Run an erasure on the hypertable targeting the responsible field
+    v = erasure_runner_tester(
+        privacy_request_with_erasure_policy,
+        erasure_policy,
+        graph,
+        [timescale_connection_config],
+        {"email": "employee-1@example.com"},
+        get_cached_data_for_erasures(privacy_request_with_erasure_policy.id),
+        db,
+    )
+
+    assert v == {
+        f"{database_name}:customer": 0,
+        f"{database_name}:orders": 0,
+        f"{database_name}:payment_card": 0,
+        f"{database_name}:address": 0,
+        f"{database_name}:onsite_personnel": 4,
+    }, "onsite_personnel.responsible was the only targeted data category"
+
+    personnel_records = timescale_integration_db.execute(
+        text("select * from onsite_personnel")
+    )
+    for record in personnel_records:
+        assert (
+            record.responsible != "employee-1@example.com"
+        )  # These emails have all been masked
diff --git a/tests/ops/service/connectors/test_mongo_query_config.py b/tests/ops/service/connectors/test_mongo_query_config.py
index 3912618801..c0f6079df1 100644
--- a/tests/ops/service/connectors/test_mongo_query_config.py
+++ b/tests/ops/service/connectors/test_mongo_query_config.py
@@ -191,7 +191,7 @@ def test_generate_update_stmt_multiple_fields(
             row, erasure_policy, privacy_request
         )
 
-        expected_result_0 = {"customer_id": 1}
+        expected_result_0 = {"_id": 1}
         expected_result_1 = {
             "$set": {
                 "birthday": None,
@@ -273,7 +273,7 @@ def test_generate_update_stmt_multiple_rules(
         mongo_statement = config.generate_update_stmt(
             row, erasure_policy_two_rules, privacy_request
         )
-        assert mongo_statement[0] == {"customer_id": 1}
+        assert mongo_statement[0] == {"_id": 1}
         assert len(mongo_statement[1]["$set"]["gender"]) == 30
         assert (
             mongo_statement[1]["$set"]["birthday"]

From 647586fa8f9d319d98514e38f0de644552146c99 Mon Sep 17 00:00:00 2001
From: Adrian Galvan <adrian@ethyca.com>
Date: Tue, 10 Dec 2024 14:51:07 -0800
Subject: [PATCH 19/22] Reverting most of the removal of primary keys + misc
 files

---
 .../fixtures/connectors/datasetconfig.json    | 18 ++++-----
 .../bigquery_enterprise_test_dataset.yml      |  8 ++--
 .../dataset/bigquery_example_test_dataset.yml | 18 +++++++++
 .../dataset/dynamodb_example_test_dataset.yml |  7 ++++
 data/dataset/email_dataset.yml                |  6 +++
 ...le_field_masking_override_test_dataset.yml | 18 +++++++++
 data/dataset/example_test_dataset.invalid     | 18 +++++++++
 data/dataset/example_test_datasets.yml        | 16 ++++++++
 ...e_cloud_sql_mysql_example_test_dataset.yml |  8 ++++
 ...loud_sql_postgres_example_test_dataset.yml |  8 ++++
 data/dataset/manual_dataset.yml               |  4 ++
 data/dataset/mariadb_example_test_dataset.yml |  8 ++++
 data/dataset/mongo_example_test_dataset.yml   | 11 ++++++
 data/dataset/mssql_example_test_dataset.yml   |  8 ++++
 data/dataset/mysql_example_test_dataset.yml   |  9 +++++
 ...s_example_custom_request_field_dataset.yml |  1 +
 ...alid_masking_strategy_override_dataset.yml |  4 ++
 .../dataset/postgres_example_test_dataset.yml | 28 +++++++------
 .../dataset/redshift_example_test_dataset.yml | 18 +++++++++
 .../dataset/scylladb_example_test_dataset.yml |  5 +++
 .../snowflake_example_test_dataset.yml        | 18 +++++++++
 .../dataset/timebase_example_test_dataset.yml | 18 +++++++++
 dev-requirements.txt                          |  2 +-
 pyproject.toml                                |  2 +-
 src/fides/api/task/graph_task.py              | 27 +++++++++++++
 tests/fixtures/email_fixtures.py              | 39 ++++++++++++++-----
 .../v1/endpoints/test_dataset_endpoints.py    | 16 ++++----
 tests/ops/generator/test_data_generator.py    |  2 +
 tests/ops/models/test_datasetconfig.py        |  7 ++--
 tests/ops/task/test_create_request_tasks.py   |  6 +--
 tests/ops/task/traversal_data.py              | 17 ++++----
 tests/ops/util/test_dataset_yaml.py           |  3 ++
 32 files changed, 322 insertions(+), 56 deletions(-)

diff --git a/clients/admin-ui/cypress/fixtures/connectors/datasetconfig.json b/clients/admin-ui/cypress/fixtures/connectors/datasetconfig.json
index c41d13993b..6cf4d7d77c 100644
--- a/clients/admin-ui/cypress/fixtures/connectors/datasetconfig.json
+++ b/clients/admin-ui/cypress/fixtures/connectors/datasetconfig.json
@@ -38,7 +38,7 @@
                 "fides_meta": {
                   "references": null,
                   "identity": null,
-                  "primary_key": null,
+                  "primary_key": true,
                   "data_type": null,
                   "length": null,
                   "return_all_elements": null,
@@ -125,7 +125,7 @@
                 "fides_meta": {
                   "references": null,
                   "identity": null,
-                  "primary_key": null,
+                  "primary_key": true,
                   "data_type": null,
                   "length": null,
                   "return_all_elements": null,
@@ -199,7 +199,7 @@
                 "fides_meta": {
                   "references": null,
                   "identity": null,
-                  "primary_key": null,
+                  "primary_key": true,
                   "data_type": null,
                   "length": null,
                   "return_all_elements": null,
@@ -258,7 +258,7 @@
                 "fides_meta": {
                   "references": null,
                   "identity": null,
-                  "primary_key": null,
+                  "primary_key": true,
                   "data_type": null,
                   "length": null,
                   "return_all_elements": null,
@@ -366,7 +366,7 @@
                 "fides_meta": {
                   "references": null,
                   "identity": null,
-                  "primary_key": null,
+                  "primary_key": true,
                   "data_type": null,
                   "length": null,
                   "return_all_elements": null,
@@ -466,7 +466,7 @@
                 "fides_meta": {
                   "references": null,
                   "identity": null,
-                  "primary_key": null,
+                  "primary_key": true,
                   "data_type": null,
                   "length": null,
                   "return_all_elements": null,
@@ -503,7 +503,7 @@
                 "fides_meta": {
                   "references": null,
                   "identity": null,
-                  "primary_key": null,
+                  "primary_key": true,
                   "data_type": null,
                   "length": null,
                   "return_all_elements": null,
@@ -555,7 +555,7 @@
                 "fides_meta": {
                   "references": null,
                   "identity": null,
-                  "primary_key": null,
+                  "primary_key": true,
                   "data_type": null,
                   "length": null,
                   "return_all_elements": null,
@@ -664,7 +664,7 @@
                 "fides_meta": {
                   "references": null,
                   "identity": null,
-                  "primary_key": null,
+                  "primary_key": true,
                   "data_type": null,
                   "length": null,
                   "return_all_elements": null,
diff --git a/data/dataset/bigquery_enterprise_test_dataset.yml b/data/dataset/bigquery_enterprise_test_dataset.yml
index 52b20e7d03..59d27e68a2 100644
--- a/data/dataset/bigquery_enterprise_test_dataset.yml
+++ b/data/dataset/bigquery_enterprise_test_dataset.yml
@@ -30,7 +30,7 @@ dataset:
             fides_meta:
               references: null
               identity: null
-              primary_key: null
+              primary_key: true
               data_type: integer
               length: null
               return_all_elements: null
@@ -102,7 +102,7 @@ dataset:
             fides_meta:
               references: null
               identity: null
-              primary_key: null
+              primary_key: true
               data_type: integer
               length: null
               return_all_elements: null
@@ -204,7 +204,7 @@ dataset:
             fides_meta:
               references: null
               identity: null
-              primary_key: null
+              primary_key: true
               data_type: integer
               length: null
               return_all_elements: null
@@ -347,7 +347,7 @@ dataset:
             fides_meta:
               references: null
               identity: stackoverflow_user_id
-              primary_key: null
+              primary_key: true
               data_type: integer
               length: null
               return_all_elements: null
diff --git a/data/dataset/bigquery_example_test_dataset.yml b/data/dataset/bigquery_example_test_dataset.yml
index c4ea16cb44..11fdac1aba 100644
--- a/data/dataset/bigquery_example_test_dataset.yml
+++ b/data/dataset/bigquery_example_test_dataset.yml
@@ -13,6 +13,8 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -51,6 +53,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -76,6 +80,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -92,6 +98,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: time
             data_categories: [user.sensor]
 
@@ -106,6 +114,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
@@ -156,6 +166,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.financial]
           - name: preferred
@@ -165,6 +177,8 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [system.operations]
           - name: price
@@ -179,6 +193,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: month
             data_categories: [system.operations]
           - name: name
@@ -211,6 +227,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: opened
             data_categories: [system.operations]
 
diff --git a/data/dataset/dynamodb_example_test_dataset.yml b/data/dataset/dynamodb_example_test_dataset.yml
index a4e5a1291a..d9ecbb8d1f 100644
--- a/data/dataset/dynamodb_example_test_dataset.yml
+++ b/data/dataset/dynamodb_example_test_dataset.yml
@@ -20,6 +20,7 @@ dataset:
           - name: email
             data_categories: [user.contact.email]
             fides_meta:
+              primary_key: True
               identity: email
               data_type: string
           - name: name
@@ -32,6 +33,8 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -56,12 +59,16 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
       - name: login
         fields:
           - name: customer_id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: login_date
             data_categories: [system.operations]
           - name: name
diff --git a/data/dataset/email_dataset.yml b/data/dataset/email_dataset.yml
index 64b49f71a8..c829e8a4ea 100644
--- a/data/dataset/email_dataset.yml
+++ b/data/dataset/email_dataset.yml
@@ -7,6 +7,8 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: true
           - name: customer_id
             data_categories: [user]
             fides_meta:
@@ -20,6 +22,8 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: true
           - name: first_name
             data_categories: [user.childrens]
           - name: last_name
@@ -50,6 +54,8 @@ dataset:
         fields:
           - name: id
             data_categories: [ system.operations ]
+            fides_meta:
+              primary_key: true
           - name: payer_email
             data_categories: [ user.contact.email ]
             fides_meta:
diff --git a/data/dataset/example_field_masking_override_test_dataset.yml b/data/dataset/example_field_masking_override_test_dataset.yml
index 74e29ca84e..24bdf84555 100644
--- a/data/dataset/example_field_masking_override_test_dataset.yml
+++ b/data/dataset/example_field_masking_override_test_dataset.yml
@@ -11,6 +11,8 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -36,6 +38,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -86,6 +90,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -102,6 +108,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: time
             data_categories: [user.sensor]
 
@@ -116,6 +124,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
@@ -166,6 +176,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.financial]
           - name: preferred
@@ -175,6 +187,8 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [system.operations]
           - name: price
@@ -189,6 +203,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: month
             data_categories: [system.operations]
           - name: name
@@ -221,6 +237,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: opened
             data_categories: [system.operations]
       - name: visit
diff --git a/data/dataset/example_test_dataset.invalid b/data/dataset/example_test_dataset.invalid
index a3bfe261ff..46e5235876 100644
--- a/data/dataset/example_test_dataset.invalid
+++ b/data/dataset/example_test_dataset.invalid
@@ -11,6 +11,8 @@ dataset:
             data_categories: [user.contact.address.street]
           * name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           * name: state
             data_categories: [user.contact.address.state]
           * name: street
@@ -36,6 +38,8 @@ dataset:
               data_type: string
           * name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           * name: name
             data_categories: [user.name]
             fides_meta:
@@ -58,6 +62,8 @@ dataset:
               data_type: string
           * name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           * name: name
             data_categories: [user.name]
             fides_meta:
@@ -74,6 +80,8 @@ dataset:
                   direction: from
           * name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           * name: time
             data_categories: [user.sensor]
 
@@ -88,6 +96,8 @@ dataset:
                   direction: from
           * name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           * name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
@@ -138,6 +148,8 @@ dataset:
                   direction: from
           * name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           * name: name
             data_categories: [user.financial]
           * name: preferred
@@ -147,6 +159,8 @@ dataset:
         fields:
           * name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           * name: name
             data_categories: [system.operations]
           * name: price
@@ -161,6 +175,8 @@ dataset:
               data_type: string
           * name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           * name: month
             data_categories: [system.operations]
           * name: name
@@ -193,6 +209,8 @@ dataset:
                   direction: from
           * name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           * name: opened
             data_categories: [system.operations]
 
diff --git a/data/dataset/example_test_datasets.yml b/data/dataset/example_test_datasets.yml
index e64e9fb1e8..898d61bc71 100644
--- a/data/dataset/example_test_datasets.yml
+++ b/data/dataset/example_test_datasets.yml
@@ -11,6 +11,8 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -36,6 +38,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -55,6 +59,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -83,6 +89,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
@@ -212,6 +220,8 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -237,6 +247,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -256,6 +268,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -284,6 +298,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
diff --git a/data/dataset/google_cloud_sql_mysql_example_test_dataset.yml b/data/dataset/google_cloud_sql_mysql_example_test_dataset.yml
index 86b6ad2171..7f090e0487 100644
--- a/data/dataset/google_cloud_sql_mysql_example_test_dataset.yml
+++ b/data/dataset/google_cloud_sql_mysql_example_test_dataset.yml
@@ -11,6 +11,8 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -36,6 +38,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -55,6 +59,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -83,6 +89,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
diff --git a/data/dataset/google_cloud_sql_postgres_example_test_dataset.yml b/data/dataset/google_cloud_sql_postgres_example_test_dataset.yml
index 833361a300..47989b4201 100644
--- a/data/dataset/google_cloud_sql_postgres_example_test_dataset.yml
+++ b/data/dataset/google_cloud_sql_postgres_example_test_dataset.yml
@@ -11,6 +11,8 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -36,6 +38,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -55,6 +59,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -83,6 +89,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
diff --git a/data/dataset/manual_dataset.yml b/data/dataset/manual_dataset.yml
index 26d6acbe48..66f5e4a0da 100644
--- a/data/dataset/manual_dataset.yml
+++ b/data/dataset/manual_dataset.yml
@@ -7,6 +7,8 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: true
           - name: authorized_user
             data_categories: [user]
             fides_meta:
@@ -29,6 +31,8 @@ dataset:
         fields:
           - name: box_id
             data_categories: [user]
+            fides_meta:
+              primary_key: true
           - name: email
             data_categories: [user.contact.email]
             fides_meta:
diff --git a/data/dataset/mariadb_example_test_dataset.yml b/data/dataset/mariadb_example_test_dataset.yml
index 204ad8a56d..5e3c90f08f 100644
--- a/data/dataset/mariadb_example_test_dataset.yml
+++ b/data/dataset/mariadb_example_test_dataset.yml
@@ -11,6 +11,8 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -36,6 +38,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -55,6 +59,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -83,6 +89,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
diff --git a/data/dataset/mongo_example_test_dataset.yml b/data/dataset/mongo_example_test_dataset.yml
index 587e74b317..0205f33049 100644
--- a/data/dataset/mongo_example_test_dataset.yml
+++ b/data/dataset/mongo_example_test_dataset.yml
@@ -82,6 +82,7 @@ dataset:
           - name: _id
             data_categories: [system.operations]
             fides_meta:
+              primary_key: True
               data_type: object_id
           - name: customer_identifiers
             fields:
@@ -112,6 +113,7 @@ dataset:
           - name: _id
             data_categories: [system.operations]
             fides_meta:
+              primary_key: True
               data_type: object_id
           - name: customer_information
             fields:
@@ -144,6 +146,7 @@ dataset:
           - name: _id
             data_categories: [system.operations]
             fides_meta:
+              primary_key: True
               data_type: object_id
           - name: passenger_information
             fields:
@@ -173,6 +176,7 @@ dataset:
           - name: _id
             data_categories: [system.operations]
             fides_meta:
+              primary_key: True
               data_type: object_id
           - name: thread
             fides_meta:
@@ -197,6 +201,7 @@ dataset:
           - name: _id
             data_categories: [system.operations]
             fides_meta:
+              primary_key: True
               data_type: object_id
           - name: email
             data_categories: [user.contact.email]
@@ -206,6 +211,7 @@ dataset:
           - name: id
             data_categories: [user.unique_id]
             fides_meta:
+              primary_key: True
               references:
                 - dataset: mongo_test
                   field: flights.pilots
@@ -219,6 +225,7 @@ dataset:
           - name: _id
             data_categories: [system.operations]
             fides_meta:
+              primary_key: True
               data_type: object_id
           - name: planes
             data_categories: [system.operations]
@@ -237,6 +244,7 @@ dataset:
           - name: _id
             data_categories: [system.operations]
             fides_meta:
+              primary_key: True
               data_type: object_id
           - name: billing_address_id
             data_categories: [system.operations]
@@ -253,6 +261,8 @@ dataset:
             data_categories: [user.unique_id]
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.financial]
           - name: preferred
@@ -261,6 +271,7 @@ dataset:
         fields:
           - name: _id
             fides_meta:
+              primary_key: True
               data_type: object_id
           - name: owner
             fides_meta:
diff --git a/data/dataset/mssql_example_test_dataset.yml b/data/dataset/mssql_example_test_dataset.yml
index d58cf013d3..661c600727 100644
--- a/data/dataset/mssql_example_test_dataset.yml
+++ b/data/dataset/mssql_example_test_dataset.yml
@@ -11,6 +11,8 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -36,6 +38,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -55,6 +59,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -83,6 +89,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
diff --git a/data/dataset/mysql_example_test_dataset.yml b/data/dataset/mysql_example_test_dataset.yml
index 7d2b16541b..f311ebf2c7 100644
--- a/data/dataset/mysql_example_test_dataset.yml
+++ b/data/dataset/mysql_example_test_dataset.yml
@@ -11,6 +11,8 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -36,6 +38,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -55,6 +59,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
 
@@ -83,6 +89,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
@@ -210,3 +218,4 @@ dataset:
               data_type: string
           - name: updated_at
             data_categories: [system.operations]
+
diff --git a/data/dataset/postgres_example_custom_request_field_dataset.yml b/data/dataset/postgres_example_custom_request_field_dataset.yml
index 0a878fad87..96b58645d4 100644
--- a/data/dataset/postgres_example_custom_request_field_dataset.yml
+++ b/data/dataset/postgres_example_custom_request_field_dataset.yml
@@ -10,6 +10,7 @@ dataset:
             data_categories: [system.operations]
             fides_meta:
               data_type: string
+              primary_key: True
           - name: email_address
             data_categories: [system.operations]
             fides_meta:
diff --git a/data/dataset/postgres_example_invalid_masking_strategy_override_dataset.yml b/data/dataset/postgres_example_invalid_masking_strategy_override_dataset.yml
index e66c2cd140..5195a3671a 100644
--- a/data/dataset/postgres_example_invalid_masking_strategy_override_dataset.yml
+++ b/data/dataset/postgres_example_invalid_masking_strategy_override_dataset.yml
@@ -14,6 +14,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -29,6 +31,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
diff --git a/data/dataset/postgres_example_test_dataset.yml b/data/dataset/postgres_example_test_dataset.yml
index 1f01fe1f03..d62eb38d46 100644
--- a/data/dataset/postgres_example_test_dataset.yml
+++ b/data/dataset/postgres_example_test_dataset.yml
@@ -7,28 +7,18 @@ dataset:
         fields:
           - name: city
             data_categories: [user.contact.address.city]
-            fides_meta:
-              data_type: string
           - name: house
             data_categories: [user.contact.address.street]
-            fides_meta:
-              data_type: integer
           - name: id
             data_categories: [system.operations]
             fides_meta:
-              data_type: integer
+              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
-            fides_meta:
-              data_type: string
           - name: street
             data_categories: [user.contact.address.street]
-            fides_meta:
-              data_type: string
           - name: zip
             data_categories: [user.contact.address.postal_code]
-            fides_meta:
-              data_type: string
 
       - name: customer
         fields:
@@ -48,6 +38,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -70,6 +62,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -92,6 +86,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: time
             data_categories: [user.sensor]
 
@@ -106,6 +102,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
@@ -156,6 +154,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.financial]
           - name: preferred
@@ -165,6 +165,8 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [system.operations]
           - name: price
@@ -179,6 +181,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: month
             data_categories: [system.operations]
           - name: name
@@ -211,6 +215,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: opened
             data_categories: [system.operations]
       - name: visit
diff --git a/data/dataset/redshift_example_test_dataset.yml b/data/dataset/redshift_example_test_dataset.yml
index 2b1858e99a..9794f86bb3 100644
--- a/data/dataset/redshift_example_test_dataset.yml
+++ b/data/dataset/redshift_example_test_dataset.yml
@@ -11,6 +11,8 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -36,6 +38,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -58,6 +62,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -74,6 +80,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: time
             data_categories: [user.sensor]
 
@@ -88,6 +96,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
@@ -138,6 +148,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.financial]
           - name: preferred
@@ -147,6 +159,8 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [system.operations]
           - name: price
@@ -161,6 +175,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: month
             data_categories: [system.operations]
           - name: name
@@ -193,6 +209,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: opened
             data_categories: [system.operations]
 
diff --git a/data/dataset/scylladb_example_test_dataset.yml b/data/dataset/scylladb_example_test_dataset.yml
index 38c0ea7b51..8374540cc1 100644
--- a/data/dataset/scylladb_example_test_dataset.yml
+++ b/data/dataset/scylladb_example_test_dataset.yml
@@ -47,6 +47,7 @@ dataset:
             data_categories: [user.unique_id]
             fides_meta:
               data_type: integer
+              primary_key: True
           - name: uuid
             data_categories: [user.government_id]
       - name: user_activity
@@ -59,10 +60,12 @@ dataset:
                   field: users.user_id
                   direction: from
               data_type: integer
+              primary_key: True
           - name: timestamp
             data_categories: [user.behavior]
             fides_meta:
               data_type: string
+              primary_key: True
           - name: user_agent
             data_categories: [user.device]
             fides_meta:
@@ -77,6 +80,7 @@ dataset:
             data_categories: [system.operations]
             fides_meta:
               data_type: integer
+              primary_key: True
           - name: user_id
             data_categories: [user.unique_id]
             fides_meta:
@@ -97,6 +101,7 @@ dataset:
             data_categories: [system.operations]
             fides_meta:
               data_type: integer
+              primary_key: True
           - name: payment_method_id
             data_categories: [system.operations]
             fides_meta:
diff --git a/data/dataset/snowflake_example_test_dataset.yml b/data/dataset/snowflake_example_test_dataset.yml
index 9b1b79f125..da13723693 100644
--- a/data/dataset/snowflake_example_test_dataset.yml
+++ b/data/dataset/snowflake_example_test_dataset.yml
@@ -11,6 +11,8 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -36,6 +38,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -62,6 +66,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -78,6 +84,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: time
             data_categories: [user.sensor]
 
@@ -92,6 +100,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
@@ -142,6 +152,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.financial]
           - name: preferred
@@ -151,6 +163,8 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [system.operations]
           - name: price
@@ -165,6 +179,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: month
             data_categories: [system.operations]
           - name: name
@@ -197,6 +213,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: opened
             data_categories: [system.operations]
 
diff --git a/data/dataset/timebase_example_test_dataset.yml b/data/dataset/timebase_example_test_dataset.yml
index fe8a7e7d1d..ffd57a7c67 100644
--- a/data/dataset/timebase_example_test_dataset.yml
+++ b/data/dataset/timebase_example_test_dataset.yml
@@ -11,6 +11,8 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -36,6 +38,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -58,6 +62,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -74,6 +80,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: time
             data_categories: [user.sensor]
 
@@ -88,6 +96,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
@@ -138,6 +148,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.financial]
           - name: preferred
@@ -147,6 +159,8 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [system.operations]
           - name: price
@@ -161,6 +175,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: month
             data_categories: [system.operations]
           - name: name
@@ -193,6 +209,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: opened
             data_categories: [system.operations]
 
diff --git a/dev-requirements.txt b/dev-requirements.txt
index c51b9369a5..149cdcd658 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -7,12 +7,12 @@ mypy==1.10.0
 nox==2022.8.7
 pre-commit==2.20.0
 pylint==3.2.5
-pytest==7.2.2
 pytest-asyncio==0.19.0
 pytest-cov==4.0.0
 pytest-env==0.6.2
 pytest-mock==3.14.0
 pytest-rerunfailures==14.0
+pytest==7.2.2
 requests-mock==1.10.0
 setuptools>=64.0.2
 sqlalchemy-stubs==0.4
diff --git a/pyproject.toml b/pyproject.toml
index f1f4963dd0..087d2b2033 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -175,7 +175,7 @@ addopts = [
     "--no-cov-on-fail",
     "-ra",
     "-vv",
-    "--disable-pytest-warnings",
+    "--disable-pytest-warnings"
 ]
 markers = [
     "unit: only runs tests that don't require non-python dependencies (i.e. a database)",
diff --git a/src/fides/api/task/graph_task.py b/src/fides/api/task/graph_task.py
index 85576264f0..145094ea25 100644
--- a/src/fides/api/task/graph_task.py
+++ b/src/fides/api/task/graph_task.py
@@ -604,6 +604,33 @@ def erasure_request(
     ) -> int:
         """Run erasure request"""
 
+        # if there is no primary key specified in the graph node configuration
+        # note this in the execution log and perform no erasures on this node
+        if (
+            self.connector.requires_primary_keys
+            and not self.execution_node.collection.contains_field(
+                lambda f: f.primary_key
+            )
+        ):
+            logger.warning(
+                'Skipping erasures on "{}" as the "{}" connector requires a primary key to be defined in one of the collection fields, but none was found.',
+                self.execution_node.address,
+                self.connector.configuration.connection_type,
+            )
+            if self.request_task.id:
+                # For DSR 3.0, largely for testing. DSR 3.0 uses Request Task status
+                # instead of presence of cached erasure data to know if we should rerun a node
+                self.request_task.rows_masked = 0  # Saved as part of update_status
+            # TODO Remove when we stop support for DSR 2.0
+            self.resources.cache_erasure(self.key.value, 0)
+            self.update_status(
+                "No values were erased since no primary key was defined in any of the fields for this collection",
+                None,
+                ActionType.erasure,
+                ExecutionLogStatus.complete,
+            )
+            return 0
+
         if not self.can_write_data():
             logger.warning(
                 "No erasures on {} as its ConnectionConfig does not have write access.",
diff --git a/tests/fixtures/email_fixtures.py b/tests/fixtures/email_fixtures.py
index e25f39e3f4..0df1d61b84 100644
--- a/tests/fixtures/email_fixtures.py
+++ b/tests/fixtures/email_fixtures.py
@@ -192,17 +192,24 @@ def dynamic_email_address_config_dataset(
                         {
                             "name": "id",
                             "data_categories": ["system.operations"],
-                            "fides_meta": {"data_type": "string"},
+                            "fides_meta": {
+                                "data_type": "string",
+                                "primary_key": True,
+                            },
                         },
                         {
                             "name": "email_address",
                             "data_categories": ["system.operations"],
-                            "fides_meta": {"data_type": "string"},
+                            "fides_meta": {
+                                "data_type": "string",
+                            },
                         },
                         {
                             "name": "vendor_name",
                             "data_categories": ["system.operations"],
-                            "fides_meta": {"data_type": "string"},
+                            "fides_meta": {
+                                "data_type": "string",
+                            },
                         },
                         {
                             "name": "site_id",
@@ -238,17 +245,24 @@ def dynamic_email_address_config_second_dataset(
                         {
                             "name": "id",
                             "data_categories": ["system.operations"],
-                            "fides_meta": {"data_type": "string"},
+                            "fides_meta": {
+                                "data_type": "string",
+                                "primary_key": True,
+                            },
                         },
                         {
                             "name": "email_address",
                             "data_categories": ["system.operations"],
-                            "fides_meta": {"data_type": "string"},
+                            "fides_meta": {
+                                "data_type": "string",
+                            },
                         },
                         {
                             "name": "vendor_name",
                             "data_categories": ["system.operations"],
-                            "fides_meta": {"data_type": "string"},
+                            "fides_meta": {
+                                "data_type": "string",
+                            },
                         },
                         {
                             "name": "custom_field",
@@ -266,17 +280,24 @@ def dynamic_email_address_config_second_dataset(
                         {
                             "name": "id2",
                             "data_categories": ["system.operations"],
-                            "fides_meta": {"data_type": "string"},
+                            "fides_meta": {
+                                "data_type": "string",
+                                "primary_key": True,
+                            },
                         },
                         {
                             "name": "email_address2",
                             "data_categories": ["system.operations"],
-                            "fides_meta": {"data_type": "string"},
+                            "fides_meta": {
+                                "data_type": "string",
+                            },
                         },
                         {
                             "name": "vendor_name2",
                             "data_categories": ["system.operations"],
-                            "fides_meta": {"data_type": "string"},
+                            "fides_meta": {
+                                "data_type": "string",
+                            },
                         },
                         {
                             "name": "site_id2",
diff --git a/tests/ops/api/v1/endpoints/test_dataset_endpoints.py b/tests/ops/api/v1/endpoints/test_dataset_endpoints.py
index 1ae9ac28f8..f744d59e47 100644
--- a/tests/ops/api/v1/endpoints/test_dataset_endpoints.py
+++ b/tests/ops/api/v1/endpoints/test_dataset_endpoints.py
@@ -232,7 +232,9 @@ def test_put_validate_dataset_invalid_length(
         invalid_dataset = example_datasets[0]
 
         # string is properly read:
-        invalid_dataset["collections"][0]["fields"][0]["fides_meta"] = {"length": 123}
+        invalid_dataset["collections"][0]["fields"][0]["fidesops_meta"] = {
+            "length": 123
+        }
         response = api_client.put(
             validate_dataset_url, headers=auth_header, json=invalid_dataset
         )
@@ -245,7 +247,7 @@ def test_put_validate_dataset_invalid_length(
         )
 
         # fails with an invalid value
-        invalid_dataset["collections"][0]["fields"][0]["fides_meta"] = {"length": -1}
+        invalid_dataset["collections"][0]["fields"][0]["fidesops_meta"] = {"length": -1}
         response = api_client.put(
             validate_dataset_url, headers=auth_header, json=invalid_dataset
         )
@@ -267,7 +269,7 @@ def test_put_validate_dataset_invalid_data_type(
         invalid_dataset = example_datasets[0]
 
         # string is properly read:
-        invalid_dataset["collections"][0]["fields"][0]["fides_meta"] = {
+        invalid_dataset["collections"][0]["fields"][0]["fidesops_meta"] = {
             "data_type": "string"
         }
         response = api_client.put(
@@ -282,7 +284,7 @@ def test_put_validate_dataset_invalid_data_type(
         )
 
         # fails with an invalid value
-        invalid_dataset["collections"][0]["fields"][0]["fides_meta"] = {
+        invalid_dataset["collections"][0]["fields"][0]["fidesops_meta"] = {
             "data_type": "stringsssssss"
         }
 
@@ -296,7 +298,7 @@ def test_put_validate_dataset_invalid_data_type(
             == "Value error, The data type stringsssssss is not supported."
         )
 
-    def test_put_validate_dataset_invalid_fides_meta(
+    def test_put_validate_dataset_invalid_fidesops_meta(
         self,
         example_datasets: List,
         validate_dataset_url,
@@ -305,8 +307,8 @@ def test_put_validate_dataset_invalid_fides_meta(
     ) -> None:
         auth_header = generate_auth_header(scopes=[DATASET_READ])
         invalid_dataset = example_datasets[0]
-        # Add an invalid fides_meta annotation to ensure our type-checking is comprehensive
-        invalid_dataset["collections"][0]["fields"][0]["fides_meta"] = {
+        # Add an invalid fidesops_meta annotation to ensure our type-checking is comprehensive
+        invalid_dataset["collections"][0]["fields"][0]["fidesops_meta"] = {
             "references": [
                 {
                     "dataset": "postgres_example_test_dataset",
diff --git a/tests/ops/generator/test_data_generator.py b/tests/ops/generator/test_data_generator.py
index 04441237b7..af9ab1cc62 100644
--- a/tests/ops/generator/test_data_generator.py
+++ b/tests/ops/generator/test_data_generator.py
@@ -20,6 +20,7 @@
         fields:
           - name: id
             fides_meta:
+              primary_key: True
               data_type: integer
               references:
                 - dataset: db
@@ -33,6 +34,7 @@
         fields:
           - name: id
             fides_meta:
+              primary_key: True
               data_type: integer
           - name: user_id
           - name: street
diff --git a/tests/ops/models/test_datasetconfig.py b/tests/ops/models/test_datasetconfig.py
index 933c2fcb3f..969002baac 100644
--- a/tests/ops/models/test_datasetconfig.py
+++ b/tests/ops/models/test_datasetconfig.py
@@ -194,17 +194,18 @@ def test_convert_dataset_to_graph(example_datasets):
         (FieldAddress("postgres_example_test_dataset", "customer", "id"), "from")
     ]
 
+    # check that primary key member has been set
     assert (
         field([graph], "postgres_example_test_dataset", "address", "id").primary_key
-        is False
+        is True
     )
     assert (
         field([graph], "postgres_example_test_dataset", "customer", "id").primary_key
-        is False
+        is True
     )
     assert (
         field([graph], "postgres_example_test_dataset", "employee", "id").primary_key
-        is False
+        is True
     )
     assert (
         field([graph], "postgres_example_test_dataset", "visit", "email").primary_key
diff --git a/tests/ops/task/test_create_request_tasks.py b/tests/ops/task/test_create_request_tasks.py
index 3792fea0e3..290c2dc1be 100644
--- a/tests/ops/task/test_create_request_tasks.py
+++ b/tests/ops/task/test_create_request_tasks.py
@@ -105,7 +105,7 @@
             "is_array": False,
             "read_only": None,
             "references": [],
-            "primary_key": False,
+            "primary_key": True,
             "data_categories": ["system.operations"],
             "data_type_converter": "None",
             "return_all_elements": None,
@@ -307,7 +307,7 @@ def test_persist_access_tasks_with_object_fields_in_collection(
                     "is_array": False,
                     "read_only": None,
                     "references": [],
-                    "primary_key": False,
+                    "primary_key": True,
                     "data_categories": ["system.operations"],
                     "data_type_converter": "object_id",
                     "return_all_elements": None,
@@ -927,7 +927,7 @@ def test_erase_after_saas_upstream_and_downstream_tasks(
                 "is_array": False,
                 "read_only": None,
                 "references": [],
-                "primary_key": False,
+                "primary_key": True,
                 "data_categories": ["system.operations"],
                 "data_type_converter": "integer",
                 "return_all_elements": None,
diff --git a/tests/ops/task/traversal_data.py b/tests/ops/task/traversal_data.py
index 07ff478e3e..20d3773e17 100644
--- a/tests/ops/task/traversal_data.py
+++ b/tests/ops/task/traversal_data.py
@@ -33,7 +33,7 @@ def postgres_dataset_dict(db_name: str) -> Dict[str, Any]:
                 "fields": [
                     {
                         "name": "id",
-                        "fides_meta": {"data_type": "integer"},
+                        "fides_meta": {"primary_key": True, "data_type": "integer"},
                     },
                     {"name": "name", "fides_meta": {"data_type": "string"}},
                     {
@@ -58,7 +58,7 @@ def postgres_dataset_dict(db_name: str) -> Dict[str, Any]:
                 "name": "address",
                 "after": [f"{db_name}.customer", f"{db_name}.orders"],
                 "fields": [
-                    {"name": "id"},
+                    {"name": "id", "fides_meta": {"primary_key": True}},
                     {"name": "street", "fides_meta": {"data_type": "string"}},
                     {"name": "city", "fides_meta": {"data_type": "string"}},
                     {"name": "state", "fides_meta": {"data_type": "string"}},
@@ -68,7 +68,7 @@ def postgres_dataset_dict(db_name: str) -> Dict[str, Any]:
             {
                 "name": "orders",
                 "fields": [
-                    {"name": "id"},
+                    {"name": "id", "fides_meta": {"primary_key": True}},
                     {
                         "name": "customer_id",
                         "fides_meta": {
@@ -113,7 +113,7 @@ def postgres_dataset_dict(db_name: str) -> Dict[str, Any]:
                 "fields": [
                     {
                         "name": "id",
-                        "fides_meta": {"data_type": "string"},
+                        "fides_meta": {"primary_key": True, "data_type": "string"},
                     },
                     {"name": "name", "fides_meta": {"data_type": "string"}},
                     {"name": "ccn"},
@@ -657,7 +657,7 @@ def scylladb_dataset_dict(db_name: str) -> Dict[str, Any]:
                     {
                         "name": "user_id",
                         "data_categories": ["user.unique_id"],
-                        "fides_meta": {"data_type": "integer"},
+                        "fides_meta": {"data_type": "integer", "primary_key": True},
                     },
                     {"name": "uuid", "data_categories": ["user.government_id"]},
                 ],
@@ -677,12 +677,13 @@ def scylladb_dataset_dict(db_name: str) -> Dict[str, Any]:
                                 }
                             ],
                             "data_type": "integer",
+                            "primary_key": True,
                         },
                     },
                     {
                         "name": "timestamp",
                         "data_categories": ["user.behavior"],
-                        "fides_meta": {"data_type": "string"},
+                        "fides_meta": {"data_type": "string", "primary_key": True},
                     },
                     {
                         "name": "user_agent",
@@ -702,7 +703,7 @@ def scylladb_dataset_dict(db_name: str) -> Dict[str, Any]:
                     {
                         "name": "payment_method_id",
                         "data_categories": ["system.operations"],
-                        "fides_meta": {"data_type": "integer"},
+                        "fides_meta": {"data_type": "integer", "primary_key": True},
                     },
                     {
                         "name": "user_id",
@@ -732,7 +733,7 @@ def scylladb_dataset_dict(db_name: str) -> Dict[str, Any]:
                     {
                         "name": "order_id",
                         "data_categories": ["system.operations"],
-                        "fides_meta": {"data_type": "integer"},
+                        "fides_meta": {"data_type": "integer", "primary_key": True},
                     },
                     {
                         "name": "payment_method_id",
diff --git a/tests/ops/util/test_dataset_yaml.py b/tests/ops/util/test_dataset_yaml.py
index a610ac7569..edaa26a7ca 100644
--- a/tests/ops/util/test_dataset_yaml.py
+++ b/tests/ops/util/test_dataset_yaml.py
@@ -33,6 +33,7 @@
           - name: id
             data_categories: [system.operations]
             fidesops_meta:
+              primary_key: True
               data_type: integer
 """
 
@@ -46,6 +47,7 @@
           - name: _id
             data_categories: [system.operations]
             fidesops_meta:
+              primary_key: True
               data_type: object_id
           - name: photo_id
             data_categories: [user.unique_id]
@@ -221,6 +223,7 @@ def test_invalid_datatype():
           - name: id
             data_categories: [system.operations]
             fidesops_meta:
+              primary_key: True
               data_type: integer
       - name: users
         fields:

From 7600ab4d4646ee1920e8ee62feee224942189ea7 Mon Sep 17 00:00:00 2001
From: Adrian Galvan <adrian@ethyca.com>
Date: Tue, 10 Dec 2024 15:26:07 -0800
Subject: [PATCH 20/22] Removing primary key requirement for BigQuery erasures

---
 .../bigquery_enterprise_test_dataset.yml      | 552 +++++-------------
 .../dataset/bigquery_example_test_dataset.yml |  18 -
 .../service/connectors/bigquery_connector.py  |   4 +
 .../query_configs/bigquery_query_config.py    |  16 +-
 .../connectors/test_bigquery_connector.py     |   4 +-
 .../connectors/test_bigquery_queryconfig.py   |   4 +-
 ...est_bigquery_enterprise_privacy_request.py |  22 +-
 7 files changed, 166 insertions(+), 454 deletions(-)

diff --git a/data/dataset/bigquery_enterprise_test_dataset.yml b/data/dataset/bigquery_enterprise_test_dataset.yml
index 59d27e68a2..64668192d0 100644
--- a/data/dataset/bigquery_enterprise_test_dataset.yml
+++ b/data/dataset/bigquery_enterprise_test_dataset.yml
@@ -1,405 +1,149 @@
 dataset:
-  - fides_key: enterprise_dsr_testing
-    organization_fides_key: default_organization
-    tags: null
-    name: Bigquery Enterprise Test Dataset
-    description: BigQuery dataset containing real data
-    meta: null
-    data_categories: null
-    fides_meta:
-      resource_id: enterprise_dsr_testing.prj-sandbox-55855.enterprise_dsr_testing
-      after: null
-      namespace:
-        dataset_id: enterprise_dsr_testing
-        project_id: prj-sandbox-55855
-    collections:
-      - name: comments
-        description: null
-        data_categories: null
-        fields:
-          - name: creation_date
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: id
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta:
-              references: null
-              identity: null
-              primary_key: true
-              data_type: integer
-              length: null
-              return_all_elements: null
-              read_only: null
-              custom_request_field: null
-            fields: null
-          - name: post_id
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: score
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: text
-            description: null
-            data_categories:
-              - user.contact
-            fides_meta: null
-            fields: null
-          - name: user_display_name
-            description: null
-            data_categories:
-              - user.contact
-            fides_meta: null
-            fields: null
-          - name: user_id
-            description: null
-            data_categories:
-              - user.contact
-            fides_meta:
-              references:
-                - dataset: enterprise_dsr_testing
-                  field: users.id
-                  direction: from
-              identity: null
-              primary_key: null
-              data_type: null
-              length: null
-              return_all_elements: null
-              read_only: null
-              custom_request_field: null
-            fields: null
-        fides_meta: null
-      - name: post_history
-        description: null
-        data_categories: null
-        fields:
-          - name: comment
-            description: null
-            data_categories:
-              - user.contact
-            fides_meta: null
-            fields: null
-          - name: creation_date
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: id
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta:
-              references: null
-              identity: null
-              primary_key: true
-              data_type: integer
-              length: null
-              return_all_elements: null
-              read_only: null
-              custom_request_field: null
-            fields: null
-          - name: post_history_type_id
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: post_id
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: revision_guid
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: text
-            description: null
-            data_categories:
-              - user.contact
-            fides_meta: null
-            fields: null
-          - name: user_id
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta:
-              references:
-                - dataset: enterprise_dsr_testing
-                  field: users.id
-                  direction: from
-              identity: null
-              primary_key: null
-              data_type: null
-              length: null
-              return_all_elements: null
-              read_only: null
-              custom_request_field: null
-            fields: null
-        fides_meta: null
-      - name: stackoverflow_posts
-        description: null
-        data_categories: null
-        fields:
-          - name: accepted_answer_id
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: answer_count
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: body
-            description: null
-            data_categories:
-              - user.contact
-            fides_meta: null
-            fields: null
-          - name: comment_count
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: community_owned_date
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: creation_date
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: favorite_count
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: id
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta:
-              references: null
-              identity: null
-              primary_key: true
-              data_type: integer
-              length: null
-              return_all_elements: null
-              read_only: null
-              custom_request_field: null
-            fields: null
-          - name: last_activity_date
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: last_edit_date
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: last_editor_display_name
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: last_editor_user_id
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta:
-              references:
-                - dataset: enterprise_dsr_testing
-                  field: users.id
-                  direction: from
-              identity: null
-              primary_key: null
-              data_type: null
-              length: null
-              return_all_elements: null
-              read_only: null
-              custom_request_field: null
-            fields: null
-          - name: owner_display_name
-            description: null
-            data_categories:
-              - user.contact
-            fides_meta: null
-            fields: null
-          - name: owner_user_id
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta:
-              references:
-                - dataset: enterprise_dsr_testing
-                  field: users.id
-                  direction: from
-              identity: null
-              primary_key: null
-              data_type: integer
-              length: null
-              return_all_elements: null
-              read_only: null
-              custom_request_field: null
-            fields: null
-          - name: parent_id
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: post_type_id
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: score
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: tags
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: title
-            description: null
-            data_categories:
-              - user.contact
-            fides_meta: null
-            fields: null
-          - name: view_count
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-        fides_meta: null
-      - name: users
-        description: null
-        data_categories: null
-        fields:
-          - name: about_me
-            description: null
-            data_categories:
-              - user.contact
-            fides_meta: null
-            fields: null
-          - name: age
-            description: null
-            data_categories:
-              - user
-            fides_meta: null
-            fields: null
-          - name: creation_date
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: display_name
-            description: null
-            data_categories:
-              - user.contact
-            fides_meta: null
-            fields: null
-          - name: down_votes
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: id
-            description: null
-            data_categories:
-              - user.contact
-            fides_meta:
-              references: null
-              identity: stackoverflow_user_id
-              primary_key: true
-              data_type: integer
-              length: null
-              return_all_elements: null
-              read_only: null
-              custom_request_field: null
-            fields: null
-          - name: last_access_date
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: location
-            description: null
-            data_categories:
-              - user.contact
-            fides_meta: null
-            fields: null
-          - name: profile_image_url
-            description: null
-            data_categories:
-              - user.contact
-            fides_meta: null
-            fields: null
-          - name: reputation
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: up_votes
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: views
-            description: null
-            data_categories:
-              - system.operations
-            fides_meta: null
-            fields: null
-          - name: website_url
-            description: null
-            data_categories:
-              - user
-            fides_meta: null
-            fields: null
-        fides_meta:
-          after: null
-          erase_after:
-            - enterprise_dsr_testing.comments
-          skip_processing: false
-          masking_strategy_override: null
-          partitioning: null
+ - fides_key: enterprise_dsr_testing
+   organization_fides_key: default_organization
+   name: Bigquery Enterprise Test Dataset
+   description: BigQuery dataset containing real data
+   fides_meta:
+     resource_id: enterprise_dsr_testing.prj-sandbox-55855.enterprise_dsr_testing
+     namespace:
+       dataset_id: enterprise_dsr_testing
+       project_id: prj-sandbox-55855
+   collections:
+     - name: comments
+       fields:
+         - name: creation_date
+           data_categories: [system.operations]
+         - name: id
+           data_categories: [system.operations]
+           fides_meta:
+             data_type: integer
+         - name: post_id
+           data_categories: [system.operations]
+         - name: score
+           data_categories: [system.operations]
+         - name: text
+           data_categories: [user.contact]
+         - name: user_display_name
+           data_categories: [user.contact]
+         - name: user_id
+           data_categories: [user.contact]
+           fides_meta:
+             references:
+               - dataset: enterprise_dsr_testing
+                 field: users.id
+                 direction: from
+     - name: post_history
+       fields:
+         - name: comment
+           data_categories: [user.contact]
+         - name: creation_date
+           data_categories: [system.operations]
+         - name: id
+           data_categories: [system.operations]
+           fides_meta:
+             data_type: integer
+         - name: post_history_type_id
+           data_categories: [system.operations]
+         - name: post_id
+           data_categories: [system.operations]
+         - name: revision_guid
+           data_categories: [system.operations]
+         - name: text
+           data_categories: [user.contact]
+         - name: user_id
+           data_categories: [system.operations]
+           fides_meta:
+             references:
+               - dataset: enterprise_dsr_testing
+                 field: users.id
+                 direction: from
+     - name: stackoverflow_posts
+       fields:
+         - name: accepted_answer_id
+           data_categories: [system.operations]
+         - name: answer_count
+           data_categories: [system.operations]
+         - name: body
+           data_categories: [user.contact]
+         - name: comment_count
+           data_categories: [system.operations]
+         - name: community_owned_date
+           data_categories: [system.operations]
+         - name: creation_date
+           data_categories: [system.operations]
+         - name: favorite_count
+           data_categories: [system.operations]
+         - name: id
+           data_categories: [system.operations]
+           fides_meta:
+             data_type: integer
+         - name: last_activity_date
+           data_categories: [system.operations]
+         - name: last_edit_date
+           data_categories: [system.operations]
+         - name: last_editor_display_name
+           data_categories: [system.operations]
+         - name: last_editor_user_id
+           data_categories: [system.operations]
+           fides_meta:
+             references:
+               - dataset: enterprise_dsr_testing
+                 field: users.id
+                 direction: from
+         - name: owner_display_name
+           data_categories: [user.contact]
+         - name: owner_user_id
+           data_categories: [system.operations]
+           fides_meta:
+             references:
+               - dataset: enterprise_dsr_testing
+                 field: users.id
+                 direction: from
+             data_type: integer
+         - name: parent_id
+           data_categories: [system.operations]
+         - name: post_type_id
+           data_categories: [system.operations]
+         - name: score
+           data_categories: [system.operations]
+         - name: tags
+           data_categories: [system.operations]
+         - name: title
+           data_categories: [user.contact]
+         - name: view_count
+           data_categories: [system.operations]
+     - name: users
+       fields:
+         - name: about_me
+           data_categories: [user.contact]
+         - name: age
+           data_categories: [user]
+         - name: creation_date
+           data_categories: [system.operations]
+         - name: display_name
+           data_categories: [user.contact]
+         - name: down_votes
+           data_categories: [system.operations]
+         - name: id
+           data_categories: [user.contact]
+           fides_meta:
+             identity: stackoverflow_user_id
+             data_type: integer
+         - name: last_access_date
+           data_categories: [system.operations]
+         - name: location
+           data_categories: [user.contact]
+         - name: profile_image_url
+           data_categories: [user.contact]
+         - name: reputation
+           data_categories: [system.operations]
+         - name: up_votes
+           data_categories: [system.operations]
+         - name: views
+           data_categories: [system.operations]
+         - name: website_url
+           data_categories: [user]
+       fides_meta:
+         erase_after:
+           - enterprise_dsr_testing.comments
+         skip_processing: false
diff --git a/data/dataset/bigquery_example_test_dataset.yml b/data/dataset/bigquery_example_test_dataset.yml
index 11fdac1aba..c4ea16cb44 100644
--- a/data/dataset/bigquery_example_test_dataset.yml
+++ b/data/dataset/bigquery_example_test_dataset.yml
@@ -13,8 +13,6 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -53,8 +51,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -80,8 +76,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -98,8 +92,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: time
             data_categories: [user.sensor]
 
@@ -114,8 +106,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
@@ -166,8 +156,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [user.financial]
           - name: preferred
@@ -177,8 +165,6 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: name
             data_categories: [system.operations]
           - name: price
@@ -193,8 +179,6 @@ dataset:
               data_type: string
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: month
             data_categories: [system.operations]
           - name: name
@@ -227,8 +211,6 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
-            fides_meta:
-              primary_key: True
           - name: opened
             data_categories: [system.operations]
 
diff --git a/src/fides/api/service/connectors/bigquery_connector.py b/src/fides/api/service/connectors/bigquery_connector.py
index 8b51f90842..4c52b3b3f6 100644
--- a/src/fides/api/service/connectors/bigquery_connector.py
+++ b/src/fides/api/service/connectors/bigquery_connector.py
@@ -33,6 +33,10 @@ class BigQueryConnector(SQLConnector):
 
     secrets_schema = BigQuerySchema
 
+    @property
+    def requires_primary_keys(self) -> bool:
+        return False
+
     # Overrides BaseConnector.build_uri
     def build_uri(self) -> str:
         """Build URI of format"""
diff --git a/src/fides/api/service/connectors/query_configs/bigquery_query_config.py b/src/fides/api/service/connectors/query_configs/bigquery_query_config.py
index 74b28f3ada..6060ff5822 100644
--- a/src/fides/api/service/connectors/query_configs/bigquery_query_config.py
+++ b/src/fides/api/service/connectors/query_configs/bigquery_query_config.py
@@ -123,15 +123,15 @@ def generate_update(
         TODO: DRY up this method and `generate_delete` a bit
         """
         update_value_map: Dict[str, Any] = self.update_value_map(row, policy, request)
-        non_empty_primary_keys: Dict[str, Field] = filter_nonempty_values(
+        non_empty_reference_field_keys: Dict[str, Field] = filter_nonempty_values(
             {
                 fpath.string_path: fld.cast(row[fpath.string_path])
-                for fpath, fld in self.primary_key_field_paths.items()
+                for fpath, fld in self.reference_field_paths.items()
                 if fpath.string_path in row
             }
         )
 
-        valid = len(non_empty_primary_keys) > 0 and update_value_map
+        valid = len(non_empty_reference_field_keys) > 0 and update_value_map
         if not valid:
             logger.warning(
                 "There is not enough data to generate a valid update statement for {}",
@@ -141,7 +141,7 @@ def generate_update(
 
         table = Table(self._generate_table_name(), MetaData(bind=client), autoload=True)
         where_clauses: List[ColumnElement] = [
-            getattr(table.c, k) == v for k, v in non_empty_primary_keys.items()
+            getattr(table.c, k) == v for k, v in non_empty_reference_field_keys.items()
         ]
 
         if self.partitioning:
@@ -172,15 +172,15 @@ def generate_delete(self, row: Row, client: Engine) -> List[Delete]:
         TODO: DRY up this method and `generate_update` a bit
         """
 
-        non_empty_primary_keys: Dict[str, Field] = filter_nonempty_values(
+        non_empty_reference_field_keys: Dict[str, Field] = filter_nonempty_values(
             {
                 fpath.string_path: fld.cast(row[fpath.string_path])
-                for fpath, fld in self.primary_key_field_paths.items()
+                for fpath, fld in self.reference_field_paths.items()
                 if fpath.string_path in row
             }
         )
 
-        valid = len(non_empty_primary_keys) > 0
+        valid = len(non_empty_reference_field_keys) > 0
         if not valid:
             logger.warning(
                 "There is not enough data to generate a valid DELETE statement for {}",
@@ -190,7 +190,7 @@ def generate_delete(self, row: Row, client: Engine) -> List[Delete]:
 
         table = Table(self._generate_table_name(), MetaData(bind=client), autoload=True)
         where_clauses: List[ColumnElement] = [
-            getattr(table.c, k) == v for k, v in non_empty_primary_keys.items()
+            getattr(table.c, k) == v for k, v in non_empty_reference_field_keys.items()
         ]
 
         if self.partitioning:
diff --git a/tests/ops/service/connectors/test_bigquery_connector.py b/tests/ops/service/connectors/test_bigquery_connector.py
index a9524777fe..2e7bc3b075 100644
--- a/tests/ops/service/connectors/test_bigquery_connector.py
+++ b/tests/ops/service/connectors/test_bigquery_connector.py
@@ -129,7 +129,7 @@ def test_generate_update_partitioned_table(
         assert len(updates) == 2
         assert (
             str(updates[0])
-            == "UPDATE `silken-precinct-284918.fidesopstest.customer` SET `name`=%(name:STRING)s WHERE `silken-precinct-284918.fidesopstest.customer`.`id` = %(id_1:INT64)s AND `created` > TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1000 DAY) AND `created` <= CURRENT_TIMESTAMP()"
+            == "UPDATE `silken-precinct-284918.fidesopstest.customer` SET `name`=%(name:STRING)s WHERE `silken-precinct-284918.fidesopstest.customer`.`email` = %(email_1:STRING)s AND `created` > TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1000 DAY) AND `created` <= CURRENT_TIMESTAMP()"
         )
 
     def test_generate_delete_partitioned_table(
@@ -158,7 +158,7 @@ def test_generate_delete_partitioned_table(
         assert len(deletes) == 2
         assert (
             str(deletes[0])
-            == "DELETE FROM `silken-precinct-284918.fidesopstest.customer` WHERE `silken-precinct-284918.fidesopstest.customer`.`id` = %(id_1:INT64)s AND `created` > TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1000 DAY) AND `created` <= CURRENT_TIMESTAMP()"
+            == "DELETE FROM `silken-precinct-284918.fidesopstest.customer` WHERE `silken-precinct-284918.fidesopstest.customer`.`email` = %(email_1:STRING)s AND `created` > TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1000 DAY) AND `created` <= CURRENT_TIMESTAMP()"
         )
 
     def test_retrieve_partitioned_data(
diff --git a/tests/ops/service/connectors/test_bigquery_queryconfig.py b/tests/ops/service/connectors/test_bigquery_queryconfig.py
index 06c51c5105..24a16517b6 100644
--- a/tests/ops/service/connectors/test_bigquery_queryconfig.py
+++ b/tests/ops/service/connectors/test_bigquery_queryconfig.py
@@ -196,7 +196,7 @@ def test_generate_delete_stmt(
         )
         stmts = set(str(stmt) for stmt in delete_stmts)
         expected_stmts = {
-            "DELETE FROM `employee` WHERE `employee`.`id` = %(id_1:STRING)s"
+            "DELETE FROM `employee` WHERE `employee`.`address_id` = %(address_id_1:STRING)s AND `employee`.`email` = %(email_1:STRING)s"
         }
         assert stmts == expected_stmts
 
@@ -289,6 +289,6 @@ def test_generate_namespaced_delete_stmt(
         )
         stmts = set(str(stmt) for stmt in delete_stmts)
         expected_stmts = {
-            "DELETE FROM `silken-precinct-284918.fidesopstest.employee` WHERE `silken-precinct-284918.fidesopstest.employee`.`id` = %(id_1:STRING)s"
+            "DELETE FROM `silken-precinct-284918.fidesopstest.employee` WHERE `silken-precinct-284918.fidesopstest.employee`.`address_id` = %(address_id_1:STRING)s AND `silken-precinct-284918.fidesopstest.employee`.`email` = %(email_1:STRING)s"
         }
         assert stmts == expected_stmts
diff --git a/tests/ops/service/privacy_request/test_bigquery_enterprise_privacy_request.py b/tests/ops/service/privacy_request/test_bigquery_enterprise_privacy_request.py
index 8fb7e29729..5a133c031f 100644
--- a/tests/ops/service/privacy_request/test_bigquery_enterprise_privacy_request.py
+++ b/tests/ops/service/privacy_request/test_bigquery_enterprise_privacy_request.py
@@ -1,27 +1,9 @@
-import time
-from datetime import datetime, timezone
-from typing import Any, Dict, List, Set
 from unittest import mock
-from unittest.mock import ANY, Mock, call
-from uuid import uuid4
 
-import pydash
 import pytest
 
 from fides.api.models.audit_log import AuditLog, AuditLogAction
-from fides.api.models.privacy_request import (
-    ActionType,
-    CheckpointActionRequired,
-    ExecutionLog,
-    ExecutionLogStatus,
-    PolicyPreWebhook,
-    PrivacyRequest,
-    PrivacyRequestStatus,
-)
-from fides.api.schemas.masking.masking_configuration import MaskingConfiguration
-from fides.api.schemas.masking.masking_secrets import MaskingSecretCache
-from fides.api.schemas.policy import Rule
-from fides.api.service.masking.strategy.masking_strategy import MaskingStrategy
+from fides.api.models.privacy_request import ExecutionLog
 from tests.ops.service.privacy_request.test_request_runner_service import (
     get_privacy_request_results,
 )
@@ -54,7 +36,7 @@ def test_create_and_process_access_request_bigquery_enterprise(
 
     customer_email = "customer-1@example.com"
     user_id = (
-        1754  # this is a real (not generated) user id in the Stackoverflow dataset
+        1754  # this is a real (not generated) user id in the Stack Overflow dataset
     )
     data = {
         "requested_at": "2024-08-30T16:09:37.359Z",

From dd8a3ad873e1c80e9f2692671fd96fab0c64db7f Mon Sep 17 00:00:00 2001
From: Adrian Galvan <adrian@ethyca.com>
Date: Tue, 10 Dec 2024 17:05:19 -0800
Subject: [PATCH 21/22] Setting requires_primary_keys for select connectors +
 updating tests

---
 .../api/service/connectors/base_connector.py  |   8 +-
 .../service/connectors/bigquery_connector.py  |   1 +
 .../service/connectors/postgres_connector.py  |   5 +
 .../connectors/query_configs/query_config.py  |   4 +-
 .../api/service/connectors/saas_connector.py  |   2 +
 .../service/connectors/scylla_connector.py    |   5 +
 .../service/connectors/scylla_query_config.py |  11 +-
 .../postgres_example_test_dataset.yml         |  18 ++
 .../service/connectors/test_query_config.py   | 205 +++++++++++++++++-
 .../connectors/test_snowflake_query_config.py |   4 +-
 tests/ops/task/test_create_request_tasks.py   |   2 +-
 tests/ops/test_helpers/dataset_utils.py       |  30 ++-
 12 files changed, 281 insertions(+), 14 deletions(-)

diff --git a/src/fides/api/service/connectors/base_connector.py b/src/fides/api/service/connectors/base_connector.py
index 4bf46e5eca..e1f735df1c 100644
--- a/src/fides/api/service/connectors/base_connector.py
+++ b/src/fides/api/service/connectors/base_connector.py
@@ -135,5 +135,11 @@ def execute_standalone_retrieval_query(
 
     @property
     def requires_primary_keys(self) -> bool:
-        """Indicates if datasets linked to this connector require primary keys for erasures. Defaults to True."""
+        """
+        Indicates if datasets linked to this connector require primary keys for erasures.
+        Defaults to True.
+        """
+
+        # Defaulting to true for now so we can keep the default behavior and
+        # incrementally determine the need for primary keys across all connectors
         return True
diff --git a/src/fides/api/service/connectors/bigquery_connector.py b/src/fides/api/service/connectors/bigquery_connector.py
index 4c52b3b3f6..ae6fe4b909 100644
--- a/src/fides/api/service/connectors/bigquery_connector.py
+++ b/src/fides/api/service/connectors/bigquery_connector.py
@@ -35,6 +35,7 @@ class BigQueryConnector(SQLConnector):
 
     @property
     def requires_primary_keys(self) -> bool:
+        """BigQuery does not enforce primary key constraints, so they're not required for erasures."""
         return False
 
     # Overrides BaseConnector.build_uri
diff --git a/src/fides/api/service/connectors/postgres_connector.py b/src/fides/api/service/connectors/postgres_connector.py
index 5354d4ec13..2abafc01c8 100644
--- a/src/fides/api/service/connectors/postgres_connector.py
+++ b/src/fides/api/service/connectors/postgres_connector.py
@@ -19,6 +19,11 @@ class PostgreSQLConnector(SQLConnector):
 
     secrets_schema = PostgreSQLSchema
 
+    @property
+    def requires_primary_keys(self) -> bool:
+        """Postgres allows arbitrary columns in the WHERE clause for updates so primary keys are not required."""
+        return False
+
     def build_uri(self) -> str:
         """Build URI of format postgresql://[user[:password]@][netloc][:port][/dbname]"""
         config = self.secrets_schema(**self.configuration.secrets or {})
diff --git a/src/fides/api/service/connectors/query_configs/query_config.py b/src/fides/api/service/connectors/query_configs/query_config.py
index c54eecff85..9f5ddb0251 100644
--- a/src/fides/api/service/connectors/query_configs/query_config.py
+++ b/src/fides/api/service/connectors/query_configs/query_config.py
@@ -430,7 +430,7 @@ def get_update_stmt(
     def get_update_clauses(
         self,
         update_value_map: Dict[str, Any],
-        non_empty_reference_fields: Dict[str, Field],
+        where_clause_fields: Dict[str, Field],
     ) -> List[str]:
         """Returns a list of update clauses for the update statement."""
 
@@ -567,7 +567,7 @@ def format_key_map_for_update_stmt(self, param_map: Dict[str, Any]) -> List[str]
     def get_update_clauses(
         self,
         update_value_map: Dict[str, Any],
-        non_empty_reference_fields: Dict[str, Field],
+        where_clause_fields: Dict[str, Field],
     ) -> List[str]:
         """Returns a list of update clauses for the update statement."""
         return self.format_key_map_for_update_stmt(update_value_map)
diff --git a/src/fides/api/service/connectors/saas_connector.py b/src/fides/api/service/connectors/saas_connector.py
index 40a4d8a7eb..b1101467bf 100644
--- a/src/fides/api/service/connectors/saas_connector.py
+++ b/src/fides/api/service/connectors/saas_connector.py
@@ -72,7 +72,9 @@
 class SaaSConnector(BaseConnector[AuthenticatedClient], Contextualizable):
     """A connector type to integrate with third-party SaaS APIs"""
 
+    @property
     def requires_primary_keys(self) -> bool:
+        """SaaS connectors work with HTTP requests, so the database concept of primary keys does not apply."""
         return False
 
     def get_log_context(self) -> Dict[LoggerContextKeys, Any]:
diff --git a/src/fides/api/service/connectors/scylla_connector.py b/src/fides/api/service/connectors/scylla_connector.py
index 43a821930c..ff17674b88 100644
--- a/src/fides/api/service/connectors/scylla_connector.py
+++ b/src/fides/api/service/connectors/scylla_connector.py
@@ -28,6 +28,11 @@ class ScyllaConnectorMissingKeyspace(Exception):
 class ScyllaConnector(BaseConnector[Cluster]):
     """Scylla Connector"""
 
+    @property
+    def requires_primary_keys(self) -> bool:
+        """ScyllaDB requires primary keys for erasures."""
+        return True
+
     def build_uri(self) -> str:
         """
         Builds URI - Not yet implemented
diff --git a/src/fides/api/service/connectors/scylla_query_config.py b/src/fides/api/service/connectors/scylla_query_config.py
index 5e93668459..1fa52d573d 100644
--- a/src/fides/api/service/connectors/scylla_query_config.py
+++ b/src/fides/api/service/connectors/scylla_query_config.py
@@ -77,14 +77,19 @@ def format_key_map_for_update_stmt(self, param_map: Dict[str, Any]) -> List[str]
     def get_update_clauses(
         self,
         update_value_map: Dict[str, Any],
-        non_empty_reference_fields: Dict[str, Field],
+        where_clause_fields: Dict[str, Field],
     ) -> List[str]:
-        """Returns a list of update clauses for the update statement."""
+        """Returns a list of update clauses for the update statement.
+
+        Omits primary key fields from updates since ScyllaDB prohibits
+        updating primary key fields.
+        """
+
         return self.format_key_map_for_update_stmt(
             {
                 key: value
                 for key, value in update_value_map.items()
-                if key not in non_empty_reference_fields
+                if key not in where_clause_fields
             }
         )
 
diff --git a/src/fides/data/sample_project/sample_resources/postgres_example_test_dataset.yml b/src/fides/data/sample_project/sample_resources/postgres_example_test_dataset.yml
index 768c972d99..e519a75008 100644
--- a/src/fides/data/sample_project/sample_resources/postgres_example_test_dataset.yml
+++ b/src/fides/data/sample_project/sample_resources/postgres_example_test_dataset.yml
@@ -11,6 +11,8 @@ dataset:
             data_categories: [user.contact.address.street]
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: state
             data_categories: [user.contact.address.state]
           - name: street
@@ -36,6 +38,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -58,6 +62,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [user.unique_id]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.name]
             fides_meta:
@@ -74,6 +80,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: time
             data_categories: [user.sensor]
 
@@ -88,6 +96,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: shipping_address_id
             data_categories: [system.operations]
             fides_meta:
@@ -138,6 +148,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [user.financial]
           - name: preferred
@@ -147,6 +159,8 @@ dataset:
         fields:
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: name
             data_categories: [system.operations]
           - name: price
@@ -161,6 +175,8 @@ dataset:
               data_type: string
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: month
             data_categories: [system.operations]
           - name: name
@@ -193,6 +209,8 @@ dataset:
                   direction: from
           - name: id
             data_categories: [system.operations]
+            fides_meta:
+              primary_key: True
           - name: opened
             data_categories: [system.operations]
 
diff --git a/tests/ops/service/connectors/test_query_config.py b/tests/ops/service/connectors/test_query_config.py
index 2aa0871255..eac650d587 100644
--- a/tests/ops/service/connectors/test_query_config.py
+++ b/tests/ops/service/connectors/test_query_config.py
@@ -21,6 +21,7 @@
 from fides.api.service.masking.strategy.masking_strategy_hash import HashMaskingStrategy
 from fides.api.util.data_category import DataCategory
 from tests.fixtures.application_fixtures import load_dataset
+from tests.ops.test_helpers.dataset_utils import remove_primary_keys
 
 from ...task.traversal_data import integration_db_graph
 from ...test_helpers.cache_secrets_helper import cache_secret, clear_cache_secrets
@@ -273,7 +274,7 @@ def test_generate_update_stmt_one_field(
         text_clause = config.generate_update_stmt(row, erasure_policy, privacy_request)
         assert (
             text_clause.text
-            == """UPDATE customer SET name = :masked_name WHERE email = :email"""
+            == """UPDATE customer SET name = :masked_name WHERE id = :id"""
         )
         assert text_clause._bindparams["masked_name"].key == "masked_name"
         assert (
@@ -341,7 +342,7 @@ def test_generate_update_stmt_length_truncation(
         )
         assert (
             text_clause.text
-            == """UPDATE customer SET name = :masked_name WHERE email = :email"""
+            == """UPDATE customer SET name = :masked_name WHERE id = :id"""
         )
         assert text_clause._bindparams["masked_name"].key == "masked_name"
         # length truncation on name field
@@ -391,7 +392,7 @@ def test_generate_update_stmt_multiple_fields_same_rule(
         text_clause = config.generate_update_stmt(row, erasure_policy, privacy_request)
         assert (
             text_clause.text
-            == "UPDATE customer SET email = :masked_email, name = :masked_name WHERE email = :email"
+            == "UPDATE customer SET email = :masked_email, name = :masked_name WHERE id = :id"
         )
         assert text_clause._bindparams["masked_name"].key == "masked_name"
         # since length is set to 40 in dataset.yml, we expect only first 40 chars of masked val
@@ -407,7 +408,7 @@ def test_generate_update_stmt_multiple_fields_same_rule(
                 ["customer-1@example.com"], request_id=privacy_request.id
             )[0]
         )
-        assert text_clause._bindparams["email"].value == "customer-1@example.com"
+        assert text_clause._bindparams["id"].value == 1
         clear_cache_secrets(privacy_request.id)
 
     def test_generate_update_stmts_from_multiple_rules(
@@ -434,6 +435,201 @@ def test_generate_update_stmts_from_multiple_rules(
             row, erasure_policy_two_rules, privacy_request
         )
 
+        assert (
+            text_clause.text
+            == "UPDATE customer SET email = :masked_email, name = :masked_name WHERE id = :id"
+        )
+        # Two different masking strategies used for name and email
+        assert (
+            text_clause._bindparams["masked_name"].value is None
+        )  # Null masking strategy
+        assert (
+            text_clause._bindparams["masked_email"].value == "*****"
+        )  # String rewrite masking strategy
+
+    def test_generate_update_stmt_one_field_without_primary_keys(
+        self, erasure_policy, example_datasets, connection_config
+    ):
+        """With primary keys stripped, the customer update filters on email."""
+        pk_free_dataset = remove_primary_keys(Dataset(**example_datasets[0]))
+        collection_graph = convert_dataset_to_graph(
+            pk_free_dataset, connection_config.key
+        )
+        traversal = Traversal(
+            DatasetGraph(collection_graph), {"email": "customer-1@example.com"}
+        )
+        customer_key = CollectionAddress("postgres_example_test_dataset", "customer")
+        traversal_node = traversal.traversal_node_dict[customer_key]
+        config = SQLQueryConfig(traversal_node.to_mock_execution_node())
+
+        row = {
+            "email": "customer-1@example.com",
+            "name": "John Customer",
+            "address_id": 1,
+            "id": 1,
+        }
+        update = config.generate_update_stmt(row, erasure_policy, privacy_request)
+
+        expected_sql = "UPDATE customer SET name = :masked_name WHERE email = :email"
+        assert update.text == expected_sql
+        masked_name = update._bindparams["masked_name"]
+        assert masked_name.key == "masked_name"
+        assert masked_name.value is None  # Null masking strategy
+
+    def test_generate_update_stmt_one_field_inbound_reference_without_primary_keys(
+        self, erasure_policy_address_city, example_datasets, connection_config
+    ):
+        """With primary keys stripped, the address update still filters on id."""
+        pk_free_dataset = remove_primary_keys(Dataset(**example_datasets[0]))
+        collection_graph = convert_dataset_to_graph(
+            pk_free_dataset, connection_config.key
+        )
+        traversal = Traversal(
+            DatasetGraph(collection_graph), {"email": "customer-1@example.com"}
+        )
+        address_key = CollectionAddress("postgres_example_test_dataset", "address")
+        traversal_node = traversal.traversal_node_dict[address_key]
+        config = SQLQueryConfig(traversal_node.to_mock_execution_node())
+
+        row = {
+            "id": 1,
+            "house": "123",
+            "street": "Main St",
+            "city": "San Francisco",
+            "state": "CA",
+            "zip": "94105",
+        }
+        update = config.generate_update_stmt(
+            row, erasure_policy_address_city, privacy_request
+        )
+
+        expected_sql = "UPDATE address SET city = :masked_city WHERE id = :id"
+        assert update.text == expected_sql
+        masked_city = update._bindparams["masked_city"]
+        assert masked_city.key == "masked_city"
+        assert masked_city.value is None  # Null masking strategy
+
+    def test_generate_update_stmt_length_truncation_without_primary_keys(
+        self,
+        erasure_policy_string_rewrite_long,
+        example_datasets,
+        connection_config,
+    ):
+        """The masked name is still length-truncated when no primary keys exist."""
+        pk_free_dataset = remove_primary_keys(Dataset(**example_datasets[0]))
+        collection_graph = convert_dataset_to_graph(
+            pk_free_dataset, connection_config.key
+        )
+        traversal = Traversal(
+            DatasetGraph(collection_graph), {"email": "customer-1@example.com"}
+        )
+        customer_key = CollectionAddress("postgres_example_test_dataset", "customer")
+        traversal_node = traversal.traversal_node_dict[customer_key]
+        config = SQLQueryConfig(traversal_node.to_mock_execution_node())
+
+        row = {
+            "email": "customer-1@example.com",
+            "name": "John Customer",
+            "address_id": 1,
+            "id": 1,
+        }
+        update = config.generate_update_stmt(
+            row, erasure_policy_string_rewrite_long, privacy_request
+        )
+
+        expected_sql = "UPDATE customer SET name = :masked_name WHERE email = :email"
+        assert update.text == expected_sql
+
+        masked_name = update._bindparams["masked_name"]
+        assert masked_name.key == "masked_name"
+        # length truncation on name field
+        truncated_value = "some rewrite value that is very long and"
+        assert masked_name.value == truncated_value
+
+    def test_generate_update_stmt_multiple_fields_same_rule_without_primary_keys(
+        self, erasure_policy, example_datasets, connection_config
+    ):
+        """One hash rule masks both email and name; the update filters on email."""
+        pk_free_dataset = remove_primary_keys(Dataset(**example_datasets[0]))
+        collection_graph = convert_dataset_to_graph(
+            pk_free_dataset, connection_config.key
+        )
+        traversal = Traversal(
+            DatasetGraph(collection_graph), {"email": "customer-1@example.com"}
+        )
+        customer_key = CollectionAddress("postgres_example_test_dataset", "customer")
+        traversal_node = traversal.traversal_node_dict[customer_key]
+        config = SQLQueryConfig(traversal_node.to_mock_execution_node())
+
+        row = {
+            "email": "customer-1@example.com",
+            "name": "John Customer",
+            "address_id": 1,
+            "id": 1,
+        }
+
+        # Broaden the first rule's target so it covers every "user" category
+        rule = erasure_policy.rules[0]
+        rule.targets[0].data_category = DataCategory("user").value
+
+        # Switch the rule over to the hash masking strategy
+        rule.masking_strategy = {
+            "strategy": "hash",
+            "configuration": {"algorithm": "SHA-512"},
+        }
+        # The hash strategy reads its salt from the cache, so seed it first
+        secret = MaskingSecretCache[str](
+            secret="adobo",
+            masking_strategy=HashMaskingStrategy.name,
+            secret_type=SecretType.salt,
+        )
+        cache_secret(secret, privacy_request.id)
+
+        update = config.generate_update_stmt(row, erasure_policy, privacy_request)
+        expected_sql = "UPDATE customer SET email = :masked_email, name = :masked_name WHERE email = :email"
+        assert update.text == expected_sql
+
+        bind_params = update._bindparams
+        assert bind_params["masked_name"].key == "masked_name"
+
+        hash_config = HashMaskingConfiguration(algorithm="SHA-512")
+        hashed_name = HashMaskingStrategy(hash_config).mask(
+            ["John Customer"], request_id=privacy_request.id
+        )[0]
+        hashed_email = HashMaskingStrategy(hash_config).mask(
+            ["customer-1@example.com"], request_id=privacy_request.id
+        )[0]
+
+        # name is expected to be cut to its first 40 chars; email is compared in full
+        assert bind_params["masked_name"].value == hashed_name[0:40]
+        assert bind_params["masked_email"].value == hashed_email
+        assert bind_params["email"].value == "customer-1@example.com"
+        clear_cache_secrets(privacy_request.id)
+
+    def test_generate_update_stmts_from_multiple_rules_without_primary_keys(
+        self, erasure_policy_two_rules, example_datasets, connection_config
+    ):
+        dataset = remove_primary_keys(Dataset(**example_datasets[0]))
+        graph = convert_dataset_to_graph(dataset, connection_config.key)
+        dataset_graph = DatasetGraph(*[graph])
+        traversal = Traversal(dataset_graph, {"email": "customer-1@example.com"})
+        row = {
+            "email": "customer-1@example.com",
+            "name": "John Customer",
+            "address_id": 1,
+            "id": 1,
+        }
+
+        customer_node = traversal.traversal_node_dict[
+            CollectionAddress("postgres_example_test_dataset", "customer")
+        ].to_mock_execution_node()
+
+        config = SQLQueryConfig(customer_node)
+
+        text_clause = config.generate_update_stmt(
+            row, erasure_policy_two_rules, privacy_request
+        )
+
         assert (
             text_clause.text
             == "UPDATE customer SET email = :masked_email, name = :masked_name WHERE email = :email"
@@ -446,6 +642,7 @@ def test_generate_update_stmts_from_multiple_rules(
             text_clause._bindparams["masked_email"].value == "*****"
         )  # String rewrite masking strategy
 
+
 class TestSQLLikeQueryConfig:
     def test_missing_namespace_meta_schema(self):
 
diff --git a/tests/ops/service/connectors/test_snowflake_query_config.py b/tests/ops/service/connectors/test_snowflake_query_config.py
index 5521a1a88a..4f4b23b8c4 100644
--- a/tests/ops/service/connectors/test_snowflake_query_config.py
+++ b/tests/ops/service/connectors/test_snowflake_query_config.py
@@ -150,7 +150,7 @@ def test_generate_update_stmt(
         )
         assert (
             str(update_stmt)
-            == 'UPDATE "address" SET "city" = :city, "house" = :house, "state" = :state, "street" = :street, "zip" = :zip WHERE "id" = :id'
+            == 'UPDATE "address" SET "city" = :masked_city, "house" = :masked_house, "state" = :masked_state, "street" = :masked_street, "zip" = :masked_zip WHERE "id" = :id'
         )
 
     def test_generate_namespaced_update_stmt(
@@ -191,5 +191,5 @@ def test_generate_namespaced_update_stmt(
         )
         assert (
             str(update_stmt)
-            == 'UPDATE "FIDESOPS_TEST"."TEST"."address" SET "city" = :city, "house" = :house, "state" = :state, "street" = :street, "zip" = :zip WHERE "id" = :id'
+            == 'UPDATE "FIDESOPS_TEST"."TEST"."address" SET "city" = :masked_city, "house" = :masked_house, "state" = :masked_state, "street" = :masked_street, "zip" = :masked_zip WHERE "id" = :id'
         )
diff --git a/tests/ops/task/test_create_request_tasks.py b/tests/ops/task/test_create_request_tasks.py
index 290c2dc1be..ad118ee46c 100644
--- a/tests/ops/task/test_create_request_tasks.py
+++ b/tests/ops/task/test_create_request_tasks.py
@@ -927,7 +927,7 @@ def test_erase_after_saas_upstream_and_downstream_tasks(
                 "is_array": False,
                 "read_only": None,
                 "references": [],
-                "primary_key": True,
+                "primary_key": False,
                 "data_categories": ["system.operations"],
                 "data_type_converter": "integer",
                 "return_all_elements": None,
diff --git a/tests/ops/test_helpers/dataset_utils.py b/tests/ops/test_helpers/dataset_utils.py
index e60efb9892..d51e1f47ff 100644
--- a/tests/ops/test_helpers/dataset_utils.py
+++ b/tests/ops/test_helpers/dataset_utils.py
@@ -13,7 +13,11 @@
 )
 from fides.api.graph.data_type import DataType, get_data_type, to_data_type_string
 from fides.api.models.connectionconfig import ConnectionConfig
-from fides.api.models.datasetconfig import DatasetConfig, convert_dataset_to_graph
+from fides.api.models.datasetconfig import (
+    DatasetConfig,
+    DatasetField,
+    convert_dataset_to_graph,
+)
 from fides.api.util.collection_util import Row
 
 SAAS_DATASET_DIRECTORY = "data/saas/dataset/"
@@ -231,3 +235,27 @@ def get_simple_fields(fields: Iterable[Field]) -> List[Dict[str, Any]]:
             object["fields"] = get_simple_fields(field.fields.values())
         object_list.append(object)
     return object_list
+
+
+def remove_primary_keys(dataset: Dataset) -> Dataset:
+    """Returns a deep copy of the dataset with every primary_key flag cleared.
+
+    Nested fields are always visited, including those whose parent field has
+    no fides_meta of its own; the input dataset is left untouched.
+    """
+    dataset_copy = dataset.model_copy(deep=True)
+
+    for collection in dataset_copy.collections:
+        # the recursive helper treats top-level and nested fields uniformly
+        _remove_nested_primary_keys(collection.fields)
+
+    return dataset_copy
+
+
+def _remove_nested_primary_keys(fields: List[DatasetField]) -> None:
+    """Recursively clears primary_key on each field and on all of its descendants."""
+    for nested in fields:
+        meta = nested.fides_meta
+        if meta and meta.primary_key:
+            meta.primary_key = None
+        _remove_nested_primary_keys(nested.fields or [])

From 3c3c63c0d25798cea9ff5e9c4e5213d17aa7be9e Mon Sep 17 00:00:00 2001
From: Adrian Galvan <adrian@ethyca.com>
Date: Tue, 10 Dec 2024 17:08:49 -0800
Subject: [PATCH 22/22] Revert setting requires_primary_keys to False for SaaS
 connectors

---
 src/fides/api/service/connectors/saas_connector.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/fides/api/service/connectors/saas_connector.py b/src/fides/api/service/connectors/saas_connector.py
index b1101467bf..b917b6cfda 100644
--- a/src/fides/api/service/connectors/saas_connector.py
+++ b/src/fides/api/service/connectors/saas_connector.py
@@ -72,11 +72,6 @@
 class SaaSConnector(BaseConnector[AuthenticatedClient], Contextualizable):
     """A connector type to integrate with third-party SaaS APIs"""
 
-    @property
-    def requires_primary_keys(self) -> bool:
-        """SaaS connectors work with HTTP requests, so the database concept of primary keys does not apply."""
-        return False
-
     def get_log_context(self) -> Dict[LoggerContextKeys, Any]:
         return {
             LoggerContextKeys.system_key: (