Skip to content

Commit

Permalink
LA-106: Integrate erasure request for BigQuery Enterprise DSR Testing…
Browse files Browse the repository at this point in the history
… Initiative (#5554)
  • Loading branch information
eastandwestwind authored Dec 5, 2024
1 parent bb69eb2 commit 1415108
Show file tree
Hide file tree
Showing 7 changed files with 302 additions and 23 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/backend_checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -397,8 +397,8 @@ jobs:
# Secrets to pull from 1Password
BIGQUERY_DATASET: op://github-actions/bigquery/BIGQUERY_DATASET
BIGQUERY_KEYFILE_CREDS: op://github-actions/bigquery/BIGQUERY_KEYFILE_CREDS
BIGQUERY_ENTERPRISE_DATASET: op://github-actions/bigquery-enterprise/BIGQUERY_DATASET
BIGQUERY_ENTERPRISE_KEYFILE_CREDS: op://github-actions/bigquery-enterprise/BIGQUERY_KEYFILE_CREDS
BIGQUERY_ENTERPRISE_DATASET: op://github-actions/bigquery-enterprise/BIGQUERY_ENTERPRISE_DATASET
BIGQUERY_ENTERPRISE_KEYFILE_CREDS: op://github-actions/bigquery-enterprise/BIGQUERY_ENTERPRISE_KEYFILE_CREDS
DYNAMODB_ACCESS_KEY_ID: op://github-actions/dynamodb/DYNAMODB_ACCESS_KEY_ID
DYNAMODB_ACCESS_KEY: op://github-actions/dynamodb/DYNAMODB_ACCESS_KEY
DYNAMODB_ASSUME_ROLE_ARN: op://github-actions/dynamodb/DYNAMODB_ASSUME_ROLE_ARN
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ The types of changes are:
- Added `fides_consent_override` option in FidesJS SDK [#5541](https://github.com/ethyca/fides/pull/5541)
- Added new `script` ConsentMethod in FidesJS SDK for tracking automated consent [#5541](https://github.com/ethyca/fides/pull/5541)
- Added a new page under system integrations to run standalone dataset tests (Fidesplus) [#5549](https://github.com/ethyca/fides/pull/5549)
- Added new erasure tests for BigQuery Enterprise [#5554](https://github.com/ethyca/fides/pull/5554)

### Changed
- Adding hashes to system tab URLs [#5535](https://github.com/ethyca/fides/pull/5535)
Expand Down
25 changes: 7 additions & 18 deletions data/dataset/bigquery_enterprise_test_dataset.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ dataset:
references: null
identity: null
primary_key: true
data_type: null
data_type: integer
length: null
return_all_elements: null
read_only: null
Expand Down Expand Up @@ -103,7 +103,7 @@ dataset:
references: null
identity: null
primary_key: true
data_type: null
data_type: integer
length: null
return_all_elements: null
read_only: null
Expand All @@ -119,18 +119,7 @@ dataset:
description: null
data_categories:
- system.operations
fides_meta:
references:
- dataset: enterprise_dsr_testing
field: stackoverflow_posts.id
direction: from
identity: null
primary_key: null
data_type: null
length: null
return_all_elements: null
read_only: null
custom_request_field: null
fides_meta: null
fields: null
- name: revision_guid
description: null
Expand All @@ -147,7 +136,7 @@ dataset:
- name: user_id
description: null
data_categories:
- user.contact
- system.operations
fides_meta:
references:
- dataset: enterprise_dsr_testing
Expand Down Expand Up @@ -216,7 +205,7 @@ dataset:
references: null
identity: null
primary_key: true
data_type: null
data_type: integer
length: null
return_all_elements: null
read_only: null
Expand Down Expand Up @@ -260,7 +249,7 @@ dataset:
- name: owner_display_name
description: null
data_categories:
- system.operations
- user.contact
fides_meta: null
fields: null
- name: owner_user_id
Expand All @@ -274,7 +263,7 @@ dataset:
direction: from
identity: null
primary_key: null
data_type: null
data_type: integer
length: null
return_all_elements: null
read_only: null
Expand Down
51 changes: 51 additions & 0 deletions tests/fixtures/application_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -939,6 +939,57 @@ def biquery_erasure_policy(
pass


@pytest.fixture(scope="function")
def bigquery_enterprise_erasure_policy(
    db: Session,
    oauth_client: ClientDetail,
) -> Generator:
    """Yield an erasure Policy (null_rewrite masking, targeting the
    ``user.contact`` data category) for BigQuery Enterprise DSR tests.

    Teardown deletes the created target, rule, and policy, tolerating
    rows that were already removed by the test itself.
    """
    policy = Policy.create(
        db=db,
        data={
            "name": "example enterprise erasure policy",
            "key": "example_enterprise_erasure_policy",
            "client_id": oauth_client.id,
        },
    )

    rule = Rule.create(
        db=db,
        data={
            "action_type": ActionType.erasure.value,
            "client_id": oauth_client.id,
            "name": "Erasure Rule Enterprise",
            "policy_id": policy.id,
            "masking_strategy": {
                "strategy": "null_rewrite",
                "configuration": {},
            },
        },
    )

    target = RuleTarget.create(
        db=db,
        data={
            "client_id": oauth_client.id,
            "data_category": DataCategory("user.contact").value,
            "rule_id": rule.id,
        },
    )

    yield policy

    # Delete in dependency order (target -> rule -> policy); a row may
    # already be gone if the test deleted it, so swallow that case only.
    for record in (target, rule, policy):
        try:
            record.delete(db)
        except ObjectDeletedError:
            pass


@pytest.fixture(scope="function")
def erasure_policy_aes(
db: Session,
Expand Down
97 changes: 97 additions & 0 deletions tests/fixtures/bigquery_fixtures.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import ast
import os
import random
from datetime import datetime
from typing import Dict, Generator, List
from uuid import uuid4

Expand Down Expand Up @@ -449,6 +451,101 @@ def bigquery_resources_with_namespace_meta(
connection.execute(stmt)


@pytest.fixture(scope="function")
def bigquery_enterprise_resources(
    bigquery_enterprise_test_dataset_config,
):
    """Seed linked user, post, comment, and post_history rows in the
    BigQuery Enterprise test dataset, and delete them on teardown.

    Yields a dict of the generated ids and values, plus the live
    ``client`` and ``connector``, so tests can verify access and
    erasure behavior against real BigQuery data.
    """
    bigquery_connection_config = (
        bigquery_enterprise_test_dataset_config.connection_config
    )
    connector = BigQueryConnector(bigquery_connection_config)
    bigquery_client = connector.client()
    with bigquery_client.connect() as connection:

        # NOTE: values interpolated below are test-generated, not untrusted
        # input, so f-string SQL is acceptable here.

        # Real max id in the Stackoverflow dataset is 20081052, so we
        # purposefully generate an id above this max.
        stmt = "select max(id) from enterprise_dsr_testing.users;"
        res = connection.execute(stmt)
        # Increment the id by a random number to avoid conflicts on concurrent test runs
        random_increment = random.randint(0, 99999)
        user_id = res.all()[0][0] + random_increment
        display_name = (
            f"fides_testing_{user_id}"  # prefix to do manual cleanup if needed
        )
        last_access_date = datetime.now()
        creation_date = datetime.now()
        location = "Dream World"

        # Create test user data
        stmt = f"""
            insert into enterprise_dsr_testing.users (id, display_name, last_access_date, creation_date, location)
            values ({user_id}, '{display_name}', '{last_access_date}', '{creation_date}', '{location}');
        """
        connection.execute(stmt)

        # Create test stackoverflow_posts data. Posts are responses to questions on Stackoverflow, and does not include original question.
        post_body = "For me, the solution was to adopt 3 cats and dance with them under the full moon at midnight."
        stmt = "select max(id) from enterprise_dsr_testing.stackoverflow_posts;"
        res = connection.execute(stmt)
        random_increment = random.randint(0, 99999)
        post_id = res.all()[0][0] + random_increment
        stmt = f"""
            insert into enterprise_dsr_testing.stackoverflow_posts (body, creation_date, id, owner_user_id, owner_display_name)
            values ('{post_body}', '{creation_date}', {post_id}, {user_id}, '{display_name}');
        """
        connection.execute(stmt)

        # Create test comments data. Comments are responses to posts or questions on Stackoverflow, and does not include original question or post itself.
        stmt = "select max(id) from enterprise_dsr_testing.comments;"
        res = connection.execute(stmt)
        random_increment = random.randint(0, 99999)
        comment_id = res.all()[0][0] + random_increment
        comment_text = "FYI this only works if you have pytest installed locally."
        stmt = f"""
            insert into enterprise_dsr_testing.comments (id, text, creation_date, post_id, user_id, user_display_name)
            values ({comment_id}, '{comment_text}', '{creation_date}', {post_id}, {user_id}, '{display_name}');
        """
        connection.execute(stmt)

        # Create test post_history data.
        # Fix: base the new id on post_history's own max id — the previous
        # version queried the comments table here (copy-paste error), which
        # could collide with an existing post_history primary key.
        stmt = "select max(id) from enterprise_dsr_testing.post_history;"
        res = connection.execute(stmt)
        random_increment = random.randint(0, 99999)
        post_history_id = res.all()[0][0] + random_increment
        revision_text = "this works if you have pytest"
        uuid = str(uuid4())
        stmt = f"""
            insert into enterprise_dsr_testing.post_history (id, text, creation_date, post_id, user_id, post_history_type_id, revision_guid)
            values ({post_history_id}, '{revision_text}', '{creation_date}', {post_id}, {user_id}, 1, '{uuid}');
        """
        connection.execute(stmt)

        yield {
            "name": display_name,
            "user_id": user_id,
            "comment_id": comment_id,
            "post_history_id": post_history_id,
            "post_id": post_id,
            "client": bigquery_client,
            "connector": connector,
            "first_comment_text": comment_text,
            "first_post_body": post_body,
            "revision_text": revision_text,
            "display_name": display_name,
        }
        # Remove test data and close BigQuery connection in teardown.
        # Delete children before the parent user row to respect references.
        stmt = f"delete from enterprise_dsr_testing.post_history where id = {post_history_id};"
        connection.execute(stmt)

        stmt = f"delete from enterprise_dsr_testing.comments where id = {comment_id};"
        connection.execute(stmt)

        stmt = f"delete from enterprise_dsr_testing.stackoverflow_posts where id = {post_id};"
        connection.execute(stmt)

        stmt = f"delete from enterprise_dsr_testing.users where id = {user_id};"
        connection.execute(stmt)

@pytest.fixture(scope="session")
def bigquery_test_engine(bigquery_keyfile_creds) -> Generator:
"""Return a connection to a Google BigQuery Warehouse"""
Expand Down
Loading

0 comments on commit 1415108

Please sign in to comment.