Skip to content

Commit

Permalink
Merge pull request #73 from uc-cdis/fix/remove-deprecated-metadata-keys
Browse files Browse the repository at this point in the history
Fix/remove deprecated metadata keys
  • Loading branch information
george42-ctds authored Oct 3, 2022
2 parents b9e4d42 + 4ab83fe commit 5ec1bc9
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 3 deletions.
20 changes: 17 additions & 3 deletions .secrets.baseline
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"exclude": {
"files": "^.secrets.baseline$",
"files": null,
"lines": null
},
"generated_at": "2022-09-15T16:17:07Z",
"generated_at": "2022-10-03T18:10:28Z",
"plugins_used": [
{
"name": "AWSKeyDetector"
Expand Down Expand Up @@ -90,6 +90,14 @@
"type": "Hex High Entropy String"
}
],
"migrations/versions/6819874e85b9_remove_deprecated_metadata.py": [
{
"hashed_secret": "ecdb6b62dc6de954dbbef8185029415aecae5e5a",
"is_verified": false,
"line_number": 15,
"type": "Hex High Entropy String"
}
],
"poetry.lock": [
{
"hashed_secret": "940ab7206e90c8d2983ca7a38eca2d4a59d85fb5",
Expand All @@ -102,7 +110,7 @@
{
"hashed_secret": "143e9f2aca10dbd2711cb96047f4016f095e5709",
"is_verified": false,
"line_number": 3638,
"line_number": 3898,
"type": "Hex High Entropy String"
}
],
Expand All @@ -112,6 +120,12 @@
"is_verified": false,
"line_number": 225,
"type": "Hex High Entropy String"
},
{
"hashed_secret": "ecdb6b62dc6de954dbbef8185029415aecae5e5a",
"is_verified": false,
"line_number": 283,
"type": "Hex High Entropy String"
}
]
},
Expand Down
57 changes: 57 additions & 0 deletions migrations/versions/6819874e85b9_remove_deprecated_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""remove deprecated metadata
Revision ID: 6819874e85b9
Revises: 3354f2c466ec
Create Date: 2022-09-27 13:43:39.827523
"""
import json

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
# `revision` is this migration's own ID and `down_revision` is the migration it
# revises; both match the "Revision ID" / "Revises" lines in the module docstring.
revision = "6819874e85b9"
down_revision = "3354f2c466ec"
# No branch labels and no cross-revision dependencies are declared.
branch_labels = None
depends_on = None


def escape(str):
    """Return ``str`` with every single quote doubled.

    Doubling is how a literal single quote is written inside a single-quoted
    SQL string, so the result can be embedded in such a literal.
    """
    doubled_quote = "'" * 2
    # Splitting on the quote and re-joining with two quotes doubles each one.
    return doubled_quote.join(str.split("'"))


def upgrade():
    """Remove deprecated metadata keys from the ``data`` column of ``metadata``.

    Rows are read in batches of ``limit`` ordered by ``guid``. Each row whose
    ``data`` document holds at least one deprecated key is rewritten without
    those keys; rows with nothing to remove are left untouched.
    """
    remove_metadata_keys = ("_uploader_id", "_filename", "_bucket", "_file_extension")

    # Values travel as bound parameters rather than being spliced into the SQL
    # text, so no manual quote escaping is needed and guids or metadata
    # containing quotes cannot alter the statement.
    select_batch = sa.text(
        "SELECT guid, data FROM metadata ORDER BY guid LIMIT :limit OFFSET :offset"
    )
    update_row = sa.text("UPDATE metadata SET data = :data WHERE guid = :guid")

    connection = op.get_bind()
    offset = 0
    limit = 500
    while True:
        results = connection.execute(
            select_batch, {"limit": limit, "offset": offset}
        ).fetchall()
        if not results:
            break

        for guid, data in results:
            # NOTE(review): assumes the driver decodes the column into a dict.
            # NULL or non-object values carry no keys to scrub, so skip them
            # instead of failing on a missing ``.keys()``.
            if not isinstance(data, dict):
                continue

            stale_keys = [key for key in remove_metadata_keys if key in data]
            if not stale_keys:
                # Nothing deprecated in this row: avoid a pointless UPDATE.
                continue

            # scrub internal fields from metadata
            for key in stale_keys:
                del data[key]
            connection.execute(update_row, {"data": json.dumps(data), "guid": guid})

        # Grab another batch of rows. Updates never change ``guid``, so the
        # ``ORDER BY guid`` ordering (and therefore the offset) stays stable.
        offset += limit


def downgrade():
    """Do nothing: the keys removed by ``upgrade`` are not restored."""
    return None
60 changes: 60 additions & 0 deletions tests/test_migrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,5 +238,65 @@ async def test_4d93784a25e5_upgrade(
_reset_migrations()


@pytest.mark.asyncio
async def test_6819874e85b9_upgrade():
    """
    Check that the "remove_deprecated_metadata" migration strips the deprecated
    keys from a row's `data` while leaving the other keys and `authz` intact.

    The row is inserted directly into the DB because metadata can't be created
    through the `client` fixture, due to this issue:
    https://github.com/encode/starlette/issues/440
    """

    # start from the revision just before "remove_deprecated_metadata"
    alembic_main(["--raiseerr", "downgrade", "3354f2c466ec"])

    fake_guid = "7891011"
    kept_fields = {"foo": "bar", "bizz": "buzz"}
    deprecated_fields = {
        "_uploader_id": "uploader",
        "_filename": "hello.txt",
        "_bucket": "mybucket",
        "_file_extension": ".txt",
    }
    metadata_before = {**kept_fields, **deprecated_fields}
    metadata_after = dict(kept_fields)
    authz_data = {"version": 0, "_resource_paths": ["/programs/DEV"]}

    select_stmt = f"SELECT guid, data, authz FROM metadata WHERE guid = '{fake_guid}'"

    async with db.with_bind(DB_DSN):

        # seed the table with a row that still carries the deprecated keys
        sql_old_metadata = escape(json.dumps(metadata_before))
        sql_authz_data = escape(json.dumps(authz_data))
        insert_stmt = f"INSERT INTO metadata(\"guid\", \"data\", \"authz\") VALUES ('{fake_guid}', '{sql_old_metadata}', '{sql_authz_data}')"
        await db.scalar(db.text(insert_stmt))

        try:
            # the row must round-trip unchanged before the migration runs
            rows = await db.all(db.text(select_stmt))
            stored = dict(rows[0].items())
            assert stored == {
                "guid": fake_guid,
                "data": metadata_before,
                "authz": authz_data,
            }

            # run "remove_deprecated_metadata" migration
            alembic_main(["--raiseerr", "upgrade", "6819874e85b9"])

            # the deprecated keys must be gone, everything else untouched
            rows = await db.all(db.text(select_stmt))
            assert len(rows) == 1
            stored = dict(rows[0].items())
            assert stored == {
                "guid": fake_guid,
                "data": metadata_after,
                "authz": authz_data,
            }

        finally:
            # always remove the seeded row, even when an assertion fails
            await db.all(db.text(f"DELETE FROM metadata WHERE guid = '{fake_guid}'"))

    _reset_migrations()


def _reset_migrations():
    """Run the ``upgrade head`` command through ``alembic_main``."""
    head_upgrade_args = ["--raiseerr", "upgrade", "head"]
    alembic_main(head_upgrade_args)

0 comments on commit 5ec1bc9

Please sign in to comment.