Skip to content

Commit

Permalink
Add migration
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathangreen committed Nov 27, 2024
1 parent 9097f53 commit 5f2130c
Showing 1 changed file with 32 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Remove unsafe characters summary
Revision ID: c3458e1ef9aa
Revises: 272da5f400de
Create Date: 2024-11-27 20:32:41.431147+00:00
"""

from alembic import op

# revision identifiers, used by Alembic.
# `revision` is this migration's own ID and `down_revision` is the migration it
# revises; both match the "Revision ID" / "Revises" lines in the module docstring.
revision = "c3458e1ef9aa"
down_revision = "272da5f400de"
# No branch labels and no dependencies on other migrations are declared.
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Strip XML-unsafe characters from existing ``works.summary_text`` values.

    Issues a single ``UPDATE`` that deletes every run of characters outside
    the allowed set (tab, line feed, carriage return, U+0020-U+D7FF,
    U+E000-U+FFFD and U+10000-U+10FFFF). Only rows whose ``summary_text``
    contains at least one such character are touched.
    """
    # The code has been updated to filter out these characters on the way in;
    # this migration cleans up data that was stored before that change.
    # https://www.postgresql.org/docs/current/functions-matching.html#FUNCTIONS-POSIX-REGEXP
    #
    # This is a regular (non-raw) string, so Python resolves the \u / \U
    # escapes and the literal characters themselves are sent to PostgreSQL
    # inside the bracket expression.
    xml_unsafe = "[^\u0020-\uD7FF\u0009\u000A\u000D\uE000-\uFFFD\U00010000-\U0010FFFF]"

    # 'g' makes regexp_replace remove every match, not just the first one.
    # The WHERE clause limits the rewrite to rows that actually need it.
    statement = (
        "UPDATE works SET summary_text = regexp_replace("
        f" summary_text, '{xml_unsafe}+', '', 'g'"
        ") WHERE "
        f"summary_text ~ '{xml_unsafe}'"
    )
    op.execute(statement)


def downgrade() -> None:
    """Do nothing: this migration needs no action on downgrade."""
    return None

0 comments on commit 5f2130c

Please sign in to comment.