From 0807f20bfc36b1f30828ed562c7f79e14b5f6100 Mon Sep 17 00:00:00 2001 From: Vanessasaurus <814322+vsoch@users.noreply.github.com> Date: Mon, 23 May 2022 17:06:16 -0600 Subject: [PATCH] Adding ability for deid to support deid-provided functions (#208) * adding ability for deid to support deid-provided functions as of this version, a user can specify a value as a deid_func: meaning that we use a deid provided function. This is useful for providing some general uid and a customized jitter function, along with other functions that users might want to add. With this change I have provided docs and a contributing section for how to do this, along with running pyflakes on the tests Signed-off-by: vsoch --- .github/workflows/codespell.yml | 5 +- .gitignore | 3 +- CHANGELOG.md | 1 + deid/config/__init__.py | 49 +++- deid/dicom/actions/__init__.py | 13 + deid/dicom/{actions.py => actions/jitter.py} | 25 +- deid/dicom/actions/uids.py | 90 ++++++ deid/dicom/fields.py | 47 +++- deid/dicom/filter.py | 38 ++- deid/dicom/header.py | 4 +- deid/dicom/parser.py | 89 ++++-- deid/tests/Xtest_dicom_header.py | 1 - deid/tests/common.py | 14 +- deid/tests/test_clean.py | 1 - deid/tests/test_config.py | 1 - deid/tests/test_data.py | 3 - deid/tests/test_deid_recipe.py | 1 - deid/tests/test_dicom_fields.py | 1 - deid/tests/test_dicom_funcs.py | 280 +++++++++++++++++++ deid/tests/test_dicom_groups.py | 4 +- deid/tests/test_dicom_tags.py | 2 - deid/tests/test_dicom_utils.py | 5 +- deid/tests/test_file_meta.py | 11 +- deid/tests/test_filter_detect.py | 8 - deid/tests/test_replace_identifiers.py | 62 ++-- deid/tests/test_utils.py | 2 +- deid/tests/test_utils_files.py | 4 - deid/utils/__init__.py | 2 +- deid/utils/actions.py | 64 ++++- deid/version.py | 6 +- docs/_docs/contributing/code.md | 52 ++++ docs/_docs/examples/func-replace.md | 5 +- docs/_docs/user-docs/client.md | 2 +- docs/_docs/user-docs/recipe-funcs.md | 127 +++++++++ docs/_docs/user-docs/tags.md | 2 +- 35 files changed, 872 
insertions(+), 152 deletions(-) create mode 100644 deid/dicom/actions/__init__.py rename deid/dicom/{actions.py => actions/jitter.py} (79%) create mode 100644 deid/dicom/actions/uids.py create mode 100644 deid/tests/test_dicom_funcs.py create mode 100644 docs/_docs/user-docs/recipe-funcs.md diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml index bed2277a..e29d891d 100644 --- a/.github/workflows/codespell.yml +++ b/.github/workflows/codespell.yml @@ -12,8 +12,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: codespell-project/actions-codespell@de089481bd65b71b4d02e34ffb3566b6d189333e - uses: crate-ci/typos@592b36d23c62cb378f6097a292bc902ee73f93ef # version 1.0.4 + - uses: actions/checkout@v3 + - uses: crate-ci/typos@592b36d23c62cb378f6097a292bc902ee73f93ef # version 1.0.4 with: files: ./deid ./docs/_docs ./docs/README.md ./docs/pages ./examples diff --git a/.gitignore b/.gitignore index 930a0fb0..2551ec9a 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ dist deid.egg-info build pypi.sh +env private .vscode @@ -34,4 +35,4 @@ Icon Network Trash Folder Temporary Items .apdisk -.cache \ No newline at end of file +.cache diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a002413..5b893123 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and **Merged pull requests**. Critical items to know are: Referenced versions in headers are tagged on Github, in parentheses are for pypi. 
## [vxx](https://github.com/pydicom/deid/tree/master) (master) + - adding support for deid provided functions [#207](https://github.com/pydicom/deid/issues/207) (0.2.3) - update CTP deid.dicom up until [this commit](https://github.com/johnperry/CTP/commit/345b05b157c046532e8791a63ababbf6d0dba59b) (0.2.29) - various LGTM alert fixes [#186](https://github.com/pydicom/deid/pull/186) (0.0.28) - bug fix for exception when attempting to jitter DA/DT which cannot be jittered (space) [#189] (https://github.com/pydicom/deid/issues/189) (0.2.27) diff --git a/deid/config/__init__.py b/deid/config/__init__.py index 5af378ba..e18d6835 100644 --- a/deid/config/__init__.py +++ b/deid/config/__init__.py @@ -35,8 +35,9 @@ class DeidRecipe: - """Create and work with a deid recipe to filter and perform operations on - a dicom header. Usage typically looks like: + """Create a deid recipe to filter and perform operations on a dicom header. + + Usage typically looks like: deid = 'dicom.deid' recipe = DeidRecipe(deid) @@ -68,8 +69,10 @@ def __repr__(self): return "[deid]" def load(self, deid): - """load a deid recipe into the object. If a deid configuration is - already defined, append to that. + """ + Load a deid recipe into the object. + + If a deid configuration is already defined, append to that. 
""" deid = get_deid(deid) if deid is not None: @@ -82,7 +85,9 @@ def load(self, deid): self.deid = load_combined_deid([self.deid, deid]) def _get_section(self, name): - """return a section (key) in the loaded deid, if it exists""" + """ + Return a section (key) in the loaded deid, if it exists + """ section = None if self.deid is not None: section = self.deid.get(name) @@ -91,11 +96,15 @@ def _get_section(self, name): # Get Sections def get_format(self): - """return the format of the loaded deid, if one exists""" + """ + Return the format of the loaded deid, if one exists + """ return self._get_section("format") def _get_named_section(self, section_name, name=None): - """a helper function to return an entire section, or if a name is + """Get a named section from the deid recipe. + + a helper function to return an entire section, or if a name is provided, a named section under it. If the section is not defined, we appropriately return None. """ @@ -105,19 +114,27 @@ def _get_named_section(self, section_name, name=None): return section def get_filters(self, name=None): - """return all filters for a deid recipe, or a set based on a name""" + """ + Return all filters for a deid recipe, or a set based on a name + """ return self._get_named_section("filter", name) def get_values_lists(self, name=None): - """return a values list by name""" + """ + Return a values list by name + """ return self._get_named_section("values", name) def get_fields_lists(self, name=None): - """return a values list by name""" + """ + Return a values list by name + """ return self._get_named_section("fields", name) def _get_actions(self, action=None, field=None, section="header"): - """handler for header or filemeta actions.""" + """ + Handler for header or filemeta actions. 
+ """ header = self._get_section(section) or [] if header is not None: if action is not None: @@ -129,7 +146,7 @@ def _get_actions(self, action=None, field=None, section="header"): return header def get_actions(self, action=None, field=None): - """get deid actions to perform on a header, or a subset based on a type + """Get deid actions to perform on a header, or a subset based on a type A header action is a list with the following: {'action': 'REMOVE', 'field': 'AssignedLocation'}, @@ -159,7 +176,9 @@ def has_actions(self): # Listing def listof(self, section): - """return a list of keys for a section""" + """ + Return a list of keys for a section + """ listing = self._get_section(section) or {} return list(listing.keys()) @@ -175,7 +194,9 @@ def ls_fieldlists(self): # Init def _init_deid(self, deid=None, base=False, default_base="dicom"): - """initialize the recipe with one or more deids, optionally including + """Initialize a recipe. + + initialize the recipe with one or more deids, optionally including the default. This function is called at init time. 
If you need to add or work with already loaded configurations, use add/remove diff --git a/deid/dicom/actions/__init__.py b/deid/dicom/actions/__init__.py new file mode 100644 index 00000000..f6eb10b1 --- /dev/null +++ b/deid/dicom/actions/__init__.py @@ -0,0 +1,13 @@ +from .jitter import jitter_timestamp, jitter_timestamp_func +from .uids import basic_uuid, dicom_uuid, suffix_uuid, pydicom_uuid + +# Function lookup +# Functions here must take an item, field, and value + +deid_funcs = { + "jitter": jitter_timestamp_func, + "dicom_uuid": dicom_uuid, + "suffix_uuid": suffix_uuid, + "basic_uuid": basic_uuid, + "pydicom_uuid": pydicom_uuid, +} diff --git a/deid/dicom/actions.py b/deid/dicom/actions/jitter.py similarity index 79% rename from deid/dicom/actions.py rename to deid/dicom/actions/jitter.py index af57761e..6eeab9e2 100644 --- a/deid/dicom/actions.py +++ b/deid/dicom/actions/jitter.py @@ -24,20 +24,37 @@ from deid.logger import bot from deid.utils import get_timestamp +from deid.utils import parse_keyvalue_pairs # Timestamps +def jitter_timestamp_func(item, value, field, **kwargs): + """ + A wrapper to jitter_timestamp so it works as a custom function. + """ + opts = parse_keyvalue_pairs(kwargs.get("extras")) + + # Default to jitter by one day + value = int(opts.get("days", 1)) + + # The user can optionally provide years + if "years" in opts: + value = (int(opts["years"]) * 365) + value + return jitter_timestamp(field, value) + + def jitter_timestamp(field, value): - """if present, jitter a timestamp in dicom - field "field" by number of days specified by "value" - The value can be positive or negative. + """Jitter a timestamp "field" by number of days specified by "value" + + The value can be positive or negative. This function is grandfathered + into deid custom funcs, as it existed before they did. Since a custom + func requires an item, we have a wrapper above to support this use case. 
Parameters ========== field: the field with the timestamp value: number of days to jitter by. Jitter bug! - """ if not isinstance(value, int): value = int(value) diff --git a/deid/dicom/actions/uids.py b/deid/dicom/actions/uids.py new file mode 100644 index 00000000..cef36668 --- /dev/null +++ b/deid/dicom/actions/uids.py @@ -0,0 +1,90 @@ +""" + +Copyright (c) 2022 Vanessa Sochat + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +""" + +from deid.utils import parse_keyvalue_pairs +from pydicom.uid import generate_uid as pydicom_generate_uid +from deid.logger import bot +import uuid + + +def basic_uuid(item, value, field, **kwargs): + """A basic function to replace a field with a uuid.uuid4() string""" + return str(uuid.uuid4()) + + +def pydicom_uuid(item, value, field, **kwargs): + """ + Use pydicom to generate the UID. Optional kwargs include: + + prefix (str): provide a custom prefix + stable_remapping (bool): if true, use the original value for entropy.
+ This ensures stability across different runs that use the same UID. + + The prefix must match '^(0|[1-9][0-9]*)(\\.(0|[1-9][0-9]*))*\\.$' + """ + opts = parse_keyvalue_pairs(kwargs.get("extras")) + + # We always provide a prefix so the stable remapping is done + prefix = opts.get("prefix", "2.25.") + stable_remapping = opts.get("stable_remapping", True) + entropy_srcs = [] + + # They would need to unset the default prefix + if stable_remapping is True and not prefix: + bot.warning("A prefix must be provided to use stable remapping.") + + if stable_remapping is True: + original = str(field.element.value) + entropy_srcs.append(original) + return pydicom_generate_uid(prefix=prefix, entropy_srcs=entropy_srcs) + + +def suffix_uuid(item, value, field, **kwargs): + """Return the same field, with a uuid suffix. + + Provided in docs: https://pydicom.github.io/deid/examples/func-replace/ + """ + # a field can either be just the name string, or a DicomElement + if hasattr(field, "name"): + field = field.name + prefix = field.lower().replace(" ", " ") + return prefix + "-" + str(uuid.uuid4()) + + +def dicom_uuid(item, value, field, dicom, **kwargs): + """ + Generate a dicom uid that better conforms to the dicom standard. + """ + # a field can either be just the name string, or a DicomElement + if hasattr(field, "name"): + field = field.name + + opts = parse_keyvalue_pairs(kwargs.get("extras")) + org_root = opts.get("org_root", "anonymous-organization") + + bigint_uid = str(uuid.uuid4().int) + full_uid = org_root + "." + bigint_uid + + # A DICOM UID is limited to 64 characters + return full_uid[0:64] diff --git a/deid/dicom/fields.py b/deid/dicom/fields.py index 6f879953..e2d7af83 100644 --- a/deid/dicom/fields.py +++ b/deid/dicom/fields.py @@ -30,7 +30,10 @@ class DicomField: - """A dicom field holds the element, and a string that represents the entire + """ + A dicom field. 
+ + A dicom field holds the element, and a string that represents the entire nested structure (e.g., SequenceName__CodeValue). """ @@ -48,18 +51,25 @@ def __repr__(self): @property def tag(self): - """Return a string of the element tag.""" + """ + Return a string of the element tag. + """ return str(self.element.tag) @property def stripped_tag(self): - """Return the stripped element tag""" + """ + Return the stripped element tag + """ return re.sub("([(]|[)]|,| )", "", str(self.element.tag)) # Contains def name_contains(self, expression): - """use re to search a field for a regular expression, meaning + """ + Determine if a name contains a pattern or expression. + + Use re to search a field for a regular expression, meaning the name, the keyword (nested) or the string tag. name.lower: includes nested keywords (e.g., Sequence_Child) @@ -78,7 +88,9 @@ def name_contains(self, expression): return False def value_contains(self, expression): - """use re to search a field value for a regular expression""" + """ + Use re to search a field value for a regular expression + """ values = self.element.value # If we are not dealing with a list @@ -94,7 +106,10 @@ def value_contains(self, expression): def extract_item(item, prefix=None, entry=None): - """a helper function to extract sequence, will extract values from + """ + Extract values from a dicom sequence depending on the type. + + A helper function to extract sequence, will extract values from a dicom sequence depending on the type. Parameters @@ -127,7 +142,10 @@ def extract_item(item, prefix=None, entry=None): def extract_sequence(sequence, prefix=None): - """return a pydicom.sequence.Sequence recursively + """ + Extract a sequence recursively. + + return a pydicom.sequence.Sequence recursively as a flattened list of items. 
For example, a nested FieldA and FieldB would return as: @@ -158,8 +176,10 @@ def extract_sequence(sequence, prefix=None): def expand_field_expression(field, dicom, contenders=None): - """Get a list of fields based on an expression. If - no expression found, return single field. Options for fields include: + """ + Get a list of fields based on an expression. + + If no expression found, return single field. Options for fields include: endswith: filter to fields that end with the expression startswith: filter to fields that start with the expression @@ -218,8 +238,9 @@ def expand_field_expression(field, dicom, contenders=None): def get_fields(dicom, skip=None, expand_sequences=True, seen=None): - """expand all dicom fields into a list, where each entry is - a DicomField. If we find a sequence, we unwrap it and + """Expand all dicom fields into a list. + + Each entry is a DicomField. If we find a sequence, we unwrap it and represent the location with the name (e.g., Sequence__Child) """ skip = skip or [] @@ -233,7 +254,9 @@ def get_fields(dicom, skip=None, expand_sequences=True, seen=None): datasets = [dicom, dicom.file_meta] def add_element(element, name, uid, is_filemeta): - """Add an element to fields, but only if it has not been seen. + """ + Add an element to fields, but only if it has not been seen. + The uid is derived from the tag (group, element) and includes nesting, so the "same" tag on different levels is considered different. diff --git a/deid/dicom/filter.py b/deid/dicom/filter.py index d8e8fdb1..1fc4b4ba 100644 --- a/deid/dicom/filter.py +++ b/deid/dicom/filter.py @@ -138,7 +138,9 @@ def notEquals(self, field, term): def missing(self, field): - """missing returns True if the dicom is missing the field entirely + """Determine if the dicom is missing a field. 
+ + Missing returns True if the dicom is missing the field entirely This means that the entire field is None """ content = self.get(field) @@ -148,7 +150,9 @@ def missing(self, field): def empty(self, field): - """empty returns True if the value is found to be "". If the field + """Determine if the value is empty. + + Empty returns True if the value is found to be "". If the field is not present for the dicom, then we return False (missing != empty) """ if field not in self: @@ -185,7 +189,10 @@ def empty(self, field): def compareBase(self, field, expression, func, ignore_case=True): - """compareBase takes either re.search (for contains) or + """ + Search a field for an expression. + + compareBase takes either re.search (for contains) or re.match (for matches) and returns True if the given regular expression is contained or matched """ @@ -217,7 +224,10 @@ def compareBase(self, field, expression, func, ignore_case=True): def matches(self, field, expression): - """matches returns true if the value of the identifier matches + """ + Determine if a field value matches an expression. + + matches returns true if the value of the identifier matches the regular expression specified in the string argument; otherwise, it returns false. """ @@ -225,7 +235,10 @@ def matches(self, field, expression): def contains(self, field, expression): - """contains returns true if the value of the identifier + """ + Determine if a field value contains an expression. + + contains returns true if the value of the identifier contains the the string argument anywhere within it; otherwise, it returns false. """ @@ -233,7 +246,10 @@ def contains(self, field, expression): def notContains(self, field, expression): - """notContains returns true if the value of the identifier + """ + Determine if a field value does not contain an expression. 
+ + notContains returns true if the value of the identifier does not contain the the string argument anywhere within it; """ return not self.compareBase(field=field, expression=expression, func=re.search) @@ -250,7 +266,10 @@ def notContains(self, field, expression): def startsWith(self, field, term): - """startsWith returns true if the value of the identifier + """ + Determine if a field value starts with an expression. + + startsWith returns true if the value of the identifier starts with the string argument; otherwise, it returns false. """ expression = "^%s" % term @@ -258,7 +277,10 @@ def startsWith(self, field, term): def endsWith(self, field, term): - """endsWith returns true if the value of the identifier ends with + """ + Determine if a field value ends with an expression. + + endsWith returns true if the value of the identifier ends with the string argument; otherwise, it returns false. """ expression = "%s$" % term diff --git a/deid/dicom/header.py b/deid/dicom/header.py index 303a45c3..d7b969c7 100644 --- a/deid/dicom/header.py +++ b/deid/dicom/header.py @@ -45,7 +45,9 @@ def get_identifiers( remove_private=False, disable_skip=False, ): - """extract all identifiers from a dicom image. + """ + Extract all identifiers from a dicom image. + This function returns a lookup by file name, where each value indexed includes a dictionary of nested fields (indexed by nested tag). 
diff --git a/deid/dicom/parser.py b/deid/dicom/parser.py index 0ee33912..f041ad5d 100644 --- a/deid/dicom/parser.py +++ b/deid/dicom/parser.py @@ -35,7 +35,7 @@ from deid.config import DeidRecipe from deid.config.standards import actions as valid_actions from deid.dicom.utils import save_dicom -from deid.dicom.actions import jitter_timestamp +from deid.dicom.actions import jitter_timestamp, deid_funcs from deid.dicom.tags import remove_sequences, get_private, get_tag, add_tag from deid.dicom.groups import extract_values_list, extract_fields_list from deid.dicom.fields import get_fields, expand_field_expression, DicomField @@ -49,7 +49,10 @@ class DicomParser: - """A dicom header serves as a cache to read in all fields from a dicom + """ + Parse a dicom, performing one or more actions on fields. + + A dicom parser serves as a cache to read in all fields from a dicom file. For each, we store the element and child elements """ @@ -72,6 +75,9 @@ def __init__( bot.error("Cannot find config %s, exiting" % (config)) self.config = read_json(config, ordered_dict=True) + # Keep a lookup of deid provided functions + self.deid_funcs = deid_funcs + # Deid can be a recipe or filename if not isinstance(recipe, DeidRecipe): recipe = DeidRecipe(recipe) @@ -85,7 +91,10 @@ def __repr__(self): return self.__str__() def load(self, dicom_file, force=True): - """Ensure that the dicom file exists, and use full path. Here + """ + Load the dicom file. + + Ensure that the dicom file exists, and use full path. Here we load the file, and save the dicom, dicom_file, and dicom_name. """ # Reset seen, which is generated when we parse @@ -106,7 +115,9 @@ def load(self, dicom_file, force=True): self.dicom_name = os.path.basename(self.dicom_file) def define(self, name, value): - """Add a function or variable to the lookup for later usage. + """ + Add a function or variable to the lookup for later usage. + This can be used for functions, lists, or variables. 
""" self.lookup[name] = value @@ -117,7 +128,10 @@ def reset_preamble(self): self.dicom.preamble = b"\0" * 128 def get_nested_field(self, field, return_parent=False): - """Based on a DicomField, return the one referenced in self.dicom. + """ + Retrieve a nested field. + + Based on a DicomField, return the one referenced in self.dicom. If a delete is needed, then the parent should be returned as well. """ # The field provided will be last in the list, the one we want @@ -165,8 +179,10 @@ def get_nested_field(self, field, return_parent=False): return desired def delete_field(self, field): - """Delete a field from the dicom. We do this by way of - parsing all nested levels of a tag into actual tags, + """ + Delete a field from the dicom. + + We do this by way of parsing all nested levels of a tag into actual tags, and deleting the child node. """ # Returns the parent, and a DataElement (indexes into parent by tag) @@ -176,7 +192,9 @@ def delete_field(self, field): del self.fields[field.uid] def blank_field(self, field): - """Blank a field""" + """ + Blank a field + """ element = self.get_nested_field(field) # Assert we have a data element, and can blank a string @@ -189,13 +207,17 @@ def blank_field(self, field): bot.warning("Unrecognized VR for %s, skipping blank." % field) def replace_field(self, field, value): - """Replace a value in a field. This uses the same function as ADD, - except we know that it's likely that the dicom has the value. + """ + Replace a value in a field. + This uses the same function as ADD, but likely the dicom has the value. """ self.add_field(field, value) def parse(self, strip_sequences=False, remove_private=False): - """The parse action corresponds to iterating through fields, and + """ + Parse the dicom. + + The parse action corresponds to iterating through fields, and for each one, saving a data structure with the full element, the string (with nested representation of the keywords) and the tag. 
We want to save all three in a flat list that is @@ -322,7 +344,10 @@ def find_by_name(self, name): # Actions def perform_action(self, field, value, action, filemeta=False): - """perform action takes an action (dictionary with field, action, value) + """ + Perform an action on a field. + + perform action takes an action (dictionary with field, action, value) and performs the action on the loaded dicom. Parameters @@ -375,18 +400,23 @@ def perform_action(self, field, value, action, filemeta=False): # Otherwise, these are operations on existing fields else: - """clone the fields dictionary. delete actions must also delete from the fields dictionary. - performing the clone and iterating on the clone allows the deletions while preventing a - runtime error - "dictionary changed size during iterations" - """ + # without deepcopy - "dictionary changed size during iterations" temp_fields = deepcopy(fields) for uid, field in temp_fields.items(): self._run_action(field=field, action=action, value=value) def add_field(self, field, value): - """add a field to the dicom. If it's already present, update the value.""" + """ + Add a field to the dicom. + + If it's already present, update the value. + """ value = parse_value( - item=self.lookup, value=value, field=field, dicom=self.dicom + item=self.lookup, + value=value, + field=field, + dicom=self.dicom, + funcs=self.deid_funcs, ) # The addition will be different depending on if we have filemeta @@ -440,7 +470,10 @@ def update_dicom(element, is_filemeta): bot.warning("Cannot find tag for field %s, skipping." % name) def _run_action(self, field, action, value=None): - """perform_action (above) typically is called using a loaded deid, + """ + Underlying function to run an action. + + perform_action (above) typically is called using a loaded deid, and _run_addition is typically done via an addition in a config Both result in a call to this function. 
If an action fails or is not done, None is returned, and the calling function should handle this. @@ -460,7 +493,11 @@ def _run_action(self, field, action, value=None): # Code the value with something in the response elif action == "JITTER": value = parse_value( - item=self.lookup, dicom=self.dicom, value=value, field=field + item=self.lookup, + dicom=self.dicom, + value=value, + field=field, + funcs=self.deid_funcs, ) if value is not None: # Jitter the field by the supplied value @@ -479,14 +516,20 @@ def _run_action(self, field, action, value=None): do_removal = True if value != None: do_removal = parse_value( - item=self.lookup, dicom=self.dicom, value=value, field=field + item=self.lookup, + dicom=self.dicom, + value=value, + field=field, + funcs=self.deid_funcs, ) - if do_removal == True: + if do_removal is True: self.delete_field(field) def remove_private(self): - """Remove private tags from the loaded dicom""" + """ + Remove private tags from the loaded dicom + """ try: self.dicom.remove_private_tags() except Exception: diff --git a/deid/tests/Xtest_dicom_header.py b/deid/tests/Xtest_dicom_header.py index d87b4db0..5bc9ee1f 100644 --- a/deid/tests/Xtest_dicom_header.py +++ b/deid/tests/Xtest_dicom_header.py @@ -30,7 +30,6 @@ import unittest import tempfile import shutil -import json import os from deid.utils import get_installdir diff --git a/deid/tests/common.py b/deid/tests/common.py index 07d14fca..213b1754 100644 --- a/deid/tests/common.py +++ b/deid/tests/common.py @@ -52,8 +52,20 @@ def get_dicom(dataset): return read_file(next(dicom_files)) +def get_same_file(dataset): + """ + get a consistent dicom file + """ + from deid.dicom import get_files + + dicom_files = list(get_files(dataset)) + return dicom_files[0] + + def get_file(dataset): - """helper to get a dicom file""" + """ + get a dicom file + """ from deid.dicom import get_files dicom_files = get_files(dataset) diff --git a/deid/tests/test_clean.py b/deid/tests/test_clean.py index 
d5e3508e..fcd17b0c 100644 --- a/deid/tests/test_clean.py +++ b/deid/tests/test_clean.py @@ -27,7 +27,6 @@ import tempfile import shutil import os -import numpy as np from deid.utils import get_installdir from deid.data import get_dataset diff --git a/deid/tests/test_config.py b/deid/tests/test_config.py index a4d4b811..935030ee 100644 --- a/deid/tests/test_config.py +++ b/deid/tests/test_config.py @@ -28,7 +28,6 @@ import unittest import tempfile import shutil -import json import os from deid.utils import get_installdir diff --git a/deid/tests/test_data.py b/deid/tests/test_data.py index b43396d7..31b312fe 100644 --- a/deid/tests/test_data.py +++ b/deid/tests/test_data.py @@ -28,9 +28,6 @@ """ import unittest -import tempfile -import shutil -import json import os diff --git a/deid/tests/test_deid_recipe.py b/deid/tests/test_deid_recipe.py index afcef581..225e697d 100644 --- a/deid/tests/test_deid_recipe.py +++ b/deid/tests/test_deid_recipe.py @@ -28,7 +28,6 @@ import unittest import tempfile import shutil -import json import os from deid.config import DeidRecipe diff --git a/deid/tests/test_dicom_fields.py b/deid/tests/test_dicom_fields.py index b1d8574e..c7820304 100644 --- a/deid/tests/test_dicom_fields.py +++ b/deid/tests/test_dicom_fields.py @@ -28,7 +28,6 @@ import unittest import tempfile import shutil -import json import os from deid.utils import get_installdir diff --git a/deid/tests/test_dicom_funcs.py b/deid/tests/test_dicom_funcs.py new file mode 100644 index 00000000..36ce9fbd --- /dev/null +++ b/deid/tests/test_dicom_funcs.py @@ -0,0 +1,280 @@ +#!/usr/bin/env python + +""" +Testing deid provided functions + +Copyright (c) 2020-2022 Vanessa Sochat + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 
+copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +""" + +import unittest +import tempfile +import shutil +import re + +from deid.utils import get_installdir +from deid.data import get_dataset +from deid.dicom.parser import DicomParser +from deid.tests.common import get_file, get_same_file, create_recipe + +uuid_regex = "[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}" + + +class TestDicomFuncs(unittest.TestCase): + def setUp(self): + self.pwd = get_installdir() + self.dataset = get_dataset("humans") + self.tmpdir = tempfile.mkdtemp() + print("\n######################START######################") + + def tearDown(self): + shutil.rmtree(self.tmpdir) + print("\n######################END########################") + + def test_user_provided_func(self): + """ + %header + REMOVE ALL func:myfunction + """ + print("Test user provided func") + dicom_file = get_file(self.dataset) + + def myfunction(dicom, value, field, item): + from pydicom.tag import Tag + + tag = Tag(field.element.tag) + + if tag in dicom: + currentvalue = str(dicom.get(tag).value).lower() + if "hibbard" in currentvalue: + return True + return False + + actions = [{"action": "REMOVE", "field": "ALL", "value": "func:myfunction"}] + recipe = create_recipe(actions) + + # Create a parser, define function for it + parser = 
DicomParser(dicom_file, recipe=recipe) + parser.define("myfunction", myfunction) + parser.parse() + + self.assertEqual(160, len(parser.dicom)) + with self.assertRaises(KeyError): + parser.dicom["ReferringPhysicianName"].value + with self.assertRaises(KeyError): + parser.dicom["PhysiciansOfRecord"].value + with self.assertRaises(KeyError): + parser.dicom["RequestingPhysician"].value + with self.assertRaises(KeyError): + parser.dicom["00331019"].value + + def test_basic_uuid(self): + """ + %header + REPLACE ReferringPhysicianName deid_func:basic_uuid + """ + print("Test deid_func:basic_uuid") + + dicom_file = get_file(self.dataset) + actions = [ + { + "action": "REPLACE", + "field": "ReferringPhysicianName", + "value": "deid_func:basic_uuid", + } + ] + recipe = create_recipe(actions) + + # Create a parser, define function for it + parser = DicomParser(dicom_file, recipe=recipe) + parser.parse() + + # 8905e722-8103-4823-bc8f-8aed967e272d + print(parser.dicom["ReferringPhysicianName"].value) + assert re.search(uuid_regex, str(parser.dicom["ReferringPhysicianName"].value)) + + def test_pydicom_uuid(self): + """ + %header + REPLACE ReferringPhysicianName deid_func:pydicom_uuid + """ + print("Test deid_func:pydicom_uuid") + + dicom_file = get_file(self.dataset) + actions = [ + { + "action": "REPLACE", + "field": "ReferringPhysicianName", + "value": "deid_func:pydicom_uuid", + } + ] + recipe = create_recipe(actions) + + # Create a parser, define function for it + parser = DicomParser(dicom_file, recipe=recipe) + parser.parse() + + # Randomness is anything, but should be all numbers + print(parser.dicom["ReferringPhysicianName"].value) + name = str(parser.dicom["ReferringPhysicianName"].value) + assert re.search("([0-9]|.)+", name) + + # This is the pydicom default, and we default to stable remapping + assert ( + name == "2.25.39101090714049289438893821151950032074223798085258118413707" + ) + + # Add a custom prefix + # must match '^(0|[1-9][0-9]*)(\\.(0|[1-9][0-9]*))*\\.$' 
+ actions = [ + { + "action": "REPLACE", + "field": "ReferringPhysicianName", + "value": "deid_func:pydicom_uuid prefix=1.55.", + } + ] + recipe = create_recipe(actions) + parser = DicomParser(dicom_file, recipe=recipe) + parser.parse() + + # Randomness is anything, but should be all numbers + print(parser.dicom["ReferringPhysicianName"].value) + name = str(parser.dicom["ReferringPhysicianName"].value) + assert name.startswith("1.55.") + + # This should always be consistent if we use the original as entropy + dicom_file = get_same_file(self.dataset) + actions = [ + { + "action": "REPLACE", + "field": "ReferringPhysicianName", + "value": "deid_func:pydicom_uuid stable_remapping=false", + } + ] + recipe = create_recipe(actions) + parser = DicomParser(dicom_file, recipe=recipe) + parser.parse() + + # Randomness is anything, but should be all numbers + print(parser.dicom["ReferringPhysicianName"].value) + name = str(parser.dicom["ReferringPhysicianName"].value) + assert ( + name != "2.25.39101090714049289438893821151950032074223798085258118413707" + ) + + def test_suffix_uuid(self): + """ + %header + REPLACE ReferringPhysicianName deid_func:suffix_uuid + """ + print("Test deid_func:basic_uuid") + + dicom_file = get_file(self.dataset) + actions = [ + { + "action": "REPLACE", + "field": "ReferringPhysicianName", + "value": "deid_func:suffix_uuid", + } + ] + recipe = create_recipe(actions) + + # Create a parser, define function for it + parser = DicomParser(dicom_file, recipe=recipe) + parser.parse() + + # 8905e722-8103-4823-bc8f-8aed967e272d + print(parser.dicom["ReferringPhysicianName"].value) + name = str(parser.dicom["ReferringPhysicianName"].value) + assert "referringphysicianname-" in name + assert re.search(uuid_regex, name) + + def test_dicom_uuid(self): + """ + %header + REPLACE ReferringPhysicianName deid_func:suffix_uuid org=myorg + """ + print("Test deid_func:dicom_uuid") + + dicom_file = get_file(self.dataset) + actions = [ + { + "action": "REPLACE", + 
"field": "ReferringPhysicianName", + "value": "deid_func:dicom_uuid org_root=1.2.826.0.1.3680043.10.188", + } + ] + recipe = create_recipe(actions) + + # Create a parser, define function for it + parser = DicomParser(dicom_file, recipe=recipe) + parser.parse() + + # 8905e722-8103-4823-bc8f-8aed967e272d + print(parser.dicom["ReferringPhysicianName"].value) + name = str(parser.dicom["ReferringPhysicianName"].value) + assert "1.2.826.0.1.3680043.10.188" in name + assert len(name) == 64 + + def test_dicom_jitter(self): + """RECIPE RULE + REPLACE AcquisitionDate deid_func:jitter days=1 + """ + print("Test deid_func:jitter") + + dicom_file = get_file(self.dataset) + actions = [ + { + "action": "REPLACE", + "field": "AcquisitionDate", + "value": "deid_func:jitter days=1", + } + ] + recipe = create_recipe(actions) + + # Create a parser, define function for it + parser = DicomParser(dicom_file, recipe=recipe) + + original_date = parser.dicom.AcquisitionDate + assert original_date == "20230101" + parser.parse() + jittered_date = str(parser.dicom["AcquisitionDate"].value) + assert jittered_date == "20230102" + + # Add a day and a year + actions = [ + { + "action": "REPLACE", + "field": "AcquisitionDate", + "value": "deid_func:jitter days=1 years=1", + } + ] + recipe = create_recipe(actions) + + # Create a parser, define function for it + parser = DicomParser(dicom_file, recipe=recipe) + parser.parse() + jittered_date = str(parser.dicom["AcquisitionDate"].value) + assert jittered_date == "20240102" + + +if __name__ == "__main__": + unittest.main() diff --git a/deid/tests/test_dicom_groups.py b/deid/tests/test_dicom_groups.py index 531e1fab..69d7c056 100644 --- a/deid/tests/test_dicom_groups.py +++ b/deid/tests/test_dicom_groups.py @@ -28,16 +28,14 @@ import unittest import tempfile import shutil -import json import os from deid.utils import get_installdir from deid.data import get_dataset -from deid.config import DeidRecipe from deid.dicom.fields import get_fields from 
deid.dicom import get_identifiers, replace_identifiers from deid.dicom.parser import DicomParser -from deid.tests.common import get_file, get_dicom +from deid.tests.common import get_dicom class TestDicomGroups(unittest.TestCase): diff --git a/deid/tests/test_dicom_tags.py b/deid/tests/test_dicom_tags.py index 98e07be0..eeabe817 100644 --- a/deid/tests/test_dicom_tags.py +++ b/deid/tests/test_dicom_tags.py @@ -25,12 +25,10 @@ import unittest import tempfile import shutil -import json import os from deid.utils import get_installdir from deid.data import get_dataset -from deid.tests.common import get_file class TestDicomTags(unittest.TestCase): diff --git a/deid/tests/test_dicom_utils.py b/deid/tests/test_dicom_utils.py index 860ae820..42f2dc49 100644 --- a/deid/tests/test_dicom_utils.py +++ b/deid/tests/test_dicom_utils.py @@ -30,12 +30,11 @@ import unittest import tempfile import shutil -import json import os from deid.utils import get_installdir from deid.data import get_dataset -from deid.tests.common import get_file, get_dicom +from deid.tests.common import get_dicom global generate_uid @@ -56,7 +55,6 @@ def test_get_files(self): print("Test test_get_files") print("Case 1: Test get files from dataset") from deid.dicom import get_files - from deid.config import load_deid found = 0 for dicom_file in get_files(self.dataset): @@ -75,7 +73,6 @@ def test_get_files_as_list(self): print("Test test_get_files_as_list") print("Case 1: Test get files from dataset") from deid.dicom import get_files - from deid.config import load_deid dicom_files = list(get_files(self.dataset)) found = len(dicom_files) diff --git a/deid/tests/test_file_meta.py b/deid/tests/test_file_meta.py index f8b3ea66..2c267deb 100644 --- a/deid/tests/test_file_meta.py +++ b/deid/tests/test_file_meta.py @@ -26,22 +26,13 @@ """ import unittest -import tempfile -import shutil -import json -import os from deid.utils import get_installdir from deid.data import get_dataset -from deid.dicom.parser import 
DicomParser -from deid.dicom import get_identifiers, replace_identifiers -from pydicom import read_file -from pydicom.sequence import Sequence +from deid.dicom import replace_identifiers from deid.tests.common import create_recipe, get_file -from collections import OrderedDict - class TestDicom(unittest.TestCase): def setUp(self): diff --git a/deid/tests/test_filter_detect.py b/deid/tests/test_filter_detect.py index 6d4dc991..0828cc6f 100644 --- a/deid/tests/test_filter_detect.py +++ b/deid/tests/test_filter_detect.py @@ -26,20 +26,12 @@ import unittest import tempfile import shutil -import json import os -import numpy as np from deid.utils import get_installdir from deid.data import get_dataset -from deid.dicom.parser import DicomParser -from deid.dicom import get_identifiers, replace_identifiers -from pydicom import read_file -from pydicom.sequence import Sequence from deid.tests.common import get_file -from collections import OrderedDict - class TestFilterDetect(unittest.TestCase): def setUp(self): diff --git a/deid/tests/test_replace_identifiers.py b/deid/tests/test_replace_identifiers.py index c3abb332..1a6be861 100644 --- a/deid/tests/test_replace_identifiers.py +++ b/deid/tests/test_replace_identifiers.py @@ -28,7 +28,6 @@ import unittest import tempfile import shutil -import json import os from deid.utils import get_installdir @@ -217,7 +216,6 @@ def test_remove(self): {"action": "REMOVE", "field": field2name}, ] recipe = create_recipe(actions) - dicom = read_file(dicom_file) # Create a DicomParser to easily find fields parser = DicomParser(dicom_file) @@ -248,9 +246,9 @@ def test_remove(self): self.assertEqual(1, len(result)) with self.assertRaises(KeyError): - check1 = result[0][field1name].value + result[0][field1name].value with self.assertRaises(KeyError): - check2 = result[0][field2name].value + result[0][field2name].value def test_add_tag_variable(self): """RECIPE RULE @@ -387,11 +385,11 @@ def test_expanders(self): self.assertEqual(1, len(result)) 
self.assertEqual(157, len(result[0])) with self.assertRaises(KeyError): - check1 = result[0]["ExposureTime"].value + result[0]["ExposureTime"].value with self.assertRaises(KeyError): - check2 = result[0]["TotalCollimationWidth"].value + result[0]["TotalCollimationWidth"].value with self.assertRaises(KeyError): - check3 = result[0]["DataCollectionDiameter"].value + result[0]["DataCollectionDiameter"].value def test_expander_except(self): # Remove all fields except Manufacturer @@ -418,11 +416,11 @@ def test_expander_except(self): self.assertEqual("SIEMENS", result[0]["Manufacturer"].value) with self.assertRaises(KeyError): - check1 = result[0]["ExposureTime"].value + result[0]["ExposureTime"].value with self.assertRaises(KeyError): - check2 = result[0]["TotalCollimationWidth"].value + result[0]["TotalCollimationWidth"].value with self.assertRaises(KeyError): - check3 = result[0]["DataCollectionDiameter"].value + result[0]["DataCollectionDiameter"].value def test_fieldset_remove(self): """RECIPE @@ -471,11 +469,11 @@ def test_fieldset_remove(self): print(len(result[0])) self.assertEqual(expected_number, len(result[0])) with self.assertRaises(KeyError): - check1 = result[0]["Manufacturer"].value + result[0]["Manufacturer"].value with self.assertRaises(KeyError): - check2 = result[0]["TotalCollimationWidth"].value + result[0]["TotalCollimationWidth"].value with self.assertRaises(KeyError): - check3 = result[0]["SingleCollimationWidth"].value + result[0]["SingleCollimationWidth"].value def test_valueset_remove(self): """ @@ -516,11 +514,11 @@ def test_valueset_remove(self): ) self.assertEqual(1, len(result)) with self.assertRaises(KeyError): - check1 = result[0]["00090010"].value + result[0]["00090010"].value with self.assertRaises(KeyError): - check2 = result[0]["Manufacturer"].value + result[0]["Manufacturer"].value with self.assertRaises(KeyError): - check3 = result[0]["PhysiciansOfRecord"].value + result[0]["PhysiciansOfRecord"].value def 
test_fieldset_remove_private(self): """ @@ -550,9 +548,9 @@ def test_fieldset_remove_private(self): self.assertEqual(162, len(parser.dicom)) self.assertEqual("SIEMENS CT VA0 COAD", parser.dicom["00190010"].value) with self.assertRaises(KeyError): - check1 = parser.dicom["00090010"].value + parser.dicom["00090010"].value with self.assertRaises(KeyError): - check2 = parser.dicom["PatientID"].value + parser.dicom["PatientID"].value def test_valueset_private(self): """ @@ -580,11 +578,11 @@ def test_valueset_private(self): assert entry in parser.lookup["value_set2_private"] with self.assertRaises(KeyError): - check1 = parser.dicom["OtherPatientIDs"].value + parser.dicom["OtherPatientIDs"].value with self.assertRaises(KeyError): - check2 = parser.dicom["Manufacturer"].value + parser.dicom["Manufacturer"].value with self.assertRaises(KeyError): - check3 = parser.dicom["00190010"].value + parser.dicom["00190010"].value def test_tag_expanders_taggroup(self): # This tests targets the group portion of a tag identifier - 0009 in (0009, 0001) @@ -606,7 +604,7 @@ def test_tag_expanders_taggroup(self): ) self.assertEqual(1, len(result)) with self.assertRaises(KeyError): - check1 = result[0]["00090010"].value + result[0]["00090010"].value def test_tag_expanders_midtag(self): """REMOVE contains:8103 @@ -655,9 +653,9 @@ def test_tag_expanders_tagelement(self): self.assertEqual(1, len(result)) self.assertEqual(139, len(result[0])) with self.assertRaises(KeyError): - check1 = result[0]["00090010"].value + result[0]["00090010"].value with self.assertRaises(KeyError): - check2 = result[0]["PatientID"].value + result[0]["PatientID"].value def test_remove_all_func(self): """ @@ -690,13 +688,13 @@ def contains_hibbard(dicom, value, field, item): self.assertEqual(160, len(parser.dicom)) with self.assertRaises(KeyError): - check1 = parser.dicom["ReferringPhysicianName"].value + parser.dicom["ReferringPhysicianName"].value with self.assertRaises(KeyError): - check2 = 
parser.dicom["PhysiciansOfRecord"].value + parser.dicom["PhysiciansOfRecord"].value with self.assertRaises(KeyError): - check3 = parser.dicom["RequestingPhysician"].value + parser.dicom["RequestingPhysician"].value with self.assertRaises(KeyError): - check4 = parser.dicom["00331019"].value + parser.dicom["00331019"].value def test_strip_sequences(self): """ @@ -724,7 +722,7 @@ def test_strip_sequences(self): self.assertEqual(1, len(result)) self.assertEqual(156, len(result[0])) with self.assertRaises(KeyError): - check1 = result[0]["00081110"].value + result[0]["00081110"].value for tag in result[0]: self.assertFalse(isinstance(tag.value, Sequence)) @@ -828,7 +826,7 @@ def test_addremove_compounding(self): self.assertEqual(1, len(result)) self.assertEqual(155, len(result[0])) with self.assertRaises(KeyError): - willerror = result[0]["PatientIdentityRemoved"].value + result[0]["PatientIdentityRemoved"].value def test_removeadd_compounding(self): """ @@ -944,9 +942,9 @@ def test_valueset_remove_one_empty(self): self.assertEqual(1, len(result)) self.assertNotEqual(len(original_dataset), len(result[0])) with self.assertRaises(KeyError): - check1 = result[0]["00090010"].value + result[0]["00090010"].value with self.assertRaises(KeyError): - check2 = result[0]["Manufacturer"].value + result[0]["Manufacturer"].value def test_jitter_values(self): """ diff --git a/deid/tests/test_utils.py b/deid/tests/test_utils.py index a7ffa3cb..4c9dcffd 100644 --- a/deid/tests/test_utils.py +++ b/deid/tests/test_utils.py @@ -84,7 +84,7 @@ def test_write_read_files(self): bad_json = {"Wakkawakkawakka'}": [{True}, "2", 3]} tmpfile = tempfile.mkstemp()[1] os.remove(tmpfile) - with self.assertRaises(TypeError) as cm: + with self.assertRaises(TypeError): write_json(bad_json, tmpfile) print("Case 2: Providing good json") diff --git a/deid/tests/test_utils_files.py b/deid/tests/test_utils_files.py index 67f75a60..0cf624f6 100644 --- a/deid/tests/test_utils_files.py +++ 
b/deid/tests/test_utils_files.py @@ -28,12 +28,10 @@ import unittest import tempfile import shutil -import json import os from deid.utils import get_installdir from deid.data import get_dataset -from deid.tests.common import get_file class TestDicom(unittest.TestCase): @@ -52,7 +50,6 @@ def test_get_files(self): print("Test test_get_files") print("Case 1: Test get files from dataset") from deid.dicom import get_files - from deid.config import load_deid found = 0 for dicom_file in get_files(self.dataset): @@ -71,7 +68,6 @@ def test_get_files_as_list(self): print("Test test_get_files_as_list") print("Case 1: Test get files from dataset") from deid.dicom import get_files - from deid.config import load_deid dicom_files = list(get_files(self.dataset)) found = len(dicom_files) diff --git a/deid/utils/__init__.py b/deid/utils/__init__.py index 9dc24817..2ffdd395 100644 --- a/deid/utils/__init__.py +++ b/deid/utils/__init__.py @@ -9,4 +9,4 @@ to_int, ) -from .actions import get_timestamp, get_func, parse_value +from .actions import get_timestamp, get_func, parse_value, parse_keyvalue_pairs diff --git a/deid/utils/actions.py b/deid/utils/actions.py index 7161d3c3..0110e43a 100644 --- a/deid/utils/actions.py +++ b/deid/utils/actions.py @@ -28,12 +28,16 @@ import re -def parse_value(dicom, value, item=None, field=None): - """parse_value will parse the value field of an action, +def parse_value(dicom, value, item=None, field=None, funcs=None): + """ + Parse_value will parse the value field of an action, either returning: 1. the string (string or from function) 2. 
a variable looked up (var:FieldName) """ + # custom function lookup + funcs = funcs or {} + # If item is passed as None if item is None: item = dict() @@ -48,6 +52,27 @@ def parse_value(dicom, value, item=None, field=None): return None return item[value_option] + # The user wants to use a deid provided function + elif value_type.lower() == "deid_func": + + # There can be additional key=value pairs + try: + value_option, extras = value_option.split(" ", 1) + except: + extras = "" + pass + + if value_option not in funcs: + bot.warning("%s not a known deid provided function." % (value_option)) + return None + + print(extras) + # item is the lookup, value from the recipe, and field + # The field is an entire dicom element object + return funcs[value_option]( + dicom=dicom, value=value, field=field, item=item, extras=extras + ) + # The user is providing a specific function elif value_type.lower() == "func": @@ -64,14 +89,39 @@ def parse_value(dicom, value, item=None, field=None): return value +def parse_keyvalue_pairs(pairs): + """ + Given a listing of extra arguments, parse into lookup dict. + """ + values = {} + if not pairs: + return values + for pair in pairs.split(" "): + if "=" not in pair: + continue + key, value = pair.split("=", 1) + value = value.strip() + + # Ensure we convert booleans and none/null + if value == "true": + value = True + if value == "false": + value = False + if value in ["none", "null"]: + value = None + values[key.strip()] = value + return values + + def get_func(function_name): - """get_func will return a function that is defined from a string. + """ + Get_func will return a function that is defined from a string. 
+ the function is assumed to be in this file Parameters ========== return a function from globals based on a name string - """ env = globals() if function_name in env: @@ -83,8 +133,10 @@ def get_func(function_name): def get_timestamp(item_date, item_time=None, jitter_days=None, format=None): - """get_timestamp will return (default) a UTC timestamp - with some date and (optional) time. A different format can be + """ + Get_timestamp will return (default) a UTC timestamp. + + This will have some date and (optional) time. A different format can be provided to change default behavior. eg: "%Y%m%d" """ if format is None: diff --git a/deid/version.py b/deid/version.py index 4970e5ef..4a8b82fe 100644 --- a/deid/version.py +++ b/deid/version.py @@ -22,13 +22,13 @@ """ -__version__ = "0.2.29" +__version__ = "0.2.3" AUTHOR = "Vanessa Sochat" -AUTHOR_EMAIL = "vsochat@stanford.edu" +AUTHOR_EMAIL = "vsoch@users.noreply.github.com" NAME = "deid" PACKAGE_URL = "https://github.com/pydicom/deid" KEYWORDS = "open source, python, anonymize, dicom" -DESCRIPTION = "deidentify dicom and other images with python and pydicom" +DESCRIPTION = "best effort deidentify dicom with python and pydicom" LICENSE = "LICENSE" INSTALL_REQUIRES = ( diff --git a/docs/_docs/contributing/code.md b/docs/_docs/contributing/code.md index 672f7c58..2918800c 100644 --- a/docs/_docs/contributing/code.md +++ b/docs/_docs/contributing/code.md @@ -12,3 +12,55 @@ Generally, for code contribution you should: 4. Open a pull request against the master branch. See the repository `CONTRIBUTING.md` for these same details. + +## Contributing a Custom Function + +Deid ships (as of version 0.2.3) with deid-provided functions that can be used in +header parsing. To contribute a custom function you should do the following: + + +1. Add a function to deid/dicom/actions, ideally in the appropriate file (e.g., uid functions in uuid.py, etc) +2. 
Ensure your function is added to the lookup in `deid/dicom/actions/__init__.py` so it can be found. +3. Add a test to `deid/tests/test_dicom_funcs.py` that ensures your function works, with or without custom variables. + + +Generally, a custom function should accept the following variables: + + - dicom: the dicom file + - item: expected to be the dictionary lookup of user provided values + - field: the dicom field + - value: the value to replace + +You can generally define a catch all `**kwargs` if you don't need a field. Finally, +if you do provide a custom variable, you'll need to also provide a default (or exit on error +if it's absolutely essential). As an example, if your custom function in the lookup is named +`generate_sesame_street_character` the user might provide a custom argument as follows: + +``` +%header + +REPLACE fields:PatientID deid_func:generate_sesame_street_character name=elmo +``` + +Within the function, you can expect the extra (unparsed) key value pairs to be provided as "extras" and you +can use the deid utils helper to parse these into a dictionary: + +```python +from deid.utils import parse_keyvalue_pairs +import random + +def generate_sesame_street_character(item, value, field, dicom, **kwargs): + """ + Add a sesame street character by name, or randomly chosen. + """ + opts = parse_keyvalue_pairs(kwargs.get("extras")) + + default_names = ["grover", "elmo", "big-bird", "oscar-the-grouch"] + name = opts.get("name") or random.choice(default_names) + + # The thing we return is the final value to replace the field with. + return name +``` + +And that should be it! You are free to use (or not use) the item, value, field, and dicom. +Please open an issue if you have any questions. 
diff --git a/docs/_docs/examples/func-replace.md b/docs/_docs/examples/func-replace.md index 26b577f2..638b4c8b 100644 --- a/docs/_docs/examples/func-replace.md +++ b/docs/_docs/examples/func-replace.md @@ -10,6 +10,8 @@ change field with a value derived from a function. This example was derived based on a prompt in [this pull request](https://github.com/pydicom/contrib-pydicom/pull/14). If you are interested in the code for this example, it's available [here](https://github.com/pydicom/deid/tree/master/examples/dicom/header-manipulation). +If you are interested in the functions provided by deid (and you don't want to write your +own function) see [this documentation](https://pydicom.github.io/deid/user-docs/recipe-funcs/). Let's get started! @@ -269,5 +271,6 @@ cleaned_files[0] (0020, 0052) Frame of Reference UID UI: frameofreferenceuid-1.2.826.0.1.3680043.10.188.3138524385829221974514732538424409758 ``` -That's it! If you need any help, please open an issue. Full code for the +That's it! If you need any help, please open an issue. If you think there is a function that could be added +to be provided [for all users](https://pydicom.github.io/deid/user-docs/recipe-funcs/) please also open an issue. Full code for the example above is [available here](https://github.com/pydicom/deid/tree/master/examples/dicom/header-manipulation). 
diff --git a/docs/_docs/user-docs/client.md b/docs/_docs/user-docs/client.md index 534ebf62..a2384e05 100644 --- a/docs/_docs/user-docs/client.md +++ b/docs/_docs/user-docs/client.md @@ -1,7 +1,7 @@ --- title: Deid Client category: User Documentation -order: 5 +order: 6 --- After you [install deid]({{ site.baseurl }}/install/) you will notice a command line application has been placed in your bin: diff --git a/docs/_docs/user-docs/recipe-funcs.md b/docs/_docs/user-docs/recipe-funcs.md new file mode 100644 index 00000000..211685b3 --- /dev/null +++ b/docs/_docs/user-docs/recipe-funcs.md @@ -0,0 +1,127 @@ +--- +title: Recipe Functions +category: User Documentation +order: 5 +--- + +The [recipe headers]({{ site.baseurl }}/user-docs/recipe-headers/) page taught you +how to write a recipe that has one or more commands to parse a dicom image header. +For example, we might have defined a custom function [per the example here](https://pydicom.github.io/deid/examples/func-replace/) +to replace patient info with a result from our custom function: + +``` +%header + +REPLACE fields:patient_info func:generate_uid +``` + +As of version 0.2.3 of deid, we have packaged functions along with deid that you can use without needing +to write your own! Current functions are provided for: + + - generating unique identifiers + - jittering + - *let us know if you want to contribute or request a new function!* + +The current offerings include the following: + +| Name | Description | Extra Params | +|---------------|-------------|--------------| +| `basic_uuid` | Modify with a simple `uuid.uuid4()` string | None | +| `dicom_uuid` | A more formal dicom uid that requires an org root | org_root | +| `suffix_uuid` | Make the value the field name with a `uuid.uuid4()` suffix. 
| None | +| `jitter` | The same as JITTER (grandfathered in) | days, years | + + +## A Simple UUID + +For a simple example, let's replace the recipe above with the deid provided "basic_uuid" function, +which is simply going to replace the field of our choice with a `uuid.uuid4()` string in Python. +That would look like this: + +``` +%header + +REPLACE fields:patient_info deid_func:basic_uuid +``` + +The only change is that we replaced `func` with `deid_func`. Deid will see this function +is provided in its library, and grab it for use. + + +## A Pydicom UUID + +Pydicom provides [a function to generate a UUID](https://pydicom.github.io/pydicom/dev/reference/generated/pydicom.uid.generate_uid.html) +and for most this is likely a good approach to take. The most basic usage (for one run) is to generate a random valid +unique identifier: + +``` +%header + +REPLACE ReferringPhysicianName deid_func:pydicom_uuid +``` + +The default uses `stable_remapping=true`, which means we use the original UUID as entropy +to be able to consistently return the same value between runs. You can disable it, however +we do not recommend it (although it may be appropriate for your use case). + +You can also optionally define a custom prefix. Note that it needs to match the +regular expression `^(0|[1-9][0-9]*)(\\.(0|[1-9][0-9]*))*\\.$` which (in spoken terms) +is a number followed by a period, another number, and ending also in a period (e.g., `1.55.`). + + +``` +%header + +REPLACE ReferringPhysicianName deid_func:pydicom_uuid prefix=1.55. +``` + +## A Dicom UUID + +A more "formal" uuid function was added that requires an organization root. Your +organization should have its own - for example the `PYMEDPHYS_ROOT_UID` is +"1.2.826.0.1.3680043.10.188" so we might do: + +``` +%header + +REPLACE fields:patient_info deid_func:dicom_uuid org_root=1.2.826.0.1.3680043.10.188 +``` +Notice how we've provided an extra argument, `org_root` to be parsed. 
If you don't +provide one, an `anonymous-organization` will be used, which isn't technically an organization root. + + +## A UUID Suffix + +If you simply want to take the current field name and add a uuid suffix to it as the value: + +``` +%header + +REPLACE fields:patient_info deid_func:suffix_uuid +``` +This would make a final value that looks something like `patient_info-5897bd32-b4f3-4bda-9dc5-2d29e5688ea1` + + +## Jitter + +Jitter is intended for datetime fields, and technically you can just use the `JITTER` function provided +natively in the recipe. We decided to include it here to add further customization. For example, you can provide +variables for both days and years for a more fine-tuned jitter. We also wanted to add it here because +technically it is a custom action. A jitter (as a custom deid function) might look like this: + +``` +%header + +REPLACE fields:AcquisitionDate deid_func:jitter days=1 +``` + +or some number of years and days: + +``` +%header + +REPLACE fields:AcquisitionDate deid_func:jitter days=1 years=1 +``` + +And that's it! If you want to request or contribute a custom (deid provided) function, please +[open an issue](https://github.com/pydicom/deid/issues). diff --git a/docs/_docs/user-docs/tags.md b/docs/_docs/user-docs/tags.md index a3cb618d..a9baca89 100644 --- a/docs/_docs/user-docs/tags.md +++ b/docs/_docs/user-docs/tags.md @@ -1,7 +1,7 @@ --- title: Tags category: User Documentation -order: 6 +order: 7 --- It is sometimes helpful to be able to find a particular tag. [Pydicom](https://www.github.com/pydicom/pydicom)