Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow other VR Types for Date Jitter #182

Merged
merged 9 commits into from
Jul 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Referenced versions in headers are tagged on Github, in parentheses are for pypi
## [vxx](https://github.com/pydicom/deid/tree/master) (master)
- updated pydicom dependency from 1.3.0 to 2.1.1 [#171] (https://github.com/pydicom/deid/issues/171) (0.2.25)
- bug fix for multivalued fields in %values lists [#174](https://github.com/pydicom/deid/issues/174)
- allowing other VR types for jitter [#175](https://github.com/pydicom/deid/issues/175)
- ensuring that an add/replace of an existing value is also updated in fields [#173](https://github.com/pydicom/deid/issues/173)
- change to correct issue with deidentifying RGB images [#165](https://github.com/pydicom/deid/issues/165) (0.2.24)
- removing verbosity of debug logger (0.2.23)
Expand Down
Binary file modified deid/data/humans/ctbrain1.dcm
Binary file not shown.
42 changes: 19 additions & 23 deletions deid/dicom/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,47 +23,36 @@
"""

from deid.logger import bot
from deid.dicom.tags import update_tag
from deid.utils import get_timestamp

import re


# Timestamps


def jitter_timestamp(dicom, field, value):
def jitter_timestamp(field, value):
"""if present, jitter a timestamp in dicom
field "field" by number of days specified by "value"
The value can be positive or negative.

Parameters
==========
dicom: the pydicom Dataset
field: the field with the timestamp
value: number of days to jitter by. Jitter bug!

"""
if not isinstance(value, int):
value = int(value)

original = dicom.get(field)
original = field.element.value
new_value = original

if original is not None:

# Create default for new value
new_value = None

# If we have a string, we need to create a DataElement
if isinstance(field, str):
dcmvr = dicom.data_element(field).VR
else:
dcmvr = dicom.get(field).VR
original = dicom.get(field).value
dcmvr = field.element.VR

# DICOM Value Representation can be either DA (Date) DT (Timestamp),
# or something else, which is not supported.

if dcmvr == "DA":
# NEMA-compliant format for DICOM date is YYYYMMDD
new_value = get_timestamp(original, jitter_days=value, format="%Y%m%d")
Expand All @@ -81,12 +70,19 @@ def jitter_timestamp(dicom, field, value):
)

else:
# Do nothing and issue a warning.
bot.warning("JITTER not supported for %s with VR=%s" % (field, dcmvr))

if new_value is not None and new_value != original:

# Only update if there's something to update AND there's been change
dicom = update_tag(dicom, field=field, value=new_value)

return dicom
# If the field type is not supplied, attempt to parse different formats
for fmtstr in ["%Y%m%d", "%Y%m%d%H%M%S.%f%z", "%Y%m%d%H%M%S.%f"]:
try:
new_value = get_timestamp(
original, jitter_days=value, format=fmtstr
)
break
except:
pass

# If nothing works, do nothing and issue a warning.
if not new_value:
bot.warning("JITTER not supported for %s with VR=%s" % (field, dcmvr))

return new_value
19 changes: 2 additions & 17 deletions deid/dicom/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,24 +426,9 @@ def _run_action(self, field, action, value=None):
item=self.lookup, dicom=self.dicom, value=value, field=field
)
if value is not None:

# Cut out early if the field isn't in the dicom
if field.name not in self.dicom:
return

# Preserve the old value in case we need to update
old_value = self.dicom[field.name].value

# Jitter the field by the supplied value
jitter_timestamp(
dicom=self.dicom, field=field.element.keyword, value=value
)

# Get the updated value, update if it's different
new_value = self.dicom[field.name].value
if old_value != new_value:
self.replace_field(field, new_value)

new_val = jitter_timestamp(field=field, value=value)
self.replace_field(field, new_val)
else:
bot.warning("JITTER %s unsuccessful" % field)

Expand Down
51 changes: 32 additions & 19 deletions deid/tests/test_dicom_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,48 +89,61 @@ def test_get_files_as_list(self):

def test_jitter_timestamp(self):

from deid.dicom.fields import DicomField
from deid.dicom.actions import jitter_timestamp
from deid.dicom.tags import get_tag

dicom = get_dicom(self.dataset)

print("Testing test_jitter_timestamp")

print("Case 1: Testing jitter_timestamp with DICOM Date (DA)")
name = "StudyDate"
tag = get_tag(name)
dicom.StudyDate = "20131210"
dicom.data_element("StudyDate").VR = "DA"
jitter_timestamp(dicom, "StudyDate", 10)
dicom.data_element(name).VR = "DA"
field = DicomField(dicom.data_element(name), name, str(tag["tag"]))
actual = jitter_timestamp(field, 10)
expected = "20131220"
self.assertEqual(dicom.StudyDate, expected)
self.assertEqual(actual, expected)

print("Case 2: Testing with DICOM timestamp (DT)")
name = "AcquisitionDateTime"
tag = get_tag(name)
dicom.AcquisitionDateTime = "20131210081530"
dicom.data_element("AcquisitionDateTime").VR = "DT"
jitter_timestamp(dicom, "AcquisitionDateTime", 10)
dicom.data_element(name).VR = "DT"
field = DicomField(dicom.data_element(name), name, str(tag["tag"]))
actual = jitter_timestamp(field, 10)
expected = "20131220081530.000000"
self.assertEqual(dicom.AcquisitionDateTime, expected)
self.assertEqual(actual, expected)

print("Case 3: Testing with non-standard DICOM date (DA)")
name = "StudyDate"
tag = get_tag(name)
dicom.StudyDate = "2013/12/10"
dicom.data_element("StudyDate").VR = "DA"
jitter_timestamp(dicom, "StudyDate", 10)
dicom.data_element(name).VR = "DA"
field = DicomField(dicom.data_element(name), name, str(tag["tag"]))
actual = jitter_timestamp(field, 10)
expected = "20131220"
self.assertEqual(dicom.StudyDate, expected)
self.assertEqual(actual, expected)

print("Case 4: Testing negative jitter value")
name = "StudyDate"
tag = get_tag(name)
dicom.StudyDate = "20131210"
jitter_timestamp(dicom, "StudyDate", -5)
field = DicomField(dicom.data_element(name), name, str(tag["tag"]))
actual = jitter_timestamp(field, -5)
expected = "20131205"
self.assertEqual(dicom.StudyDate, expected)
self.assertEqual(actual, expected)

print("Case 5: Testing with empty field")
dicom.StudyDate = expected = ""
jitter_timestamp(dicom, "StudyDate", 10)
self.assertEqual(dicom.StudyDate, expected)

print("Case 6: Testing with nonexistent field")
del dicom.StudyDate
jitter_timestamp(dicom, "StudyDate", 10)
self.assertTrue("StudyDate" not in dicom)
name = "StudyDate"
tag = get_tag(name)
dicom.StudyDate = ""
field = DicomField(dicom.data_element(name), name, str(tag["tag"]))
actual = jitter_timestamp(field, 10)
expected = None
self.assertEqual(actual, expected)


def get_dicom(dataset):
Expand Down
95 changes: 86 additions & 9 deletions deid/tests/test_replace_identifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ def test_expanders(self):
strip_sequences=False,
)
self.assertEqual(1, len(result))
self.assertEqual(153, len(result[0]))
self.assertEqual(157, len(result[0]))
with self.assertRaises(KeyError):
check1 = result[0]["ExposureTime"].value
with self.assertRaises(KeyError):
Expand Down Expand Up @@ -545,7 +545,7 @@ def test_fieldset_remove_private(self):
self.assertTrue("(0009, 0010)" in parser.lookup["field_set2_private"])
self.assertTrue("(0010, 0020)" in parser.lookup["field_set2_private"])

self.assertEqual(158, len(parser.dicom))
self.assertEqual(162, len(parser.dicom))
self.assertEqual("SIEMENS CT VA0 COAD", parser.dicom["00190010"].value)
with self.assertRaises(KeyError):
check1 = parser.dicom["00090010"].value
Expand Down Expand Up @@ -650,7 +650,7 @@ def test_tag_expanders_tagelement(self):
strip_sequences=False,
)
self.assertEqual(1, len(result))
self.assertEqual(135, len(result[0]))
self.assertEqual(139, len(result[0]))
with self.assertRaises(KeyError):
check1 = result[0]["00090010"].value
with self.assertRaises(KeyError):
Expand Down Expand Up @@ -685,7 +685,7 @@ def contains_hibbard(dicom, value, field, item):
parser.define("contains_hibbard", contains_hibbard)
parser.parse()

self.assertEqual(156, len(parser.dicom))
self.assertEqual(160, len(parser.dicom))
with self.assertRaises(KeyError):
check1 = parser.dicom["ReferringPhysicianName"].value
with self.assertRaises(KeyError):
Expand Down Expand Up @@ -719,7 +719,7 @@ def test_strip_sequences(self):
strip_sequences=True,
)
self.assertEqual(1, len(result))
self.assertEqual(152, len(result[0]))
self.assertEqual(156, len(result[0]))
with self.assertRaises(KeyError):
check1 = result[0]["00081110"].value
for tag in result[0]:
Expand Down Expand Up @@ -789,7 +789,7 @@ def test_jitter_compounding(self):
)

self.assertEqual(1, len(result))
self.assertEqual(151, len(result[0]))
self.assertEqual(155, len(result[0]))
self.assertEqual("20230104", result[0]["StudyDate"].value)

def test_addremove_compounding(self):
Expand Down Expand Up @@ -820,7 +820,7 @@ def test_addremove_compounding(self):
)

self.assertEqual(1, len(result))
self.assertEqual(151, len(result[0]))
self.assertEqual(155, len(result[0]))
with self.assertRaises(KeyError):
willerror = result[0]["PatientIdentityRemoved"].value

Expand Down Expand Up @@ -852,7 +852,7 @@ def test_removeadd_compounding(self):
)

self.assertEqual(1, len(result))
self.assertEqual(151, len(result[0]))
self.assertEqual(155, len(result[0]))
self.assertEqual("123456", result[0]["PatientID"].value)

def test_valueset_empty_remove(self):
Expand Down Expand Up @@ -942,7 +942,84 @@ def test_valueset_remove_one_empty(self):
with self.assertRaises(KeyError):
check2 = result[0]["Manufacturer"].value

# MORE TESTS NEED TO BE WRITTEN TO TEST SEQUENCES
def test_jitter_values(self):
"""
Testing to ensure fields (including non-DA/DT VR fields) identified by a values list
are appropriately jittered

%values value_set1
FIELD StudyDate
%header
JITTER values:value_set1 1
"""
import pydicom

print("Test jitter from values list")
dicom_file = get_file(self.dataset)
original_dataset = pydicom.dcmread(dicom_file)

actions = [{"action": "JITTER", "field": "values:value_set1", "value": "1"}]
values = OrderedDict()
values["value_set1"] = [{"field": "StudyDate", "action": "FIELD"}]
recipe = create_recipe(actions, values=values)

# Check that values we want are present using DicomParser
parser = DicomParser(dicom_file, recipe=recipe)
parser.parse()
self.assertEqual(len(parser.lookup["value_set1"]), 1)
self.assertTrue("20230101" in parser.lookup["value_set1"])

# Perform action
result = replace_identifiers(
dicom_files=dicom_file,
deid=recipe,
save=False,
remove_private=False,
strip_sequences=False,
)

self.assertEqual(1, len(result))
self.assertEqual(len(original_dataset), len(result[0]))
self.assertEqual("20230102", result[0]["StudyDate"].value)
self.assertEqual("20230102", result[0]["SeriesDate"].value)
self.assertEqual("20230102", result[0]["AcquisitionDate"].value)
self.assertEqual("20230102", result[0]["ContentDate"].value)
self.assertEqual("20230102", result[0]["00291019"].value)
self.assertEqual("20230102011721.621000", result[0]["00291020"].value)
self.assertEqual(20230102, result[0]["00291021"].value)
self.assertEqual("20230102011721.621000-0040", result[0]["00291022"].value)

def test_jitter_private_tag(self):
"""
Testing to private tags can be jittered

%header
JITTER 00291019 1
"""
import pydicom

print("Test jitter private tag")
dicom_file = get_file(self.dataset)
original_dataset = pydicom.dcmread(dicom_file)

actions = [{"action": "JITTER", "field": "00291019", "value": "1"}]
recipe = create_recipe(actions)

# Perform action
result = replace_identifiers(
dicom_files=dicom_file,
deid=recipe,
save=False,
remove_private=False,
strip_sequences=False,
)

self.assertEqual(1, len(result))
self.assertEqual(len(original_dataset), len(result[0]))
self.assertEqual("20230102", result[0]["00291019"].value)


# MORE TESTS NEED TO BE WRITTEN TO TEST SEQUENCES


def create_recipe(actions, fields=None, values=None):
Expand Down