diff --git a/deid/dicom/fields.py b/deid/dicom/fields.py
index e2d7af83..4d1a78d6 100644
--- a/deid/dicom/fields.py
+++ b/deid/dicom/fields.py
@@ -250,8 +250,8 @@ def get_fields(dicom, skip=None, expand_sequences=True, seen=None):
if not isinstance(skip, list):
skip = [skip]
- # Retrieve both dicom and file meta fields
- datasets = [dicom, dicom.file_meta]
+ # Retrieve both dicom and file meta fields if dicom came from a file
+ datasets = [d for d in [dicom, dicom.get("file_meta")] if d]
def add_element(element, name, uid, is_filemeta):
"""
diff --git a/deid/dicom/parser.py b/deid/dicom/parser.py
index f041ad5d..2b93c6af 100644
--- a/deid/dicom/parser.py
+++ b/deid/dicom/parser.py
@@ -59,6 +59,15 @@ class DicomParser:
def __init__(
self, dicom_file, recipe=None, config=None, force=True, disable_skip=False
):
+ """
+ Create new instance of DicomParser
+
+ :param dicom_file: Path to a dicom file or instance of a pydicom.Dataset
+ :param recipe: a deid recipe, defaults to None
+ :param config: deid config, defaults to None
+ :param force: ignore errors when reading a dicom file, defaults to True
+ :param disable_skip: TODO document; presumably disables skipping of protected fields (confirm against usage), defaults to False
+ """
# Lookup for the dicom
self.lookup = {}
@@ -81,6 +90,7 @@ def __init__(
# Deid can be a recipe or filename
if not isinstance(recipe, DeidRecipe):
recipe = DeidRecipe(recipe)
+
self.load(dicom_file, force=force)
self.recipe = recipe
@@ -104,15 +114,15 @@ def load(self, dicom_file, force=True):
if isinstance(dicom_file, Dataset):
self.dicom = dicom_file
else:
-
# If we must read the file, the path must exist
if not os.path.exists(dicom_file):
bot.exit("%s does not exist." % dicom_file)
self.dicom = read_file(dicom_file, force=force)
# Set class variables that might be helpful later
- self.dicom_file = os.path.abspath(self.dicom.filename)
- self.dicom_name = os.path.basename(self.dicom_file)
+ df = self.dicom.get("filename")
+ self.dicom_file = None if not df else os.path.abspath(df)
+ self.dicom_name = None if not df else os.path.basename(self.dicom_file)
def define(self, name, value):
"""
diff --git a/docs/_docs/examples/deid-dataset.md b/docs/_docs/examples/deid-dataset.md
new file mode 100644
index 00000000..6e11090d
--- /dev/null
+++ b/docs/_docs/examples/deid-dataset.md
@@ -0,0 +1,254 @@
+---
+title: Deidentify a Pydicom Dataset
+category: Examples
+order: 6
+---
+
+In this example we will create a custom class to deidentify a single instance of a `pydicom.Dataset` with a custom recipe.
+
+
+### Overview
+We will use four files for this example:
+```
+my_deid_example
+├── my_deid_recipe.dicom
+├── my_dicom_file.json
+├── my_module.py
+└── requirements.txt
+```
+
+The `requirements.txt` file lists the packages that are needed to run this example.
+```
+deid
+pydicom
+pycryptodome
+```
+
+We can install them by running the following commands (requires `conda`):
+```bash
+conda create -n deid_example python=3.9
+conda activate deid_example
+cd my_deid_example
+pip install -r requirements.txt
+```
+
+
+The contents of `my_dicom_file.json` are used to load a pydicom.Dataset instance.
+```json
+{
+ "SpecificCharacterSet":{"vr":"CS","Value":["ISO_IR 100"]},
+ "ImageType":{"vr":"CS","Value":["DERIVED","PRIMARY"]},
+ "SOPClassUID":{"vr":"UI","Value":["1.2.840.10008.5.1.4.1.1.1.2"]},
+ "StudyDate":{"vr":"DA","Value":["20220627"]},
+ "SeriesDate":{"vr":"DA","Value":["20220627"]},
+ "AcquisitionDate":{"vr":"DA","Value":["20220627"]},
+ "ContentDate":{"vr":"DA","Value":["20220627"]},
+ "StudyTime":{"vr":"TM","Value":["080803"]},
+ "ContentTime":{"vr":"TM","Value":["080808.202000"]},
+ "PatientName":{"vr":"PN","Value":[{"Alphabetic":"Maria^Doe"}]},
+ "PatientID":{"vr":"LO","Value":["1234567890"]},
+ "PatientBirthDate":{"vr":"DA","Value":["19900606"]},
+ "Modality":{"vr":"CS","Value":["MG"]},
+ "PatientSex":{"vr":"CS","Value":["F"]},
+ "PatientAge":{"vr":"AS","Value":["032Y"]},
+ "StudyID":{"vr":"SH","Value":["mammogram87654"]}
+}
+```
+
+
+### The recipe
+
+We create a custom recipe `my_deid_recipe.dicom` that specifies what we want to do.
+```
+FORMAT dicom
+
+%header
+
+ADD PatientIdentityRemoved Yes
+ADD DeidentificationMethod my_deid_recipe.dicom.v1.0
+
+# Specify what we want to keep
+
+KEEP ContentDate
+KEEP StudyDate
+
+# Replacements with custom functions. Those are registered in my_module.py
+
+REPLACE PatientName func:replace_name
+REPLACE AccessionNumber func:hash_func
+REPLACE AdmissionID func:hash_func
+REPLACE InterpretationID func:hash_func
+REPLACE PatientBirthDate func:remove_day
+REPLACE PatientID func:hash_func
+REPLACE PerformedProcedureStepID func:hash_func
+REPLACE PerformingPhysicianName func:hash_func
+REPLACE RequestedProcedureID func:hash_func
+REPLACE ResultsID func:hash_func
+REPLACE StudyID func:hash_func
+
+
+# Tags that require custom regex expressions
+# Curve Data (50xx,xxxx)
+REMOVE contains:^50.{6}$
+# Overlay comments and data (60xx[34]000)
+REMOVE contains:^60.{2}[34]000$
+# Private tags ggggeeee where gggg is odd
+REMOVE contains:^.{3}[13579].{4}$
+
+# Blank or remove the other tags
+
+BLANK PatientWeight
+BLANK PatientSize
+REMOVE PatientAge
+REMOVE SeriesDate
+REMOVE AcquisitionDate
+REMOVE StudyTime
+REMOVE ContentTime
+REMOVE PatientAge
+REMOVE PatientSex
+
+# ... etc
+```
+
+
+### The custom deidentifier class
+
+```python
+from deid.config import DeidRecipe
+from deid.dicom.parser import DicomParser
+import pydicom
+from Crypto.Hash import SHA512
+from datetime import datetime
+
+class DeidDataset:
+ """Pseudonymize an instance of pydicom.Dataset using
+ our custom recipe and the functions registered for it.
+ """
+ def __init__(self, secret_salt: str, recipe_path: str):
+ """Create a new instance of our pseudonymizer class.
+
+ :param secret_salt: a random string that is prepended to every
+ value before hashing, which makes the hashing harder to break.
+ :param recipe_path: path to our deid recipe.
+ """
+ self.secret_salt = secret_salt
+ self.recipe = DeidRecipe(recipe_path)
+
+ def pseudonymize(self, dataset:pydicom.Dataset) -> pydicom.Dataset:
+ """Pseudonymize a single dicom dataset.
+
+ :param dataset: dataset that will be pseudonymized
+ :returns: pseudonymized dataset (the parser's `dicom` attribute)
+ """
+ parser = DicomParser(dataset, self.recipe)
+ # Register the functions that the recipe refers to as func:<name>
+ parser.define('replace_name', self.replace_name)
+ parser.define('hash_func', self.deid_hash_func)
+ parser.define('remove_day', self.remove_day)
+ # Parse the dataset and apply the deidentification
+ parser.parse(strip_sequences=True, remove_private=True)
+ return parser.dicom
+
+ # All registered functions that are used in the recipe must
+ # receive the arguments: `item`, `value`, `field`, `dicom`
+
+ def deid_hash_func(self, item, value, field, dicom) -> str:
+ """Apply self.hash to str(field.element.value)."""
+ val = field.element.value
+ return self.hash(str(val))
+
+ @staticmethod
+ def remove_day(item, value, field, dicom) -> str:
+ """Set the day of a YYYYMMDD date value (e.g. a DA field) to 01."""
+ dt = datetime.strptime(field.element.value, '%Y%m%d')
+ return dt.strftime("%Y%m01")
+
+ @staticmethod
+ def replace_name(item, value, field, dicom) -> str:
+ """Replace PatientName with PatientSex, coarse PatientAge and Modality."""
+ sex = dicom.get('PatientSex')
+ sex = {"F":'Female', "M": 'Male', 'O':'Other'}[sex]  # NOTE(review): KeyError if PatientSex is absent or not F/M/O
+ age = DeidDataset.round_to_nearest(int(dicom.get('PatientAge')[:-1]), 5)
+ return f"{sex} {age:03d}Y {dicom.get('Modality')}"
+
+ # Helper methods for our registered ones
+ @staticmethod
+ def round_to_nearest(value, interval):
+ """Round value to the closest multiple of interval."""
+ return interval * round(value/interval)
+
+ def hash(self, msg: str) -> str:
+ """Hash msg, prefixed with the secret salt, using SHA-512 truncated to 256 bits.
+ :param msg: message that we want to hash,
+ normally the PatientID or the StudyID.
+ :return: the hash of the salted message as hexdigest
+ (in characters from '0' to '9' and 'a' to 'f')
+ """
+ assert type(msg) == str, f"value is not of type str, {type(msg)}"  # NOTE(review): asserts are stripped under -O; consider isinstance + TypeError
+ h = SHA512.new(truncate="256")
+ bytes_str = bytes(f"{self.secret_salt}{msg}", "utf-8")
+ h.update(bytes_str)
+ return str(h.hexdigest())
+
+# Load the pydicom Dataset
+import json
+
+# Unorthodox way of loading a pydicom.Dataset:
+# please see the pydicom documentation for more information
+# on how to load dicom files
+with open('my_dicom_file.json') as f:
+ dataset_dict = json.load(f)
+dataset = pydicom.Dataset.from_json(dataset_dict)
+
+print('Dataset before pseudonymization')
+print(dataset)
+
+# Create an instance of our class
+deid_ds = DeidDataset("!2#4%6&7abc", 'my_deid_recipe.dicom')
+
+# Pseudonymize the dataset
+print('\nDataset after pseudonymization')
+pseudonymized = deid_ds.pseudonymize(dataset)
+print(pseudonymized)
+```
+
+If we execute our Python module:
+
+```bash
+python my_module.py
+```
+
+It will give us the following output:
+```
+Dataset before pseudonymization
+(0008, 0005) Specific Character Set CS: 'ISO_IR 100'
+(0008, 0008) Image Type CS: ['DERIVED', 'PRIMARY']
+(0008, 0016) SOP Class UID UI: Digital Mammography X-Ray Image Storage - For Presentation
+(0008, 0020) Study Date DA: '20220627'
+(0008, 0021) Series Date DA: '20220627'
+(0008, 0022) Acquisition Date DA: '20220627'
+(0008, 0023) Content Date DA: '20220627'
+(0008, 0030) Study Time TM: '080803'
+(0008, 0033) Content Time TM: '080808.202000'
+(0008, 0060) Modality CS: 'MG'
+(0010, 0010) Patient's Name PN: 'Maria^Doe'
+(0010, 0020) Patient ID LO: '1234567890'
+(0010, 0030) Patient's Birth Date DA: '19900606'
+(0010, 0040) Patient's Sex CS: 'F'
+(0010, 1010) Patient's Age AS: '032Y'
+(0020, 0010) Study ID SH: 'mammogram87654'
+
+Dataset after pseudonymization
+(0008, 0005) Specific Character Set CS: 'ISO_IR 100'
+(0008, 0008) Image Type CS: ['DERIVED', 'PRIMARY']
+(0008, 0016) SOP Class UID UI: Digital Mammography X-Ray Image Storage - For Presentation
+(0008, 0020) Study Date DA: '20220627'
+(0008, 0023) Content Date DA: '20220627'
+(0008, 0060) Modality CS: 'MG'
+(0010, 0010) Patient's Name PN: 'Female 030Y MG'
+(0010, 0020) Patient ID LO: 'df65775690879c36437ae950c52d025102a1f9b8c8132f8b017f14e9ec45eacb'
+(0010, 0030) Patient's Birth Date DA: '19900601'
+(0012, 0062) Patient Identity Removed CS: 'Yes'
+(0012, 0063) De-identification Method LO: 'my_deid_recipe.dicom.v1.0'
+(0020, 0010) Study ID SH: 'ae4b477e5709d0c1f746e0adc9ab552fee100b91416f9f3a04037e999077e823'
+```
\ No newline at end of file