From 516aa71391086e3677382049901d948701c5b3aa Mon Sep 17 00:00:00 2001
From: Sally Grindstaff <sallybg@uw.edu>
Date: Tue, 21 Nov 2023 16:28:29 -0800
Subject: [PATCH 1/4] Clinical ETL: Update ethnicity mapper

---
 lib/seattleflu/id3c/cli/command/etl/__init__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/seattleflu/id3c/cli/command/etl/__init__.py b/lib/seattleflu/id3c/cli/command/etl/__init__.py
index 8933f5a5..dab1a78d 100644
--- a/lib/seattleflu/id3c/cli/command/etl/__init__.py
+++ b/lib/seattleflu/id3c/cli/command/etl/__init__.py
@@ -196,6 +196,8 @@ def ethnicity(ethnicity: Optional[str]) -> Optional[bool]:
         "null":                               None,
         "declined to answer":                 None,
         "unable to collect":                  None,
+        "prefer not to answer":               None,
+        "don't know":                         None,
     }
 
     if ethnicity not in mapper:

From ff603d1ac6f0892aa0c2ed70d458da88423d3c0c Mon Sep 17 00:00:00 2001
From: Sally Grindstaff <sallybg@uw.edu>
Date: Tue, 21 Nov 2023 16:29:23 -0800
Subject: [PATCH 2/4] Clinical ETL: Fix codeable concepts for KP2023 functions

---
 lib/seattleflu/id3c/cli/command/etl/clinical.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/lib/seattleflu/id3c/cli/command/etl/clinical.py b/lib/seattleflu/id3c/cli/command/etl/clinical.py
index 0b6c0de1..f4d08016 100644
--- a/lib/seattleflu/id3c/cli/command/etl/clinical.py
+++ b/lib/seattleflu/id3c/cli/command/etl/clinical.py
@@ -492,10 +492,11 @@ def create_symptom_conditions(record: dict, patient_reference: dict, encounter_r
     for symptom in record['symptom']:
         mapped_symptom_name = map_symptom(symptom)
         onset_date = record['date_symptom_onset']
-        symptom_code = {
-            "system": f"{SFS}/symptom",
-            "code": mapped_symptom_name
-        }
+        symptom_code = create_codeable_concept(
+            system = f"{SFS}/symptom",
+            code = mapped_symptom_name
+        )
+        
 
         condition_resource = create_condition_resource(mapped_symptom_name,
                                 patient_reference,
@@ -1047,7 +1048,10 @@ def create_icd10_conditions_kp2023(record:dict, patient_reference: dict) -> list
         condition_resource = create_condition_resource(icd10_code,
                                 patient_reference,
                                 None,
-                                icd10_codes[icd10_code]
+                                create_codeable_concept(
+                                    system = icd10_codes[icd10_code]["system"], 
+                                    code = icd10_codes[icd10_code]["code"], 
+                                    display = icd10_codes[icd10_code]["display"])
                             )
 
         condition_entries.append(create_resource_entry(

From 46ed98b7393038a9301f004778378e70ba921bab Mon Sep 17 00:00:00 2001
From: Sally Grindstaff <sallybg@uw.edu>
Date: Tue, 28 Nov 2023 14:00:54 -0800
Subject: [PATCH 3/4] Cast census tract as string in clinical parse-kp2023

---
 lib/seattleflu/id3c/cli/command/clinical.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/lib/seattleflu/id3c/cli/command/clinical.py b/lib/seattleflu/id3c/cli/command/clinical.py
index f29df671..83d35ef0 100644
--- a/lib/seattleflu/id3c/cli/command/clinical.py
+++ b/lib/seattleflu/id3c/cli/command/clinical.py
@@ -1022,6 +1022,10 @@ def parse_kp2023(kp2023_filename: str) -> None:
     for col in date_cols:
         clinical_records[col] = pd.to_datetime(clinical_records[col]).dt.strftime('%Y-%m-%d')
 
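+    # Convert census_tract to a string. NOTE(review): missing values appear to become the literal string '<NA>' after astype('str') -- confirm this is intended.
+    # Do this here rather than at import time with `dtype`, because the latter would require assuming the capitalization of the column name as provided by KP.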
+    clinical_records['census_tract'] = clinical_records['census_tract'].astype('Int64').astype('str')
+
     # ensure there are no unintended columns being kept
     columns_to_keep = [
         '_provenance',

From 62abebfcccc3a7ddcb073f0fe1affa2614c12efc Mon Sep 17 00:00:00 2001
From: Sally Grindstaff <sallybg@uw.edu>
Date: Mon, 4 Dec 2023 11:56:27 -0800
Subject: [PATCH 4/4] Handle empty input/output in deduplicate-kp2023 and
 match-kp2023

Handle empty input in deduplicate-kp2023.
If no unmatched records are found by match-kp2023, do not write to the unmatched manifest.
Also fix a typo ("Date" -> "Data") in the match-kp2023 docstring.
---
 lib/seattleflu/id3c/cli/command/clinical.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/lib/seattleflu/id3c/cli/command/clinical.py b/lib/seattleflu/id3c/cli/command/clinical.py
index 83d35ef0..d9e6a2e5 100644
--- a/lib/seattleflu/id3c/cli/command/clinical.py
+++ b/lib/seattleflu/id3c/cli/command/clinical.py
@@ -1268,7 +1268,7 @@ def match_kp2023(kp2023_manifest_filename: str, kp2023_manifest_matched_filename
     records to the matched file. Removes any matches from <KP2023 Clinical Manifest name>
     before writing it to <KP2023 Clinical Manifest Unmatched Data output filename>.
 
-    <KP2023 Clinical Manifest Matched Date filename> does not have to be an existing file,
+    <KP2023 Clinical Manifest Matched Data filename> does not have to be an existing file,
     but a filename must be provided. If the file does not exist, the newly matched records
     will be output to stdout without consolidating with previously matched records.
 
@@ -1345,7 +1345,8 @@ def match_kp2023(kp2023_manifest_filename: str, kp2023_manifest_matched_filename
     matched_clinical_records = pd.concat([matched_clinical_records, newly_matched_clinical_records]).reset_index(drop=True)
     LOG.info(f"A total of {len(matched_clinical_records)} records are matched to LIMS data with {len(unmatched_clinical_records)} still unmatched.")
 
-    unmatched_clinical_records.to_json(kp2023_manifest_unmatched_output_filename, orient='records', lines=True)
+    if not unmatched_clinical_records.empty:
+        unmatched_clinical_records.to_json(kp2023_manifest_unmatched_output_filename, orient='records', lines=True)
     if not matched_clinical_records.empty:
         dump_ndjson(matched_clinical_records)
 
@@ -1370,6 +1371,10 @@ def deduplicate_kp2023(kp2023_master_manifest_filename: str) -> None:
     # read in ndjson as pandas df
     clinical_records = pd.read_json(kp2023_master_manifest_filename, orient='records', dtype={'census_tract': 'string', 'age': 'int64'}, lines=True)
 
+    if clinical_records.empty:
+        LOG.info("No clinical records provided, nothing to deduplicate.")
+        return
+
     # sort by timestamp
     clinical_records = clinical_records.sort_values(by='spreadsheet_timestamp', ascending=True)