From 681d6be149545af0e217cc5cf4eb0b13a7a6716f Mon Sep 17 00:00:00 2001 From: Sally Grindstaff Date: Mon, 22 Apr 2024 12:24:21 -0700 Subject: [PATCH] parse-kp: do not convert timestamp to local timezone In summer 2022, all clinical parse functions were updated to convert encountered date timestamps from UTC to local time (see https://github.com/seattleflu/id3c-customizations/commit/3205e0b2e5f24900e59548f30a3d63ef5d78af19). Now, in 2024, we are reingesting kp encounter metadata from 2018-2021. These encounters were processed and uploaded to id3c with UTC encounter dates. Since the encounter identifier depends on the encounter date, and since encounter identifiers should be the same each time a sample is uploaded to id3c, the encounter date should be in UTC for these kp samples. --- lib/seattleflu/id3c/cli/command/clinical.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/seattleflu/id3c/cli/command/clinical.py b/lib/seattleflu/id3c/cli/command/clinical.py index 34f2eca6..3ed80e39 100644 --- a/lib/seattleflu/id3c/cli/command/clinical.py +++ b/lib/seattleflu/id3c/cli/command/clinical.py @@ -424,7 +424,14 @@ def parse_kp(kp_filename, kp_specimen_manifest_filename, manifest_format, output clinical_records = clinical_records[column_map.values()] # Convert dtypes - clinical_records["encountered"] = pd.to_datetime(clinical_records["encountered"]).dt.tz_localize('America/Los_Angeles') + #clinical_records["encountered"] = pd.to_datetime(clinical_records["encountered"]).dt.tz_localize('America/Los_Angeles') + # unlike other clinical parse functions, do not convert from UTC to local timezone + # this is because of a reingestion of kp 2018-2021 encounter metadata in 2024, in order to include ICD-10 codes + # timestamp conversion from UTC to local timezone was only added after kp 2018-2021 encounters were processed into id3c + # encounter identifiers are based on encounter date, so need to keep encounter date consistent with old + # records in order 
to avoid re-uploading the same encounter to id3c with a different encounter identifier than before + + clinical_records["encountered"] = pd.to_datetime(clinical_records["encountered"]) # Insert static value columns clinical_records["site"] = "KP"